// graphql
//
// Search for an npm package
import { syntaxError } from '../error/syntaxError.mjs';
import { Token } from './ast.mjs';
import { dedentBlockStringLines } from './blockString.mjs';
import { isDigit, isNameContinue, isNameStart } from './characterClasses.mjs';
import { TokenKind } from './tokenKind.mjs';
/**
 * A Lexer walks a Source and hands out its tokens one at a time.
 *
 * Tokens are chained into a doubly linked list (`prev` / `next`) that is
 * extended lazily as more of the source is read. Once the current token is of
 * kind EOF, `lookahead()` and `advance()` keep returning that same EOF token.
 */
export class Lexer {
  /**
   * @param source The Source to lex; its `body` string is what gets tokenized.
   */
  constructor(source) {
    // Both token pointers start out on a synthetic start-of-file token.
    const sof = new Token(TokenKind.SOF, 0, 0, 0, 0);
    this.source = source;
    // The previously focused non-ignored token.
    this.lastToken = sof;
    // The currently focused non-ignored token.
    this.token = sof;
    // The (1-indexed) line containing the current token.
    this.line = 1;
    // The character offset at which the current line begins.
    this.lineStart = 0;
  }

  get [Symbol.toStringTag]() {
    return 'Lexer';
  }

  /**
   * Moves the lexer forward to the next non-comment token.
   *
   * @returns The token that is now current (also stored in `this.token`).
   */
  advance() {
    this.lastToken = this.token;
    this.token = this.lookahead();
    return this.token;
  }

  /**
   * Finds the next non-comment token after the current one without moving
   * `this.token` or `this.lastToken`.
   *
   * Tokens that were not read before are lexed on demand and linked into the
   * token list, so repeated calls do not lex the same text twice.
   *
   * @returns The next non-comment token, or the current token if it is EOF.
   */
  lookahead() {
    let cursor = this.token;
    if (cursor.kind === TokenKind.EOF) {
      return cursor;
    }
    do {
      let upcoming = cursor.next;
      if (!upcoming) {
        // Nothing linked yet: lex one more token and splice it into the list.
        upcoming = readNextToken(this, cursor.end);
        cursor.next = upcoming;
        upcoming.prev = cursor;
      }
      cursor = upcoming;
    } while (cursor.kind === TokenKind.COMMENT);
    return cursor;
  }
}
/**
 * Tells whether a token kind is one of the punctuator kinds:
 * `! $ & ( ) ... : = @ [ ] { | }`.
 *
 * @param kind A token kind value to classify.
 * @returns `true` for a punctuator kind, `false` for anything else.
 * @internal
 */
export function isPunctuatorTokenKind(kind) {
  switch (kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;
    default:
      return false;
  }
}
/**
 * Checks whether a number is a Unicode scalar value, i.e. any Unicode code
 * point that is not a surrogate: 0x0000..0xD7FF or 0xE000..0x10FFFF
 * (both ranges inclusive).
 *
 * SourceCharacter ::
 *   - "Any Unicode scalar value"
 *
 * @param code Code unit or code point to test; NaN and negatives yield false.
 * @returns `true` when `code` lies in one of the two ranges.
 */
function isUnicodeScalarValue(code) {
  // Upper range: everything from just above the surrogate block.
  if (code >= 0xe000) {
    return code <= 0x10ffff;
  }
  // Lower range: everything below the surrogate block.
  return code >= 0x0000 && code <= 0xd7ff;
}
/**
 * Checks whether a surrogate pair starts at `location` in `body`.
 *
 * GraphQL source text is defined as a sequence of Unicode scalar values, which
 * excludes surrogates, but JavaScript strings are sequences of UTF-16 code
 * units. A leading surrogate immediately followed by a trailing surrogate
 * encodes one supplementary code point (above U+FFFF) and is therefore a valid
 * source character; an unpaired surrogate is not.
 *
 * @param body String to inspect.
 * @param location Index of the code unit that would be the leading surrogate.
 * @returns `true` only when both halves of the pair are present.
 */
function isSupplementaryCodePoint(body, location) {
  if (!isLeadingSurrogate(body.charCodeAt(location))) {
    return false;
  }
  return isTrailingSurrogate(body.charCodeAt(location + 1));
}
/**
 * Checks whether a code unit is a leading (high) surrogate, 0xD800..0xDBFF.
 *
 * @param code Code unit to test; NaN yields false.
 * @returns `true` when `code` is within the inclusive range.
 */
function isLeadingSurrogate(code) {
  const first = 0xd800;
  const last = 0xdbff;
  return first <= code && code <= last;
}
/**
 * Checks whether a code unit is a trailing (low) surrogate, 0xDC00..0xDFFF.
 *
 * @param code Code unit to test; NaN yields false.
 * @returns `true` when `code` is within the inclusive range.
 */
function isTrailingSurrogate(code) {
  const first = 0xdc00;
  const last = 0xdfff;
  return first <= code && code <= last;
}
/**
 * Formats the code point found at `location` in the lexer's source body for
 * use in error messages.
 *
 * - Past the end of the body: returns `TokenKind.EOF`.
 * - Printable ASCII (U+0020..U+007E): the character wrapped in double quotes,
 *   except for the double quote itself, which is wrapped in single quotes.
 * - Anything else: `U+` followed by the upper-case hex value, zero-padded to
 *   at least four digits (e.g. U+000A, U+1F600).
 *
 * @param lexer Lexer whose `source.body` is inspected.
 * @param location Index into the body.
 * @returns The printable description.
 */
function printCodePointAt(lexer, location) {
  const point = lexer.source.body.codePointAt(location);
  if (point === undefined) {
    return TokenKind.EOF;
  }

  const isPrintableAscii = point >= 0x0020 && point <= 0x007e;
  if (!isPrintableAscii) {
    const hex = point.toString(16).toUpperCase();
    return `U+${hex.padStart(4, '0')}`;
  }

  // A double quote is shown inside single quotes so it stays readable.
  return point === 0x0022 ? "'\"'" : `"${String.fromCodePoint(point)}"`;
}
/**
 * Builds a Token and stamps it with the lexer's current line and the column
 * derived from where that line starts.
 *
 * @param lexer Lexer supplying `line` and `lineStart`.
 * @param kind Token kind.
 * @param start Offset of the token's first character.
 * @param end Offset just past the token's last character.
 * @param value Optional token value.
 * @returns The new Token; its column is 1-based (`1 + start - lineStart`).
 */
function createToken(lexer, kind, start, end, value) {
  const { line, lineStart } = lexer;
  return new Token(kind, start, end, line, 1 + start - lineStart, value);
}
/**
 * Lexes the next token of the source, beginning the scan at `start`.
 *
 * Ignored characters are skipped first; every line terminator passed on the
 * way bumps `lexer.line` and resets `lexer.lineStart`. The first remaining
 * character decides the outcome: punctuators are emitted directly, everything
 * else is delegated to the matching reader (comment, string, block string,
 * number, name).
 *
 * @param lexer Lexer being advanced; its `line` / `lineStart` may be updated.
 * @param start Offset in the source body at which to start scanning.
 * @returns The next token, or an EOF token when only ignored characters remain.
 * @throws A syntax error for a character that cannot start any token.
 */
function readNextToken(lexer, start) {
  const text = lexer.source.body;
  const textLength = text.length;
  // Every single-character punctuator spans exactly [at, at + 1).
  const punctuator = (kind, at) => createToken(lexer, kind, at, at + 1);

  let cursor = start;
  while (cursor < textLength) {
    const code = text.charCodeAt(cursor); // SourceCharacter

    // Ignored ::
    //   - UnicodeBOM :: "Byte Order Mark (U+FEFF)"
    //   - WhiteSpace :: "Horizontal Tab (U+0009)" | "Space (U+0020)"
    //   - Comma :: ,
    if (
      code === 0xfeff ||
      code === 0x0009 ||
      code === 0x0020 ||
      code === 0x002c
    ) {
      cursor += 1;
      continue;
    }

    // LineTerminator ::
    //   - "New Line (U+000A)"
    //   - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
    //   - "Carriage Return (U+000D)" "New Line (U+000A)"
    if (code === 0x000a || code === 0x000d) {
      // \r\n counts as one terminator, so it is consumed as a unit.
      const isCrLf = code === 0x000d && text.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;
      lexer.line += 1;
      lexer.lineStart = cursor;
      continue;
    }

    switch (code) {
      // Comment
      case 0x0023: // #
        return readComment(lexer, cursor);

      // StringValue: three quotes in a row open a block string.
      case 0x0022: {
        const opensBlock =
          text.charCodeAt(cursor + 1) === 0x0022 &&
          text.charCodeAt(cursor + 2) === 0x0022;
        return opensBlock
          ? readBlockString(lexer, cursor)
          : readString(lexer, cursor);
      }

      // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
      case 0x002e: // . (only valid as part of ...)
        if (
          text.charCodeAt(cursor + 1) === 0x002e &&
          text.charCodeAt(cursor + 2) === 0x002e
        ) {
          return createToken(lexer, TokenKind.SPREAD, cursor, cursor + 3);
        }
        break;
      case 0x0021: // !
        return punctuator(TokenKind.BANG, cursor);
      case 0x0024: // $
        return punctuator(TokenKind.DOLLAR, cursor);
      case 0x0026: // &
        return punctuator(TokenKind.AMP, cursor);
      case 0x0028: // (
        return punctuator(TokenKind.PAREN_L, cursor);
      case 0x0029: // )
        return punctuator(TokenKind.PAREN_R, cursor);
      case 0x003a: // :
        return punctuator(TokenKind.COLON, cursor);
      case 0x003d: // =
        return punctuator(TokenKind.EQUALS, cursor);
      case 0x0040: // @
        return punctuator(TokenKind.AT, cursor);
      case 0x005b: // [
        return punctuator(TokenKind.BRACKET_L, cursor);
      case 0x005d: // ]
        return punctuator(TokenKind.BRACKET_R, cursor);
      case 0x007b: // {
        return punctuator(TokenKind.BRACE_L, cursor);
      case 0x007c: // |
        return punctuator(TokenKind.PIPE, cursor);
      case 0x007d: // }
        return punctuator(TokenKind.BRACE_R, cursor);
    }

    // IntValue | FloatValue (Digit | -)
    if (isDigit(code) || code === 0x002d) {
      return readNumber(lexer, cursor, code);
    }

    // Name
    if (isNameStart(code)) {
      return readName(lexer, cursor);
    }

    // Nothing can start here: pick the most helpful error message.
    let message;
    if (code === 0x0027) {
      message =
        'Unexpected single quote character (\'), did you mean to use a double quote (")?';
    } else if (
      isUnicodeScalarValue(code) ||
      isSupplementaryCodePoint(text, cursor)
    ) {
      message = `Unexpected character: ${printCodePointAt(lexer, cursor)}.`;
    } else {
      message = `Invalid character: ${printCodePointAt(lexer, cursor)}.`;
    }
    throw syntaxError(lexer.source, cursor, message);
  }

  return createToken(lexer, TokenKind.EOF, textLength, textLength);
}
/**
 * Lexes a comment that begins with the `#` at `start`.
 *
 * ```
 * Comment :: # CommentChar* [lookahead != CommentChar]
 *
 * CommentChar :: SourceCharacter but not LineTerminator
 * ```
 *
 * The comment runs until a line terminator, the end of the body, or a
 * character that is not a valid source character (an unpaired surrogate).
 *
 * @param lexer Lexer providing the source body and line information.
 * @param start Offset of the `#` character.
 * @returns A COMMENT token whose value is the text after the `#`.
 */
function readComment(lexer, start) {
  const { body } = lexer.source;
  const limit = body.length;
  let cursor = start + 1;

  while (cursor < limit) {
    const code = body.charCodeAt(cursor);

    // LineTerminator (\n | \r) ends the comment.
    if (code === 0x000a || code === 0x000d) {
      break;
    }

    // Width of the SourceCharacter at the cursor; 0 means "not one".
    const width = isUnicodeScalarValue(code)
      ? 1
      : isSupplementaryCodePoint(body, cursor)
      ? 2
      : 0;
    if (width === 0) {
      break;
    }
    cursor += width;
  }

  const text = body.slice(start + 1, cursor);
  return createToken(lexer, TokenKind.COMMENT, start, cursor, text);
}
/**
 * Lexes a number starting at `start`. The token is a FloatValue when a
 * fractional part or an exponent part is present, otherwise an IntValue.
 *
 * ```
 * IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
 *
 * IntegerPart ::
 *   - NegativeSign? 0
 *   - NegativeSign? NonZeroDigit Digit*
 *
 * NegativeSign :: -
 *
 * NonZeroDigit :: Digit but not `0`
 *
 * FloatValue ::
 *   - IntegerPart FractionalPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
 *   - IntegerPart FractionalPart [lookahead != {Digit, `.`, NameStart}]
 *   - IntegerPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
 *
 * FractionalPart :: . Digit+
 *
 * ExponentPart :: ExponentIndicator Sign? Digit+
 *
 * ExponentIndicator :: one of `e` `E`
 *
 * Sign :: one of + -
 * ```
 *
 * @param lexer Lexer providing the source body and line information.
 * @param start Offset of the first character of the number.
 * @param firstCode Code unit found at `start` (a digit or `-`).
 * @returns An INT or FLOAT token whose value is the raw source text.
 * @throws A syntax error for a digit after a leading zero, a missing digit,
 *   or a number directly followed by `.` or a name start character.
 */
function readNumber(lexer, start, firstCode) {
  const { body } = lexer.source;
  let cursor = start;
  let current = firstCode;

  // NegativeSign (-)
  if (current === 0x002d) {
    cursor += 1;
    current = body.charCodeAt(cursor);
  }

  // IntegerPart: either a lone zero or a run of digits.
  if (current !== 0x0030) {
    cursor = readDigits(lexer, cursor, current);
    current = body.charCodeAt(cursor);
  } else {
    cursor += 1;
    current = body.charCodeAt(cursor);
    if (isDigit(current)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid number, unexpected digit after 0: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  // FractionalPart: full stop (.) followed by at least one digit.
  const hasFraction = current === 0x002e;
  if (hasFraction) {
    cursor += 1;
    cursor = readDigits(lexer, cursor, body.charCodeAt(cursor));
    current = body.charCodeAt(cursor);
  }

  // ExponentPart: E or e, an optional sign, then at least one digit.
  const hasExponent = current === 0x0045 || current === 0x0065;
  if (hasExponent) {
    cursor += 1;
    current = body.charCodeAt(cursor);
    if (current === 0x002b || current === 0x002d) {
      cursor += 1;
      current = body.charCodeAt(cursor);
    }
    cursor = readDigits(lexer, cursor, current);
    current = body.charCodeAt(cursor);
  }

  // Numbers cannot be followed by . or NameStart
  if (current === 0x002e || isNameStart(current)) {
    throw syntaxError(
      lexer.source,
      cursor,
      `Invalid number, expected digit but got: ${printCodePointAt(
        lexer,
        cursor,
      )}.`,
    );
  }

  const kind = hasFraction || hasExponent ? TokenKind.FLOAT : TokenKind.INT;
  return createToken(lexer, kind, start, cursor, body.slice(start, cursor));
}
/**
 * Consumes a run of one or more digits.
 *
 * @param lexer Lexer providing the source body (and the source for errors).
 * @param start Offset of the first digit.
 * @param firstCode Code unit found at `start`; it must be a digit.
 * @returns The offset just past the last consecutive digit.
 * @throws A syntax error at `start` when `firstCode` is not a digit.
 */
function readDigits(lexer, start, firstCode) {
  if (isDigit(firstCode)) {
    const { body } = lexer.source;
    let end = start;
    // The first digit is already known to be valid, so step past it before
    // testing whatever follows.
    do {
      end += 1;
    } while (isDigit(body.charCodeAt(end)));
    return end;
  }

  throw syntaxError(
    lexer.source,
    start,
    `Invalid number, expected digit but got: ${printCodePointAt(
      lexer,
      start,
    )}.`,
  );
}
/**
 * Lexes a single-line, double-quoted string that opens with the `"` at
 * `start`. Block strings (`"""`) are handled by a separate reader.
 *
 * ```
 * StringValue ::
 *   - `""` [lookahead != `"`]
 *   - `"` StringCharacter+ `"`
 *
 * StringCharacter ::
 *   - SourceCharacter but not `"` or `\` or LineTerminator
 *   - `\u` EscapedUnicode
 *   - `\` EscapedCharacter
 *
 * EscapedUnicode ::
 *   - `{` HexDigit+ `}`
 *   - HexDigit HexDigit HexDigit HexDigit
 *
 * EscapedCharacter :: one of `"` `\` `/` `b` `f` `n` `r` `t`
 * ```
 *
 * @param lexer Lexer providing the source body and line information.
 * @param start Offset of the opening quote.
 * @returns A STRING token whose value has all escape sequences decoded.
 * @throws A syntax error for an unterminated string, an invalid character,
 *   or (from the escape readers) an invalid escape sequence.
 */
function readString(lexer, start) {
  const { body } = lexer.source;
  const limit = body.length;
  let cursor = start + 1;
  // Start of the literal text that has not been copied into `text` yet.
  let pendingFrom = cursor;
  let text = '';

  while (cursor < limit) {
    const code = body.charCodeAt(cursor);

    switch (code) {
      // Closing Quote (")
      case 0x0022:
        text += body.slice(pendingFrom, cursor);
        return createToken(lexer, TokenKind.STRING, start, cursor + 1, text);

      // Escape Sequence (\)
      case 0x005c: {
        text += body.slice(pendingFrom, cursor);
        let escape;
        if (body.charCodeAt(cursor + 1) !== 0x0075) {
          // Anything but \u is a single-character escape.
          escape = readEscapedCharacter(lexer, cursor);
        } else if (body.charCodeAt(cursor + 2) === 0x007b) {
          // \u{...}
          escape = readEscapedUnicodeVariableWidth(lexer, cursor);
        } else {
          // \uXXXX
          escape = readEscapedUnicodeFixedWidth(lexer, cursor);
        }
        text += escape.value;
        cursor += escape.size;
        pendingFrom = cursor;
        continue;
      }

      // LineTerminator (\n | \r): the string was never closed on this line.
      case 0x000a:
      case 0x000d:
        throw syntaxError(lexer.source, cursor, 'Unterminated string.');
    }

    // SourceCharacter
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
    } else if (isSupplementaryCodePoint(body, cursor)) {
      cursor += 2;
    } else {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
// The escape readers that follow each return `{ value, size }`: the string
// value and lexed size of an escape sequence.
/**
 * Decodes a variable-width Unicode escape of the form `\u{HexDigit+}`.
 *
 * At most 12 characters are examined (`\u{` + 8 hex digits + `}`), at least
 * one hex digit is required, and the encoded value must be a Unicode scalar
 * value.
 *
 * @param lexer Lexer providing the source body (and the source for errors).
 * @param position Offset of the backslash that starts the escape.
 * @returns `{ value, size }`: the decoded character and the number of source
 *   characters the escape occupies.
 * @throws A syntax error quoting the characters examined so far when the
 *   escape is malformed.
 */
function readEscapedUnicodeVariableWidth(lexer, position) {
  const { body } = lexer.source;
  let codePoint = 0;
  // `\u{` has already been recognized by the caller.
  let consumed = 3;

  // Cannot be larger than 12 chars (\u{00000000}).
  while (consumed < 12) {
    const code = body.charCodeAt(position + consumed);
    consumed += 1;

    if (code !== 0x007d) {
      // Append this hex digit to the code point. A non-hex character yields
      // -1, and a 32-bit overflow flips the sign, so both end up negative.
      codePoint = (codePoint << 4) | readHexDigit(code);
      if (codePoint < 0) {
        break;
      }
      continue;
    }

    // Closing Brace (}): must be at least 5 chars (\u{0}) and encode a
    // Unicode scalar value.
    if (consumed >= 5 && isUnicodeScalarValue(codePoint)) {
      return {
        value: String.fromCodePoint(codePoint),
        size: consumed,
      };
    }
    break;
  }

  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${body.slice(
      position,
      position + consumed,
    )}".`,
  );
}
/**
 * Decodes a fixed-width Unicode escape of the form `\uXXXX`.
 *
 * A single escape must encode a Unicode scalar value. A leading surrogate is
 * accepted only when a second `\uXXXX` escape encoding a trailing surrogate
 * follows immediately (JSON-style surrogate pair).
 *
 * @param lexer Lexer providing the source body (and the source for errors).
 * @param position Offset of the backslash that starts the escape.
 * @returns `{ value, size }`: the decoded text and the number of source
 *   characters consumed (6 for one escape, 12 for a surrogate pair).
 * @throws A syntax error quoting the first six characters of the escape when
 *   it is malformed.
 */
function readEscapedUnicodeFixedWidth(lexer, position) {
  const { body } = lexer.source;
  const leadCode = read16BitHexCode(body, position + 2);

  if (isUnicodeScalarValue(leadCode)) {
    return {
      value: String.fromCodePoint(leadCode),
      size: 6,
    };
  }

  // GraphQL allows JSON-style surrogate pair escape sequences, but only when
  // a valid pair is formed: a leading surrogate followed directly by `\u`.
  const hasSecondEscape =
    isLeadingSurrogate(leadCode) &&
    body.charCodeAt(position + 6) === 0x005c &&
    body.charCodeAt(position + 7) === 0x0075;

  if (hasSecondEscape) {
    const trailCode = read16BitHexCode(body, position + 8);
    if (isTrailingSurrogate(trailCode)) {
      // JavaScript strings are sequences of UTF-16 code units and encode code
      // points above U+FFFF as a surrogate pair, so both code units are put
      // into the value as they are rather than decoded into one code point.
      return {
        value: String.fromCodePoint(leadCode, trailCode),
        size: 12,
      };
    }
  }

  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${body.slice(position, position + 6)}".`,
  );
}
/**
 * Reads the four hexadecimal characters that start at `position` and returns
 * the 16-bit value they spell. For example, "000f" gives 15 and "dead" gives
 * 57005.
 *
 * @param body String to read from.
 * @param position Offset of the first of the four characters.
 * @returns The value, or a negative number if any of the four characters is
 *   not a hexadecimal digit.
 */
function read16BitHexCode(body, position) {
  // readHexDigit() returns -1 on error. Once a negative value has been ORed
  // in, shifting left and ORing further digits keeps the result negative.
  let value = 0;
  for (let offset = 0; offset < 4; offset += 1) {
    value = (value << 4) | readHexDigit(body.charCodeAt(position + offset));
  }
  return value;
}
/**
 * Converts the character code of a hexadecimal digit to its value (0-15).
 *
 * '0' becomes 0, '9' becomes 9
 * 'A' becomes 10, 'F' becomes 15
 * 'a' becomes 10, 'f' becomes 15
 *
 * HexDigit :: one of
 *   - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
 *   - `A` `B` `C` `D` `E` `F`
 *   - `a` `b` `c` `d` `e` `f`
 *
 * @param code Character code to convert.
 * @returns The digit's value, or -1 if `code` is not a hexadecimal digit.
 */
function readHexDigit(code) {
  // 0-9
  if (code >= 0x0030 && code <= 0x0039) {
    return code - 0x0030;
  }
  // A-F
  if (code >= 0x0041 && code <= 0x0046) {
    return code - 0x0037;
  }
  // a-f
  if (code >= 0x0061 && code <= 0x0066) {
    return code - 0x0057;
  }
  return -1;
}
/**
* | Escaped Character | Code Point | Character Name |
* | ----------------- | ---------- | ---------------------------- |
* | `"` | U+0022 | double quote |
* | `\` | U+005C | reverse solidus (back slash) |
* | `/` | U+002F | solidus (forward slash) |
* | `b` | U+0008 | backspace |
* | `f` | U+000C | form feed |
* | `n` | U+000A | line feed (new line) |
* | `r` | U+000D | carriage return |
* | `t` | U+0009 | horizontal tab |
*/
function readEscapedCharacter(lexer, position) {
const body = lexer.source.body;
const code = body.charCodeAt(position + 1);
switch (code) {
case 0x0022:
// "
return {
value: '\u0022',
size: 2,
};
case 0x005c:
// \
return {
value: '\u005c',
size: 2,
};
case 0x002f:
// /
return {
value: '\u002f',
size: 2,
};
case 0x0062:
// b
return {
value: '\u0008',
size: 2,
};
case 0x0066:
// f
return {
value: '\u000c',
size: 2,
};
case 0x006e:
// n
return {
value: '\u000a',
size: 2,
};
case 0x0072:
// r
return {
value: '\u000d',
size: 2,
};
case 0x0074:
// t
return {
value: '\u0009',
size: 2,
};
}
throw syntaxError(
lexer.source,
position,
`Invalid character escape sequence: "${body.slice(
position,
position + 2,
)}".`,
);
}
/**
 * Lexes a block string that opens with the `"""` at `start`.
 *
 * ```
 * StringValue ::
 *   - `"""` BlockStringCharacter* `"""`
 *
 * BlockStringCharacter ::
 *   - SourceCharacter but not `"""` or `\"""`
 *   - `\"""`
 * ```
 *
 * The raw text is split into lines at every line terminator (`\n`, `\r` or
 * `\r\n`), passed through `dedentBlockStringLines`, and joined with U+000A.
 * The token is created with the line/column of the opening quotes; only after
 * that are `lexer.line` and `lexer.lineStart` moved to the string's last line.
 *
 * @param lexer Lexer providing the source body; its line info is updated.
 * @param start Offset of the first opening quote.
 * @returns A BLOCK_STRING token.
 * @throws A syntax error for an unterminated string or an invalid character.
 */
function readBlockString(lexer, start) {
  const { body } = lexer.source;
  const limit = body.length;
  const isQuoteAt = (index) => body.charCodeAt(index) === 0x0022;

  const lines = [];
  // Offset where the line currently being read begins.
  let latestLineStart = lexer.lineStart;
  let cursor = start + 3;
  // Start of the raw text not yet copied into `lineText`.
  let pendingFrom = cursor;
  let lineText = '';

  while (cursor < limit) {
    const code = body.charCodeAt(cursor);

    // Closing Triple-Quote (""")
    if (code === 0x0022 && isQuoteAt(cursor + 1) && isQuoteAt(cursor + 2)) {
      lines.push(lineText + body.slice(pendingFrom, cursor));
      const token = createToken(
        lexer,
        TokenKind.BLOCK_STRING,
        start,
        cursor + 3,
        // Return a string of the lines joined with U+000A.
        dedentBlockStringLines(lines).join('\n'),
      );
      lexer.line += lines.length - 1;
      lexer.lineStart = latestLineStart;
      return token;
    }

    // Escaped Triple-Quote (\""")
    if (
      code === 0x005c &&
      isQuoteAt(cursor + 1) &&
      isQuoteAt(cursor + 2) &&
      isQuoteAt(cursor + 3)
    ) {
      lineText += body.slice(pendingFrom, cursor);
      // Drop only the backslash; the three quotes stay part of the text.
      pendingFrom = cursor + 1;
      cursor += 4;
      continue;
    }

    // LineTerminator
    if (code === 0x000a || code === 0x000d) {
      lines.push(lineText + body.slice(pendingFrom, cursor));
      const isCrLf = code === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;
      lineText = '';
      pendingFrom = cursor;
      latestLineStart = cursor;
      continue;
    }

    // SourceCharacter
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
    } else if (isSupplementaryCodePoint(body, cursor)) {
      cursor += 2;
    } else {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
/**
 * Lexes a name that begins at `start`.
 *
 * ```
 * Name ::
 *   - NameStart NameContinue* [lookahead != NameContinue]
 * ```
 *
 * The character at `start` is taken as the name start without being checked
 * again; the name then extends over every following NameContinue character.
 *
 * @param lexer Lexer providing the source body and line information.
 * @param start Offset of the first character of the name.
 * @returns A NAME token whose value is the name text.
 */
function readName(lexer, start) {
  const { body } = lexer.source;
  const limit = body.length;
  let end = start + 1;
  while (end < limit && isNameContinue(body.charCodeAt(end))) {
    end += 1;
  }
  return createToken(lexer, TokenKind.NAME, start, end, body.slice(start, end));
}