-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Issue a diagnostic if we try to parse a source file that is too large. #4429
Changes from all commits
06a0e37
e02272c
9d713e3
ecd78c9
b013265
20f2daf
9fca859
fc0bd90
7299761
597880b
ca76669
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,6 +83,10 @@ class TokenDiagnosticConverter : public DiagnosticConverter<TokenIndex> { | |
// `HasError` returning true. | ||
class TokenizedBuffer : public Printable<TokenizedBuffer> { | ||
public: | ||
// The maximum number of tokens that can be stored in the buffer, including | ||
// the FileStart and FileEnd tokens. | ||
static constexpr int MaxTokens = 1 << 23; | ||
|
||
// A comment, which can be a block of lines. | ||
// | ||
// This is the API version of `CommentData`. | ||
|
@@ -306,7 +310,6 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> { | |
} | ||
auto set_ident_id(IdentifierId ident_id) -> void { | ||
CARBON_DCHECK(kind() == TokenKind::Identifier); | ||
CARBON_DCHECK(ident_id.index < (2 << PayloadBits)); | ||
token_payload_ = ident_id.index; | ||
} | ||
|
||
|
@@ -334,7 +337,6 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> { | |
} | ||
auto set_closing_token_index(TokenIndex closing_index) -> void { | ||
CARBON_DCHECK(kind().is_opening_symbol()); | ||
CARBON_DCHECK(closing_index.index < (2 << PayloadBits)); | ||
token_payload_ = closing_index.index; | ||
} | ||
|
||
|
@@ -344,7 +346,6 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> { | |
} | ||
auto set_opening_token_index(TokenIndex opening_index) -> void { | ||
CARBON_DCHECK(kind().is_closing_symbol()); | ||
CARBON_DCHECK(opening_index.index < (2 << PayloadBits)); | ||
token_payload_ = opening_index.index; | ||
} | ||
|
||
|
@@ -395,18 +396,23 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> { | |
: kind_(kind), | ||
has_leading_space_(has_leading_space), | ||
token_payload_(payload), | ||
byte_offset_(byte_offset) { | ||
CARBON_DCHECK(payload >= 0 && payload < (2 << PayloadBits), | ||
"Payload won't fit into unsigned bit pack: {0}", payload); | ||
} | ||
byte_offset_(byte_offset) {} | ||
|
||
// A bitfield that encodes the token's kind, the leading space flag, and the | ||
// remaining bits in a payload. These are encoded together as a bitfield for | ||
// density and because these are the hottest fields of tokens for consumers | ||
// after lexing. | ||
// | ||
// Payload values are typically ID types for which we create at most one per | ||
// token, so we ensure that `token_payload_` is large enough to fit any | ||
// token index. Stores to this field may overflow, but we produce an error | ||
// in `Lexer::Finalize` if the file has more than `MaxTokens` tokens, so | ||
// this value never overflows if lexing succeeds. | ||
TokenKind::RawEnumType kind_ : sizeof(TokenKind) * 8; | ||
bool has_leading_space_ : 1; | ||
unsigned token_payload_ : PayloadBits; | ||
static_assert(MaxTokens <= 1 << PayloadBits, | ||
"Not enough payload bits to store a token index"); | ||
Comment on lines
+414
to
+415
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What made you choose this approach versus relocating PayloadBits to make MaxTokens directly calculated based on it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted an explicit value for |
||
|
||
// Separate storage for the byte offset, this is hot while lexing but then | ||
// generally cold. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whitespace suggestion due to the long comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes the previous comment that's trying to talk about all three bit-fields less clear.
What do you think about moving this comment before all the bit-fields, with the other one?