Updated generate_token.py to generate the correct token.c
DefinitelyNotAnOrca committed Oct 25, 2024
1 parent 10ed2f7 commit ff781c8
Showing 2 changed files with 40 additions and 16 deletions.
Parser/token.c: 12 changes (4 additions & 8 deletions)

Some generated files are not rendered by default; the diff of Parser/token.c is not shown here.

Tools/build/generate_token.py: 44 changes (36 additions & 8 deletions)
@@ -83,6 +83,8 @@ def update_file(file, content):
 #define ISSTRINGLIT(x) ((x) == STRING || \\
                          (x) == FSTRING_MIDDLE)
+#define GENERATE_2CHAR_CODE(x, y) ((int)((x) << 8 | (y))) // Generate a 16-bit integer from 2 8-bit characters
+#define GENERATE_3CHAR_CODE(x, y, z) ((int)((x) << 16 | (y) << 8 | (z))) // Generate a 24-bit integer from 3 8-bit characters
 // Export these 4 symbols for 'test_peg_generator'
 PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
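The two added macros pack the bytes of a two- or three-character operator into a single int, so the generated lookup can be one flat switch over a packed value instead of nested per-character switches. A minimal Python sketch of the same packing (illustration only, not part of the commit):

# Python equivalent of GENERATE_2CHAR_CODE / GENERATE_3CHAR_CODE, shown only
# to illustrate the encoding that the generated C `case` labels rely on.
def two_char_code(c1, c2):
    return ord(c1) << 8 | ord(c2)

def three_char_code(c1, c2, c3):
    return ord(c1) << 16 | ord(c2) << 8 | ord(c3)

assert two_char_code('!', '=') == 0x213D          # '!' is 0x21, '=' is 0x3D
assert three_char_code('*', '*', '=') == 0x2A2A3D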
@@ -149,6 +151,31 @@ def make_h(infile, outfile='Include/internal/pycore_token.h'):
 }
 """
 
+def generate_one_char_tokens(tokens):
+    result = []
+    result.append(' switch (c1) {\n')
+    for c, name in sorted(tokens.items()):
+        result.append(" case '%s': return %s;\n" % (c, name))
+    result.append(' }\n')
+    return ''.join(result)
+
+def generate_two_char_tokens(tokens):
+    result = []
+    result.append(' switch (GENERATE_2CHAR_CODE(c1, c2)) {\n')
+    for (c1, c2), name in sorted(tokens.items()):
+        result.append(" case GENERATE_2CHAR_CODE('%s', '%s'): return %s;\n" % (c1, c2, name))
+    result.append(' }\n')
+    return ''.join(result)
+
+def generate_three_char_tokens(tokens):
+    result = []
+    result.append(' switch (GENERATE_3CHAR_CODE(c1, c2, c3)) {\n')
+    for (c1, c2, c3), name in sorted(tokens.items()):
+        result.append(" case GENERATE_3CHAR_CODE('%s', '%s', '%s'): return %s;\n" % (c1, c2, c3, name))
+    result.append(' }\n')
+    return ''.join(result)
+
+
 def generate_chars_to_token(mapping, n=1):
     result = []
     write = result.append
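For reference, calling one of the new helpers on a tiny hand-picked mapping shows the shape of the C text it returns. This is an illustration that assumes the functions above are in scope, not actual generator output:

# Illustration only: inspect the switch body generate_two_char_tokens() returns
# for a small sample mapping (token names as in Grammar/Tokens).
sample = {'!=': 'NOTEQUAL', '+=': 'PLUSEQUAL'}
print(generate_two_char_tokens(sample), end='')
# Expected shape of the printed C fragment (exact whitespace may differ):
#  switch (GENERATE_2CHAR_CODE(c1, c2)) {
#  case GENERATE_2CHAR_CODE('!', '='): return NOTEQUAL;
#  case GENERATE_2CHAR_CODE('+', '='): return PLUSEQUAL;
#  }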
@@ -172,14 +199,15 @@ def generate_chars_to_token(mapping, n=1):
 def make_c(infile, outfile='Parser/token.c'):
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
     string_to_tok['<>'] = string_to_tok['!=']
-    chars_to_token = {}
+    chars_to_token = {
+        1: {},
+        2: {},
+        3: {},
+    }
     for string, value in string_to_tok.items():
         assert 1 <= len(string) <= 3
         name = tok_names[value]
-        m = chars_to_token.setdefault(len(string), {})
-        for c in string[:-1]:
-            m = m.setdefault(c, {})
-        m[string[-1]] = name
+        chars_to_token[len(string)][string] = name
 
     names = []
     for value, name in enumerate(tok_names):
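This hunk replaces the old per-character nested dictionaries with three flat dictionaries keyed by the full operator string and bucketed by length, which is the layout the new switch generators expect. A minimal sketch of the resulting structure, using a few sample operators (token names as in Grammar/Tokens):

# Build the same structure the new make_c() loop builds, for three sample tokens.
chars_to_token = {1: {}, 2: {}, 3: {}}
for string, name in [('+', 'PLUS'), ('!=', 'NOTEQUAL'), ('**=', 'DOUBLESTAREQUAL')]:
    chars_to_token[len(string)][string] = name

assert chars_to_token == {
    1: {'+': 'PLUS'},
    2: {'!=': 'NOTEQUAL'},
    3: {'**=': 'DOUBLESTAREQUAL'},
}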
@@ -190,9 +218,9 @@ def make_c(infile, outfile='Parser/token.c'):
 
     if update_file(outfile, token_c_template % (
         ''.join(names),
-        generate_chars_to_token(chars_to_token[1]),
-        generate_chars_to_token(chars_to_token[2]),
-        generate_chars_to_token(chars_to_token[3])
+        generate_one_char_tokens(chars_to_token[1]),
+        generate_two_char_tokens(chars_to_token[2]),
+        generate_three_char_tokens(chars_to_token[3])
     )):
         print("%s regenerated from %s" % (outfile, infile))
 
