-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokenizer.py
51 lines (44 loc) · 1.01 KB
/
tokenizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from dataclasses import dataclass
from enum import Enum
class TokenType(Enum):
    """Categories that ``token_type`` can assign to a token string."""

    # Token made up entirely of digit characters (``str.isdigit()`` is true).
    NUMBER = 0
    # One of the arithmetic operator characters: + - / *
    OPERATOR = 1
    # Anything that is neither a number nor an operator.
    UNKNOWN = 2
# Characters that `tokenize` emits as stand-alone single-character tokens and
# that terminate any run of other characters. The set includes the space and
# newline characters, so whitespace appears in the token stream as tokens.
TOKENLIST = "(){}[],.:;*+-/@#$\"\' =^~|%<>\n"
@dataclass
class Token:
    """A piece of input text plus its position and category.

    Attributes:
        start: Offset of the token's first character (as filled in by
            ``advtok``).
        end: Offset one past the token's last character (as filled in by
            ``advtok``, i.e. ``start + len(value)``).
        value: The token text itself.
        type: The ``TokenType`` category of the token.
    """

    start: int
    end: int
    value: str
    type: TokenType
def tokenize(code):
    """Split *code* into a flat list of token strings.

    Every character found in ``TOKENLIST`` becomes its own single-character
    token (this includes the space and newline characters, so whitespace is
    kept in the output). Consecutive characters that are not in
    ``TOKENLIST`` are merged into one token.

    Args:
        code: The text to split; it is iterated one character at a time.

    Returns:
        A list of strings in input order. No character is dropped, so for a
        string input ``"".join(tokenize(code)) == code``.
    """
    tokens = []
    pending = []  # characters of the non-delimiter run being accumulated

    for char in code:
        if char not in TOKENLIST:
            pending.append(char)
            continue
        # A delimiter ends the current run: flush the run first so that
        # tokens stay in input order, then emit the delimiter itself.
        if pending:
            tokens.append("".join(pending))
            pending = []
        tokens.append(char)

    # Flush a trailing run that was not followed by a delimiter.
    if pending:
        tokens.append("".join(pending))
    return tokens
def token_type(i):
    """Classify a single token string.

    Args:
        i: The token text to classify.

    Returns:
        ``TokenType.OPERATOR`` if *i* is exactly one of ``+``, ``-``, ``/``
        or ``*``; ``TokenType.NUMBER`` if ``i.isdigit()`` is true (a
        non-empty string made up only of digit characters); otherwise
        ``TokenType.UNKNOWN``.
    """
    # ``in`` on a string is a substring test, so without the length check
    # the empty string and multi-character pieces such as "+-" or "/*" would
    # be reported as operators.
    if len(i) == 1 and i in "+-/*":
        return TokenType.OPERATOR
    if i.isdigit():
        return TokenType.NUMBER
    return TokenType.UNKNOWN
def advtok(tokenized):
    """Tokenize text and attach position and category to every token.

    Despite the parameter name, *tokenized* is the raw input text: it is
    passed to ``tokenize`` here.

    Args:
        tokenized: The text to split into tokens.

    Returns:
        A list of ``Token`` objects in input order. Each token's ``start``
        is the running total of the lengths of all preceding tokens and
        ``end`` is ``start + len(value)``; ``type`` comes from
        ``token_type``.
    """
    result = []
    offset = 0  # position of the next token's first character
    for text in tokenize(tokenized):
        stop = offset + len(text)
        result.append(
            Token(start=offset, end=stop, value=text, type=token_type(text))
        )
        offset = stop
    return result