-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
232 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#include "lexer.h" | ||
#include <ctype.h> | ||
#include <stdio.h> | ||
#include <string.h> | ||
|
||
const char *token_to_string(enum TokenType token) { | ||
switch (token) { | ||
#define F(T) case T: return #T; | ||
FOREACH_TOKEN(F) | ||
#undef F | ||
default: return "unknown"; | ||
} | ||
} | ||
|
||
int lexer_create(struct Lexer *lex, const char *beg, const char *end) { | ||
lex->cur = beg; | ||
lex->end = end; | ||
return 0; | ||
} | ||
|
||
/*
 * Counterpart of lexer_create(). There is currently nothing to release,
 * so this does no work and always returns 0.
 */
int lexer_destroy(struct Lexer *lex) {
    (void)lex;  // intentionally unused
    return 0;
}
|
||
/*
 * Return 1 if `c` may appear in an identifier (an alphabetic character
 * or '_'), 0 otherwise.
 */
int is_letter(char c) {
    // <ctype.h> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behaviour.
    return c == '_' || isalpha((unsigned char)c) != 0;
}
|
||
enum TokenType lookup_ident(const char *beg, const char *end) { | ||
int len = end - beg; | ||
if (beg >= end) { | ||
return Illegal; | ||
} | ||
//printf("Checking ident: '%.*s'\n", len, beg); | ||
|
||
// REVISIT(plesslie): would ideally use a perfect hash generator like gperf here | ||
if (strncmp(beg, "fn", len) == 0) { | ||
return Function; | ||
} else if (strncmp(beg, "let", len) == 0) { | ||
return Let; | ||
} else if (strncmp(beg, "true", len) == 0) { | ||
return True; | ||
} else if (strncmp(beg, "false", len) == 0) { | ||
return False; | ||
} else if (strncmp(beg, "if", len) == 0) { | ||
return If; | ||
} else if (strncmp(beg, "else", len) == 0) { | ||
return Else; | ||
} else if (strncmp(beg, "return", len) == 0) { | ||
return Return; | ||
} else { | ||
return Ident; | ||
} | ||
} | ||
|
||
int lexer_next_token(struct Lexer *lex, struct Token *token) { | ||
char c; | ||
|
||
// skip whitespace | ||
while (lex->cur < lex->end && isspace(*lex->cur)) { | ||
++lex->cur; | ||
} | ||
|
||
if (lex->cur >= lex->end) { | ||
token->type = EndOfFile; | ||
return 0; | ||
} | ||
|
||
c = *lex->cur++; | ||
if (c == '=') { | ||
if (lex->cur < lex->end && *lex->cur == '=') { | ||
++lex->cur; | ||
token->type = Equal; | ||
} else { | ||
token->type = Assign; | ||
} | ||
} else if (c == '!') { | ||
if (lex->cur < lex->end && *lex->cur == '=') { | ||
++lex->cur; | ||
token->type = NotEqual; | ||
} else { | ||
token->type = Bang; | ||
} | ||
} else if (isdigit(c)) { // should this be checking for '.' as well? | ||
token->type = Integer; | ||
// read_number() | ||
token->beg = lex->cur - 1; | ||
while (lex->cur < lex->end && isdigit(*lex->cur)) { | ||
++lex->cur; | ||
} | ||
token->end = lex->cur; | ||
} else if (is_letter(c)) { | ||
// read_ident() | ||
token->beg = lex->cur - 1; | ||
while (lex->cur < lex->end && is_letter(*lex->cur)) { | ||
++lex->cur; | ||
} | ||
token->end = lex->cur; | ||
token->type = lookup_ident(token->beg, token->end); | ||
} else { | ||
switch (c) { | ||
case '+': token->type = Plus; break; | ||
case '-': token->type = Minus; break; | ||
case '/': token->type = Slash; break; | ||
case '*': token->type = Asterisk; break; | ||
case '<': token->type = LowerThan; break; | ||
case '>': token->type = GreaterThan; break; | ||
case ';': token->type = Semicolon; break; | ||
case ',': token->type = Comma; break; | ||
case '{': token->type = LeftBrace; break; | ||
case '}': token->type = RightBrace; break; | ||
case '(': token->type = LeftParenthesis; break; | ||
case ')': token->type = RightParenthesis; break; | ||
default: | ||
// case '=' or '==' | ||
// case '!' or '!=' | ||
// is_letter | ||
// is_numeric | ||
return 1; | ||
} | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#ifndef LEXER__H_ | ||
#define LEXER__H_ | ||
|
||
|
||
/*
 * X-macro listing every token type, in enumerator order.
 *
 * FOREACH_TOKEN(F) expands to F(Illegal) F(EndOfFile) ... F(Return),
 * where F is a function-like macro supplied by the user. The last entry
 * deliberately has no trailing line-continuation, so the macro ends on
 * its own line and does not depend on what follows it.
 */
#define FOREACH_TOKEN(F) \
    F(Illegal) \
    F(EndOfFile) \
    F(Ident) \
    F(Integer) \
    F(Assign) \
    F(Plus) \
    F(Minus) \
    F(Bang) \
    F(Asterisk) \
    F(Slash) \
    F(LowerThan) \
    F(GreaterThan) \
    F(Equal) \
    F(NotEqual) \
    F(Comma) \
    F(Semicolon) \
    F(LeftParenthesis) \
    F(RightParenthesis) \
    F(LeftBrace) \
    F(RightBrace) \
    F(Function) \
    F(Let) \
    F(True) \
    F(False) \
    F(If) \
    F(Else) \
    F(Return)
|
||
enum TokenType { | ||
#define F(token) token, | ||
FOREACH_TOKEN(F) | ||
#undef F | ||
}; | ||
|
||
/*
 * Return the enumerator name of `token` (e.g. "Plus"), or "unknown" if
 * the value is not a known token type. The result must not be freed.
 */
const char *token_to_string(enum TokenType token);
|
||
struct Token { | ||
enum TokenType type; | ||
const char *beg; | ||
const char *end; | ||
}; | ||
|
||
/*
 * Lexer state: a cursor over the character range [cur, end).
 * The lexer stores only these pointers and does not copy the text.
 */
struct Lexer {
    const char *cur, *end;
};

/* Set `lex` up to scan the range [beg, end). Returns 0. */
int lexer_create(struct Lexer *lex, const char *beg, const char *end);

/* Tear down a lexer set up with lexer_create(). Returns 0. */
int lexer_destroy(struct Lexer *lex);

/*
 * Read the next token into `token`, skipping leading whitespace.
 * Returns 0 when a token was produced (EndOfFile once the input is
 * exhausted) and non-zero when an unrecognised character is met.
 */
int lexer_next_token(struct Lexer *lex, struct Token *token);
|
||
#endif // LEXER__H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,56 @@ | ||
#define _GNU_SOURCE | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <unistd.h> | ||
#include "lexer.h" | ||
|
||
int main(int argc, char **argv) { | ||
FILE *stream; | ||
char *line = NULL; | ||
size_t len = 0; | ||
ssize_t read; | ||
struct Lexer lexer; | ||
struct Token token; | ||
|
||
stream = fdopen(STDIN_FILENO, "rb"); | ||
if (!stream) { | ||
exit(EXIT_FAILURE); | ||
} | ||
|
||
setbuf(stdout, 0); | ||
while(1) { | ||
printf(">> "); | ||
read = getline(&line, &len, stream); | ||
if (read > 0) { | ||
if (lexer_create(&lexer, line, line+read) != 0) { | ||
fprintf(stderr, "Failed to create lexer!\n"); | ||
exit(EXIT_FAILURE); | ||
} | ||
|
||
// printf("Retrieved line of length %zu :\n", read); | ||
// printf("%s", line); | ||
|
||
do { | ||
if (lexer_next_token(&lexer, &token) != 0) { | ||
fprintf(stderr, "Lexer failed to get next Token!\n"); | ||
break; | ||
} | ||
printf("%s\n", token_to_string(token.type)); | ||
} while (token.type != EndOfFile); | ||
|
||
if (lexer_destroy(&lexer) != 0) { | ||
fprintf(stderr, "Failed to destroy lexer!\n"); | ||
exit(EXIT_FAILURE); | ||
} | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
free(line); | ||
fclose(stream); | ||
exit(EXIT_SUCCESS); | ||
|
||
return 0; | ||
} |