Skip to content

Commit

Permalink
Basic lexer in C
Browse files Browse the repository at this point in the history
  • Loading branch information
selavy committed Dec 15, 2016
1 parent 1c008f1 commit 4d1e224
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CC=gcc
RELEASE=-O3 -fstrict-aliasing -flto
DEBUG=-g -O0
CFLAGS=-Wall -Werror -pedantic $(DEBUG) -std=c11
OBJS = main.o
OBJS = lexer.o main.o
TARGET=interpreter

$(TARGET): $(OBJS)
Expand Down
124 changes: 124 additions & 0 deletions lexer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#include "lexer.h"
#include <ctype.h>
#include <stdio.h>
#include <string.h>

const char *token_to_string(enum TokenType token) {
switch (token) {
#define F(T) case T: return #T;
FOREACH_TOKEN(F)
#undef F
default: return "unknown";
}
}

int lexer_create(struct Lexer *lex, const char *beg, const char *end) {
lex->cur = beg;
lex->end = end;
return 0;
}

/*
 * Tear down a lexer.  The lexer owns no resources, so there is nothing to
 * release; the function exists to pair with lexer_create().
 * Always returns 0.
 */
int lexer_destroy(struct Lexer *lex) {
    (void)lex;
    return 0;
}

/*
 * Return non-zero when c may appear in an identifier: an alphabetic
 * character (per isalpha in the current locale) or an underscore.
 */
int is_letter(char c) {
    /*
     * <ctype.h> functions take an int that must be EOF or representable as
     * unsigned char.  Plain char may be signed, so a byte >= 0x80 would be
     * passed as a negative value, which is undefined behaviour without the
     * cast.
     */
    return isalpha((unsigned char)c) || c == '_';
}

enum TokenType lookup_ident(const char *beg, const char *end) {
int len = end - beg;
if (beg >= end) {
return Illegal;
}
//printf("Checking ident: '%.*s'\n", len, beg);

// REVISIT(plesslie): would ideally use a perfect hash generator like gperf here
if (strncmp(beg, "fn", len) == 0) {
return Function;
} else if (strncmp(beg, "let", len) == 0) {
return Let;
} else if (strncmp(beg, "true", len) == 0) {
return True;
} else if (strncmp(beg, "false", len) == 0) {
return False;
} else if (strncmp(beg, "if", len) == 0) {
return If;
} else if (strncmp(beg, "else", len) == 0) {
return Else;
} else if (strncmp(beg, "return", len) == 0) {
return Return;
} else {
return Ident;
}
}

int lexer_next_token(struct Lexer *lex, struct Token *token) {
char c;

// skip whitespace
while (lex->cur < lex->end && isspace(*lex->cur)) {
++lex->cur;
}

if (lex->cur >= lex->end) {
token->type = EndOfFile;
return 0;
}

c = *lex->cur++;
if (c == '=') {
if (lex->cur < lex->end && *lex->cur == '=') {
++lex->cur;
token->type = Equal;
} else {
token->type = Assign;
}
} else if (c == '!') {
if (lex->cur < lex->end && *lex->cur == '=') {
++lex->cur;
token->type = NotEqual;
} else {
token->type = Bang;
}
} else if (isdigit(c)) { // should this be checking for '.' as well?
token->type = Integer;
// read_number()
token->beg = lex->cur - 1;
while (lex->cur < lex->end && isdigit(*lex->cur)) {
++lex->cur;
}
token->end = lex->cur;
} else if (is_letter(c)) {
// read_ident()
token->beg = lex->cur - 1;
while (lex->cur < lex->end && is_letter(*lex->cur)) {
++lex->cur;
}
token->end = lex->cur;
token->type = lookup_ident(token->beg, token->end);
} else {
switch (c) {
case '+': token->type = Plus; break;
case '-': token->type = Minus; break;
case '/': token->type = Slash; break;
case '*': token->type = Asterisk; break;
case '<': token->type = LowerThan; break;
case '>': token->type = GreaterThan; break;
case ';': token->type = Semicolon; break;
case ',': token->type = Comma; break;
case '{': token->type = LeftBrace; break;
case '}': token->type = RightBrace; break;
case '(': token->type = LeftParenthesis; break;
case ')': token->type = RightParenthesis; break;
default:
// case '=' or '=='
// case '!' or '!='
// is_letter
// is_numeric
return 1;
}
}

return 0;
}
56 changes: 56 additions & 0 deletions lexer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef LEXER__H_
#define LEXER__H_


/*
 * X-macro listing every token kind exactly once.
 *
 * F is a function-like macro that is applied to each token name in turn;
 * the expansion is the concatenation of all F(name) results.  The list is
 * used to generate both enum TokenType and the name lookup in
 * token_to_string(), so the two cannot drift apart.
 *
 * The last entry deliberately has no trailing backslash: a continuation
 * there would splice whatever line follows into the macro body.
 */
#define FOREACH_TOKEN(F) \
    F(Illegal) \
    F(EndOfFile) \
    F(Ident) \
    F(Integer) \
    F(Assign) \
    F(Plus) \
    F(Minus) \
    F(Bang) \
    F(Asterisk) \
    F(Slash) \
    F(LowerThan) \
    F(GreaterThan) \
    F(Equal) \
    F(NotEqual) \
    F(Comma) \
    F(Semicolon) \
    F(LeftParenthesis) \
    F(RightParenthesis) \
    F(LeftBrace) \
    F(RightBrace) \
    F(Function) \
    F(Let) \
    F(True) \
    F(False) \
    F(If) \
    F(Else) \
    F(Return)

/*
 * Every kind of token the lexer can produce.  One enumerator is generated
 * per FOREACH_TOKEN entry, in list order, so values start at 0 and are
 * contiguous.
 */
enum TokenType {
#define AS_ENUMERATOR(name) name,
    FOREACH_TOKEN(AS_ENUMERATOR)
#undef AS_ENUMERATOR
};

/*
 * Return the enumerator name of token as a string literal (e.g. "Plus"),
 * or "unknown" for a value that is not a listed token type.
 */
extern const char *token_to_string(enum TokenType token);

/*
 * One lexed token: its kind plus the slice of input text it came from.
 * beg/end point into the buffer handed to lexer_create(); nothing is copied.
 */
struct Token {
/* Kind of token. */
enum TokenType type;
/* First character of the token's text.  lexer_next_token() fills beg/end
 * in for Integer and identifier/keyword tokens.
 * NOTE(review): confirm they are set for other kinds before relying on them. */
const char *beg;
/* One past the last character of the token's text. */
const char *end;
};

/*
 * Lexer state: a cursor over a caller-supplied character range.
 */
struct Lexer {
/* Next character to be read; advanced by lexer_next_token(). */
const char *cur;
/* End of the input; scanning stops once cur reaches it. */
const char *end;
};
/* Point lex at the input range starting at beg and ending at end.
 * Stores the two pointers only.  Returns 0. */
int lexer_create(struct Lexer *lex, const char *beg, const char *end);
/* Counterpart to lexer_create().  Returns 0. */
int lexer_destroy(struct Lexer *lex);
/* Skip whitespace and store the next token in *token.  Returns 0 on success
 * (token->type is EndOfFile once the input is exhausted) and 1 when the next
 * character does not begin any recognised token. */
int lexer_next_token(struct Lexer *lex, struct Token *token);

#endif // LEXER__H_
51 changes: 51 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,56 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "lexer.h"

/*
 * Minimal REPL driver for the lexer.
 *
 * Repeatedly prints the ">> " prompt, reads one line from standard input,
 * lexes it and prints the name of each token on its own line (ending with
 * EndOfFile).  A lexing failure is reported on stderr and ends that line
 * only.  The loop stops when getline() returns no data.
 */
int main(int argc, char **argv) {
    char *line = NULL;
    size_t capacity = 0;
    ssize_t nread;
    struct Lexer lexer;
    struct Token token;
    FILE *stream = fdopen(STDIN_FILENO, "rb");

    if (!stream) {
        exit(EXIT_FAILURE);
    }

    /* Unbuffered stdout so the prompt shows up without a trailing newline. */
    setbuf(stdout, 0);

    for (;;) {
        printf(">> ");
        nread = getline(&line, &capacity, stream);
        if (nread <= 0) {
            break;
        }

        if (lexer_create(&lexer, line, line + nread) != 0) {
            fprintf(stderr, "Failed to create lexer!\n");
            exit(EXIT_FAILURE);
        }

        /* Print tokens until EndOfFile has been printed or lexing fails. */
        for (;;) {
            if (lexer_next_token(&lexer, &token) != 0) {
                fprintf(stderr, "Lexer failed to get next Token!\n");
                break;
            }
            printf("%s\n", token_to_string(token.type));
            if (token.type == EndOfFile) {
                break;
            }
        }

        if (lexer_destroy(&lexer) != 0) {
            fprintf(stderr, "Failed to destroy lexer!\n");
            exit(EXIT_FAILURE);
        }
    }

    free(line);
    fclose(stream);
    return EXIT_SUCCESS;
}

0 comments on commit 4d1e224

Please sign in to comment.