From 505a6a0122e7d78e29042b2b481b10dba494884c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20M=C3=B6llmann?= Date: Sat, 3 Jun 2017 21:42:39 -0300 Subject: [PATCH] add type annotation to literals and declarations. --- README.md | 15 ++++++- astree.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++- astree.h | 4 ++ main.c | 4 +- symbol_table.c | 33 ++++++++++++++- symbol_table.h | 38 +++++++++++++++++ 6 files changed, 199 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ae25fcf..c74ce4f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,18 @@ # simple_compiler -## Hashmap structure: +## Files and their constants + +### astree.h + +- `AST_*`: the AST node type. + +### symtab.h + +- `SYMBOL_*`: the symbol type in the symbol table (identifier, integer literal, real literal, character literal, string literal) +- `ID_*`: the "nature" of the identifier (variable, vector, function) +- `TP_*`: the data type of the identifier (byte, short, long, float, double) + +## Hashmap structure `struct hashmap` - `size`: how many buckets @@ -9,7 +21,6 @@ - `value_size`: size of the value stored in items - `buckets`: array of pointers to items - ``` +---------------+ | buckets | +--------+ +--------+ diff --git a/astree.c b/astree.c index e1049ee..cbdd788 100644 --- a/astree.c +++ b/astree.c @@ -1,4 +1,5 @@ #include "astree.h" +#include "symbol_table.h" #include #include @@ -96,7 +97,8 @@ void ast_fprint(FILE *stream, int level, struct astree *tree) { fprintf(stream, "%s", type_to_string[tree->type]); if (tree->type == AST_SYM) { - fprintf(stream, ": %s", tree->symbol->key); + fprintf(stream, ": %s, ", tree->symbol->key); + symtab_fprint_item(stream, tree->symbol->value); } fprintf(stream, "\n"); @@ -346,3 +348,110 @@ void print_identation(FILE* stream, int level){ for(i = 0; i < level; i++) fprintf(stream, " "); } + +/* Annotate symbol table when tree is a declaration (i.e., of type AST_VAR, + * AST_VEC or AST_FHEADER). 
*/ +static void annotate_identifier(struct astree *tree) { + struct astree *id_node; + struct astree *type_node; + switch (tree->type) { + case AST_VAR: + id_node = tree->children[0]; + type_node = tree->children[1]; + break; + case AST_VEC: + id_node = tree->children[0]; + type_node = tree->children[1]; + ast_annotate(tree->children[2]); + ast_annotate(tree->children[3]); + break; + case AST_FHEADER: + id_node = tree->children[1]; + type_node = tree->children[0]; + + /* TODO: Since here tree->children[2]->children[2] is a list of parameters, + * we might have to create a new scope for those variables declared here. + * Right now, this accomplishes nothing, since this node is of type + * AST_IDENTIFIER. Test program with the argument of `tests/fun_dec.txt` + * to see. NOTE(review): ast_annotate() recurses into every child before it calls this function, so the explicit ast_annotate() calls in this function appear to repeat work already done -- confirm. */ + ast_annotate(tree->children[2]); + break; + } + + struct symtab_item *item = id_node->symbol->value; + switch (tree->type) { + case AST_VAR: + item->id_type = ID_VAR; + break; + case AST_VEC: + item->id_type = ID_VEC; + break; + case AST_FHEADER: + item->id_type = ID_FUN; + break; + } + + switch (type_node->type) { + case AST_KW_BYTE: + item->data_type = TP_BYTE; + break; + case AST_KW_SHORT: + item->data_type = TP_SHORT; + break; + case AST_KW_LONG: + item->data_type = TP_LONG; + break; + case AST_KW_FLOAT: + item->data_type = TP_FLOAT; + break; + case AST_KW_DOUBLE: + item->data_type = TP_DOUBLE; + break; + } +} + +/* Annotate symbol table when tree is a literal or an id (i.e., of + * type AST_SYM): integer, real and char literals get TP_LONG, TP_DOUBLE and TP_BYTE respectively; string literals and identifiers are left unchanged. */ +static void annotate_symbol(struct astree *tree) { + struct symtab_item *item = tree->symbol->value; + switch (item->code) { + case SYMBOL_LIT_INT: + item->data_type = TP_LONG; + break; + case SYMBOL_LIT_REAL: + item->data_type = TP_DOUBLE; + break; + case SYMBOL_LIT_CHAR: + item->data_type = TP_BYTE; + break; + case SYMBOL_LIT_STRING: + break; + case SYMBOL_IDENTIFIER: + break; + } +} + +void ast_annotate(struct astree* tree) { + if (tree == NULL) { + return; + } + + /* Annotating children 
before the root seems more intuitive, but I don't think + * it makes any difference. */ + for (int i = 0; i < AST_MAXCHILDREN; i++) { + ast_annotate(tree->children[i]); + } + + int node_type = tree->type; + int sym = node_type == AST_SYM; + int decl = (node_type == AST_VAR) + || (node_type == AST_VEC) + || (node_type == AST_FHEADER); + + if (sym) { + annotate_symbol(tree); + } else if (decl) { + annotate_identifier(tree); + } + +} diff --git a/astree.h b/astree.h index eebecfe..a88e7d0 100644 --- a/astree.h +++ b/astree.h @@ -75,4 +75,8 @@ void ast_make_source(FILE* stream, struct astree* tree, int level); /* Internal function to help make source*/ void print_identation(FILE* stream, int level); + +/* Traverses the tree annotating the symbol table: sets `data_type` for int, real and char literals + * and for declared identifiers, and `id_type` for declared identifiers. A NULL tree is ignored. */ +void ast_annotate(struct astree* tree); #endif /* ifndef AST_H */ diff --git a/main.c b/main.c index 8f4f5aa..b81b04b 100644 --- a/main.c +++ b/main.c @@ -37,10 +37,12 @@ void main(int argc, char* argv[]){ } yyparse(); printf("SUCESS:\n\t Program was accepted.\n"); + ast_annotate(program); symtab_print(); - ast_fprint(stdout, 0, program); printf("SOURCE:\n"); ast_make_source(out, program, 0); + ast_fprint(stdout, 0, program); + symtab_destroy(); ast_terminate(program); exit(0); diff --git a/symbol_table.c b/symbol_table.c index 1193843..5c321c1 100644 --- a/symbol_table.c +++ b/symbol_table.c @@ -6,11 +6,17 @@ struct hashmap hash; void symtab_init(void){ - hm_initialize(8, 0.5, sizeof(int), &hash); + hm_initialize(8, 0.5, sizeof(struct symtab_item), &hash); } struct hm_item *symtab_insert(char* symbol, int code){ - hm_put(&hash, symbol, &code); + struct symtab_item item; + item.code = code; + item.data_type = 0; + item.id_type = 0; + item.decl = NULL; + + hm_put(&hash, symbol, &item); return hm_getref(&hash, symbol); } @@ -19,6 +25,29 @@ void symtab_print(void){ hm_fprint(stdout, &hash, 0); } +static const char 
*data_type_to_string[] = { + "", + "byte", + "short", + "long", + "float", + "double" +}; + +static const char *id_type_to_string[] = { + "", + "var", + "vec", + "fun" +}; + + +void symtab_fprint_item(FILE *stream, struct symtab_item *item) { + fprintf(stream, "data_t: %s, id_t: %s", + data_type_to_string[item->data_type], + id_type_to_string[item->id_type]); +} + void symtab_destroy(void){ hm_terminate(&hash); } diff --git a/symbol_table.h b/symbol_table.h index 34b0f58..8d8dde2 100644 --- a/symbol_table.h +++ b/symbol_table.h @@ -1,15 +1,53 @@ #ifndef SYMTAB_H #define SYMTAB_H +#include "hashmap.h" /* hm_item */ + #define SYMBOL_LIT_INT 1 #define SYMBOL_LIT_REAL 2 #define SYMBOL_LIT_CHAR 3 #define SYMBOL_LIT_STRING 4 #define SYMBOL_IDENTIFIER 5 +#define ID_VAR 1 +#define ID_VEC 2 +#define ID_FUN 3 + +#define TP_BYTE 1 +#define TP_SHORT 2 +#define TP_LONG 3 +#define TP_FLOAT 4 +#define TP_DOUBLE 5 + +/** + * Symbols are: + * - identifiers + * - integer literals + * - real literals + * - character literals + * - string literals + * + * The fields in a symbol table item are: + * - code: the type of symbol (one of SYMBOL_*). It's filled for all symbols. + * - data_type: the type of data attached to that symbol (one of TP_*). It stays + * 0 for string literals and for identifiers that were never declared. + * - id_type: the "nature" of the identifier (one of ID_*): variable, vector or function. + * It's only *not* zero when code == SYMBOL_IDENTIFIER. + * - decl: pointer to the symbol declaration in the astree. It's only *not* + * NULL when code == SYMBOL_IDENTIFIER. NOTE(review): typed as struct hm_item *, not struct astree * -- confirm the intended type. + */ + +struct symtab_item { + int code; + int data_type; + int id_type; + struct hm_item *decl; +}; + void symtab_init(void); struct hm_item *symtab_insert(char* symbol, int code); void symtab_print(void); +void symtab_fprint_item(FILE *stream, struct symtab_item *item); void symtab_destroy(void); #endif