From d2f815244e51ab08d3a54a21fdd69baae6197f17 Mon Sep 17 00:00:00 2001 From: halosghost Date: Sun, 10 May 2020 11:15:06 -0500 Subject: [PATCH] Update .gitattributes to correctly handle line-endings - should correctly default all line-endings to platform-native - override default to require Makefile to have eol=lf --- .gitattributes | 4 +- Makefile | 142 +- README.md | 1990 +++++----- examples/line_reader.c | 68 +- examples/prelude.lspy | 478 +-- mpc.c | 8136 ++++++++++++++++++++-------------------- mpc.h | 780 ++-- tests/combinators.c | 178 +- tests/core.c | 468 +-- tests/grammar.c | 826 ++-- tests/maths.grammar | 12 +- tests/regex.c | 362 +- tests/test.c | 32 +- 13 files changed, 6739 insertions(+), 6737 deletions(-) diff --git a/.gitattributes b/.gitattributes index 77879c8..d1e7236 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ -"* text=auto" +* text=auto + +Makefile text eol=lf diff --git a/Makefile b/Makefile index 5583556..e12f3f9 100644 --- a/Makefile +++ b/Makefile @@ -1,71 +1,71 @@ -PROJ = mpc -CC ?= gcc -STD ?= -ansi -DIST = build -MKDIR ?= mkdir -p -PREFIX ?= /usr/local -CFLAGS ?= $(STD) -pedantic -O3 -g -Wall -Werror -Wextra -Wformat=2 -Wshadow \ - -Wno-long-long -Wno-overlength-strings -Wno-format-nonliteral -Wcast-align \ - -Wwrite-strings -Wstrict-prototypes -Wold-style-definition -Wredundant-decls \ - -Wnested-externs -Wmissing-include-dirs -Wswitch-default - -TESTS = $(wildcard tests/*.c) -EXAMPLES = $(wildcard examples/*.c) -EXAMPLESEXE = $(EXAMPLES:.c=) - -.PHONY: all check clean libs $(DIST)/$(PROJ).pc - -all: $(EXAMPLESEXE) check - -$(DIST): - $(MKDIR) $(DIST)/examples - -check: $(DIST)/test-file $(DIST)/test-static $(DIST)/test-dynamic - ./$(DIST)/test-file - ./$(DIST)/test-static - LD_LIBRARY_PATH=$(DIST) ./$(DIST)/test-dynamic - -$(DIST)/test-file: $(TESTS) $(PROJ).c $(PROJ).h tests/ptest.h - $(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) $(PROJ).c -lm -o $(DIST)/test-file - -$(DIST)/test-dynamic: $(TESTS) 
$(DIST)/lib$(PROJ).so $(PROJ).h tests/ptest.h - $(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) -lm -L$(DIST) -l$(PROJ) -o $(DIST)/test-dynamic - -$(DIST)/test-static: $(TESTS) $(DIST)/lib$(PROJ).a $(PROJ).h tests/ptest.h - $(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) -lm -L$(DIST) -l$(PROJ) -static -o $(DIST)/test-static - -examples/%: $(DIST) examples/%.c $(PROJ).c $(PROJ).h - $(CC) $(CFLAGS) $(filter-out $(DIST) $(PROJ).h, $^) -lm -o $(DIST)/$@ - -$(DIST)/lib$(PROJ).so: $(PROJ).c $(PROJ).h -ifneq ($(OS),Windows_NT) - $(CC) $(CFLAGS) -fPIC -shared $(PROJ).c -o $(DIST)/lib$(PROJ).so -else - $(CC) $(CFLAGS) -shared $(PROJ).c -o $(DIST)/lib$(PROJ).so -endif - -$(DIST)/lib$(PROJ).a: $(PROJ).c $(PROJ).h - $(CC) $(CFLAGS) -c $(PROJ).c -o $(DIST)/$(PROJ).o - $(AR) rcs $(DIST)/lib$(PROJ).a $(DIST)/$(PROJ).o - -libs: $(DIST)/lib$(PROJ).so $(DIST)/lib$(PROJ).a - -$(DIST)/$(PROJ).pc: $(DIST) $(PROJ).pc - cp $(PROJ).pc $(DIST)/$(PROJ).pc - sed -i '1i\prefix=$(PREFIX)/' $(DIST)/$(PROJ).pc - -clean: - rm -rf -- $(DIST) - -install: all - install -d -m644 $(DESTDIR)$(PREFIX)/{include,lib/pkgconfig,share/$(PROJ)} - install -m755 -t $(DESTDIR)$(PREFIX)/lib $(DIST)/lib* - install -m644 -t $(DESTDIR)$(PREFIX)/share/$(PROJ) $(PROJ).{c,h} - install -m644 $(PROJ).h $(DESTDIR)$(PREFIX)/include/$(PROJ).h - install -m644 $(DIST)/$(PROJ).pc $(DESTDIR)$(PREFIX)/lib/pkgconfig/$(PROJ).pc - -uninstall: - rm -rf -- \ - $(DESTDIR)$(PREFIX)/include/$(PROJ).h \ - $(DESTDIR)$(PREFIX)/share/$(PROJ)/$(PROJ).{c,h} \ - $(DESTDIR)$(PREFIX)/lib/lib$(PROJ).{so,a} +PROJ = mpc +CC ?= gcc +STD ?= -ansi +DIST = build +MKDIR ?= mkdir -p +PREFIX ?= /usr/local +CFLAGS ?= $(STD) -pedantic -O3 -g -Wall -Werror -Wextra -Wformat=2 -Wshadow \ + -Wno-long-long -Wno-overlength-strings -Wno-format-nonliteral -Wcast-align \ + -Wwrite-strings -Wstrict-prototypes -Wold-style-definition -Wredundant-decls \ + -Wnested-externs -Wmissing-include-dirs -Wswitch-default + +TESTS = $(wildcard tests/*.c) +EXAMPLES = $(wildcard 
examples/*.c) +EXAMPLESEXE = $(EXAMPLES:.c=) + +.PHONY: all check clean libs $(DIST)/$(PROJ).pc + +all: $(EXAMPLESEXE) check + +$(DIST): + $(MKDIR) $(DIST)/examples + +check: $(DIST)/test-file $(DIST)/test-static $(DIST)/test-dynamic + ./$(DIST)/test-file + ./$(DIST)/test-static + LD_LIBRARY_PATH=$(DIST) ./$(DIST)/test-dynamic + +$(DIST)/test-file: $(TESTS) $(PROJ).c $(PROJ).h tests/ptest.h + $(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) $(PROJ).c -lm -o $(DIST)/test-file + +$(DIST)/test-dynamic: $(TESTS) $(DIST)/lib$(PROJ).so $(PROJ).h tests/ptest.h + $(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) -lm -L$(DIST) -l$(PROJ) -o $(DIST)/test-dynamic + +$(DIST)/test-static: $(TESTS) $(DIST)/lib$(PROJ).a $(PROJ).h tests/ptest.h + $(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) -lm -L$(DIST) -l$(PROJ) -static -o $(DIST)/test-static + +examples/%: $(DIST) examples/%.c $(PROJ).c $(PROJ).h + $(CC) $(CFLAGS) $(filter-out $(DIST) $(PROJ).h, $^) -lm -o $(DIST)/$@ + +$(DIST)/lib$(PROJ).so: $(PROJ).c $(PROJ).h +ifneq ($(OS),Windows_NT) + $(CC) $(CFLAGS) -fPIC -shared $(PROJ).c -o $(DIST)/lib$(PROJ).so +else + $(CC) $(CFLAGS) -shared $(PROJ).c -o $(DIST)/lib$(PROJ).so +endif + +$(DIST)/lib$(PROJ).a: $(PROJ).c $(PROJ).h + $(CC) $(CFLAGS) -c $(PROJ).c -o $(DIST)/$(PROJ).o + $(AR) rcs $(DIST)/lib$(PROJ).a $(DIST)/$(PROJ).o + +libs: $(DIST)/lib$(PROJ).so $(DIST)/lib$(PROJ).a + +$(DIST)/$(PROJ).pc: $(DIST) $(PROJ).pc + cp $(PROJ).pc $(DIST)/$(PROJ).pc + sed -i '1i\prefix=$(PREFIX)/' $(DIST)/$(PROJ).pc + +clean: + rm -rf -- $(DIST) + +install: all + install -d -m644 $(DESTDIR)$(PREFIX)/{include,lib/pkgconfig,share/$(PROJ)} + install -m755 -t $(DESTDIR)$(PREFIX)/lib $(DIST)/lib* + install -m644 -t $(DESTDIR)$(PREFIX)/share/$(PROJ) $(PROJ).{c,h} + install -m644 $(PROJ).h $(DESTDIR)$(PREFIX)/include/$(PROJ).h + install -m644 $(DIST)/$(PROJ).pc $(DESTDIR)$(PREFIX)/lib/pkgconfig/$(PROJ).pc + +uninstall: + rm -rf -- \ + $(DESTDIR)$(PREFIX)/include/$(PROJ).h \ + 
$(DESTDIR)$(PREFIX)/share/$(PROJ)/$(PROJ).{c,h} \ + $(DESTDIR)$(PREFIX)/lib/lib$(PROJ).{so,a} diff --git a/README.md b/README.md index 98ea680..b08933b 100644 --- a/README.md +++ b/README.md @@ -1,995 +1,995 @@ -Micro Parser Combinators -======================== - -Version 0.9.0 - - -About ------ - -_mpc_ is a lightweight and powerful Parser Combinator library for C. - -Using _mpc_ might be of interest to you if you are... - -* Building a new programming language -* Building a new data format -* Parsing an existing programming language -* Parsing an existing data format -* Embedding a Domain Specific Language -* Implementing [Greenspun's Tenth Rule](http://en.wikipedia.org/wiki/Greenspun%27s_tenth_rule) - - -Features --------- - -* Type-Generic -* Predictive, Recursive Descent -* Easy to Integrate (One Source File in ANSI C) -* Automatic Error Message Generation -* Regular Expression Parser Generator -* Language/Grammar Parser Generator - - -Alternatives ------------- - -The current main alternative for a C based parser combinator library is a branch of [Cesium3](https://github.com/wbhart/Cesium3/tree/combinators). - -_mpc_ provides a number of features that this project does not offer, and also overcomes a number of potential downsides: - -* _mpc_ Works for Generic Types -* _mpc_ Doesn't rely on Boehm-Demers-Weiser Garbage Collection -* _mpc_ Doesn't use `setjmp` and `longjmp` for errors -* _mpc_ Doesn't pollute the namespace - - -Quickstart -========== - -Here is how one would use _mpc_ to create a parser for a basic mathematical expression language. 
- -```c -mpc_parser_t *Expr = mpc_new("expression"); -mpc_parser_t *Prod = mpc_new("product"); -mpc_parser_t *Value = mpc_new("value"); -mpc_parser_t *Maths = mpc_new("maths"); - -mpca_lang(MPCA_LANG_DEFAULT, - " expression : <product> (('+' | '-') <product>)*; " - " product : <value> (('*' | '/') <value>)*; " - " value : /[0-9]+/ | '(' <expression> ')'; " - " maths : /^/ <expression> /$/; ", - Expr, Prod, Value, Maths, NULL); - -mpc_result_t r; - -if (mpc_parse("input", input, Maths, &r)) { - mpc_ast_print(r.output); - mpc_ast_delete(r.output); -} else { - mpc_err_print(r.error); - mpc_err_delete(r.error); -} - -mpc_cleanup(4, Expr, Prod, Value, Maths); -``` - -If you were to set `input` to the string `(4 * 2 * 11 + 2) - 5`, the printed output would look like this. - -``` -> - regex - expression|> - value|> - char:1:1 '(' - expression|> - product|> - value|regex:1:2 '4' - char:1:4 '*' - value|regex:1:6 '2' - char:1:8 '*' - value|regex:1:10 '11' - char:1:13 '+' - product|value|regex:1:15 '2' - char:1:16 ')' - char:1:18 '-' - product|value|regex:1:20 '5' - regex -``` - -Getting Started -=============== - -Introduction ------------- - -Parser Combinators are structures that encode how to parse particular languages. They can be combined using intuitive operators to create new parsers of increasing complexity. Using these operators detailed grammars and languages can be parsed and processed in a quick, efficient, and easy way. - -The trick behind Parser Combinators is the observation that by structuring the library in a particular way, one can make building parser combinators look like writing a grammar itself. Therefore instead of describing _how to parse a language_, a user must only specify _the language itself_, and the library will work out how to parse it ... as if by magic! - -_mpc_ can be used in this mode, or, as shown in the above example, you can specify the grammar directly as a string or in a file. 
- -Basic Parsers -------------- - -### String Parsers - -All the following functions construct new basic parsers of the type `mpc_parser_t *`. All of those parsers return a newly allocated `char *` with the character(s) they manage to match. If unsuccessful they will return an error. They have the following functionality. - -* * * - -```c -mpc_parser_t *mpc_any(void); -``` - -Matches any individual character - -* * * - -```c -mpc_parser_t *mpc_char(char c); -``` - -Matches a single given character `c` - -* * * - -```c -mpc_parser_t *mpc_range(char s, char e); -``` - -Matches any single given character in the range `s` to `e` (inclusive) - -* * * - -```c -mpc_parser_t *mpc_oneof(const char *s); -``` - -Matches any single given character in the string `s` - -* * * - -```c -mpc_parser_t *mpc_noneof(const char *s); -``` - -Matches any single given character not in the string `s` - -* * * - -```c -mpc_parser_t *mpc_satisfy(int(*f)(char)); -``` - -Matches any single given character satisfying function `f` - -* * * - -```c -mpc_parser_t *mpc_string(const char *s); -``` - -Matches exactly the string `s` - - -### Other Parsers - -Several other functions exist that construct parsers with some other special functionality. - -* * * - -```c -mpc_parser_t *mpc_pass(void); -``` - -Consumes no input, always successful, returns `NULL` - -* * * - -```c -mpc_parser_t *mpc_fail(const char *m); -mpc_parser_t *mpc_failf(const char *fmt, ...); -``` - -Consumes no input, always fails with message `m` or formatted string `fmt`. - -* * * - -```c -mpc_parser_t *mpc_lift(mpc_ctor_t f); -``` - -Consumes no input, always successful, returns the result of function `f` - -* * * - -```c -mpc_parser_t *mpc_lift_val(mpc_val_t *x); -``` - -Consumes no input, always successful, returns `x` - -* * * - -```c -mpc_parser_t *mpc_state(void); -``` - -Consumes no input, always successful, returns a copy of the parser state as a `mpc_state_t *`. 
This state is newly allocated and so needs to be released with `free` when finished with. - -* * * - -```c -mpc_parser_t *mpc_anchor(int(*f)(char,char)); -``` - -Consumes no input. Successful when function `f` returns true. Always returns `NULL`. - -Function `f` is an _anchor_ function. It takes as input the last character parsed, and the next character in the input, and returns success or failure. This function can be set by the user to ensure some condition is met. For example to test that the input is at a boundary between words and non-words. - -At the start of the input the first argument is set to `'\0'`. At the end of the input the second argument is set to `'\0'`. - - - -Parsing -------- - -Once you've built a parser, you can run it on some input using one of the following functions. These functions return `1` on success and `0` on failure. They output either the result, or an error to a `mpc_result_t` variable. This type is defined as follows. - -```c -typedef union { - mpc_err_t *error; - mpc_val_t *output; -} mpc_result_t; -``` - -where `mpc_val_t *` is synonymous with `void *` and simply represents some pointer to data - the exact type of which is dependent on the parser. - - -* * * - -```c -int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); -``` - -Run a parser on some string. - -* * * - -```c -int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); -``` - -Run a parser on some file. - -* * * - -```c -int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); -``` - -Run a parser on some pipe (such as `stdin`). - -* * * - -```c -int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); -``` - -Run a parser on the contents of some file. - - -Combinators ------------ - -Combinators are functions that take one or more parsers and return a new parser of some given functionality. 
- -These combinators work independently of exactly what data type the parser(s) supplied as input return. In languages such as Haskell ensuring you don't input one type of data into a parser requiring a different type is done by the compiler. But in C we don't have that luxury. So it is at the discretion of the programmer to ensure that he or she deals correctly with the outputs of different parser types. - -A second annoyance in C is that of manual memory management. Some parsers might get half-way and then fail. This means they need to clean up any partial result that has been collected in the parse. In Haskell this is handled by the Garbage Collector, but in C these combinators will need to take _destructor_ functions as input, which say how to clean up any partial data that has been collected. - -Here are the main combinators and how to use them. - -* * * - -```c -mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); -mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); -``` - -Returns a parser that runs `a`, and on success returns the result of `a`, while on failure reports that `e` was expected. - -* * * - -```c -mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); -mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); -``` - -Returns a parser that applies function `f` (optionally taking extra input `x`) to the result of parser `a`. - -* * * - -```c -mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); -mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); -mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); -mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); -``` - -Returns a parser that applies function `f` (optionally taking extra input `x`) to the result of parser `a`. 
If `f` returns non-zero, then the parser succeeds and returns the value of `a` (possibly modified by `f`). If `f` returns zero, then the parser fails with message `e`, and the result of `a` is destroyed with the destructor `da`. - -* * * - -```c -mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); -mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); -``` - -Returns a parser with the following behaviour. If parser `a` succeeds, then it fails and consumes no input. If parser `a` fails, then it succeeds, consumes no input and returns `NULL` (or the result of lift function `lf`). Destructor `da` is used to destroy the result of `a` on success. - -* * * - -```c -mpc_parser_t *mpc_maybe(mpc_parser_t *a); -mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); -``` - -Returns a parser that runs `a`. If `a` is successful then it returns the result of `a`. If `a` is unsuccessful then it succeeds, but returns `NULL` (or the result of `lf`). - -* * * - -```c -mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); -``` - -Runs `a` zero or more times until it fails. Results are combined using fold function `f`. See the _Function Types_ section for more details. - -* * * - -```c -mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); -``` - -Runs `a` one or more times until it fails. Results are combined with fold function `f`. - -* * * - -```c -mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); -``` - -Runs `a` exactly `n` times. If this fails, any partial results are destructed with `da`. If successful results of `a` are combined using fold function `f`. - -* * * - -```c -mpc_parser_t *mpc_or(int n, ...); -``` - -Attempts to run `n` parsers in sequence, returning the first one that succeeds. If all fail, returns an error. - -* * * - -```c -mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); -``` - -Attempts to run `n` parsers in sequence, returning the fold of the results using fold function `f`. 
First parsers must be specified, followed by destructors for each parser, excluding the final parser. These are used in case of partial success. For example: `mpc_and(3, mpcf_strfold, mpc_char('a'), mpc_char('b'), mpc_char('c'), free, free);` would attempt to match `'a'` followed by `'b'` followed by `'c'`, and if successful would concatenate them using `mpcf_strfold`. Otherwise it would use `free` on the partial results. - -* * * - -```c -mpc_parser_t *mpc_predictive(mpc_parser_t *a); -``` - -Returns a parser that runs `a` with backtracking disabled. This means if `a` consumes more than one character, it will not be reverted, even on failure. Turning backtracking off has good performance benefits for grammars which are `LL(1)`. These are grammars where the first character completely determines the parse result - such as the decision of parsing either a C identifier, number, or string literal. This option should not be used for non `LL(1)` grammars or it will produce incorrect results or crash the parser. - -Another way to think of `mpc_predictive` is that it can be applied to a parser (for a performance improvement) if either successfully parsing the first character will result in a completely successful parse, or all of the referenced sub-parsers are also `LL(1)`. - - -Function Types --------------- - -The combinator functions take a number of special function types as function pointers. Here is a short explanation of those types and how they are expected to behave. It is important that these behave correctly otherwise it is easy to introduce memory leaks or crashes into the system. - -* * * - -```c -typedef void(*mpc_dtor_t)(mpc_val_t*); -``` - -Given some pointer to a data value it will ensure the memory it points to is freed correctly. - -* * * - -```c -typedef mpc_val_t*(*mpc_ctor_t)(void); -``` - -Returns some data value when called. It can be used to create _empty_ versions of data types when certain combinators have no known default value to return. 
For example it may be used to return a newly allocated empty string. - -* * * - -```c -typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); -``` - -This takes in some pointer to data and outputs some new or modified pointer to data, ensuring to free the input data if it is no longer used. The `apply_to` variation takes in an extra pointer to some data such as global state. - -* * * - -```c -typedef int(*mpc_check_t)(mpc_val_t**); -typedef int(*mpc_check_with_t)(mpc_val_t**,void*); -``` - -This takes in some pointer to data and outputs 0 if parsing should stop with an error. Additionally, this may change or free the input data. The `check_with` variation takes in an extra pointer to some data such as global state. - -* * * - -```c -typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); -``` - -This takes a list of pointers to data values and must return some combined or folded version of these data values. It must ensure to free any input data that is no longer used once the combination has taken place. - - -Case Study - Identifier -======================= - -Combinator Method ------------------ - -Using the above combinators we can create a parser that matches a C identifier. - -When using the combinators we need to supply a function that says how to combine two `char *`. - -For this we build a fold function that will concatenate zero or more strings together. For the sake of this tutorial we will write it by hand, but this (as well as many other useful fold functions) is actually included in _mpc_ under the `mpcf_*` namespace, such as `mpcf_strfold`. - -```c -mpc_val_t *strfold(int n, mpc_val_t **xs) { - char *x = calloc(1, 1); - int i; - for (i = 0; i < n; i++) { - x = realloc(x, strlen(x) + strlen(xs[i]) + 1); - strcat(x, xs[i]); - free(xs[i]); - } - return x; -} -``` - -We can use this to specify a C identifier, making use of some combinators to say how the basic parsers are combined. 
- -```c -mpc_parser_t *alpha = mpc_or(2, mpc_range('a', 'z'), mpc_range('A', 'Z')); -mpc_parser_t *digit = mpc_range('0', '9'); -mpc_parser_t *underscore = mpc_char('_'); - -mpc_parser_t *ident = mpc_and(2, strfold, - mpc_or(2, alpha, underscore), - mpc_many(strfold, mpc_or(3, alpha, digit, underscore)), - free); - -/* Do Some Parsing... */ - -mpc_delete(ident); -``` - -Notice that previous parsers are used as input to new parsers we construct from the combinators. Note that only the final parser `ident` must be deleted. When we input a parser into a combinator we should consider it to be part of the output of that combinator. - -Because of this we shouldn't create a parser and input it into multiple places, or it will be doubly freed. - - -Regex Method ------------- - -There is an easier way to do this than the above method. _mpc_ comes with a handy regex function for constructing parsers using regex syntax. We can specify an identifier using a regex pattern as shown below. - -```c -mpc_parser_t *ident = mpc_re("[a-zA-Z_][a-zA-Z_0-9]*"); - -/* Do Some Parsing... */ - -mpc_delete(ident); -``` - - -Library Method --------------- - -Although if we really wanted to create a parser for C identifiers, a function for creating this parser comes included in _mpc_ along with many other common parsers. - -```c -mpc_parser_t *ident = mpc_ident(); - -/* Do Some Parsing... */ - -mpc_delete(ident); -``` - -Parser References -================= - -Building parsers in the above way can have issues with self-reference or cyclic-reference. To overcome this we can separate the construction of parsers into two different steps. Construction and Definition. - -* * * - -```c -mpc_parser_t *mpc_new(const char *name); -``` - -This will construct a parser called `name` which can then be used as input to others, including itself, without fear of being deleted. Any parser created using `mpc_new` is said to be _retained_. This means it will behave differently to a normal parser when referenced. 
When deleting a parser that includes a _retained_ parser, the _retained_ parser will not be deleted along with it. To delete a retained parser `mpc_delete` must be used on it directly. - -A _retained_ parser can then be _defined_ using... - -* * * - -```c -mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); -``` - -This assigns the contents of parser `a` to `p`, and deletes `a`. With this technique parsers can now reference each other, as well as themselves, without trouble. - -* * * - -```c -mpc_parser_t *mpc_undefine(mpc_parser_t *p); -``` - -A final step is required. Parsers that reference each other must all be undefined before they are deleted. It is important to do any undefining before deletion. The reason for this is that to delete a parser it must look at each sub-parser that is used by it. If any of these have already been deleted a segfault is unavoidable - even if they were retained beforehand. - -* * * - -```c -void mpc_cleanup(int n, ...); -``` - -To ease the task of undefining and then deleting parsers `mpc_cleanup` can be used. It takes `n` parsers as input, and undefines them all, before deleting them all. - -* * * - -```c -mpc_parser_t *mpc_copy(mpc_parser_t *a); -``` - -This function makes a copy of a parser `a`. This can be useful when you want to -use a parser as input for some other parsers multiple times without retaining -it. - -* * * - -```c -mpc_parser_t *mpc_re(const char *re); -mpc_parser_t *mpc_re_mode(const char *re, int mode); -``` - -This function takes as input the regular expression `re` and builds a parser -for it. With the `mpc_re_mode` function optional mode flags can also be given. -Available flags are `MPC_RE_MULTILINE` / `MPC_RE_M` where the start of input -character `^` also matches the beginning of new lines and the end of input `$` -character also matches new lines, and `MPC_RE_DOTALL` / `MPC_RE_S` where the -any character token `.` also matches newlines (by default it doesn't). 
- - -Library Reference -================= - -Common Parsers --------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
mpc_soiMatches only the start of input, returns NULL
mpc_eoiMatches only the end of input, returns NULL
mpc_boundaryMatches only the boundary between words, returns NULL
mpc_boundary_newlineMatches the start of a new line, returns NULL
mpc_whitespaceMatches any whitespace character " \f\n\r\t\v"
mpc_whitespacesMatches zero or more whitespace characters
mpc_blankMatches whitespaces and frees the result, returns NULL
mpc_newlineMatches '\n'
mpc_tabMatches '\t'
mpc_escapeMatches a backslash followed by any character
mpc_digitMatches any character in the range '0' - '9'
mpc_hexdigitMatches any character in the range '0' - '9' as well as 'A' - 'F' and 'a' - 'f'
mpc_octdigitMatches any character in the range '0' - '7'
mpc_digitsMatches one or more digit
mpc_hexdigitsMatches one or more hexdigit
mpc_octdigitsMatches one or more octdigit
mpc_lowerMatches any lower case character
mpc_upperMatches any upper case character
mpc_alphaMatches any alphabet character
mpc_underscoreMatches '_'
mpc_alphanumMatches any alphabet character, underscore or digit
mpc_intMatches digits and returns an int*
mpc_hexMatches hexdigits and returns an int*
mpc_octMatches octdigits and returns an int*
mpc_numberMatches mpc_int, mpc_hex or mpc_oct
mpc_realMatches some floating point number as a string
mpc_floatMatches some floating point number and returns a float*
mpc_char_litMatches some character literal surrounded by '
mpc_string_litMatches some string literal surrounded by "
mpc_regex_litMatches some regex literal surrounded by /
mpc_identMatches a C style identifier
- - -Useful Parsers --------------- - - - - - - - - - - - - - - - - - - - - - - - -
mpc_startswith(mpc_parser_t *a);Matches the start of input followed by a
mpc_endswith(mpc_parser_t *a, mpc_dtor_t da);Matches a followed by the end of input
mpc_whole(mpc_parser_t *a, mpc_dtor_t da);Matches the start of input, a, and the end of input
mpc_stripl(mpc_parser_t *a);Matches a first consuming any whitespace to the left
mpc_stripr(mpc_parser_t *a);Matches a then consumes any whitespace to the right
mpc_strip(mpc_parser_t *a);Matches a consuming any surrounding whitespace
mpc_tok(mpc_parser_t *a);Matches a and consumes any trailing whitespace
mpc_sym(const char *s);Matches string s and consumes any trailing whitespace
mpc_total(mpc_parser_t *a, mpc_dtor_t da);Matches a with any surrounding whitespace consumed, enclosed in the start and end of input
mpc_between(mpc_parser_t *a, mpc_dtor_t ad,
const char *o, const char *c);
Matches a between strings o and c
mpc_parens(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "(" and ")"
mpc_braces(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "<" and ">"
mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "{" and "}"
mpc_squares(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "[" and "]"
mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad,
const char *o, const char *c);
Matches a between o and c, where o and c have their trailing whitespace stripped.
mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad);Matches a between trailing whitespace consumed "(" and ")"
mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad);Matches a between trailing whitespace consumed "<" and ">"
mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad);Matches a between trailing whitespace consumed "{" and "}"
mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad);Matches a between trailing whitespace consumed "[" and "]"
- - -Apply Functions ---------------- - - - - - - - - - - - - - - - - - - - - - - -
void mpcf_dtor_null(mpc_val_t *x);Empty destructor. Does nothing
mpc_val_t *mpcf_ctor_null(void);Returns NULL
mpc_val_t *mpcf_ctor_str(void);Returns ""
mpc_val_t *mpcf_free(mpc_val_t *x);Frees x and returns NULL
mpc_val_t *mpcf_int(mpc_val_t *x);Converts a decimal string x to an int*
mpc_val_t *mpcf_hex(mpc_val_t *x);Converts a hex string x to an int*
mpc_val_t *mpcf_oct(mpc_val_t *x);Converts an oct string x to an int*
mpc_val_t *mpcf_float(mpc_val_t *x);Converts a string x to a float*
mpc_val_t *mpcf_escape(mpc_val_t *x);Converts a string x to an escaped version
mpc_val_t *mpcf_escape_regex(mpc_val_t *x);Converts a regex x to an escaped version
mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x);Converts a raw string x to an escaped version
mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x);Converts a raw character x to an escaped version
mpc_val_t *mpcf_unescape(mpc_val_t *x);Converts a string x to an unescaped version
mpc_val_t *mpcf_unescape_regex(mpc_val_t *x);Converts a regex x to an unescaped version
mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x);Converts a raw string x to an unescaped version
mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x);Converts a raw character x to an unescaped version
mpc_val_t *mpcf_strtriml(mpc_val_t *x);Trims whitespace from the left of string x
mpc_val_t *mpcf_strtrimr(mpc_val_t *x);Trims whitespace from the right of string x
mpc_val_t *mpcf_strtrim(mpc_val_t *x);Trims whitespace from either side of string x
- - -Fold Functions --------------- - - - - - - - - - - - - - - -
mpc_val_t *mpcf_null(int n, mpc_val_t** xs);Returns NULL
mpc_val_t *mpcf_fst(int n, mpc_val_t** xs);Returns first element of xs
mpc_val_t *mpcf_snd(int n, mpc_val_t** xs);Returns second element of xs
mpc_val_t *mpcf_trd(int n, mpc_val_t** xs);Returns third element of xs
mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs);Returns first element of xs and calls free on others
mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs);Returns second element of xs and calls free on others
mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs);Returns third element of xs and calls free on others
mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs);Calls free on all elements of xs and returns NULL
mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs);Concatenates all xs together as strings and returns result
- - -Case Study - Maths Language -=========================== - -Combinator Approach -------------------- - -Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own output types for parsers. For example we could design our own syntax tree type to use. We can also use this method to do some specific house-keeping or data processing in the parsing phase. - -As an example of this power, we can specify a simple maths grammar, that outputs `int *`, and computes the result of the expression as it goes along. - -We start with a fold function that will fold two `int *` into a new `int *` based on some `char *` operator. - -```c -mpc_val_t *fold_maths(int n, mpc_val_t **xs) { - - int **vs = (int**)xs; - - if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; } - if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; } - if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; } - if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; } - if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; } - - free(xs[1]); free(xs[2]); - - return xs[0]; -} -``` - -And then we use this to specify a basic grammar, which folds together any results. - -```c -mpc_parser_t *Expr = mpc_new("expr"); -mpc_parser_t *Factor = mpc_new("factor"); -mpc_parser_t *Term = mpc_new("term"); -mpc_parser_t *Maths = mpc_new("maths"); - -mpc_define(Expr, mpc_or(2, - mpc_and(3, fold_maths, - Factor, mpc_oneof("+-"), Factor, - free, free), - Factor -)); - -mpc_define(Factor, mpc_or(2, - mpc_and(3, fold_maths, - Term, mpc_oneof("*/"), Term, - free, free), - Term -)); - -mpc_define(Term, mpc_or(2, mpc_int(), mpc_parens(Expr, free))); -mpc_define(Maths, mpc_whole(Expr, free)); - -/* Do Some Parsing... */ - -mpc_delete(Maths); -``` - -If we supply this function with something like `(4*2)+5`, we can expect it to output `13`. 
- - -Language Approach ------------------ - -It is possible to avoid passing in and around all those function pointers, if you don't care what type is output by _mpc_. For this, a generic Abstract Syntax Tree type `mpc_ast_t` is included in _mpc_. The combinator functions which act on this don't need information on how to destruct or fold instances of the result as they know it will be a `mpc_ast_t`. So there are a number of combinator functions which work specifically (and only) on parsers that return this type. They reside under `mpca_*`. - -Doing things via this method means that all the data processing must take place after the parsing. In many instances this is not an issue, or even preferable. - -It also allows for one more trick. As all the fold and destructor functions are implicit, the user can simply specify the grammar of the language in some nice way and the system can try to build a parser for the AST type from this alone. For this there are a few functions supplied which take in a string, and output a parser. The format for these grammars is simple and familiar to those who have used parser generators before. It looks something like this. - -``` -number "number" : /[0-9]+/ ; -expression : (('+' | '-') )* ; -product : (('*' | '/') )* ; -value : | '(' ')' ; -maths : /^/ /$/ ; -``` - -The syntax for this is defined as follows. - - - - - - - - - - - -
"ab"The string ab is required.
'a'The character a is required.
'a' 'b'First 'a' is required, then 'b' is required..
'a' | 'b'Either 'a' is required, or 'b' is required.
'a'*Zero or more 'a' are required.
'a'+One or more 'a' are required.
'a'?Zero or one 'a' is required.
'a'{x}Exactly x (integer) copies of 'a' are required.
<abba>The rule called abba is required.
- -Rules are specified by rule name, optionally followed by an _expected_ string, followed by a colon `:`, followed by the definition, and ending in a semicolon `;`. Multiple rules can be specified. The _rule names_ must match the names given to any parsers created by `mpc_new`, otherwise the function will crash. - -The flags variable is a set of flags `MPCA_LANG_DEFAULT`, `MPCA_LANG_PREDICTIVE`, or `MPCA_LANG_WHITESPACE_SENSITIVE`. For specifying if the language is predictive or whitespace sensitive. - -Like with the regular expressions, this user input is parsed by existing parts of the _mpc_ library. It provides one of the more powerful features of the library. - -* * * - -```c -mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); -``` - -This takes in some single right hand side of a rule, as well as a list of any of the parsers referenced, and outputs a parser that does what is specified by the rule. The list of parsers referenced can be terminated with `NULL` to get an error instead of a crash when a parser required is not supplied. - -* * * - -```c -mpc_err_t *mpca_lang(int flags, const char *lang, ...); -``` - -This takes in a full language (zero or more rules) as well as any parsers referred to by either the right or left hand sides. Any parsers specified on the left hand side of any rule will be assigned a parser equivalent to what is specified on the right. On valid user input this returns `NULL`, while if there are any errors in the user input it will return an instance of `mpc_err_t` describing the issues. The list of parsers referenced can be terminated with `NULL` to get an error instead of a crash when a parser required is not supplied. - -* * * - -```c -mpc_err_t *mpca_lang_file(int flags, FILE* f, ...); -``` - -This reads in the contents of file `f` and inputs it into `mpca_lang`. 
- -* * * - -```c -mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); -``` - -This opens and reads in the contents of the file given by `filename` and passes it to `mpca_lang`. - -Case Study - Tokenizer -====================== - -Another common task we might be interested in doing is tokenizing some block of -text (splitting the text into individual elements) and performing some function -on each one of these elements as it is read. We can do this with `mpc` too. - -First, we can build a regular expression which parses an individual token. For -example if our tokens are identifiers, integers, commas, periods and colons we -could build something like this `mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")`. -Next we can strip any whitespace, and add a callback function using `mpc_apply` -which gets called every time this regex is parsed successfully -`mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)`. -Finally we can surround all of this in `mpc_many` to parse it zero or more -times. The final code might look something like this: - -```c -static mpc_val_t *print_token(mpc_val_t *x) { - printf("Token: '%s'\n", (char*)x); - return x; -} - -int main(int argc, char **argv) { - - const char *input = " hello 4352 , \n foo.bar \n\n test:ing "; - - mpc_parser_t* Tokens = mpc_many( - mpcf_all_free, - mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)); - - mpc_result_t r; - mpc_parse("input", input, Tokens, &r); - - mpc_delete(Tokens); - - return 0; -} -``` - -Running this program will produce an output something like this: - -``` -Token: 'hello' -Token: '4352' -Token: ',' -Token: 'foo' -Token: '.' -Token: 'bar' -Token: 'test' -Token: ':' -Token: 'ing' -``` - -By extending the regex we can easily extend this to parse many more types of -tokens and quickly and easily build a tokenizer for whatever language we are -interested in. 
- - -Error Reporting -=============== - -_mpc_ provides some automatic generation of error messages. These can be enhanced by the user, with use of `mpc_expect`, but many of the defaults should provide both useful and readable. An example of an error message might look something like this: - -``` -:0:3: error: expected one or more of 'a' or 'd' at 'k' -``` - -Misc -==== - -Here are some other misc functions that mpc provides. These functions are susceptible to change between versions so use them with some care. - -* * * - -```c -void mpc_print(mpc_parser_t *p); -``` - -Prints out a parser in some weird format. This is generally used for debugging so don't expect to be able to understand the output right away without looking at the source code a little bit. - -* * * - -```c -void mpc_stats(mpc_parser_t *p); -``` - -Prints out some basic stats about a parser. Again used for debugging and optimisation. - -* * * - -```c -void mpc_optimise(mpc_parser_t *p); -``` - -Performs some basic optimisations on a parser to reduce it's size and increase its running speed. - - -Limitations & FAQ -================= - -### Does _mpc_ support Unicode? - -_mpc_ Only supports ASCII. Sorry! Writing a parser library that supports Unicode is pretty difficult. I welcome contributions! - - -### Is _mpc_ binary safe? - -No. Sorry! Including NULL characters in a string or a file will probably break it. Avoid this if possible. - - -### The Parser is going into an infinite loop! - -While it is certainly possible there is an issue with _mpc_, it is probably the case that your grammar contains _left recursion_. This is something _mpc_ cannot deal with. _Left recursion_ is when a rule directly or indirectly references itself on the left hand side of a derivation. For example consider this left recursive grammar intended to parse an expression. - -``` -expr : '+' ( | | ); -``` - -When the rule `expr` is called, it looks the first rule on the left. This happens to be the rule `expr` again. 
So again it looks for the first rule on the left. Which is `expr` again. And so on. To avoid left recursion this can be rewritten (for example) as the following. Note that rewriting as follows also changes the operator associativity. - -``` -value : | ; -expr : ('+' )* ; -``` - -Avoiding left recursion can be tricky, but is easy once you get a feel for it. For more information you can look on [wikipedia](http://en.wikipedia.org/wiki/Left_recursion) which covers some common techniques and more examples. Possibly in the future _mpc_ will support functionality to warn the user or re-write grammars which contain left recursion, but it wont for now. - - -### Backtracking isn't working! - -_mpc_ supports backtracking, but it may not work as you expect. It isn't a silver bullet, and you still must structure your grammar to be unambiguous. To demonstrate this behaviour examine the following erroneous grammar, intended to parse either a C style identifier, or a C style function call. - -``` -factor : - | '(' ? (',' )* ')' ; -``` - -This grammar will never correctly parse a function call because it will always first succeed parsing the initial identifier and return a factor. At this point it will encounter the parenthesis of the function call, give up, and throw an error. Even if it were to try and parse a factor again on this failure it would never reach the correct function call option because it always tries the other options first, and always succeeds with the identifier. - -The solution to this is to always structure grammars with the most specific clause first, and more general clauses afterwards. This is the natural technique used for avoiding left-recursive grammars and unambiguity, so is a good habit to get into anyway. - -Now the parser will try to match a function first, and if this fails backtrack and try to match just an identifier. - -``` -factor : '(' ? 
(',' )* ')' - | ; -``` - -An alternative, and better option is to remove the ambiguity completely by factoring out the first identifier. This is better because it removes any need for backtracking at all! Now the grammar is predictive! - -``` -factor : ('(' ? (',' )* ')')? ; -``` - - -### How can I avoid the maximum string literal length? - -Some compilers limit the maximum length of string literals. If you have a huge language string in the source file to be passed into `mpca_lang` you might encounter this. The ANSI standard says that 509 is the maximum length allowed for a string literal. Most compilers support greater than this. Visual Studio supports up to 2048 characters, while gcc allocates memory dynamically and so has no real limit. - -There are a couple of ways to overcome this issue if it arises. You could instead use `mpca_lang_contents` and load the language from file or you could use a string literal for each line and let the preprocessor automatically concatenate them together, avoiding the limit. The final option is to upgrade your compiler. In C99 this limit has been increased to 4095. - - -### The automatic tags in the AST are annoying! - -When parsing from a grammar, the abstract syntax tree is tagged with different tags for each primitive type it encounters. For example a regular expression will be automatically tagged as `regex`. Character literals as `char` and strings as `string`. This is to help people wondering exactly how they might need to convert the node contents. - -If you have a rule in your grammar called `string`, `char` or `regex`, you may encounter some confusion. This is because nodes will be tagged with (for example) `string` _either_ if they are a string primitive, _or_ if they were parsed via your `string` rule. If you are detecting node type using something like `strstr`, in this situation it might break. 
One solution to this is to always check that `string` is the innermost tag to test for string primitives, or to rename your rule called `string` to something that doesn't conflict. - -Yes it is annoying but its probably not going to change! +Micro Parser Combinators +======================== + +Version 0.9.0 + + +About +----- + +_mpc_ is a lightweight and powerful Parser Combinator library for C. + +Using _mpc_ might be of interest to you if you are... + +* Building a new programming language +* Building a new data format +* Parsing an existing programming language +* Parsing an existing data format +* Embedding a Domain Specific Language +* Implementing [Greenspun's Tenth Rule](http://en.wikipedia.org/wiki/Greenspun%27s_tenth_rule) + + +Features +-------- + +* Type-Generic +* Predictive, Recursive Descent +* Easy to Integrate (One Source File in ANSI C) +* Automatic Error Message Generation +* Regular Expression Parser Generator +* Language/Grammar Parser Generator + + +Alternatives +------------ + +The current main alternative for a C based parser combinator library is a branch of [Cesium3](https://github.com/wbhart/Cesium3/tree/combinators). + +_mpc_ provides a number of features that this project does not offer, and also overcomes a number of potential downsides: + +* _mpc_ Works for Generic Types +* _mpc_ Doesn't rely on Boehm-Demers-Weiser Garbage Collection +* _mpc_ Doesn't use `setjmp` and `longjmp` for errors +* _mpc_ Doesn't pollute the namespace + + +Quickstart +========== + +Here is how one would use _mpc_ to create a parser for a basic mathematical expression language. 
+ +```c +mpc_parser_t *Expr = mpc_new("expression"); +mpc_parser_t *Prod = mpc_new("product"); +mpc_parser_t *Value = mpc_new("value"); +mpc_parser_t *Maths = mpc_new("maths"); + +mpca_lang(MPCA_LANG_DEFAULT, + " expression : (('+' | '-') )*; " + " product : (('*' | '/') )*; " + " value : /[0-9]+/ | '(' ')'; " + " maths : /^/ /$/; ", + Expr, Prod, Value, Maths, NULL); + +mpc_result_t r; + +if (mpc_parse("input", input, Maths, &r)) { + mpc_ast_print(r.output); + mpc_ast_delete(r.output); +} else { + mpc_err_print(r.error); + mpc_err_delete(r.error); +} + +mpc_cleanup(4, Expr, Prod, Value, Maths); +``` + +If you were to set `input` to the string `(4 * 2 * 11 + 2) - 5`, the printed output would look like this. + +``` +> + regex + expression|> + value|> + char:1:1 '(' + expression|> + product|> + value|regex:1:2 '4' + char:1:4 '*' + value|regex:1:6 '2' + char:1:8 '*' + value|regex:1:10 '11' + char:1:13 '+' + product|value|regex:1:15 '2' + char:1:16 ')' + char:1:18 '-' + product|value|regex:1:20 '5' + regex +``` + +Getting Started +=============== + +Introduction +------------ + +Parser Combinators are structures that encode how to parse particular languages. They can be combined using intuitive operators to create new parsers of increasing complexity. Using these operators detailed grammars and languages can be parsed and processed in a quick, efficient, and easy way. + +The trick behind Parser Combinators is the observation that by structuring the library in a particular way, one can make building parser combinators look like writing a grammar itself. Therefore instead of describing _how to parse a language_, a user must only specify _the language itself_, and the library will work out how to parse it ... as if by magic! + +_mpc_ can be used in this mode, or, as shown in the above example, you can specify the grammar directly as a string or in a file. 
+ +Basic Parsers +------------- + +### String Parsers + +All the following functions construct new basic parsers of the type `mpc_parser_t *`. All of those parsers return a newly allocated `char *` with the character(s) they manage to match. If unsuccessful they will return an error. They have the following functionality. + +* * * + +```c +mpc_parser_t *mpc_any(void); +``` + +Matches any individual character + +* * * + +```c +mpc_parser_t *mpc_char(char c); +``` + +Matches a single given character `c` + +* * * + +```c +mpc_parser_t *mpc_range(char s, char e); +``` + +Matches any single given character in the range `s` to `e` (inclusive) + +* * * + +```c +mpc_parser_t *mpc_oneof(const char *s); +``` + +Matches any single given character in the string `s` + +* * * + +```c +mpc_parser_t *mpc_noneof(const char *s); +``` + +Matches any single given character not in the string `s` + +* * * + +```c +mpc_parser_t *mpc_satisfy(int(*f)(char)); +``` + +Matches any single given character satisfying function `f` + +* * * + +```c +mpc_parser_t *mpc_string(const char *s); +``` + +Matches exactly the string `s` + + +### Other Parsers + +Several other functions exist that construct parsers with some other special functionality. + +* * * + +```c +mpc_parser_t *mpc_pass(void); +``` + +Consumes no input, always successful, returns `NULL` + +* * * + +```c +mpc_parser_t *mpc_fail(const char *m); +mpc_parser_t *mpc_failf(const char *fmt, ...); +``` + +Consumes no input, always fails with message `m` or formatted string `fmt`. + +* * * + +```c +mpc_parser_t *mpc_lift(mpc_ctor_t f); +``` + +Consumes no input, always successful, returns the result of function `f` + +* * * + +```c +mpc_parser_t *mpc_lift_val(mpc_val_t *x); +``` + +Consumes no input, always successful, returns `x` + +* * * + +```c +mpc_parser_t *mpc_state(void); +``` + +Consumes no input, always successful, returns a copy of the parser state as a `mpc_state_t *`. 
This state is newly allocated and so needs to be released with `free` when finished with. + +* * * + +```c +mpc_parser_t *mpc_anchor(int(*f)(char,char)); +``` + +Consumes no input. Successful when function `f` returns true. Always returns `NULL`. + +Function `f` is a _anchor_ function. It takes as input the last character parsed, and the next character in the input, and returns success or failure. This function can be set by the user to ensure some condition is met. For example to test that the input is at a boundary between words and non-words. + +At the start of the input the first argument is set to `'\0'`. At the end of the input the second argument is set to `'\0'`. + + + +Parsing +------- + +Once you've build a parser, you can run it on some input using one of the following functions. These functions return `1` on success and `0` on failure. They output either the result, or an error to a `mpc_result_t` variable. This type is defined as follows. + +```c +typedef union { + mpc_err_t *error; + mpc_val_t *output; +} mpc_result_t; +``` + +where `mpc_val_t *` is synonymous with `void *` and simply represents some pointer to data - the exact type of which is dependant on the parser. + + +* * * + +```c +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); +``` + +Run a parser on some string. + +* * * + +```c +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); +``` + +Run a parser on some file. + +* * * + +```c +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); +``` + +Run a parser on some pipe (such as `stdin`). + +* * * + +```c +int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); +``` + +Run a parser on the contents of some file. + + +Combinators +----------- + +Combinators are functions that take one or more parsers and return a new parser of some given functionality. 
+ +These combinators work independently of exactly what data type the parser(s) supplied as input return. In languages such as Haskell ensuring you don't input one type of data into a parser requiring a different type is done by the compiler. But in C we don't have that luxury. So it is at the discretion of the programmer to ensure that he or she deals correctly with the outputs of different parser types. + +A second annoyance in C is that of manual memory management. Some parsers might get half-way and then fail. This means they need to clean up any partial result that has been collected in the parse. In Haskell this is handled by the Garbage Collector, but in C these combinators will need to take _destructor_ functions as input, which say how clean up any partial data that has been collected. + +Here are the main combinators and how to use then. + +* * * + +```c +mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); +``` + +Returns a parser that runs `a`, and on success returns the result of `a`, while on failure reports that `e` was expected. + +* * * + +```c +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); +mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); +``` + +Returns a parser that applies function `f` (optionality taking extra input `x`) to the result of parser `a`. + +* * * + +```c +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); +``` + +Returns a parser that applies function `f` (optionally taking extra input `x`) to the result of parser `a`. 
If `f` returns non-zero, then the parser succeeds and returns the value of `a` (possibly modified by `f`). If `f` returns zero, then the parser fails with message `e`, and the result of `a` is destroyed with the destructor `da`. + +* * * + +```c +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); +``` + +Returns a parser with the following behaviour. If parser `a` succeeds, then it fails and consumes no input. If parser `a` fails, then it succeeds, consumes no input and returns `NULL` (or the result of lift function `lf`). Destructor `da` is used to destroy the result of `a` on success. + +* * * + +```c +mpc_parser_t *mpc_maybe(mpc_parser_t *a); +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); +``` + +Returns a parser that runs `a`. If `a` is successful then it returns the result of `a`. If `a` is unsuccessful then it succeeds, but returns `NULL` (or the result of `lf`). + +* * * + +```c +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); +``` + +Runs `a` zero or more times until it fails. Results are combined using fold function `f`. See the _Function Types_ section for more details. + +* * * + +```c +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); +``` + +Runs `a` one or more times until it fails. Results are combined with fold function `f`. + +* * * + +```c +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); +``` + +Runs `a` exactly `n` times. If this fails, any partial results are destructed with `da`. If successful results of `a` are combined using fold function `f`. + +* * * + +```c +mpc_parser_t *mpc_or(int n, ...); +``` + +Attempts to run `n` parsers in sequence, returning the first one that succeeds. If all fail, returns an error. + +* * * + +```c +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); +``` + +Attempts to run `n` parsers in sequence, returning the fold of the results using fold function `f`. 
First parsers must be specified, followed by destructors for each parser, excluding the final parser. These are used in case of partial success. For example: `mpc_and(3, mpcf_strfold, mpc_char('a'), mpc_char('b'), mpc_char('c'), free, free);` would attempt to match `'a'` followed by `'b'` followed by `'c'`, and if successful would concatenate them using `mpcf_strfold`. Otherwise would use `free` on the partial results. + +* * * + +```c +mpc_parser_t *mpc_predictive(mpc_parser_t *a); +``` + +Returns a parser that runs `a` with backtracking disabled. This means if `a` consumes more than one character, it will not be reverted, even on failure. Turning backtracking off has good performance benefits for grammars which are `LL(1)`. These are grammars where the first character completely determines the parse result - such as the decision of parsing either a C identifier, number, or string literal. This option should not be used for non `LL(1)` grammars or it will produce incorrect results or crash the parser. + +Another way to think of `mpc_predictive` is that it can be applied to a parser (for a performance improvement) if either successfully parsing the first character will result in a completely successful parse, or all of the referenced sub-parsers are also `LL(1)`. + + +Function Types +-------------- + +The combinator functions take a number of special function types as function pointers. Here is a short explanation of those types are how they are expected to behave. It is important that these behave correctly otherwise it is easy to introduce memory leaks or crashes into the system. + +* * * + +```c +typedef void(*mpc_dtor_t)(mpc_val_t*); +``` + +Given some pointer to a data value it will ensure the memory it points to is freed correctly. + +* * * + +```c +typedef mpc_val_t*(*mpc_ctor_t)(void); +``` + +Returns some data value when called. It can be used to create _empty_ versions of data types when certain combinators have no known default value to return. 
For example it may be used to return a newly allocated empty string. + +* * * + +```c +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); +``` + +This takes in some pointer to data and outputs some new or modified pointer to data, ensuring to free the input data if it is no longer used. The `apply_to` variation takes in an extra pointer to some data such as global state. + +* * * + +```c +typedef int(*mpc_check_t)(mpc_val_t**); +typedef int(*mpc_check_with_t)(mpc_val_t**,void*); +``` + +This takes in some pointer to data and outputs 0 if parsing should stop with an error. Additionally, this may change or free the input data. The `check_with` variation takes in an extra pointer to some data such as global state. + +* * * + +```c +typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); +``` + +This takes a list of pointers to data values and must return some combined or folded version of these data values. It must ensure to free any input data that is no longer used once the combination has taken place. + + +Case Study - Identifier +======================= + +Combinator Method +----------------- + +Using the above combinators we can create a parser that matches a C identifier. + +When using the combinators we need to supply a function that says how to combine two `char *`. + +For this we build a fold function that will concatenate zero or more strings together. For this sake of this tutorial we will write it by hand, but this (as well as many other useful fold functions), are actually included in _mpc_ under the `mpcf_*` namespace, such as `mpcf_strfold`. + +```c +mpc_val_t *strfold(int n, mpc_val_t **xs) { + char *x = calloc(1, 1); + int i; + for (i = 0; i < n; i++) { + x = realloc(x, strlen(x) + strlen(xs[i]) + 1); + strcat(x, xs[i]); + free(xs[i]); + } + return x; +} +``` + +We can use this to specify a C identifier, making use of some combinators to say how the basic parsers are combined. 
+ +```c +mpc_parser_t *alpha = mpc_or(2, mpc_range('a', 'z'), mpc_range('A', 'Z')); +mpc_parser_t *digit = mpc_range('0', '9'); +mpc_parser_t *underscore = mpc_char('_'); + +mpc_parser_t *ident = mpc_and(2, strfold, + mpc_or(2, alpha, underscore), + mpc_many(strfold, mpc_or(3, alpha, digit, underscore)), + free); + +/* Do Some Parsing... */ + +mpc_delete(ident); +``` + +Notice that previous parsers are used as input to new parsers we construct from the combinators. Note that only the final parser `ident` must be deleted. When we input a parser into a combinator we should consider it to be part of the output of that combinator. + +Because of this we shouldn't create a parser and input it into multiple places, or it will be doubly freed. + + +Regex Method +------------ + +There is an easier way to do this than the above method. _mpc_ comes with a handy regex function for constructing parsers using regex syntax. We can specify an identifier using a regex pattern as shown below. + +```c +mpc_parser_t *ident = mpc_re("[a-zA-Z_][a-zA-Z_0-9]*"); + +/* Do Some Parsing... */ + +mpc_delete(ident); +``` + + +Library Method +-------------- + +Although if we really wanted to create a parser for C identifiers, a function for creating this parser comes included in _mpc_ along with many other common parsers. + +```c +mpc_parser_t *ident = mpc_ident(); + +/* Do Some Parsing... */ + +mpc_delete(ident); +``` + +Parser References +================= + +Building parsers in the above way can have issues with self-reference or cyclic-reference. To overcome this we can separate the construction of parsers into two different steps. Construction and Definition. + +* * * + +```c +mpc_parser_t *mpc_new(const char *name); +``` + +This will construct a parser called `name` which can then be used as input to others, including itself, without fear of being deleted. Any parser created using `mpc_new` is said to be _retained_. This means it will behave differently to a normal parser when referenced. 
When deleting a parser that includes a _retained_ parser, the _retained_ parser will not be deleted along with it. To delete a retained parser `mpc_delete` must be used on it directly. + +A _retained_ parser can then be _defined_ using... + +* * * + +```c +mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); +``` + +This assigns the contents of parser `a` to `p`, and deletes `a`. With this technique parsers can now reference each other, as well as themselves, without trouble. + +* * * + +```c +mpc_parser_t *mpc_undefine(mpc_parser_t *p); +``` + +A final step is required. Parsers that reference each other must all be undefined before they are deleted. It is important to do any undefining before deletion. The reason for this is that to delete a parser it must look at each sub-parser that is used by it. If any of these have already been deleted a segfault is unavoidable - even if they were retained beforehand. + +* * * + +```c +void mpc_cleanup(int n, ...); +``` + +To ease the task of undefining and then deleting parsers `mpc_cleanup` can be used. It takes `n` parsers as input, and undefines them all, before deleting them all. + +* * * + +```c +mpc_parser_t *mpc_copy(mpc_parser_t *a); +``` + +This function makes a copy of a parser `a`. This can be useful when you want to +use a parser as input for some other parsers multiple times without retaining +it. + +* * * + +```c +mpc_parser_t *mpc_re(const char *re); +mpc_parser_t *mpc_re_mode(const char *re, int mode); +``` + +This function takes as input the regular expression `re` and builds a parser +for it. With the `mpc_re_mode` function optional mode flags can also be given. +Available flags are `MPC_RE_MULTILINE` / `MPC_RE_M` where the start of input +character `^` also matches the beginning of new lines and the end of input `$` +character also matches new lines, and `MPC_RE_DOTALL` / `MPC_RE_S` where the +any character token `.` also matches newlines (by default it doesn't). 
+ + +Library Reference +================= + +Common Parsers +-------------- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
mpc_soiMatches only the start of input, returns NULL
mpc_eoiMatches only the end of input, returns NULL
mpc_boundaryMatches only the boundary between words, returns NULL
mpc_boundary_newlineMatches the start of a new line, returns NULL
mpc_whitespaceMatches any whitespace character " \f\n\r\t\v"
mpc_whitespacesMatches zero or more whitespace characters
mpc_blankMatches whitespaces and frees the result, returns NULL
mpc_newlineMatches '\n'
mpc_tabMatches '\t'
mpc_escapeMatches a backslash followed by any character
mpc_digitMatches any character in the range '0' - '9'
mpc_hexdigitMatches any character in the range '0 - '9' as well as 'A' - 'F' and 'a' - 'f'
mpc_octdigitMatches any character in the range '0' - '7'
mpc_digitsMatches one or more digit
mpc_hexdigitsMatches one or more hexdigit
mpc_octdigitsMatches one or more octdigit
mpc_lowerMatches any lower case character
mpc_upperMatches any upper case character
mpc_alphaMatches any alphabet character
mpc_underscoreMatches '_'
mpc_alphanumMatches any alphabet character, underscore or digit
mpc_intMatches digits and returns an int*
mpc_hexMatches hexdigits and returns an int*
mpc_octMatches octdigits and returns an int*
mpc_numberMatches mpc_int, mpc_hex or mpc_oct
mpc_realMatches some floating point number as a string
mpc_floatMatches some floating point number and returns a float*
mpc_char_litMatches some character literal surrounded by '
mpc_string_litMatches some string literal surrounded by "
mpc_regex_litMatches some regex literal surrounded by /
mpc_identMatches a C style identifier
+ + +Useful Parsers +-------------- + + + + + + + + + + + + + + + + + + + + + + + +
mpc_startswith(mpc_parser_t *a);Matches the start of input followed by a
mpc_endswith(mpc_parser_t *a, mpc_dtor_t da);Matches a followed by the end of input
mpc_whole(mpc_parser_t *a, mpc_dtor_t da);Matches the start of input, a, and the end of input
mpc_stripl(mpc_parser_t *a);Matches a first consuming any whitespace to the left
mpc_stripr(mpc_parser_t *a);Matches a then consumes any whitespace to the right
mpc_strip(mpc_parser_t *a);Matches a consuming any surrounding whitespace
mpc_tok(mpc_parser_t *a);Matches a and consumes any trailing whitespace
mpc_sym(const char *s);Matches string s and consumes any trailing whitespace
mpc_total(mpc_parser_t *a, mpc_dtor_t da);Matches a with any surrounding whitespace consumed, enclosed between the start and end of input
mpc_between(mpc_parser_t *a, mpc_dtor_t ad,
const char *o, const char *c);
Matches a between strings o and c
mpc_parens(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "(" and ")"
mpc_braces(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "<" and ">"
mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "{" and "}"
mpc_squares(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "[" and "]"
mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad,
const char *o, const char *c);
Matches a between o and c, where o and c have their trailing whitespace stripped.
mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "(" and ")", each with its trailing whitespace consumed
mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "<" and ">", each with its trailing whitespace consumed
mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "{" and "}", each with its trailing whitespace consumed
mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad);Matches a between "[" and "]", each with its trailing whitespace consumed
+ + +Apply Functions +--------------- + + + + + + + + + + + + + + + + + + + + + + +
void mpcf_dtor_null(mpc_val_t *x);Empty destructor. Does nothing
mpc_val_t *mpcf_ctor_null(void);Returns NULL
mpc_val_t *mpcf_ctor_str(void);Returns ""
mpc_val_t *mpcf_free(mpc_val_t *x);Frees x and returns NULL
mpc_val_t *mpcf_int(mpc_val_t *x);Converts a decimal string x to an int*
mpc_val_t *mpcf_hex(mpc_val_t *x);Converts a hex string x to an int*
mpc_val_t *mpcf_oct(mpc_val_t *x);Converts an octal string x to an int*
mpc_val_t *mpcf_float(mpc_val_t *x);Converts a string x to a float*
mpc_val_t *mpcf_escape(mpc_val_t *x);Converts a string x to an escaped version
mpc_val_t *mpcf_escape_regex(mpc_val_t *x);Converts a regex x to an escaped version
mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x);Converts a raw string x to an escaped version
mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x);Converts a raw character x to an escaped version
mpc_val_t *mpcf_unescape(mpc_val_t *x);Converts a string x to an unescaped version
mpc_val_t *mpcf_unescape_regex(mpc_val_t *x);Converts a regex x to an unescaped version
mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x);Converts a raw string x to an unescaped version
mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x);Converts a raw character x to an unescaped version
mpc_val_t *mpcf_strtriml(mpc_val_t *x);Trims whitespace from the left of string x
mpc_val_t *mpcf_strtrimr(mpc_val_t *x);Trims whitespace from the right of string x
mpc_val_t *mpcf_strtrim(mpc_val_t *x);Trims whitespace from either side of string x
+ + +Fold Functions +-------------- + + + + + + + + + + + + + + +
mpc_val_t *mpcf_null(int n, mpc_val_t** xs);Returns NULL
mpc_val_t *mpcf_fst(int n, mpc_val_t** xs);Returns first element of xs
mpc_val_t *mpcf_snd(int n, mpc_val_t** xs);Returns second element of xs
mpc_val_t *mpcf_trd(int n, mpc_val_t** xs);Returns third element of xs
mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs);Returns first element of xs and calls free on others
mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs);Returns second element of xs and calls free on others
mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs);Returns third element of xs and calls free on others
mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs);Calls free on all elements of xs and returns NULL
mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs);Concatenates all xs together as strings and returns result
+ + +Case Study - Maths Language +=========================== + +Combinator Approach +------------------- + +Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own output types for parsers. For example we could design our own syntax tree type to use. We can also use this method to do some specific house-keeping or data processing in the parsing phase. + +As an example of this power, we can specify a simple maths grammar, that outputs `int *`, and computes the result of the expression as it goes along. + +We start with a fold function that will fold two `int *` into a new `int *` based on some `char *` operator. + +```c +mpc_val_t *fold_maths(int n, mpc_val_t **xs) { + + int **vs = (int**)xs; + + if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; } + if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; } + if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; } + if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; } + if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; } + + free(xs[1]); free(xs[2]); + + return xs[0]; +} +``` + +And then we use this to specify a basic grammar, which folds together any results. + +```c +mpc_parser_t *Expr = mpc_new("expr"); +mpc_parser_t *Factor = mpc_new("factor"); +mpc_parser_t *Term = mpc_new("term"); +mpc_parser_t *Maths = mpc_new("maths"); + +mpc_define(Expr, mpc_or(2, + mpc_and(3, fold_maths, + Factor, mpc_oneof("+-"), Factor, + free, free), + Factor +)); + +mpc_define(Factor, mpc_or(2, + mpc_and(3, fold_maths, + Term, mpc_oneof("*/"), Term, + free, free), + Term +)); + +mpc_define(Term, mpc_or(2, mpc_int(), mpc_parens(Expr, free))); +mpc_define(Maths, mpc_whole(Expr, free)); + +/* Do Some Parsing... */ + +mpc_delete(Maths); +``` + +If we supply this function with something like `(4*2)+5`, we can expect it to output `13`. 
+ + +Language Approach +----------------- + +It is possible to avoid passing in and around all those function pointers, if you don't care what type is output by _mpc_. For this, a generic Abstract Syntax Tree type `mpc_ast_t` is included in _mpc_. The combinator functions which act on this don't need information on how to destruct or fold instances of the result as they know it will be a `mpc_ast_t`. So there are a number of combinator functions which work specifically (and only) on parsers that return this type. They reside under `mpca_*`. + +Doing things via this method means that all the data processing must take place after the parsing. In many instances this is not an issue, or even preferable. + +It also allows for one more trick. As all the fold and destructor functions are implicit, the user can simply specify the grammar of the language in some nice way and the system can try to build a parser for the AST type from this alone. For this there are a few functions supplied which take in a string, and output a parser. The format for these grammars is simple and familiar to those who have used parser generators before. It looks something like this. + +``` +number "number" : /[0-9]+/ ; +expression : (('+' | '-') )* ; +product : (('*' | '/') )* ; +value : | '(' ')' ; +maths : /^/ /$/ ; +``` + +The syntax for this is defined as follows. + + + + + + + + + + + +
"ab"The string ab is required.
'a'The character a is required.
'a' 'b'First 'a' is required, then 'b' is required.
'a' | 'b'Either 'a' is required, or 'b' is required.
'a'*Zero or more 'a' are required.
'a'+One or more 'a' are required.
'a'?Zero or one 'a' is required.
'a'{x}Exactly x (integer) copies of 'a' are required.
<abba>The rule called abba is required.
+ +Rules are specified by rule name, optionally followed by an _expected_ string, followed by a colon `:`, followed by the definition, and ending in a semicolon `;`. Multiple rules can be specified. The _rule names_ must match the names given to any parsers created by `mpc_new`, otherwise the function will crash. + +The flags variable is a set of flags `MPCA_LANG_DEFAULT`, `MPCA_LANG_PREDICTIVE`, or `MPCA_LANG_WHITESPACE_SENSITIVE`. For specifying if the language is predictive or whitespace sensitive. + +Like with the regular expressions, this user input is parsed by existing parts of the _mpc_ library. It provides one of the more powerful features of the library. + +* * * + +```c +mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); +``` + +This takes in some single right hand side of a rule, as well as a list of any of the parsers referenced, and outputs a parser that does what is specified by the rule. The list of parsers referenced can be terminated with `NULL` to get an error instead of a crash when a parser required is not supplied. + +* * * + +```c +mpc_err_t *mpca_lang(int flags, const char *lang, ...); +``` + +This takes in a full language (zero or more rules) as well as any parsers referred to by either the right or left hand sides. Any parsers specified on the left hand side of any rule will be assigned a parser equivalent to what is specified on the right. On valid user input this returns `NULL`, while if there are any errors in the user input it will return an instance of `mpc_err_t` describing the issues. The list of parsers referenced can be terminated with `NULL` to get an error instead of a crash when a parser required is not supplied. + +* * * + +```c +mpc_err_t *mpca_lang_file(int flags, FILE* f, ...); +``` + +This reads in the contents of file `f` and inputs it into `mpca_lang`. 
+ +* * * + +```c +mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); +``` + +This opens and reads in the contents of the file given by `filename` and passes it to `mpca_lang`. + +Case Study - Tokenizer +====================== + +Another common task we might be interested in doing is tokenizing some block of +text (splitting the text into individual elements) and performing some function +on each one of these elements as it is read. We can do this with `mpc` too. + +First, we can build a regular expression which parses an individual token. For +example if our tokens are identifiers, integers, commas, periods and colons we +could build something like this `mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")`. +Next we can strip any whitespace, and add a callback function using `mpc_apply` +which gets called every time this regex is parsed successfully +`mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)`. +Finally we can surround all of this in `mpc_many` to parse it zero or more +times. The final code might look something like this: + +```c +static mpc_val_t *print_token(mpc_val_t *x) { + printf("Token: '%s'\n", (char*)x); + return x; +} + +int main(int argc, char **argv) { + + const char *input = " hello 4352 , \n foo.bar \n\n test:ing "; + + mpc_parser_t* Tokens = mpc_many( + mpcf_all_free, + mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)); + + mpc_result_t r; + mpc_parse("input", input, Tokens, &r); + + mpc_delete(Tokens); + + return 0; +} +``` + +Running this program will produce an output something like this: + +``` +Token: 'hello' +Token: '4352' +Token: ',' +Token: 'foo' +Token: '.' +Token: 'bar' +Token: 'test' +Token: ':' +Token: 'ing' +``` + +By extending the regex we can easily extend this to parse many more types of +tokens and quickly and easily build a tokenizer for whatever language we are +interested in. 
+ + +Error Reporting +=============== + +_mpc_ provides some automatic generation of error messages. These can be enhanced by the user, with use of `mpc_expect`, but many of the defaults should provide both useful and readable. An example of an error message might look something like this: + +``` +:0:3: error: expected one or more of 'a' or 'd' at 'k' +``` + +Misc +==== + +Here are some other misc functions that mpc provides. These functions are susceptible to change between versions so use them with some care. + +* * * + +```c +void mpc_print(mpc_parser_t *p); +``` + +Prints out a parser in some weird format. This is generally used for debugging so don't expect to be able to understand the output right away without looking at the source code a little bit. + +* * * + +```c +void mpc_stats(mpc_parser_t *p); +``` + +Prints out some basic stats about a parser. Again used for debugging and optimisation. + +* * * + +```c +void mpc_optimise(mpc_parser_t *p); +``` + +Performs some basic optimisations on a parser to reduce it's size and increase its running speed. + + +Limitations & FAQ +================= + +### Does _mpc_ support Unicode? + +_mpc_ Only supports ASCII. Sorry! Writing a parser library that supports Unicode is pretty difficult. I welcome contributions! + + +### Is _mpc_ binary safe? + +No. Sorry! Including NULL characters in a string or a file will probably break it. Avoid this if possible. + + +### The Parser is going into an infinite loop! + +While it is certainly possible there is an issue with _mpc_, it is probably the case that your grammar contains _left recursion_. This is something _mpc_ cannot deal with. _Left recursion_ is when a rule directly or indirectly references itself on the left hand side of a derivation. For example consider this left recursive grammar intended to parse an expression. + +``` +expr : '+' ( | | ); +``` + +When the rule `expr` is called, it looks the first rule on the left. This happens to be the rule `expr` again. 
So again it looks for the first rule on the left. Which is `expr` again. And so on. To avoid left recursion this can be rewritten (for example) as the following. Note that rewriting as follows also changes the operator associativity. + +``` +value : | ; +expr : ('+' )* ; +``` + +Avoiding left recursion can be tricky, but is easy once you get a feel for it. For more information you can look on [wikipedia](http://en.wikipedia.org/wiki/Left_recursion) which covers some common techniques and more examples. Possibly in the future _mpc_ will support functionality to warn the user or re-write grammars which contain left recursion, but it wont for now. + + +### Backtracking isn't working! + +_mpc_ supports backtracking, but it may not work as you expect. It isn't a silver bullet, and you still must structure your grammar to be unambiguous. To demonstrate this behaviour examine the following erroneous grammar, intended to parse either a C style identifier, or a C style function call. + +``` +factor : + | '(' ? (',' )* ')' ; +``` + +This grammar will never correctly parse a function call because it will always first succeed parsing the initial identifier and return a factor. At this point it will encounter the parenthesis of the function call, give up, and throw an error. Even if it were to try and parse a factor again on this failure it would never reach the correct function call option because it always tries the other options first, and always succeeds with the identifier. + +The solution to this is to always structure grammars with the most specific clause first, and more general clauses afterwards. This is the natural technique used for avoiding left-recursive grammars and unambiguity, so is a good habit to get into anyway. + +Now the parser will try to match a function first, and if this fails backtrack and try to match just an identifier. + +``` +factor : '(' ? 
(',' )* ')' + | ; +``` + +An alternative, and better option is to remove the ambiguity completely by factoring out the first identifier. This is better because it removes any need for backtracking at all! Now the grammar is predictive! + +``` +factor : ('(' ? (',' )* ')')? ; +``` + + +### How can I avoid the maximum string literal length? + +Some compilers limit the maximum length of string literals. If you have a huge language string in the source file to be passed into `mpca_lang` you might encounter this. The ANSI standard says that 509 is the maximum length allowed for a string literal. Most compilers support greater than this. Visual Studio supports up to 2048 characters, while gcc allocates memory dynamically and so has no real limit. + +There are a couple of ways to overcome this issue if it arises. You could instead use `mpca_lang_contents` and load the language from file or you could use a string literal for each line and let the preprocessor automatically concatenate them together, avoiding the limit. The final option is to upgrade your compiler. In C99 this limit has been increased to 4095. + + +### The automatic tags in the AST are annoying! + +When parsing from a grammar, the abstract syntax tree is tagged with different tags for each primitive type it encounters. For example a regular expression will be automatically tagged as `regex`. Character literals as `char` and strings as `string`. This is to help people wondering exactly how they might need to convert the node contents. + +If you have a rule in your grammar called `string`, `char` or `regex`, you may encounter some confusion. This is because nodes will be tagged with (for example) `string` _either_ if they are a string primitive, _or_ if they were parsed via your `string` rule. If you are detecting node type using something like `strstr`, in this situation it might break. 
One solution to this is to always check that `string` is the innermost tag to test for string primitives, or to rename your rule called `string` to something that doesn't conflict. + +Yes it is annoying but its probably not going to change! diff --git a/examples/line_reader.c b/examples/line_reader.c index 48d01be..5442d04 100644 --- a/examples/line_reader.c +++ b/examples/line_reader.c @@ -1,34 +1,34 @@ -#include "../mpc.h" - -static void* read_line(void* line) { - printf("Reading Line: %s", (char*)line); - return line; -} - -int main(int argc, char **argv) { - - const char *input = - "abcHVwufvyuevuy3y436782\n" - "\n" - "\n" - "rehre\n" - "rew\n" - "-ql.;qa\n" - "eg"; - - mpc_parser_t* Line = mpc_many( - mpcf_strfold, - mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)); - - mpc_result_t r; - - (void)argc; (void)argv; - - mpc_parse("input", input, Line, &r); - printf("\nParsed String: %s", (char*)r.output); - free(r.output); - - mpc_delete(Line); - - return 0; -} +#include "../mpc.h" + +static void* read_line(void* line) { + printf("Reading Line: %s", (char*)line); + return line; +} + +int main(int argc, char **argv) { + + const char *input = + "abcHVwufvyuevuy3y436782\n" + "\n" + "\n" + "rehre\n" + "rew\n" + "-ql.;qa\n" + "eg"; + + mpc_parser_t* Line = mpc_many( + mpcf_strfold, + mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)); + + mpc_result_t r; + + (void)argc; (void)argv; + + mpc_parse("input", input, Line, &r); + printf("\nParsed String: %s", (char*)r.output); + free(r.output); + + mpc_delete(Line); + + return 0; +} diff --git a/examples/prelude.lspy b/examples/prelude.lspy index 51ad081..01c2256 100644 --- a/examples/prelude.lspy +++ b/examples/prelude.lspy @@ -1,239 +1,239 @@ -;;; -;;; Lispy Standard Prelude -;;; - -;;; Atoms -(def {nil} {}) -(def {true} 1) -(def {false} 0) - -;;; Functional Functions - -; Function Definitions -(def {fun} (\ {f b} { - def (head f) (\ (tail f) b) -})) - -; Open new scope -(fun {let b} { - ((\ {_} b) ()) -}) - -; Unpack List to 
Function -(fun {unpack f l} { - eval (join (list f) l) -}) - -; Unapply List to Function -(fun {pack f & xs} {f xs}) - -; Curried and Uncurried calling -(def {curry} {unpack}) -(def {uncurry} {pack}) - -; Perform Several things in Sequence -(fun {do & l} { - if (== l {}) - {{}} - {last l} -}) - -;;; Logical Functions - -; Logical Functions -(fun {not x} {- 1 x}) -(fun {or x y} {+ x y}) -(fun {and x y} {* x y}) - - -;;; Numeric Functions - -; Minimum of Arguments -(fun {min & xs} { - if (== (tail xs) {}) {fst xs} - {do - (= {rest} (unpack min (tail xs))) - (= {item} (fst xs)) - (if (< item rest) {item} {rest}) - } -}) - -; Minimum of Arguments -(fun {max & xs} { - if (== (tail xs) {}) {fst xs} - {do - (= {rest} (unpack max (tail xs))) - (= {item} (fst xs)) - (if (> item rest) {item} {rest}) - } -}) - -;;; Conditional Functions - -(fun {select & cs} { - if (== cs {}) - {error "No Selection Found"} - {if (fst (fst cs)) {snd (fst cs)} {unpack select (tail cs)}} -}) - -(fun {case x & cs} { - if (== cs {}) - {error "No Case Found"} - {if (== x (fst (fst cs))) {snd (fst cs)} {unpack case (join (list x) (tail cs))}} -}) - -(def {otherwise} true) - - -;;; Misc Functions - -(fun {flip f a b} {f b a}) -(fun {ghost & xs} {eval xs}) -(fun {comp f g x} {f (g x)}) - -;;; List Functions - -; First, Second, or Third Item in List -(fun {fst l} { eval (head l) }) -(fun {snd l} { eval (head (tail l)) }) -(fun {trd l} { eval (head (tail (tail l))) }) - -; List Length -(fun {len l} { - if (== l {}) - {0} - {+ 1 (len (tail l))} -}) - -; Nth item in List -(fun {nth n l} { - if (== n 0) - {fst l} - {nth (- n 1) (tail l)} -}) - -; Last item in List -(fun {last l} {nth (- (len l) 1) l}) - -; Apply Function to List -(fun {map f l} { - if (== l {}) - {{}} - {join (list (f (fst l))) (map f (tail l))} -}) - -; Apply Filter to List -(fun {filter f l} { - if (== l {}) - {{}} - {join (if (f (fst l)) {head l} {{}}) (filter f (tail l))} -}) - -; Return all of list but last element -(fun {init l} { - 
if (== (tail l) {}) - {{}} - {join (head l) (init (tail l))} -}) - -; Reverse List -(fun {reverse l} { - if (== l {}) - {{}} - {join (reverse (tail l)) (head l)} -}) - -; Fold Left -(fun {foldl f z l} { - if (== l {}) - {z} - {foldl f (f z (fst l)) (tail l)} -}) - -; Fold Right -(fun {foldr f z l} { - if (== l {}) - {z} - {f (fst l) (foldr f z (tail l))} -}) - -(fun {sum l} {foldl + 0 l}) -(fun {product l} {foldl * 1 l}) - -; Take N items -(fun {take n l} { - if (== n 0) - {{}} - {join (head l) (take (- n 1) (tail l))} -}) - -; Drop N items -(fun {drop n l} { - if (== n 0) - {l} - {drop (- n 1) (tail l)} -}) - -; Split at N -(fun {split n l} {list (take n l) (drop n l)}) - -; Take While -(fun {take-while f l} { - if (not (unpack f (head l))) - {{}} - {join (head l) (take-while f (tail l))} -}) - -; Drop While -(fun {drop-while f l} { - if (not (unpack f (head l))) - {l} - {drop-while f (tail l)} -}) - -; Element of List -(fun {elem x l} { - if (== l {}) - {false} - {if (== x (fst l)) {true} {elem x (tail l)}} -}) - -; Find element in list of pairs -(fun {lookup x l} { - if (== l {}) - {error "No Element Found"} - {do - (= {key} (fst (fst l))) - (= {val} (snd (fst l))) - (if (== key x) {val} {lookup x (tail l)}) - } -}) - -; Zip two lists together into a list of pairs -(fun {zip x y} { - if (or (== x {}) (== y {})) - {{}} - {join (list (join (head x) (head y))) (zip (tail x) (tail y))} -}) - -; Unzip a list of pairs into two lists -(fun {unzip l} { - if (== l {}) - {{{} {}}} - {do - (= {x} (fst l)) - (= {xs} (unzip (tail l))) - (list (join (head x) (fst xs)) (join (tail x) (snd xs))) - } -}) - -;;; Other Fun - -; Fibonacci -(fun {fib n} { - select - { (== n 0) 0 } - { (== n 1) 1 } - { otherwise (+ (fib (- n 1)) (fib (- n 2))) } -}) - +;;; +;;; Lispy Standard Prelude +;;; + +;;; Atoms +(def {nil} {}) +(def {true} 1) +(def {false} 0) + +;;; Functional Functions + +; Function Definitions +(def {fun} (\ {f b} { + def (head f) (\ (tail f) b) +})) + +; Open new scope 
+(fun {let b} { + ((\ {_} b) ()) +}) + +; Unpack List to Function +(fun {unpack f l} { + eval (join (list f) l) +}) + +; Unapply List to Function +(fun {pack f & xs} {f xs}) + +; Curried and Uncurried calling +(def {curry} {unpack}) +(def {uncurry} {pack}) + +; Perform Several things in Sequence +(fun {do & l} { + if (== l {}) + {{}} + {last l} +}) + +;;; Logical Functions + +; Logical Functions +(fun {not x} {- 1 x}) +(fun {or x y} {+ x y}) +(fun {and x y} {* x y}) + + +;;; Numeric Functions + +; Minimum of Arguments +(fun {min & xs} { + if (== (tail xs) {}) {fst xs} + {do + (= {rest} (unpack min (tail xs))) + (= {item} (fst xs)) + (if (< item rest) {item} {rest}) + } +}) + +; Minimum of Arguments +(fun {max & xs} { + if (== (tail xs) {}) {fst xs} + {do + (= {rest} (unpack max (tail xs))) + (= {item} (fst xs)) + (if (> item rest) {item} {rest}) + } +}) + +;;; Conditional Functions + +(fun {select & cs} { + if (== cs {}) + {error "No Selection Found"} + {if (fst (fst cs)) {snd (fst cs)} {unpack select (tail cs)}} +}) + +(fun {case x & cs} { + if (== cs {}) + {error "No Case Found"} + {if (== x (fst (fst cs))) {snd (fst cs)} {unpack case (join (list x) (tail cs))}} +}) + +(def {otherwise} true) + + +;;; Misc Functions + +(fun {flip f a b} {f b a}) +(fun {ghost & xs} {eval xs}) +(fun {comp f g x} {f (g x)}) + +;;; List Functions + +; First, Second, or Third Item in List +(fun {fst l} { eval (head l) }) +(fun {snd l} { eval (head (tail l)) }) +(fun {trd l} { eval (head (tail (tail l))) }) + +; List Length +(fun {len l} { + if (== l {}) + {0} + {+ 1 (len (tail l))} +}) + +; Nth item in List +(fun {nth n l} { + if (== n 0) + {fst l} + {nth (- n 1) (tail l)} +}) + +; Last item in List +(fun {last l} {nth (- (len l) 1) l}) + +; Apply Function to List +(fun {map f l} { + if (== l {}) + {{}} + {join (list (f (fst l))) (map f (tail l))} +}) + +; Apply Filter to List +(fun {filter f l} { + if (== l {}) + {{}} + {join (if (f (fst l)) {head l} {{}}) (filter f (tail l))} +}) + 
+; Return all of list but last element +(fun {init l} { + if (== (tail l) {}) + {{}} + {join (head l) (init (tail l))} +}) + +; Reverse List +(fun {reverse l} { + if (== l {}) + {{}} + {join (reverse (tail l)) (head l)} +}) + +; Fold Left +(fun {foldl f z l} { + if (== l {}) + {z} + {foldl f (f z (fst l)) (tail l)} +}) + +; Fold Right +(fun {foldr f z l} { + if (== l {}) + {z} + {f (fst l) (foldr f z (tail l))} +}) + +(fun {sum l} {foldl + 0 l}) +(fun {product l} {foldl * 1 l}) + +; Take N items +(fun {take n l} { + if (== n 0) + {{}} + {join (head l) (take (- n 1) (tail l))} +}) + +; Drop N items +(fun {drop n l} { + if (== n 0) + {l} + {drop (- n 1) (tail l)} +}) + +; Split at N +(fun {split n l} {list (take n l) (drop n l)}) + +; Take While +(fun {take-while f l} { + if (not (unpack f (head l))) + {{}} + {join (head l) (take-while f (tail l))} +}) + +; Drop While +(fun {drop-while f l} { + if (not (unpack f (head l))) + {l} + {drop-while f (tail l)} +}) + +; Element of List +(fun {elem x l} { + if (== l {}) + {false} + {if (== x (fst l)) {true} {elem x (tail l)}} +}) + +; Find element in list of pairs +(fun {lookup x l} { + if (== l {}) + {error "No Element Found"} + {do + (= {key} (fst (fst l))) + (= {val} (snd (fst l))) + (if (== key x) {val} {lookup x (tail l)}) + } +}) + +; Zip two lists together into a list of pairs +(fun {zip x y} { + if (or (== x {}) (== y {})) + {{}} + {join (list (join (head x) (head y))) (zip (tail x) (tail y))} +}) + +; Unzip a list of pairs into two lists +(fun {unzip l} { + if (== l {}) + {{{} {}}} + {do + (= {x} (fst l)) + (= {xs} (unzip (tail l))) + (list (join (head x) (fst xs)) (join (tail x) (snd xs))) + } +}) + +;;; Other Fun + +; Fibonacci +(fun {fib n} { + select + { (== n 0) 0 } + { (== n 1) 1 } + { otherwise (+ (fib (- n 1)) (fib (- n 2))) } +}) + diff --git a/mpc.c b/mpc.c index b9e077c..fc6bea7 100644 --- a/mpc.c +++ b/mpc.c @@ -1,4068 +1,4068 @@ -#include "mpc.h" - -/* -** State Type -*/ - -static mpc_state_t 
mpc_state_invalid(void) { - mpc_state_t s; - s.pos = -1; - s.row = -1; - s.col = -1; - s.term = 0; - return s; -} - -static mpc_state_t mpc_state_new(void) { - mpc_state_t s; - s.pos = 0; - s.row = 0; - s.col = 0; - s.term = 0; - return s; -} - -/* -** Input Type -*/ - -/* -** In mpc the input type has three modes of -** operation: String, File and Pipe. -** -** String is easy. The whole contents are -** loaded into a buffer and scanned through. -** The cursor can jump around at will making -** backtracking easy. -** -** The second is a File which is also somewhat -** easy. The contents are never loaded into -** memory but backtracking can still be achieved -** by seeking in the file at different positions. -** -** The final mode is Pipe. This is the difficult -** one. As we assume pipes cannot be seeked - and -** only support a single character lookahead at -** any point, when the input is marked for a -** potential backtracking we start buffering any -** input. -** -** This means that if we are requested to seek -** back we can simply start reading from the -** buffer instead of the input. -** -** Of course using `mpc_predictive` will disable -** backtracking and make LL(1) grammars easy -** to parse for all input methods. 
-** -*/ - -enum { - MPC_INPUT_STRING = 0, - MPC_INPUT_FILE = 1, - MPC_INPUT_PIPE = 2 -}; - -enum { - MPC_INPUT_MARKS_MIN = 32 -}; - -enum { - MPC_INPUT_MEM_NUM = 512 -}; - -typedef struct { - char mem[64]; -} mpc_mem_t; - -typedef struct { - - int type; - char *filename; - mpc_state_t state; - - char *string; - char *buffer; - FILE *file; - - int suppress; - int backtrack; - int marks_slots; - int marks_num; - mpc_state_t *marks; - - char *lasts; - char last; - - size_t mem_index; - char mem_full[MPC_INPUT_MEM_NUM]; - mpc_mem_t mem[MPC_INPUT_MEM_NUM]; - -} mpc_input_t; - -static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - i->type = MPC_INPUT_STRING; - - i->state = mpc_state_new(); - - i->string = malloc(strlen(string) + 1); - strcpy(i->string, string); - i->buffer = NULL; - i->file = NULL; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; -} - -static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - i->type = MPC_INPUT_STRING; - - i->state = mpc_state_new(); - - i->string = malloc(length + 1); - strncpy(i->string, string, length); - i->string[length] = '\0'; - i->buffer = NULL; - i->file = NULL; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - 
memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; - -} - -static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - - i->type = MPC_INPUT_PIPE; - i->state = mpc_state_new(); - - i->string = NULL; - i->buffer = NULL; - i->file = pipe; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; - -} - -static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - i->type = MPC_INPUT_FILE; - i->state = mpc_state_new(); - - i->string = NULL; - i->buffer = NULL; - i->file = file; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; -} - -static void mpc_input_delete(mpc_input_t *i) { - - free(i->filename); - - if (i->type == MPC_INPUT_STRING) { free(i->string); } - if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } - - free(i->marks); - free(i->lasts); - free(i); -} - -static int mpc_mem_ptr(mpc_input_t *i, void *p) { - return - (char*)p >= (char*)(i->mem) && - (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); -} - -static void *mpc_malloc(mpc_input_t *i, size_t n) { - size_t j; - char *p; - - if (n > sizeof(mpc_mem_t)) { return malloc(n); } - - j = i->mem_index; - do { - if (!i->mem_full[i->mem_index]) { - p 
= (void*)(i->mem + i->mem_index); - i->mem_full[i->mem_index] = 1; - i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; - return p; - } - i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; - } while (j != i->mem_index); - - return malloc(n); -} - -static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { - char *x = mpc_malloc(i, n * m); - memset(x, 0, n * m); - return x; -} - -static void mpc_free(mpc_input_t *i, void *p) { - size_t j; - if (!mpc_mem_ptr(i, p)) { free(p); return; } - j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); - i->mem_full[j] = 0; -} - -static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { - - char *q = NULL; - - if (!mpc_mem_ptr(i, p)) { return realloc(p, n); } - - if (n > sizeof(mpc_mem_t)) { - q = malloc(n); - memcpy(q, p, sizeof(mpc_mem_t)); - mpc_free(i, p); - return q; - } - - return p; -} - -static void *mpc_export(mpc_input_t *i, void *p) { - char *q = NULL; - if (!mpc_mem_ptr(i, p)) { return p; } - q = malloc(sizeof(mpc_mem_t)); - memcpy(q, p, sizeof(mpc_mem_t)); - mpc_free(i, p); - return q; -} - -static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } -static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } - -static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } -static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } - -static void mpc_input_mark(mpc_input_t *i) { - - if (i->backtrack < 1) { return; } - - i->marks_num++; - - if (i->marks_num > i->marks_slots) { - i->marks_slots = i->marks_num + i->marks_num / 2; - i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); - i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); - } - - i->marks[i->marks_num-1] = i->state; - i->lasts[i->marks_num-1] = i->last; - - if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { - i->buffer = calloc(1, 1); - } - -} - -static void mpc_input_unmark(mpc_input_t *i) { - int j; - - if (i->backtrack < 1) { return; } - - 
i->marks_num--; - - if (i->marks_slots > i->marks_num + i->marks_num / 2 - && i->marks_slots > MPC_INPUT_MARKS_MIN) { - i->marks_slots = - i->marks_num > MPC_INPUT_MARKS_MIN ? - i->marks_num : MPC_INPUT_MARKS_MIN; - i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); - i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); - } - - if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { - for (j = strlen(i->buffer) - 1; j >= 0; j--) - ungetc(i->buffer[j], i->file); - - free(i->buffer); - i->buffer = NULL; - } - -} - -static void mpc_input_rewind(mpc_input_t *i) { - - if (i->backtrack < 1) { return; } - - i->state = i->marks[i->marks_num-1]; - i->last = i->lasts[i->marks_num-1]; - - if (i->type == MPC_INPUT_FILE) { - fseek(i->file, i->state.pos, SEEK_SET); - } - - mpc_input_unmark(i); -} - -static int mpc_input_buffer_in_range(mpc_input_t *i) { - return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); -} - -static char mpc_input_buffer_get(mpc_input_t *i) { - return i->buffer[i->state.pos - i->marks[0].pos]; -} - -static char mpc_input_getc(mpc_input_t *i) { - - char c = '\0'; - - switch (i->type) { - - case MPC_INPUT_STRING: return i->string[i->state.pos]; - case MPC_INPUT_FILE: c = fgetc(i->file); return c; - case MPC_INPUT_PIPE: - - if (!i->buffer) { c = getc(i->file); return c; } - - if (i->buffer && mpc_input_buffer_in_range(i)) { - c = mpc_input_buffer_get(i); - return c; - } else { - c = getc(i->file); - return c; - } - - default: return c; - } -} - -static char mpc_input_peekc(mpc_input_t *i) { - - char c = '\0'; - - switch (i->type) { - case MPC_INPUT_STRING: return i->string[i->state.pos]; - case MPC_INPUT_FILE: - - c = fgetc(i->file); - if (feof(i->file)) { return '\0'; } - - fseek(i->file, -1, SEEK_CUR); - return c; - - case MPC_INPUT_PIPE: - - if (!i->buffer) { - c = getc(i->file); - if (feof(i->file)) { return '\0'; } - ungetc(c, i->file); - return c; - } - - if (i->buffer && mpc_input_buffer_in_range(i)) { - return 
mpc_input_buffer_get(i); - } else { - c = getc(i->file); - if (feof(i->file)) { return '\0'; } - ungetc(c, i->file); - return c; - } - - default: return c; - } - -} - -static int mpc_input_terminated(mpc_input_t *i) { - return mpc_input_peekc(i) == '\0'; -} - -static int mpc_input_failure(mpc_input_t *i, char c) { - - switch (i->type) { - case MPC_INPUT_STRING: { break; } - case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } - case MPC_INPUT_PIPE: { - - if (!i->buffer) { ungetc(c, i->file); break; } - - if (i->buffer && mpc_input_buffer_in_range(i)) { - break; - } else { - ungetc(c, i->file); - } - } - default: { break; } - } - return 0; -} - -static int mpc_input_success(mpc_input_t *i, char c, char **o) { - - if (i->type == MPC_INPUT_PIPE - && i->buffer && !mpc_input_buffer_in_range(i)) { - i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); - i->buffer[strlen(i->buffer) + 1] = '\0'; - i->buffer[strlen(i->buffer) + 0] = c; - } - - i->last = c; - i->state.pos++; - i->state.col++; - - if (c == '\n') { - i->state.col = 0; - i->state.row++; - } - - if (o) { - (*o) = mpc_malloc(i, 2); - (*o)[0] = c; - (*o)[1] = '\0'; - } - - return 1; -} - -static int mpc_input_any(mpc_input_t *i, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return mpc_input_success(i, x, o); -} - -static int mpc_input_char(mpc_input_t *i, char c, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return strchr(c, x) != 0 ? 
mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { - - const char *x = c; - - mpc_input_mark(i); - while (*x) { - if (!mpc_input_char(i, *x, NULL)) { - mpc_input_rewind(i); - return 0; - } - x++; - } - mpc_input_unmark(i); - - *o = mpc_malloc(i, strlen(c) + 1); - strcpy(*o, c); - return 1; -} - -static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { - *o = NULL; - return f(i->last, mpc_input_peekc(i)); -} - -static int mpc_input_soi(mpc_input_t* i, char **o) { - *o = NULL; - return i->last == '\0'; -} - -static int mpc_input_eoi(mpc_input_t* i, char **o) { - *o = NULL; - if (i->state.term) { - return 0; - } else if (mpc_input_terminated(i)) { - i->state.term = 1; - return 1; - } else { - return 0; - } -} - -static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { - mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); - memcpy(r, &i->state, sizeof(mpc_state_t)); - return r; -} - -/* -** Error Type -*/ - -void mpc_err_delete(mpc_err_t *x) { - int i; - for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } - free(x->expected); - free(x->filename); - free(x->failure); - free(x); -} - -void mpc_err_print(mpc_err_t *x) { - mpc_err_print_to(x, stdout); -} - -void mpc_err_print_to(mpc_err_t *x, FILE *f) { - char *str = mpc_err_string(x); - fprintf(f, "%s", str); - free(str); -} - -static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) 
{ - /* TODO: Error Checking on Length */ - int left = ((*max) - (*pos)); - va_list va; - va_start(va, fmt); - if (left < 0) { left = 0;} - (*pos) += vsprintf(buffer + (*pos), fmt, va); - va_end(va); -} - -static char char_unescape_buffer[4]; - -static const char *mpc_err_char_unescape(char c) { - - char_unescape_buffer[0] = '\''; - char_unescape_buffer[1] = ' '; - char_unescape_buffer[2] = '\''; - char_unescape_buffer[3] = '\0'; - - switch (c) { - case '\a': return "bell"; - case '\b': return "backspace"; - case '\f': return "formfeed"; - case '\r': return "carriage return"; - case '\v': return "vertical tab"; - case '\0': return "end of input"; - case '\n': return "newline"; - case '\t': return "tab"; - case ' ' : return "space"; - default: - char_unescape_buffer[1] = c; - return char_unescape_buffer; - } - -} - -char *mpc_err_string(mpc_err_t *x) { - - int i; - int pos = 0; - int max = 1023; - char *buffer = calloc(1, 1024); - - if (x->failure) { - mpc_err_string_cat(buffer, &pos, &max, - "%s: error: %s\n", x->filename, x->failure); - return buffer; - } - - mpc_err_string_cat(buffer, &pos, &max, - "%s:%li:%li: error: expected ", x->filename, x->state.row+1, x->state.col+1); - - if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } - if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } - if (x->expected_num >= 2) { - - for (i = 0; i < x->expected_num-2; i++) { - mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); - } - - mpc_err_string_cat(buffer, &pos, &max, "%s or %s", - x->expected[x->expected_num-2], - x->expected[x->expected_num-1]); - } - - mpc_err_string_cat(buffer, &pos, &max, " at "); - mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->received)); - mpc_err_string_cat(buffer, &pos, &max, "\n"); - - return realloc(buffer, strlen(buffer) + 1); -} - -static mpc_err_t *mpc_err_new(mpc_input_t *i, const char *expected) { - mpc_err_t *x; - if 
(i->suppress) { return NULL; } - x = mpc_malloc(i, sizeof(mpc_err_t)); - x->filename = mpc_malloc(i, strlen(i->filename) + 1); - strcpy(x->filename, i->filename); - x->state = i->state; - x->expected_num = 1; - x->expected = mpc_malloc(i, sizeof(char*)); - x->expected[0] = mpc_malloc(i, strlen(expected) + 1); - strcpy(x->expected[0], expected); - x->failure = NULL; - x->received = mpc_input_peekc(i); - return x; -} - -static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { - mpc_err_t *x; - if (i->suppress) { return NULL; } - x = mpc_malloc(i, sizeof(mpc_err_t)); - x->filename = mpc_malloc(i, strlen(i->filename) + 1); - strcpy(x->filename, i->filename); - x->state = i->state; - x->expected_num = 0; - x->expected = NULL; - x->failure = mpc_malloc(i, strlen(failure) + 1); - strcpy(x->failure, failure); - x->received = ' '; - return x; -} - -static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { - mpc_err_t *x; - x = malloc(sizeof(mpc_err_t)); - x->filename = malloc(strlen(filename) + 1); - strcpy(x->filename, filename); - x->state = mpc_state_new(); - x->expected_num = 0; - x->expected = NULL; - x->failure = malloc(strlen(failure) + 1); - strcpy(x->failure, failure); - x->received = ' '; - return x; -} - -static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { - int j; - if (x == NULL) { return; } - for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } - mpc_free(i, x->expected); - mpc_free(i, x->filename); - mpc_free(i, x->failure); - mpc_free(i, x); -} - -static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { - int j; - for (j = 0; j < x->expected_num; j++) { - x->expected[j] = mpc_export(i, x->expected[j]); - } - x->expected = mpc_export(i, x->expected); - x->filename = mpc_export(i, x->filename); - x->failure = mpc_export(i, x->failure); - return mpc_export(i, x); -} - -static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { - int j; - (void)i; - for (j = 0; j < 
x->expected_num; j++) { - if (strcmp(x->expected[j], expected) == 0) { return 1; } - } - return 0; -} - -static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { - (void)i; - x->expected_num++; - x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); - x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); - strcpy(x->expected[x->expected_num-1], expected); -} - -static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { - - int j, k, fst; - mpc_err_t *e; - - fst = -1; - for (j = 0; j < n; j++) { - if (x[j] != NULL) { fst = j; } - } - - if (fst == -1) { return NULL; } - - e = mpc_malloc(i, sizeof(mpc_err_t)); - e->state = mpc_state_invalid(); - e->expected_num = 0; - e->expected = NULL; - e->failure = NULL; - e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); - strcpy(e->filename, x[fst]->filename); - - for (j = 0; j < n; j++) { - if (x[j] == NULL) { continue; } - if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } - } - - for (j = 0; j < n; j++) { - if (x[j] == NULL) { continue; } - if (x[j]->state.pos < e->state.pos) { continue; } - - if (x[j]->failure) { - e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); - strcpy(e->failure, x[j]->failure); - break; - } - - e->received = x[j]->received; - - for (k = 0; k < x[j]->expected_num; k++) { - if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { - mpc_err_add_expected(i, e, x[j]->expected[k]); - } - } - } - - for (j = 0; j < n; j++) { - if (x[j] == NULL) { continue; } - mpc_err_delete_internal(i, x[j]); - } - - return e; -} - -static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { - - int j = 0; - size_t l = 0; - char *expect = NULL; - - if (x == NULL) { return NULL; } - - if (x->expected_num == 0) { - expect = mpc_calloc(i, 1, 1); - x->expected_num = 1; - x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); - x->expected[0] = expect; - return x; - } - - else if 
(x->expected_num == 1) { - expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); - strcpy(expect, prefix); - strcat(expect, x->expected[0]); - mpc_free(i, x->expected[0]); - x->expected[0] = expect; - return x; - } - - else if (x->expected_num > 1) { - - l += strlen(prefix); - for (j = 0; j < x->expected_num-2; j++) { - l += strlen(x->expected[j]) + strlen(", "); - } - l += strlen(x->expected[x->expected_num-2]); - l += strlen(" or "); - l += strlen(x->expected[x->expected_num-1]); - - expect = mpc_malloc(i, l + 1); - - strcpy(expect, prefix); - for (j = 0; j < x->expected_num-2; j++) { - strcat(expect, x->expected[j]); strcat(expect, ", "); - } - strcat(expect, x->expected[x->expected_num-2]); - strcat(expect, " or "); - strcat(expect, x->expected[x->expected_num-1]); - - for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } - - x->expected_num = 1; - x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); - x->expected[0] = expect; - return x; - } - - return NULL; -} - -static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { - return mpc_err_repeat(i, x, "one or more of "); -} - -static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { - mpc_err_t *y; - int digits = n/10 + 1; - char *prefix; - prefix = mpc_malloc(i, digits + strlen(" of ") + 1); - sprintf(prefix, "%i of ", n); - y = mpc_err_repeat(i, x, prefix); - mpc_free(i, prefix); - return y; -} - -static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { - mpc_err_t *errs[2]; - errs[0] = x; - errs[1] = y; - return mpc_err_or(i, errs, 2); -} - -/* -** Parser Type -*/ - -enum { - MPC_TYPE_UNDEFINED = 0, - MPC_TYPE_PASS = 1, - MPC_TYPE_FAIL = 2, - MPC_TYPE_LIFT = 3, - MPC_TYPE_LIFT_VAL = 4, - MPC_TYPE_EXPECT = 5, - MPC_TYPE_ANCHOR = 6, - MPC_TYPE_STATE = 7, - - MPC_TYPE_ANY = 8, - MPC_TYPE_SINGLE = 9, - MPC_TYPE_ONEOF = 10, - MPC_TYPE_NONEOF = 11, - MPC_TYPE_RANGE = 12, - MPC_TYPE_SATISFY = 13, - MPC_TYPE_STRING = 14, 
- - MPC_TYPE_APPLY = 15, - MPC_TYPE_APPLY_TO = 16, - MPC_TYPE_PREDICT = 17, - MPC_TYPE_NOT = 18, - MPC_TYPE_MAYBE = 19, - MPC_TYPE_MANY = 20, - MPC_TYPE_MANY1 = 21, - MPC_TYPE_COUNT = 22, - - MPC_TYPE_OR = 23, - MPC_TYPE_AND = 24, - - MPC_TYPE_CHECK = 25, - MPC_TYPE_CHECK_WITH = 26, - - MPC_TYPE_SOI = 27, - MPC_TYPE_EOI = 28 -}; - -typedef struct { char *m; } mpc_pdata_fail_t; -typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; -typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; -typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; -typedef struct { char x; } mpc_pdata_single_t; -typedef struct { char x; char y; } mpc_pdata_range_t; -typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; -typedef struct { char *x; } mpc_pdata_string_t; -typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; -typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; -typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; -typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; -typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; -typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; -typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; -typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; -typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; - -typedef union { - mpc_pdata_fail_t fail; - mpc_pdata_lift_t lift; - mpc_pdata_expect_t expect; - mpc_pdata_anchor_t anchor; - mpc_pdata_single_t single; - mpc_pdata_range_t range; - mpc_pdata_satisfy_t satisfy; - mpc_pdata_string_t string; - mpc_pdata_apply_t apply; - mpc_pdata_apply_to_t apply_to; - mpc_pdata_check_t check; - mpc_pdata_check_with_t check_with; - mpc_pdata_predict_t predict; - mpc_pdata_not_t not; - mpc_pdata_repeat_t repeat; - 
mpc_pdata_and_t and; - mpc_pdata_or_t or; -} mpc_pdata_t; - -struct mpc_parser_t { - char *name; - mpc_pdata_t data; - char type; - char retained; -}; - -static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { - int j; - for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } - return xs[x]; -} - -static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } -static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } -static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } - -static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { - int j; - size_t l = 0; - if (n == 0) { return mpc_calloc(i, 1, 1); } - for (j = 0; j < n; j++) { l += strlen(xs[j]); } - xs[0] = mpc_realloc(i, xs[0], l + 1); - for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } - return xs[0]; -} - -static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { - mpc_state_t *s = ((mpc_state_t**)xs)[0]; - mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; - a = mpc_ast_state(a, *s); - mpc_free(i, s); - (void) n; - return a; -} - -static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { - int j; - if (f == mpcf_null) { return mpcf_null(n, xs); } - if (f == mpcf_fst) { return mpcf_fst(n, xs); } - if (f == mpcf_snd) { return mpcf_snd(n, xs); } - if (f == mpcf_trd) { return mpcf_trd(n, xs); } - if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } - if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } - if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } - if (f == mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } - if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } - for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } - return f(j, xs); -} - 
-static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { - mpc_free(i, x); - return NULL; -} - -static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { - mpc_ast_t *a = mpc_ast_new("", c); - mpc_free(i, c); - return a; -} - -static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { - if (f == mpcf_free) { return mpcf_input_free(i, x); } - if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } - return f(mpc_export(i, x)); -} - -static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { - return f(mpc_export(i, x), d); -} - -static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { - if (d == free) { mpc_free(i, x); return; } - d(mpc_export(i, x)); -} - -enum { - MPC_PARSE_STACK_MIN = 4 -}; - -#define MPC_SUCCESS(x) r->output = x; return 1 -#define MPC_FAILURE(x) r->error = x; return 0 -#define MPC_PRIMITIVE(x) \ - if (x) { MPC_SUCCESS(r->output); } \ - else { MPC_FAILURE(NULL); } - -#define MPC_MAX_RECURSION_DEPTH 1000 - -static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { - - int j = 0, k = 0; - mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; - mpc_result_t *results; - int results_slots = MPC_PARSE_STACK_MIN; - - if (depth == MPC_MAX_RECURSION_DEPTH) - { - MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); - } - - switch (p->type) { - - /* Basic Parsers */ - - case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); - case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); - case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); - case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); - case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); - case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, 
p->data.satisfy.f, (char**)&r->output)); - case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); - case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); - case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); - case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); - - /* Other parsers */ - - case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); - case MPC_TYPE_PASS: MPC_SUCCESS(NULL); - case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); - case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); - case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); - case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); - - /* Application Parsers */ - - case MPC_TYPE_APPLY: - if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { - MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); - } else { - MPC_FAILURE(r->output); - } - - case MPC_TYPE_APPLY_TO: - if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { - MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); - } else { - MPC_FAILURE(r->error); - } - - case MPC_TYPE_CHECK: - if (mpc_parse_run(i, p->data.check.x, r, e, depth+1)) { - if (p->data.check.f(&r->output)) { - MPC_SUCCESS(r->output); - } else { - mpc_parse_dtor(i, p->data.check.dx, r->output); - MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); - } - } else { - MPC_FAILURE(r->error); - } - - case MPC_TYPE_CHECK_WITH: - if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { - if (p->data.check_with.f(&r->output, p->data.check_with.d)) { - MPC_SUCCESS(r->output); - } else { - mpc_parse_dtor(i, p->data.check.dx, r->output); - MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); - } - } else { - MPC_FAILURE(r->error); - } - - case MPC_TYPE_EXPECT: - mpc_input_suppress_enable(i); - if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { - mpc_input_suppress_disable(i); - 
MPC_SUCCESS(r->output); - } else { - mpc_input_suppress_disable(i); - MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); - } - - case MPC_TYPE_PREDICT: - mpc_input_backtrack_disable(i); - if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { - mpc_input_backtrack_enable(i); - MPC_SUCCESS(r->output); - } else { - mpc_input_backtrack_enable(i); - MPC_FAILURE(r->error); - } - - /* Optional Parsers */ - - /* TODO: Update Not Error Message */ - - case MPC_TYPE_NOT: - mpc_input_mark(i); - mpc_input_suppress_enable(i); - if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { - mpc_input_rewind(i); - mpc_input_suppress_disable(i); - mpc_parse_dtor(i, p->data.not.dx, r->output); - MPC_FAILURE(mpc_err_new(i, "opposite")); - } else { - mpc_input_unmark(i); - mpc_input_suppress_disable(i); - MPC_SUCCESS(p->data.not.lf()); - } - - case MPC_TYPE_MAYBE: - if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { - MPC_SUCCESS(r->output); - } else { - *e = mpc_err_merge(i, *e, r->error); - MPC_SUCCESS(p->data.not.lf()); - } - - /* Repeat Parsers */ - - case MPC_TYPE_MANY: - - results = results_stk; - - while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { - j++; - if (j == MPC_PARSE_STACK_MIN) { - results_slots = j + j / 2; - results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); - memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); - } else if (j >= results_slots) { - results_slots = j + j / 2; - results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); - } - } - - *e = mpc_err_merge(i, *e, results[j].error); - - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - - case MPC_TYPE_MANY1: - - results = results_stk; - - while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { - j++; - if (j == MPC_PARSE_STACK_MIN) { - results_slots = j + j / 2; - results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); - memcpy(results, 
results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); - } else if (j >= results_slots) { - results_slots = j + j / 2; - results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); - } - } - - if (j == 0) { - MPC_FAILURE( - mpc_err_many1(i, results[j].error); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - - *e = mpc_err_merge(i, *e, results[j].error); - - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - - case MPC_TYPE_COUNT: - - results = p->data.repeat.n > MPC_PARSE_STACK_MIN - ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) - : results_stk; - - while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { - j++; - if (j == p->data.repeat.n) { break; } - } - - if (j == p->data.repeat.n) { - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - for (k = 0; k < j; k++) { - mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); - } - MPC_FAILURE( - mpc_err_count(i, results[j].error, p->data.repeat.n); - if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - - /* Combinatory Parsers */ - - case MPC_TYPE_OR: - - if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } - - results = p->data.or.n > MPC_PARSE_STACK_MIN - ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) - : results_stk; - - for (j = 0; j < p->data.or.n; j++) { - if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { - MPC_SUCCESS(results[j].output; - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - *e = mpc_err_merge(i, *e, results[j].error); - } - } - - MPC_FAILURE(NULL; - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - - case MPC_TYPE_AND: - - if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } - - results = p->data.or.n > MPC_PARSE_STACK_MIN - ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) - : results_stk; - - mpc_input_mark(i); - for (j = 0; j < p->data.and.n; j++) { - if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { - mpc_input_rewind(i); - for (k = 0; k < j; k++) { - mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); - } - MPC_FAILURE(results[j].error; - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - } - mpc_input_unmark(i); - MPC_SUCCESS( - mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - - /* End */ - - default: - - MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); - } - - return 0; - -} - -#undef MPC_SUCCESS -#undef MPC_FAILURE -#undef MPC_PRIMITIVE - -int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); - e->state = mpc_state_invalid(); - x = mpc_parse_run(i, p, r, &e, 0); - if (x) { - mpc_err_delete_internal(i, e); - r->output = mpc_export(i, r->output); - } else { - r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); - } - return x; -} - -int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_string(filename, string); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_nstring(filename, string, length); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_file(filename, file); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = 
mpc_input_new_pipe(filename, pipe); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { - - FILE *f = fopen(filename, "rb"); - int res; - - if (f == NULL) { - r->output = NULL; - r->error = mpc_err_file(filename, "Unable to open file!"); - return 0; - } - - res = mpc_parse_file(filename, f, p, r); - fclose(f); - return res; -} - -/* -** Building a Parser -*/ - -static void mpc_undefine_unretained(mpc_parser_t *p, int force); - -static void mpc_undefine_or(mpc_parser_t *p) { - - int i; - for (i = 0; i < p->data.or.n; i++) { - mpc_undefine_unretained(p->data.or.xs[i], 0); - } - free(p->data.or.xs); - -} - -static void mpc_undefine_and(mpc_parser_t *p) { - - int i; - for (i = 0; i < p->data.and.n; i++) { - mpc_undefine_unretained(p->data.and.xs[i], 0); - } - free(p->data.and.xs); - free(p->data.and.dxs); - -} - -static void mpc_undefine_unretained(mpc_parser_t *p, int force) { - - if (p->retained && !force) { return; } - - switch (p->type) { - - case MPC_TYPE_FAIL: free(p->data.fail.m); break; - - case MPC_TYPE_ONEOF: - case MPC_TYPE_NONEOF: - case MPC_TYPE_STRING: - free(p->data.string.x); - break; - - case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; - case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; - case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; - - case MPC_TYPE_MAYBE: - case MPC_TYPE_NOT: - mpc_undefine_unretained(p->data.not.x, 0); - break; - - case MPC_TYPE_EXPECT: - mpc_undefine_unretained(p->data.expect.x, 0); - free(p->data.expect.m); - break; - - case MPC_TYPE_MANY: - case MPC_TYPE_MANY1: - case MPC_TYPE_COUNT: - mpc_undefine_unretained(p->data.repeat.x, 0); - break; - - case MPC_TYPE_OR: mpc_undefine_or(p); break; - case MPC_TYPE_AND: mpc_undefine_and(p); break; - - case MPC_TYPE_CHECK: - mpc_undefine_unretained(p->data.check.x, 0); - free(p->data.check.e); - break; - - 
case MPC_TYPE_CHECK_WITH: - mpc_undefine_unretained(p->data.check_with.x, 0); - free(p->data.check_with.e); - break; - - default: break; - } - - if (!force) { - free(p->name); - free(p); - } - -} - -void mpc_delete(mpc_parser_t *p) { - if (p->retained) { - - if (p->type != MPC_TYPE_UNDEFINED) { - mpc_undefine_unretained(p, 0); - } - - free(p->name); - free(p); - - } else { - mpc_undefine_unretained(p, 0); - } -} - -static void mpc_soft_delete(mpc_val_t *x) { - mpc_undefine_unretained(x, 0); -} - -static mpc_parser_t *mpc_undefined(void) { - mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); - p->retained = 0; - p->type = MPC_TYPE_UNDEFINED; - p->name = NULL; - return p; -} - -mpc_parser_t *mpc_new(const char *name) { - mpc_parser_t *p = mpc_undefined(); - p->retained = 1; - p->name = realloc(p->name, strlen(name) + 1); - strcpy(p->name, name); - return p; -} - -mpc_parser_t *mpc_copy(mpc_parser_t *a) { - int i = 0; - mpc_parser_t *p; - - if (a->retained) { return a; } - - p = mpc_undefined(); - p->retained = a->retained; - p->type = a->type; - p->data = a->data; - - if (a->name) { - p->name = malloc(strlen(a->name)+1); - strcpy(p->name, a->name); - } - - switch (a->type) { - - case MPC_TYPE_FAIL: - p->data.fail.m = malloc(strlen(a->data.fail.m)+1); - strcpy(p->data.fail.m, a->data.fail.m); - break; - - case MPC_TYPE_ONEOF: - case MPC_TYPE_NONEOF: - case MPC_TYPE_STRING: - p->data.string.x = malloc(strlen(a->data.string.x)+1); - strcpy(p->data.string.x, a->data.string.x); - break; - - case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; - case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; - case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; - - case MPC_TYPE_MAYBE: - case MPC_TYPE_NOT: - p->data.not.x = mpc_copy(a->data.not.x); - break; - - case MPC_TYPE_EXPECT: - p->data.expect.x = mpc_copy(a->data.expect.x); - p->data.expect.m = malloc(strlen(a->data.expect.m)+1); - 
strcpy(p->data.expect.m, a->data.expect.m); - break; - - case MPC_TYPE_MANY: - case MPC_TYPE_MANY1: - case MPC_TYPE_COUNT: - p->data.repeat.x = mpc_copy(a->data.repeat.x); - break; - - case MPC_TYPE_OR: - p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); - for (i = 0; i < a->data.or.n; i++) { - p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); - } - break; - case MPC_TYPE_AND: - p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); - for (i = 0; i < a->data.and.n; i++) { - p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); - } - p->data.and.dxs = malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); - for (i = 0; i < a->data.and.n-1; i++) { - p->data.and.dxs[i] = a->data.and.dxs[i]; - } - break; - - case MPC_TYPE_CHECK: - p->data.check.x = mpc_copy(a->data.check.x); - p->data.check.e = malloc(strlen(a->data.check.e)+1); - strcpy(p->data.check.e, a->data.check.e); - break; - case MPC_TYPE_CHECK_WITH: - p->data.check_with.x = mpc_copy(a->data.check_with.x); - p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); - strcpy(p->data.check_with.e, a->data.check_with.e); - break; - - default: break; - } - - - return p; -} - -mpc_parser_t *mpc_undefine(mpc_parser_t *p) { - mpc_undefine_unretained(p, 1); - p->type = MPC_TYPE_UNDEFINED; - return p; -} - -mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { - - if (p->retained) { - p->type = a->type; - p->data = a->data; - } else { - mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); - p->type = a2->type; - p->data = a2->data; - free(a2); - } - - free(a); - return p; -} - -void mpc_cleanup(int n, ...) 
{ - int i; - mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); - - va_list va; - va_start(va, n); - for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } - for (i = 0; i < n; i++) { mpc_undefine(list[i]); } - for (i = 0; i < n; i++) { mpc_delete(list[i]); } - va_end(va); - - free(list); -} - -mpc_parser_t *mpc_pass(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_PASS; - return p; -} - -mpc_parser_t *mpc_fail(const char *m) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_FAIL; - p->data.fail.m = malloc(strlen(m) + 1); - strcpy(p->data.fail.m, m); - return p; -} - -/* -** As `snprintf` is not ANSI standard this -** function `mpc_failf` should be considered -** unsafe. -** -** You have a few options if this is going to be -** trouble. -** -** - Ensure the format string does not exceed -** the buffer length using precision specifiers -** such as `%.512s`. -** -** - Patch this function in your code base to -** use `snprintf` or whatever variant your -** system supports. -** -** - Avoid it altogether. -** -*/ - -mpc_parser_t *mpc_failf(const char *fmt, ...) 
{ - - va_list va; - char *buffer; - - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_FAIL; - - va_start(va, fmt); - buffer = malloc(2048); - vsprintf(buffer, fmt, va); - va_end(va); - - buffer = realloc(buffer, strlen(buffer) + 1); - p->data.fail.m = buffer; - return p; - -} - -mpc_parser_t *mpc_lift_val(mpc_val_t *x) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_LIFT_VAL; - p->data.lift.x = x; - return p; -} - -mpc_parser_t *mpc_lift(mpc_ctor_t lf) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_LIFT; - p->data.lift.lf = lf; - return p; -} - -mpc_parser_t *mpc_anchor(int(*f)(char,char)) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_ANCHOR; - p->data.anchor.f = f; - return mpc_expect(p, "anchor"); -} - -mpc_parser_t *mpc_state(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_STATE; - return p; -} - -mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_EXPECT; - p->data.expect.x = a; - p->data.expect.m = malloc(strlen(expected) + 1); - strcpy(p->data.expect.m, expected); - return p; -} - -/* -** As `snprintf` is not ANSI standard this -** function `mpc_expectf` should be considered -** unsafe. -** -** You have a few options if this is going to be -** trouble. -** -** - Ensure the format string does not exceed -** the buffer length using precision specifiers -** such as `%.512s`. -** -** - Patch this function in your code base to -** use `snprintf` or whatever variant your -** system supports. -** -** - Avoid it altogether. -** -*/ - -mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) 
{ - va_list va; - char *buffer; - - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_EXPECT; - - va_start(va, fmt); - buffer = malloc(2048); - vsprintf(buffer, fmt, va); - va_end(va); - - buffer = realloc(buffer, strlen(buffer) + 1); - p->data.expect.x = a; - p->data.expect.m = buffer; - return p; -} - -/* -** Basic Parsers -*/ - -mpc_parser_t *mpc_any(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_ANY; - return mpc_expect(p, "any character"); -} - -mpc_parser_t *mpc_char(char c) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SINGLE; - p->data.single.x = c; - return mpc_expectf(p, "'%c'", c); -} - -mpc_parser_t *mpc_range(char s, char e) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_RANGE; - p->data.range.x = s; - p->data.range.y = e; - return mpc_expectf(p, "character between '%c' and '%c'", s, e); -} - -mpc_parser_t *mpc_oneof(const char *s) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_ONEOF; - p->data.string.x = malloc(strlen(s) + 1); - strcpy(p->data.string.x, s); - return mpc_expectf(p, "one of '%s'", s); -} - -mpc_parser_t *mpc_noneof(const char *s) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_NONEOF; - p->data.string.x = malloc(strlen(s) + 1); - strcpy(p->data.string.x, s); - return mpc_expectf(p, "none of '%s'", s); - -} - -mpc_parser_t *mpc_satisfy(int(*f)(char)) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SATISFY; - p->data.satisfy.f = f; - return mpc_expectf(p, "character satisfying function %p", f); -} - -mpc_parser_t *mpc_string(const char *s) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_STRING; - p->data.string.x = malloc(strlen(s) + 1); - strcpy(p->data.string.x, s); - return mpc_expectf(p, "\"%s\"", s); -} - -/* -** Core Parsers -*/ - -mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_APPLY; - p->data.apply.x = a; - p->data.apply.f = f; - return p; -} - -mpc_parser_t 
*mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_APPLY_TO; - p->data.apply_to.x = a; - p->data.apply_to.f = f; - p->data.apply_to.d = x; - return p; -} - -mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_CHECK; - p->data.check.x = a; - p->data.check.dx = da; - p->data.check.f = f; - p->data.check.e = malloc(strlen(e) + 1); - strcpy(p->data.check.e, e); - return p; -} - -mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_CHECK_WITH; - p->data.check_with.x = a; - p->data.check_with.dx = da; - p->data.check_with.f = f; - p->data.check_with.d = x; - p->data.check_with.e = malloc(strlen(e) + 1); - strcpy(p->data.check_with.e, e); - return p; -} - -mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) { - va_list va; - char *buffer; - mpc_parser_t *p; - - va_start(va, fmt); - buffer = malloc(2048); - vsprintf(buffer, fmt, va); - va_end(va); - - p = mpc_check(a, da, f, buffer); - free(buffer); - - return p; -} - -mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) 
{ - va_list va; - char *buffer; - mpc_parser_t *p; - - va_start(va, fmt); - buffer = malloc(2048); - vsprintf(buffer, fmt, va); - va_end(va); - - p = mpc_check_with(a, da, f, x, buffer); - free(buffer); - - return p; -} - -mpc_parser_t *mpc_predictive(mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_PREDICT; - p->data.predict.x = a; - return p; -} - -mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_NOT; - p->data.not.x = a; - p->data.not.dx = da; - p->data.not.lf = lf; - return p; -} - -mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { - return mpc_not_lift(a, da, mpcf_ctor_null); -} - -mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_MAYBE; - p->data.not.x = a; - p->data.not.lf = lf; - return p; -} - -mpc_parser_t *mpc_maybe(mpc_parser_t *a) { - return mpc_maybe_lift(a, mpcf_ctor_null); -} - -mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_MANY; - p->data.repeat.x = a; - p->data.repeat.f = f; - return p; -} - -mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_MANY1; - p->data.repeat.x = a; - p->data.repeat.f = f; - return p; -} - -mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_COUNT; - p->data.repeat.n = n; - p->data.repeat.f = f; - p->data.repeat.x = a; - p->data.repeat.dx = da; - return p; -} - -mpc_parser_t *mpc_or(int n, ...) 
{ - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_OR; - p->data.or.n = n; - p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - - va_start(va, n); - for (i = 0; i < n; i++) { - p->data.or.xs[i] = va_arg(va, mpc_parser_t*); - } - va_end(va); - - return p; -} - -mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_AND; - p->data.and.n = n; - p->data.and.f = f; - p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); - p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - - va_start(va, f); - for (i = 0; i < n; i++) { - p->data.and.xs[i] = va_arg(va, mpc_parser_t*); - } - for (i = 0; i < (n-1); i++) { - p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); - } - va_end(va); - - return p; -} - -/* -** Common Parsers -*/ - -mpc_parser_t *mpc_soi(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SOI; - return mpc_expect(p, "start of input"); -} - -mpc_parser_t *mpc_eoi(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_EOI; - return mpc_expect(p, "end of input"); -} - -static int mpc_boundary_anchor(char prev, char next) { - const char* word = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "0123456789_"; - if ( strchr(word, next) && prev == '\0') { return 1; } - if ( strchr(word, prev) && next == '\0') { return 1; } - if ( strchr(word, next) && !strchr(word, prev)) { return 1; } - if (!strchr(word, next) && strchr(word, prev)) { return 1; } - return 0; -} - -static int mpc_boundary_newline_anchor(char prev, char next) { - (void)next; - return prev == '\n'; -} - -mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } -mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } - -mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } -mpc_parser_t 
*mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } -mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } - -mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } -mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } -mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } - -mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } -mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } -mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } -mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } -mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } -mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } - -mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } -mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } -mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } -mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } -mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } - -mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } -mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } -mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } -mpc_parser_t 
*mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } - -mpc_parser_t *mpc_real(void) { - - /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ - - mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; - - p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); - p1 = mpc_digits(); - p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); - p30 = mpc_oneof("eE"); - p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); - p32 = mpc_digits(); - p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); - - return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); - -} - -mpc_parser_t *mpc_float(void) { - return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); -} - -mpc_parser_t *mpc_char_lit(void) { - return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); -} - -mpc_parser_t *mpc_string_lit(void) { - mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); - return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); -} - -mpc_parser_t *mpc_regex_lit(void) { - mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); - return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); -} - -mpc_parser_t *mpc_ident(void) { - mpc_parser_t *p0, *p1; - p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); - p1 = mpc_many(mpcf_strfold, mpc_alphanum()); - return mpc_and(2, mpcf_strfold, p0, p1, free); -} - -/* -** Useful Parsers -*/ - -mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } -mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } -mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } - -mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return 
mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } -mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } -mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } -mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } -mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } - -mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } - -mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { - return mpc_and(3, mpcf_snd_free, - mpc_string(o), a, mpc_string(c), - free, ad); -} - -mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } -mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } -mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } -mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } - -mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { - return mpc_and(3, mpcf_snd_free, - mpc_sym(o), mpc_tok(a), mpc_sym(c), - free, ad); -} - -mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } -mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } -mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } -mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } - -/* -** Regular Expression Parsers -*/ - -/* -** So here is a cute bootstrapping. 
-** -** I'm using the previously defined -** mpc constructs and functions to -** parse the user regex string and -** construct a parser from it. -** -** As it turns out lots of the standard -** mpc functions look a lot like `fold` -** functions and so can be used indirectly -** by many of the parsing functions to build -** a parser directly - as we are parsing. -** -** This is certainly something that -** would be less elegant/interesting -** in a two-phase parser which first -** builds an AST and then traverses it -** to generate the object. -** -** This whole thing acts as a great -** case study for how trivial it can be -** to write a great parser in a few -** lines of code using mpc. -*/ - -/* -** -** ### Regular Expression Grammar -** -** : | ( "|" ) -** -** : * -** -** : -** | "*" -** | "+" -** | "?" -** | "{" "}" -** -** : -** | "\" -** | "(" ")" -** | "[" "]" -*/ - -static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { - (void) n; - if (xs[1] == NULL) { return xs[0]; } - else { return mpc_or(2, xs[0], xs[1]); } -} - -static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { - int i; - mpc_parser_t *p = mpc_lift(mpcf_ctor_str); - for (i = 0; i < n; i++) { - p = mpc_and(2, mpcf_strfold, p, xs[i], free); - } - return p; -} - -static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { - int num; - (void) n; - if (xs[1] == NULL) { return xs[0]; } - switch(((char*)xs[1])[0]) - { - case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; - case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; - case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; - default: - num = *(int*)xs[1]; - free(xs[1]); - } - - return mpc_count(num, mpcf_strfold, xs[0], free); -} - -static mpc_parser_t *mpc_re_escape_char(char c) { - switch (c) { - case 'a': return mpc_char('\a'); - case 'f': return mpc_char('\f'); - case 'n': return mpc_char('\n'); - case 'r': return mpc_char('\r'); - case 't': return mpc_char('\t'); - case 'v': 
return mpc_char('\v'); - case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); - case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); - case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); - case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); - case 'd': return mpc_digit(); - case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); - case 's': return mpc_whitespace(); - case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); - case 'w': return mpc_alphanum(); - case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); - default: return NULL; - } -} - -static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { - - int mode = *((int*)data); - char *s = x; - mpc_parser_t *p; - - /* Any Character */ - if (s[0] == '.') { - free(s); - if (mode & MPC_RE_DOTALL) { - return mpc_any(); - } else { - return mpc_expect(mpc_noneof("\n"), "any character except a newline"); - } - } - - /* Start of Input */ - if (s[0] == '^') { - free(s); - if (mode & MPC_RE_MULTILINE) { - return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); - } else { - return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); - } - } - - /* End of Input */ - if (s[0] == '$') { - free(s); - if (mode & MPC_RE_MULTILINE) { - return mpc_or(2, - mpc_newline(), - mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); - } else { - return mpc_or(2, - mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), - mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); - } - } - - /* Regex Escape */ - if (s[0] == '\\') { - p = mpc_re_escape_char(s[1]); - p = (p == NULL) ? 
mpc_char(s[1]) : p; - free(s); - return p; - } - - /* Regex Standard */ - p = mpc_char(s[0]); - free(s); - return p; -} - -static const char *mpc_re_range_escape_char(char c) { - switch (c) { - case '-': return "-"; - case 'a': return "\a"; - case 'f': return "\f"; - case 'n': return "\n"; - case 'r': return "\r"; - case 't': return "\t"; - case 'v': return "\v"; - case 'b': return "\b"; - case 'd': return "0123456789"; - case 's': return " \f\n\r\t\v"; - case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; - default: return NULL; - } -} - -static mpc_val_t *mpcf_re_range(mpc_val_t *x) { - - mpc_parser_t *out; - size_t i, j; - size_t start, end; - const char *tmp = NULL; - const char *s = x; - int comp = s[0] == '^' ? 1 : 0; - char *range = calloc(1,1); - - if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } - if (s[0] == '^' && - s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } - - for (i = comp; i < strlen(s); i++){ - - /* Regex Range Escape */ - if (s[i] == '\\') { - tmp = mpc_re_range_escape_char(s[i+1]); - if (tmp != NULL) { - range = realloc(range, strlen(range) + strlen(tmp) + 1); - strcat(range, tmp); - } else { - range = realloc(range, strlen(range) + 1 + 1); - range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = s[i+1]; - } - i++; - } - - /* Regex Range...Range */ - else if (s[i] == '-') { - if (s[i+1] == '\0' || i == 0) { - range = realloc(range, strlen(range) + strlen("-") + 1); - strcat(range, "-"); - } else { - start = s[i-1]+1; - end = s[i+1]-1; - for (j = start; j <= end; j++) { - range = realloc(range, strlen(range) + 1 + 1 + 1); - range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = (char)j; - } - } - } - - /* Regex Range Normal */ - else { - range = realloc(range, strlen(range) + 1 + 1); - range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = s[i]; - } - - } - - out = comp == 1 ? 
mpc_noneof(range) : mpc_oneof(range); - - free(x); - free(range); - - return out; -} - -mpc_parser_t *mpc_re(const char *re) { - return mpc_re_mode(re, MPC_RE_DEFAULT); -} - -mpc_parser_t *mpc_re_mode(const char *re, int mode) { - - char *err_msg; - mpc_parser_t *err_out; - mpc_result_t r; - mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; - - Regex = mpc_new("regex"); - Term = mpc_new("term"); - Factor = mpc_new("factor"); - Base = mpc_new("base"); - Range = mpc_new("range"); - - mpc_define(Regex, mpc_and(2, mpcf_re_or, - Term, - mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), - (mpc_dtor_t)mpc_delete - )); - - mpc_define(Term, mpc_many(mpcf_re_and, Factor)); - - mpc_define(Factor, mpc_and(2, mpcf_re_repeat, - Base, - mpc_or(5, - mpc_char('*'), mpc_char('+'), mpc_char('?'), - mpc_brackets(mpc_int(), free), - mpc_pass()), - (mpc_dtor_t)mpc_delete - )); - - mpc_define(Base, mpc_or(4, - mpc_parens(Regex, (mpc_dtor_t)mpc_delete), - mpc_squares(Range, (mpc_dtor_t)mpc_delete), - mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), - mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) - )); - - mpc_define(Range, mpc_apply( - mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), - mpcf_re_range - )); - - RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); - - mpc_optimise(RegexEnclose); - mpc_optimise(Regex); - mpc_optimise(Term); - mpc_optimise(Factor); - mpc_optimise(Base); - mpc_optimise(Range); - - if(!mpc_parse("", re, RegexEnclose, &r)) { - err_msg = mpc_err_string(r.error); - err_out = mpc_failf("Invalid Regex: %s", err_msg); - mpc_err_delete(r.error); - free(err_msg); - r.output = err_out; - } - - mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); - - mpc_optimise(r.output); - - return r.output; - -} - -/* -** Common Fold Functions -*/ - -void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } - -mpc_val_t *mpcf_ctor_null(void) { return NULL; } -mpc_val_t *mpcf_ctor_str(void) { 
return calloc(1, 1); } -mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } - -mpc_val_t *mpcf_int(mpc_val_t *x) { - int *y = malloc(sizeof(int)); - *y = strtol(x, NULL, 10); - free(x); - return y; -} - -mpc_val_t *mpcf_hex(mpc_val_t *x) { - int *y = malloc(sizeof(int)); - *y = strtol(x, NULL, 16); - free(x); - return y; -} - -mpc_val_t *mpcf_oct(mpc_val_t *x) { - int *y = malloc(sizeof(int)); - *y = strtol(x, NULL, 8); - free(x); - return y; -} - -mpc_val_t *mpcf_float(mpc_val_t *x) { - float *y = malloc(sizeof(float)); - *y = strtod(x, NULL); - free(x); - return y; -} - -mpc_val_t *mpcf_strtriml(mpc_val_t *x) { - char *s = x; - while (isspace((unsigned char)*s)) { - memmove(s, s+1, strlen(s)); - } - return s; -} - -mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { - char *s = x; - size_t l = strlen(s); - while (l > 0 && isspace((unsigned char)s[l-1])) { - s[l-1] = '\0'; l--; - } - return s; -} - -mpc_val_t *mpcf_strtrim(mpc_val_t *x) { - return mpcf_strtriml(mpcf_strtrimr(x)); -} - -static const char mpc_escape_input_c[] = { - '\a', '\b', '\f', '\n', '\r', - '\t', '\v', '\\', '\'', '\"', '\0'}; - -static const char *mpc_escape_output_c[] = { - "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", - "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; - -static const char mpc_escape_input_raw_re[] = { '/' }; -static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; - -static const char mpc_escape_input_raw_cstr[] = { '"' }; -static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; - -static const char mpc_escape_input_raw_cchar[] = { '\'' }; -static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; - -static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { - - int i; - int found; - char buff[2]; - char *s = x; - char *y = calloc(1, 1); - - while (*s) { - - i = 0; - found = 0; - - while (output[i]) { - if (*s == input[i]) { - y = realloc(y, strlen(y) + strlen(output[i]) + 1); - strcat(y, output[i]); - found = 1; - break; - 
} - i++; - } - - if (!found) { - y = realloc(y, strlen(y) + 2); - buff[0] = *s; buff[1] = '\0'; - strcat(y, buff); - } - - s++; - } - - - return y; -} - -static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { - - int i; - int found = 0; - char buff[2]; - char *s = x; - char *y = calloc(1, 1); - - while (*s) { - - i = 0; - found = 0; - - while (output[i]) { - if ((*(s+0)) == output[i][0] && - (*(s+1)) == output[i][1]) { - y = realloc(y, strlen(y) + 1 + 1); - buff[0] = input[i]; buff[1] = '\0'; - strcat(y, buff); - found = 1; - s++; - break; - } - i++; - } - - if (!found) { - y = realloc(y, strlen(y) + 1 + 1); - buff[0] = *s; buff[1] = '\0'; - strcat(y, buff); - } - - if (*s == '\0') { break; } - else { s++; } - } - - return y; - -} - -mpc_val_t *mpcf_escape(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); - free(x); - return y; -} - -mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); - free(x); - return y; -} - -mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); - free(x); - return y; -} - -mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { - mpc_val_t *y = 
mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); - free(x); - return y; -} - -mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } -mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } -mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } -mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } - -static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { - int i; - for (i = 0; i < n; i++) { - if (i != x) { free(xs[i]); } - } - return xs[x]; -} - -mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } -mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } -mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } - -mpc_val_t *mpcf_freefold(int n, mpc_val_t **xs) { - int i; - for (i = 0; i < n; i++) { - free(xs[i]); - } - return NULL; -} - -mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { - int i; - size_t l = 0; - - if (n == 0) { return calloc(1, 1); } - - for (i = 0; i < n; i++) { l += strlen(xs[i]); } - - xs[0] = realloc(xs[0], l + 1); - - for (i = 1; i < n; i++) { - strcat(xs[0], xs[i]); free(xs[i]); - } - - return xs[0]; -} - -mpc_val_t *mpcf_maths(int n, mpc_val_t **xs) { - int **vs = (int**)xs; - (void) n; - - switch(((char*)xs[1])[0]) - { - case '*': { *vs[0] *= *vs[2]; }; break; - case '/': { *vs[0] /= *vs[2]; }; break; - case '%': { *vs[0] %= *vs[2]; }; break; - case '+': { *vs[0] += *vs[2]; }; break; - case '-': { *vs[0] -= *vs[2]; }; break; - default: break; - } - - free(xs[1]); free(xs[2]); - - return xs[0]; -} - -/* -** Printing -*/ - -static void mpc_print_unretained(mpc_parser_t *p, int force) { - - /* TODO: Print Everything Escaped */ - - int i; - char *s, *e; - char buff[2]; - - if (p->retained && !force) {; - if (p->name) { printf("<%s>", p->name); } - else { printf(""); } - return; - } - - if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } - if 
(p->type == MPC_TYPE_PASS) { printf("<:>"); } - if (p->type == MPC_TYPE_FAIL) { printf(""); } - if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } - if (p->type == MPC_TYPE_STATE) { printf(""); } - if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } - if (p->type == MPC_TYPE_EXPECT) { - printf("%s", p->data.expect.m); - /*mpc_print_unretained(p->data.expect.x, 0);*/ - } - - if (p->type == MPC_TYPE_ANY) { printf("<.>"); } - if (p->type == MPC_TYPE_SATISFY) { printf(""); } - - if (p->type == MPC_TYPE_SINGLE) { - buff[0] = p->data.single.x; buff[1] = '\0'; - s = mpcf_escape_new( - buff, - mpc_escape_input_c, - mpc_escape_output_c); - printf("'%s'", s); - free(s); - } - - if (p->type == MPC_TYPE_RANGE) { - buff[0] = p->data.range.x; buff[1] = '\0'; - s = mpcf_escape_new( - buff, - mpc_escape_input_c, - mpc_escape_output_c); - buff[0] = p->data.range.y; buff[1] = '\0'; - e = mpcf_escape_new( - buff, - mpc_escape_input_c, - mpc_escape_output_c); - printf("[%s-%s]", s, e); - free(s); - free(e); - } - - if (p->type == MPC_TYPE_ONEOF) { - s = mpcf_escape_new( - p->data.string.x, - mpc_escape_input_c, - mpc_escape_output_c); - printf("[%s]", s); - free(s); - } - - if (p->type == MPC_TYPE_NONEOF) { - s = mpcf_escape_new( - p->data.string.x, - mpc_escape_input_c, - mpc_escape_output_c); - printf("[^%s]", s); - free(s); - } - - if (p->type == MPC_TYPE_STRING) { - s = mpcf_escape_new( - p->data.string.x, - mpc_escape_input_c, - mpc_escape_output_c); - printf("\"%s\"", s); - free(s); - } - - if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } - if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } - if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } - - if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } - if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } - - if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 
0); printf("*"); } - if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } - if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } - - if (p->type == MPC_TYPE_OR) { - printf("("); - for(i = 0; i < p->data.or.n-1; i++) { - mpc_print_unretained(p->data.or.xs[i], 0); - printf(" | "); - } - mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); - printf(")"); - } - - if (p->type == MPC_TYPE_AND) { - printf("("); - for(i = 0; i < p->data.and.n-1; i++) { - mpc_print_unretained(p->data.and.xs[i], 0); - printf(" "); - } - mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); - printf(")"); - } - - if (p->type == MPC_TYPE_CHECK) { - mpc_print_unretained(p->data.check.x, 0); - printf("->?"); - } - if (p->type == MPC_TYPE_CHECK_WITH) { - mpc_print_unretained(p->data.check_with.x, 0); - printf("->?"); - } - -} - -void mpc_print(mpc_parser_t *p) { - mpc_print_unretained(p, 1); - printf("\n"); -} - -/* -** Testing -*/ - -/* -** These functions are slightly unwieldy and -** also the whole of the testing suite for mpc -** mpc is pretty shaky. -** -** It could do with a lot more tests and more -** precision. Currently I am only really testing -** changes off of the examples. 
-** -*/ - -int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)) { - mpc_result_t r; - (void) printer; - if (mpc_parse("", s, p, &r)) { - - if (tester(r.output, d)) { - destructor(r.output); - return 0; - } else { - destructor(r.output); - return 1; - } - - } else { - mpc_err_delete(r.error); - return 1; - } - -} - -int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)) { - - mpc_result_t r; - if (mpc_parse("", s, p, &r)) { - - if (tester(r.output, d)) { - destructor(r.output); - return 1; - } else { - printf("Got "); printer(r.output); printf("\n"); - printf("Expected "); printer(d); printf("\n"); - destructor(r.output); - return 0; - } - - } else { - mpc_err_print(r.error); - mpc_err_delete(r.error); - return 0; - - } - -} - - -/* -** AST -*/ - -void mpc_ast_delete(mpc_ast_t *a) { - - int i; - - if (a == NULL) { return; } - - for (i = 0; i < a->children_num; i++) { - mpc_ast_delete(a->children[i]); - } - - free(a->children); - free(a->tag); - free(a->contents); - free(a); - -} - -static void mpc_ast_delete_no_children(mpc_ast_t *a) { - free(a->children); - free(a->tag); - free(a->contents); - free(a); -} - -mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { - - mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); - - a->tag = malloc(strlen(tag) + 1); - strcpy(a->tag, tag); - - a->contents = malloc(strlen(contents) + 1); - strcpy(a->contents, contents); - - a->state = mpc_state_new(); - - a->children_num = 0; - a->children = NULL; - return a; - -} - -mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) 
{ - - mpc_ast_t *a = mpc_ast_new(tag, ""); - - int i; - va_list va; - va_start(va, tag); - - for (i = 0; i < n; i++) { - mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); - } - - va_end(va); - - return a; - -} - -mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { - - mpc_ast_t *r; - - if (a == NULL) { return a; } - if (a->children_num == 0) { return a; } - if (a->children_num == 1) { return a; } - - r = mpc_ast_new(">", ""); - mpc_ast_add_child(r, a); - return r; -} - -int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { - - int i; - - if (strcmp(a->tag, b->tag) != 0) { return 0; } - if (strcmp(a->contents, b->contents) != 0) { return 0; } - if (a->children_num != b->children_num) { return 0; } - - for (i = 0; i < a->children_num; i++) { - if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } - } - - return 1; -} - -mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { - r->children_num++; - r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); - r->children[r->children_num-1] = a; - return r; -} - -mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { - if (a == NULL) { return a; } - a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); - memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); - memmove(a->tag, t, strlen(t)); - memmove(a->tag + strlen(t), "|", 1); - return a; -} - -mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { - if (a == NULL) { return a; } - a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); - memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); - memmove(a->tag, t, (strlen(t)-1)); - return a; -} - -mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { - a->tag = realloc(a->tag, strlen(t) + 1); - strcpy(a->tag, t); - return a; -} - -mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { - if (a == NULL) { return a; } - a->state = s; - return a; -} - -static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { - - int i; - - if (a == NULL) { - fprintf(fp, "NULL\n"); - return; - 
} - - for (i = 0; i < d; i++) { fprintf(fp, " "); } - - if (strlen(a->contents)) { - fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, - (long unsigned int)(a->state.row+1), - (long unsigned int)(a->state.col+1), - a->contents); - } else { - fprintf(fp, "%s \n", a->tag); - } - - for (i = 0; i < a->children_num; i++) { - mpc_ast_print_depth(a->children[i], d+1, fp); - } - -} - -void mpc_ast_print(mpc_ast_t *a) { - mpc_ast_print_depth(a, 0, stdout); -} - -void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { - mpc_ast_print_depth(a, 0, fp); -} - -int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { - return mpc_ast_get_index_lb(ast, tag, 0); -} - -int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { - int i; - - for(i=lb; ichildren_num; i++) { - if(strcmp(ast->children[i]->tag, tag) == 0) { - return i; - } - } - - return -1; -} - -mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { - return mpc_ast_get_child_lb(ast, tag, 0); -} - -mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { - int i; - - for(i=lb; ichildren_num; i++) { - if(strcmp(ast->children[i]->tag, tag) == 0) { - return ast->children[i]; - } - } - - return NULL; -} - -mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, - mpc_ast_trav_order_t order) -{ - mpc_ast_trav_t *trav, *n_trav; - mpc_ast_t *cnode = ast; - - /* Create the traversal structure */ - trav = malloc(sizeof(mpc_ast_trav_t)); - trav->curr_node = cnode; - trav->parent = NULL; - trav->curr_child = 0; - trav->order = order; - - /* Get start node */ - switch(order) { - case mpc_ast_trav_order_pre: - /* Nothing else is needed for pre order start */ - break; - - case mpc_ast_trav_order_post: - while(cnode->children_num > 0) { - cnode = cnode->children[0]; - - n_trav = malloc(sizeof(mpc_ast_trav_t)); - n_trav->curr_node = cnode; - n_trav->parent = trav; - n_trav->curr_child = 0; - n_trav->order = order; - - trav = n_trav; - } - - break; - - default: - /* Unreachable, but compiler complaints */ - break; - } 
- - return trav; -} - -mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) { - mpc_ast_trav_t *n_trav, *to_free; - mpc_ast_t *ret = NULL; - int cchild; - - /* The end of traversal was reached */ - if(*trav == NULL) return NULL; - - switch((*trav)->order) { - case mpc_ast_trav_order_pre: - ret = (*trav)->curr_node; - - /* If there aren't any more children, go up */ - while(*trav != NULL && - (*trav)->curr_child >= (*trav)->curr_node->children_num) - { - to_free = *trav; - *trav = (*trav)->parent; - free(to_free); - } - - /* If trav is NULL, the end was reached */ - if(*trav == NULL) { - break; - } - - /* Go to next child */ - n_trav = malloc(sizeof(mpc_ast_trav_t)); - - cchild = (*trav)->curr_child; - n_trav->curr_node = (*trav)->curr_node->children[cchild]; - n_trav->parent = *trav; - n_trav->curr_child = 0; - n_trav->order = (*trav)->order; - - (*trav)->curr_child++; - *trav = n_trav; - - break; - - case mpc_ast_trav_order_post: - ret = (*trav)->curr_node; - - /* Move up tree to the parent If the parent doesn't have any more nodes, - * then this is the current node. If it does, move down to its left most - * child. 
Also, free the previous traversal node */ - to_free = *trav; - *trav = (*trav)->parent; - free(to_free); - - if(*trav == NULL) - break; - - /* Next child */ - (*trav)->curr_child++; - - /* If there aren't any more children, this is the next node */ - if((*trav)->curr_child >= (*trav)->curr_node->children_num) { - break; - } - - /* If there are still more children, find the leftmost child from this - * node */ - while((*trav)->curr_node->children_num > 0) { - n_trav = malloc(sizeof(mpc_ast_trav_t)); - - cchild = (*trav)->curr_child; - n_trav->curr_node = (*trav)->curr_node->children[cchild]; - n_trav->parent = *trav; - n_trav->curr_child = 0; - n_trav->order = (*trav)->order; - - *trav = n_trav; - } - - default: - /* Unreachable, but compiler complaints */ - break; - } - - return ret; -} - -void mpc_ast_traverse_free(mpc_ast_trav_t **trav) { - mpc_ast_trav_t *n_trav; - - /* Go through parents until all are free */ - while(*trav != NULL) { - n_trav = (*trav)->parent; - free(*trav); - *trav = n_trav; - } -} - -mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { - - int i, j; - mpc_ast_t** as = (mpc_ast_t**)xs; - mpc_ast_t *r; - - if (n == 0) { return NULL; } - if (n == 1) { return xs[0]; } - if (n == 2 && xs[1] == NULL) { return xs[0]; } - if (n == 2 && xs[0] == NULL) { return xs[1]; } - - r = mpc_ast_new(">", ""); - - for (i = 0; i < n; i++) { - - if (as[i] == NULL) { continue; } - - if (as[i] && as[i]->children_num == 0) { - mpc_ast_add_child(r, as[i]); - } else if (as[i] && as[i]->children_num == 1) { - mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag)); - mpc_ast_delete_no_children(as[i]); - } else if (as[i] && as[i]->children_num >= 2) { - for (j = 0; j < as[i]->children_num; j++) { - mpc_ast_add_child(r, as[i]->children[j]); - } - mpc_ast_delete_no_children(as[i]); - } - - } - - if (r->children_num) { - r->state = r->children[0]->state; - } - - return r; -} - -mpc_val_t *mpcf_str_ast(mpc_val_t *c) { - mpc_ast_t *a = mpc_ast_new("", c); - 
free(c); - return a; -} - -mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { - mpc_state_t *s = ((mpc_state_t**)xs)[0]; - mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; - (void)n; - a = mpc_ast_state(a, *s); - free(s); - return a; -} - -mpc_parser_t *mpca_state(mpc_parser_t *a) { - return mpc_and(2, mpcf_state_ast, mpc_state(), a, free); -} - -mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { - return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); -} - -mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { - return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); -} - -mpc_parser_t *mpca_root(mpc_parser_t *a) { - return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); -} - -mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } -mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } -mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } -mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } -mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } - -mpc_parser_t *mpca_or(int n, ...) { - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_OR; - p->data.or.n = n; - p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - - va_start(va, n); - for (i = 0; i < n; i++) { - p->data.or.xs[i] = va_arg(va, mpc_parser_t*); - } - va_end(va); - - return p; - -} - -mpc_parser_t *mpca_and(int n, ...) 
{ - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_AND; - p->data.and.n = n; - p->data.and.f = mpcf_fold_ast; - p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); - p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - - va_start(va, n); - for (i = 0; i < n; i++) { - p->data.and.xs[i] = va_arg(va, mpc_parser_t*); - } - for (i = 0; i < (n-1); i++) { - p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; - } - va_end(va); - - return p; -} - -mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } - -/* -** Grammar Parser -*/ - -/* -** This is another interesting bootstrapping. -** -** Having a general purpose AST type allows -** users to specify the grammar alone and -** let all fold rules be automatically taken -** care of by existing functions. -** -** You don't get to control the type spat -** out but this means you can make a nice -** parser to take in some grammar in nice -** syntax and spit out a parser that works. -** -** The grammar for this looks surprisingly -** like regex but the main difference is that -** it is now whitespace insensitive and the -** base type takes literals of some form. -*/ - -/* -** -** ### Grammar Grammar -** -** : ( "|" ) | -** -** : * -** -** : -** | "*" -** | "+" -** | "?" 
-** | "{" "}" -** -** : "<" ( | ) ">" -** | -** | -** | -** | "(" ")" -*/ - -typedef struct { - va_list *va; - int parsers_num; - mpc_parser_t **parsers; - int flags; -} mpca_grammar_st_t; - -static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { - (void) n; - if (xs[1] == NULL) { return xs[0]; } - else { return mpca_or(2, xs[0], xs[1]); } -} - -static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { - int i; - mpc_parser_t *p = mpc_pass(); - for (i = 0; i < n; i++) { - if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } - } - return p; -} - -static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { - int num; - (void) n; - if (xs[1] == NULL) { return xs[0]; } - switch(((char*)xs[1])[0]) - { - case '*': { free(xs[1]); return mpca_many(xs[0]); }; break; - case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break; - case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break; - case '!': { free(xs[1]); return mpca_not(xs[0]); }; break; - default: - num = *((int*)xs[1]); - free(xs[1]); - } - return mpca_count(num, xs[0]); -} - -static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { - mpca_grammar_st_t *st = s; - char *y = mpcf_unescape(x); - mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y)); - free(y); - return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); -} - -static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { - mpca_grammar_st_t *st = s; - char *y = mpcf_unescape(x); - mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? 
mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); - free(y); - return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); -} - -static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { - char *y = xs[0]; - char *m = xs[1]; - mpca_grammar_st_t *st = xs[2]; - mpc_parser_t *p; - int mode = MPC_RE_DEFAULT; - - (void)n; - if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } - if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } - y = mpcf_unescape_regex(y); - p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); - free(y); - free(m); - - return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); -} - -/* Should this just use `isdigit` instead? */ -static int is_number(const char* s) { - size_t i; - for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } - return 1; -} - -static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { - - int i; - mpc_parser_t *p; - - /* Case of Number */ - if (is_number(x)) { - - i = strtol(x, NULL, 10); - - while (st->parsers_num <= i) { - st->parsers_num++; - st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); - st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); - if (st->parsers[st->parsers_num-1] == NULL) { - return mpc_failf("No Parser in position %i! 
Only supplied %i Parsers!", i, st->parsers_num); - } - } - - return st->parsers[st->parsers_num-1]; - - /* Case of Identifier */ - } else { - - /* Search Existing Parsers */ - for (i = 0; i < st->parsers_num; i++) { - mpc_parser_t *q = st->parsers[i]; - if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } - if (q->name && strcmp(q->name, x) == 0) { return q; } - } - - /* Search New Parsers */ - while (1) { - - p = va_arg(*st->va, mpc_parser_t*); - - st->parsers_num++; - st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); - st->parsers[st->parsers_num-1] = p; - - if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } - if (p->name && strcmp(p->name, x) == 0) { return p; } - - } - - } - -} - -static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { - - mpca_grammar_st_t *st = s; - mpc_parser_t *p = mpca_grammar_find_parser(x, st); - free(x); - - if (p->name) { - return mpca_state(mpca_root(mpca_add_tag(p, p->name))); - } else { - return mpca_state(mpca_root(p)); - } -} - -mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { - - char *err_msg; - mpc_parser_t *err_out; - mpc_result_t r; - mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; - - GrammarTotal = mpc_new("grammar_total"); - Grammar = mpc_new("grammar"); - Term = mpc_new("term"); - Factor = mpc_new("factor"); - Base = mpc_new("base"); - - mpc_define(GrammarTotal, - mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) - ); - - mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, - Term, - mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), - mpc_soft_delete - )); - - mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); - - mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, - Base, - mpc_or(6, - mpc_sym("*"), - mpc_sym("+"), - mpc_sym("?"), - mpc_sym("!"), - mpc_tok_brackets(mpc_int(), free), - mpc_pass()), - mpc_soft_delete - )); - - mpc_define(Base, mpc_or(5, - 
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), - mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), - mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), - mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), - mpc_tok_parens(Grammar, mpc_soft_delete) - )); - - mpc_optimise(GrammarTotal); - mpc_optimise(Grammar); - mpc_optimise(Factor); - mpc_optimise(Term); - mpc_optimise(Base); - - if(!mpc_parse("", grammar, GrammarTotal, &r)) { - err_msg = mpc_err_string(r.error); - err_out = mpc_failf("Invalid Grammar: %s", err_msg); - mpc_err_delete(r.error); - free(err_msg); - r.output = err_out; - } - - mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); - - mpc_optimise(r.output); - - return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; - -} - -mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { - mpca_grammar_st_t st; - mpc_parser_t *res; - va_list va; - va_start(va, grammar); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - res = mpca_grammar_st(grammar, &st); - free(st.parsers); - va_end(va); - return res; -} - -typedef struct { - char *ident; - char *name; - mpc_parser_t *grammar; -} mpca_stmt_t; - -static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { - mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); - stmt->ident = ((char**)xs)[0]; - stmt->name = ((char**)xs)[1]; - stmt->grammar = ((mpc_parser_t**)xs)[3]; - (void) n; - free(((char**)xs)[2]); - free(((char**)xs)[4]); - - return stmt; -} - -static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { - - int i; - mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); - - for (i = 0; i < n; i++) { - stmts[i] = xs[i]; - } - stmts[n] = NULL; - - return stmts; -} - -static void mpca_stmt_list_delete(mpc_val_t *x) { - - mpca_stmt_t **stmts = x; - - while(*stmts) { - mpca_stmt_t *stmt = 
*stmts; - free(stmt->ident); - free(stmt->name); - mpc_soft_delete(stmt->grammar); - free(stmt); - stmts++; - } - free(x); - -} - -static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { - - mpca_grammar_st_t *st = s; - mpca_stmt_t *stmt; - mpca_stmt_t **stmts = x; - mpc_parser_t *left; - - while(*stmts) { - stmt = *stmts; - left = mpca_grammar_find_parser(stmt->ident, st); - if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } - if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } - mpc_optimise(stmt->grammar); - mpc_define(left, stmt->grammar); - free(stmt->ident); - free(stmt->name); - free(stmt); - stmts++; - } - - free(x); - - return NULL; -} - -static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { - - mpc_result_t r; - mpc_err_t *e; - mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; - - Lang = mpc_new("lang"); - Stmt = mpc_new("stmt"); - Grammar = mpc_new("grammar"); - Term = mpc_new("term"); - Factor = mpc_new("factor"); - Base = mpc_new("base"); - - mpc_define(Lang, mpc_apply_to( - mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), - mpca_stmt_list_apply_to, st - )); - - mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, - mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), - free, free, free, mpc_soft_delete - )); - - mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, - Term, - mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), - mpc_soft_delete - )); - - mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); - - mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, - Base, - mpc_or(6, - mpc_sym("*"), - mpc_sym("+"), - mpc_sym("?"), - mpc_sym("!"), - mpc_tok_brackets(mpc_int(), free), - mpc_pass()), - mpc_soft_delete - )); - - mpc_define(Base, mpc_or(5, - mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), - mpc_apply_to(mpc_tok(mpc_char_lit()), 
mpcaf_grammar_char, st), - mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), - mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), - mpc_tok_parens(Grammar, mpc_soft_delete) - )); - - mpc_optimise(Lang); - mpc_optimise(Stmt); - mpc_optimise(Grammar); - mpc_optimise(Term); - mpc_optimise(Factor); - mpc_optimise(Base); - - if (!mpc_parse_input(i, Lang, &r)) { - e = r.error; - } else { - e = NULL; - } - - mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); - - return e; -} - -mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - va_start(va, f); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_file("", f); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - return err; -} - -mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - va_start(va, p); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_pipe("", p); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - return err; -} - -mpc_err_t *mpca_lang(int flags, const char *language, ...) { - - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - va_start(va, language); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_string("", language); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - return err; -} - -mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) 
{ - - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - - FILE *f = fopen(filename, "rb"); - - if (f == NULL) { - err = mpc_err_file(filename, "Unable to open file!"); - return err; - } - - va_start(va, filename); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_file(filename, f); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - - fclose(f); - - return err; -} - -static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { - - int i, total; - - if (p->retained && !force) { return 0; } - - if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } - - if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } - if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } - if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } - - if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); } - if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); } - - if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } - if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } - - if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } - - if (p->type == MPC_TYPE_OR) { - total = 1; - for(i = 0; i < p->data.or.n; i++) { - total += mpc_nodecount_unretained(p->data.or.xs[i], 0); - } - return total; - } - - if (p->type == MPC_TYPE_AND) { - total = 1; - for(i = 0; i < p->data.and.n; i++) { - total += mpc_nodecount_unretained(p->data.and.xs[i], 0); - } - 
return total; - } - - return 1; - -} - -void mpc_stats(mpc_parser_t* p) { - printf("Stats\n"); - printf("=====\n"); - printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); -} - -static void mpc_optimise_unretained(mpc_parser_t *p, int force) { - - int i, n, m; - mpc_parser_t *t; - - if (p->retained && !force) { return; } - - /* Optimise Subexpressions */ - - if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } - if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } - if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } - if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); } - if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); } - if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } - if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } - if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } - if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } - - if (p->type == MPC_TYPE_OR) { - for(i = 0; i < p->data.or.n; i++) { - mpc_optimise_unretained(p->data.or.xs[i], 0); - } - } - - if (p->type == MPC_TYPE_AND) { - for(i = 0; i < p->data.and.n; i++) { - mpc_optimise_unretained(p->data.and.xs[i], 0); - } - } - - /* Perform optimisations */ - - while (1) { - - /* Merge rhs `or` */ - if (p->type == MPC_TYPE_OR - && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR - && !p->data.or.xs[p->data.or.n-1]->retained) { - t = p->data.or.xs[p->data.or.n-1]; - n = p->data.or.n; m = t->data.or.n; - p->data.or.n = n + m - 1; - p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); - memmove(p->data.or.xs + n - 1, t->data.or.xs, m * 
sizeof(mpc_parser_t*)); - free(t->data.or.xs); free(t->name); free(t); - continue; - } - - /* Merge lhs `or` */ - if (p->type == MPC_TYPE_OR - && p->data.or.xs[0]->type == MPC_TYPE_OR - && !p->data.or.xs[0]->retained) { - t = p->data.or.xs[0]; - n = p->data.or.n; m = t->data.or.n; - p->data.or.n = n + m - 1; - p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); - memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); - memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); - free(t->data.or.xs); free(t->name); free(t); - continue; - } - - /* Remove ast `pass` */ - if (p->type == MPC_TYPE_AND - && p->data.and.n == 2 - && p->data.and.xs[0]->type == MPC_TYPE_PASS - && !p->data.and.xs[0]->retained - && p->data.and.f == mpcf_fold_ast) { - t = p->data.and.xs[1]; - mpc_delete(p->data.and.xs[0]); - free(p->data.and.xs); free(p->data.and.dxs); free(p->name); - memcpy(p, t, sizeof(mpc_parser_t)); - free(t); - continue; - } - - /* Merge ast lhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_fold_ast - && p->data.and.xs[0]->type == MPC_TYPE_AND - && !p->data.and.xs[0]->retained - && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { - t = p->data.and.xs[0]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); - memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - /* Merge ast rhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_fold_ast - && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND - && 
!p->data.and.xs[p->data.and.n-1]->retained - && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { - t = p->data.and.xs[p->data.and.n-1]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - /* Remove re `lift` */ - if (p->type == MPC_TYPE_AND - && p->data.and.n == 2 - && p->data.and.xs[0]->type == MPC_TYPE_LIFT - && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str - && !p->data.and.xs[0]->retained - && p->data.and.f == mpcf_strfold) { - t = p->data.and.xs[1]; - mpc_delete(p->data.and.xs[0]); - free(p->data.and.xs); free(p->data.and.dxs); free(p->name); - memcpy(p, t, sizeof(mpc_parser_t)); - free(t); - continue; - } - - /* Merge re lhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_strfold - && p->data.and.xs[0]->type == MPC_TYPE_AND - && !p->data.and.xs[0]->retained - && p->data.and.xs[0]->data.and.f == mpcf_strfold) { - t = p->data.and.xs[0]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); - memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - /* Merge re rhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_strfold - && 
p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND - && !p->data.and.xs[p->data.and.n-1]->retained - && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { - t = p->data.and.xs[p->data.and.n-1]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - return; - - } - -} - -void mpc_optimise(mpc_parser_t *p) { - mpc_optimise_unretained(p, 1); -} - +#include "mpc.h" + +/* +** State Type +*/ + +static mpc_state_t mpc_state_invalid(void) { + mpc_state_t s; + s.pos = -1; + s.row = -1; + s.col = -1; + s.term = 0; + return s; +} + +static mpc_state_t mpc_state_new(void) { + mpc_state_t s; + s.pos = 0; + s.row = 0; + s.col = 0; + s.term = 0; + return s; +} + +/* +** Input Type +*/ + +/* +** In mpc the input type has three modes of +** operation: String, File and Pipe. +** +** String is easy. The whole contents are +** loaded into a buffer and scanned through. +** The cursor can jump around at will making +** backtracking easy. +** +** The second is a File which is also somewhat +** easy. The contents are never loaded into +** memory but backtracking can still be achieved +** by seeking in the file at different positions. +** +** The final mode is Pipe. This is the difficult +** one. As we assume pipes cannot be seeked - and +** only support a single character lookahead at +** any point, when the input is marked for a +** potential backtracking we start buffering any +** input. +** +** This means that if we are requested to seek +** back we can simply start reading from the +** buffer instead of the input. 
+** +** Of course using `mpc_predictive` will disable +** backtracking and make LL(1) grammars easy +** to parse for all input methods. +** +*/ + +enum { + MPC_INPUT_STRING = 0, + MPC_INPUT_FILE = 1, + MPC_INPUT_PIPE = 2 +}; + +enum { + MPC_INPUT_MARKS_MIN = 32 +}; + +enum { + MPC_INPUT_MEM_NUM = 512 +}; + +typedef struct { + char mem[64]; +} mpc_mem_t; + +typedef struct { + + int type; + char *filename; + mpc_state_t state; + + char *string; + char *buffer; + FILE *file; + + int suppress; + int backtrack; + int marks_slots; + int marks_num; + mpc_state_t *marks; + + char *lasts; + char last; + + size_t mem_index; + char mem_full[MPC_INPUT_MEM_NUM]; + mpc_mem_t mem[MPC_INPUT_MEM_NUM]; + +} mpc_input_t; + +static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->type = MPC_INPUT_STRING; + + i->state = mpc_state_new(); + + i->string = malloc(strlen(string) + 1); + strcpy(i->string, string); + i->buffer = NULL; + i->file = NULL; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; +} + +static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->type = MPC_INPUT_STRING; + + i->state = mpc_state_new(); + + i->string = malloc(length + 1); + strncpy(i->string, string, length); + i->string[length] = '\0'; + i->buffer = NULL; + i->file = NULL; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = 
malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; + +} + +static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + + i->type = MPC_INPUT_PIPE; + i->state = mpc_state_new(); + + i->string = NULL; + i->buffer = NULL; + i->file = pipe; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; + +} + +static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->type = MPC_INPUT_FILE; + i->state = mpc_state_new(); + + i->string = NULL; + i->buffer = NULL; + i->file = file; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; +} + +static void mpc_input_delete(mpc_input_t *i) { + + free(i->filename); + + if (i->type == MPC_INPUT_STRING) { free(i->string); } + if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } + + free(i->marks); + free(i->lasts); + free(i); +} + +static int mpc_mem_ptr(mpc_input_t *i, void *p) { + return + (char*)p >= (char*)(i->mem) && + (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); +} + +static void *mpc_malloc(mpc_input_t *i, size_t n) { + 
size_t j; + char *p; + + if (n > sizeof(mpc_mem_t)) { return malloc(n); } + + j = i->mem_index; + do { + if (!i->mem_full[i->mem_index]) { + p = (void*)(i->mem + i->mem_index); + i->mem_full[i->mem_index] = 1; + i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; + return p; + } + i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; + } while (j != i->mem_index); + + return malloc(n); +} + +static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { + char *x = mpc_malloc(i, n * m); + memset(x, 0, n * m); + return x; +} + +static void mpc_free(mpc_input_t *i, void *p) { + size_t j; + if (!mpc_mem_ptr(i, p)) { free(p); return; } + j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); + i->mem_full[j] = 0; +} + +static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { + + char *q = NULL; + + if (!mpc_mem_ptr(i, p)) { return realloc(p, n); } + + if (n > sizeof(mpc_mem_t)) { + q = malloc(n); + memcpy(q, p, sizeof(mpc_mem_t)); + mpc_free(i, p); + return q; + } + + return p; +} + +static void *mpc_export(mpc_input_t *i, void *p) { + char *q = NULL; + if (!mpc_mem_ptr(i, p)) { return p; } + q = malloc(sizeof(mpc_mem_t)); + memcpy(q, p, sizeof(mpc_mem_t)); + mpc_free(i, p); + return q; +} + +static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } +static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } + +static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } +static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } + +static void mpc_input_mark(mpc_input_t *i) { + + if (i->backtrack < 1) { return; } + + i->marks_num++; + + if (i->marks_num > i->marks_slots) { + i->marks_slots = i->marks_num + i->marks_num / 2; + i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); + i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); + } + + i->marks[i->marks_num-1] = i->state; + i->lasts[i->marks_num-1] = i->last; + + if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { 
+ i->buffer = calloc(1, 1); + } + +} + +static void mpc_input_unmark(mpc_input_t *i) { + int j; + + if (i->backtrack < 1) { return; } + + i->marks_num--; + + if (i->marks_slots > i->marks_num + i->marks_num / 2 + && i->marks_slots > MPC_INPUT_MARKS_MIN) { + i->marks_slots = + i->marks_num > MPC_INPUT_MARKS_MIN ? + i->marks_num : MPC_INPUT_MARKS_MIN; + i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); + i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); + } + + if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { + for (j = strlen(i->buffer) - 1; j >= 0; j--) + ungetc(i->buffer[j], i->file); + + free(i->buffer); + i->buffer = NULL; + } + +} + +static void mpc_input_rewind(mpc_input_t *i) { + + if (i->backtrack < 1) { return; } + + i->state = i->marks[i->marks_num-1]; + i->last = i->lasts[i->marks_num-1]; + + if (i->type == MPC_INPUT_FILE) { + fseek(i->file, i->state.pos, SEEK_SET); + } + + mpc_input_unmark(i); +} + +static int mpc_input_buffer_in_range(mpc_input_t *i) { + return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); +} + +static char mpc_input_buffer_get(mpc_input_t *i) { + return i->buffer[i->state.pos - i->marks[0].pos]; +} + +static char mpc_input_getc(mpc_input_t *i) { + + char c = '\0'; + + switch (i->type) { + + case MPC_INPUT_STRING: return i->string[i->state.pos]; + case MPC_INPUT_FILE: c = fgetc(i->file); return c; + case MPC_INPUT_PIPE: + + if (!i->buffer) { c = getc(i->file); return c; } + + if (i->buffer && mpc_input_buffer_in_range(i)) { + c = mpc_input_buffer_get(i); + return c; + } else { + c = getc(i->file); + return c; + } + + default: return c; + } +} + +static char mpc_input_peekc(mpc_input_t *i) { + + char c = '\0'; + + switch (i->type) { + case MPC_INPUT_STRING: return i->string[i->state.pos]; + case MPC_INPUT_FILE: + + c = fgetc(i->file); + if (feof(i->file)) { return '\0'; } + + fseek(i->file, -1, SEEK_CUR); + return c; + + case MPC_INPUT_PIPE: + + if (!i->buffer) { + c = getc(i->file); + if 
(feof(i->file)) { return '\0'; } + ungetc(c, i->file); + return c; + } + + if (i->buffer && mpc_input_buffer_in_range(i)) { + return mpc_input_buffer_get(i); + } else { + c = getc(i->file); + if (feof(i->file)) { return '\0'; } + ungetc(c, i->file); + return c; + } + + default: return c; + } + +} + +static int mpc_input_terminated(mpc_input_t *i) { + return mpc_input_peekc(i) == '\0'; +} + +static int mpc_input_failure(mpc_input_t *i, char c) { + + switch (i->type) { + case MPC_INPUT_STRING: { break; } + case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } + case MPC_INPUT_PIPE: { + + if (!i->buffer) { ungetc(c, i->file); break; } + + if (i->buffer && mpc_input_buffer_in_range(i)) { + break; + } else { + ungetc(c, i->file); + } + } + default: { break; } + } + return 0; +} + +static int mpc_input_success(mpc_input_t *i, char c, char **o) { + + if (i->type == MPC_INPUT_PIPE + && i->buffer && !mpc_input_buffer_in_range(i)) { + i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); + i->buffer[strlen(i->buffer) + 1] = '\0'; + i->buffer[strlen(i->buffer) + 0] = c; + } + + i->last = c; + i->state.pos++; + i->state.col++; + + if (c == '\n') { + i->state.col = 0; + i->state.row++; + } + + if (o) { + (*o) = mpc_malloc(i, 2); + (*o)[0] = c; + (*o)[1] = '\0'; + } + + return 1; +} + +static int mpc_input_any(mpc_input_t *i, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return mpc_input_success(i, x, o); +} + +static int mpc_input_char(mpc_input_t *i, char c, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return x >= c && x <= d ? 
mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { + + const char *x = c; + + mpc_input_mark(i); + while (*x) { + if (!mpc_input_char(i, *x, NULL)) { + mpc_input_rewind(i); + return 0; + } + x++; + } + mpc_input_unmark(i); + + *o = mpc_malloc(i, strlen(c) + 1); + strcpy(*o, c); + return 1; +} + +static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { + *o = NULL; + return f(i->last, mpc_input_peekc(i)); +} + +static int mpc_input_soi(mpc_input_t* i, char **o) { + *o = NULL; + return i->last == '\0'; +} + +static int mpc_input_eoi(mpc_input_t* i, char **o) { + *o = NULL; + if (i->state.term) { + return 0; + } else if (mpc_input_terminated(i)) { + i->state.term = 1; + return 1; + } else { + return 0; + } +} + +static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { + mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); + memcpy(r, &i->state, sizeof(mpc_state_t)); + return r; +} + +/* +** Error Type +*/ + +void mpc_err_delete(mpc_err_t *x) { + int i; + for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } + free(x->expected); + free(x->filename); + free(x->failure); + free(x); +} + +void mpc_err_print(mpc_err_t *x) { + mpc_err_print_to(x, stdout); +} + +void 
mpc_err_print_to(mpc_err_t *x, FILE *f) { + char *str = mpc_err_string(x); + fprintf(f, "%s", str); + free(str); +} + +static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) { + /* TODO: Error Checking on Length */ + int left = ((*max) - (*pos)); + va_list va; + va_start(va, fmt); + if (left < 0) { left = 0;} + (*pos) += vsprintf(buffer + (*pos), fmt, va); + va_end(va); +} + +static char char_unescape_buffer[4]; + +static const char *mpc_err_char_unescape(char c) { + + char_unescape_buffer[0] = '\''; + char_unescape_buffer[1] = ' '; + char_unescape_buffer[2] = '\''; + char_unescape_buffer[3] = '\0'; + + switch (c) { + case '\a': return "bell"; + case '\b': return "backspace"; + case '\f': return "formfeed"; + case '\r': return "carriage return"; + case '\v': return "vertical tab"; + case '\0': return "end of input"; + case '\n': return "newline"; + case '\t': return "tab"; + case ' ' : return "space"; + default: + char_unescape_buffer[1] = c; + return char_unescape_buffer; + } + +} + +char *mpc_err_string(mpc_err_t *x) { + + int i; + int pos = 0; + int max = 1023; + char *buffer = calloc(1, 1024); + + if (x->failure) { + mpc_err_string_cat(buffer, &pos, &max, + "%s: error: %s\n", x->filename, x->failure); + return buffer; + } + + mpc_err_string_cat(buffer, &pos, &max, + "%s:%li:%li: error: expected ", x->filename, x->state.row+1, x->state.col+1); + + if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } + if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } + if (x->expected_num >= 2) { + + for (i = 0; i < x->expected_num-2; i++) { + mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); + } + + mpc_err_string_cat(buffer, &pos, &max, "%s or %s", + x->expected[x->expected_num-2], + x->expected[x->expected_num-1]); + } + + mpc_err_string_cat(buffer, &pos, &max, " at "); + mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->received)); 
+ mpc_err_string_cat(buffer, &pos, &max, "\n"); + + return realloc(buffer, strlen(buffer) + 1); +} + +static mpc_err_t *mpc_err_new(mpc_input_t *i, const char *expected) { + mpc_err_t *x; + if (i->suppress) { return NULL; } + x = mpc_malloc(i, sizeof(mpc_err_t)); + x->filename = mpc_malloc(i, strlen(i->filename) + 1); + strcpy(x->filename, i->filename); + x->state = i->state; + x->expected_num = 1; + x->expected = mpc_malloc(i, sizeof(char*)); + x->expected[0] = mpc_malloc(i, strlen(expected) + 1); + strcpy(x->expected[0], expected); + x->failure = NULL; + x->received = mpc_input_peekc(i); + return x; +} + +static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { + mpc_err_t *x; + if (i->suppress) { return NULL; } + x = mpc_malloc(i, sizeof(mpc_err_t)); + x->filename = mpc_malloc(i, strlen(i->filename) + 1); + strcpy(x->filename, i->filename); + x->state = i->state; + x->expected_num = 0; + x->expected = NULL; + x->failure = mpc_malloc(i, strlen(failure) + 1); + strcpy(x->failure, failure); + x->received = ' '; + return x; +} + +static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { + mpc_err_t *x; + x = malloc(sizeof(mpc_err_t)); + x->filename = malloc(strlen(filename) + 1); + strcpy(x->filename, filename); + x->state = mpc_state_new(); + x->expected_num = 0; + x->expected = NULL; + x->failure = malloc(strlen(failure) + 1); + strcpy(x->failure, failure); + x->received = ' '; + return x; +} + +static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { + int j; + if (x == NULL) { return; } + for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } + mpc_free(i, x->expected); + mpc_free(i, x->filename); + mpc_free(i, x->failure); + mpc_free(i, x); +} + +static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { + int j; + for (j = 0; j < x->expected_num; j++) { + x->expected[j] = mpc_export(i, x->expected[j]); + } + x->expected = mpc_export(i, x->expected); + x->filename = mpc_export(i, x->filename); + 
x->failure = mpc_export(i, x->failure); + return mpc_export(i, x); +} + +static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { + int j; + (void)i; + for (j = 0; j < x->expected_num; j++) { + if (strcmp(x->expected[j], expected) == 0) { return 1; } + } + return 0; +} + +static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { + (void)i; + x->expected_num++; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); + strcpy(x->expected[x->expected_num-1], expected); +} + +static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { + + int j, k, fst; + mpc_err_t *e; + + fst = -1; + for (j = 0; j < n; j++) { + if (x[j] != NULL) { fst = j; } + } + + if (fst == -1) { return NULL; } + + e = mpc_malloc(i, sizeof(mpc_err_t)); + e->state = mpc_state_invalid(); + e->expected_num = 0; + e->expected = NULL; + e->failure = NULL; + e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); + strcpy(e->filename, x[fst]->filename); + + for (j = 0; j < n; j++) { + if (x[j] == NULL) { continue; } + if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } + } + + for (j = 0; j < n; j++) { + if (x[j] == NULL) { continue; } + if (x[j]->state.pos < e->state.pos) { continue; } + + if (x[j]->failure) { + e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); + strcpy(e->failure, x[j]->failure); + break; + } + + e->received = x[j]->received; + + for (k = 0; k < x[j]->expected_num; k++) { + if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { + mpc_err_add_expected(i, e, x[j]->expected[k]); + } + } + } + + for (j = 0; j < n; j++) { + if (x[j] == NULL) { continue; } + mpc_err_delete_internal(i, x[j]); + } + + return e; +} + +static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { + + int j = 0; + size_t l = 0; + char *expect = NULL; + + if (x == NULL) { return NULL; } + + if (x->expected_num == 0) { + 
expect = mpc_calloc(i, 1, 1); + x->expected_num = 1; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[0] = expect; + return x; + } + + else if (x->expected_num == 1) { + expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); + strcpy(expect, prefix); + strcat(expect, x->expected[0]); + mpc_free(i, x->expected[0]); + x->expected[0] = expect; + return x; + } + + else if (x->expected_num > 1) { + + l += strlen(prefix); + for (j = 0; j < x->expected_num-2; j++) { + l += strlen(x->expected[j]) + strlen(", "); + } + l += strlen(x->expected[x->expected_num-2]); + l += strlen(" or "); + l += strlen(x->expected[x->expected_num-1]); + + expect = mpc_malloc(i, l + 1); + + strcpy(expect, prefix); + for (j = 0; j < x->expected_num-2; j++) { + strcat(expect, x->expected[j]); strcat(expect, ", "); + } + strcat(expect, x->expected[x->expected_num-2]); + strcat(expect, " or "); + strcat(expect, x->expected[x->expected_num-1]); + + for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } + + x->expected_num = 1; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[0] = expect; + return x; + } + + return NULL; +} + +static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { + return mpc_err_repeat(i, x, "one or more of "); +} + +static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { + mpc_err_t *y; + int digits = n/10 + 1; + char *prefix; + prefix = mpc_malloc(i, digits + strlen(" of ") + 1); + sprintf(prefix, "%i of ", n); + y = mpc_err_repeat(i, x, prefix); + mpc_free(i, prefix); + return y; +} + +static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { + mpc_err_t *errs[2]; + errs[0] = x; + errs[1] = y; + return mpc_err_or(i, errs, 2); +} + +/* +** Parser Type +*/ + +enum { + MPC_TYPE_UNDEFINED = 0, + MPC_TYPE_PASS = 1, + MPC_TYPE_FAIL = 2, + MPC_TYPE_LIFT = 3, + MPC_TYPE_LIFT_VAL = 4, + MPC_TYPE_EXPECT = 5, + MPC_TYPE_ANCHOR = 6, 
+ MPC_TYPE_STATE = 7, + + MPC_TYPE_ANY = 8, + MPC_TYPE_SINGLE = 9, + MPC_TYPE_ONEOF = 10, + MPC_TYPE_NONEOF = 11, + MPC_TYPE_RANGE = 12, + MPC_TYPE_SATISFY = 13, + MPC_TYPE_STRING = 14, + + MPC_TYPE_APPLY = 15, + MPC_TYPE_APPLY_TO = 16, + MPC_TYPE_PREDICT = 17, + MPC_TYPE_NOT = 18, + MPC_TYPE_MAYBE = 19, + MPC_TYPE_MANY = 20, + MPC_TYPE_MANY1 = 21, + MPC_TYPE_COUNT = 22, + + MPC_TYPE_OR = 23, + MPC_TYPE_AND = 24, + + MPC_TYPE_CHECK = 25, + MPC_TYPE_CHECK_WITH = 26, + + MPC_TYPE_SOI = 27, + MPC_TYPE_EOI = 28 +}; + +typedef struct { char *m; } mpc_pdata_fail_t; +typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; +typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; +typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; +typedef struct { char x; } mpc_pdata_single_t; +typedef struct { char x; char y; } mpc_pdata_range_t; +typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; +typedef struct { char *x; } mpc_pdata_string_t; +typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; +typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; +typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; +typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; + +typedef union { + mpc_pdata_fail_t fail; + mpc_pdata_lift_t lift; + mpc_pdata_expect_t expect; + mpc_pdata_anchor_t anchor; + mpc_pdata_single_t single; + mpc_pdata_range_t range; + mpc_pdata_satisfy_t satisfy; + mpc_pdata_string_t string; + mpc_pdata_apply_t apply; + 
mpc_pdata_apply_to_t apply_to; + mpc_pdata_check_t check; + mpc_pdata_check_with_t check_with; + mpc_pdata_predict_t predict; + mpc_pdata_not_t not; + mpc_pdata_repeat_t repeat; + mpc_pdata_and_t and; + mpc_pdata_or_t or; +} mpc_pdata_t; + +struct mpc_parser_t { + char *name; + mpc_pdata_t data; + char type; + char retained; +}; + +static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { + int j; + for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } + return xs[x]; +} + +static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } +static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } +static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } + +static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { + int j; + size_t l = 0; + if (n == 0) { return mpc_calloc(i, 1, 1); } + for (j = 0; j < n; j++) { l += strlen(xs[j]); } + xs[0] = mpc_realloc(i, xs[0], l + 1); + for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } + return xs[0]; +} + +static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { + mpc_state_t *s = ((mpc_state_t**)xs)[0]; + mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; + a = mpc_ast_state(a, *s); + mpc_free(i, s); + (void) n; + return a; +} + +static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { + int j; + if (f == mpcf_null) { return mpcf_null(n, xs); } + if (f == mpcf_fst) { return mpcf_fst(n, xs); } + if (f == mpcf_snd) { return mpcf_snd(n, xs); } + if (f == mpcf_trd) { return mpcf_trd(n, xs); } + if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } + if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } + if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } + if (f == mpcf_strfold) { return 
mpcf_input_strfold(i, n, xs); } + if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } + for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } + return f(j, xs); +} + +static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { + mpc_free(i, x); + return NULL; +} + +static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { + mpc_ast_t *a = mpc_ast_new("", c); + mpc_free(i, c); + return a; +} + +static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { + if (f == mpcf_free) { return mpcf_input_free(i, x); } + if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } + return f(mpc_export(i, x)); +} + +static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { + return f(mpc_export(i, x), d); +} + +static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { + if (d == free) { mpc_free(i, x); return; } + d(mpc_export(i, x)); +} + +enum { + MPC_PARSE_STACK_MIN = 4 +}; + +#define MPC_SUCCESS(x) r->output = x; return 1 +#define MPC_FAILURE(x) r->error = x; return 0 +#define MPC_PRIMITIVE(x) \ + if (x) { MPC_SUCCESS(r->output); } \ + else { MPC_FAILURE(NULL); } + +#define MPC_MAX_RECURSION_DEPTH 1000 + +static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { + + int j = 0, k = 0; + mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; + mpc_result_t *results; + int results_slots = MPC_PARSE_STACK_MIN; + + if (depth == MPC_MAX_RECURSION_DEPTH) + { + MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); + } + + switch (p->type) { + + /* Basic Parsers */ + + case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); + case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); + case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); + case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, 
(char**)&r->output)); + case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); + case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); + case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); + case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); + + /* Other parsers */ + + case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); + case MPC_TYPE_PASS: MPC_SUCCESS(NULL); + case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); + case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); + case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); + case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); + + /* Application Parsers */ + + case MPC_TYPE_APPLY: + if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { + MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); + } else { + MPC_FAILURE(r->output); + } + + case MPC_TYPE_APPLY_TO: + if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { + MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_CHECK: + if (mpc_parse_run(i, p->data.check.x, r, e, depth+1)) { + if (p->data.check.f(&r->output)) { + MPC_SUCCESS(r->output); + } else { + mpc_parse_dtor(i, p->data.check.dx, r->output); + MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); + } + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_CHECK_WITH: + if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { + if (p->data.check_with.f(&r->output, p->data.check_with.d)) { + MPC_SUCCESS(r->output); + } else { + mpc_parse_dtor(i, p->data.check.dx, r->output); + MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); + } + } else { + 
MPC_FAILURE(r->error); + } + + case MPC_TYPE_EXPECT: + mpc_input_suppress_enable(i); + if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { + mpc_input_suppress_disable(i); + MPC_SUCCESS(r->output); + } else { + mpc_input_suppress_disable(i); + MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); + } + + case MPC_TYPE_PREDICT: + mpc_input_backtrack_disable(i); + if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { + mpc_input_backtrack_enable(i); + MPC_SUCCESS(r->output); + } else { + mpc_input_backtrack_enable(i); + MPC_FAILURE(r->error); + } + + /* Optional Parsers */ + + /* TODO: Update Not Error Message */ + + case MPC_TYPE_NOT: + mpc_input_mark(i); + mpc_input_suppress_enable(i); + if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { + mpc_input_rewind(i); + mpc_input_suppress_disable(i); + mpc_parse_dtor(i, p->data.not.dx, r->output); + MPC_FAILURE(mpc_err_new(i, "opposite")); + } else { + mpc_input_unmark(i); + mpc_input_suppress_disable(i); + MPC_SUCCESS(p->data.not.lf()); + } + + case MPC_TYPE_MAYBE: + if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { + MPC_SUCCESS(r->output); + } else { + *e = mpc_err_merge(i, *e, r->error); + MPC_SUCCESS(p->data.not.lf()); + } + + /* Repeat Parsers */ + + case MPC_TYPE_MANY: + + results = results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + if (j == MPC_PARSE_STACK_MIN) { + results_slots = j + j / 2; + results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); + memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); + } else if (j >= results_slots) { + results_slots = j + j / 2; + results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); + } + } + + *e = mpc_err_merge(i, *e, results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + case MPC_TYPE_MANY1: + + results = results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, 
&results[j], e, depth+1)) { + j++; + if (j == MPC_PARSE_STACK_MIN) { + results_slots = j + j / 2; + results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); + memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); + } else if (j >= results_slots) { + results_slots = j + j / 2; + results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); + } + } + + if (j == 0) { + MPC_FAILURE( + mpc_err_many1(i, results[j].error); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + + *e = mpc_err_merge(i, *e, results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + case MPC_TYPE_COUNT: + + results = p->data.repeat.n > MPC_PARSE_STACK_MIN + ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) + : results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + if (j == p->data.repeat.n) { break; } + } + + if (j == p->data.repeat.n) { + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + for (k = 0; k < j; k++) { + mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); + } + MPC_FAILURE( + mpc_err_count(i, results[j].error, p->data.repeat.n); + if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + /* Combinatory Parsers */ + + case MPC_TYPE_OR: + + if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } + + results = p->data.or.n > MPC_PARSE_STACK_MIN + ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) + : results_stk; + + for (j = 0; j < p->data.or.n; j++) { + if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { + MPC_SUCCESS(results[j].output; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + *e = mpc_err_merge(i, *e, results[j].error); + } + } + + MPC_FAILURE(NULL; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + case MPC_TYPE_AND: + + if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } + + results = p->data.or.n > MPC_PARSE_STACK_MIN + ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) + : results_stk; + + mpc_input_mark(i); + for (j = 0; j < p->data.and.n; j++) { + if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { + mpc_input_rewind(i); + for (k = 0; k < j; k++) { + mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); + } + MPC_FAILURE(results[j].error; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + } + mpc_input_unmark(i); + MPC_SUCCESS( + mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + /* End */ + + default: + + MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); + } + + return 0; + +} + +#undef MPC_SUCCESS +#undef MPC_FAILURE +#undef MPC_PRIMITIVE + +int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); + e->state = mpc_state_invalid(); + x = mpc_parse_run(i, p, r, &e, 0); + if (x) { + mpc_err_delete_internal(i, e); + r->output = mpc_export(i, r->output); + } else { + r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); + } + return x; +} + +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_string(filename, string); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_nparse(const char *filename, const 
char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_nstring(filename, string, length); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_file(filename, file); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_pipe(filename, pipe); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { + + FILE *f = fopen(filename, "rb"); + int res; + + if (f == NULL) { + r->output = NULL; + r->error = mpc_err_file(filename, "Unable to open file!"); + return 0; + } + + res = mpc_parse_file(filename, f, p, r); + fclose(f); + return res; +} + +/* +** Building a Parser +*/ + +static void mpc_undefine_unretained(mpc_parser_t *p, int force); + +static void mpc_undefine_or(mpc_parser_t *p) { + + int i; + for (i = 0; i < p->data.or.n; i++) { + mpc_undefine_unretained(p->data.or.xs[i], 0); + } + free(p->data.or.xs); + +} + +static void mpc_undefine_and(mpc_parser_t *p) { + + int i; + for (i = 0; i < p->data.and.n; i++) { + mpc_undefine_unretained(p->data.and.xs[i], 0); + } + free(p->data.and.xs); + free(p->data.and.dxs); + +} + +static void mpc_undefine_unretained(mpc_parser_t *p, int force) { + + if (p->retained && !force) { return; } + + switch (p->type) { + + case MPC_TYPE_FAIL: free(p->data.fail.m); break; + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + free(p->data.string.x); + break; + + case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; + case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; + case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 
0); break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_NOT: + mpc_undefine_unretained(p->data.not.x, 0); + break; + + case MPC_TYPE_EXPECT: + mpc_undefine_unretained(p->data.expect.x, 0); + free(p->data.expect.m); + break; + + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + mpc_undefine_unretained(p->data.repeat.x, 0); + break; + + case MPC_TYPE_OR: mpc_undefine_or(p); break; + case MPC_TYPE_AND: mpc_undefine_and(p); break; + + case MPC_TYPE_CHECK: + mpc_undefine_unretained(p->data.check.x, 0); + free(p->data.check.e); + break; + + case MPC_TYPE_CHECK_WITH: + mpc_undefine_unretained(p->data.check_with.x, 0); + free(p->data.check_with.e); + break; + + default: break; + } + + if (!force) { + free(p->name); + free(p); + } + +} + +void mpc_delete(mpc_parser_t *p) { + if (p->retained) { + + if (p->type != MPC_TYPE_UNDEFINED) { + mpc_undefine_unretained(p, 0); + } + + free(p->name); + free(p); + + } else { + mpc_undefine_unretained(p, 0); + } +} + +static void mpc_soft_delete(mpc_val_t *x) { + mpc_undefine_unretained(x, 0); +} + +static mpc_parser_t *mpc_undefined(void) { + mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); + p->retained = 0; + p->type = MPC_TYPE_UNDEFINED; + p->name = NULL; + return p; +} + +mpc_parser_t *mpc_new(const char *name) { + mpc_parser_t *p = mpc_undefined(); + p->retained = 1; + p->name = realloc(p->name, strlen(name) + 1); + strcpy(p->name, name); + return p; +} + +mpc_parser_t *mpc_copy(mpc_parser_t *a) { + int i = 0; + mpc_parser_t *p; + + if (a->retained) { return a; } + + p = mpc_undefined(); + p->retained = a->retained; + p->type = a->type; + p->data = a->data; + + if (a->name) { + p->name = malloc(strlen(a->name)+1); + strcpy(p->name, a->name); + } + + switch (a->type) { + + case MPC_TYPE_FAIL: + p->data.fail.m = malloc(strlen(a->data.fail.m)+1); + strcpy(p->data.fail.m, a->data.fail.m); + break; + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + p->data.string.x = 
malloc(strlen(a->data.string.x)+1); + strcpy(p->data.string.x, a->data.string.x); + break; + + case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; + case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; + case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_NOT: + p->data.not.x = mpc_copy(a->data.not.x); + break; + + case MPC_TYPE_EXPECT: + p->data.expect.x = mpc_copy(a->data.expect.x); + p->data.expect.m = malloc(strlen(a->data.expect.m)+1); + strcpy(p->data.expect.m, a->data.expect.m); + break; + + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + p->data.repeat.x = mpc_copy(a->data.repeat.x); + break; + + case MPC_TYPE_OR: + p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); + for (i = 0; i < a->data.or.n; i++) { + p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); + } + break; + case MPC_TYPE_AND: + p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); + for (i = 0; i < a->data.and.n; i++) { + p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); + } + p->data.and.dxs = malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); + for (i = 0; i < a->data.and.n-1; i++) { + p->data.and.dxs[i] = a->data.and.dxs[i]; + } + break; + + case MPC_TYPE_CHECK: + p->data.check.x = mpc_copy(a->data.check.x); + p->data.check.e = malloc(strlen(a->data.check.e)+1); + strcpy(p->data.check.e, a->data.check.e); + break; + case MPC_TYPE_CHECK_WITH: + p->data.check_with.x = mpc_copy(a->data.check_with.x); + p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); + strcpy(p->data.check_with.e, a->data.check_with.e); + break; + + default: break; + } + + + return p; +} + +mpc_parser_t *mpc_undefine(mpc_parser_t *p) { + mpc_undefine_unretained(p, 1); + p->type = MPC_TYPE_UNDEFINED; + return p; +} + +mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { + + if (p->retained) { + p->type = a->type; + p->data = a->data; + } else { + 
mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); + p->type = a2->type; + p->data = a2->data; + free(a2); + } + + free(a); + return p; +} + +void mpc_cleanup(int n, ...) { + int i; + mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); + + va_list va; + va_start(va, n); + for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } + for (i = 0; i < n; i++) { mpc_undefine(list[i]); } + for (i = 0; i < n; i++) { mpc_delete(list[i]); } + va_end(va); + + free(list); +} + +mpc_parser_t *mpc_pass(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_PASS; + return p; +} + +mpc_parser_t *mpc_fail(const char *m) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_FAIL; + p->data.fail.m = malloc(strlen(m) + 1); + strcpy(p->data.fail.m, m); + return p; +} + +/* +** As `snprintf` is not ANSI standard this +** function `mpc_failf` should be considered +** unsafe. +** +** You have a few options if this is going to be +** trouble. +** +** - Ensure the format string does not exceed +** the buffer length using precision specifiers +** such as `%.512s`. +** +** - Patch this function in your code base to +** use `snprintf` or whatever variant your +** system supports. +** +** - Avoid it altogether. +** +*/ + +mpc_parser_t *mpc_failf(const char *fmt, ...) 
{ + + va_list va; + char *buffer; + + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_FAIL; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + buffer = realloc(buffer, strlen(buffer) + 1); + p->data.fail.m = buffer; + return p; + +} + +mpc_parser_t *mpc_lift_val(mpc_val_t *x) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_LIFT_VAL; + p->data.lift.x = x; + return p; +} + +mpc_parser_t *mpc_lift(mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_LIFT; + p->data.lift.lf = lf; + return p; +} + +mpc_parser_t *mpc_anchor(int(*f)(char,char)) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ANCHOR; + p->data.anchor.f = f; + return mpc_expect(p, "anchor"); +} + +mpc_parser_t *mpc_state(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_STATE; + return p; +} + +mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EXPECT; + p->data.expect.x = a; + p->data.expect.m = malloc(strlen(expected) + 1); + strcpy(p->data.expect.m, expected); + return p; +} + +/* +** As `snprintf` is not ANSI standard this +** function `mpc_expectf` should be considered +** unsafe. +** +** You have a few options if this is going to be +** trouble. +** +** - Ensure the format string does not exceed +** the buffer length using precision specifiers +** such as `%.512s`. +** +** - Patch this function in your code base to +** use `snprintf` or whatever variant your +** system supports. +** +** - Avoid it altogether. +** +*/ + +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) 
{ + va_list va; + char *buffer; + + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EXPECT; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + buffer = realloc(buffer, strlen(buffer) + 1); + p->data.expect.x = a; + p->data.expect.m = buffer; + return p; +} + +/* +** Basic Parsers +*/ + +mpc_parser_t *mpc_any(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ANY; + return mpc_expect(p, "any character"); +} + +mpc_parser_t *mpc_char(char c) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SINGLE; + p->data.single.x = c; + return mpc_expectf(p, "'%c'", c); +} + +mpc_parser_t *mpc_range(char s, char e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_RANGE; + p->data.range.x = s; + p->data.range.y = e; + return mpc_expectf(p, "character between '%c' and '%c'", s, e); +} + +mpc_parser_t *mpc_oneof(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "one of '%s'", s); +} + +mpc_parser_t *mpc_noneof(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_NONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "none of '%s'", s); + +} + +mpc_parser_t *mpc_satisfy(int(*f)(char)) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SATISFY; + p->data.satisfy.f = f; + return mpc_expectf(p, "character satisfying function %p", f); +} + +mpc_parser_t *mpc_string(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_STRING; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "\"%s\"", s); +} + +/* +** Core Parsers +*/ + +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_APPLY; + p->data.apply.x = a; + p->data.apply.f = f; + return p; +} + +mpc_parser_t 
*mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_APPLY_TO; + p->data.apply_to.x = a; + p->data.apply_to.f = f; + p->data.apply_to.d = x; + return p; +} + +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_CHECK; + p->data.check.x = a; + p->data.check.dx = da; + p->data.check.f = f; + p->data.check.e = malloc(strlen(e) + 1); + strcpy(p->data.check.e, e); + return p; +} + +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_CHECK_WITH; + p->data.check_with.x = a; + p->data.check_with.dx = da; + p->data.check_with.f = f; + p->data.check_with.d = x; + p->data.check_with.e = malloc(strlen(e) + 1); + strcpy(p->data.check_with.e, e); + return p; +} + +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) { + va_list va; + char *buffer; + mpc_parser_t *p; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + p = mpc_check(a, da, f, buffer); + free(buffer); + + return p; +} + +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) 
{ + va_list va; + char *buffer; + mpc_parser_t *p; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + p = mpc_check_with(a, da, f, x, buffer); + free(buffer); + + return p; +} + +mpc_parser_t *mpc_predictive(mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_PREDICT; + p->data.predict.x = a; + return p; +} + +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_NOT; + p->data.not.x = a; + p->data.not.dx = da; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { + return mpc_not_lift(a, da, mpcf_ctor_null); +} + +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MAYBE; + p->data.not.x = a; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t *mpc_maybe(mpc_parser_t *a) { + return mpc_maybe_lift(a, mpcf_ctor_null); +} + +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MANY; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MANY1; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_COUNT; + p->data.repeat.n = n; + p->data.repeat.f = f; + p->data.repeat.x = a; + p->data.repeat.dx = da; + return p; +} + +mpc_parser_t *mpc_or(int n, ...) 
{ + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_OR; + p->data.or.n = n; + p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.or.xs[i] = va_arg(va, mpc_parser_t*); + } + va_end(va); + + return p; +} + +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = f; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + va_start(va, f); + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); + } + va_end(va); + + return p; +} + +/* +** Common Parsers +*/ + +mpc_parser_t *mpc_soi(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SOI; + return mpc_expect(p, "start of input"); +} + +mpc_parser_t *mpc_eoi(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EOI; + return mpc_expect(p, "end of input"); +} + +static int mpc_boundary_anchor(char prev, char next) { + const char* word = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789_"; + if ( strchr(word, next) && prev == '\0') { return 1; } + if ( strchr(word, prev) && next == '\0') { return 1; } + if ( strchr(word, next) && !strchr(word, prev)) { return 1; } + if (!strchr(word, next) && strchr(word, prev)) { return 1; } + return 0; +} + +static int mpc_boundary_newline_anchor(char prev, char next) { + (void)next; + return prev == '\n'; +} + +mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } +mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } + +mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } +mpc_parser_t 
*mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } +mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } + +mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } +mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } +mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } + +mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } +mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } +mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } +mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } +mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } +mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } + +mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } +mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } +mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } +mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } +mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } + +mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } +mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } +mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } +mpc_parser_t 
*mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } + +mpc_parser_t *mpc_real(void) { + + /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ + + mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; + + p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); + p1 = mpc_digits(); + p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); + p30 = mpc_oneof("eE"); + p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); + p32 = mpc_digits(); + p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); + + return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); + +} + +mpc_parser_t *mpc_float(void) { + return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); +} + +mpc_parser_t *mpc_char_lit(void) { + return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); +} + +mpc_parser_t *mpc_string_lit(void) { + mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); + return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); +} + +mpc_parser_t *mpc_regex_lit(void) { + mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); + return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); +} + +mpc_parser_t *mpc_ident(void) { + mpc_parser_t *p0, *p1; + p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); + p1 = mpc_many(mpcf_strfold, mpc_alphanum()); + return mpc_and(2, mpcf_strfold, p0, p1, free); +} + +/* +** Useful Parsers +*/ + +mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } +mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } +mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } + +mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return 
mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } +mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } +mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } +mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } +mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } + +mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } + +mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { + return mpc_and(3, mpcf_snd_free, + mpc_string(o), a, mpc_string(c), + free, ad); +} + +mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } +mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } +mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } +mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } + +mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { + return mpc_and(3, mpcf_snd_free, + mpc_sym(o), mpc_tok(a), mpc_sym(c), + free, ad); +} + +mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } +mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } +mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } +mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } + +/* +** Regular Expression Parsers +*/ + +/* +** So here is a cute bootstrapping. 
+** +** I'm using the previously defined +** mpc constructs and functions to +** parse the user regex string and +** construct a parser from it. +** +** As it turns out lots of the standard +** mpc functions look a lot like `fold` +** functions and so can be used indirectly +** by many of the parsing functions to build +** a parser directly - as we are parsing. +** +** This is certainly something that +** would be less elegant/interesting +** in a two-phase parser which first +** builds an AST and then traverses it +** to generate the object. +** +** This whole thing acts as a great +** case study for how trivial it can be +** to write a great parser in a few +** lines of code using mpc. +*/ + +/* +** +** ### Regular Expression Grammar +** +** : | ( "|" ) +** +** : * +** +** : +** | "*" +** | "+" +** | "?" +** | "{" "}" +** +** : +** | "\" +** | "(" ")" +** | "[" "]" +*/ + +static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { + (void) n; + if (xs[1] == NULL) { return xs[0]; } + else { return mpc_or(2, xs[0], xs[1]); } +} + +static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { + int i; + mpc_parser_t *p = mpc_lift(mpcf_ctor_str); + for (i = 0; i < n; i++) { + p = mpc_and(2, mpcf_strfold, p, xs[i], free); + } + return p; +} + +static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { + int num; + (void) n; + if (xs[1] == NULL) { return xs[0]; } + switch(((char*)xs[1])[0]) + { + case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; + case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; + case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; + default: + num = *(int*)xs[1]; + free(xs[1]); + } + + return mpc_count(num, mpcf_strfold, xs[0], free); +} + +static mpc_parser_t *mpc_re_escape_char(char c) { + switch (c) { + case 'a': return mpc_char('\a'); + case 'f': return mpc_char('\f'); + case 'n': return mpc_char('\n'); + case 'r': return mpc_char('\r'); + case 't': return mpc_char('\t'); + case 'v': 
return mpc_char('\v'); + case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); + case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); + case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); + case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); + case 'd': return mpc_digit(); + case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); + case 's': return mpc_whitespace(); + case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); + case 'w': return mpc_alphanum(); + case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); + default: return NULL; + } +} + +static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { + + int mode = *((int*)data); + char *s = x; + mpc_parser_t *p; + + /* Any Character */ + if (s[0] == '.') { + free(s); + if (mode & MPC_RE_DOTALL) { + return mpc_any(); + } else { + return mpc_expect(mpc_noneof("\n"), "any character except a newline"); + } + } + + /* Start of Input */ + if (s[0] == '^') { + free(s); + if (mode & MPC_RE_MULTILINE) { + return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); + } else { + return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); + } + } + + /* End of Input */ + if (s[0] == '$') { + free(s); + if (mode & MPC_RE_MULTILINE) { + return mpc_or(2, + mpc_newline(), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } else { + return mpc_or(2, + mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } + } + + /* Regex Escape */ + if (s[0] == '\\') { + p = mpc_re_escape_char(s[1]); + p = (p == NULL) ? 
mpc_char(s[1]) : p; + free(s); + return p; + } + + /* Regex Standard */ + p = mpc_char(s[0]); + free(s); + return p; +} + +static const char *mpc_re_range_escape_char(char c) { + switch (c) { + case '-': return "-"; + case 'a': return "\a"; + case 'f': return "\f"; + case 'n': return "\n"; + case 'r': return "\r"; + case 't': return "\t"; + case 'v': return "\v"; + case 'b': return "\b"; + case 'd': return "0123456789"; + case 's': return " \f\n\r\t\v"; + case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; + default: return NULL; + } +} + +static mpc_val_t *mpcf_re_range(mpc_val_t *x) { + + mpc_parser_t *out; + size_t i, j; + size_t start, end; + const char *tmp = NULL; + const char *s = x; + int comp = s[0] == '^' ? 1 : 0; + char *range = calloc(1,1); + + if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } + if (s[0] == '^' && + s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } + + for (i = comp; i < strlen(s); i++){ + + /* Regex Range Escape */ + if (s[i] == '\\') { + tmp = mpc_re_range_escape_char(s[i+1]); + if (tmp != NULL) { + range = realloc(range, strlen(range) + strlen(tmp) + 1); + strcat(range, tmp); + } else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i+1]; + } + i++; + } + + /* Regex Range...Range */ + else if (s[i] == '-') { + if (s[i+1] == '\0' || i == 0) { + range = realloc(range, strlen(range) + strlen("-") + 1); + strcat(range, "-"); + } else { + start = s[i-1]+1; + end = s[i+1]-1; + for (j = start; j <= end; j++) { + range = realloc(range, strlen(range) + 1 + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = (char)j; + } + } + } + + /* Regex Range Normal */ + else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i]; + } + + } + + out = comp == 1 ? 
mpc_noneof(range) : mpc_oneof(range); + + free(x); + free(range); + + return out; +} + +mpc_parser_t *mpc_re(const char *re) { + return mpc_re_mode(re, MPC_RE_DEFAULT); +} + +mpc_parser_t *mpc_re_mode(const char *re, int mode) { + + char *err_msg; + mpc_parser_t *err_out; + mpc_result_t r; + mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; + + Regex = mpc_new("regex"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + Range = mpc_new("range"); + + mpc_define(Regex, mpc_and(2, mpcf_re_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), + (mpc_dtor_t)mpc_delete + )); + + mpc_define(Term, mpc_many(mpcf_re_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcf_re_repeat, + Base, + mpc_or(5, + mpc_char('*'), mpc_char('+'), mpc_char('?'), + mpc_brackets(mpc_int(), free), + mpc_pass()), + (mpc_dtor_t)mpc_delete + )); + + mpc_define(Base, mpc_or(4, + mpc_parens(Regex, (mpc_dtor_t)mpc_delete), + mpc_squares(Range, (mpc_dtor_t)mpc_delete), + mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), + mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) + )); + + mpc_define(Range, mpc_apply( + mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), + mpcf_re_range + )); + + RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); + + mpc_optimise(RegexEnclose); + mpc_optimise(Regex); + mpc_optimise(Term); + mpc_optimise(Factor); + mpc_optimise(Base); + mpc_optimise(Range); + + if(!mpc_parse("", re, RegexEnclose, &r)) { + err_msg = mpc_err_string(r.error); + err_out = mpc_failf("Invalid Regex: %s", err_msg); + mpc_err_delete(r.error); + free(err_msg); + r.output = err_out; + } + + mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); + + mpc_optimise(r.output); + + return r.output; + +} + +/* +** Common Fold Functions +*/ + +void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } + +mpc_val_t *mpcf_ctor_null(void) { return NULL; } +mpc_val_t *mpcf_ctor_str(void) { 
return calloc(1, 1); } +mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } + +mpc_val_t *mpcf_int(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 10); + free(x); + return y; +} + +mpc_val_t *mpcf_hex(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 16); + free(x); + return y; +} + +mpc_val_t *mpcf_oct(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 8); + free(x); + return y; +} + +mpc_val_t *mpcf_float(mpc_val_t *x) { + float *y = malloc(sizeof(float)); + *y = strtod(x, NULL); + free(x); + return y; +} + +mpc_val_t *mpcf_strtriml(mpc_val_t *x) { + char *s = x; + while (isspace((unsigned char)*s)) { + memmove(s, s+1, strlen(s)); + } + return s; +} + +mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { + char *s = x; + size_t l = strlen(s); + while (l > 0 && isspace((unsigned char)s[l-1])) { + s[l-1] = '\0'; l--; + } + return s; +} + +mpc_val_t *mpcf_strtrim(mpc_val_t *x) { + return mpcf_strtriml(mpcf_strtrimr(x)); +} + +static const char mpc_escape_input_c[] = { + '\a', '\b', '\f', '\n', '\r', + '\t', '\v', '\\', '\'', '\"', '\0'}; + +static const char *mpc_escape_output_c[] = { + "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", + "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; + +static const char mpc_escape_input_raw_re[] = { '/' }; +static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; + +static const char mpc_escape_input_raw_cstr[] = { '"' }; +static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; + +static const char mpc_escape_input_raw_cchar[] = { '\'' }; +static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; + +static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { + + int i; + int found; + char buff[2]; + char *s = x; + char *y = calloc(1, 1); + + while (*s) { + + i = 0; + found = 0; + + while (output[i]) { + if (*s == input[i]) { + y = realloc(y, strlen(y) + strlen(output[i]) + 1); + strcat(y, output[i]); + found = 1; + break; + 
} + i++; + } + + if (!found) { + y = realloc(y, strlen(y) + 2); + buff[0] = *s; buff[1] = '\0'; + strcat(y, buff); + } + + s++; + } + + + return y; +} + +static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { + + int i; + int found = 0; + char buff[2]; + char *s = x; + char *y = calloc(1, 1); + + while (*s) { + + i = 0; + found = 0; + + while (output[i]) { + if ((*(s+0)) == output[i][0] && + (*(s+1)) == output[i][1]) { + y = realloc(y, strlen(y) + 1 + 1); + buff[0] = input[i]; buff[1] = '\0'; + strcat(y, buff); + found = 1; + s++; + break; + } + i++; + } + + if (!found) { + y = realloc(y, strlen(y) + 1 + 1); + buff[0] = *s; buff[1] = '\0'; + strcat(y, buff); + } + + if (*s == '\0') { break; } + else { s++; } + } + + return y; + +} + +mpc_val_t *mpcf_escape(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { + mpc_val_t *y = 
mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); + free(x); + return y; +} + +mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } +mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } +mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } +mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } + +static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { + int i; + for (i = 0; i < n; i++) { + if (i != x) { free(xs[i]); } + } + return xs[x]; +} + +mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } +mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } +mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } + +mpc_val_t *mpcf_freefold(int n, mpc_val_t **xs) { + int i; + for (i = 0; i < n; i++) { + free(xs[i]); + } + return NULL; +} + +mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { + int i; + size_t l = 0; + + if (n == 0) { return calloc(1, 1); } + + for (i = 0; i < n; i++) { l += strlen(xs[i]); } + + xs[0] = realloc(xs[0], l + 1); + + for (i = 1; i < n; i++) { + strcat(xs[0], xs[i]); free(xs[i]); + } + + return xs[0]; +} + +mpc_val_t *mpcf_maths(int n, mpc_val_t **xs) { + int **vs = (int**)xs; + (void) n; + + switch(((char*)xs[1])[0]) + { + case '*': { *vs[0] *= *vs[2]; }; break; + case '/': { *vs[0] /= *vs[2]; }; break; + case '%': { *vs[0] %= *vs[2]; }; break; + case '+': { *vs[0] += *vs[2]; }; break; + case '-': { *vs[0] -= *vs[2]; }; break; + default: break; + } + + free(xs[1]); free(xs[2]); + + return xs[0]; +} + +/* +** Printing +*/ + +static void mpc_print_unretained(mpc_parser_t *p, int force) { + + /* TODO: Print Everything Escaped */ + + int i; + char *s, *e; + char buff[2]; + + if (p->retained && !force) {; + if (p->name) { printf("<%s>", p->name); } + else { printf(""); } + return; + } + + if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } + if 
(p->type == MPC_TYPE_PASS) { printf("<:>"); } + if (p->type == MPC_TYPE_FAIL) { printf(""); } + if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } + if (p->type == MPC_TYPE_STATE) { printf(""); } + if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } + if (p->type == MPC_TYPE_EXPECT) { + printf("%s", p->data.expect.m); + /*mpc_print_unretained(p->data.expect.x, 0);*/ + } + + if (p->type == MPC_TYPE_ANY) { printf("<.>"); } + if (p->type == MPC_TYPE_SATISFY) { printf(""); } + + if (p->type == MPC_TYPE_SINGLE) { + buff[0] = p->data.single.x; buff[1] = '\0'; + s = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + printf("'%s'", s); + free(s); + } + + if (p->type == MPC_TYPE_RANGE) { + buff[0] = p->data.range.x; buff[1] = '\0'; + s = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + buff[0] = p->data.range.y; buff[1] = '\0'; + e = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[%s-%s]", s, e); + free(s); + free(e); + } + + if (p->type == MPC_TYPE_ONEOF) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_NONEOF) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[^%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_STRING) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("\"%s\"", s); + free(s); + } + + if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } + + if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } + if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } + + if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 
0); printf("*"); } + if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } + if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } + + if (p->type == MPC_TYPE_OR) { + printf("("); + for(i = 0; i < p->data.or.n-1; i++) { + mpc_print_unretained(p->data.or.xs[i], 0); + printf(" | "); + } + mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); + printf(")"); + } + + if (p->type == MPC_TYPE_AND) { + printf("("); + for(i = 0; i < p->data.and.n-1; i++) { + mpc_print_unretained(p->data.and.xs[i], 0); + printf(" "); + } + mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); + printf(")"); + } + + if (p->type == MPC_TYPE_CHECK) { + mpc_print_unretained(p->data.check.x, 0); + printf("->?"); + } + if (p->type == MPC_TYPE_CHECK_WITH) { + mpc_print_unretained(p->data.check_with.x, 0); + printf("->?"); + } + +} + +void mpc_print(mpc_parser_t *p) { + mpc_print_unretained(p, 1); + printf("\n"); +} + +/* +** Testing +*/ + +/* +** These functions are slightly unwieldy and +** also the whole of the testing suite for mpc +** mpc is pretty shaky. +** +** It could do with a lot more tests and more +** precision. Currently I am only really testing +** changes off of the examples. 
+** +*/ + +int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)) { + mpc_result_t r; + (void) printer; + if (mpc_parse("", s, p, &r)) { + + if (tester(r.output, d)) { + destructor(r.output); + return 0; + } else { + destructor(r.output); + return 1; + } + + } else { + mpc_err_delete(r.error); + return 1; + } + +} + +int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)) { + + mpc_result_t r; + if (mpc_parse("", s, p, &r)) { + + if (tester(r.output, d)) { + destructor(r.output); + return 1; + } else { + printf("Got "); printer(r.output); printf("\n"); + printf("Expected "); printer(d); printf("\n"); + destructor(r.output); + return 0; + } + + } else { + mpc_err_print(r.error); + mpc_err_delete(r.error); + return 0; + + } + +} + + +/* +** AST +*/ + +void mpc_ast_delete(mpc_ast_t *a) { + + int i; + + if (a == NULL) { return; } + + for (i = 0; i < a->children_num; i++) { + mpc_ast_delete(a->children[i]); + } + + free(a->children); + free(a->tag); + free(a->contents); + free(a); + +} + +static void mpc_ast_delete_no_children(mpc_ast_t *a) { + free(a->children); + free(a->tag); + free(a->contents); + free(a); +} + +mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { + + mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); + + a->tag = malloc(strlen(tag) + 1); + strcpy(a->tag, tag); + + a->contents = malloc(strlen(contents) + 1); + strcpy(a->contents, contents); + + a->state = mpc_state_new(); + + a->children_num = 0; + a->children = NULL; + return a; + +} + +mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) 
{ + + mpc_ast_t *a = mpc_ast_new(tag, ""); + + int i; + va_list va; + va_start(va, tag); + + for (i = 0; i < n; i++) { + mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); + } + + va_end(va); + + return a; + +} + +mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { + + mpc_ast_t *r; + + if (a == NULL) { return a; } + if (a->children_num == 0) { return a; } + if (a->children_num == 1) { return a; } + + r = mpc_ast_new(">", ""); + mpc_ast_add_child(r, a); + return r; +} + +int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { + + int i; + + if (strcmp(a->tag, b->tag) != 0) { return 0; } + if (strcmp(a->contents, b->contents) != 0) { return 0; } + if (a->children_num != b->children_num) { return 0; } + + for (i = 0; i < a->children_num; i++) { + if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } + } + + return 1; +} + +mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { + r->children_num++; + r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); + r->children[r->children_num-1] = a; + return r; +} + +mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { + if (a == NULL) { return a; } + a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); + memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); + memmove(a->tag, t, strlen(t)); + memmove(a->tag + strlen(t), "|", 1); + return a; +} + +mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { + if (a == NULL) { return a; } + a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); + memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); + memmove(a->tag, t, (strlen(t)-1)); + return a; +} + +mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { + a->tag = realloc(a->tag, strlen(t) + 1); + strcpy(a->tag, t); + return a; +} + +mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { + if (a == NULL) { return a; } + a->state = s; + return a; +} + +static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { + + int i; + + if (a == NULL) { + fprintf(fp, "NULL\n"); + return; + 
} + + for (i = 0; i < d; i++) { fprintf(fp, " "); } + + if (strlen(a->contents)) { + fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, + (long unsigned int)(a->state.row+1), + (long unsigned int)(a->state.col+1), + a->contents); + } else { + fprintf(fp, "%s \n", a->tag); + } + + for (i = 0; i < a->children_num; i++) { + mpc_ast_print_depth(a->children[i], d+1, fp); + } + +} + +void mpc_ast_print(mpc_ast_t *a) { + mpc_ast_print_depth(a, 0, stdout); +} + +void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { + mpc_ast_print_depth(a, 0, fp); +} + +int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { + return mpc_ast_get_index_lb(ast, tag, 0); +} + +int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { + int i; + + for(i=lb; ichildren_num; i++) { + if(strcmp(ast->children[i]->tag, tag) == 0) { + return i; + } + } + + return -1; +} + +mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { + return mpc_ast_get_child_lb(ast, tag, 0); +} + +mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { + int i; + + for(i=lb; ichildren_num; i++) { + if(strcmp(ast->children[i]->tag, tag) == 0) { + return ast->children[i]; + } + } + + return NULL; +} + +mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, + mpc_ast_trav_order_t order) +{ + mpc_ast_trav_t *trav, *n_trav; + mpc_ast_t *cnode = ast; + + /* Create the traversal structure */ + trav = malloc(sizeof(mpc_ast_trav_t)); + trav->curr_node = cnode; + trav->parent = NULL; + trav->curr_child = 0; + trav->order = order; + + /* Get start node */ + switch(order) { + case mpc_ast_trav_order_pre: + /* Nothing else is needed for pre order start */ + break; + + case mpc_ast_trav_order_post: + while(cnode->children_num > 0) { + cnode = cnode->children[0]; + + n_trav = malloc(sizeof(mpc_ast_trav_t)); + n_trav->curr_node = cnode; + n_trav->parent = trav; + n_trav->curr_child = 0; + n_trav->order = order; + + trav = n_trav; + } + + break; + + default: + /* Unreachable, but compiler complaints */ + break; + } 
+ + return trav; +} + +mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) { + mpc_ast_trav_t *n_trav, *to_free; + mpc_ast_t *ret = NULL; + int cchild; + + /* The end of traversal was reached */ + if(*trav == NULL) return NULL; + + switch((*trav)->order) { + case mpc_ast_trav_order_pre: + ret = (*trav)->curr_node; + + /* If there aren't any more children, go up */ + while(*trav != NULL && + (*trav)->curr_child >= (*trav)->curr_node->children_num) + { + to_free = *trav; + *trav = (*trav)->parent; + free(to_free); + } + + /* If trav is NULL, the end was reached */ + if(*trav == NULL) { + break; + } + + /* Go to next child */ + n_trav = malloc(sizeof(mpc_ast_trav_t)); + + cchild = (*trav)->curr_child; + n_trav->curr_node = (*trav)->curr_node->children[cchild]; + n_trav->parent = *trav; + n_trav->curr_child = 0; + n_trav->order = (*trav)->order; + + (*trav)->curr_child++; + *trav = n_trav; + + break; + + case mpc_ast_trav_order_post: + ret = (*trav)->curr_node; + + /* Move up tree to the parent If the parent doesn't have any more nodes, + * then this is the current node. If it does, move down to its left most + * child. 
Also, free the previous traversal node */ + to_free = *trav; + *trav = (*trav)->parent; + free(to_free); + + if(*trav == NULL) + break; + + /* Next child */ + (*trav)->curr_child++; + + /* If there aren't any more children, this is the next node */ + if((*trav)->curr_child >= (*trav)->curr_node->children_num) { + break; + } + + /* If there are still more children, find the leftmost child from this + * node */ + while((*trav)->curr_node->children_num > 0) { + n_trav = malloc(sizeof(mpc_ast_trav_t)); + + cchild = (*trav)->curr_child; + n_trav->curr_node = (*trav)->curr_node->children[cchild]; + n_trav->parent = *trav; + n_trav->curr_child = 0; + n_trav->order = (*trav)->order; + + *trav = n_trav; + } + + default: + /* Unreachable, but compiler complaints */ + break; + } + + return ret; +} + +void mpc_ast_traverse_free(mpc_ast_trav_t **trav) { + mpc_ast_trav_t *n_trav; + + /* Go through parents until all are free */ + while(*trav != NULL) { + n_trav = (*trav)->parent; + free(*trav); + *trav = n_trav; + } +} + +mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { + + int i, j; + mpc_ast_t** as = (mpc_ast_t**)xs; + mpc_ast_t *r; + + if (n == 0) { return NULL; } + if (n == 1) { return xs[0]; } + if (n == 2 && xs[1] == NULL) { return xs[0]; } + if (n == 2 && xs[0] == NULL) { return xs[1]; } + + r = mpc_ast_new(">", ""); + + for (i = 0; i < n; i++) { + + if (as[i] == NULL) { continue; } + + if (as[i] && as[i]->children_num == 0) { + mpc_ast_add_child(r, as[i]); + } else if (as[i] && as[i]->children_num == 1) { + mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag)); + mpc_ast_delete_no_children(as[i]); + } else if (as[i] && as[i]->children_num >= 2) { + for (j = 0; j < as[i]->children_num; j++) { + mpc_ast_add_child(r, as[i]->children[j]); + } + mpc_ast_delete_no_children(as[i]); + } + + } + + if (r->children_num) { + r->state = r->children[0]->state; + } + + return r; +} + +mpc_val_t *mpcf_str_ast(mpc_val_t *c) { + mpc_ast_t *a = mpc_ast_new("", c); + 
free(c); + return a; +} + +mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { + mpc_state_t *s = ((mpc_state_t**)xs)[0]; + mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; + (void)n; + a = mpc_ast_state(a, *s); + free(s); + return a; +} + +mpc_parser_t *mpca_state(mpc_parser_t *a) { + return mpc_and(2, mpcf_state_ast, mpc_state(), a, free); +} + +mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { + return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); +} + +mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { + return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); +} + +mpc_parser_t *mpca_root(mpc_parser_t *a) { + return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); +} + +mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } +mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } +mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } +mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } +mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } + +mpc_parser_t *mpca_or(int n, ...) { + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_OR; + p->data.or.n = n; + p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.or.xs[i] = va_arg(va, mpc_parser_t*); + } + va_end(va); + + return p; + +} + +mpc_parser_t *mpca_and(int n, ...) 
{ + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = mpcf_fold_ast; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; + } + va_end(va); + + return p; +} + +mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } + +/* +** Grammar Parser +*/ + +/* +** This is another interesting bootstrapping. +** +** Having a general purpose AST type allows +** users to specify the grammar alone and +** let all fold rules be automatically taken +** care of by existing functions. +** +** You don't get to control the type spat +** out but this means you can make a nice +** parser to take in some grammar in nice +** syntax and spit out a parser that works. +** +** The grammar for this looks surprisingly +** like regex but the main difference is that +** it is now whitespace insensitive and the +** base type takes literals of some form. +*/ + +/* +** +** ### Grammar Grammar +** +** : ( "|" ) | +** +** : * +** +** : +** | "*" +** | "+" +** | "?" 
+** | "{" "}" +** +** : "<" ( | ) ">" +** | +** | +** | +** | "(" ")" +*/ + +typedef struct { + va_list *va; + int parsers_num; + mpc_parser_t **parsers; + int flags; +} mpca_grammar_st_t; + +static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { + (void) n; + if (xs[1] == NULL) { return xs[0]; } + else { return mpca_or(2, xs[0], xs[1]); } +} + +static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { + int i; + mpc_parser_t *p = mpc_pass(); + for (i = 0; i < n; i++) { + if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } + } + return p; +} + +static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { + int num; + (void) n; + if (xs[1] == NULL) { return xs[0]; } + switch(((char*)xs[1])[0]) + { + case '*': { free(xs[1]); return mpca_many(xs[0]); }; break; + case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break; + case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break; + case '!': { free(xs[1]); return mpca_not(xs[0]); }; break; + default: + num = *((int*)xs[1]); + free(xs[1]); + } + return mpca_count(num, xs[0]); +} + +static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { + mpca_grammar_st_t *st = s; + char *y = mpcf_unescape(x); + mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y)); + free(y); + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); +} + +static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { + mpca_grammar_st_t *st = s; + char *y = mpcf_unescape(x); + mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? 
mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); + free(y); + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); +} + +static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { + char *y = xs[0]; + char *m = xs[1]; + mpca_grammar_st_t *st = xs[2]; + mpc_parser_t *p; + int mode = MPC_RE_DEFAULT; + + (void)n; + if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } + if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } + y = mpcf_unescape_regex(y); + p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); + free(y); + free(m); + + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); +} + +/* Should this just use `isdigit` instead? */ +static int is_number(const char* s) { + size_t i; + for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } + return 1; +} + +static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { + + int i; + mpc_parser_t *p; + + /* Case of Number */ + if (is_number(x)) { + + i = strtol(x, NULL, 10); + + while (st->parsers_num <= i) { + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); + if (st->parsers[st->parsers_num-1] == NULL) { + return mpc_failf("No Parser in position %i! 
Only supplied %i Parsers!", i, st->parsers_num); + } + } + + return st->parsers[st->parsers_num-1]; + + /* Case of Identifier */ + } else { + + /* Search Existing Parsers */ + for (i = 0; i < st->parsers_num; i++) { + mpc_parser_t *q = st->parsers[i]; + if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } + if (q->name && strcmp(q->name, x) == 0) { return q; } + } + + /* Search New Parsers */ + while (1) { + + p = va_arg(*st->va, mpc_parser_t*); + + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = p; + + if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } + if (p->name && strcmp(p->name, x) == 0) { return p; } + + } + + } + +} + +static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { + + mpca_grammar_st_t *st = s; + mpc_parser_t *p = mpca_grammar_find_parser(x, st); + free(x); + + if (p->name) { + return mpca_state(mpca_root(mpca_add_tag(p, p->name))); + } else { + return mpca_state(mpca_root(p)); + } +} + +mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { + + char *err_msg; + mpc_parser_t *err_out; + mpc_result_t r; + mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; + + GrammarTotal = mpc_new("grammar_total"); + Grammar = mpc_new("grammar"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + + mpc_define(GrammarTotal, + mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) + ); + + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), + mpc_soft_delete + )); + + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, + Base, + mpc_or(6, + mpc_sym("*"), + mpc_sym("+"), + mpc_sym("?"), + mpc_sym("!"), + mpc_tok_brackets(mpc_int(), free), + mpc_pass()), + mpc_soft_delete + )); + + mpc_define(Base, mpc_or(5, + 
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), + mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), + mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), + mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), + mpc_tok_parens(Grammar, mpc_soft_delete) + )); + + mpc_optimise(GrammarTotal); + mpc_optimise(Grammar); + mpc_optimise(Factor); + mpc_optimise(Term); + mpc_optimise(Base); + + if(!mpc_parse("", grammar, GrammarTotal, &r)) { + err_msg = mpc_err_string(r.error); + err_out = mpc_failf("Invalid Grammar: %s", err_msg); + mpc_err_delete(r.error); + free(err_msg); + r.output = err_out; + } + + mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); + + mpc_optimise(r.output); + + return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; + +} + +mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { + mpca_grammar_st_t st; + mpc_parser_t *res; + va_list va; + va_start(va, grammar); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + res = mpca_grammar_st(grammar, &st); + free(st.parsers); + va_end(va); + return res; +} + +typedef struct { + char *ident; + char *name; + mpc_parser_t *grammar; +} mpca_stmt_t; + +static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { + mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); + stmt->ident = ((char**)xs)[0]; + stmt->name = ((char**)xs)[1]; + stmt->grammar = ((mpc_parser_t**)xs)[3]; + (void) n; + free(((char**)xs)[2]); + free(((char**)xs)[4]); + + return stmt; +} + +static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { + + int i; + mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); + + for (i = 0; i < n; i++) { + stmts[i] = xs[i]; + } + stmts[n] = NULL; + + return stmts; +} + +static void mpca_stmt_list_delete(mpc_val_t *x) { + + mpca_stmt_t **stmts = x; + + while(*stmts) { + mpca_stmt_t *stmt = 
*stmts; + free(stmt->ident); + free(stmt->name); + mpc_soft_delete(stmt->grammar); + free(stmt); + stmts++; + } + free(x); + +} + +static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { + + mpca_grammar_st_t *st = s; + mpca_stmt_t *stmt; + mpca_stmt_t **stmts = x; + mpc_parser_t *left; + + while(*stmts) { + stmt = *stmts; + left = mpca_grammar_find_parser(stmt->ident, st); + if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } + if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } + mpc_optimise(stmt->grammar); + mpc_define(left, stmt->grammar); + free(stmt->ident); + free(stmt->name); + free(stmt); + stmts++; + } + + free(x); + + return NULL; +} + +static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { + + mpc_result_t r; + mpc_err_t *e; + mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; + + Lang = mpc_new("lang"); + Stmt = mpc_new("stmt"); + Grammar = mpc_new("grammar"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + + mpc_define(Lang, mpc_apply_to( + mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), + mpca_stmt_list_apply_to, st + )); + + mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, + mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), + free, free, free, mpc_soft_delete + )); + + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), + mpc_soft_delete + )); + + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, + Base, + mpc_or(6, + mpc_sym("*"), + mpc_sym("+"), + mpc_sym("?"), + mpc_sym("!"), + mpc_tok_brackets(mpc_int(), free), + mpc_pass()), + mpc_soft_delete + )); + + mpc_define(Base, mpc_or(5, + mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), + mpc_apply_to(mpc_tok(mpc_char_lit()), 
mpcaf_grammar_char, st), + mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), + mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), + mpc_tok_parens(Grammar, mpc_soft_delete) + )); + + mpc_optimise(Lang); + mpc_optimise(Stmt); + mpc_optimise(Grammar); + mpc_optimise(Term); + mpc_optimise(Factor); + mpc_optimise(Base); + + if (!mpc_parse_input(i, Lang, &r)) { + e = r.error; + } else { + e = NULL; + } + + mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); + + return e; +} + +mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + va_start(va, f); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_file("", f); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + return err; +} + +mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + va_start(va, p); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_pipe("", p); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + return err; +} + +mpc_err_t *mpca_lang(int flags, const char *language, ...) { + + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + va_start(va, language); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_string("", language); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + return err; +} + +mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) 
{ + + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + + FILE *f = fopen(filename, "rb"); + + if (f == NULL) { + err = mpc_err_file(filename, "Unable to open file!"); + return err; + } + + va_start(va, filename); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_file(filename, f); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + + fclose(f); + + return err; +} + +static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { + + int i, total; + + if (p->retained && !force) { return 0; } + + if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } + + if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } + + if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); } + if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); } + + if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } + if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } + + if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } + + if (p->type == MPC_TYPE_OR) { + total = 1; + for(i = 0; i < p->data.or.n; i++) { + total += mpc_nodecount_unretained(p->data.or.xs[i], 0); + } + return total; + } + + if (p->type == MPC_TYPE_AND) { + total = 1; + for(i = 0; i < p->data.and.n; i++) { + total += mpc_nodecount_unretained(p->data.and.xs[i], 0); + } + 
return total; + } + + return 1; + +} + +void mpc_stats(mpc_parser_t* p) { + printf("Stats\n"); + printf("=====\n"); + printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); +} + +static void mpc_optimise_unretained(mpc_parser_t *p, int force) { + + int i, n, m; + mpc_parser_t *t; + + if (p->retained && !force) { return; } + + /* Optimise Subexpressions */ + + if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } + if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); } + if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } + if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } + if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } + if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } + + if (p->type == MPC_TYPE_OR) { + for(i = 0; i < p->data.or.n; i++) { + mpc_optimise_unretained(p->data.or.xs[i], 0); + } + } + + if (p->type == MPC_TYPE_AND) { + for(i = 0; i < p->data.and.n; i++) { + mpc_optimise_unretained(p->data.and.xs[i], 0); + } + } + + /* Perform optimisations */ + + while (1) { + + /* Merge rhs `or` */ + if (p->type == MPC_TYPE_OR + && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR + && !p->data.or.xs[p->data.or.n-1]->retained) { + t = p->data.or.xs[p->data.or.n-1]; + n = p->data.or.n; m = t->data.or.n; + p->data.or.n = n + m - 1; + p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); + memmove(p->data.or.xs + n - 1, t->data.or.xs, m * 
sizeof(mpc_parser_t*)); + free(t->data.or.xs); free(t->name); free(t); + continue; + } + + /* Merge lhs `or` */ + if (p->type == MPC_TYPE_OR + && p->data.or.xs[0]->type == MPC_TYPE_OR + && !p->data.or.xs[0]->retained) { + t = p->data.or.xs[0]; + n = p->data.or.n; m = t->data.or.n; + p->data.or.n = n + m - 1; + p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); + memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); + memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); + free(t->data.or.xs); free(t->name); free(t); + continue; + } + + /* Remove ast `pass` */ + if (p->type == MPC_TYPE_AND + && p->data.and.n == 2 + && p->data.and.xs[0]->type == MPC_TYPE_PASS + && !p->data.and.xs[0]->retained + && p->data.and.f == mpcf_fold_ast) { + t = p->data.and.xs[1]; + mpc_delete(p->data.and.xs[0]); + free(p->data.and.xs); free(p->data.and.dxs); free(p->name); + memcpy(p, t, sizeof(mpc_parser_t)); + free(t); + continue; + } + + /* Merge ast lhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_fold_ast + && p->data.and.xs[0]->type == MPC_TYPE_AND + && !p->data.and.xs[0]->retained + && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { + t = p->data.and.xs[0]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); + memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + /* Merge ast rhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_fold_ast + && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND + && 
!p->data.and.xs[p->data.and.n-1]->retained + && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { + t = p->data.and.xs[p->data.and.n-1]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + /* Remove re `lift` */ + if (p->type == MPC_TYPE_AND + && p->data.and.n == 2 + && p->data.and.xs[0]->type == MPC_TYPE_LIFT + && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str + && !p->data.and.xs[0]->retained + && p->data.and.f == mpcf_strfold) { + t = p->data.and.xs[1]; + mpc_delete(p->data.and.xs[0]); + free(p->data.and.xs); free(p->data.and.dxs); free(p->name); + memcpy(p, t, sizeof(mpc_parser_t)); + free(t); + continue; + } + + /* Merge re lhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_strfold + && p->data.and.xs[0]->type == MPC_TYPE_AND + && !p->data.and.xs[0]->retained + && p->data.and.xs[0]->data.and.f == mpcf_strfold) { + t = p->data.and.xs[0]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); + memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + /* Merge re rhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_strfold + && 
p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND + && !p->data.and.xs[p->data.and.n-1]->retained + && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { + t = p->data.and.xs[p->data.and.n-1]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + return; + + } + +} + +void mpc_optimise(mpc_parser_t *p) { + mpc_optimise_unretained(p, 1); +} + diff --git a/mpc.h b/mpc.h index 1622ada..c75ff0b 100644 --- a/mpc.h +++ b/mpc.h @@ -1,390 +1,390 @@ -/* -** mpc - Micro Parser Combinator library for C -** -** https://github.com/orangeduck/mpc -** -** Daniel Holden - contact@daniel-holden.com -** Licensed under BSD3 -*/ - -#ifndef mpc_h -#define mpc_h - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include -#include -#include -#include - -/* -** State Type -*/ - -typedef struct { - long pos; - long row; - long col; - int term; -} mpc_state_t; - -/* -** Error Type -*/ - -typedef struct { - mpc_state_t state; - int expected_num; - char *filename; - char *failure; - char **expected; - char received; -} mpc_err_t; - -void mpc_err_delete(mpc_err_t *e); -char *mpc_err_string(mpc_err_t *e); -void mpc_err_print(mpc_err_t *e); -void mpc_err_print_to(mpc_err_t *e, FILE *f); - -/* -** Parsing -*/ - -typedef void mpc_val_t; - -typedef union { - mpc_err_t *error; - mpc_val_t *output; -} mpc_result_t; - -struct mpc_parser_t; -typedef struct mpc_parser_t mpc_parser_t; - -int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); -int mpc_nparse(const char *filename, const char *string, size_t length, 
mpc_parser_t *p, mpc_result_t *r); -int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); -int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); -int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); - -/* -** Function Types -*/ - -typedef void(*mpc_dtor_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_ctor_t)(void); - -typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); -typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); - -typedef int(*mpc_check_t)(mpc_val_t**); -typedef int(*mpc_check_with_t)(mpc_val_t**,void*); - -/* -** Building a Parser -*/ - -mpc_parser_t *mpc_new(const char *name); -mpc_parser_t *mpc_copy(mpc_parser_t *a); -mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); -mpc_parser_t *mpc_undefine(mpc_parser_t *p); - -void mpc_delete(mpc_parser_t *p); -void mpc_cleanup(int n, ...); - -/* -** Basic Parsers -*/ - -mpc_parser_t *mpc_any(void); -mpc_parser_t *mpc_char(char c); -mpc_parser_t *mpc_range(char s, char e); -mpc_parser_t *mpc_oneof(const char *s); -mpc_parser_t *mpc_noneof(const char *s); -mpc_parser_t *mpc_satisfy(int(*f)(char)); -mpc_parser_t *mpc_string(const char *s); - -/* -** Other Parsers -*/ - -mpc_parser_t *mpc_pass(void); -mpc_parser_t *mpc_fail(const char *m); -mpc_parser_t *mpc_failf(const char *fmt, ...); -mpc_parser_t *mpc_lift(mpc_ctor_t f); -mpc_parser_t *mpc_lift_val(mpc_val_t *x); -mpc_parser_t *mpc_anchor(int(*f)(char,char)); -mpc_parser_t *mpc_state(void); - -/* -** Combinator Parsers -*/ - -mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); -mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); -mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); -mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); -mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); -mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t 
da, mpc_check_with_t f, void *x, const char *e); -mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); -mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); - -mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); -mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); -mpc_parser_t *mpc_maybe(mpc_parser_t *a); -mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); - -mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); -mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); -mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); - -mpc_parser_t *mpc_or(int n, ...); -mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); - -mpc_parser_t *mpc_predictive(mpc_parser_t *a); - -/* -** Common Parsers -*/ - -mpc_parser_t *mpc_eoi(void); -mpc_parser_t *mpc_soi(void); - -mpc_parser_t *mpc_boundary(void); -mpc_parser_t *mpc_boundary_newline(void); - -mpc_parser_t *mpc_whitespace(void); -mpc_parser_t *mpc_whitespaces(void); -mpc_parser_t *mpc_blank(void); - -mpc_parser_t *mpc_newline(void); -mpc_parser_t *mpc_tab(void); -mpc_parser_t *mpc_escape(void); - -mpc_parser_t *mpc_digit(void); -mpc_parser_t *mpc_hexdigit(void); -mpc_parser_t *mpc_octdigit(void); -mpc_parser_t *mpc_digits(void); -mpc_parser_t *mpc_hexdigits(void); -mpc_parser_t *mpc_octdigits(void); - -mpc_parser_t *mpc_lower(void); -mpc_parser_t *mpc_upper(void); -mpc_parser_t *mpc_alpha(void); -mpc_parser_t *mpc_underscore(void); -mpc_parser_t *mpc_alphanum(void); - -mpc_parser_t *mpc_int(void); -mpc_parser_t *mpc_hex(void); -mpc_parser_t *mpc_oct(void); -mpc_parser_t *mpc_number(void); - -mpc_parser_t *mpc_real(void); -mpc_parser_t *mpc_float(void); - -mpc_parser_t *mpc_char_lit(void); -mpc_parser_t *mpc_string_lit(void); -mpc_parser_t *mpc_regex_lit(void); - -mpc_parser_t *mpc_ident(void); - -/* -** Useful Parsers -*/ - -mpc_parser_t *mpc_startwith(mpc_parser_t 
*a); -mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); -mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); - -mpc_parser_t *mpc_stripl(mpc_parser_t *a); -mpc_parser_t *mpc_stripr(mpc_parser_t *a); -mpc_parser_t *mpc_strip(mpc_parser_t *a); -mpc_parser_t *mpc_tok(mpc_parser_t *a); -mpc_parser_t *mpc_sym(const char *s); -mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); - -mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); -mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); - -mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); -mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); - -/* -** Common Function Parameters -*/ - -void mpcf_dtor_null(mpc_val_t *x); - -mpc_val_t *mpcf_ctor_null(void); -mpc_val_t *mpcf_ctor_str(void); - -mpc_val_t *mpcf_free(mpc_val_t *x); -mpc_val_t *mpcf_int(mpc_val_t *x); -mpc_val_t *mpcf_hex(mpc_val_t *x); -mpc_val_t *mpcf_oct(mpc_val_t *x); -mpc_val_t *mpcf_float(mpc_val_t *x); -mpc_val_t *mpcf_strtriml(mpc_val_t *x); -mpc_val_t *mpcf_strtrimr(mpc_val_t *x); -mpc_val_t *mpcf_strtrim(mpc_val_t *x); - -mpc_val_t *mpcf_escape(mpc_val_t *x); -mpc_val_t *mpcf_escape_regex(mpc_val_t *x); -mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); -mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); - -mpc_val_t *mpcf_unescape(mpc_val_t *x); -mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); -mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); -mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); - -mpc_val_t *mpcf_null(int n, mpc_val_t** xs); -mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); 
-mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); -mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); - -mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); -mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); -mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); -mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs); - -mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs); -mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); -mpc_val_t *mpcf_maths(int n, mpc_val_t** xs); - -/* -** Regular Expression Parsers -*/ - -enum { - MPC_RE_DEFAULT = 0, - MPC_RE_M = 1, - MPC_RE_S = 2, - MPC_RE_MULTILINE = 1, - MPC_RE_DOTALL = 2 -}; - -mpc_parser_t *mpc_re(const char *re); -mpc_parser_t *mpc_re_mode(const char *re, int mode); - -/* -** AST -*/ - -typedef struct mpc_ast_t { - char *tag; - char *contents; - mpc_state_t state; - int children_num; - struct mpc_ast_t** children; -} mpc_ast_t; - -mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); -mpc_ast_t *mpc_ast_build(int n, const char *tag, ...); -mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); -mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); -mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); -mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t); -mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t); -mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); - -void mpc_ast_delete(mpc_ast_t *a); -void mpc_ast_print(mpc_ast_t *a); -void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); - -int mpc_ast_get_index(mpc_ast_t *ast, const char *tag); -int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb); -mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag); -mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb); - -typedef enum { - mpc_ast_trav_order_pre, - mpc_ast_trav_order_post -} mpc_ast_trav_order_t; - -typedef struct mpc_ast_trav_t { - mpc_ast_t *curr_node; - struct mpc_ast_trav_t *parent; - int curr_child; - mpc_ast_trav_order_t order; -} mpc_ast_trav_t; - -mpc_ast_trav_t 
*mpc_ast_traverse_start(mpc_ast_t *ast, - mpc_ast_trav_order_t order); - -mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav); - -void mpc_ast_traverse_free(mpc_ast_trav_t **trav); - -/* -** Warning: This function currently doesn't test for equality of the `state` member! -*/ -int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); - -mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); -mpc_val_t *mpcf_str_ast(mpc_val_t *c); -mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); - -mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); -mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); -mpc_parser_t *mpca_root(mpc_parser_t *a); -mpc_parser_t *mpca_state(mpc_parser_t *a); -mpc_parser_t *mpca_total(mpc_parser_t *a); - -mpc_parser_t *mpca_not(mpc_parser_t *a); -mpc_parser_t *mpca_maybe(mpc_parser_t *a); - -mpc_parser_t *mpca_many(mpc_parser_t *a); -mpc_parser_t *mpca_many1(mpc_parser_t *a); -mpc_parser_t *mpca_count(int n, mpc_parser_t *a); - -mpc_parser_t *mpca_or(int n, ...); -mpc_parser_t *mpca_and(int n, ...); - -enum { - MPCA_LANG_DEFAULT = 0, - MPCA_LANG_PREDICTIVE = 1, - MPCA_LANG_WHITESPACE_SENSITIVE = 2 -}; - -mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); - -mpc_err_t *mpca_lang(int flags, const char *language, ...); -mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); -mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); -mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); - -/* -** Misc -*/ - - -void mpc_print(mpc_parser_t *p); -void mpc_optimise(mpc_parser_t *p); -void mpc_stats(mpc_parser_t *p); - -int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)); - -int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)); - -#ifdef __cplusplus -} -#endif - -#endif +/* +** mpc - Micro Parser Combinator library for C +** +** 
https://github.com/orangeduck/mpc +** +** Daniel Holden - contact@daniel-holden.com +** Licensed under BSD3 +*/ + +#ifndef mpc_h +#define mpc_h + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include + +/* +** State Type +*/ + +typedef struct { + long pos; + long row; + long col; + int term; +} mpc_state_t; + +/* +** Error Type +*/ + +typedef struct { + mpc_state_t state; + int expected_num; + char *filename; + char *failure; + char **expected; + char received; +} mpc_err_t; + +void mpc_err_delete(mpc_err_t *e); +char *mpc_err_string(mpc_err_t *e); +void mpc_err_print(mpc_err_t *e); +void mpc_err_print_to(mpc_err_t *e, FILE *f); + +/* +** Parsing +*/ + +typedef void mpc_val_t; + +typedef union { + mpc_err_t *error; + mpc_val_t *output; +} mpc_result_t; + +struct mpc_parser_t; +typedef struct mpc_parser_t mpc_parser_t; + +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); +int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); + +/* +** Function Types +*/ + +typedef void(*mpc_dtor_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_ctor_t)(void); + +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); +typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); + +typedef int(*mpc_check_t)(mpc_val_t**); +typedef int(*mpc_check_with_t)(mpc_val_t**,void*); + +/* +** Building a Parser +*/ + +mpc_parser_t *mpc_new(const char *name); +mpc_parser_t *mpc_copy(mpc_parser_t *a); +mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); +mpc_parser_t *mpc_undefine(mpc_parser_t *p); + +void mpc_delete(mpc_parser_t *p); +void mpc_cleanup(int n, 
...); + +/* +** Basic Parsers +*/ + +mpc_parser_t *mpc_any(void); +mpc_parser_t *mpc_char(char c); +mpc_parser_t *mpc_range(char s, char e); +mpc_parser_t *mpc_oneof(const char *s); +mpc_parser_t *mpc_noneof(const char *s); +mpc_parser_t *mpc_satisfy(int(*f)(char)); +mpc_parser_t *mpc_string(const char *s); + +/* +** Other Parsers +*/ + +mpc_parser_t *mpc_pass(void); +mpc_parser_t *mpc_fail(const char *m); +mpc_parser_t *mpc_failf(const char *fmt, ...); +mpc_parser_t *mpc_lift(mpc_ctor_t f); +mpc_parser_t *mpc_lift_val(mpc_val_t *x); +mpc_parser_t *mpc_anchor(int(*f)(char,char)); +mpc_parser_t *mpc_state(void); + +/* +** Combinator Parsers +*/ + +mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); +mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); + +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); +mpc_parser_t *mpc_maybe(mpc_parser_t *a); +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); + +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_or(int n, ...); +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); + +mpc_parser_t *mpc_predictive(mpc_parser_t *a); + +/* +** Common Parsers +*/ + +mpc_parser_t *mpc_eoi(void); +mpc_parser_t *mpc_soi(void); + +mpc_parser_t 
*mpc_boundary(void); +mpc_parser_t *mpc_boundary_newline(void); + +mpc_parser_t *mpc_whitespace(void); +mpc_parser_t *mpc_whitespaces(void); +mpc_parser_t *mpc_blank(void); + +mpc_parser_t *mpc_newline(void); +mpc_parser_t *mpc_tab(void); +mpc_parser_t *mpc_escape(void); + +mpc_parser_t *mpc_digit(void); +mpc_parser_t *mpc_hexdigit(void); +mpc_parser_t *mpc_octdigit(void); +mpc_parser_t *mpc_digits(void); +mpc_parser_t *mpc_hexdigits(void); +mpc_parser_t *mpc_octdigits(void); + +mpc_parser_t *mpc_lower(void); +mpc_parser_t *mpc_upper(void); +mpc_parser_t *mpc_alpha(void); +mpc_parser_t *mpc_underscore(void); +mpc_parser_t *mpc_alphanum(void); + +mpc_parser_t *mpc_int(void); +mpc_parser_t *mpc_hex(void); +mpc_parser_t *mpc_oct(void); +mpc_parser_t *mpc_number(void); + +mpc_parser_t *mpc_real(void); +mpc_parser_t *mpc_float(void); + +mpc_parser_t *mpc_char_lit(void); +mpc_parser_t *mpc_string_lit(void); +mpc_parser_t *mpc_regex_lit(void); + +mpc_parser_t *mpc_ident(void); + +/* +** Useful Parsers +*/ + +mpc_parser_t *mpc_startwith(mpc_parser_t *a); +mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_stripl(mpc_parser_t *a); +mpc_parser_t *mpc_stripr(mpc_parser_t *a); +mpc_parser_t *mpc_strip(mpc_parser_t *a); +mpc_parser_t *mpc_tok(mpc_parser_t *a); +mpc_parser_t *mpc_sym(const char *s); +mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); +mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); + +mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); +mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t 
ad); +mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); + +/* +** Common Function Parameters +*/ + +void mpcf_dtor_null(mpc_val_t *x); + +mpc_val_t *mpcf_ctor_null(void); +mpc_val_t *mpcf_ctor_str(void); + +mpc_val_t *mpcf_free(mpc_val_t *x); +mpc_val_t *mpcf_int(mpc_val_t *x); +mpc_val_t *mpcf_hex(mpc_val_t *x); +mpc_val_t *mpcf_oct(mpc_val_t *x); +mpc_val_t *mpcf_float(mpc_val_t *x); +mpc_val_t *mpcf_strtriml(mpc_val_t *x); +mpc_val_t *mpcf_strtrimr(mpc_val_t *x); +mpc_val_t *mpcf_strtrim(mpc_val_t *x); + +mpc_val_t *mpcf_escape(mpc_val_t *x); +mpc_val_t *mpcf_escape_regex(mpc_val_t *x); +mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); +mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); + +mpc_val_t *mpcf_unescape(mpc_val_t *x); +mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); +mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); +mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); + +mpc_val_t *mpcf_null(int n, mpc_val_t** xs); +mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); +mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); +mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); + +mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs); + +mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs); +mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); +mpc_val_t *mpcf_maths(int n, mpc_val_t** xs); + +/* +** Regular Expression Parsers +*/ + +enum { + MPC_RE_DEFAULT = 0, + MPC_RE_M = 1, + MPC_RE_S = 2, + MPC_RE_MULTILINE = 1, + MPC_RE_DOTALL = 2 +}; + +mpc_parser_t *mpc_re(const char *re); +mpc_parser_t *mpc_re_mode(const char *re, int mode); + +/* +** AST +*/ + +typedef struct mpc_ast_t { + char *tag; + char *contents; + mpc_state_t state; + int children_num; + struct mpc_ast_t** children; +} mpc_ast_t; + +mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); +mpc_ast_t 
*mpc_ast_build(int n, const char *tag, ...); +mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); +mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); +mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); + +void mpc_ast_delete(mpc_ast_t *a); +void mpc_ast_print(mpc_ast_t *a); +void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); + +int mpc_ast_get_index(mpc_ast_t *ast, const char *tag); +int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb); +mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag); +mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb); + +typedef enum { + mpc_ast_trav_order_pre, + mpc_ast_trav_order_post +} mpc_ast_trav_order_t; + +typedef struct mpc_ast_trav_t { + mpc_ast_t *curr_node; + struct mpc_ast_trav_t *parent; + int curr_child; + mpc_ast_trav_order_t order; +} mpc_ast_trav_t; + +mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, + mpc_ast_trav_order_t order); + +mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav); + +void mpc_ast_traverse_free(mpc_ast_trav_t **trav); + +/* +** Warning: This function currently doesn't test for equality of the `state` member! 
+*/ +int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); + +mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); +mpc_val_t *mpcf_str_ast(mpc_val_t *c); +mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); + +mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); +mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); +mpc_parser_t *mpca_root(mpc_parser_t *a); +mpc_parser_t *mpca_state(mpc_parser_t *a); +mpc_parser_t *mpca_total(mpc_parser_t *a); + +mpc_parser_t *mpca_not(mpc_parser_t *a); +mpc_parser_t *mpca_maybe(mpc_parser_t *a); + +mpc_parser_t *mpca_many(mpc_parser_t *a); +mpc_parser_t *mpca_many1(mpc_parser_t *a); +mpc_parser_t *mpca_count(int n, mpc_parser_t *a); + +mpc_parser_t *mpca_or(int n, ...); +mpc_parser_t *mpca_and(int n, ...); + +enum { + MPCA_LANG_DEFAULT = 0, + MPCA_LANG_PREDICTIVE = 1, + MPCA_LANG_WHITESPACE_SENSITIVE = 2 +}; + +mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); + +mpc_err_t *mpca_lang(int flags, const char *language, ...); +mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); +mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); +mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); + +/* +** Misc +*/ + + +void mpc_print(mpc_parser_t *p); +void mpc_optimise(mpc_parser_t *p); +void mpc_stats(mpc_parser_t *p); + +int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)); + +int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tests/combinators.c b/tests/combinators.c index 0bd0ae9..4f55332 100644 --- a/tests/combinators.c +++ b/tests/combinators.c @@ -1,89 +1,89 @@ -#include "ptest.h" -#include "../mpc.h" - -static int check_is_a(mpc_val_t** x) { - return strcmp(*x, "a") == 0; -} - -static int check_is(mpc_val_t** x, void* t) { - return 
strcmp(*x, t) == 0; -} - -void test_check(void) { - int success; - mpc_result_t r; - mpc_parser_t* p = mpc_check(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is_a, "Expected 'a'"); - - success = mpc_parse("test", "a", p, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "a"); - if (success) free(r.output); else mpc_err_delete(r.error); - - success = mpc_parse("test", "b", p, &r); - PT_ASSERT(!success); - PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); - if (success) free(r.output); else mpc_err_delete(r.error); - - mpc_delete(p); -} - -void test_check_with(void) { - int success; - mpc_result_t r; - mpc_parser_t* p = mpc_check_with(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is, (void*)"a", "Expected 'a'"); - - success = mpc_parse("test", "a", p, &r); - PT_ASSERT(success); - if (success) PT_ASSERT_STR_EQ(r.output, "a"); - if (success) free(r.output); else mpc_err_delete(r.error); - - success = mpc_parse("test", "b", p, &r); - PT_ASSERT(!success); - if (!success) PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); - if (success) free(r.output); else mpc_err_delete(r.error); - - mpc_delete(p); -} - -void test_checkf(void) { - int success; - mpc_result_t r; - mpc_parser_t* p = mpc_checkf(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is_a, "Expected '%s'", "a"); - - success = mpc_parse("test", "a", p, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "a"); - if (success) free(r.output); else mpc_err_delete(r.error); - - success = mpc_parse("test", "b", p, &r); - PT_ASSERT(!success); - PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); - if (success) free(r.output); else mpc_err_delete(r.error); - - mpc_delete(p); -} - -void test_check_withf(void) { - int success; - mpc_result_t r; - mpc_parser_t* p = mpc_check_withf(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is, (void*)"a", "Expected '%s'", "a"); - - success = mpc_parse("test", "a", p, &r); - PT_ASSERT(success); - if (success) PT_ASSERT_STR_EQ(r.output, "a"); - if 
(success) free(r.output); else mpc_err_delete(r.error); - - success = mpc_parse("test", "b", p, &r); - PT_ASSERT(!success); - if (!success) PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); - if (success) free(r.output); else mpc_err_delete(r.error); - - mpc_delete(p); -} - -void suite_combinators(void) { - pt_add_test(test_check, "Test Check", "Suite Combinators"); - pt_add_test(test_check_with, "Test Check with", "Suite Combinators"); - pt_add_test(test_checkf, "Test Check F", "Suite Combinators"); - pt_add_test(test_check_withf, "Test Check with F", "Suite Combinators"); -} +#include "ptest.h" +#include "../mpc.h" + +static int check_is_a(mpc_val_t** x) { + return strcmp(*x, "a") == 0; +} + +static int check_is(mpc_val_t** x, void* t) { + return strcmp(*x, t) == 0; +} + +void test_check(void) { + int success; + mpc_result_t r; + mpc_parser_t* p = mpc_check(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is_a, "Expected 'a'"); + + success = mpc_parse("test", "a", p, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "a"); + if (success) free(r.output); else mpc_err_delete(r.error); + + success = mpc_parse("test", "b", p, &r); + PT_ASSERT(!success); + PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); + if (success) free(r.output); else mpc_err_delete(r.error); + + mpc_delete(p); +} + +void test_check_with(void) { + int success; + mpc_result_t r; + mpc_parser_t* p = mpc_check_with(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is, (void*)"a", "Expected 'a'"); + + success = mpc_parse("test", "a", p, &r); + PT_ASSERT(success); + if (success) PT_ASSERT_STR_EQ(r.output, "a"); + if (success) free(r.output); else mpc_err_delete(r.error); + + success = mpc_parse("test", "b", p, &r); + PT_ASSERT(!success); + if (!success) PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); + if (success) free(r.output); else mpc_err_delete(r.error); + + mpc_delete(p); +} + +void test_checkf(void) { + int success; + mpc_result_t r; + mpc_parser_t* p = 
mpc_checkf(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is_a, "Expected '%s'", "a"); + + success = mpc_parse("test", "a", p, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "a"); + if (success) free(r.output); else mpc_err_delete(r.error); + + success = mpc_parse("test", "b", p, &r); + PT_ASSERT(!success); + PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); + if (success) free(r.output); else mpc_err_delete(r.error); + + mpc_delete(p); +} + +void test_check_withf(void) { + int success; + mpc_result_t r; + mpc_parser_t* p = mpc_check_withf(mpc_or(2, mpc_char('a'), mpc_char('b')), free, check_is, (void*)"a", "Expected '%s'", "a"); + + success = mpc_parse("test", "a", p, &r); + PT_ASSERT(success); + if (success) PT_ASSERT_STR_EQ(r.output, "a"); + if (success) free(r.output); else mpc_err_delete(r.error); + + success = mpc_parse("test", "b", p, &r); + PT_ASSERT(!success); + if (!success) PT_ASSERT_STR_EQ(r.error->failure, "Expected 'a'"); + if (success) free(r.output); else mpc_err_delete(r.error); + + mpc_delete(p); +} + +void suite_combinators(void) { + pt_add_test(test_check, "Test Check", "Suite Combinators"); + pt_add_test(test_check_with, "Test Check with", "Suite Combinators"); + pt_add_test(test_checkf, "Test Check F", "Suite Combinators"); + pt_add_test(test_check_withf, "Test Check with F", "Suite Combinators"); +} diff --git a/tests/core.c b/tests/core.c index 0dae040..5864a58 100644 --- a/tests/core.c +++ b/tests/core.c @@ -1,234 +1,234 @@ -#include "ptest.h" -#include "../mpc.h" - -#include -#include - -static int int_eq(const void* x, const void* y) { return (*(int*)x == *(int*)y); } -static void int_print(const void* x) { printf("'%i'", *((int*)x)); } -static int streq(const void* x, const void* y) { return (strcmp(x, y) == 0); } -static void strprint(const void* x) { printf("'%s'", (char*)x); } - -void test_ident(void) { - - /* ^[a-zA-Z_][a-zA-Z0-9_]*$ */ - - mpc_parser_t* Ident = mpc_whole( - mpc_and(2, mpcf_strfold, - mpc_or(2, 
mpc_alpha(), mpc_underscore()), - mpc_many1(mpcf_strfold, mpc_or(3, mpc_alpha(), mpc_underscore(), mpc_digit())), - free), - free - ); - - PT_ASSERT(mpc_test_pass(Ident, "test", "test", streq, free, strprint)); - PT_ASSERT(mpc_test_fail(Ident, " blah", "", streq, free, strprint)); - PT_ASSERT(mpc_test_pass(Ident, "anoth21er", "anoth21er", streq, free, strprint)); - PT_ASSERT(mpc_test_pass(Ident, "du__de", "du__de", streq, free, strprint)); - PT_ASSERT(mpc_test_fail(Ident, "some spaces", "", streq, free, strprint)); - PT_ASSERT(mpc_test_fail(Ident, "", "", streq, free, strprint)); - PT_ASSERT(mpc_test_fail(Ident, "18nums", "", streq, free, strprint)); - - mpc_delete(Ident); - -} - -void test_maths(void) { - - mpc_parser_t *Expr, *Factor, *Term, *Maths; - int r0 = 1, r1 = 5, r2 = 13, r3 = 0, r4 = 2; - - Expr = mpc_new("expr"); - Factor = mpc_new("factor"); - Term = mpc_new("term"); - Maths = mpc_new("maths"); - - mpc_define(Expr, mpc_or(2, - mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free), - Factor - )); - - mpc_define(Factor, mpc_or(2, - mpc_and(3, mpcf_maths, Term, mpc_oneof("+-"), Term, free, free), - Term - )); - - mpc_define(Term, mpc_or(2, - mpc_int(), - mpc_parens(Expr, free) - )); - - mpc_define(Maths, mpc_whole(Expr, free)); - - PT_ASSERT(mpc_test_pass(Maths, "1", &r0, int_eq, free, int_print)); - PT_ASSERT(mpc_test_pass(Maths, "(5)", &r1, int_eq, free, int_print)); - PT_ASSERT(mpc_test_pass(Maths, "(4*2)+5", &r2, int_eq, free, int_print)); - PT_ASSERT(mpc_test_fail(Maths, "a", &r3, int_eq, free, int_print)); - PT_ASSERT(mpc_test_fail(Maths, "2b+4", &r4, int_eq, free, int_print)); - - mpc_cleanup(4, Expr, Factor, Term, Maths); -} - -void test_strip(void) { - - mpc_parser_t *Stripperl = mpc_apply(mpc_many(mpcf_strfold, mpc_any()), mpcf_strtriml); - mpc_parser_t *Stripperr = mpc_apply(mpc_many(mpcf_strfold, mpc_any()), mpcf_strtrimr); - mpc_parser_t *Stripper = mpc_apply(mpc_many(mpcf_strfold, mpc_any()), mpcf_strtrim); - - 
PT_ASSERT(mpc_test_pass(Stripperl, " asdmlm dasd ", "asdmlm dasd ", streq, free, strprint)); - PT_ASSERT(mpc_test_pass(Stripperr, " asdmlm dasd ", " asdmlm dasd", streq, free, strprint)); - PT_ASSERT(mpc_test_pass(Stripper, " asdmlm dasd ", "asdmlm dasd", streq, free, strprint)); - - mpc_delete(Stripperl); - mpc_delete(Stripperr); - mpc_delete(Stripper); - -} - -void test_repeat(void) { - - int success; - mpc_result_t r; - mpc_parser_t *p = mpc_count(3, mpcf_strfold, mpc_digit(), free); - - success = mpc_parse("test", "046", p, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "046"); - free(r.output); - - success = mpc_parse("test", "046aa", p, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "046"); - free(r.output); - - success = mpc_parse("test", "04632", p, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "046"); - free(r.output); - - success = mpc_parse("test", "04", p, &r); - PT_ASSERT(!success); - mpc_err_delete(r.error); - - mpc_delete(p); - -} - -void test_copy(void) { - - int success; - mpc_result_t r; - mpc_parser_t* p = mpc_or(2, mpc_char('a'), mpc_char('b')); - mpc_parser_t* q = mpc_and(2, mpcf_strfold, p, mpc_copy(p), free); - - success = mpc_parse("test", "aa", q, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "aa"); - free(r.output); - - success = mpc_parse("test", "bb", q, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "bb"); - free(r.output); - - success = mpc_parse("test", "ab", q, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "ab"); - free(r.output); - - success = mpc_parse("test", "ba", q, &r); - PT_ASSERT(success); - PT_ASSERT_STR_EQ(r.output, "ba"); - free(r.output); - - success = mpc_parse("test", "c", p, &r); - PT_ASSERT(!success); - mpc_err_delete(r.error); - - mpc_delete(mpc_copy(p)); - mpc_delete(mpc_copy(q)); - - mpc_delete(q); - -} - -static int line_count = 0; - -static mpc_val_t* read_line(mpc_val_t* line) { - line_count++; - return line; -} - -void test_reader(void) { - - 
mpc_parser_t* Line = mpc_many( - mpcf_strfold, - mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)); - - line_count = 0; - - PT_ASSERT(mpc_test_pass(Line, - "hello\nworld\n\nthis\nis\ndan", - "hello\nworld\n\nthis\nis\ndan", streq, free, strprint)); - - PT_ASSERT(line_count == 6); - - line_count = 0; - - PT_ASSERT(mpc_test_pass(Line, - "abcHVwufvyuevuy3y436782\n\n\nrehre\nrew\n-ql.;qa\neg", - "abcHVwufvyuevuy3y436782\n\n\nrehre\nrew\n-ql.;qa\neg", streq, free, strprint)); - - PT_ASSERT(line_count == 7); - - mpc_delete(Line); - -} - -static int token_count = 0; - -static mpc_val_t *print_token(mpc_val_t *x) { - /*printf("Token: '%s'\n", (char*)x);*/ - token_count++; - return x; -} - -void test_tokens(void) { - - mpc_parser_t* Tokens = mpc_many( - mpcf_strfold, - mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)); - - token_count = 0; - - PT_ASSERT(mpc_test_pass(Tokens, - " hello 4352 , \n foo.bar \n\n test:ing ", - "hello4352,foo.bartest:ing", streq, free, strprint)); - - PT_ASSERT(token_count == 9); - - mpc_delete(Tokens); - -} - -void test_eoi(void) { - - mpc_parser_t* Line = mpc_re("[^\\n]*$"); - - PT_ASSERT(mpc_test_pass(Line, "blah", "blah", streq, free, strprint)); - PT_ASSERT(mpc_test_pass(Line, "blah\n", "blah\n", streq, free, strprint)); - - mpc_delete(Line); - -} - -void suite_core(void) { - pt_add_test(test_ident, "Test Ident", "Suite Core"); - pt_add_test(test_maths, "Test Maths", "Suite Core"); - pt_add_test(test_strip, "Test Strip", "Suite Core"); - pt_add_test(test_repeat, "Test Repeat", "Suite Core"); - pt_add_test(test_copy, "Test Copy", "Suite Core"); - pt_add_test(test_reader, "Test Reader", "Suite Core"); - pt_add_test(test_tokens, "Test Tokens", "Suite Core"); - pt_add_test(test_eoi, "Test EOI", "Suite Core"); -} +#include "ptest.h" +#include "../mpc.h" + +#include +#include + +static int int_eq(const void* x, const void* y) { return (*(int*)x == *(int*)y); } +static void int_print(const void* x) { printf("'%i'", 
*((int*)x)); } +static int streq(const void* x, const void* y) { return (strcmp(x, y) == 0); } +static void strprint(const void* x) { printf("'%s'", (char*)x); } + +void test_ident(void) { + + /* ^[a-zA-Z_][a-zA-Z0-9_]*$ */ + + mpc_parser_t* Ident = mpc_whole( + mpc_and(2, mpcf_strfold, + mpc_or(2, mpc_alpha(), mpc_underscore()), + mpc_many1(mpcf_strfold, mpc_or(3, mpc_alpha(), mpc_underscore(), mpc_digit())), + free), + free + ); + + PT_ASSERT(mpc_test_pass(Ident, "test", "test", streq, free, strprint)); + PT_ASSERT(mpc_test_fail(Ident, " blah", "", streq, free, strprint)); + PT_ASSERT(mpc_test_pass(Ident, "anoth21er", "anoth21er", streq, free, strprint)); + PT_ASSERT(mpc_test_pass(Ident, "du__de", "du__de", streq, free, strprint)); + PT_ASSERT(mpc_test_fail(Ident, "some spaces", "", streq, free, strprint)); + PT_ASSERT(mpc_test_fail(Ident, "", "", streq, free, strprint)); + PT_ASSERT(mpc_test_fail(Ident, "18nums", "", streq, free, strprint)); + + mpc_delete(Ident); + +} + +void test_maths(void) { + + mpc_parser_t *Expr, *Factor, *Term, *Maths; + int r0 = 1, r1 = 5, r2 = 13, r3 = 0, r4 = 2; + + Expr = mpc_new("expr"); + Factor = mpc_new("factor"); + Term = mpc_new("term"); + Maths = mpc_new("maths"); + + mpc_define(Expr, mpc_or(2, + mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free), + Factor + )); + + mpc_define(Factor, mpc_or(2, + mpc_and(3, mpcf_maths, Term, mpc_oneof("+-"), Term, free, free), + Term + )); + + mpc_define(Term, mpc_or(2, + mpc_int(), + mpc_parens(Expr, free) + )); + + mpc_define(Maths, mpc_whole(Expr, free)); + + PT_ASSERT(mpc_test_pass(Maths, "1", &r0, int_eq, free, int_print)); + PT_ASSERT(mpc_test_pass(Maths, "(5)", &r1, int_eq, free, int_print)); + PT_ASSERT(mpc_test_pass(Maths, "(4*2)+5", &r2, int_eq, free, int_print)); + PT_ASSERT(mpc_test_fail(Maths, "a", &r3, int_eq, free, int_print)); + PT_ASSERT(mpc_test_fail(Maths, "2b+4", &r4, int_eq, free, int_print)); + + mpc_cleanup(4, Expr, Factor, Term, Maths); +} + +void 
test_strip(void) { + + mpc_parser_t *Stripperl = mpc_apply(mpc_many(mpcf_strfold, mpc_any()), mpcf_strtriml); + mpc_parser_t *Stripperr = mpc_apply(mpc_many(mpcf_strfold, mpc_any()), mpcf_strtrimr); + mpc_parser_t *Stripper = mpc_apply(mpc_many(mpcf_strfold, mpc_any()), mpcf_strtrim); + + PT_ASSERT(mpc_test_pass(Stripperl, " asdmlm dasd ", "asdmlm dasd ", streq, free, strprint)); + PT_ASSERT(mpc_test_pass(Stripperr, " asdmlm dasd ", " asdmlm dasd", streq, free, strprint)); + PT_ASSERT(mpc_test_pass(Stripper, " asdmlm dasd ", "asdmlm dasd", streq, free, strprint)); + + mpc_delete(Stripperl); + mpc_delete(Stripperr); + mpc_delete(Stripper); + +} + +void test_repeat(void) { + + int success; + mpc_result_t r; + mpc_parser_t *p = mpc_count(3, mpcf_strfold, mpc_digit(), free); + + success = mpc_parse("test", "046", p, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "046"); + free(r.output); + + success = mpc_parse("test", "046aa", p, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "046"); + free(r.output); + + success = mpc_parse("test", "04632", p, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "046"); + free(r.output); + + success = mpc_parse("test", "04", p, &r); + PT_ASSERT(!success); + mpc_err_delete(r.error); + + mpc_delete(p); + +} + +void test_copy(void) { + + int success; + mpc_result_t r; + mpc_parser_t* p = mpc_or(2, mpc_char('a'), mpc_char('b')); + mpc_parser_t* q = mpc_and(2, mpcf_strfold, p, mpc_copy(p), free); + + success = mpc_parse("test", "aa", q, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "aa"); + free(r.output); + + success = mpc_parse("test", "bb", q, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "bb"); + free(r.output); + + success = mpc_parse("test", "ab", q, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "ab"); + free(r.output); + + success = mpc_parse("test", "ba", q, &r); + PT_ASSERT(success); + PT_ASSERT_STR_EQ(r.output, "ba"); + free(r.output); + + success = mpc_parse("test", "c", p, 
&r); + PT_ASSERT(!success); + mpc_err_delete(r.error); + + mpc_delete(mpc_copy(p)); + mpc_delete(mpc_copy(q)); + + mpc_delete(q); + +} + +static int line_count = 0; + +static mpc_val_t* read_line(mpc_val_t* line) { + line_count++; + return line; +} + +void test_reader(void) { + + mpc_parser_t* Line = mpc_many( + mpcf_strfold, + mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)); + + line_count = 0; + + PT_ASSERT(mpc_test_pass(Line, + "hello\nworld\n\nthis\nis\ndan", + "hello\nworld\n\nthis\nis\ndan", streq, free, strprint)); + + PT_ASSERT(line_count == 6); + + line_count = 0; + + PT_ASSERT(mpc_test_pass(Line, + "abcHVwufvyuevuy3y436782\n\n\nrehre\nrew\n-ql.;qa\neg", + "abcHVwufvyuevuy3y436782\n\n\nrehre\nrew\n-ql.;qa\neg", streq, free, strprint)); + + PT_ASSERT(line_count == 7); + + mpc_delete(Line); + +} + +static int token_count = 0; + +static mpc_val_t *print_token(mpc_val_t *x) { + /*printf("Token: '%s'\n", (char*)x);*/ + token_count++; + return x; +} + +void test_tokens(void) { + + mpc_parser_t* Tokens = mpc_many( + mpcf_strfold, + mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)); + + token_count = 0; + + PT_ASSERT(mpc_test_pass(Tokens, + " hello 4352 , \n foo.bar \n\n test:ing ", + "hello4352,foo.bartest:ing", streq, free, strprint)); + + PT_ASSERT(token_count == 9); + + mpc_delete(Tokens); + +} + +void test_eoi(void) { + + mpc_parser_t* Line = mpc_re("[^\\n]*$"); + + PT_ASSERT(mpc_test_pass(Line, "blah", "blah", streq, free, strprint)); + PT_ASSERT(mpc_test_pass(Line, "blah\n", "blah\n", streq, free, strprint)); + + mpc_delete(Line); + +} + +void suite_core(void) { + pt_add_test(test_ident, "Test Ident", "Suite Core"); + pt_add_test(test_maths, "Test Maths", "Suite Core"); + pt_add_test(test_strip, "Test Strip", "Suite Core"); + pt_add_test(test_repeat, "Test Repeat", "Suite Core"); + pt_add_test(test_copy, "Test Copy", "Suite Core"); + pt_add_test(test_reader, "Test Reader", "Suite Core"); + pt_add_test(test_tokens, "Test Tokens", 
"Suite Core"); + pt_add_test(test_eoi, "Test EOI", "Suite Core"); +} diff --git a/tests/grammar.c b/tests/grammar.c index 6206e9c..cba80d5 100644 --- a/tests/grammar.c +++ b/tests/grammar.c @@ -1,413 +1,413 @@ -#include "ptest.h" -#include "../mpc.h" - -void test_grammar(void) { - - mpc_parser_t *Expr, *Prod, *Value, *Maths; - mpc_ast_t *t0, *t1, *t2; - - Expr = mpc_new("expression"); - Prod = mpc_new("product"); - Value = mpc_new("value"); - Maths = mpc_new("maths"); - - mpc_define(Expr, mpca_grammar(MPCA_LANG_DEFAULT, " (('+' | '-') )* ", Prod)); - mpc_define(Prod, mpca_grammar(MPCA_LANG_DEFAULT, " (('*' | '/') )* ", Value)); - mpc_define(Value, mpca_grammar(MPCA_LANG_DEFAULT, " /[0-9]+/ | '(' ')' ", Expr)); - mpc_define(Maths, mpca_total(Expr)); - - t0 = mpc_ast_new("product|value|regex", "24"); - t1 = mpc_ast_build(1, "product|>", - mpc_ast_build(3, "value|>", - mpc_ast_new("char", "("), - mpc_ast_new("expression|product|value|regex", "5"), - mpc_ast_new("char", ")"))); - - t2 = mpc_ast_build(3, ">", - - mpc_ast_build(3, "product|value|>", - mpc_ast_new("char", "("), - mpc_ast_build(3, "expression|>", - - mpc_ast_build(5, "product|>", - mpc_ast_new("value|regex", "4"), - mpc_ast_new("char", "*"), - mpc_ast_new("value|regex", "2"), - mpc_ast_new("char", "*"), - mpc_ast_new("value|regex", "11")), - - mpc_ast_new("char", "+"), - mpc_ast_new("product|value|regex", "2")), - mpc_ast_new("char", ")")), - - mpc_ast_new("char", "+"), - mpc_ast_new("product|value|regex", "5")); - - PT_ASSERT(mpc_test_pass(Maths, " 24 ", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - PT_ASSERT(mpc_test_pass(Maths, "(5)", t1, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - PT_ASSERT(mpc_test_pass(Maths, "(4 * 2 * 11 + 2) + 5", t2, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - 
PT_ASSERT(mpc_test_fail(Maths, "a", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - PT_ASSERT(mpc_test_fail(Maths, "2b+4", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - - mpc_ast_delete(t0); - mpc_ast_delete(t1); - mpc_ast_delete(t2); - - mpc_cleanup(4, Expr, Prod, Value, Maths); - -} - -void test_language(void) { - - mpc_parser_t *Expr, *Prod, *Value, *Maths; - - Expr = mpc_new("expression"); - Prod = mpc_new("product"); - Value = mpc_new("value"); - Maths = mpc_new("maths"); - - mpca_lang(MPCA_LANG_DEFAULT, - " expression : (('+' | '-') )*; " - " product : (('*' | '/') )*; " - " value : /[0-9]+/ | '(' ')'; " - " maths : /^/ /$/; ", - Expr, Prod, Value, Maths); - - mpc_cleanup(4, Expr, Prod, Value, Maths); -} - -void test_language_file(void) { - - mpc_parser_t *Expr, *Prod, *Value, *Maths; - - Expr = mpc_new("expression"); - Prod = mpc_new("product"); - Value = mpc_new("value"); - Maths = mpc_new("maths"); - - mpca_lang_contents(MPCA_LANG_DEFAULT, "./tests/maths.grammar", Expr, Prod, Value, Maths); - - mpc_cleanup(4, Expr, Prod, Value, Maths); - -} - -void test_doge(void) { - - mpc_ast_t *t0; - mpc_parser_t* Adjective = mpc_new("adjective"); - mpc_parser_t* Noun = mpc_new("noun"); - mpc_parser_t* Phrase = mpc_new("phrase"); - mpc_parser_t* Doge = mpc_new("doge"); - - mpca_lang(MPCA_LANG_DEFAULT, - " adjective : \"wow\" | \"many\" | \"so\" | \"such\"; " - " noun : \"lisp\" | \"language\" | \"c\" | \"book\" | \"build\"; " - " phrase : ; " - " doge : /^/ * /$/; ", - Adjective, Noun, Phrase, Doge, NULL); - - t0 = - mpc_ast_build(4, ">", - mpc_ast_new("regex", ""), - mpc_ast_build(2, "phrase|>", - mpc_ast_new("adjective|string", "so"), - mpc_ast_new("noun|string", "c")), - mpc_ast_build(2, "phrase|>", - mpc_ast_new("adjective|string", "so"), - mpc_ast_new("noun|string", "c")), - mpc_ast_new("regex", "") - ); - - 
PT_ASSERT(mpc_test_pass(Doge, "so c so c", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - - PT_ASSERT(mpc_test_fail(Doge, "so a so c", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); - - mpc_ast_delete(t0); - - mpc_cleanup(4, Adjective, Noun, Phrase, Doge); - -} - -void test_partial(void) { - - mpc_ast_t *t0; - mpc_err_t *err; - - mpc_parser_t *Line = mpc_new("line"); - mpc_parser_t *Number = mpc_new("number"); - mpc_parser_t *QuotedString = mpc_new("quoted_string"); - mpc_parser_t *LinePragma = mpc_new("linepragma"); - mpc_parser_t *Parser = mpc_new("parser"); - - mpc_define(Line, mpca_tag(mpc_apply(mpc_sym("#line"), mpcf_str_ast), "string")); - - err = mpca_lang(MPCA_LANG_PREDICTIVE, - "number : /[0-9]+/ ;\n" - "quoted_string : /\"(\\.|[^\"])*\"/ ;\n" - "linepragma : ;\n" - "parser : /^/ ()* /$/ ;\n", - Line, Number, QuotedString, LinePragma, Parser, NULL); - - PT_ASSERT(err == NULL); - - t0 = mpc_ast_build(3, ">", - mpc_ast_new("regex", ""), - mpc_ast_build(3, "linepragma|>", - mpc_ast_new("line|string", "#line"), - mpc_ast_new("number|regex", "10"), - mpc_ast_new("quoted_string|regex", "\"test\"")), - mpc_ast_new("regex", "")); - - PT_ASSERT(mpc_test_pass(Parser, "#line 10 \"test\"", t0, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - mpc_ast_delete(t0); - - mpc_cleanup(5, Line, Number, QuotedString, LinePragma, Parser); - -} - -void test_qscript(void) { - - mpc_ast_t *t0; - mpc_parser_t *Qscript = mpc_new("qscript"); - mpc_parser_t *Comment = mpc_new("comment"); - mpc_parser_t *Resource = mpc_new("resource"); - mpc_parser_t *Rtype = mpc_new("rtype"); - mpc_parser_t *Rname = mpc_new("rname"); - mpc_parser_t *InnerBlock = mpc_new("inner_block"); - mpc_parser_t *Statement = mpc_new("statement"); - mpc_parser_t *Function = mpc_new("function"); - 
mpc_parser_t *Parameter = mpc_new("parameter"); - mpc_parser_t *Literal = mpc_new("literal"); - mpc_parser_t *Block = mpc_new("block"); - mpc_parser_t *Seperator = mpc_new("seperator"); - mpc_parser_t *Qstring = mpc_new("qstring"); - mpc_parser_t *SimpleStr = mpc_new("simplestr"); - mpc_parser_t *ComplexStr = mpc_new("complexstr"); - mpc_parser_t *Number = mpc_new("number"); - mpc_parser_t *Float = mpc_new("float"); - mpc_parser_t *Int = mpc_new("int"); - - mpc_err_t *err = mpca_lang(0, - " qscript : /^/ ( | )* /$/ ;\n" - " comment : '#' /[^\\n]*/ ;\n" - "resource : '[' ( ) ']' ;\n" - " rtype : /[*]*/ ;\n" - " rname : ;\n" - "\n" - "inner_block : ( | )* ;\n" - " statement : '(' ( | | )* ')' ;\n" - " function : ;\n" - " parameter : ( | ) ;\n" - " literal : ( | ) ;\n" - " block : '{' '}' ;\n" - " seperator : ',' | \"\" ;\n" - "\n" - "qstring : ( | ) * ;\n" - " simplestr : /[a-zA-Z0-9_!@#$%^&\\*_+\\-\\.=\\/<>]+/ ;\n" - " complexstr : (/\"[^\"]*\"/ | /'[^']*'/) ;\n" - "\n" - "number : ( | ) ;\n" - " float : /[-+]?[0-9]+\\.[0-9]+/ ;\n" - " int : /[-+]?[0-9]+/ ;\n", - Qscript, Comment, Resource, Rtype, Rname, InnerBlock, Statement, Function, - Parameter, Literal, Block, Seperator, Qstring, SimpleStr, ComplexStr, Number, - Float, Int, NULL); - - PT_ASSERT(err == NULL); - - t0 = mpc_ast_build(3, ">", - mpc_ast_new("regex", ""), - mpc_ast_build(5, "resource|>", - mpc_ast_new("char", "["), - mpc_ast_new("rtype|regex", ""), - mpc_ast_new("rname|qstring|simplestr|regex", "my_func"), - mpc_ast_new("char", "]"), - mpc_ast_build(5, "inner_block|statement|>", - mpc_ast_new("function|qstring|simplestr|regex", "echo"), - mpc_ast_new("char", "("), - mpc_ast_build(2, "parameter|literal|>", - mpc_ast_build(2, "qstring|>", - mpc_ast_new("simplestr|regex", "a"), - mpc_ast_build(2, "qstring|>", - mpc_ast_new("simplestr|regex", "b"), - mpc_ast_new("qstring|simplestr|regex", "c") - ) - ), - mpc_ast_new("seperator|string", "") - ), - mpc_ast_new("char", ")"), - 
mpc_ast_new("seperator|string", "") - ) - ), - mpc_ast_new("regex", "")); - - PT_ASSERT(mpc_test_pass(Qscript, "[my_func]\n echo (a b c)\n", t0, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - mpc_ast_delete(t0); - - mpc_cleanup(18, Qscript, Comment, Resource, Rtype, Rname, InnerBlock, - Statement, Function, Parameter, Literal, Block, Seperator, Qstring, - SimpleStr, ComplexStr, Number, Float, Int); - -} - -void test_missingrule(void) { - - int result; - mpc_err_t *err; - mpc_result_t r; - mpc_parser_t *Parser = mpc_new("parser"); - - err = mpca_lang(MPCA_LANG_DEFAULT, - "parser : /^/ ()* /$/ ;\n", - Parser, NULL); - - PT_ASSERT(err == NULL); - - result = mpc_parse("", "test", Parser, &r); - - PT_ASSERT(result == 0); - PT_ASSERT(r.error != NULL); - PT_ASSERT(strcmp(r.error->failure, "Unknown Parser 'missing'!") == 0); - - mpc_err_delete(r.error); - mpc_cleanup(1, Parser); - -} - -void test_regex_mode(void) { - - mpc_parser_t *Line0, *Line1, *Line2, *Line3; - mpc_ast_t *t0, *t1, *t2, *t3, *t4; - - Line0 = mpc_new("line0"); - Line1 = mpc_new("line1"); - Line2 = mpc_new("line2"); - Line3 = mpc_new("line3"); - - mpca_lang(MPCA_LANG_DEFAULT, " line0 : /.*/; ", Line0); - mpca_lang(MPCA_LANG_DEFAULT, " line1 : /.*/s; ", Line1); - mpca_lang(MPCA_LANG_DEFAULT, " line2 : /(^[a-z]*$)*/; ", Line2); - mpca_lang(MPCA_LANG_DEFAULT, " line3 : /(^[a-z]*$)*/m; ", Line3); - - t0 = mpc_ast_new("regex", "blah"); - t1 = mpc_ast_new("regex", "blah\nblah"); - t2 = mpc_ast_new("regex", ""); - t3 = mpc_ast_new("regex", "blah"); - t4 = mpc_ast_new("regex", "blah\nblah"); - - PT_ASSERT(mpc_test_pass(Line0, "blah\nblah", t0, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - PT_ASSERT(mpc_test_pass(Line1, "blah\nblah", t1, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - 
PT_ASSERT(mpc_test_pass(Line2, "blah\nblah", t2, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - PT_ASSERT(mpc_test_pass(Line2, "blah", t3, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - PT_ASSERT(mpc_test_pass(Line3, "blah\nblah", t4, - (int(*)(const void*,const void*))mpc_ast_eq, - (mpc_dtor_t)mpc_ast_delete, - (void(*)(const void*))mpc_ast_print)); - - mpc_ast_delete(t0); - mpc_ast_delete(t1); - mpc_ast_delete(t2); - mpc_ast_delete(t3); - mpc_ast_delete(t4); - - mpc_cleanup(4, Line0, Line1, Line2, Line3); -} - -void test_digits_file(void) { - - FILE *f; - mpc_result_t r; - mpc_parser_t *Digit = mpc_new("digit"); - mpc_parser_t *Program = mpc_new("program"); - mpc_ast_t* t0; - - mpc_err_t* err = mpca_lang(MPCA_LANG_DEFAULT, - " digit : /[0-9]/ ;" - " program : /^/ + /$/ ;" - , Digit, Program, NULL); - - PT_ASSERT(err == NULL); - - t0 = mpc_ast_build(5, ">", - mpc_ast_new("regex", ""), - mpc_ast_new("digit|regex", "1"), - mpc_ast_new("digit|regex", "2"), - mpc_ast_new("digit|regex", "3"), - mpc_ast_new("regex", "")); - - if (mpc_parse_contents("tests/digits.txt", Program, &r)) { - PT_ASSERT(1); - PT_ASSERT(mpc_ast_eq(t0, r.output)); - mpc_ast_delete(r.output); - } else { - PT_ASSERT(0); - mpc_err_print(r.error); - mpc_err_delete(r.error); - } - - f = fopen("tests/digits.txt", "r"); - PT_ASSERT(f != NULL); - - if (mpc_parse_file("tests/digits.txt", f, Program, &r)) { - PT_ASSERT(1); - PT_ASSERT(mpc_ast_eq(t0, r.output)); - mpc_ast_delete(r.output); - } else { - PT_ASSERT(0); - mpc_err_print(r.error); - mpc_err_delete(r.error); - } - - fclose(f); - - if (mpc_parse("tests/digits.txt", "123", Program, &r)) { - PT_ASSERT(1); - PT_ASSERT(mpc_ast_eq(t0, r.output)); - mpc_ast_delete(r.output); - } else { - PT_ASSERT(0); - mpc_err_print(r.error); - mpc_err_delete(r.error); - } - - mpc_ast_delete(t0); - - mpc_cleanup(2, Digit, 
Program); - -} - -void suite_grammar(void) { - pt_add_test(test_grammar, "Test Grammar", "Suite Grammar"); - pt_add_test(test_language, "Test Language", "Suite Grammar"); - pt_add_test(test_language_file, "Test Language File", "Suite Grammar"); - pt_add_test(test_doge, "Test Doge", "Suite Grammar"); - pt_add_test(test_partial, "Test Partial", "Suite Grammar"); - pt_add_test(test_qscript, "Test QScript", "Suite Grammar"); - pt_add_test(test_missingrule, "Test Missing Rule", "Suite Grammar"); - pt_add_test(test_regex_mode, "Test Regex Mode", "Suite Grammar"); - pt_add_test(test_digits_file, "Test Digits File", "Suite Grammar"); -} +#include "ptest.h" +#include "../mpc.h" + +void test_grammar(void) { + + mpc_parser_t *Expr, *Prod, *Value, *Maths; + mpc_ast_t *t0, *t1, *t2; + + Expr = mpc_new("expression"); + Prod = mpc_new("product"); + Value = mpc_new("value"); + Maths = mpc_new("maths"); + + mpc_define(Expr, mpca_grammar(MPCA_LANG_DEFAULT, " (('+' | '-') )* ", Prod)); + mpc_define(Prod, mpca_grammar(MPCA_LANG_DEFAULT, " (('*' | '/') )* ", Value)); + mpc_define(Value, mpca_grammar(MPCA_LANG_DEFAULT, " /[0-9]+/ | '(' ')' ", Expr)); + mpc_define(Maths, mpca_total(Expr)); + + t0 = mpc_ast_new("product|value|regex", "24"); + t1 = mpc_ast_build(1, "product|>", + mpc_ast_build(3, "value|>", + mpc_ast_new("char", "("), + mpc_ast_new("expression|product|value|regex", "5"), + mpc_ast_new("char", ")"))); + + t2 = mpc_ast_build(3, ">", + + mpc_ast_build(3, "product|value|>", + mpc_ast_new("char", "("), + mpc_ast_build(3, "expression|>", + + mpc_ast_build(5, "product|>", + mpc_ast_new("value|regex", "4"), + mpc_ast_new("char", "*"), + mpc_ast_new("value|regex", "2"), + mpc_ast_new("char", "*"), + mpc_ast_new("value|regex", "11")), + + mpc_ast_new("char", "+"), + mpc_ast_new("product|value|regex", "2")), + mpc_ast_new("char", ")")), + + mpc_ast_new("char", "+"), + mpc_ast_new("product|value|regex", "5")); + + PT_ASSERT(mpc_test_pass(Maths, " 24 ", t0, (int(*)(const void*,const 
void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + PT_ASSERT(mpc_test_pass(Maths, "(5)", t1, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + PT_ASSERT(mpc_test_pass(Maths, "(4 * 2 * 11 + 2) + 5", t2, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + PT_ASSERT(mpc_test_fail(Maths, "a", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + PT_ASSERT(mpc_test_fail(Maths, "2b+4", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + + mpc_ast_delete(t0); + mpc_ast_delete(t1); + mpc_ast_delete(t2); + + mpc_cleanup(4, Expr, Prod, Value, Maths); + +} + +void test_language(void) { + + mpc_parser_t *Expr, *Prod, *Value, *Maths; + + Expr = mpc_new("expression"); + Prod = mpc_new("product"); + Value = mpc_new("value"); + Maths = mpc_new("maths"); + + mpca_lang(MPCA_LANG_DEFAULT, + " expression : (('+' | '-') )*; " + " product : (('*' | '/') )*; " + " value : /[0-9]+/ | '(' ')'; " + " maths : /^/ /$/; ", + Expr, Prod, Value, Maths); + + mpc_cleanup(4, Expr, Prod, Value, Maths); +} + +void test_language_file(void) { + + mpc_parser_t *Expr, *Prod, *Value, *Maths; + + Expr = mpc_new("expression"); + Prod = mpc_new("product"); + Value = mpc_new("value"); + Maths = mpc_new("maths"); + + mpca_lang_contents(MPCA_LANG_DEFAULT, "./tests/maths.grammar", Expr, Prod, Value, Maths); + + mpc_cleanup(4, Expr, Prod, Value, Maths); + +} + +void test_doge(void) { + + mpc_ast_t *t0; + mpc_parser_t* Adjective = mpc_new("adjective"); + mpc_parser_t* Noun = mpc_new("noun"); + mpc_parser_t* Phrase = mpc_new("phrase"); + mpc_parser_t* Doge = mpc_new("doge"); + + mpca_lang(MPCA_LANG_DEFAULT, + " adjective : \"wow\" | \"many\" | \"so\" | \"such\"; " + " noun : \"lisp\" | \"language\" | \"c\" | \"book\" | 
\"build\"; " + " phrase : ; " + " doge : /^/ * /$/; ", + Adjective, Noun, Phrase, Doge, NULL); + + t0 = + mpc_ast_build(4, ">", + mpc_ast_new("regex", ""), + mpc_ast_build(2, "phrase|>", + mpc_ast_new("adjective|string", "so"), + mpc_ast_new("noun|string", "c")), + mpc_ast_build(2, "phrase|>", + mpc_ast_new("adjective|string", "so"), + mpc_ast_new("noun|string", "c")), + mpc_ast_new("regex", "") + ); + + PT_ASSERT(mpc_test_pass(Doge, "so c so c", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + + PT_ASSERT(mpc_test_fail(Doge, "so a so c", t0, (int(*)(const void*,const void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(const void*))mpc_ast_print)); + + mpc_ast_delete(t0); + + mpc_cleanup(4, Adjective, Noun, Phrase, Doge); + +} + +void test_partial(void) { + + mpc_ast_t *t0; + mpc_err_t *err; + + mpc_parser_t *Line = mpc_new("line"); + mpc_parser_t *Number = mpc_new("number"); + mpc_parser_t *QuotedString = mpc_new("quoted_string"); + mpc_parser_t *LinePragma = mpc_new("linepragma"); + mpc_parser_t *Parser = mpc_new("parser"); + + mpc_define(Line, mpca_tag(mpc_apply(mpc_sym("#line"), mpcf_str_ast), "string")); + + err = mpca_lang(MPCA_LANG_PREDICTIVE, + "number : /[0-9]+/ ;\n" + "quoted_string : /\"(\\.|[^\"])*\"/ ;\n" + "linepragma : ;\n" + "parser : /^/ ()* /$/ ;\n", + Line, Number, QuotedString, LinePragma, Parser, NULL); + + PT_ASSERT(err == NULL); + + t0 = mpc_ast_build(3, ">", + mpc_ast_new("regex", ""), + mpc_ast_build(3, "linepragma|>", + mpc_ast_new("line|string", "#line"), + mpc_ast_new("number|regex", "10"), + mpc_ast_new("quoted_string|regex", "\"test\"")), + mpc_ast_new("regex", "")); + + PT_ASSERT(mpc_test_pass(Parser, "#line 10 \"test\"", t0, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + mpc_ast_delete(t0); + + mpc_cleanup(5, Line, Number, QuotedString, LinePragma, Parser); + +} + +void test_qscript(void) 
{ + + mpc_ast_t *t0; + mpc_parser_t *Qscript = mpc_new("qscript"); + mpc_parser_t *Comment = mpc_new("comment"); + mpc_parser_t *Resource = mpc_new("resource"); + mpc_parser_t *Rtype = mpc_new("rtype"); + mpc_parser_t *Rname = mpc_new("rname"); + mpc_parser_t *InnerBlock = mpc_new("inner_block"); + mpc_parser_t *Statement = mpc_new("statement"); + mpc_parser_t *Function = mpc_new("function"); + mpc_parser_t *Parameter = mpc_new("parameter"); + mpc_parser_t *Literal = mpc_new("literal"); + mpc_parser_t *Block = mpc_new("block"); + mpc_parser_t *Seperator = mpc_new("seperator"); + mpc_parser_t *Qstring = mpc_new("qstring"); + mpc_parser_t *SimpleStr = mpc_new("simplestr"); + mpc_parser_t *ComplexStr = mpc_new("complexstr"); + mpc_parser_t *Number = mpc_new("number"); + mpc_parser_t *Float = mpc_new("float"); + mpc_parser_t *Int = mpc_new("int"); + + mpc_err_t *err = mpca_lang(0, + " qscript : /^/ ( | )* /$/ ;\n" + " comment : '#' /[^\\n]*/ ;\n" + "resource : '[' ( ) ']' ;\n" + " rtype : /[*]*/ ;\n" + " rname : ;\n" + "\n" + "inner_block : ( | )* ;\n" + " statement : '(' ( | | )* ')' ;\n" + " function : ;\n" + " parameter : ( | ) ;\n" + " literal : ( | ) ;\n" + " block : '{' '}' ;\n" + " seperator : ',' | \"\" ;\n" + "\n" + "qstring : ( | ) * ;\n" + " simplestr : /[a-zA-Z0-9_!@#$%^&\\*_+\\-\\.=\\/<>]+/ ;\n" + " complexstr : (/\"[^\"]*\"/ | /'[^']*'/) ;\n" + "\n" + "number : ( | ) ;\n" + " float : /[-+]?[0-9]+\\.[0-9]+/ ;\n" + " int : /[-+]?[0-9]+/ ;\n", + Qscript, Comment, Resource, Rtype, Rname, InnerBlock, Statement, Function, + Parameter, Literal, Block, Seperator, Qstring, SimpleStr, ComplexStr, Number, + Float, Int, NULL); + + PT_ASSERT(err == NULL); + + t0 = mpc_ast_build(3, ">", + mpc_ast_new("regex", ""), + mpc_ast_build(5, "resource|>", + mpc_ast_new("char", "["), + mpc_ast_new("rtype|regex", ""), + mpc_ast_new("rname|qstring|simplestr|regex", "my_func"), + mpc_ast_new("char", "]"), + mpc_ast_build(5, "inner_block|statement|>", + 
mpc_ast_new("function|qstring|simplestr|regex", "echo"), + mpc_ast_new("char", "("), + mpc_ast_build(2, "parameter|literal|>", + mpc_ast_build(2, "qstring|>", + mpc_ast_new("simplestr|regex", "a"), + mpc_ast_build(2, "qstring|>", + mpc_ast_new("simplestr|regex", "b"), + mpc_ast_new("qstring|simplestr|regex", "c") + ) + ), + mpc_ast_new("seperator|string", "") + ), + mpc_ast_new("char", ")"), + mpc_ast_new("seperator|string", "") + ) + ), + mpc_ast_new("regex", "")); + + PT_ASSERT(mpc_test_pass(Qscript, "[my_func]\n echo (a b c)\n", t0, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + mpc_ast_delete(t0); + + mpc_cleanup(18, Qscript, Comment, Resource, Rtype, Rname, InnerBlock, + Statement, Function, Parameter, Literal, Block, Seperator, Qstring, + SimpleStr, ComplexStr, Number, Float, Int); + +} + +void test_missingrule(void) { + + int result; + mpc_err_t *err; + mpc_result_t r; + mpc_parser_t *Parser = mpc_new("parser"); + + err = mpca_lang(MPCA_LANG_DEFAULT, + "parser : /^/ ()* /$/ ;\n", + Parser, NULL); + + PT_ASSERT(err == NULL); + + result = mpc_parse("", "test", Parser, &r); + + PT_ASSERT(result == 0); + PT_ASSERT(r.error != NULL); + PT_ASSERT(strcmp(r.error->failure, "Unknown Parser 'missing'!") == 0); + + mpc_err_delete(r.error); + mpc_cleanup(1, Parser); + +} + +void test_regex_mode(void) { + + mpc_parser_t *Line0, *Line1, *Line2, *Line3; + mpc_ast_t *t0, *t1, *t2, *t3, *t4; + + Line0 = mpc_new("line0"); + Line1 = mpc_new("line1"); + Line2 = mpc_new("line2"); + Line3 = mpc_new("line3"); + + mpca_lang(MPCA_LANG_DEFAULT, " line0 : /.*/; ", Line0); + mpca_lang(MPCA_LANG_DEFAULT, " line1 : /.*/s; ", Line1); + mpca_lang(MPCA_LANG_DEFAULT, " line2 : /(^[a-z]*$)*/; ", Line2); + mpca_lang(MPCA_LANG_DEFAULT, " line3 : /(^[a-z]*$)*/m; ", Line3); + + t0 = mpc_ast_new("regex", "blah"); + t1 = mpc_ast_new("regex", "blah\nblah"); + t2 = mpc_ast_new("regex", ""); + t3 = mpc_ast_new("regex", "blah"); 
+ t4 = mpc_ast_new("regex", "blah\nblah"); + + PT_ASSERT(mpc_test_pass(Line0, "blah\nblah", t0, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + PT_ASSERT(mpc_test_pass(Line1, "blah\nblah", t1, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + PT_ASSERT(mpc_test_pass(Line2, "blah\nblah", t2, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + PT_ASSERT(mpc_test_pass(Line2, "blah", t3, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + PT_ASSERT(mpc_test_pass(Line3, "blah\nblah", t4, + (int(*)(const void*,const void*))mpc_ast_eq, + (mpc_dtor_t)mpc_ast_delete, + (void(*)(const void*))mpc_ast_print)); + + mpc_ast_delete(t0); + mpc_ast_delete(t1); + mpc_ast_delete(t2); + mpc_ast_delete(t3); + mpc_ast_delete(t4); + + mpc_cleanup(4, Line0, Line1, Line2, Line3); +} + +void test_digits_file(void) { + + FILE *f; + mpc_result_t r; + mpc_parser_t *Digit = mpc_new("digit"); + mpc_parser_t *Program = mpc_new("program"); + mpc_ast_t* t0; + + mpc_err_t* err = mpca_lang(MPCA_LANG_DEFAULT, + " digit : /[0-9]/ ;" + " program : /^/ + /$/ ;" + , Digit, Program, NULL); + + PT_ASSERT(err == NULL); + + t0 = mpc_ast_build(5, ">", + mpc_ast_new("regex", ""), + mpc_ast_new("digit|regex", "1"), + mpc_ast_new("digit|regex", "2"), + mpc_ast_new("digit|regex", "3"), + mpc_ast_new("regex", "")); + + if (mpc_parse_contents("tests/digits.txt", Program, &r)) { + PT_ASSERT(1); + PT_ASSERT(mpc_ast_eq(t0, r.output)); + mpc_ast_delete(r.output); + } else { + PT_ASSERT(0); + mpc_err_print(r.error); + mpc_err_delete(r.error); + } + + f = fopen("tests/digits.txt", "r"); + PT_ASSERT(f != NULL); + + if (mpc_parse_file("tests/digits.txt", f, Program, &r)) { + PT_ASSERT(1); + PT_ASSERT(mpc_ast_eq(t0, r.output)); + 
mpc_ast_delete(r.output); + } else { + PT_ASSERT(0); + mpc_err_print(r.error); + mpc_err_delete(r.error); + } + + fclose(f); + + if (mpc_parse("tests/digits.txt", "123", Program, &r)) { + PT_ASSERT(1); + PT_ASSERT(mpc_ast_eq(t0, r.output)); + mpc_ast_delete(r.output); + } else { + PT_ASSERT(0); + mpc_err_print(r.error); + mpc_err_delete(r.error); + } + + mpc_ast_delete(t0); + + mpc_cleanup(2, Digit, Program); + +} + +void suite_grammar(void) { + pt_add_test(test_grammar, "Test Grammar", "Suite Grammar"); + pt_add_test(test_language, "Test Language", "Suite Grammar"); + pt_add_test(test_language_file, "Test Language File", "Suite Grammar"); + pt_add_test(test_doge, "Test Doge", "Suite Grammar"); + pt_add_test(test_partial, "Test Partial", "Suite Grammar"); + pt_add_test(test_qscript, "Test QScript", "Suite Grammar"); + pt_add_test(test_missingrule, "Test Missing Rule", "Suite Grammar"); + pt_add_test(test_regex_mode, "Test Regex Mode", "Suite Grammar"); + pt_add_test(test_digits_file, "Test Digits File", "Suite Grammar"); +} diff --git a/tests/maths.grammar b/tests/maths.grammar index 2010de4..d2cbea2 100644 --- a/tests/maths.grammar +++ b/tests/maths.grammar @@ -1,7 +1,7 @@ -expression : (('+' | '-') )*; - -product : (('*' | '/') )*; - -value : /[0-9]+/ | '(' ')'; - +expression : (('+' | '-') )*; + +product : (('*' | '/') )*; + +value : /[0-9]+/ | '(' ')'; + maths : /^/ /$/; \ No newline at end of file diff --git a/tests/regex.c b/tests/regex.c index 38f9519..02c9d93 100644 --- a/tests/regex.c +++ b/tests/regex.c @@ -1,181 +1,181 @@ -#include "ptest.h" -#include "../mpc.h" - -#include -#include - -static int string_eq(const void* x, const void* y) { return (strcmp(x, y) == 0); } -static void string_print(const void* x) { printf("'%s'", (char*)x); } - -int regex_test_pass(mpc_parser_t *p, const char* value, const char* match) { - return mpc_test_pass(p, value, match, string_eq, free, string_print); -} - -int regex_test_fail(mpc_parser_t *p, const char* value, const 
char* match) { - return mpc_test_fail(p, value, match, string_eq, free, string_print); -} - -void test_regex_basic(void) { - - mpc_parser_t *re0, *re1, *re2, *re3, *re4, *re5; - - re0 = mpc_re("abc|bcd"); - re1 = mpc_re("abc|bcd|e"); - re2 = mpc_re("ab()c(ab)*"); - re3 = mpc_re("abc(abdd)?"); - re4 = mpc_re("ab|c(abdd)?"); - re5 = mpc_re("abc(ab|dd)+g$"); - - PT_ASSERT(regex_test_pass(re0, "abc", "abc")); - PT_ASSERT(regex_test_pass(re0, "bcd", "bcd")); - PT_ASSERT(regex_test_fail(re0, "bc", "bc")); - PT_ASSERT(regex_test_fail(re0, "ab", "ab")); - PT_ASSERT(regex_test_pass(re1, "e", "e")); - PT_ASSERT(regex_test_pass(re2, "abc", "abc")); - PT_ASSERT(regex_test_pass(re2, "abcabab", "abcabab")); - PT_ASSERT(regex_test_pass(re2, "abcababd", "abcabab")); - PT_ASSERT(regex_test_pass(re5, "abcddg", "abcddg")); - - mpc_delete(re0); - mpc_delete(re1); - mpc_delete(re2); - mpc_delete(re3); - mpc_delete(re4); - mpc_delete(re5); - -} - -void test_regex_boundary(void) { - - mpc_parser_t *re0, *re1, *re2; - - re0 = mpc_re("\\bfoo\\b"); - re1 = mpc_re("(w| )?\\bfoo\\b"); - re2 = mpc_re("py\\B.*"); - - PT_ASSERT(regex_test_pass(re0, "foo", "foo")); - PT_ASSERT(regex_test_pass(re0, "foo.", "foo")); - PT_ASSERT(regex_test_pass(re0, "foo)", "foo")); - PT_ASSERT(regex_test_pass(re0, "foo baz", "foo")); - - PT_ASSERT(regex_test_fail(re0, "foobar", "foo")); - PT_ASSERT(regex_test_fail(re0, "foo3", "foo")); - - PT_ASSERT(regex_test_pass(re1, "foo", "foo")); - PT_ASSERT(regex_test_pass(re1, " foo", " foo")); - PT_ASSERT(regex_test_fail(re1, "wfoo", "foo")); - - PT_ASSERT(regex_test_pass(re2, "python", "python")); - PT_ASSERT(regex_test_pass(re2, "py3", "py3")); - PT_ASSERT(regex_test_pass(re2, "py2", "py2")); - PT_ASSERT(regex_test_fail(re2, "py", "py")); - PT_ASSERT(regex_test_fail(re2, "py.", "py.")); - PT_ASSERT(regex_test_fail(re2, "py!", "py!")); - - mpc_delete(re0); - mpc_delete(re1); - mpc_delete(re2); - -} - -void test_regex_range(void) { - - mpc_parser_t *re0, *re1, *re2, *re3; 
- - re0 = mpc_re("abg[abcdef]"); - re1 = mpc_re("y*[a-z]"); - re2 = mpc_re("zz(p+)?[A-Z_0\\]123]*"); - re3 = mpc_re("^[^56hy].*$"); - - /* TODO: Testing */ - - mpc_delete(re0); - mpc_delete(re1); - mpc_delete(re2); - mpc_delete(re3); - -} - -void test_regex_string(void) { - - mpc_parser_t *re0 = mpc_re("\"(\\\\.|[^\"])*\""); - - PT_ASSERT(regex_test_pass(re0, "\"there\"", "\"there\"")); - PT_ASSERT(regex_test_pass(re0, "\"hello\"", "\"hello\"")); - PT_ASSERT(regex_test_pass(re0, "\"i am dan\"", "\"i am dan\"")); - PT_ASSERT(regex_test_pass(re0, "\"i a\\\"m dan\"", "\"i a\\\"m dan\"")); - - mpc_delete(re0); - -} - -void test_regex_lisp_comment(void) { - - mpc_parser_t *re0 = mpc_re(";[^\\n\\r]*"); - - PT_ASSERT(regex_test_pass(re0, ";comment", ";comment")); - PT_ASSERT(regex_test_pass(re0, ";i am the\nman", ";i am the")); - - mpc_delete(re0); - -} - -void test_regex_newline(void) { - - mpc_parser_t *re0 = mpc_re("[a-z]*"); - - PT_ASSERT(regex_test_pass(re0, "hi", "hi")); - PT_ASSERT(regex_test_pass(re0, "hi\nk", "hi")); - PT_ASSERT(regex_test_fail(re0, "hi\nk", "hi\nk")); - - mpc_delete(re0); - -} - -void test_regex_multiline(void) { - - mpc_parser_t *re0 = mpc_re_mode("(^[a-z]*$)*", MPC_RE_MULTILINE); - - PT_ASSERT(regex_test_pass(re0, "hello\nhello", "hello\nhello")); - PT_ASSERT(regex_test_pass(re0, "hello\nhello\n", "hello\nhello\n")); - PT_ASSERT(regex_test_pass(re0, "\nblah\n\nblah\n", "\nblah\n\nblah\n")); - PT_ASSERT(regex_test_fail(re0, "45234", "45234")); - PT_ASSERT(regex_test_fail(re0, "\n45234", "\n45234")); - PT_ASSERT(regex_test_pass(re0, "\n45234", "\n")); - - mpc_delete(re0); - -} - -void test_regex_dotall(void) { - - mpc_parser_t *re0 = mpc_re_mode("^.*$", MPC_RE_DEFAULT); - mpc_parser_t *re1 = mpc_re_mode("^.*$", MPC_RE_DOTALL); - - PT_ASSERT(regex_test_pass(re0, "hello", "hello")); - PT_ASSERT(regex_test_fail(re0, "hello\n", "hello")); - PT_ASSERT(regex_test_fail(re0, "he\nllo\n", "he")); - PT_ASSERT(regex_test_pass(re0, "34njaksdklmasd", 
"34njaksdklmasd")); - PT_ASSERT(regex_test_fail(re0, "34njaksd\nklmasd", "34njaksd")); - - PT_ASSERT(regex_test_pass(re1, "hello", "hello")); - PT_ASSERT(regex_test_pass(re1, "hello\n", "hello\n")); - PT_ASSERT(regex_test_pass(re1, "he\nllo\n", "he\nllo\n")); - PT_ASSERT(regex_test_pass(re1, "34njaksdklmasd", "34njaksdklmasd")); - PT_ASSERT(regex_test_pass(re1, "34njaksd\nklmasd", "34njaksd\nklmasd")); - - mpc_delete(re0); - mpc_delete(re1); - -} - -void suite_regex(void) { - pt_add_test(test_regex_basic, "Test Regex Basic", "Suite Regex"); - pt_add_test(test_regex_range, "Test Regex Range", "Suite Regex"); - pt_add_test(test_regex_string, "Test Regex String", "Suite Regex"); - pt_add_test(test_regex_lisp_comment, "Test Regex Lisp Comment", "Suite Regex"); - pt_add_test(test_regex_boundary, "Test Regex Boundary", "Suite Regex"); - pt_add_test(test_regex_newline, "Test Regex Newline", "Suite Regex"); - pt_add_test(test_regex_multiline, "Test Regex Multiline", "Suite Regex"); - pt_add_test(test_regex_dotall, "Test Regex Dotall", "Suite Regex"); -} +#include "ptest.h" +#include "../mpc.h" + +#include +#include + +static int string_eq(const void* x, const void* y) { return (strcmp(x, y) == 0); } +static void string_print(const void* x) { printf("'%s'", (char*)x); } + +int regex_test_pass(mpc_parser_t *p, const char* value, const char* match) { + return mpc_test_pass(p, value, match, string_eq, free, string_print); +} + +int regex_test_fail(mpc_parser_t *p, const char* value, const char* match) { + return mpc_test_fail(p, value, match, string_eq, free, string_print); +} + +void test_regex_basic(void) { + + mpc_parser_t *re0, *re1, *re2, *re3, *re4, *re5; + + re0 = mpc_re("abc|bcd"); + re1 = mpc_re("abc|bcd|e"); + re2 = mpc_re("ab()c(ab)*"); + re3 = mpc_re("abc(abdd)?"); + re4 = mpc_re("ab|c(abdd)?"); + re5 = mpc_re("abc(ab|dd)+g$"); + + PT_ASSERT(regex_test_pass(re0, "abc", "abc")); + PT_ASSERT(regex_test_pass(re0, "bcd", "bcd")); + PT_ASSERT(regex_test_fail(re0, "bc", 
"bc")); + PT_ASSERT(regex_test_fail(re0, "ab", "ab")); + PT_ASSERT(regex_test_pass(re1, "e", "e")); + PT_ASSERT(regex_test_pass(re2, "abc", "abc")); + PT_ASSERT(regex_test_pass(re2, "abcabab", "abcabab")); + PT_ASSERT(regex_test_pass(re2, "abcababd", "abcabab")); + PT_ASSERT(regex_test_pass(re5, "abcddg", "abcddg")); + + mpc_delete(re0); + mpc_delete(re1); + mpc_delete(re2); + mpc_delete(re3); + mpc_delete(re4); + mpc_delete(re5); + +} + +void test_regex_boundary(void) { + + mpc_parser_t *re0, *re1, *re2; + + re0 = mpc_re("\\bfoo\\b"); + re1 = mpc_re("(w| )?\\bfoo\\b"); + re2 = mpc_re("py\\B.*"); + + PT_ASSERT(regex_test_pass(re0, "foo", "foo")); + PT_ASSERT(regex_test_pass(re0, "foo.", "foo")); + PT_ASSERT(regex_test_pass(re0, "foo)", "foo")); + PT_ASSERT(regex_test_pass(re0, "foo baz", "foo")); + + PT_ASSERT(regex_test_fail(re0, "foobar", "foo")); + PT_ASSERT(regex_test_fail(re0, "foo3", "foo")); + + PT_ASSERT(regex_test_pass(re1, "foo", "foo")); + PT_ASSERT(regex_test_pass(re1, " foo", " foo")); + PT_ASSERT(regex_test_fail(re1, "wfoo", "foo")); + + PT_ASSERT(regex_test_pass(re2, "python", "python")); + PT_ASSERT(regex_test_pass(re2, "py3", "py3")); + PT_ASSERT(regex_test_pass(re2, "py2", "py2")); + PT_ASSERT(regex_test_fail(re2, "py", "py")); + PT_ASSERT(regex_test_fail(re2, "py.", "py.")); + PT_ASSERT(regex_test_fail(re2, "py!", "py!")); + + mpc_delete(re0); + mpc_delete(re1); + mpc_delete(re2); + +} + +void test_regex_range(void) { + + mpc_parser_t *re0, *re1, *re2, *re3; + + re0 = mpc_re("abg[abcdef]"); + re1 = mpc_re("y*[a-z]"); + re2 = mpc_re("zz(p+)?[A-Z_0\\]123]*"); + re3 = mpc_re("^[^56hy].*$"); + + /* TODO: Testing */ + + mpc_delete(re0); + mpc_delete(re1); + mpc_delete(re2); + mpc_delete(re3); + +} + +void test_regex_string(void) { + + mpc_parser_t *re0 = mpc_re("\"(\\\\.|[^\"])*\""); + + PT_ASSERT(regex_test_pass(re0, "\"there\"", "\"there\"")); + PT_ASSERT(regex_test_pass(re0, "\"hello\"", "\"hello\"")); + PT_ASSERT(regex_test_pass(re0, "\"i am 
dan\"", "\"i am dan\"")); + PT_ASSERT(regex_test_pass(re0, "\"i a\\\"m dan\"", "\"i a\\\"m dan\"")); + + mpc_delete(re0); + +} + +void test_regex_lisp_comment(void) { + + mpc_parser_t *re0 = mpc_re(";[^\\n\\r]*"); + + PT_ASSERT(regex_test_pass(re0, ";comment", ";comment")); + PT_ASSERT(regex_test_pass(re0, ";i am the\nman", ";i am the")); + + mpc_delete(re0); + +} + +void test_regex_newline(void) { + + mpc_parser_t *re0 = mpc_re("[a-z]*"); + + PT_ASSERT(regex_test_pass(re0, "hi", "hi")); + PT_ASSERT(regex_test_pass(re0, "hi\nk", "hi")); + PT_ASSERT(regex_test_fail(re0, "hi\nk", "hi\nk")); + + mpc_delete(re0); + +} + +void test_regex_multiline(void) { + + mpc_parser_t *re0 = mpc_re_mode("(^[a-z]*$)*", MPC_RE_MULTILINE); + + PT_ASSERT(regex_test_pass(re0, "hello\nhello", "hello\nhello")); + PT_ASSERT(regex_test_pass(re0, "hello\nhello\n", "hello\nhello\n")); + PT_ASSERT(regex_test_pass(re0, "\nblah\n\nblah\n", "\nblah\n\nblah\n")); + PT_ASSERT(regex_test_fail(re0, "45234", "45234")); + PT_ASSERT(regex_test_fail(re0, "\n45234", "\n45234")); + PT_ASSERT(regex_test_pass(re0, "\n45234", "\n")); + + mpc_delete(re0); + +} + +void test_regex_dotall(void) { + + mpc_parser_t *re0 = mpc_re_mode("^.*$", MPC_RE_DEFAULT); + mpc_parser_t *re1 = mpc_re_mode("^.*$", MPC_RE_DOTALL); + + PT_ASSERT(regex_test_pass(re0, "hello", "hello")); + PT_ASSERT(regex_test_fail(re0, "hello\n", "hello")); + PT_ASSERT(regex_test_fail(re0, "he\nllo\n", "he")); + PT_ASSERT(regex_test_pass(re0, "34njaksdklmasd", "34njaksdklmasd")); + PT_ASSERT(regex_test_fail(re0, "34njaksd\nklmasd", "34njaksd")); + + PT_ASSERT(regex_test_pass(re1, "hello", "hello")); + PT_ASSERT(regex_test_pass(re1, "hello\n", "hello\n")); + PT_ASSERT(regex_test_pass(re1, "he\nllo\n", "he\nllo\n")); + PT_ASSERT(regex_test_pass(re1, "34njaksdklmasd", "34njaksdklmasd")); + PT_ASSERT(regex_test_pass(re1, "34njaksd\nklmasd", "34njaksd\nklmasd")); + + mpc_delete(re0); + mpc_delete(re1); + +} + +void suite_regex(void) { + 
pt_add_test(test_regex_basic, "Test Regex Basic", "Suite Regex"); + pt_add_test(test_regex_range, "Test Regex Range", "Suite Regex"); + pt_add_test(test_regex_string, "Test Regex String", "Suite Regex"); + pt_add_test(test_regex_lisp_comment, "Test Regex Lisp Comment", "Suite Regex"); + pt_add_test(test_regex_boundary, "Test Regex Boundary", "Suite Regex"); + pt_add_test(test_regex_newline, "Test Regex Newline", "Suite Regex"); + pt_add_test(test_regex_multiline, "Test Regex Multiline", "Suite Regex"); + pt_add_test(test_regex_dotall, "Test Regex Dotall", "Suite Regex"); +} diff --git a/tests/test.c b/tests/test.c index 1b78000..876e3e7 100644 --- a/tests/test.c +++ b/tests/test.c @@ -1,16 +1,16 @@ -#include "ptest.h" - -void suite_core(void); -void suite_regex(void); -void suite_grammar(void); -void suite_combinators(void); - -int main(int argc, char** argv) { - (void) argc; (void) argv; - pt_add_suite(suite_core); - pt_add_suite(suite_regex); - pt_add_suite(suite_grammar); - pt_add_suite(suite_combinators); - return pt_run(); -} - +#include "ptest.h" + +void suite_core(void); +void suite_regex(void); +void suite_grammar(void); +void suite_combinators(void); + +int main(int argc, char** argv) { + (void) argc; (void) argv; + pt_add_suite(suite_core); + pt_add_suite(suite_regex); + pt_add_suite(suite_grammar); + pt_add_suite(suite_combinators); + return pt_run(); +} +