From 0563ada497074a4619c1e968247be2ce27afc6ef Mon Sep 17 00:00:00 2001 From: AlDanial Date: Fri, 20 Dec 2024 10:09:43 -0800 Subject: [PATCH] add support for Civet, #873 --- Unix/cloc | 29 ++++++++++++ Unix/t/00_C.t | 10 ++++ cloc | 29 ++++++++++++ tests/inputs/parser_1.civet | 76 +++++++++++++++++++++++++++++++ tests/inputs/parser_2.civet | 37 +++++++++++++++ tests/outputs/parser_1.civet.yaml | 21 +++++++++ tests/outputs/parser_2.civet.yaml | 21 +++++++++ 7 files changed, 223 insertions(+) create mode 100644 tests/inputs/parser_1.civet create mode 100644 tests/inputs/parser_2.civet create mode 100644 tests/outputs/parser_1.civet.yaml create mode 100644 tests/outputs/parser_2.civet.yaml diff --git a/Unix/cloc b/Unix/cloc index 7e35da75..2650b583 100755 --- a/Unix/cloc +++ b/Unix/cloc @@ -8689,6 +8689,9 @@ sub set_constants { # {{{1 'tpp' => 'C++' , 'cdc' => 'Cadence' , 'ccs' => 'CCS' , + 'civet' => 'Civet' , + 'cvt' => 'Civet' , + 'cvtx' => 'Civet' , 'cfc' => 'ColdFusion CFScript' , 'cfml' => 'ColdFusion' , 'cfm' => 'ColdFusion' , @@ -9881,6 +9884,9 @@ sub set_constants { # {{{1 [ 'rm_comments_in_strings', '"', '//', '' ], [ 'call_regexp_common' , 'C' ], ], + 'Civet' => [ + [ 'call_parse_civet' ], + ], 'CSS' => [ [ 'rm_comments_in_strings', '"', '/*', '*/' ], [ 'rm_comments_in_strings', '"', '//', '' ], @@ -11574,6 +11580,7 @@ sub set_constants { # {{{1 'Cadence' => 3.00, 'Carbon' => 1.51, 'CCS' => 5.33, + 'Civet' => 3.00, 'ColdFusion' => 4.00, 'ColdFusion CFScript' => 4.00, 'Chapel' => 2.96, @@ -12003,6 +12010,28 @@ sub rm_last_line { # {{{1 my $n = scalar(@{$ra_lines}) - 2; return @{$ra_lines}[0..$n]; } # 1}}} +sub call_parse_civet { # {{{1 + my ($ra_lines, ) = @_; + print "-> call_parse_civet\n" if $opt_v > 2; + my $coffeeComment = 0; # set when a coffeeComment/coffeeCompat directive is found + foreach my $L (@{$ra_lines}) { + if ($L =~ /^\s*["']?civet\s+(?:\S+\s+)*?coffee(Comment|Compat)/) { # directive may be quoted and may list other options first + $coffeeComment = 1; + last; + } + } + my (@step_1, @step_2, @step_3, @step_4); + if ($coffeeComment) { + @step_4 = remove_matches($ra_lines, 
'^\s*#'); # only '#'-led lines are stripped in this mode + } else { + @step_1 = call_regexp_common($ra_lines, 'C'); + @step_2 = remove_matches( \@step_1, '^///'); + @step_3 = remove_matches( \@step_2, '^\s*//[^/]'); + @step_4 = remove_between_general(\@step_3, '###', '###'); + } + print "<- call_parse_civet\n" if $opt_v > 2; + return @step_4; +} # 1}}} sub call_regexp_common { # {{{1 my ($ra_lines, $language ) = @_; print "-> call_regexp_common for $language\n" if $opt_v > 2; diff --git a/Unix/t/00_C.t b/Unix/t/00_C.t index 6da3fe3c..6c715677 100755 --- a/Unix/t/00_C.t +++ b/Unix/t/00_C.t @@ -239,6 +239,16 @@ my @Tests = ( 'ref' => '../tests/outputs/C-Ansi.c.yaml', 'args' => '../tests/inputs/C-Ansi.c', }, + { + 'name' => 'Civet', + 'ref' => '../tests/outputs/parser_1.civet.yaml', + 'args' => '../tests/inputs/parser_1.civet', + }, + { + 'name' => 'Civet (coffeeComment)', + 'ref' => '../tests/outputs/parser_2.civet.yaml', + 'args' => '../tests/inputs/parser_2.civet', + }, { 'name' => 'CSV', 'ref' => '../tests/outputs/cloc_counts.csv.yaml', diff --git a/cloc b/cloc index 9439364f..f3b09421 100755 --- a/cloc +++ b/cloc @@ -8679,6 +8679,9 @@ sub set_constants { # {{{1 'tpp' => 'C++' , 'cdc' => 'Cadence' , 'ccs' => 'CCS' , + 'civet' => 'Civet' , + 'cvt' => 'Civet' , + 'cvtx' => 'Civet' , 'cfc' => 'ColdFusion CFScript' , 'cfml' => 'ColdFusion' , 'cfm' => 'ColdFusion' , @@ -9871,6 +9874,9 @@ sub set_constants { # {{{1 [ 'rm_comments_in_strings', '"', '//', '' ], [ 'call_regexp_common' , 'C' ], ], + 'Civet' => [ + [ 'call_parse_civet' ], + ], 'CSS' => [ [ 'rm_comments_in_strings', '"', '/*', '*/' ], [ 'rm_comments_in_strings', '"', '//', '' ], @@ -11564,6 +11570,7 @@ sub set_constants { # {{{1 'Cadence' => 3.00, 'Carbon' => 1.51, 'CCS' => 5.33, + 'Civet' => 3.00, 'ColdFusion' => 4.00, 'ColdFusion CFScript' => 4.00, 'Chapel' => 2.96, @@ -11993,6 +12000,28 @@ sub rm_last_line { # {{{1 my $n = scalar(@{$ra_lines}) - 2; return @{$ra_lines}[0..$n]; } # 1}}} +sub call_parse_civet { # {{{1 + my ($ra_lines, ) = @_; + print "-> 
call_parse_civet\n" if $opt_v > 2; + my $coffeeComment = 0; # set when a coffeeComment/coffeeCompat directive is found + foreach my $L (@{$ra_lines}) { + if ($L =~ /^\s*["']?civet\s+(?:\S+\s+)*?coffee(Comment|Compat)/) { # directive may be quoted and may list other options first + $coffeeComment = 1; + last; + } + } + my (@step_1, @step_2, @step_3, @step_4); + if ($coffeeComment) { + @step_4 = remove_matches($ra_lines, '^\s*#'); # only '#'-led lines are stripped in this mode + } else { + @step_1 = call_regexp_common($ra_lines, 'C'); + @step_2 = remove_matches( \@step_1, '^///'); + @step_3 = remove_matches( \@step_2, '^\s*//[^/]'); + @step_4 = remove_between_general(\@step_3, '###', '###'); + } + print "<- call_parse_civet\n" if $opt_v > 2; + return @step_4; +} # 1}}} sub call_regexp_common { # {{{1 my ($ra_lines, $language ) = @_; print "-> call_regexp_common for $language\n" if $opt_v > 2; diff --git a/tests/inputs/parser_1.civet b/tests/inputs/parser_1.civet new file mode 100644 index 00000000..60fb84e1 --- /dev/null +++ b/tests/inputs/parser_1.civet @@ -0,0 +1,76 @@ +{ fail } from node:assert +{ tokens, type Token } from ./tokens.civet + /* +type * as Ast from ./astTypes.civet +assert from ./assert.civet + */ + +/// TokenType ::= keyof typeof tokens +/// tokenEntries := Object.entries(tokens) as [TokenType, Token][] + +class TokenStream <: Iterable<[string, TokenType, readonly [number, number]]> + #sourceLocation = [1, 1] as tuple + @(#program: string) + + :iterator() ### + :outer while #program# ### + for [type, token] of tokenEntries + length := token.matchLength #program + /// if length > 0 + // chunk := #program[..= &[length<=..] 
+ /// yield [chunk, type, #sourceLocation] as tuple + /// linesInChunk := chunk.split '\n' + // if linesInChunk# > 1 // comment + #sourceLocation.0 += linesInChunk# - 1 + #sourceLocation.1 = 1 + #sourceLocation.1 += linesInChunk.-1# + continue outer + throw new SyntaxError + `Unrecognized token starting with '${#program.0}' at input:${#sourceLocation.join ':'}` + +function collectUntil(iter: Iterator, pred: (arg: T) => boolean) + loop + next := iter.next() + break if next.done or pred next.value + yield next.value + +processExpression := (expr: string, line: number, column: number) => + processSplits := (parts: string[]): Ast.NumberSyntaxTree => + if parts# % 2 is 0 + throw new SyntaxError `Incomplete expression: '${parts.join ''}' (near input:${line}:${column})` + if parts# > 2 + type := switch parts.-2 + '+' + 'addition' as const + '_' + 'subtraction' as const + else + throw new SyntaxError + `Missing operator in expression containing '${parts[-3...].join ''}' (near input:${line}:${column})` + { + type + value: + . processSplits parts[...-2] + . 
processSplits [parts.-1] + } + else + part := parts.0 + switch part + '+', '_' + throw new SyntaxError `Unexpected operator with no operands (near input:${line}:${column})` + /\p{Letter}+/v + type: 'variable', value: part + /[0-9]+/ + type: 'literal', value: Number part + else + fail(); + + splitsAndEmpty := expr.split /(\+|_|\p{Letter}+|[0-9]+)/gv + splits := splitsAndEmpty.flatMap (el, i) => + if i % 2 is 0 + assert => el is '' + [] + else + [el] + return processSplits splits diff --git a/tests/inputs/parser_2.civet b/tests/inputs/parser_2.civet new file mode 100644 index 00000000..f4b97be8 --- /dev/null +++ b/tests/inputs/parser_2.civet @@ -0,0 +1,37 @@ +civet coffeeComment +{ fail } from node:assert +{ tokens, type Token } from ./tokens.civet + /* +type * as Ast from ./astTypes.civet +assert from ./assert.civet + */ + +/// TokenType ::= keyof typeof tokens +/// tokenEntries := Object.entries(tokens) as [TokenType, Token][] + +class TokenStream <: Iterable<[string, TokenType, readonly [number, number]]> + #sourceLocation = [1, 1] as tuple + @(#program: string) + + :iterator() ### + :outer while #program# ### + for [type, token] of tokenEntries + length := token.matchLength #program + /// if length > 0 + // chunk := #program[..= &[length<=..] 
+ /// yield [chunk, type, #sourceLocation] as tuple + /// linesInChunk := chunk.split '\n' + // if linesInChunk# > 1 // comment + #sourceLocation.0 += linesInChunk# - 1 + #sourceLocation.1 = 1 + #sourceLocation.1 += linesInChunk.-1# + continue outer + throw new SyntaxError + `Unrecognized token starting with '${#program.0}' at input:${#sourceLocation.join ':'}` + +#function collectUntil(iter: Iterator, pred: (arg: T) => boolean) +# loop +# next := iter.next() +# break if next.done or pred next.value +# yield next.value diff --git a/tests/outputs/parser_1.civet.yaml b/tests/outputs/parser_1.civet.yaml new file mode 100644 index 00000000..a7e44c35 --- /dev/null +++ b/tests/outputs/parser_1.civet.yaml @@ -0,0 +1,21 @@ +--- +# github.com/AlDanial/cloc +header : + cloc_url : github.com/AlDanial/cloc + cloc_version : 2.03 + elapsed_seconds : 0.00380301475524902 + n_files : 1 + n_lines : 76 + files_per_second : 262.949282176666 + lines_per_second : 19984.1454454266 + report_file : tests/outputs/parser_1.civet.yaml +'Civet' : + nFiles: 1 + blank: 6 + comment: 9 + code: 61 +SUM: + blank: 6 + comment: 9 + code: 61 + nFiles: 1 diff --git a/tests/outputs/parser_2.civet.yaml b/tests/outputs/parser_2.civet.yaml new file mode 100644 index 00000000..968536de --- /dev/null +++ b/tests/outputs/parser_2.civet.yaml @@ -0,0 +1,21 @@ +--- +# github.com/AlDanial/cloc +header : + cloc_url : github.com/AlDanial/cloc + cloc_version : 2.03 + elapsed_seconds : 0.00371313095092773 + n_files : 1 + n_lines : 37 + files_per_second : 269.31449852318 + lines_per_second : 9964.63644535765 + report_file : tests/outputs/parser_2.civet.yaml +'Civet' : + nFiles: 1 + blank: 4 + comment: 9 + code: 24 +SUM: + blank: 4 + comment: 9 + code: 24 + nFiles: 1