From 0563ada497074a4619c1e968247be2ce27afc6ef Mon Sep 17 00:00:00 2001
From: AlDanial <al.danial@gmail.com>
Date: Fri, 20 Dec 2024 10:09:43 -0800
Subject: [PATCH] add support for Civet, #873

---
 Unix/cloc                         | 29 ++++++++++++
 Unix/t/00_C.t                     | 10 ++++
 cloc                              | 29 ++++++++++++
 tests/inputs/parser_1.civet       | 76 +++++++++++++++++++++++++++++++
 tests/inputs/parser_2.civet       | 37 +++++++++++++++
 tests/outputs/parser_1.civet.yaml | 21 +++++++++
 tests/outputs/parser_2.civet.yaml | 21 +++++++++
 7 files changed, 223 insertions(+)
 create mode 100644 tests/inputs/parser_1.civet
 create mode 100644 tests/inputs/parser_2.civet
 create mode 100644 tests/outputs/parser_1.civet.yaml
 create mode 100644 tests/outputs/parser_2.civet.yaml

diff --git a/Unix/cloc b/Unix/cloc
index 7e35da75..2650b583 100755
--- a/Unix/cloc
+++ b/Unix/cloc
@@ -8689,6 +8689,9 @@ sub set_constants {                          # {{{1
             'tpp'         => 'C++'                   ,
             'cdc'         => 'Cadence'               ,
             'ccs'         => 'CCS'                   ,
+            'civet'       => 'Civet'                 ,
+            'cvt'         => 'Civet'                 ,
+            'cvtx'        => 'Civet'                 ,
             'cfc'         => 'ColdFusion CFScript'   ,
             'cfml'        => 'ColdFusion'            ,
             'cfm'         => 'ColdFusion'            ,
@@ -9881,6 +9884,9 @@ sub set_constants {                          # {{{1
                                 [ 'rm_comments_in_strings', '"', '//', '' ],
                                 [ 'call_regexp_common'  , 'C'      ],
                             ],
+    'Civet'              => [
+                                [ 'call_parse_civet'               ],
+                            ],
     'CSS'                => [
                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
                                 [ 'rm_comments_in_strings', '"', '//', '' ],
@@ -11574,6 +11580,7 @@ sub set_constants {                          # {{{1
     'Cadence'                      =>   3.00,
     'Carbon'                       =>   1.51,
     'CCS'                          =>   5.33,
+    'Civet'                        =>   3.00,
     'ColdFusion'                   =>   4.00,
     'ColdFusion CFScript'          =>   4.00,
     'Chapel'                       =>   2.96,
@@ -12003,6 +12010,28 @@ sub rm_last_line {                           # {{{1
     my $n = scalar(@{$ra_lines}) - 2;
     return @{$ra_lines}[0..$n];
 } # 1}}}
+sub call_parse_civet {                       # {{{1
+    my ($ra_lines, ) = @_;                   # $ra_lines: reference to an array of source lines
+    print "-> call_parse_civet\n" if $opt_v > 2;   # entry trace, printed when $opt_v exceeds 2
+    my $coffeeComment = 0;                   # set to 1 once a coffeeComment or coffeeCompat directive line is found
+    foreach my $L (@{$ra_lines}) {
+        if ($L =~ /^\s*civet\s+coffee(Comment|Compat)/) {   # NOTE(review): only matches an unquoted directive at line start; Civet directives may be written as quoted strings, confirm
+            $coffeeComment = 1;
+            last;                            # the first matching line decides; stop scanning
+        }
+    }
+    my (@step_1, @step_2, @step_3, @step_4);   # intermediate results; only @step_4 is returned
+    if ($coffeeComment) {
+        @step_4 = remove_matches($ra_lines, '^\s*#');   # directive seen: the only filter is on lines whose first non-blank character is a hash
+    } else {
+        @step_1 = call_regexp_common($ra_lines, 'C');   # no directive: begin with the C filter
+        @step_2 = remove_matches(        \@step_1, '^///');   # then lines with three slashes starting in column one
+        @step_3 = remove_matches(        \@step_2, '^\s*//[^/]');   # then lines opening with two slashes followed by a non-slash character
+        @step_4 = remove_between_general(\@step_3, '###', '###');   # finally the spans delimited by triple-hash markers
+    }
+    print "<- call_parse_civet\n" if $opt_v > 2;   # exit trace, printed when $opt_v exceeds 2
+    return @step_4;                          # list produced by the last filter stage
+} # 1}}}
 sub call_regexp_common {                     # {{{1
     my ($ra_lines, $language ) = @_;
     print "-> call_regexp_common for $language\n" if $opt_v > 2;
diff --git a/Unix/t/00_C.t b/Unix/t/00_C.t
index 6da3fe3c..6c715677 100755
--- a/Unix/t/00_C.t
+++ b/Unix/t/00_C.t
@@ -239,6 +239,16 @@ my @Tests = (
                     'ref'  => '../tests/outputs/C-Ansi.c.yaml',
                     'args' => '../tests/inputs/C-Ansi.c',
                 },
+                {
+                    'name' => 'Civet',
+                    'ref'  => '../tests/outputs/parser_1.civet.yaml',
+                    'args' => '../tests/inputs/parser_1.civet',
+                },
+                {
+                    'name' => 'Civet (coffeeComment)',
+                    'ref'  => '../tests/outputs/parser_2.civet.yaml',
+                    'args' => '../tests/inputs/parser_2.civet',
+                },
                 {
                     'name' => 'CSV',
                     'ref'  => '../tests/outputs/cloc_counts.csv.yaml',
diff --git a/cloc b/cloc
index 9439364f..f3b09421 100755
--- a/cloc
+++ b/cloc
@@ -8679,6 +8679,9 @@ sub set_constants {                          # {{{1
             'tpp'         => 'C++'                   ,
             'cdc'         => 'Cadence'               ,
             'ccs'         => 'CCS'                   ,
+            'civet'       => 'Civet'                 ,
+            'cvt'         => 'Civet'                 ,
+            'cvtx'        => 'Civet'                 ,
             'cfc'         => 'ColdFusion CFScript'   ,
             'cfml'        => 'ColdFusion'            ,
             'cfm'         => 'ColdFusion'            ,
@@ -9871,6 +9874,9 @@ sub set_constants {                          # {{{1
                                 [ 'rm_comments_in_strings', '"', '//', '' ],
                                 [ 'call_regexp_common'  , 'C'      ],
                             ],
+    'Civet'              => [
+                                [ 'call_parse_civet'               ],
+                            ],
     'CSS'                => [
                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
                                 [ 'rm_comments_in_strings', '"', '//', '' ],
@@ -11564,6 +11570,7 @@ sub set_constants {                          # {{{1
     'Cadence'                      =>   3.00,
     'Carbon'                       =>   1.51,
     'CCS'                          =>   5.33,
+    'Civet'                        =>   3.00,
     'ColdFusion'                   =>   4.00,
     'ColdFusion CFScript'          =>   4.00,
     'Chapel'                       =>   2.96,
@@ -11993,6 +12000,28 @@ sub rm_last_line {                           # {{{1
     my $n = scalar(@{$ra_lines}) - 2;
     return @{$ra_lines}[0..$n];
 } # 1}}}
+sub call_parse_civet {                       # {{{1
+    my ($ra_lines, ) = @_;                   # $ra_lines: reference to an array of source lines
+    print "-> call_parse_civet\n" if $opt_v > 2;   # entry trace, printed when $opt_v exceeds 2
+    my $coffeeComment = 0;                   # set to 1 once a coffeeComment or coffeeCompat directive line is found
+    foreach my $L (@{$ra_lines}) {
+        if ($L =~ /^\s*civet\s+coffee(Comment|Compat)/) {   # NOTE(review): only matches an unquoted directive at line start; Civet directives may be written as quoted strings, confirm
+            $coffeeComment = 1;
+            last;                            # the first matching line decides; stop scanning
+        }
+    }
+    my (@step_1, @step_2, @step_3, @step_4);   # intermediate results; only @step_4 is returned
+    if ($coffeeComment) {
+        @step_4 = remove_matches($ra_lines, '^\s*#');   # directive seen: the only filter is on lines whose first non-blank character is a hash
+    } else {
+        @step_1 = call_regexp_common($ra_lines, 'C');   # no directive: begin with the C filter
+        @step_2 = remove_matches(        \@step_1, '^///');   # then lines with three slashes starting in column one
+        @step_3 = remove_matches(        \@step_2, '^\s*//[^/]');   # then lines opening with two slashes followed by a non-slash character
+        @step_4 = remove_between_general(\@step_3, '###', '###');   # finally the spans delimited by triple-hash markers
+    }
+    print "<- call_parse_civet\n" if $opt_v > 2;   # exit trace, printed when $opt_v exceeds 2
+    return @step_4;                          # list produced by the last filter stage
+} # 1}}}
 sub call_regexp_common {                     # {{{1
     my ($ra_lines, $language ) = @_;
     print "-> call_regexp_common for $language\n" if $opt_v > 2;
diff --git a/tests/inputs/parser_1.civet b/tests/inputs/parser_1.civet
new file mode 100644
index 00000000..60fb84e1
--- /dev/null
+++ b/tests/inputs/parser_1.civet
@@ -0,0 +1,76 @@
+{ fail } from node:assert
+{ tokens, type Token } from ./tokens.civet
+           /*
+type * as Ast from ./astTypes.civet
+assert from ./assert.civet
+           */
+
+/// TokenType ::= keyof typeof tokens
+/// tokenEntries := Object.entries(tokens) as [TokenType, Token][]
+
+class TokenStream <: Iterable<[string, TokenType, readonly [number, number]]>
+    #sourceLocation = [1, 1] as tuple
+    @(#program: string)
+
+    :iterator() ###
+        :outer while #program# ###
+            for [type, token] of tokenEntries
+                length := token.matchLength #program
+            /// if length > 0
+            //      chunk := #program[..<length]        // comment
+            ///     #program |>= &[length<=..]
+            ///     yield [chunk, type, #sourceLocation] as tuple
+            ///     linesInChunk := chunk.split '\n'
+            //      if linesInChunk# > 1        // comment
+                        #sourceLocation.0 += linesInChunk# - 1
+                        #sourceLocation.1 = 1
+                    #sourceLocation.1 += linesInChunk.-1#
+                    continue outer
+            throw new SyntaxError
+                `Unrecognized token starting with '${#program.0}' at input:${#sourceLocation.join ':'}`
+
+function collectUntil<T>(iter: Iterator<T>, pred: (arg: T) => boolean)
+    loop
+        next := iter.next()
+        break if next.done or pred next.value
+        yield next.value
+
+processExpression := (expr: string, line: number, column: number) =>
+    processSplits := (parts: string[]): Ast.NumberSyntaxTree =>
+        if parts# % 2 is 0
+            throw new SyntaxError `Incomplete expression: '${parts.join ''}' (near input:${line}:${column})`
+        if parts# > 2
+            type := switch parts.-2
+                '+'
+                    'addition' as const
+                '_'
+                    'subtraction' as const
+                else
+                    throw new SyntaxError
+                        `Missing operator in expression containing '${parts[-3...].join ''}' (near input:${line}:${column})`
+            {
+                type
+                value:
+                    . processSplits parts[...-2]
+                    . processSplits [parts.-1] 
+            }
+        else
+            part := parts.0
+            switch part
+                '+', '_'
+                    throw new SyntaxError `Unexpected operator with no operands (near input:${line}:${column})`
+                /\p{Letter}+/v
+                    type: 'variable', value: part
+                /[0-9]+/
+                    type: 'literal', value: Number part
+                else
+                    fail();
+    
+    splitsAndEmpty := expr.split /(\+|_|\p{Letter}+|[0-9]+)/gv
+    splits := splitsAndEmpty.flatMap (el, i) =>
+        if i % 2 is 0
+            assert => el is ''
+            []
+        else
+            [el]
+    return processSplits splits
diff --git a/tests/inputs/parser_2.civet b/tests/inputs/parser_2.civet
new file mode 100644
index 00000000..f4b97be8
--- /dev/null
+++ b/tests/inputs/parser_2.civet
@@ -0,0 +1,37 @@
+civet coffeeComment
+{ fail } from node:assert
+{ tokens, type Token } from ./tokens.civet
+           /*
+type * as Ast from ./astTypes.civet
+assert from ./assert.civet
+           */
+
+/// TokenType ::= keyof typeof tokens
+/// tokenEntries := Object.entries(tokens) as [TokenType, Token][]
+
+class TokenStream <: Iterable<[string, TokenType, readonly [number, number]]>
+    #sourceLocation = [1, 1] as tuple
+    @(#program: string)
+
+    :iterator() ###
+        :outer while #program# ###
+            for [type, token] of tokenEntries
+                length := token.matchLength #program
+            /// if length > 0
+            //      chunk := #program[..<length]        // comment
+            ///     #program |>= &[length<=..]
+            ///     yield [chunk, type, #sourceLocation] as tuple
+            ///     linesInChunk := chunk.split '\n'
+            //      if linesInChunk# > 1        // comment
+                        #sourceLocation.0 += linesInChunk# - 1
+                        #sourceLocation.1 = 1
+                    #sourceLocation.1 += linesInChunk.-1#
+                    continue outer
+            throw new SyntaxError
+                `Unrecognized token starting with '${#program.0}' at input:${#sourceLocation.join ':'}`
+
+#function collectUntil<T>(iter: Iterator<T>, pred: (arg: T) => boolean)
+#    loop
+#        next := iter.next()
+#        break if next.done or pred next.value
+#        yield next.value
diff --git a/tests/outputs/parser_1.civet.yaml b/tests/outputs/parser_1.civet.yaml
new file mode 100644
index 00000000..a7e44c35
--- /dev/null
+++ b/tests/outputs/parser_1.civet.yaml
@@ -0,0 +1,21 @@
+---
+# github.com/AlDanial/cloc
+header : 
+  cloc_url           : github.com/AlDanial/cloc
+  cloc_version       : 2.03
+  elapsed_seconds    : 0.00380301475524902
+  n_files            : 1
+  n_lines            : 76
+  files_per_second   : 262.949282176666
+  lines_per_second   : 19984.1454454266
+  report_file        : tests/outputs/parser_1.civet.yaml
+'Civet' :
+  nFiles: 1
+  blank: 6
+  comment: 9
+  code: 61
+SUM: 
+  blank: 6
+  comment: 9
+  code: 61
+  nFiles: 1
diff --git a/tests/outputs/parser_2.civet.yaml b/tests/outputs/parser_2.civet.yaml
new file mode 100644
index 00000000..968536de
--- /dev/null
+++ b/tests/outputs/parser_2.civet.yaml
@@ -0,0 +1,21 @@
+---
+# github.com/AlDanial/cloc
+header : 
+  cloc_url           : github.com/AlDanial/cloc
+  cloc_version       : 2.03
+  elapsed_seconds    : 0.00371313095092773
+  n_files            : 1
+  n_lines            : 37
+  files_per_second   : 269.31449852318
+  lines_per_second   : 9964.63644535765
+  report_file        : tests/outputs/parser_2.civet.yaml
+'Civet' :
+  nFiles: 1
+  blank: 4
+  comment: 9
+  code: 24
+SUM: 
+  blank: 4
+  comment: 9
+  code: 24
+  nFiles: 1