diff --git a/Unix/cloc b/Unix/cloc new file mode 100755 index 00000000..fda9fcb3 --- /dev/null +++ b/Unix/cloc @@ -0,0 +1,9212 @@ +#!/usr/bin/env perl +# cloc -- Count Lines of Code {{{1 +# Copyright (C) 2006-2016 Al Danial +# First release August 2006 +# +# Includes code from: +# - SLOCCount v2.26 +# http://www.dwheeler.com/sloccount/ +# by David Wheeler. +# - Win32::Autoglob +# http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm +# by Sean M. Burke. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details: +# . +# +# 1}}} +my $VERSION = "1.66"; # odd number == beta; even number == stable +my $URL = "https://github.com/AlDanial/cloc"; +require 5.006; +# use modules {{{1 +use warnings; +use strict; +use Getopt::Long; +use File::Basename; +use File::Temp qw { tempfile tempdir }; +use File::Find; +use File::Path; +use File::Spec; +use IO::File; +use POSIX "strftime"; + +use Digest::MD5; +my $HAVE_Digest_MD5 = 1; +### Digest::MD5 isn't in the standard distribution. Use it only if installed. +##my $HAVE_Digest_MD5 = 0; +##eval "use Digest::MD5;"; +##if (defined $Digest::MD5::VERSION) { +## $HAVE_Digest_MD5 = 1; +##} else { +## warn "Digest::MD5 not installed; will skip file uniqueness checks.\n"; +##} + +use Time::HiRes; +my $HAVE_Time_HiRes = 1; +### Time::HiRes became standard with Perl 5.8 +##my $HAVE_Time_HiRes = 0; +##eval "use Time::HiRes;"; +##$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION; + +use Regexp::Common; +my $HAVE_Rexexp_Common = 1; +### Regexp::Common isn't in the standard distribution. It will +### be installed in a temp directory if necessary. +##BEGIN { +## if (eval "use Regexp::Common;") { +## $HAVE_Rexexp_Common = 1; +## } else { +## $HAVE_Rexexp_Common = 0; +## } +##} + +use Algorithm::Diff qw ( sdiff ) ; +my $HAVE_Algorith_Diff = 1; +### Algorithm::Diff isn't in the standard distribution. It will +### be installed in a temp directory if necessary. +##eval "use Algorithm::Diff qw ( sdiff ) "; +##if (defined $Algorithm::Diff::VERSION) { +## $HAVE_Algorith_Diff = 1; +##} else { +## Install_Algorithm_Diff(); +##} +# print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n"; +# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die; +# die "Hre=$HAVE_Rexexp_Common Had=$HAVE_Algorith_Diff"; + +# Uncomment next two lines when building Windows executable with perl2exe +# or if running on a system that already has Regexp::Common. +#use Regexp::Common; +#$HAVE_Rexexp_Common = 1; + +#perl2exe_include "Regexp/Common/whitespace.pm" +#perl2exe_include "Regexp/Common/URI.pm" +#perl2exe_include "Regexp/Common/URI/fax.pm" +#perl2exe_include "Regexp/Common/URI/file.pm" +#perl2exe_include "Regexp/Common/URI/ftp.pm" +#perl2exe_include "Regexp/Common/URI/gopher.pm" +#perl2exe_include "Regexp/Common/URI/http.pm" +#perl2exe_include "Regexp/Common/URI/pop.pm" +#perl2exe_include "Regexp/Common/URI/prospero.pm" +#perl2exe_include "Regexp/Common/URI/news.pm" +#perl2exe_include "Regexp/Common/URI/tel.pm" +#perl2exe_include "Regexp/Common/URI/telnet.pm" +#perl2exe_include "Regexp/Common/URI/tv.pm" +#perl2exe_include "Regexp/Common/URI/wais.pm" +#perl2exe_include "Regexp/Common/CC.pm" +#perl2exe_include "Regexp/Common/SEN.pm" +#perl2exe_include "Regexp/Common/number.pm" +#perl2exe_include "Regexp/Common/delimited.pm" +#perl2exe_include "Regexp/Common/profanity.pm" +#perl2exe_include "Regexp/Common/net.pm" +#perl2exe_include "Regexp/Common/zip.pm" +#perl2exe_include "Regexp/Common/comment.pm" +#perl2exe_include "Regexp/Common/balanced.pm" +#perl2exe_include "Regexp/Common/lingua.pm" +#perl2exe_include "Regexp/Common/list.pm" +#perl2exe_include "File/Glob.pm" + +use Text::Tabs qw { expand }; +use Cwd qw { cwd }; +use File::Glob; +# 1}}} +# Usage information, options processing. {{{1 +my $ON_WINDOWS = 0; + $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT"); +if ($ON_WINDOWS and $ENV{'SHELL'}) { + if ($ENV{'SHELL'} =~ m{^/}) { + $ON_WINDOWS = 0; # make Cygwin look like Unix + } else { + $ON_WINDOWS = 1; # MKS defines $SHELL but still acts like Windows + } +} + +my $NN = chr(27) . "[0m"; # normal + $NN = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR: is it a terminal? +my $BB = chr(27) . "[1m"; # bold + $BB = "" if $ON_WINDOWS or !(-t STDERR); +my $script = basename $0; +my $usage = " +Usage: $script [options] | | + + Count, or compute differences of, physical lines of source code in the + given files (may be archives such as compressed tarballs or zip files) + and/or recursively below the given directories. + + ${BB}Input Options${NN} + --extract-with= This option is only needed if cloc is unable + to figure out how to extract the contents of + the input file(s) by itself. + Use to extract binary archive files (e.g.: + .tar.gz, .zip, .Z). Use the literal '>FILE<' as + a stand-in for the actual file(s) to be + extracted. For example, to count lines of code + in the input files + gcc-4.2.tar.gz perl-5.8.8.tar.gz + on Unix use + --extract-with='gzip -dc >FILE< | tar xf -' + or, if you have GNU tar, + --extract-with='tar zxf >FILE<' + and on Windows use, for example: + --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\" + (if WinZip is installed there). + --list-file= Take the list of file and/or directory names to + process from , which has one file/directory + name per line. Only exact matches are counted; + relative path names will be resolved starting from + the directory where cloc is invoked. + See also --exclude-list-file. + --unicode Check binary files to see if they contain Unicode + expanded ASCII text. This causes performance to + drop noticeably. + + ${BB}Processing Options${NN} + --autoconf Count .in files (as processed by GNU autoconf) of + recognized languages. + --by-file Report results for every source file encountered. + --by-file-by-lang Report results for every source file encountered + in addition to reporting by language. + --count-and-diff + First perform direct code counts of source file(s) + of and separately, then perform a diff + of these. Inputs may be pairs of files, directories, + or archives. See also --diff, --diff-alignment, + --diff-timeout, --ignore-case, --ignore-whitespace. + --diff Compute differences in code and comments between + source file(s) of and . The inputs + may be pairs of files, directories, or archives. + Use --diff-alignment to generate a list showing + which file pairs where compared. See also + --count-and-diff, --diff-alignment, --diff-timeout, + --ignore-case, --ignore-whitespace. + --diff-timeout Ignore files which take more than seconds + to process. Default is 10 seconds. + (Large files with many repeated lines can cause + Algorithm::Diff::sdiff() to take hours.) + --follow-links [Unix only] Follow symbolic links to directories + (sym links to files are always followed). + --force-lang=[,] + Process all files that have a extension + with the counter for language . For + example, to count all .f files with the + Fortran 90 counter (which expects files to + end with .f90) instead of the default Fortran 77 + counter, use + --force-lang=\"Fortran 90\",f + If is omitted, every file will be counted + with the counter. This option can be + specified multiple times (but that is only + useful when is given each time). + See also --script-lang, --lang-no-ext. + --force-lang-def= Load language processing filters from , + then use these filters instead of the built-in + filters. Note: languages which map to the same + file extension (for example: + MATLAB/Objective C/MUMPS/Mercury; Pascal/PHP; + Lisp/OpenCL; Lisp/Julia; Perl/Prolog) will be + ignored as these require additional processing + that is not expressed in language definition + files. Use --read-lang-def to define new + language filters without replacing built-in + filters (see also --write-lang-def). + --ignore-whitespace Ignore horizontal white space when comparing files + with --diff. See also --ignore-case. + --ignore-case Ignore changes in case; consider upper- and lower- + case letters equivalent when comparing files with + --diff. See also --ignore-whitespace. + --lang-no-ext= Count files without extensions using the + counter. This option overrides internal logic + for files without extensions (where such files + are checked against known scripting languages + by examining the first line for #!). See also + --force-lang, --script-lang. + --max-file-size= Skip files larger than megabytes when + traversing directories. By default, =100. + cloc's memory requirement is roughly twenty times + larger than the largest file so running with + files larger than 100 MB on a computer with less + than 2 GB of memory will cause problems. + Note: this check does not apply to files + explicitly passed as command line arguments. + --read-binary-files Process binary files in addition to text files. + This is usually a bad idea and should only be + attempted with text files that have embedded + binary data. + --read-lang-def= Load new language processing filters from + and merge them with those already known to cloc. + If defines a language cloc already knows + about, cloc's definition will take precedence. + Use --force-lang-def to over-ride cloc's + definitions (see also --write-lang-def ). + --script-lang=, Process all files that invoke as a #! + scripting language with the counter for language + . For example, files that begin with + #!/usr/local/bin/perl5.8.8 + will be counted with the Perl counter by using + --script-lang=Perl,perl5.8.8 + The language name is case insensitive but the + name of the script language executable, , + must have the right case. This option can be + specified multiple times. See also --force-lang, + --lang-no-ext. + --sdir= Use as the scratch directory instead of + letting File::Temp chose the location. Files + written to this location are not removed at + the end of the run (as they are with File::Temp). + --skip-uniqueness Skip the file uniqueness check. This will give + a performance boost at the expense of counting + files with identical contents multiple times + (if such duplicates exist). + --stdin-name= Give a file name to use to determine the language + for standard input. + --strip-comments= For each file processed, write to the current + directory a version of the file which has blank + lines and comments removed. The name of each + stripped file is the original file name with + . appended to it. It is written to the + current directory unless --original-dir is on. + --original-dir [Only effective in combination with + --strip-comments] Write the stripped files + to the same directory as the original files. + --sum-reports Input arguments are report files previously + created with the --report-file option. Makes + a cumulative set of results containing the + sum of data from the individual report files. + --unix Override the operating system autodetection + logic and run in UNIX mode. See also + --windows, --show-os. + --windows Override the operating system autodetection + logic and run in Microsoft Windows mode. + See also --unix, --show-os. + + ${BB}Filter Options${NN} + --exclude-dir=[,D2,] Exclude the given comma separated directories + D1, D2, D3, et cetera, from being scanned. For + example --exclude-dir=.cache,test will skip + all files that have /.cache/ or /test/ as part + of their path. + Directories named .bzr, .cvs, .hg, .git, and + .svn are always excluded. + --exclude-ext=[,[...]] + Do not count files having the given file name + extensions. + --exclude-lang=[,L2,] Exclude the given comma separated languages + L1, L2, L3, et cetera, from being counted. + --exclude-list-file= Ignore files and/or directories whose names + appear in . should have one file + name per line. Only exact matches are ignored; + relative path names will be resolved starting from + the directory where cloc is invoked. + See also --list-file. + --fullpath Modifies the behavior of --match-f or + --not-match-f to include the file's path + in the regex, not just the file's basename. + (This does not expand each file to include its + absolute path, instead it uses as much of + the path as is passed in to cloc.) + --include-lang=[,L2,] Count only the given comma separated languages + L1, L2, L3, et cetera. + --match-d= Only count files in directories matching the Perl + regex. For example + --match-d='/(src|include)/' + only counts files in directories containing + /src/ or /include/. + --not-match-d= Count all files except those in directories + matching the Perl regex. + --match-f= Only count files whose basenames match the Perl + regex. For example + --match-f='^[Ww]idget' + only counts files that start with Widget or widget. + Add --fullpath to include parent directories + in the regex instead of just the basename. + --not-match-f= Count all files except those whose basenames + match the Perl regex. Add --fullpath to include + parent directories in the regex instead of just + the basename. + --skip-archive= Ignore files that end with the given Perl regular + expression. For example, if given + --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)' + the code will skip files that end with .zip, + .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and + .tar.7z. + --skip-win-hidden On Windows, ignore hidden files. + + ${BB}Debug Options${NN} + --categorized= Save names of categorized files to . + --counted= Save names of processed source files to . + --diff-alignment= Write to a list of files and file pairs + showing which files were added, removed, and/or + compared during a run with --diff. This switch + forces the --diff mode on. + --explain= Print the filters used to remove comments for + language and exit. In some cases the + filters refer to Perl subroutines rather than + regular expressions. An examination of the + source code may be needed for further explanation. + --help Print this usage information and exit. + --found= Save names of every file found to . + --ignored= Save names of ignored files and the reason they + were ignored to . + --print-filter-stages Print processed source code before and after + each filter is applied. + --show-ext[=] Print information about all known (or just the + given) file extensions and exit. + --show-lang[=] Print information about all known (or just the + given) languages and exit. + --show-os Print the value of the operating system mode + and exit. See also --unix, --windows. + -v[=] Verbose switch (optional numeric value). + -verbose[=] Long form of -v. + --version Print the version of this program and exit. + --write-lang-def= Writes to the language processing filters + then exits. Useful as a first step to creating + custom language definitions (see also + --force-lang-def, --read-lang-def). + + ${BB}Output Options${NN} + --3 Print third-generation language output. + (This option can cause report summation to fail + if some reports were produced with this option + while others were produced without it.) + --by-percent X Instead of comment and blank line counts, show + these values as percentages based on the value + of X in the denominator: + X = 'c' -> # lines of code + X = 'cm' -> # lines of code + comments + X = 'cb' -> # lines of code + blanks + X = 'cmb' -> # lines of code + comments + blanks + For example, if using method 'c' and your code + has twice as many lines of comments as lines + of code, the value in the comment column will + be 200%. The code column remains a line count. + --csv Write the results as comma separated values. + --csv-delimiter= Use the character as the delimiter for comma + separated files instead of ,. This switch forces + --json Write the results as JavaScript Object Notation + (JSON) formatted output. + --md Write the results as Markdown-formatted text. + --out= Synonym for --report-file=. + --progress-rate= Show progress update after every files are + processed (default =100). Set to 0 to + suppress progress output (useful when redirecting + output to STDOUT). + --quiet Suppress all information messages except for + the final report. + --report-file= Write the results to instead of STDOUT. + --sql= Write results as SQL create and insert statements + which can be read by a database program such as + SQLite. If is -, output is sent to STDOUT. + --sql-append Append SQL insert statements to the file specified + by --sql and do not generate table creation + statements. Only valid with the --sql option. + --sql-project= Use as the project identifier for the + current run. Only valid with the --sql option. + --sql-style= + + +

+';
+    print "<- html_header\n" if $opt_v > 2;
+} # 1}}}
+sub html_end {                               # {{{1
+return
+'
+ + +'; +} # 1}}} +sub die_unknown_lang { # {{{1 + my ($lang, $option_name) = @_; + die "Unknown language '$lang' used with $option_name option. " . + "The command\n $script --show-lang\n" . + "will print all recognized languages. Language names are " . + "case sensitive.\n" ; +} # 1}}} +sub unicode_file { # {{{1 + my $file = shift @_; + + print "-> unicode_file($file)\n" if $opt_v > 2; + return 0 if (-s $file > 2_000_000); + # don't bother trying to test binary files bigger than 2 MB + + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + return 0; + } + my @lines = <$IN>; + $IN->close; + + if (unicode_to_ascii( join('', @lines) )) { + print "<- unicode_file()\n" if $opt_v > 2; + return 1; + } else { + print "<- unicode_file()\n" if $opt_v > 2; + return 0; + } + +} # 1}}} +sub unicode_to_ascii { # {{{1 + my $string = shift @_; + + # A trivial attempt to convert UTF-16 little or big endian + # files into ASCII. These files exhibit the following byte + # sequence: + # byte 1: 255 + # byte 2: 254 + # byte 3: ord of ASCII character + # byte 4: 0 + # byte 3+i: ord of ASCII character + # byte 4+i: 0 + # or + # byte 1: 255 + # byte 2: 254 + # byte 3: 0 + # byte 4: ord of ASCII character + # byte 3+i: 0 + # byte 4+i: ord of ASCII character + + my $length = length $string; +#print "length=$length\n"; + return '' if $length <= 3; + my @unicode = split(//, $string); + + # check the first 100 characters for big or little endian UTF-16 encoding + my $max_peek = $length < 200 ? $length : 200; + my @view_1 = (); + for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] } + my @view_2 = (); + for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] } + + my $points_1 = 0; + foreach my $C (@view_1) { + ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13 + or ord($C) == 10 + or ord($C) == 9; + } + + my $points_2 = 0; + foreach my $C (@view_2) { + ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13 + or ord($C) == 10 + or ord($C) == 9; + } +#print "points 1: $points_1\n"; +#print "points 2: $points_2\n"; + + my $offset = undef; + if ($points_1 > 90) { $offset = 2; } + elsif ($points_2 > 90) { $offset = 3; } + else { return '' } # neither big or little endian UTF-16 + + my @ascii = (); + for (my $i = $offset; $i < $length; $i += 2) { push @ascii, $unicode[$i]; } + return join("", @ascii); +} # 1}}} +sub uncompress_archive_cmd { # {{{1 + my ($archive_file, ) = @_; + + # Wrap $archive_file in single or double quotes in the system + # commands below to avoid filename chicanery (including + # spaces in the names). + + print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2; + my $extract_cmd = ""; + my $missing = ""; + if ($opt_extract_with) { + ( $extract_cmd = $opt_extract_with ) =~ s/>FILE -"; + } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or + $archive_file =~ /\.tgz$/ ) and !$ON_WINDOWS) { + if (external_utility_exists("gzip --version")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "gzip -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "gzip"; + } + } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS) { + if (external_utility_exists("bzip2 --help")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS) { + if (external_utility_exists("unxz --version")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "unxz -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS) { + $extract_cmd = "tar xf '$archive_file'"; + } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) { + if (external_utility_exists("cpio --version")) { + if (external_utility_exists("rpm2cpio")) { + $extract_cmd = "rpm2cpio '$archive_file' | cpio -i"; + } else { + $missing = "rpm2cpio"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.zip$/i and !$ON_WINDOWS) { + if (external_utility_exists("unzip")) { + $extract_cmd = "unzip -qq -d . '$archive_file'"; + } else { + $missing = "unzip"; + } + } elsif ($ON_WINDOWS and $archive_file =~ /\.zip$/i) { + # zip on Windows, guess default Winzip install location + $extract_cmd = ""; + my $WinZip = '"C:\\Program Files\\WinZip\\WinZip32.exe"'; + if (external_utility_exists($WinZip)) { + $extract_cmd = "$WinZip -e -o \"$archive_file\" ."; +#print "trace 5 extract_cmd=[$extract_cmd]\n"; + } else { +#print "trace 6\n"; + $missing = $WinZip; + } + } + print "<- uncompress_archive_cmd\n" if $opt_v > 2; + if ($missing) { + die "Unable to expand $archive_file because external\n", + "utility '$missing' is not available.\n", + "Another possibility is to use the --extract-with option.\n"; + } else { + return $extract_cmd; + } +} +# 1}}} +sub read_list_file { # {{{1 + my ($file, ) = @_; + + print "-> read_list_file($file)\n" if $opt_v > 2; + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + my @entry = (); + while (<$IN>) { + next if /^\s*$/ or /^\s*#/; # skip empty or commented lines + s/\cM$//; # DOS to Unix + chomp; + push @entry, $_; + } + $IN->close; + + print "<- read_list_file\n" if $opt_v > 2; + return @entry; +} +# 1}}} +sub external_utility_exists { # {{{1 + my $exe = shift @_; + + my $success = 0; + if ($ON_WINDOWS) { + $success = 1 unless system $exe . ' > nul'; + } else { + $success = 1 unless system $exe . ' >/dev/null 2>&1'; + if (!$success) { + $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1'; + } + } + + return $success; +} # 1}}} +sub write_xsl_file { # {{{1 + my $OUT = new IO::File $CLOC_XSL, "w"; + if (!defined $OUT) { + warn "Unable to write $CLOC_XSL $!\n"; + return; + } + my $XSL = # {{{2 +' + + + + + + + CLOC Results + + + +

+'; +# 2}}} + + if ($opt_by_file) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
FileBlankCommentCodeLanguage3rd Generation EquivalentScale
Total
+
+'; +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
LanguageFilesBlankCommentCodeScale3rd Generation Equivalent
Total
+'; +# 2}}} + } + + $XSL.= <<'EO_XSL'; # {{{2 + + +
+
+ +EO_XSL +# 2}}} + + my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + CLOC Results + + + +

+EO_DIFF_XSL +# 2}}} + + if ($opt_by_file) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + +
Same
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Modified
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Added
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Removed
FileBlankCommentCode
+EO_DIFF_XSL +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + + + +
Same
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Modified
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Added
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Removed
LanguageFilesBlankCommentCode
+EO_DIFF_XSL +# 2}}} + + } + + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + +
+
+EO_DIFF_XSL +# 2}}} + if ($opt_diff) { + print $OUT $XSL_DIFF; + } else { + print $OUT $XSL; + } + $OUT->close(); +} # 1}}} +sub normalize_file_names { # {{{1 + my (@files, ) = @_; + + # Returns a hash of file names reduced to a canonical form + # (fully qualified file names, all path separators changed to /, + # Windows file names lowercased). Hash values are the original + # file name. + + my %normalized = (); + foreach my $F (@files) { + my $F_norm = $F; + if ($ON_WINDOWS) { + $F_norm = lc $F_norm; # for case insensitive file name comparisons + $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix + $F_norm =~ s{^\./}{}g; # remove leading ./ + if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) { + # looks like a relative path; prefix with cwd + $F_norm = lc "$cwd/$F_norm"; + } + } else { + $F_norm =~ s{^\./}{}g; # remove leading ./ + if ($F_norm !~ m{^/}) { + # looks like a relative path; prefix with cwd + $F_norm = lc "$cwd/$F_norm"; + } + } + $normalized{ $F_norm } = $F; + } + return %normalized; +} # 1}}} +sub combine_diffs { # {{{1 + # subroutine by Andy (awalshe@sf.net) + # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625 + my ($ra_files) = @_; + + my $res = "$URL v $VERSION\n"; + my $dl = '-'; + my $width = 79; + # columns are in this order + my @cols = ('files', 'blank', 'comment', 'code'); + my %HoH = (); + + foreach my $file (@{$ra_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + + my $sec; + while (<$IN>) { + chomp; + s/\cM$//; + next if /^(http|Language|-----)/; + if (/^[A-Za-z0-9]+/) { # section title + $sec = $_; + chomp($sec); + $HoH{$sec} = () if ! exists $HoH{$sec}; + next; + } + + if (/^\s(same|modified|added|removed)/) { # calculated totals row + my @ar = grep { $_ ne '' } split(/ /, $_); + chomp(@ar); + my $ttl = shift @ar; + my $i = 0; + foreach(@ar) { + my $t = "${ttl}${dl}${cols[$i]}"; + $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t}; + $HoH{$sec}{$t} += $_; + $i++; + } + } + } + $IN->close; + } + + # rows are in this order + my @rows = ('same', 'modified', 'added', 'removed'); + + $res .= sprintf("%s\n", "-" x $width); + $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language', + $cols[0], $cols[1], $cols[2], $cols[3]); + $res .= sprintf("%s\n", "-" x $width); + + for my $sec ( keys %HoH ) { + next if $sec =~ /SUM:/; + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"${_}${dl}${cols[0]}"}, + $HoH{$sec}{"${_}${dl}${cols[1]}"}, + $HoH{$sec}{"${_}${dl}${cols[2]}"}, + $HoH{$sec}{"${_}${dl}${cols[3]}"}); + } + } + $res .= sprintf("%s\n", "-" x $width); + my $sec = 'SUM:'; + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"${_}${dl}${cols[0]}"}, + $HoH{$sec}{"${_}${dl}${cols[1]}"}, + $HoH{$sec}{"${_}${dl}${cols[2]}"}, + $HoH{$sec}{"${_}${dl}${cols[3]}"}); + } + $res .= sprintf("%s\n", "-" x $width); + + return $res; +} # 1}}} +sub get_time { # {{{1 + if ($HAVE_Time_HiRes) { + return Time::HiRes::time(); + } else { + return time(); + } +} # 1}}} +sub really_is_D { # {{{1 + # Ref bug 131, files ending with .d could be init.d scripts + # instead of D language source files. + my ($file , # in + $rh_Err , # in hash of error codes + $raa_errors , # out + ) = @_; + print "-> really_is_D($file)\n" if $opt_v > 2; + my $possible_script = peek_at_first_line($file, $rh_Err, $raa_errors); + + print "<- really_is_D($file)\n" if $opt_v > 2; + return $possible_script; # null string if D, otherwise a language +} # 1}}} +# subroutines copied from SLOCCount +my %lex_files = (); # really_is_lex() +my %expect_files = (); # really_is_expect() +my %php_files = (); # really_is_php() +sub really_is_lex { # {{{1 +# Given filename, returns TRUE if its contents really is lex. +# lex file must have "%%", "%{", and "%}". +# In theory, a lex file doesn't need "%{" and "%}", but in practice +# they all have them, and requiring them avoid mislabeling a +# non-lexfile as a lex file. + + my $filename = shift; + chomp($filename); + + my $is_lex = 0; # Value to determine. + my $percent_percent = 0; + my $percent_opencurly = 0; + my $percent_closecurly = 0; + + # Return cached result, if available: + if ($lex_files{$filename}) { return $lex_files{$filename};} + + open(LEX_FILE, "<$filename") || + die "Can't open $filename to determine if it's lex.\n"; + while() { + $percent_percent++ if (m/^\s*\%\%/); + $percent_opencurly++ if (m/^\s*\%\{/); + $percent_closecurly++ if (m/^\s*\%\}/); + } + close(LEX_FILE); + + if ($percent_percent && $percent_opencurly && $percent_closecurly) + {$is_lex = 1;} + + $lex_files{$filename} = $is_lex; # Store result in cache. + + return $is_lex; +} # 1}}} +sub really_is_expect { # {{{1 +# Given filename, returns TRUE if its contents really are Expect. +# Many "exp" files (such as in Apache and Mesa) are just "export" data, +# summarizing something else # (e.g., its interface). +# Sometimes (like in RPM) it's just misc. data. +# Thus, we need to look at the file to determine +# if it's really an "expect" file. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Expect _IF_ it: +# 1. has "load_lib" command and either "#" comments or {}. +# 2. {, }, and one of: proc, if, [...], expect + + my $is_expect = 0; # Value to determine. + + my $begin_brace = 0; # Lines that begin with curly braces. + my $end_brace = 0; # Lines that begin with curly braces. + my $load_lib = 0; # Lines with the Load_lib command. + my $found_proc = 0; + my $found_if = 0; + my $found_brackets = 0; + my $found_expect = 0; + my $found_pound = 0; + + # Return cached result, if available: + if ($expect_files{$filename}) { return expect_files{$filename};} + + open(EXPECT_FILE, "<$filename") || + die "Can't open $filename to determine if it's expect.\n"; + while() { + + if (m/#/) {$found_pound++; s/#.*//;} + if (m/^\s*\{/) { $begin_brace++;} + if (m/\{\s*$/) { $begin_brace++;} + if (m/^\s*\}/) { $end_brace++;} + if (m/\};?\s*$/) { $end_brace++;} + if (m/^\s*load_lib\s+\S/) { $load_lib++;} + if (m/^\s*proc\s/) { $found_proc++;} + if (m/^\s*if\s/) { $found_if++;} + if (m/\[.*\]/) { $found_brackets++;} + if (m/^\s*expect\s/) { $found_expect++;} + } + close(EXPECT_FILE); + + if ($load_lib && ($found_pound || ($begin_brace && $end_brace))) + {$is_expect = 1;} + if ( $begin_brace && $end_brace && + ($found_proc || $found_if || $found_brackets || $found_expect)) + {$is_expect = 1;} + + $expect_files{$filename} = $is_expect; # Store result in cache. + + return $is_expect; +} # 1}}} +sub really_is_pascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. + +# This isn't as obvious as it seems. +# Many ".p" files are Perl files +# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p), +# others are C extractions +# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p +# and some files in linuxconf). +# However, test files in "p2c" really are Pascal, for example. + +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following +# (ignoring {...} and (*...*) comments): +# 1. "^..program NAME" or "^..unit NAME", +# 2. "procedure", "function", "^..interface", or "^..implementation", +# 3. a "begin", and +# 4. it ends with "end.", +# +# Or it has all of the following: +# 1. "^..module NAME" and +# 2. it ends with "end.". +# +# Or it has all of the following: +# 1. "^..program NAME", +# 2. a "begin", and +# 3. it ends with "end.". +# +# The "end." requirements in particular filter out non-Pascal. +# +# Note (jgb): this does not detect Pascal main files in fpc, like +# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in +# it + + my $is_pascal = 0; # Value to determine. + + my $has_program = 0; + my $has_unit = 0; + my $has_module = 0; + my $has_procedure_or_function = 0; + my $found_begin = 0; + my $found_terminating_end = 0; + my $has_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;} + if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;} + if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; } + if (m/\bfunction\b/i) { $has_procedure_or_function = 1; } + if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; } + if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; } + if (m/\bbegin\b/i) { $has_begin = 1; } + # Originally I said: + # "This heuristic fails if there are multi-line comments after + # "end."; I haven't seen that in real Pascal programs:" + # But jgb found there are a good quantity of them in Debian, specially in + # fpc (at the end of a lot of files there is a multiline comment + # with the changelog for the file). + # Therefore, assume Pascal if "end." appears anywhere in the file. + if (m/end\.\s*$/i) {$found_terminating_end = 1;} +# elsif (m/\S/) {$found_terminating_end = 0;} + } + close(PASCAL_FILE); + + # Okay, we've examined the entire file looking for clues; + # let's use those clues to determine if it's really Pascal: + + if ( ( ($has_unit || $has_program) && $has_procedure_or_function && + $has_begin && $found_terminating_end ) || + ( $has_module && $found_terminating_end ) || + ( $has_program && $has_begin && $found_terminating_end ) ) + {$is_pascal = 1;} + + return $is_pascal; +} # 1}}} +sub really_is_incpascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. +# For .inc files (mainly seen in fpc) + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it is Pascal if any of the following: +# 1. really_is_pascal returns true +# 2. Any usual reserved word is found (program, unit, const, begin...) + + # If the general routine for Pascal files works, we have it + if (really_is_pascal($filename)) { + return 1; + } + + my $is_pascal = 0; # Value to determine. + my $found_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bprocedure\b/i) {$is_pascal = 1; } + if (m/\bfunction\b/i) {$is_pascal = 1; } + if (m/^\s*interface\s+/i) {$is_pascal = 1; } + if (m/^\s*implementation\s+/i) {$is_pascal = 1; } + if (m/\bconstant\s+/i) {$is_pascal=1;} + if (m/\bbegin\b/i) { $found_begin = 1; } + if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;} + if ($is_pascal) { + last; + } + } + + close(PASCAL_FILE); + return $is_pascal; +} # 1}}} +sub really_is_php { # {{{1 +# Given filename, returns TRUE if its contents really is php. + + my $filename = shift; + chomp($filename); + + my $is_php = 0; # Value to determine. + # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # + my $script_surround = 0; # ; bit 0 =