diff --git a/Counting Rules/counting_standards_ada.pdf b/Counting Rules/counting_standards_ada.pdf new file mode 100644 index 0000000..79323f3 Binary files /dev/null and b/Counting Rules/counting_standards_ada.pdf differ diff --git a/Counting Rules/counting_standards_bash.pdf b/Counting Rules/counting_standards_bash.pdf new file mode 100644 index 0000000..7f4bb6e Binary files /dev/null and b/Counting Rules/counting_standards_bash.pdf differ diff --git a/Counting Rules/counting_standards_c_c.pdf b/Counting Rules/counting_standards_c_c.pdf new file mode 100644 index 0000000..0367bc8 Binary files /dev/null and b/Counting Rules/counting_standards_c_c.pdf differ diff --git a/Counting Rules/counting_standards_cfscript.pdf b/Counting Rules/counting_standards_cfscript.pdf new file mode 100644 index 0000000..e44e398 Binary files /dev/null and b/Counting Rules/counting_standards_cfscript.pdf differ diff --git a/Counting Rules/counting_standards_coldfusion.pdf b/Counting Rules/counting_standards_coldfusion.pdf new file mode 100644 index 0000000..ac88ba4 Binary files /dev/null and b/Counting Rules/counting_standards_coldfusion.pdf differ diff --git a/Counting Rules/counting_standards_cs.pdf b/Counting Rules/counting_standards_cs.pdf new file mode 100644 index 0000000..26ecee6 Binary files /dev/null and b/Counting Rules/counting_standards_cs.pdf differ diff --git a/Counting Rules/counting_standards_csh.pdf b/Counting Rules/counting_standards_csh.pdf new file mode 100644 index 0000000..5695646 Binary files /dev/null and b/Counting Rules/counting_standards_csh.pdf differ diff --git a/Counting Rules/counting_standards_css.pdf b/Counting Rules/counting_standards_css.pdf new file mode 100644 index 0000000..6f38212 Binary files /dev/null and b/Counting Rules/counting_standards_css.pdf differ diff --git a/Counting Rules/counting_standards_fortran.pdf b/Counting Rules/counting_standards_fortran.pdf new file mode 100644 index 0000000..e38ce11 Binary files /dev/null and b/Counting 
Rules/counting_standards_fortran.pdf differ diff --git a/Counting Rules/counting_standards_html_xml.pdf b/Counting Rules/counting_standards_html_xml.pdf new file mode 100644 index 0000000..51a4057 Binary files /dev/null and b/Counting Rules/counting_standards_html_xml.pdf differ diff --git a/Counting Rules/counting_standards_java.pdf b/Counting Rules/counting_standards_java.pdf new file mode 100644 index 0000000..0a47d3a Binary files /dev/null and b/Counting Rules/counting_standards_java.pdf differ diff --git a/Counting Rules/counting_standards_makefile.pdf b/Counting Rules/counting_standards_makefile.pdf new file mode 100644 index 0000000..8692d4e Binary files /dev/null and b/Counting Rules/counting_standards_makefile.pdf differ diff --git a/Counting Rules/counting_standards_matlab.pdf b/Counting Rules/counting_standards_matlab.pdf new file mode 100644 index 0000000..3880d32 Binary files /dev/null and b/Counting Rules/counting_standards_matlab.pdf differ diff --git a/Counting Rules/counting_standards_pascal.pdf b/Counting Rules/counting_standards_pascal.pdf new file mode 100644 index 0000000..285bdd5 Binary files /dev/null and b/Counting Rules/counting_standards_pascal.pdf differ diff --git a/Counting Rules/counting_standards_perl.pdf b/Counting Rules/counting_standards_perl.pdf new file mode 100644 index 0000000..06bdcbe Binary files /dev/null and b/Counting Rules/counting_standards_perl.pdf differ diff --git a/Counting Rules/counting_standards_php.pdf b/Counting Rules/counting_standards_php.pdf new file mode 100644 index 0000000..474e1d9 Binary files /dev/null and b/Counting Rules/counting_standards_php.pdf differ diff --git a/Counting Rules/counting_standards_python.pdf b/Counting Rules/counting_standards_python.pdf new file mode 100644 index 0000000..9f0de07 Binary files /dev/null and b/Counting Rules/counting_standards_python.pdf differ diff --git a/Counting Rules/counting_standards_ruby.pdf b/Counting Rules/counting_standards_ruby.pdf new file mode 100644 
index 0000000..f8a3350 Binary files /dev/null and b/Counting Rules/counting_standards_ruby.pdf differ diff --git a/Counting Rules/counting_standards_sql.pdf b/Counting Rules/counting_standards_sql.pdf new file mode 100644 index 0000000..8e481b4 Binary files /dev/null and b/Counting Rules/counting_standards_sql.pdf differ diff --git a/Counting Rules/counting_standards_vb.pdf b/Counting Rules/counting_standards_vb.pdf new file mode 100644 index 0000000..e72a838 Binary files /dev/null and b/Counting Rules/counting_standards_vb.pdf differ diff --git a/Counting Rules/counting_standards_verilog.pdf b/Counting Rules/counting_standards_verilog.pdf new file mode 100644 index 0000000..e9dec6b Binary files /dev/null and b/Counting Rules/counting_standards_verilog.pdf differ diff --git a/Counting Rules/counting_standards_vhdl.pdf b/Counting Rules/counting_standards_vhdl.pdf new file mode 100644 index 0000000..ff3a186 Binary files /dev/null and b/Counting Rules/counting_standards_vhdl.pdf differ diff --git a/Counting Rules/counting_standards_x_midas.pdf b/Counting Rules/counting_standards_x_midas.pdf new file mode 100644 index 0000000..68c7501 Binary files /dev/null and b/Counting Rules/counting_standards_x_midas.pdf differ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a1c98d6 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +all: + mkdir -p bin + g++ -Wall -o bin/UCC -DUNIX src/*.cpp + +clean: + rm -f bin/UCC bin/UCC.exe + rmdir --ignore-fail-on-non-empty bin diff --git a/Read_Me.txt b/Read_Me.txt new file mode 100644 index 0000000..fcffd2d --- /dev/null +++ b/Read_Me.txt @@ -0,0 +1,69 @@ +Read_Me.txt for Unified Code Count 2013_04 with Threading and other improvements +June 6, 2015 + +Files found here + +Counting Rules documents - 1 doc per language parser group + 23 files +src sources needed to build UCC Thread version + 90 files with 30 different language parsers + +license.txt original from UCC +Makefile original from UCC +Read_Me.txt the file you are 
reading now... +UCC.2008.vcproj original from UCC +UCC.2010.vcxproj original from UCC +UCC.2010.vcxproj.filters original from UCC +UCC.2012.vcxproj original from UCC +UCC.2012.vcxproj.filters original from UCC +UCC_CA_Profile_DIFF_No_DUP_0.png screen shot of AMD CodeAnalyst profiler in use +UCC_CA_Profile_DIFF_No_DUP_Details.txt profiling/improving Differencing procedures +UCC_CA_Profile_DUP_No_DIFF_Details.txt profiling/improving Duplication checking procedures +UCC_Multithreading_Notes.doc text and screen shots giving some background during development +UCC_Release_Notes_Threads_2013_04.txt descriptions of changes and some tips (strongly suggested reading) +UCC_release_notes_v.2013.04.pdf original Release notes +UCC_user_manual_v.2013.04.pdf original User manual + +For all else: +You can Register and get the original UCC 2013_04 distribution including docs at +http://csse.usc.edu/ucc_wp/ + + License: +I adhere to terms as stated in the license.txt file in this directory from USC. +I have no other terms. +The software changes are freely available and with the same limitations. +I have no other license file for anyone to mull over. + + Acknowledgements: + +To: Center for Systems and Software Engineering + University of Southern California + and + All those elsewhere who have contributed. + Thanks to all the people past and present that built UCC. + I am happy to give back to a tool I have used to find complexity nuggets. + +To: Boost C++ libraries creators, contributors and maintainers. + I am humbled by the insights gained from using Boost. + Thanks so much. + +To: Jeff Preshing for contributing a cross platform Semaphore library. + The semaphore library sema.h was an excellent fit + after a slight edit to not require C++ 2011 features. + Enjoyed reading your web site and the analogies and illustrations. + Thank you. + +To: AMD CodeAnalyst team and whatever you are working on now. + Thanks for building such an excellent profiler that made my work much easier. 
+ +To: KDiff3 team for building such an effective visual difference utility. + This tool literally saved me hours of time that I would have needed + checking all the various UCC output options trying to use other methods. + Thanks for a best in class visual differencer. + +As for me, +I tried to leave some sensible comments along with the working code... +Hopefully these changes will be merged into the original UCC baseline future versions. + +Enjoy! +Randy Maxwell \ No newline at end of file diff --git a/UCC.2008.vcproj b/UCC.2008.vcproj new file mode 100644 index 0000000..f7a9056 --- /dev/null +++ b/UCC.2008.vcproj @@ -0,0 +1,512 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/UCC.2010.vcxproj b/UCC.2010.vcxproj new file mode 100644 index 0000000..9a4767b --- /dev/null +++ b/UCC.2010.vcxproj @@ -0,0 +1,179 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + UCC + {FA47D54A-CAC6-4A69-B2CB-F3C4F9C37B0B} + Win32Proj + + + + Application + MultiByte + + + Application + MultiByte + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + bin\ + bin/Debug\ + true + bin\ + bin/Release\ + false + + + + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + + + Level3 + EditAndContinue + 4996;%(DisableSpecificWarnings) + + + $(OutDir)UCC-Debug.exe + true + $(OutDir)UCC.pdb + Console + false + + + MachineX86 + + + + + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Level3 + ProgramDatabase + + + $(OutDir)UCC.exe + true + Console + true + true + false + + + MachineX86 + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/UCC.2010.vcxproj.filters b/UCC.2010.vcxproj.filters new file mode 100644 index 0000000..cc1bd5c --- /dev/null +++ b/UCC.2010.vcxproj.filters @@ -0,0 +1,258 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + 
+ + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/UCC.2012.vcxproj b/UCC.2012.vcxproj new file mode 100644 index 0000000..f1f2533 --- /dev/null +++ b/UCC.2012.vcxproj @@ -0,0 +1,181 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + UCC + {FA47D54A-CAC6-4A69-B2CB-F3C4F9C37B0B} + Win32Proj + + + + Application + MultiByte + v110 + + + Application + MultiByte + v110 + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + bin\ + bin/Debug\ + true + bin\ + bin/Release\ + false + + + + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + + + Level3 + EditAndContinue + 4996;%(DisableSpecificWarnings) + + + $(OutDir)UCC-Debug.exe + true + $(OutDir)UCC.pdb + Console + false + + + MachineX86 + + + + + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Level3 + ProgramDatabase + + + $(OutDir)UCC.exe + true + Console + true + true + false + + + MachineX86 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/UCC.2012.vcxproj.filters b/UCC.2012.vcxproj.filters new file mode 100644 index 0000000..cc1bd5c --- /dev/null +++ b/UCC.2012.vcxproj.filters @@ -0,0 +1,258 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + 
Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/UCC_CA_Profile_DIFF_No_DUP_0.png b/UCC_CA_Profile_DIFF_No_DUP_0.png new file mode 100644 index 0000000..8fe9395 Binary files /dev/null and b/UCC_CA_Profile_DIFF_No_DUP_0.png differ diff --git a/UCC_CA_Profile_DIFF_No_DUP_Details.txt b/UCC_CA_Profile_DIFF_No_DUP_Details.txt new file mode 100644 index 0000000..5fe523b --- /dev/null +++ b/UCC_CA_Profile_DIFF_No_DUP_Details.txt @@ -0,0 +1,550 @@ +Below is a capture of using a profiler to do small optimizations of UCC Differencing code + +Code with interleaved Times captured by AMD CodeAnalyst in this case are shown. + +Just a few examples of what a decent profiler can help with. 
+ +Text capture of Details of using AMD CodeAnalyst Timing sampling (current time based profile) + on RELEASE (Fully Optimized) Build of UCC (Debug symbols & info also done to support CodeAnalyst) + +Visual C++ 2010 Express making 32 bit Windows UCC.exe and run on 64 bit Windows 7.1 OS using + O2 + W4 + optimize for speed + Whole program optimization at Link time + MT + +The profile used a statistical Time sampling approach +Operations in the profile included the Time of + <2 extra worker Threads on 2 CPU AMD> +Read, +Analyze, Count keywords, + +do Complexity metrics +and do Differencing with NO Duplicate checks +and finally produce output files + +UCC.exe -nodup -d -threads 2 +-dir "C:\C++\boost_1_48_0\tools" + "C:\C++\boost_1_58_0\tools" +-outdir "C:\TEST\UCC\Files_OUT" -ascii + + 783 files processed in boost_1_48_0\tools (baseline A) + 749 files processed in boost_1_58_0\tools (baseline B) +1532 files total + +Partial capture of overall Times given as percent of total time used by UCC. Clipped to show highest 93.47% of UCC Time used. 
+================================================================================================================================= +CS:EIP Symbol + Offset Timer samples +0x439080 CmpMngr::SimilarLine >>>> 35.74 <<<< +0x4b8f80 memchr 19.15 +0x418530 std::basic_string,std::allocator >::operator[] 7.4 +0x4058e0 std::basic_string,std::allocator >::find 4.04 +0x405760 std::basic_string,std::allocator >::assign 2.34 +0x4bc0b0 memcpy 2.28 +0x4564e0 CUtil::CountTally >>>> 2.28 +0x401040 std::char_traits::compare 2.19 +0x456060 CUtil::ToLower >>>> 1.5 +0x405d50 std::basic_string,std::allocator >::_Copy 1.44 +0x418820 std::basic_streambuf >::snextc 0.95 +0x457ab0 CUtil::ClearRedundantSpaces >>>> 0.91 +0x406160 std::operator+,std::allocator > 0.82 +0x4056b0 std::basic_string,std::allocator >::append 0.77 +0x406540 std::basic_string,std::allocator >::append 0.75 +0x41bf80 std::getline,std::allocator > 0.73 +0x4bd68d malloc 0.72 +0x457c50 CUtil::ReplaceSmartQuotes 0.72 +0x405c50 std::basic_string,std::allocator >::assign 0.72 +0x4562c0 CUtil::FindKeyword 0.71 +0x405b60 std::basic_string,std::allocator >::append 0.64 +0x4114e0 CCJavaCsCounter::LSLOC 0.6 +0x47b2e0 DiffTool::CompareFilePaths 0.49 +0x405640 std::basic_string,std::allocator >::assign 0.46 +0x458d60 std::basic_string,std::allocator >::find_first_not_of 0.43 +0x4c913b _read_nolock 0.33 +0x4b9205 free 0.29 +0x4b9089 operator new 0.29 +0x4b902d operator delete 0.29 +0x405990 std::basic_string,std::allocator >::_Chassign 0.29 +0x4bc23a NO SYMBOL 0.27 +0x4ba4e6 __from_strstr_to_strchr 0.26 +0x4b8c10 memmove 0.26 +0x455900 CTagCounter::CountTagTally 0.26 +0x40aa10 std::_Tree,std::allocator > 0.26 +0x4bc164 NO SYMBOL 0.22 +0x438520 CmpMngr::FindModifiedLines 0.22 +0x416fd0 CCodeCounter::CountComplexity 0.22 +0x454ff0 CTagCounter::LSLOC 0.21 +0x41b590 std::operator+,std::allocator > 0.21 +0x4bc25a NO SYMBOL 0.2 +0x4bc910 memset 0.18 +0x455e80 CUtil::TrimString 0.16 +0x4bc246 NO SYMBOL 0.15 +0x44b510 CPythonCounter::LSLOC 
0.15 + +45 functions, 94 instructions, Total: 12689 samples, 93.47% of shown samples (don't care about % of other session samples) + +The below are the most "approachable" for optimization changes. +CmpMngr::SimilarLine +CUtil::CountTally +CUtil::ToLower +CUtil::ClearRedundantSpaces + SimilarLine is the clear candidate for another look. + +================================================================================================================================= + + Address Line Source Timer samples 35.74 % of TOTAL UCC.exe run time + 281 bool CmpMngr::SimilarLine( const string &baseLine, int *x1, const string &compareLine, int *x2 ) + 0x439080 282 { + 283 // Profiling this shows it was called 135,592 times when 12,553 Files were paired for Differencing + 284 // Due to being called from an inner LOOP of another inner LOOP (see FindModifiedLines) + 285 // Cmd line: split for readability + 286 // -threads 2 -nodup -d + 287 // -dir "C:\Linux\Stable_3_11_6\linux-3.11.6\arch" + 288 // "C:\Linux\linux_3_13_4\arch" + 289 // -outdir "C:\TEST\UCC\Files_OUT" -ascii + 290 // + 291 // 2 changes: Use C style int arrays instead of more general (and slower) std vector container class + 292 // Moved allocation/free of work buffers up to Caller level to prevent memory alloc/free thrashing here + 293 // + 294 bool retVal = false; + 295 int m, n, i, j, k; + 296 double LCSlen; + 0x439086 297 m = (int)baseLine.size(); + 0x439089 298 n = (int)compareLine.size(); + 299 + 300 // Commented out and replaced with C style array passed from Caller + 301 // vector x1, x2; + 302 // x1.resize(m + 1, 0); + 303 // x2.resize(m + 1, 0); + 0x439090 304 memset( x1, 0, (m + 1) * sizeof( int ) ); + 0x4390ae 305 memset( x2, 0, (m + 1) * sizeof( int ) ); + 306 + 307 // compute length of LCS + 308 // - no need to use CBitMatrix + 0x4390ba 309 for (j = n - 1; j >= 0; j--) 0.13 + 310 { + 0x4390c6 311 for (k = 0; k <= m; k++) 0.08 + 312 { + 0x4390d4 313 x2[k] = x1[k]; 2.31 + 0x4390ca 314 x1[k] = 0; 4.69 + 
315 } + 0x4390e5 316 for (i = m - 1; i >= 0; i--) 4.38 + 317 { + 0x4390f7 318 if (baseLine[i] == compareLine[j]) 15.44 + 319 { + 0x439114 320 x1[i] = 1 + x2[i+1]; 1.84 + 321 } + 0x43911e 322 else 0.12 + 323 { + 0x439120 324 if (x1[i+1] > x2[i]) 5.69 + 325 { + 0x4390ec 326 x1[i] = x1[i+1]; 0.32 + 327 } + 0x43912f 328 else 0.18 + 329 { + 0x439131 330 x1[i] = x2[i]; 0.49 + 331 } + 332 } + 333 } + 334 } + 0x439144 335 LCSlen = x1[0]; + 336 if ((LCSlen / (double)m * 100 >= MATCH_THRESHOLD) && + 0x439146 337 (LCSlen / (double)n * 100 >= MATCH_THRESHOLD)) 0.07 + 0x439173 338 retVal = true; + 339 + 340 return retVal; + 0x439175 341 } 0.01 + +61 lines, 0 instructions, Summary: 4852 samples, 35.74% of shown samples, (don't care about 12.28% of total samples which includes all other running processes) + +================================================================================================================================= + + Timing Interpretation: First inner loop + +3 LOOPs 1 outer with 2 separate inner LOOPs (extremely low overhead for outer loop so not discussed below) + + First inner loop just over 7 % of TOTAL run time + + for (k = 0; k <= m; k++) 0.08 + { + x2[k] = x1[k]; 2.31 + x1[k] = 0; 4.69 + } + +Using CodeAnalyst to show the Code Bytes (like AMD or Intel ASM in this case) + + Address Line Source Code Bytes Timer samples + 307 // compute length of LCS + 308 // - no need to use CBitMatrix + 0x4390ba 309 for (j = n - 1; j >= 0; j--) 0.13 + 0x4390ba mov eax,[ebp-10h] 8B 45 F0 0.01 + 0x4390bd add esp,18h 83 C4 18 + 0x4390c0 dec eax 48 + 0x4390c1 mov [ebp-04h],eax 89 45 FC + 0x4390c4 js $+80h (0x439144) 78 7E + ----- break ----- + 0x43913f dec dword [ebp-04h] FF 4D FC + 0x439142 jns $-7ch (0x1004390c6) 79 82 0.12 + 310 { + 0x4390c6 311 for (k = 0; k <= m; k++) 0.08 + 0x4390c6 test esi,esi 85 F6 0.06 + 0x4390c8 js $+1dh (0x4390e5) 78 1B 0.02 + 312 { + 0x4390d4 313 x2[k] = x1[k]; 2.31 + 0x4390d4 mov edi,[eax] 8B 38 + 0x4390d6 mov [ecx+eax],edi 89 3C 01 2.31 + 
0x4390ca 314 x1[k] = 0; 4.69 + 0x4390ca mov ecx,[ebp+18h] 8B 4D 18 + 0x4390cd mov eax,ebx 8B C3 0.01 + 0x4390cf sub ecx,ebx 2B CB 0.06 + 0x4390d1 lea edx,[esi+01h] 8D 56 01 + ----- break ----- + 0x4390d9 mov [eax],00000000h C7 00 00 00 00 00 0.27 + 0x4390df add eax,04h 83 C0 04 1.84 + 0x4390e2 dec edx 4A 2.5 + 0x4390e3 jnz $-0fh (0x1004390d4) 75 EF + 315 } + +9 lines, 21 instructions, Summary: 978 samples, 7.20% of shown samples + + Referring to Lines 311 to 315 +Those familiar with Intel ASM will see that the use of CPU registers could be better. +ECX is not used effectively as a counter +The surprise to me is the time needed to just Zero x1[k] at over 4 1/2 percent of TOTAL runtime ! + + Comment out loop and replace with + memcpy( x2, x1, ( m + 1 ) * sizeof( int ) ); + memset( x1, 0, ( m + 1 ) * sizeof( int ) ); + + Which does the exact same logic using optimized C library code. + +================================================================================================================================= + + Timing Interpretation: Second inner loop + + Second inner loop 28.46 % of TOTAL run time + + for (i = m - 1; i >= 0; i--) 4.38 + { + if (baseLine[i] == compareLine[j]) 15.44 + { + x1[i] = 1 + x2[i+1]; 1.84 + } + else 0.12 + { + if (x1[i+1] > x2[i]) 5.69 + { + x1[i] = x1[i+1]; 0.32 + } + else 0.18 + { + x1[i] = x2[i]; 0.49 + } + } + } + + Address Line Source Code Bytes Timer samples + 0x4390e5 316 for (i = m - 1; i >= 0; i--) 4.38 + 0x4390e5 lea edi,[esi-01h] 8D 7E FF + 0x4390e8 test edi,edi 85 FF 0.49 + 0x4390ea js $+55h (0x43913f) 78 53 0.05 + ----- break ----- + 0x439133 sub esi,04h 83 EE 04 2.2 + 0x439136 dec edi 4F 0.57 + 0x439137 jns $-40h (0x1004390f7) 79 BE 0.65 + 0x439139 mov ebx,[ebp+10h] 8B 5D 10 0.03 + 0x43913c mov esi,[ebp-0ch] 8B 75 F4 0.38 + 317 { + 0x4390f7 318 if (baseLine[i] == compareLine[j]) 15.44 + 0x4390f7 mov ebx,[ebp+0ch] 8B 5D 0C 1.22 + 0x4390fa cmp [ebx+14h],10h 83 7B 14 10 1.16 + 0x4390fe jb $+04h (0x439102) 72 02 1.25 + 0x439100 
mov ebx,[ebx] 8B 1B 1.08 + 0x439102 mov ecx,[ebp-04h] 8B 4D FC 1.07 + 0x439105 mov eax,[ebp+14h] 8B 45 14 0.69 + 0x439108 call $-00020bd8h (0x100418530) E8 23 F4 FD FF 0.59 + 0x43910d mov cl,[ebx+edi] 8A 0C 3B 3.03 + 0x439110 cmp cl,[eax] 3A 08 1.55 + 0x439112 jnz $+0eh (0x439120) 75 0C 3.79 + 319 { + 0x439114 320 x1[i] = 1 + x2[i+1]; 1.84 + 0x439114 mov edx,[ebp-08h] 8B 55 F8 0.1 + 0x439117 mov eax,[esi+edx+04h] 8B 44 16 04 1.31 + 0x43911b inc eax 40 0.29 + 0x43911c mov [esi],eax 89 06 0.14 + 321 } + 0x43911e 322 else 0.12 + 0x43911e jmp $+15h (0x439133) EB 13 0.12 + 323 { + 0x439120 324 if (x1[i+1] > x2[i]) 5.69 + 0x439120 mov ecx,[ebp-08h] 8B 4D F8 2.24 + 0x439123 mov eax,[esi+04h] 8B 46 04 0.94 + 0x439126 mov ecx,[esi+ecx] 8B 0C 0E 0.86 + 0x439129 cmp eax,ecx 3B C1 0.68 + 0x43912b jle $+06h (0x439131) 7E 04 0.97 + 325 { + 0x4390ec 326 x1[i] = x1[i+1]; 0.32 + 0x4390ec mov eax,[ebp+18h] 8B 45 18 0.12 + 0x4390ef sub eax,ebx 2B C3 0.06 + 0x4390f1 lea esi,[ebx+edi*4] 8D 34 BB + 0x4390f4 mov [ebp-08h],eax 89 45 F8 + ----- break ----- + 0x43912d mov [esi],eax 89 06 0.14 + 327 } + 0x43912f 328 else 0.18 + 0x43912f jmp $+04h (0x439133) EB 02 0.18 + 329 { + 0x439131 330 x1[i] = x2[i]; 0.49 + 0x439131 mov [esi],ecx 89 0E 0.49 + 331 } + 332 } + 333 } + 334 } + +19 lines, 37 instructions, Summary: 3863 samples, 28.46% of shown samples + +2 Approaches: 1 easy, other requires notes during Debug session of the inner loop and some thought (sometime) + +EASY: compareLine[j] does NOT change within the loop so new Second inner loop is: + + cmp_j = compareLine[ j ]; + + for ( i = m - 1; i >= 0; i-- ) + { + if ( baseLine[i] == cmp_j ) // Should be faster. Need to profile changes now... see below + { + x1[i] = 1 + x2[i+1]; + } + else + { + if (x1[i+1] > x2[i]) + { + x1[i] = x1[i+1]; + } + else + { + x1[i] = x2[i]; + } + } + } + + New Profiler run with above changes applied. Compared with run at top of this file. 
+ +Partial capture of overall Times given as percent of total time used by UCC. Clipped to show highest 90.70% of UCC Time used. +================================================================================================================================= +CS:EIP Symbol + Offset Timer samples +0x439090 CmpMngr::SimilarLine 30.92 was 35.74 +0x4b8f60 memchr 20.81 was 19.15 +0x4bc090 memcpy 4.74 was 2.28 +0x4058e0 std::basic_string,std::allocator >::find 4.39 was 4.04 +0x455e20 CUtil::CountTally 2.86 was 2.28 +0x401040 std::char_traits::compare 2.52 +0x405760 std::basic_string,std::allocator >::assign 2.45 +0x4bf0e7 _VEC_memcpy 1.96 +0x405d50 std::basic_string,std::allocator >::_Copy 1.76 +0x4c553d _VEC_memzero 1.71 +0x4bc8f0 memset 1.39 +0x4559a0 CUtil::ToLower 1.38 +0x418e20 std::basic_streambuf >::snextc 1.06 +0x4573f0 CUtil::ClearRedundantSpaces 1.02 +0x4056b0 std::basic_string,std::allocator >::append 0.86 +0x41bea0 std::getline,std::allocator > 0.86 +0x405c50 std::basic_string,std::allocator >::assign 0.84 +0x4061c0 std::operator+,std::allocator > 0.82 +0x457590 CUtil::ReplaceSmartQuotes 0.81 +0x4065a0 std::basic_string,std::allocator >::append 0.76 +0x411af0 CCJavaCsCounter::LSLOC 0.74 +0x4bd66d malloc 0.74 +0x405640 std::basic_string,std::allocator >::assign 0.73 +0x47b0d0 DiffTool::CompareFilePaths 0.7 +0x405b60 std::basic_string,std::allocator >::append 0.61 +0x455c00 CUtil::FindKeyword 0.51 +0x4c90eb _read_nolock 0.4 +0x4b900d operator delete 0.38 +0x405990 std::basic_string,std::allocator >::_Chassign 0.37 +0x4b8bf0 memmove 0.34 +0x4b9069 operator new 0.34 +0x4b91e5 free 0.31 +0x455240 CTagCounter::CountTagTally 0.3 +0x4ba4c6 __from_strstr_to_strchr 0.3 + +34 functions, 34 instructions, Total: 9627 samples, 90.70% of shown samples + +Because the percent used by SimilarLine decreased the percentages for other procedures will increase +but the overall runtime is still lower. 
+ +Along with making SimilarLine faster the other benefit is the 3 now missing entries between memchr and memcpy +0x418530 std::basic_string,std::allocator >::operator[] 7.4 +0x4058e0 std::basic_string,std::allocator >::find 4.04 +0x405760 std::basic_string,std::allocator >::assign 2.34 + +Getting rid of so many uses of operator[] is a good thing! along with find() and assign() + +================================================================================================================================= + Address Line Source Code Bytes Timer samples 30.92 % of TOTAL UCC.exe run time + 0x4390cf 311 for (j = n - 1; j >= 0; j--) 0.5 + 312 { + 313 // Left as example of previous code optimized for speed + 314 // for ( k = 0; k <= m; k++ ) + 315 // { + 316 // x2[ k ] = x1[ k ]; + 317 // x1[ k ] = 0; + 318 // } + 0x4390e0 319 memcpy( x2, x1, ( m + 1 ) * sizeof( int ) ); 0.21 <<< BIG change here. About 1/3 of a percent + 0x4390ee 320 memset( x1, 0, ( m + 1 ) * sizeof( int ) ); 0.13 <<< instead of 7 % of Total time + 321 + 0x4390f7 322 cmp_j = compareLine[ j ]; 0.24 <<< ADDED 1/4 percent overhead to TOTAL runtime + 323 + 0x439105 324 for ( i = m - 1; i >= 0; i-- ) 4.45 + 325 { + 0x43911a 326 if ( baseLine[i] == cmp_j ) 14.24 <<< 1.2 % improvement. 
Was 15.44 % + 327 { <<< Including overhead above slightly under 1 % of TOTAL time improvement + 0x43912d 328 x1[i] = 1 + x2[i+1]; 1.84 + 329 } + 0x439134 330 else + 331 { + 0x439136 332 if (x1[i+1] > x2[i]) 6.1 + 333 { + 0x439112 334 x1[i] = x1[i+1]; 0.62 + 335 } + 0x439142 336 else 0.26 + 337 { + 0x439144 338 x1[i] = x2[i]; 2.2 + 339 } + 340 } + 341 } + 342 } + 0x439155 343 LCSlen = x1[0]; + 344 if ((LCSlen / (double)m * 100 >= MATCH_THRESHOLD) && + 0x439157 345 (LCSlen / (double)n * 100 >= MATCH_THRESHOLD)) 0.1 + 0x439184 346 retVal = true; + 347 + 348 return retVal; + 0x439186 349 } 0.01 + +39 lines, 0 instructions, Summary: 3278 samples, 30.88% of shown samples + + +================================================================================================================================= + + Details of what works better now + + memcpy( x2, x1, ( m + 1 ) * sizeof( int ) ); + memset( x1, 0, ( m + 1 ) * sizeof( int ) ); + + Address Line Source Code Bytes Timer samples + 0x4390e0 319 memcpy( x2, x1, ( m + 1 ) * sizeof( int ) ); 0.21 + 0x4390e0 mov esi,[ebp-08h] 8B 75 F8 0.07 + 0x4390e3 mov ecx,[ebp+14h] 8B 4D 14 0.08 + 0x4390e6 push esi 56 + 0x4390e7 push ebx 53 + 0x4390e8 push ecx 51 0.07 + 0x4390e9 call $+00082fa7h (0x4bc090) E8 A2 2F 08 00 + 0x4390ee 320 memset( x1, 0, ( m + 1 ) * sizeof( int ) ); 0.13 + 0x4390ee push esi 56 0.05 + 0x4390ef push byte 00h 6A 00 0.01 + 0x4390f1 push ebx 53 0.08 + 0x4390f2 call $+000837feh (0x4bc8f0) E8 F9 37 08 00 + +2 lines, 10 instructions, Summary: 36 samples, 0.34% of shown samples + + if ( baseLine[i] == cmp_j ) NEW version + Address Line Source Code Bytes Timer samples + 0x43911a 326 if ( baseLine[i] == cmp_j ) 14.24 + 0x43911a mov esi,[ebp+0ch] 8B 75 0C 1.47 + 0x43911d cmp [esi+14h],10h 83 7E 14 10 2.89 + 0x439121 jb $+04h (0x439125) 72 02 3.42 + 0x439123 mov esi,[esi] 8B 36 0.34 + 0x439125 mov dl,[ebp-01h] 8A 55 FF 2 + 0x439128 cmp [esi+ecx],dl 38 14 0E 1.31 << BYTE (char size) compare + 0x43912b jnz $+0bh (0x439136) 
75 09 2.81 + + if (baseLine[i] == compareLine[j]) OLD version + Address Line Source Code Bytes Timer samples + 0x4390f7 318 if (baseLine[i] == compareLine[j]) 15.44 + 0x4390f7 mov ebx,[ebp+0ch] 8B 5D 0C 1.22 + 0x4390fa cmp [ebx+14h],10h 83 7B 14 10 1.16 + 0x4390fe jb $+04h (0x439102) 72 02 1.25 + 0x439100 mov ebx,[ebx] 8B 1B 1.08 + 0x439102 mov ecx,[ebp-04h] 8B 4D FC 1.07 + 0x439105 mov eax,[ebp+14h] 8B 45 14 0.69 + 0x439108 call $-00020bd8h (0x100418530) E8 23 F4 FD FF 0.59 << used to be Call to library code before compare + 0x43910d mov cl,[ebx+edi] 8A 0C 3B 3.03 + 0x439110 cmp cl,[eax] 3A 08 1.55 << BYTE compare + 0x439112 jnz $+0eh (0x439120) 75 0C 3.79 + + Details of Second inner loop. This is a good candidate for more optimizations... + + Address Line Source Code Bytes Timer samples + 0x439105 324 for ( i = m - 1; i >= 0; i-- ) 4.45 + 0x439105 mov ecx,[ebp-10h] 8B 4D F0 0.03 + 0x439108 mov dl,[eax+edi] 8A 14 38 0.01 + 0x43910b mov [ebp-01h],dl 88 55 FF 0.06 + 0x43910e test ecx,ecx 85 C9 0.05 + 0x439110 js $+3fh (0x43914f) 78 3D 0.08 + ----- break ----- + 0x439146 sub eax,04h 83 E8 04 2.59 + 0x439149 dec ecx 49 1.18 + 0x43914a jns $-30h (0x10043911a) 79 CE 0.44 + 0x43914c mov edi,[ebp-0ch] 8B 7D F4 0.02 + 325 { + 0x43911a 326 if ( baseLine[i] == cmp_j ) 14.24 + 0x43911a mov esi,[ebp+0ch] 8B 75 0C 1.47 + 0x43911d cmp [esi+14h],10h 83 7E 14 10 2.89 + 0x439121 jb $+04h (0x439125) 72 02 3.42 + 0x439123 mov esi,[esi] 8B 36 0.34 + 0x439125 mov dl,[ebp-01h] 8A 55 FF 2 + 0x439128 cmp [esi+ecx],dl 38 14 0E 1.31 + 0x43912b jnz $+0bh (0x439136) 75 09 2.81 + 327 { + 0x43912d 328 x1[i] = 1 + x2[i+1]; 1.84 + 0x43912d mov edx,[edi+eax+04h] 8B 54 07 04 0.13 + 0x439131 inc edx 42 1.52 + 0x439132 mov [eax],edx 89 10 0.19 + 329 } + 0x439134 330 else + 0x439134 jmp $+12h (0x439146) EB 10 + 331 { + 0x439136 332 if (x1[i+1] > x2[i]) 6.1 + 0x439136 mov edx,[eax+04h] 8B 50 04 2.63 + 0x439139 mov esi,[edi+eax] 8B 34 07 2.83 + 0x43913c cmp edx,esi 3B D6 0.08 + 0x43913e jle $+06h 
(0x439144) 7E 04 0.56 + 333 { + 0x439112 334 x1[i] = x1[i+1]; 0.62 + 0x439112 mov edi,[ebp+14h] 8B 7D 14 + 0x439115 lea eax,[ebx+ecx*4] 8D 04 8B 0.01 + 0x439118 sub edi,ebx 2B FB 0.03 + ----- break ----- + 0x439140 mov [eax],edx 89 10 0.58 + 335 } + 0x439142 336 else 0.26 + 0x439142 jmp $+04h (0x439146) EB 02 0.26 + 337 { + 0x439144 338 x1[i] = x2[i]; 2.2 + 0x439144 mov [eax],esi 89 30 2.2 + 339 } + 340 } + 341 } + 342 } + +19 lines, 32 instructions, Summary: 3152 samples, 29.70% of shown samples + +================================================================================================================================= + + Next steps + + Analysis/Debug +Is it possible to refactor the code in +the Second inner loop to completely avoid use of arrays ? ? ? +That would be one of the focus viewpoints of the Debug session. +I am guessing that about 6 to 9 or so int variables could be used instead... + +IF arrays are not needed +then some overhead in the Calling code for the x1 and x2 arrays would be gone as well. + +Hopefully this has helped show how simple use of Profiler results can benefit UCC. + +Have Fun! +Randy Maxwell diff --git a/UCC_CA_Profile_DUP_No_DIFF_Details.txt b/UCC_CA_Profile_DUP_No_DIFF_Details.txt new file mode 100644 index 0000000..75bb778 --- /dev/null +++ b/UCC_CA_Profile_DUP_No_DIFF_Details.txt @@ -0,0 +1,735 @@ +Below is a capture of using a profiler to do small optimizations of UCC Duplicate checking code + +Code with interleaved Times captured by AMD CodeAnalyst in this case are shown. + +There is a CRITICAL Safety Tip near the end of this file about interpreting profiler results. + +Just a few examples of what a decent profiler can help with. 
+ +Note: other developers have used CodeAnalyst Time sampling on Intel CPU HW as well + +Text capture of Details of using AMD CodeAnalyst Timing sampling (current Time based profile) + on RELEASE (Fully Optimized) Build of UCC (Debug symbols & info also done to support CodeAnalyst) + +Visual C++ 2010 Express making 32 bit Windows UCC.exe and run on 64 bit Windows 7.1 OS using + O2 + W4 + optimize for speed + Whole program optimization at Link time + MT + +The profile used a statistical Time sampling approach +Operations in the profile included the Time of + <2 extra worker Threads on 2 CPU AMD> +Read, +Analyze, Count keywords, + +do Complexity metrics +and do Duplicate checks with NO Differencing +and finally produce output files + +UCC.exe -threads 2 +-dir "C:\Linux\Stable_3_11_6\linux-3.11.6\arch" +-outdir "C:\TEST\UCC\Files_OUT" -ascii + +12062 files processed in linux-3.11.6\arch + + +Partial capture of overall Times given as percent of total time used by UCC. Clipped to show highest 89.33% of UCC Time used. 
+================================================================================================================================= +CS:EIP Symbol + Offset Timer samples +0x4b8f60 memchr 31.74 +0x48a2c0 MainObject::FindDuplicateFor 12.32 <<< +0x4058e0 std::basic_string,std::allocator >::find 7.25 +0x439090 CmpMngr::SimilarLine 5.99 <<< +0x401040 std::char_traits::compare 5.67 +0x455e20 CUtil::CountTally 4.63 <<< +0x4bc090 memcpy 3.56 +0x405760 std::basic_string,std::allocator >::assign 2.89 +0x405d50 std::basic_string,std::allocator >::_Copy 1.9 +0x4573f0 CUtil::ClearRedundantSpaces 1.2 +0x411af0 CCJavaCsCounter::LSLOC 1.15 +0x418e20 std::basic_streambuf >::snextc 1.09 +0x455c00 CUtil::FindKeyword 0.89 +0x4056b0 std::basic_string,std::allocator >::append 0.88 +0x4bd66d malloc 0.88 +0x457590 CUtil::ReplaceSmartQuotes 0.86 +0x4065a0 std::basic_string,std::allocator >::append 0.84 +0x4061c0 std::operator+,std::allocator > 0.82 +0x41bea0 std::getline,std::allocator > 0.73 +0x405b60 std::basic_string,std::allocator >::append 0.68 +0x405c50 std::basic_string,std::allocator >::assign 0.67 +0x405640 std::basic_string,std::allocator >::assign 0.51 +0x4c553d _VEC_memzero 0.47 +0x4c90eb _read_nolock 0.45 +0x4b9069 operator new 0.43 +0x4b91e5 free 0.41 +0x4bc8f0 memset 0.41 + +27 functions, 27 instructions, Total: 65770 samples, 89.33% of shown samples (don't care about % of other session samples) + +The below are the most "approachable" for optimization changes. 
+MainObject::FindDuplicateFor +CmpMngr::SimilarLine +CUtil::CountTally (maybe) + FindDuplicateFor and SimilarLine are candidates for another look + + + FindDuplicateFor 12.32% of TOTAL runtime of UCC (without Differencing) +================================================================================================================================= + + Address Line Source Code Bytes Timer samples + 1510 // LOOP Outer loop to look for Duplicates + 0x48a3ed 1511 for ( j++; j != cmpListEnd; j++ ) 0.47 + 1512 { + 1513 if ((*j).second.duplicate || (*j).second.firstDuplicate || + 0x48a499 1514 (*j).second.file_name_isEmbedded == true ) 7.84 + 1515 { + 1516 // already been matched or embedded file + 1517 continue; + 1518 } + 0x48a4c0 1519 filenameMatched = i_second_file_name_only.compare((*j).second.file_name_only); 1.52 + 0x48a517 1520 j_first_size = (*j).first.size(); 2.34 + 1521 if ( i_first_size != j_first_size && + 0x48a520 1522 filenameMatched != 0) 0.12 + 1523 { + 1524 // two files have different number of lines and different filename + 1525 continue; + 1526 } + 1527 + 1528 // if files have same name, do a diff and mark as duplicates if logical SLOC change % is below threshold + 1529 filesMatched = false; + 0x48a52c 1530 if (filenameMatched == 0 && (i_second_file_type != DATA || ( i_first_size < 1 && j_first_size < 1))) 0 + 1531 { + 1532 // match empty files with same name + 0x48a551 1533 if ( i_first_size < 1 && j_first_size < 1) 0 + 1534 filesMatched = true; + 0x48a559 1535 else + 1536 { + 1537 // each source file elements results object has a mySLOCLines object with the SLOC to be diffed + 0x48a565 1538 changed_lines = total_lines = pct_change = 0.0; + 0x48a567 1539 sizeF1 = 0; 0 + 1540 sizeF2 = 0; + 1541 + 1542 // for web languages, diff each of the embedded files + 0x48a569 1543 if ( i_second_class_type == WEB ) + 1544 { + 1545 // find all matches for i embedded files in j + 1546 SourceFileList::iterator i1 = i; + 1547 SourceFileList::iterator j1 = j; + 
0x48a588 1548 for (i1++; i1 != cmpListEnd; i1++) + 1549 { + 0x48a593 1550 if ( i1->second.file_name_isEmbedded == false ) + 1551 break; + 1552 + 1553 found = false; + 1554 j1 = j; + 0x48a5a0 1555 for (j1++; j1 != cmpListEnd; j1++) + 1556 { + 0x48a5ae 1557 if ( j1->second.file_name_isEmbedded == false ) + 1558 break; + 0x48a5b7 1559 if (i1->second.file_name_only.compare(j1->second.file_name_only) == 0) + 1560 { + 1561 found = true; + 0x48a5cf 1562 matchedFiles.push_back(make_pair(&(*i1), &(*j1))); + 0x48a5f0 1563 sizeF1 += i1->second.mySLOCLines.size(); + 0x48a5f6 1564 sizeF2 += j1->second.mySLOCLines.size(); + 1565 } + 1566 } + 0x48a609 1567 if (!found) + 1568 { + 0x48a60f 1569 sizeF1 += i1->second.mySLOCLines.size(); + 0x48a618 1570 matchedFiles.push_back(make_pair(&(*i1), nullElement)); + 1571 } + 1572 } + 1573 + 1574 // find all unmatched j embedded files + 1575 j1 = j; + 0x48a644 1576 for (j1++; j1 != cmpListEnd; j1++) + 1577 { + 0x48a655 1578 if ( j1->second.file_name_isEmbedded == false ) + 1579 break; + 1580 + 1581 found = false; + 1582 i1 = i; + 0x48a665 1583 for (i1++; i1 != cmpListEnd; i1++) + 1584 { + 0x48a66f 1585 if ( i1->second.file_name_isEmbedded == false ) + 1586 break; + 0x48a678 1587 if (i1->second.file_name_only.compare(j1->second.file_name_only) == 0) + 1588 { + 1589 found = true; + 1590 break; + 1591 } + 1592 } + 1593 if (!found) + 1594 { + 0x48a6d8 1595 sizeF2 += j1->second.mySLOCLines.size(); + 0x48a6e1 1596 matchedFiles.push_back(make_pair(nullElement, &(*j1))); + 1597 } + 1598 } + 1599 + 0x48a7e3 1600 if (sizeF1 > sizeF2) + 0x48a7ed 1601 pctcheck = 100 * (double)(sizeF1 - sizeF2) / sizeF1; + 0x48a80d 1602 else + 0x48a80f 1603 pctcheck = 100 * (double)(sizeF2 - sizeF1) / sizeF2; + 1604 + 1605 // perform comparison only if the change percent (pctcheck) is not greater than threshold + 0x48a839 1606 if (pctcheck <= duplicate_threshold) + 1607 { + 0x48a84d 1608 vector >::iterator ii = matchedFiles.begin(); + 0x48a850 1609 while (ii != 
matchedFiles.end()) + 1610 { + 0x48a85c 1611 if (ii->first == nullElement) + 1612 { + 1613 // don't need to compare the empty file to compute the information + 0x48a85e 1614 changed_lines += ii->second->second.mySLOCLines.size(); // all lines deleted + 1615 } + 0x48a881 1616 else if (ii->second == nullElement) + 1617 { + 1618 // don't need to compare the empty file to compute the information + 0x48a887 1619 changed_lines += ii->first->second.mySLOCLines.size(); + 0x48a8a2 1620 total_lines += ii->first->second.mySLOCLines.size(); + 1621 } + 0x48a8bb 1622 else + 0x48a8bd 1623 CompareForDuplicate(ii->first->second.mySLOCLines, ii->second->second.mySLOCLines, changed_lines, total_lines); + 1624 + 0x48a8d5 1625 ii++; + 1626 } + 1627 } + 1628 else + 1629 continue; + 1630 } + 0x48a8dc 1631 else + 1632 { + 1633 // only compare if the chance of duplicates is high + 0x48a8e1 1634 sizeF1 = (*i).second.mySLOCLines.size(); + 0x48a8e7 1635 sizeF2 = (*j).second.mySLOCLines.size(); 0 + 0x48a8ed 1636 if (sizeF1 > sizeF2) 0 + 0x48a8f1 1637 pctcheck = 100 * (double)(sizeF1 - sizeF2) / sizeF1; 0.01 + 0x48a911 1638 else + 0x48a913 1639 pctcheck = 100 * (double)(sizeF2 - sizeF1) / sizeF2; + 1640 + 1641 // perform comparison only if the change percent (pctcheck) is not greater than threshold + 0x48a93d 1642 if (pctcheck <= duplicate_threshold) 0.01 + 0x48a951 1643 CompareForDuplicate((*i).second.mySLOCLines, (*j).second.mySLOCLines, changed_lines, total_lines); + 1644 else + 1645 continue; + 1646 } + 1647 + 0x48a96a 1648 if (changed_lines > 0.0) + 0x48a97a 1649 pct_change = (changed_lines / total_lines) * 100.0; + 0x48a98d 1650 if (pct_change <= duplicate_threshold) + 1651 filesMatched = true; + 1652 } + 1653 } + 0x48a9a1 1654 else + 1655 { + 1656 // if filenames are different, do a line by line comparison for identical duplicate + 1657 if ( ( i_first_size != j_first_size ) + 0x48a9a3 1658 || ( ( i_first_size < 1 ) || ( j_first_size < 1 ) ) ) + 1659 { + 1660 // don't match files with 
different line counts or empty files with different names + 1661 continue; + 1662 } + 1663 + 1664 // note: two files have the same number of lines + 0x48a9bd 1665 vector::iterator baseLine = i_first_begin; + 0x48a9c0 1666 vector::iterator compareLine = (*j).first.begin(); + 0x48a9c3 1667 while (baseLine != i_first_end && compareLine != (*j).first.end()) 0 + 1668 { + 0x48a9d0 1669 if ((*baseLine).line.compare((*compareLine).line) != 0) 0 + 1670 break; + 0x48a9dc 1671 baseLine++; + 0x48a9df 1672 compareLine++; + 1673 } + 0x48a9e7 1674 if (baseLine == i_first_end && compareLine == (*j).first.end()) 0 + 1675 filesMatched = true; + 1676 } + 1677 if (filesMatched) + 1678 { + 1679 // check whether a comparison match exists + 1680 recDup = true; + 0x48a9f3 1681 if (checkMatch) + 1682 { + 0x48a9f9 1683 if ((*i).second.matched) + 0x48aa05 1684 checkMatch = false; + 0x48aa56 1685 else if ((*j).second.matched) + 1686 { + 1687 // change previously set first duplicate (if necessary) + 0x48aa62 1688 if (foundDup) + 1689 { + 0x48aa68 1690 (*i).second.firstDuplicate = false; + 0x48aa6b 1691 for (size_t n = dupList1.size() - dupCnt; n < dupList1.size(); n++) + 0x48aaa5 1692 dupList1[n] = (*j).second.file_name; + 1693 } + 1694 + 1695 // switch first duplicate for one with a match + 1696 recDup = false; + 1697 checkMatch = false; + 0x48aadf 1698 (*j).second.firstDuplicate = true; + 0x48aae2 1699 (*i).second.duplicate = true; + 0x48aae5 1700 dupList1.push_back((*j).second.file_name); + 0x48ab05 1701 dupList2.push_back((*i).second.file_name); + 1702 dupCnt++; + 0x48ab13 1703 i = j; + 1704 } + 1705 } + 1706 + 0x48ab16 1707 if (recDup) + 1708 { + 1709 // add pair to duplicate list + 0x48aa09 1710 (*i).second.firstDuplicate = true; + 0x48aa0c 1711 (*j).second.duplicate = true; + 0x48aa0f 1712 dupList1.push_back((*i).second.file_name); + 0x48aa2b 1713 dupList2.push_back((*j).second.file_name); + 0x48aa39 1714 dupCnt++; + 1715 } + 0x48aa3c 1716 foundDup = true; + 1717 } + 1718 } + 0x48a41e 
1719 return foundDup; 0 + 0x48a39a 1720 } 0 + +211 lines, 0 instructions, Summary: 9070 samples, 12.32% of shown samples + + LATEST FindDuplicateFor 1.51 % instead of 12.32% as above ! ! ! This code is 8x faster ! ! ! +================================================================================================================================= + + Consider carefully the setup code before the loop starts. + It minimizes pointer dereferencing and sets up a reference for i and j (for a better explanation see below...) + + Address Line Source Code Bytes Timer samples + 1500 results & i_r = (*i).second; + 0x48a3ba 1501 string i_r_file_name_only = i_r.file_name_only; + 1502 unsigned int i_r_file_name_only_size = i_r.file_name_only.size(); + 0x48a3f2 1503 unsigned int i_first_size = (*i).first.size(); + 1504 int i_r_file_type = i_r.file_type; + 0x48a3fe 1505 ClassType i_r_class_type = i_r.class_type; 0 + 0x48a419 1506 vector::iterator i_first_begin = (*i).first.begin(); + 0x48a422 1507 vector::iterator i_first_end = (*i).first.end(); + 1508 + 1509 // Values that are set 1 time in the LOOP and used 2 or more times + 1510 unsigned int j_first_size = 0; + 1511 results & j_r = i_r; + 1512 + 1513 // LOOP Outer loop to look for Duplicates + 0x48a425 1514 for ( j++; j != cmpListEnd; j++ ) 0.28 + 1515 { + 0x48a441 1516 j_r = (*j).second; 0.04 + 1517 if ( j_r.duplicate || j_r.firstDuplicate || + 0x48a452 1518 j_r.file_name_isEmbedded == true ) 0.28 was 7.84 ! ! ! + 1519 { + 1520 // already been matched or embedded file + 1521 continue; + 1522 } + 1523 + 1524 // filenameMatched = i_r_file_name_only.compare((*j).second.file_name_only); + 1525 filenameMatched = 1; // Start with NOT matched file names + 1526 + 1527 // Only call compare if sizes are same + 0x48a476 1528 if ( i_r_file_name_only_size == j_r.file_name_only.size() ) 0.03 <<< Added overhead, WELL worth it ! 
+ 0x48a489 1529 filenameMatched = i_r_file_name_only.compare( j_r.file_name_only ); 0.01 was 1.52 + 1530 + 0x48a497 1531 j_first_size = (*j).first.size(); 0.81 was 2.34 + 1532 if ( i_first_size != j_first_size && + 0x48a49d 1533 filenameMatched != 0 ) 0.05 + 1534 { + 1535 // two files have different number of lines and different filename + 1536 continue; + 1537 } + +38 lines, 0 instructions, Summary: 2509 samples, 1.51% of shown samples + +================================================================================================================================= + +CS:EIP Symbol + Offset Timer samples +0x40b130 std::_Uninit_copy > 35.62 +0x4b8f60 memchr 8.98 +0x4bc090 memcpy 7.78 +0x405760 std::basic_string,std::allocator >::assign 6.88 +0x409da0 results::operator= 6.42 +0x40ad80 std::vector >::_Insert 5.51 +0x40b2b0 std::_Uninit_copy 2.57 +0x405d50 std::basic_string,std::allocator >::_Copy 2.45 +0x4058e0 std::basic_string,std::allocator >::find 2.08 +0x4b8bf0 memmove 1.86 +0x439090 CmpMngr::SimilarLine 1.71 +0x48a2c0 MainObject::FindDuplicateFor >>> 1.51 <<< +0x455e20 CUtil::CountTally 1.26 +0x401040 std::char_traits::compare 1.21 +0x4bd66d malloc 1.03 +0x4bc1b8 NO SYMBOL 0.58 +0x4bc1c0 NO SYMBOL 0.58 +0x4bc1c8 NO SYMBOL 0.57 +0x40ad40 std::_Destroy_range > 0.56 +0x4b9069 operator new 0.53 +0x4bc1d0 NO SYMBOL 0.44 +0x4b91e5 free 0.43 +0x4573f0 CUtil::ClearRedundantSpaces 0.36 +0x40af20 std::vector >::_Insert 0.34 +0x411af0 CCJavaCsCounter::LSLOC 0.34 +0x4bc21a NO SYMBOL 0.34 +0x4bc226 NO SYMBOL 0.33 +0x4bc23a NO SYMBOL 0.33 +0x418e20 std::basic_streambuf >::snextc 0.31 +0x4065a0 std::basic_string,std::allocator >::append 0.25 +0x455c00 CUtil::FindKeyword 0.25 +0x457590 CUtil::ReplaceSmartQuotes 0.25 +0x4b900d operator delete 0.24 + +33 functions, 33 instructions, Total: 240550 samples, 93.91% of shown samples + +================================================================================================================================= + + After capturing 
and writing in comments above I did another comparison... + +Original 2013_04 Release build + 3 minutes 22 seconds or 202 seconds + +The just optimized version as above + 5 minutes 39 seconds or 339 seconds + + WHAT did I do wrong ? ? ? + +Looking at the Total for UCC: 240,550 samples for the recent run vs 65,770 samples for the original run +we see that overall time has INCREASED as the performance sanity check has shown. + +OK... +So the comments I made about avoiding dereferencing pointers are TOTALLY misleading. (Note: a C++ reference cannot be rebound, so after "results & j_r = i_r;" the statement "j_r = (*j).second;" copy-assigns the whole results object onto i_r on every loop iteration; that is why results::operator= and std::_Uninit_copy now dominate the profile above.) + +Time to back away from the latest changes and look at the code again. + + We know from the profile info at the start of this file that + the slow operation is the first group of lines in the loop, which has the most timer hits. + + Rethink the code. + What does the loop do? + +The loop starts by doing some "Sanity checks" before entering the bulk of the processing. +Sanity checks are checking for valid conditions. + +// LOOP Outer loop to look for Duplicates + for ( j++; j != cmpListEnd; j++ ) + { + // START Precondition checks + if ((*j).second.duplicate || (*j).second.firstDuplicate || + (*j).second.file_name_isEmbedded == true ) + { + // already been matched or embedded file + continue; + } + + // Only call compare if sizes are same + filenameMatched = 1; // Start with file names NOT matched + if ( i_second_file_name_only_size == (*j).second.file_name_only.size() ) + filenameMatched = i_second_file_name_only.compare( (*j).second.file_name_only ); + + j_first_size = (*j).first.size(); + if ( i_first_size != j_first_size && + filenameMatched != 0 ) + { + // two files have different number of lines and different filename + continue; + } + // END Precondition checks + + // if files have same name, do a diff and mark as duplicates if logical SLOC change % is below threshold + filesMatched = false; + if (filenameMatched == 0 && (i_second_file_type != DATA || ( i_first_size < 1 && j_first_size < 1))) + ... +and so on. Rest of LOOP not repeated here. 
+ +Another way of describing this is talking about Preconditions (as comments show). +Preconditions are one of the key concepts of Design by Contract. +In Design by Contract the Caller should satisfy the Preconditions before the call. +But FindDuplicateFiles (the Caller) does not satisfy all the Preconditions (only the first if block). +So a better approach is for FindDuplicateFiles to check ALL the Preconditions before... + + for (SourceFileList::iterator i = fileList.begin(); i != fileList_end; i++) + { + // Check Preconditions (Design by Contract) to avoid unneeded calls + SourceFileList::iterator j = i; + j++; + if ( j == fileList_end ) + break; // done + + if (!(*i).second.duplicate && !(*i).second.firstDuplicate) + { + if ( (*i).second.file_name_isEmbedded == false ) + { + // Only call compare if sizes are same (faster) + int filenameMatched = 1; // Start with file names NOT matched + if ( (*i).second.file_name_only.size() == (*j).second.file_name_only.size() ) + filenameMatched = (*i).second.file_name_only.compare( (*j).second.file_name_only ); + + if ( (*i).first.size() != (*j).first.size() && + filenameMatched != 0 ) + { + // two files have different number of lines and different filename + continue; + } + // END Precondition checks + + FindDuplicateFor( fileList, i, dupList1, dupList2, checkMatch ); <<<<< OK to call as Preconditions were checked + } + } +and so on. Rest of LOOP not repeated here. Below is a profile capture... 
+ + +================================================================================================================================= + +CS:EIP Symbol + Offset Timer samples +0x4b9100 memchr 39.73 +0x4058e0 std::basic_string,std::allocator >::find 9.97 +0x4561e0 CUtil::CountTally 6.17 <<< was 4.63 +0x401040 std::char_traits::compare 4.76 +0x405760 std::basic_string,std::allocator >::assign 3.69 +0x4bc230 memcpy 3.4 +0x405d50 std::basic_string,std::allocator >::_Copy 2.41 +0x4577b0 CUtil::ClearRedundantSpaces 1.51 +0x4065a0 std::basic_string,std::allocator >::append 1.44 +0x4115e0 CCJavaCsCounter::LSLOC 1.4 +0x4188c0 std::basic_streambuf >::snextc 1.35 +0x4061c0 std::operator+,std::allocator > 1.27 +0x457950 CUtil::ReplaceSmartQuotes 1.17 +0x455fc0 CUtil::FindKeyword 1.16 +0x4bd80d malloc 1.07 +0x4056b0 std::basic_string,std::allocator >::append 1.06 +0x41bce0 std::getline,std::allocator > 0.97 +0x405b60 std::basic_string,std::allocator >::append 0.88 +0x405c50 std::basic_string,std::allocator >::assign 0.84 +0x4c928b _read_nolock 0.75 +0x405640 std::basic_string,std::allocator >::assign 0.63 +0x4b9209 operator new 0.51 +0x458a60 std::basic_string,std::allocator >::find_first_not_of 0.47 +0x4b9385 free 0.44 +0x405990 std::basic_string,std::allocator >::_Chassign 0.43 +0x4b91ad operator delete 0.41 +0x4170a0 CCodeCounter::CountComplexity 0.38 +0x4ba666 __from_strstr_to_strchr 0.37 +0x4059d0 std::basic_string,std::allocator >::_Grow 0.36 +0x4778e0 std::_Tree,std::allocator > 0.35 +0x41b4f0 std::operator+,std::allocator > 0.33 +0x455b80 CUtil::TrimString 0.33 +0x445d50 CPhpCounter::CountDirectiveSLOC 0.26 +0x4b8d90 memmove 0.26 +0x48a340 MainObject::FindDuplicateFor >>> 0.25 <<< was 12.32 WOW ! ! ! 
+0x4b1eb0 ReadFilesInList 0.25 +0x4773f0 std::_Tree,std::allocator 0.23 +0x410ff0 CCJavaCsCounter::LanguageSpecificProcess 0.22 +0x4062e0 std::operator+,std::allocator > 0.19 +0x405490 std::basic_string,std::allocator >::substr 0.17 +0x416df0 CCodeCounter::FindCommentStart 0.17 +0x40b320 std::_Uninit_copy,std::allocator > 0.12 +0x40ad80 std::_Destroy_range > 0.12 +0x478480 std::_Tree,std::allocator > 0.12 +0x405850 std::basic_string,std::allocator >::erase 0.11 +0x4b8bef __security_check_cookie 0.11 +0x416660 CCodeCounter::CountCommentsSLOC 0.1 +0x458ad0 std::basic_string,std::allocator >::find_last_not_of 0.1 +0x4ca0b3 _output_s_l 0.09 +0x41bc60 std::_Find,std::allocator > * 0.06 +0x4bca90 memset 0.06 +0x4c0405 _write_nolock 0.06 +0x413dd0 std::ios_base::getloc 0.05 +0x4157b0 CCodeCounter::IsSupportedFileExtension 0.05 +0x419fc0 std::basic_istream >::_Ipfx 0.05 +0x41b2f0 std::use_facet > 0.05 +0x41b540 std::operator<< > 0.05 +0x455f60 CUtil::FindCharAvoidEscape 0.05 +0x459f80 std::num_put > >::_Rep 0.05 +0x478300 std::vector >::push_back 0.05 +0x482090 UpdateCounterCounts 0.05 +0x48c050 PrintCountResults 0.05 +0x4b86d3 std::_Lockit::_Lockit 0.05 +0x4b9509 _unlock_file 0.05 +0x405260 std::basic_string,std::allocator >::basic_string >,results>,std::allocator >::_Lock 0.03 +0x41a690 std::basic_string,std::allocator >::assign 0.03 +0x41ab70 std::vector >::_Insert_n 0.03 +0x459c80 std::num_put > >::_Iput 0.03 +0x45a260 std::operator<<,std::allocator > 0.03 +0x4805f0 std::_Tree_val,std::allocator > const 0.03 +0x481ed0 DecideLanguage 0.03 +0x4b86fb std::_Lockit::~_Lockit 0.03 +0x4b9496 _lock_file 0.03 +0x4bfd8a _unlock 0.03 +0x4bfe63 _lock 0.03 +0x4052a0 std::basic_string,std::allocator >::~basic_string,std::allocator >::_Myptr 0.02 +0x406390 std::operator==,std::allocator > 0.02 +0x40ac50 std::less,std::allocator > >::operator() 0.02 +0x40aca0 std::_Tree_val,std::allocator >,unsign0.02 +0x40b1a0 std::_Uninit_copy > >0.02 +0x418aa0 std::basic_streambuf >::xsputn 0.02 
+0x419e70 std::basic_string,std::allocator >::find_last_of 0.02 +0x471630 results::~results 0.02 +0x477d10 std::vector >::vector > 0.02 +0x4a89e0 PrintComplexityResults 0.02 +0x4ab5c0 PrintCyclomaticComplexity 0.02 +0x4c3c10 _aulldvrm 0.02 +0x4ccc4c _tsopen_nolock 0.02 +0x4052d0 std::basic_string,std::allocator >::operator= 0.01 +0x405300 std::basic_string,std::allocator >::replace 0.01 +0x405f40 std::vector,std::allocator >,std::allocator,std::allocator > &,unsigned int> 0.01 +0x40b270 std::_Uninit_move,lineElement> 0.01 +0x413c00 std::ctype::do_widen 0.01 +0x413e10 std::ios_base::_Init 0.01 +0x413f40 std::endl 0.01 +0x4150d0 CCodeCounter::InitializeResultsCounts 0.01 +0x415670 CCodeCounter::CountSLOC 0.01 +0x415c40 CCodeCounter::GetOutputStream 0.01 +0x416620 CCodeCounter::CountBlankSLOC 0.01 +0x418660 std::basic_ostream >::put 0.01 +0x418fd0 std::basic_filebuf >::_Unlock 0.01 +0x418fe0 std::basic_filebuf >::overflow 0.01 +0x419260 std::basic_filebuf >::underflow 0.01 +0x4192b0 std::basic_filebuf >::uflow 0.01 +0x419af0 std::map,std::allocator >::open 0.01 +0x41a750 std::basic_ostream >::_Osfx 0.01 +0x41a8e0 std::basic_streambuf >::getloc 0.01 +0x41aa40 std::_Deque_iterator >::operator- 0.01 +0x41b290 std::_Tree,std::allocator,std::allocator > >::do_put 0.01 +0x459f10 std::num_put > >::_Put 0.01 +0x45a030 std::numpunct::do_thousands_sep 0.01 +0x45a040 std::numpunct::do_grouping 0.01 +0x45a630 std::use_facet > 0.01 +0x476660 std::deque >::push_back 0.01 +0x477a50 results::results 0.01 +0x477fb0 std::_Tree,std::allocator0.01 +0x4781b0 std::_Tree,std::allocator >,unsigned i0.01 +0x4783b0 std::_Tree,std::allocator >,unsigned i0.01 +0x478420 std::_Tree_unchecked_const_iterator >::operator<< 0.01 +0x489ae0 MainObject::FindDuplicateFiles 0.01 +0x4b3040 ProcessSourceListFile 0.01 +0x4b7040 std::_List_val >,results>,std::allocatorlsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, i); + strLSLOCBak += lineBak.substr(start, i); + } + if 
(result->addSLOC(strLSLOCBak, trunc_flag)) <<< This is where I traced until the light bulb went on... + +Looking at the backup (unchanged) line we see an assignment statement with concatenation of a string literal at the end. +The string literal is +'self_ns::' +and the value of line[i] did indeed refer to the first : character in the literal. + +So the problem happens before parsing gets here. +Now the problem appears that a Python string literal enclosed by single quote characters was not recognized earlier. + +Because the line data is from before any detailed processing in LanguageSpecificProcess happens +it is reasonable to see how the line 497 is processed in CountDirectiveSLOC + +line "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'self_ns::'" + +This is the same as passed to LanguageSpecificProcess + +Backing up to CPythonCounter::CountCommentsSLOC + +{lineNumber=497 line=" namespace = namespaces.python + 'self_ns::'" } + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { +#ifdef _DEBUG + line_number++; +// cout << line_number << " "; + if ( 497 == line_number ) + { + cout << "Starting CountCommentsSLOC Detailed DEBUG" << endl; + } +#endif + contd_nextline = false; + + quote_idx_start = 0; + idx_start = 0; + + if (CUtil::CheckBlank(iter->line)) + continue; + if (quote_contd) <<< Before call this is true; meaning a quote continued from a prior line + { + // Replace quote until next character + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); <<< AFTER call line is changed, CurrentQuoteEnd = ' WRONG ! 
+ +line "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'self_ns::'" + + NOW we know the line really should be + +line " namespace = namespaces.python + '$$$$$$$$$'" + +So on some previous line the code parsed a single quote and kept going (wiping out perfectly valid Python code) until line 497 + +Looking around some more: +QuoteStart = "\"'"; // used to be QuoteStart = "\"\'"; +QuoteEnd = "\"'"; // used to be QuoteEnd = "\"\'"; + +Were declared with an unneeded \ escape for single quote chars (in a C++ string literal \' and ' are the same character, so this change has no effect) + +Recompiling and running again did NOT fix the problem. + +I saw that the contd flag was true when it should have been false. + +I then decided to use the Debugger and show the lines from the fmap and started scrolling from the start. +I was looking for where the questionable replacement of literals with $$$$$ started + +line 440 and 441 +BOOST_SUPPORTED_OPERATORS = '+ - * / % ^ & ! ~ | < > == != <= >= << >> && || += -= '\ + '*= /= %= ^= &= |= <<= >>='.split() + became +BOOST_SUPPORTED_OPERATORS = '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'\ +$$$$$$$$'*= /= %= ^= &= |= <<= >>='$$$$$$$$ + +440 is OK but 441 is where the parsing starts to get out of step. Time for some more Debugging... + +More debugging led me to think that really fixing this is not a small, low-risk change. + +So this is a change I do not want to hurry with just to get it into the Thread release. + +Later... 
+Randy Maxwell + diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..1cf9196 --- /dev/null +++ b/license.txt @@ -0,0 +1,208 @@ + UCC LICENSE + + COPYRIGHT (C) 2006 - 2013 + Center for Systems and Software Engineering + University of Southern California + Salvatori 330, 941 West 37th Place + Los Angeles, California 90089-0781, USA + +This CodeCount program is free software with limitations; you can redistribute it and/or +modify it under the terms of the USC-CSSE Limited Public License as published by the +University of Southern California Center for Systems and Software Engineering; either version 1 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +------------------------------------------------------------------------------------------- +USC-CSSE LIMITED PUBLIC LICENSE TERMS AND CONDITIONS +------------------------------------------------------------------------------------------- +The precise terms and conditions for copying, distribution and modification are as follows. + +0. This License applies to any program or other work which contains a notice placed by the +copyright holder saying it may be distributed under the terms of this Limited Public +License. The "Program", below, refers to any such program or work, and a "work based on the +Program" means either the Program or any derivative work under copyright law: that is to +say, a work containing the Program or a portion of it, either verbatim or with +modifications and/or translated into another language. (Hereinafter, translation is +included without limitation in the term "modification".) Each licensee is addressed as +"you". + +Activities other than copying, distribution and modification are not covered by this +License; they are outside its scope. 
The act of running the Program is not restricted nor +is the output from the Program is covered. + +1. You may copy and distribute verbatim copies of the Program's source code as you receive +it, in any medium, provided that you conspicuously and appropriately publish on each copy +an appropriate copyright notice and disclaimer of warranty; keep intact all the notices +that refer to this License and to the absence of any warranty; and give any other +recipients of the Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may at your +option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus forming a +work based on the Program, and copy and distribute such modifications or work under the +terms of Section 1 above, provided that you also meet all of these conditions: + +2.a) You must cause the modified files to carry prominent notices stating that you changed +the files and the date of any change. + +2.b) You must cause any work that you distribute or publish, that in whole or in part +contains or is derived from the Program or any part thereof, to be licensed as a whole at +no charge to all third parties under the terms of this License. + +2.c) If the modified program normally reads commands interactively when run, you must cause +it, when started running for such interactive use in the most ordinary way, to print or +display an announcement including an appropriate copyright notice and a notice that there +is no warranty (or else, saying that you provide a warranty) and that users may +redistribute the program under these conditions, and telling the user how to view a copy of +this License. (Exception: if the Program itself is interactive but does not normally print +such an announcement, your work based on the Program is not required to print an +announcement.) 
+ +2.d) You must provide a copy of the modified machine-readable source code files to the +University of Southern California Center for Systems and Software Engineering Salvatori 330, +941 West 37th Place, Los Angeles, California 90089-0781, USA, on an appropriate medium or +via our contact information at http://csse.usc.edu. + +These requirements apply to the modified work as a whole. If identifiable sections of that +work are not derived from the Program, and can be reasonably considered independent and +separate works in themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you distribute the same +sections as part of a whole which is a work based on the Program, the distribution of the +whole must be on the terms of this License, whose permissions for other licensees extend to +the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work +written entirely by you; rather, the intent is to exercise the right to control the +distribution of derivative or collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program with the Program (or +with a work based on the Program) on a volume of a storage or distribution medium does not +bring the other work under the scope of this License. + +3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in +object code or executable form under the terms of Sections 1 and 2 above provided that you +also do one of the following: + +3.a) Accompany it with the complete corresponding machine-readable source code, which must +be distributed under the terms of Sections 1 and 2 above on a medium customarily used for +software interchange; or, + +3.b) Accompany it with a written offer, valid for at least three years, to give any third +party, for a charge no more than your cost of physically performing source distribution, a +complete machine-readable copy of the corresponding source code, to be distributed under +the terms of Sections 1 and 2 above on a medium customarily used for software interchange; +or, + +3.c) Accompany it with the information you received as to the offer to distribute +corresponding source code. (This alternative is allowed only for noncommercial distribution +and only if you received the program in object code or executable form with such an offer, +in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making modifications to +it. For an executable work, complete source code means all the source code for all modules +it contains, plus any associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a special exception, +the source code distributed need not include anything that is normally distributed (in +either source or binary form) with the major components (compiler, kernel, and so on) of +the operating system on which the executable runs, unless that component itself accompanies +the executable. 
+ +If distribution of executable or object code is made by offering access to copy from a +designated place, then offering equivalent access to copy the source code from the same +place counts as distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as expressly +provided under this License. Any attempt otherwise to copy, modify, sublicense or +distribute the Program is void, and will automatically terminate your rights under this +License. However, parties who have received copies, or rights, from you under this License +will not have their licenses terminated so long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. However, +nothing else grants you permission to modify or distribute the Program or its derivative +works. These actions are prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the Program), you indicate your +acceptance of this License to do so, and all its terms and conditions for copying, +distributing or modifying the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), the recipient +automatically receives a license from the original licensor to copy, distribute or modify +the Program subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. You are not +responsible for enforcing compliance by third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any +other reason (not limited to patent issues), conditions are imposed on you (whether by court +order, agreement or otherwise) that contradict the conditions of this License, they +do not excuse you from the conditions of this License. If you cannot distribute so as to +satisfy simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not distribute the Program at all. For example, +if a patent license would not permit royalty-free redistribution of the Program by all +those who receive copies directly or indirectly through you, then the only way you could +satisfy both it and this License would be to refrain entirely from distribution of the +Program. + +If any portion of this section is held invalid or unenforceable under any particular +circumstance, the balance of the section is intended to apply and the section as a whole is +intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other +property right claims or to contest validity of any such claims; this section has the sole +purpose of protecting the integrity of this free software with limitations distribution +system, which is implemented by limited public license practices. Many people have made +generous contributions to the wide range of software distributed through that system in +reliance on consistent application of that system; it is up to the author/donor to decide +if he or she is willing to distribute software through any other system and a licensee +cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of +the rest of this License. + +8. 
If the distribution and/or use of the Program is restricted in certain countries either +by patents or by copyrighted interfaces, the original copyright holder who places the +Program under this License may add an explicit geographical distribution limitation +excluding those countries, so that distribution is permitted only in or among countries not +thus excluded. In such case, this License incorporates the limitation as if written in the +body of this License. + +9. The University of Southern California Center for Systems and Software Engineering may publish +revised and/or new versions of this Limited Public License from time to time. Such new +versions will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies a version +number of this License which applies to it and "any later version", you have the option of +following the terms and conditions either of that version or of any later version published +by the University of Southern California Center for Systems and Software Engineering. If the Program +does not specify a version number of this License, you may choose any version ever +published by the University of Southern California Center for Systems and Software Engineering. + +10. If you wish to incorporate parts of the Program into other free programs whose +distribution conditions are different, write to the author to ask for permission. For +software which is copyrighted by the University of Southern California Center for Software +Engineering, write to the University of Southern California Center for Systems and Software Engineering; +we sometimes make exceptions for this. Our decision will be guided by the goal of preserving +the free status of all derivatives of our free software with limitations. + +NO WARRANTY + +11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO +THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE +COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE +COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT +HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, +BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL +DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO +LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES +OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR +OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+ +END OF TERMS AND CONDITIONS diff --git a/src/BOOST_LICENSE_1_0.txt b/src/BOOST_LICENSE_1_0.txt new file mode 100644 index 0000000..36b7cd9 --- /dev/null +++ b/src/BOOST_LICENSE_1_0.txt @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/src/CAdaCounter.cpp b/src/CAdaCounter.cpp new file mode 100644 index 0000000..12ea11d --- /dev/null +++ b/src/CAdaCounter.cpp @@ -0,0 +1,669 @@ +//! Code counter class methods for the Ada language. +/*! +* \file CAdaCounter.cpp +* +* This file contains the code counter class methods for the Ada language. +*/ + +#include "CAdaCounter.h" + +/*! +* Constructs a CAdaCounter object. 
+*/
+CAdaCounter::CAdaCounter()
+{
+    // Word tables used to seed the counter's lookup lists. Every table is
+    // appended front to back, so each list receives its entries in the
+    // order they are written here.
+    static const char* const extensions[] = { ".ada", ".a", ".adb", ".ads" };
+
+    // Directive keywords; the same words also seed cmplx_preproc_list.
+    static const char* const pragmaWords[] = {
+        "controlled", "elaborate", "inline", "interface", "list",
+        "memory_size", "optimize", "pack", "page", "pragma",
+        "priority", "shared", "storage_unit", "suppress", "system_name"
+    };
+
+    // Keywords that mark a line as a data declaration.
+    static const char* const dataWords[] = {
+        "access", "array", "body", "constant", "declare",
+        "function", "generic", "limited", "new", "package",
+        "private", "procedure", "record", "renames", "separate",
+        "subtype", "task", "type", "use", "with"
+    };
+
+    // Executable keywords.
+    static const char* const execWords[] = {
+        "abort", "accept", "begin", "case", "delay",
+        "else", "elsif", "end", "entry", "exception",
+        "exit", "goto", "if", "loop", "others",
+        "raise", "return", "select", "terminate", "when"
+    };
+
+    static const char* const mathWords[] = { "exp", "random", "sqrt" };
+
+    static const char* const trigWords[] = {
+        "arccos", "arccosh", "arccot", "arccoth",
+        "arcsin", "arcsinh", "arctan", "arctanh",
+        "cos", "cosh", "cot", "coth",
+        "sin", "sinh", "tan", "tanh"
+    };
+
+    static const char* const calcWords[] = { "+", "-", "*", "/", "mod", "rem" };
+
+    static const char* const condWords[] = { "case", "else", "elsif", "if", "loop", "when" };
+
+    static const char* const logicWords[] = {
+        "=", "<", ">", "/=", ">=", "<=",
+        "&", "and", "or", "xor", "in", "not in"
+    };
+
+    size_t k;
+
+    classtype = ADA;
+    language_name = "Ada";
+    casesensitive = false;
+
+    for (k = 0; k < sizeof(extensions) / sizeof(extensions[0]); ++k)
+        file_extension.push_back(extensions[k]);
+
+    LineCommentStart.push_back("--");
+
+    QuoteStart = "\"";
+    QuoteEnd = "\"";
+    QuoteEscapeRear = '\"';
+
+    for (k = 0; k < sizeof(pragmaWords) / sizeof(pragmaWords[0]); ++k)
+        directive.push_back(pragmaWords[k]);
+
+    for (k = 0; k < sizeof(dataWords) / sizeof(dataWords[0]); ++k)
+        data_name_list.push_back(dataWords[k]);
+
+    for (k = 0; k < sizeof(execWords) / sizeof(execWords[0]); ++k)
+        exec_name_list.push_back(execWords[k]);
+
+    for (k = 0; k < sizeof(mathWords) / sizeof(mathWords[0]); ++k)
+        math_func_list.push_back(mathWords[k]);
+
+    for (k = 0; k < sizeof(trigWords) / sizeof(trigWords[0]); ++k)
+        trig_func_list.push_back(trigWords[k]);
+
+    log_func_list.push_back("log");
+
+    for (k = 0; k < sizeof(calcWords) / sizeof(calcWords[0]); ++k)
+        cmplx_calc_list.push_back(calcWords[k]);
+
+    for (k = 0; k < sizeof(condWords) / sizeof(condWords[0]); ++k)
+        cmplx_cond_list.push_back(condWords[k]);
+
+    for (k = 0; k < sizeof(logicWords) / sizeof(logicWords[0]); ++k)
+        cmplx_logic_list.push_back(logicWords[k]);
+
+    for (k = 0; k < sizeof(pragmaWords) / sizeof(pragmaWords[0]); ++k)
+        cmplx_preproc_list.push_back(pragmaWords[k]);
+
+    cmplx_assign_list.push_back(":=");
+}
+
+/*!
+* Masks Ada character literals that hold a double quote, then performs the
+* generic quote replacement.
+*
+* Every occurrence of the three-character sequence '"' in the line is
+* overwritten with "$$$" so that the embedded double quote is not taken for
+* the start or end of a string. The line is then handed to
+* CCodeCounter::ReplaceQuote() with the remaining arguments passed through
+* unchanged.
+*
+* \param strline string to be processed (modified in place)
+* \param idx_start index of line character to start search
+* \param contd specifies the quote string is continued from the previous line
+* \param CurrentQuoteEnd end quote character of the current status
+*
+* \return status returned by CCodeCounter::ReplaceQuote()
+*/
+int CAdaCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd)
+{
+    const string quoteLiteral = "'\"'";
+
+    // a masked literal no longer matches, so resuming at the same position is safe
+    for (size_t pos = strline.find(quoteLiteral, 0); pos != string::npos; pos = strline.find(quoteLiteral, pos))
+        strline.replace(pos, 3, 3, '$');
+
+    return CCodeCounter::ReplaceQuote(strline, idx_start, contd, CurrentQuoteEnd);
+}
+
+/*!
+* Counts directive lines of code.
+*
+* A directive begins on a non-blank line whose first token (position 0) is
+* one of the keywords in the directive list, and it continues onto the next
+* non-blank line while the processed line ends with ',' or '\'. Every
+* physical line that belongs to a directive is counted and then blanked in
+* fmap; the text of the directive is collected from fmapBak and recorded
+* through addSLOC() once the directive is complete.
+*
+* \param fmap list of processed file lines
+* \param result counter results
+* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings)
+*
+* \return method status
+*/
+int CAdaCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak)
+{
+    bool contd = false, trunc_flag = false;
+    size_t idx, strSize;
+    unsigned int cnt = 0;
+    string strDirLine = "";
+    string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:";
+
+    filemap::iterator itfmBak = fmapBak->begin();
+    for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); ++iter, ++itfmBak)
+    {
+        if (CUtil::CheckBlank(iter->line))
+            continue;
+
+        if (print_cmplx)
+        {
+            cnt = 0;
+            CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count, false);
+        }
+
+        if (!contd)
+        {
+            // if not a continuation of a previous directive
+            for (vector<string>::iterator viter = directive.begin(); viter != directive.end(); ++viter)
+            {
+                // merged bug fix for considering only stand-alone keywords
+                // e.g. package should not be considered a directive (only 'pack' is)
+                if (((idx = CUtil::FindKeyword(iter->line, *viter, 0, TO_END_OF_STRING, false)) != string::npos) && idx == 0)
+                {
+                    contd = true;
+                    break;
+                }
+            }
+            if (contd)
+            {
+                strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag);
+
+                // start the directive text afresh so that nothing from an
+                // earlier directive is carried over when nothing is copied
+                strDirLine = (strSize > 0) ? itfmBak->line.substr(0, strSize) : string();
+                result->directive_lines[PHY]++;
+            }
+        }
+        else
+        {
+            // continuation of a previous directive
+            strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag);
+            if (strSize > 0)
+                strDirLine += "\n" + itfmBak->line.substr(0, strSize);
+            result->directive_lines[PHY]++;
+        }
+
+        if (contd)
+        {
+            // drop continuation symbol (the collected text may be empty, so check before indexing)
+            if (!strDirLine.empty() && strDirLine[strDirLine.length() - 1] == '\\')
+                strDirLine.erase(strDirLine.length() - 1);
+
+            // if a directive or continuation of a directive (no continuation symbol found)
+            const char lastChar = iter->line[iter->line.length() - 1];
+            if (lastChar != ',' && lastChar != '\\')
+            {
+                contd = false;
+                if (result->addSLOC(strDirLine, trunc_flag))
+                    result->directive_lines[LOG]++;
+            }
+
+            // blank the processed line so later passes skip it
+            iter->line = "";
+        }
+    }
+    return 1;
+}
+
+/*!
+* Processes physical and logical lines according to language specific rules.
+* NOTE: all the blank lines +
+*               whole line comments +
+*               lines with compiler directives
+*       should have been blanked from filemap by previous processing
+*       before reaching this function
+*
+* Each non-blank line is passed to LSLOC() for logical SLOC extraction and
+* is counted as one physical data line when it contains a keyword from
+* data_name_list, otherwise as one physical executable line.
+*
+* \param fmap list of processed file lines
+* \param result counter results
+* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings)
+*
+* \return method status
+*/
+int CAdaCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak)
+{
+    string strLSLOC = "";
+    string strLSLOCBak = "";
+    unsigned int cnt = 0;
+    unsigned int loopLevel = 0;
+
+    filemap::iterator fit, fitbak;
+    string line, lineBak;
+    string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:";
+
+    // parser state shared with LSLOC() across physical lines; every flag must
+    // start out false because LSLOC() reads them before it ever writes them
+    unsigned int l_paren_cnt = 0;
+    bool l_foundblock = false;
+    bool found_forifwhile = false;
+    bool found_end = false;
+    bool found_type = false;
+    bool found_is = false;
+    bool found_accept = false;
+
+    for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); ++fit, ++fitbak)
+    {
+        // insert blank at the beginning (for searching keywords)
+        line = ' ' + fit->line;
+        lineBak = ' ' + fitbak->line;
+
+        if (!CUtil::CheckBlank(line))
+        {
+            // blank line means blank_line/comment_line/directive
+            // call SLOC function to detect logical SLOC and add to result
+            LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, l_paren_cnt, l_foundblock,
+                found_forifwhile, found_end, found_type, found_is, found_accept, loopLevel);
+
+            cnt = 0;
+            CUtil::CountTally(line, data_name_list, cnt, 1, exclude, "", "", NULL, false);
+
+            // need to check also if the data line continues
+            if (cnt > 0)
+                result->data_lines[PHY]++;
+            else
+                result->exec_lines[PHY]++;
+
+            if (print_cmplx)
+            {
+                cnt = 0;
+                CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count, false);
+            }
+        }
+    }
+    return 1;
+}
+
+/*!
+* Processes a logical line of code.
+* This method is called after a logical SLOC is determined.
+* The method adds LSLOC to the result, increases counts, and resets variables.
+*
+* The original (unmodified) statement text is trimmed and offered to
+* addSLOC(). When addSLOC() returns true, the processed text is scanned for
+* keywords from data_name_list: any hit counts the statement as a logical
+* data line, no hit counts it as a logical executable line. The two
+* statement buffers and all statement flags are cleared in every case.
+*
+* \param result counter results
+* \param strLSLOC processed logical string (cleared on return)
+* \param strLSLOCBak original logical string (cleared on return)
+* \param found_block found block flag (reset to false)
+* \param found_forifwhile found for, if, or while flag (reset to false)
+* \param found_end found end flag (reset to false)
+* \param found_type found type flag (reset to false)
+* \param found_is found is flag (reset to false)
+* \param found_accept found accept flag (reset to false)
+* \param trunc_flag truncate lines?
+*/
+void CAdaCounter::FoundSLOC(results* result, string &strLSLOC, string &strLSLOCBak, bool &found_block, bool &found_forifwhile,
+                            bool &found_end, bool &found_type, bool &found_is, bool &found_accept, bool &trunc_flag)
+{
+    string wordChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:";
+
+    // add to the list for comparison purpose; classify only when it was added
+    if (result->addSLOC(CUtil::TrimString(strLSLOCBak), trunc_flag))
+    {
+        // data declaration if any data keyword is present, executable otherwise
+        unsigned int dataHits = 0;
+        CUtil::CountTally(strLSLOC, data_name_list, dataHits, 1, wordChars, "", "", &result->data_name_count, false);
+        if (dataHits == 0)
+            result->exec_lines[LOG]++;
+        else
+            result->data_lines[LOG]++;
+    }
+
+    // a new statement starts from a clean slate
+    strLSLOC.clear();
+    strLSLOCBak.clear();
+    found_block = found_forifwhile = found_end = found_type = found_is = found_accept = false;
+}
+
+/*!
+* Extracts and stores logical lines of code.
+* Determines and extracts logical SLOC to place in the result variable
+* using addSLOC function. Each time the addSLOC function is called,
+* a new logical SLOC is added. This function assumes that the directive
+* is handled before it is called.
+*
+* The line is scanned character by character. A ';' outside parentheses
+* closes a statement; the keywords 'loop', 'then', 'record', 'is', 'select'
+* and 'do' close a statement in the contexts handled below. Text left over
+* at the end of the line is appended to strLSLOC / strLSLOCBak so that the
+* statement can continue on the next physical line.
+*
+* \param result counter results
+* \param line processed physical line of code
+* \param lineBak original physical line of code
+* \param strLSLOC processed logical string
+* \param strLSLOCBak original logical string
+* \param paren_cnt count of currently open parentheses
+* \param found_block found block flag
+* \param found_forifwhile found for, if, or while flag
+* \param found_end found end flag
+* \param found_type found type flag
+* \param found_is found is flag
+* \param found_accept found accept flag
+* \param loopLevel nested loop level
+*/
+void CAdaCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt,
+                        bool &found_block, bool &found_forifwhile, bool &found_end, bool &found_type, bool &found_is, bool &found_accept,
+                        unsigned int &loopLevel)
+{
+    size_t start = 0; //starting index of the working string
+    size_t i = 0, tempi, strSize;
+    string templine = CUtil::TrimString(line);
+    string tmp;
+    bool trunc_flag = false;
+    string keywordchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";
+
+    // there may be more than 1 logical SLOC in a line
+    for (i = 0; i < line.length(); i++)
+    {
+        switch (line[i])
+        {
+        case ';':
+            // a ';' inside parentheses separates parameters, not statements
+            if (paren_cnt > 0)
+                break;
+            if (!found_end)
+            {
+                // check for empty statement (=1 LSLOC)
+                if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1)
+                {
+                    strLSLOC = ";";
+                    strLSLOCBak = ";";
+                }
+                else
+                {
+                    strSize = CUtil::TruncateLine(i - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                    if (strSize > 0)
+                    {
+                        strLSLOC += line.substr(start, strSize);
+                        strLSLOCBak += lineBak.substr(start, strSize);
+                    }
+                }
+                FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                    found_end, found_type, found_is, found_accept, trunc_flag);
+            }
+            else
+            {
+                found_end = false;  // end xxx
+                found_block = false;
+                found_is = false;
+                found_forifwhile = false;
+                found_type = false;
+                found_accept = false;
+                strLSLOC = "";
+                strLSLOCBak = "";
+            }
+            start = i + 1;
+            break;
+        case '(':
+            if (found_type)
+                found_type = false;
+            paren_cnt++;
+            break;
+        case ')':
+            if (paren_cnt > 0)
+                paren_cnt--;
+            break;
+        }
+
+        // continue the following processing only if line[i] is not in a middle of a word
+        if (keywordchars.find(line[i]) != string::npos && i < line.length() - 1)
+            continue;
+
+        // if it ends in xxx, then it has already been counted, so ignore it
+        tmp = "xxx " + CUtil::TrimString(line.substr(start, i + 1 - start));
+        tempi = CUtil::FindKeyword(tmp, "end", 0, TO_END_OF_STRING, false);
+        if (tempi != string::npos)
+        {
+            found_end = true;
+
+            // record end loop for nested loop processing
+            if (print_cmplx)
+            {
+                tmp = CUtil::TrimString(line.substr(start, i + 5 - start));
+                if (CUtil::FindKeyword(tmp, "end loop", 0, TO_END_OF_STRING, false) != string::npos)
+                    if (loopLevel > 0)
+                        loopLevel--;
+            }
+            start = i + 1;
+        }
+
+        if (!found_end)
+        {
+            if (!found_forifwhile)
+            {
+                if (CUtil::FindKeyword(tmp, "for", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "elsif", 0, TO_END_OF_STRING, false) != string::npos)
+                {
+                    found_forifwhile = true;
+                }
+
+                // 'exception' is removed because it is not counted
+                if (CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) != string::npos)
+                {
+                    // found a SLOC
+                    strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                    if (strSize > 0)
+                    {
+                        strLSLOC += line.substr(start, strSize);
+                        strLSLOCBak += lineBak.substr(start, strSize);
+                    }
+                    FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                        found_end, found_type, found_is, found_accept, trunc_flag);
+
+                    start = i + 1;
+
+                    // record nested loop level
+                    if (print_cmplx)
+                    {
+                        loopLevel++;
+                        if ((unsigned int)result->cmplx_nestloop_count.size() < loopLevel)
+                            result->cmplx_nestloop_count.push_back(1);
+                        else
+                            result->cmplx_nestloop_count[loopLevel-1]++;
+                    }
+                    continue;
+                }
+            }
+            else if (CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) != string::npos ||
+                CUtil::FindKeyword(tmp, "then", 0, TO_END_OF_STRING, false) != string::npos ||
+                CUtil::FindKeyword(tmp, "record", 0, TO_END_OF_STRING, false) != string::npos) // for..use..record
+            {
+                // found a SLOC
+                strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                if (strSize > 0)
+                {
+                    strLSLOC += line.substr(start, strSize);
+                    strLSLOCBak += lineBak.substr(start, strSize);
+                }
+                FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                    found_end, found_type, found_is, found_accept, trunc_flag);
+                start = i + 1;
+
+                // record nested loop level
+                if (print_cmplx)
+                {
+                    if (CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) != string::npos)
+                    {
+                        loopLevel++;
+                        if ((unsigned int)result->cmplx_nestloop_count.size() < loopLevel)
+                            result->cmplx_nestloop_count.push_back(1);
+                        else
+                            result->cmplx_nestloop_count[loopLevel-1]++;
+                    }
+                }
+                continue;
+            }
+
+            // similarly, check for procedure, task, function - it ends with 'is' keyword
+            // procedure ... is...
+            // package ... is ...
+            if (!found_block)
+            {
+                if (CUtil::FindKeyword(tmp, "procedure", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "function", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "package", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "task", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "case", 0, TO_END_OF_STRING, false) != string::npos ||
+                    CUtil::FindKeyword(tmp, "protected", 0, TO_END_OF_STRING, false) != string::npos)
+                {
+                    found_block = true;
+                }
+            }
+            else    // procedure...is...
+            {
+                // the 'if' statement below attempts to 'alleviate' the issue with
+                // procedure Swap is new Exchange(Elem => Integer);
+                // procedure Test1 is begin end Test1;
+                // only add new SLOC if 'is' is at the end of line and follows 'procedure', etc.
+                // NOTE: this implementation may not be complete
+                tempi = CUtil::FindKeyword(templine, "is", 0, TO_END_OF_STRING, false);
+
+                // 'is' must actually be found; comparing against length() - 2
+                // alone wraps around to string::npos on a one-character line
+                // (e.g. a lone ')') and would match a failed search
+                if (tempi != string::npos && tempi + 2 == templine.length())
+                {
+                    strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                    if (strSize > 0)
+                    {
+                        strLSLOC += line.substr(start, strSize);
+                        strLSLOCBak += lineBak.substr(start, strSize);
+                    }
+                    FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                        found_end, found_type, found_is, found_accept, trunc_flag);
+                    start = i + 1;
+                    found_is = true;
+                    continue;
+                }
+            }
+            if (!found_type)
+            {
+                if (CUtil::FindKeyword(tmp, "type", 0, TO_END_OF_STRING, false) != string::npos)
+                    found_type = true;
+            }
+            else
+            {
+                if (CUtil::FindKeyword(tmp, "record", 0, TO_END_OF_STRING, false) != string::npos)
+                {
+                    // the 'if' statement below attempts to resolve the issue with
+                    // type Expression is tagged null record;
+                    // so, ignore this case.
+                    // NOTE: this implementation may not be complete
+                    if (templine.at(templine.length() - 1) != ';')
+                    {
+                        strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                        if (strSize > 0)
+                        {
+                            strLSLOC += line.substr(start, strSize);
+                            strLSLOCBak += lineBak.substr(start, strSize);
+                        }
+                        FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                            found_end, found_type, found_is, found_accept, trunc_flag);
+                        start = i + 1;
+                        continue;
+                    }
+                }
+            }
+
+            // process 'select...end select;', 'accept ... end accept;'
+            // 'record ... end record;' is handled via 'type'
+            // select ... end select; --> only one word statement 'select'
+            // accept id... do ... end [id]; --> SLOC starting from 'accept' to 'do'
+            // find 'do' only already found 'accept'
+            if (CUtil::FindKeyword(tmp, "select", 0, TO_END_OF_STRING, false) != string::npos)
+            {
+                // found 'select' statement, one SLOC
+                strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                if (strSize > 0)
+                {
+                    strLSLOC += line.substr(start, strSize);
+                    strLSLOCBak += lineBak.substr(start, strSize);
+                }
+                FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                    found_end, found_type, found_is, found_accept, trunc_flag);
+                start = i + 1;
+                continue;
+            }
+
+            if (!found_accept)
+            {
+                if (CUtil::FindKeyword(tmp, "accept", 0, TO_END_OF_STRING, false) != string::npos)
+                    found_accept = true;
+            }
+            else
+            {
+                if (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) != string::npos)
+                {
+                    // found a SLOC
+                    strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+                    if (strSize > 0)
+                    {
+                        strLSLOC += line.substr(start, strSize);
+                        strLSLOCBak += lineBak.substr(start, strSize);
+                    }
+                    FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile,
+                        found_end, found_type, found_is, found_accept, trunc_flag);
+                    start = i + 1;
+                }
+            }
+        }
+    }
+
+    // carry the unfinished remainder of this line over to the next physical line
+    tmp = CUtil::TrimString(line.substr(start, i - start));
+    strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+    if (strSize > 0)
+    {
+        strLSLOC += tmp.substr(0, strSize);
+        tmp = CUtil::TrimString(lineBak.substr(start, i - start));
+        strLSLOCBak += tmp.substr(0, strSize);
+
+        // drop continuation symbol
+        if (strLSLOC[strLSLOC.length()-1] == '\\')
+        {
+            strLSLOC = strLSLOC.substr(0, strLSLOC.length()-1);
+            strLSLOCBak = strLSLOCBak.substr(0, strLSLOCBak.length()-1);
+        }
+    }
+    if (tmp == "")
+        found_forifwhile = false;
+}
+diff --git a/src/CAdaCounter.h b/src/CAdaCounter.h new file mode 100644 index 0000000..6b6434d --- /dev/null +++ b/src/CAdaCounter.h @@ -0,0 +1,35 @@ +//!
Code counter class definition for the Ada language. +/*! +* \file CAdaCounter.h +* +* This file contains the code counter class definition for the Ada language. +*/ + +#ifndef AdaCounter_h +#define AdaCounter_h + +#include "CCodeCounter.h" + +//! Ada code counter class. +/*! +* \class CAdaCounter +* +* Defines the Ada code counter class. +*/ +class CAdaCounter : public CCodeCounter +{ +public: + CAdaCounter(); + +protected: + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, bool &found_type, bool &found_is, bool &found_accept, + unsigned int &loopLevel); + void FoundSLOC(results* result, string &strLSLOC, string &strLSLOCBak, bool &found_block, bool &found_forifwhile, + bool &found_end, bool &found_type, bool &found_is, bool &found_accept, bool &trunc_flag); +}; + +#endif diff --git a/src/CBashCounter.cpp b/src/CBashCounter.cpp new file mode 100644 index 0000000..9c44689 --- /dev/null +++ b/src/CBashCounter.cpp @@ -0,0 +1,475 @@ +//! Code counter class methods for the Bash shell script language. +/*! +* \file CBashCounter.cpp +* +* This file contains the code counter class methods for the Bash shell script language. +* This also includes the Korn shell language. +*/ + +#include "CBashCounter.h" + +/*! +* Constructs a CBashCounter object. 
+*/ +CBashCounter::CBashCounter() +{ + classtype = BASH; + language_name = "Bash"; + + file_extension.push_back(".sh"); + file_extension.push_back(".ksh"); + + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + QuoteEscapeFront = '\\'; + ContinueLine = "\\"; + LineCommentStart.push_back("#"); + + exclude_keywords.push_back("done"); + exclude_keywords.push_back("esac"); + exclude_keywords.push_back("fi"); + + continue_keywords.push_back("do"); + continue_keywords.push_back("else"); + continue_keywords.push_back("then"); + + data_name_list.push_back("declare"); + data_name_list.push_back("local"); + data_name_list.push_back("type"); + data_name_list.push_back("typeset"); + + exec_name_list.push_back("alias"); + exec_name_list.push_back("awk"); + exec_name_list.push_back("bind"); + exec_name_list.push_back("break"); + exec_name_list.push_back("builtin"); + exec_name_list.push_back("caller"); + exec_name_list.push_back("case"); + exec_name_list.push_back("cd"); + exec_name_list.push_back("command"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("coproc"); + exec_name_list.push_back("dirs"); + exec_name_list.push_back("echo"); + exec_name_list.push_back("elif"); + exec_name_list.push_back("enable"); + exec_name_list.push_back("eval"); + exec_name_list.push_back("exec"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("export"); + exec_name_list.push_back("for"); + exec_name_list.push_back("function"); + exec_name_list.push_back("getopts"); + exec_name_list.push_back("hash"); + exec_name_list.push_back("if"); + exec_name_list.push_back("let"); + exec_name_list.push_back("mapfile"); + exec_name_list.push_back("popd"); + exec_name_list.push_back("printf"); + exec_name_list.push_back("pushd"); + exec_name_list.push_back("pwd"); + exec_name_list.push_back("read"); + exec_name_list.push_back("readarray"); + exec_name_list.push_back("readonly"); + exec_name_list.push_back("return"); + exec_name_list.push_back("select"); + 
exec_name_list.push_back("set"); + exec_name_list.push_back("shift"); + exec_name_list.push_back("source"); + exec_name_list.push_back("test"); + exec_name_list.push_back("time"); + exec_name_list.push_back("times"); + exec_name_list.push_back("trap"); + exec_name_list.push_back("ulimit"); + exec_name_list.push_back("umask"); + exec_name_list.push_back("unalias"); + exec_name_list.push_back("unset"); + exec_name_list.push_back("until"); + exec_name_list.push_back("while"); + + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("^"); + cmplx_calc_list.push_back("++"); + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("--"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("elif"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("select"); + cmplx_cond_list.push_back("until"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("!"); + cmplx_logic_list.push_back("~"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back("-lt"); + cmplx_logic_list.push_back("-gt"); + cmplx_logic_list.push_back("-ge"); + cmplx_logic_list.push_back("-le"); + cmplx_logic_list.push_back("-eq"); + cmplx_logic_list.push_back("-ne"); + + cmplx_assign_list.push_back("="); +} + +/*! +* Perform preprocessing of file lines before counting. 
+* +* \param fmap list of file lines +* +* \return method status +*/ +int CBashCounter::PreCountProcess(filemap* fmap) +{ + filemap::iterator fit; + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + if (fit->line.empty()) + continue; + for (size_t i = fit->line.length() - 1; i > 0; i--) + { + // replace $# and ${# with $ to avoid determination of a comment + if (fit->line[i] == '#' && (fit->line[i-1] == '$' || fit->line[i-1] == '{')) + fit->line[i] = '$'; + } + } + return 0; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CBashCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + filemap::iterator fit, fitbak; + string line, lineBak; + + bool data_continue = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + string str; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + StringVector loopLevel; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + lineBak = fitbak->line; + + // do not process blank lines (blank_line/comment_line/directive) + if (!CUtil::CheckBlank(line)) + { + // process logical SLOC + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, data_continue, + temp_lines, phys_exec_lines, phys_data_lines, loopLevel); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", 
&result->exec_name_count); + } + + // update physical SLOC lines + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param loopLevel nested loop level +*/ +void CBashCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, StringVector &loopLevel) +{ + size_t start, end; + size_t i = 0, m, strSize; + bool trunc_flag = false, found; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string str, spc; + unsigned int cnt = 0; + + string tmp = CUtil::TrimString(line); + string tmpBak = CUtil::TrimString(lineBak); + start = 0; + + // skip whole line '{' or '}' + if (tmp == "{" || tmp == "}") + { + strLSLOC = strLSLOCBak = ""; + phys_exec_lines++; + temp_lines = 0; + return; + } + + // trim trailing '{' + if (tmp[tmp.length() - 1] == '{') + { + tmp = CUtil::TrimString(tmp.substr(0, tmp.length() - 1)); + tmpBak = CUtil::TrimString(tmpBak.substr(0, tmpBak.length() - 1)); + } + + // there may be more than 1 logical SLOC in this line 
+ while (start < tmp.length()) + { + // check for semicolon to denote end of SLOC + end = tmp.find(";", start); + if (end != string::npos) + { + // handle empty statement + if (CUtil::TrimString(tmp.substr(start, end - start + 1)) == ";") + { + start = end + 1; + strLSLOC = strLSLOCBak = ""; + temp_lines = 0; + if (tmp == ";") + phys_exec_lines++; + continue; + } + + // handle for (( ; ; )) + i = CUtil::FindKeyword(tmp, "for", start, end); + if (i != string::npos) + { + i += 3; + i = tmp.find("((", i); + if (i != string::npos && i < end) + { + i += 2; + i = tmp.find("))", i); + if (i != string::npos) + { + i += 2; + end = tmp.find(";", i); + if (end == string::npos) + end = tmp.length() - 1; + } + else + end = tmp.length() - 1; + } + } + + // handle case ';;' or ';&' or ';;&' + if (end < tmp.length() - 1) + { + if (tmp[end + 1] == ';' || tmp[end + 1] == '&') + end++; + if (end < tmp.length() - 2 && tmp[end + 2] == '&') + end++; + } + } + else + end = tmp.length() - 1; + + // process nested loops + if (print_cmplx) + { + str = CUtil::TrimString(tmp.substr(start, end - start + 1)); + if (CUtil::FindKeyword(str, "for") != string::npos + || CUtil::FindKeyword(str, "while") != string::npos + || CUtil::FindKeyword(str, "until")!= string::npos + || CUtil::FindKeyword(str, "select")!= string::npos) + { + if (CUtil::FindKeyword(str, "for") != string::npos) + loopLevel.push_back("for"); + else if (CUtil::FindKeyword(str, "while")!= string::npos) + loopLevel.push_back("while"); + else if (CUtil::FindKeyword(str, "until") != string::npos) + loopLevel.push_back("until"); + else if (CUtil::FindKeyword(str, "select") != string::npos) + loopLevel.push_back(""); + + // record nested loop level + if (CUtil::FindKeyword(str, "select") == string::npos) + { + unsigned int loopCnt = 0; + for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + 
result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + } + if (CUtil::FindKeyword(str, "done") != string::npos && loopLevel.size() > 0) + loopLevel.pop_back(); + } + + // check for line containing excluded keywords + for (StringVector::iterator it = exclude_keywords.begin(); it != exclude_keywords.end(); it++) + { + i = CUtil::FindKeyword(tmp, (*it), start, end); + if (i != string::npos) + { + // strip specified keyword and skip if empty + start = i + (*it).length(); + if (CUtil::CheckBlank(CUtil::TrimString(tmp.substr(start, end - start)))) + start = end + 1; + break; + } + } + if (start > end) + { + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + continue; + } + + // check for continuation words + found = false; + if (tmp[end] == ';') + str = CUtil::TrimString(tmp.substr(start, end - start)); + else + str = CUtil::TrimString(tmp.substr(start, end - start + 1)); + for (StringVector::iterator it = continue_keywords.begin(); it != continue_keywords.end(); it++) + { + if (str == (*it)) + { + found = true; + strLSLOC += str + " "; + strLSLOCBak += str + " "; + start = end + 1; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + temp_lines = 0; + } + } + if (found) + continue; + + // check for line continuation + if (tmp[end] == '\\') + { + // strip off trailing (\) + strSize = CUtil::TruncateLine(end - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + spc = ""; + str = tmp.substr(start, strSize); + for (m = str.length() - 1; m > 0; m--) + { + if (str[m] == ' ') + spc += " "; + else + break; + } + if (m == 0) + { + if (str[0] == ' ') + spc += " "; + } + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)) + spc; + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)) + spc; + } + start = end + 1; + + // make sure that we are not beginning to process a new data line + cnt = 0; + 
CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + + if (cnt > 0) + data_continue = true; + if (data_continue == true) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + } + else + { + // save LSLOC + if (tmp[end] == ';') + { + // don't trim if ';;' + if (tmp.length() > 1 && tmp[end - 1] == ';') + strSize = CUtil::TruncateLine(end - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + else + strSize = CUtil::TruncateLine(end - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + } + else + strSize = CUtil::TruncateLine(end - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)); + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)); + } + start = end + 1; + if (strLSLOCBak.length() > 0) + { + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + // add a logical SLOC + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + temp_lines++; + if (data_continue == true || cnt > 0) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + else if (data_continue == true) + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + } + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + } + } +} diff --git a/src/CBashCounter.h b/src/CBashCounter.h new file mode 100644 index 0000000..b12476c --- /dev/null +++ b/src/CBashCounter.h @@ -0,0 +1,35 @@ +//! Code counter class definition for the Bash shell script language. +/*! +* \file CBashCounter.h +* +* This file contains the code counter class definition for the Bash shell script language. +* This also includes the Korn shell language. +*/ + +#ifndef CBashCounter_h +#define CBashCounter_h + +#include "CCodeCounter.h" + +//! 
Bash shell script code counter class. +/*! +* \class CBashCounter +* +* Defines the Bash shell script code counter class. +*/ +class CBashCounter : public CCodeCounter +{ +public: + CBashCounter(); + +protected: + virtual int PreCountProcess(filemap* fmap); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, StringVector &loopLevel); + + StringVector continue_keywords; //!< List of keywords to continue to next line +}; + +#endif diff --git a/src/CCCounter.cpp b/src/CCCounter.cpp new file mode 100644 index 0000000..ce5707f --- /dev/null +++ b/src/CCCounter.cpp @@ -0,0 +1,212 @@ +//! Code counter class methods for the C/C++ languages. +/*! +* \file CCCounter.cpp +* +* This file contains the code counter class methods for the C/C++ languages. +* +* Changed from UCC 2013_04 release by Randy Maxwell +* Changes started on 2015_06_02 +* Changes ended on 2015_06_02 +* Addition of file extensions .inl .inc to support working with Linux and Boost for example +* .cxx .hxx as comments ready for anyone wanting Legacy C++ file support +*/ + +#include "CCCounter.h" + +/*! +* Constructs a CCCounter object. 
+*/ +CCCounter::CCCounter() +{ + classtype = C_CPP; + language_name = "C_CPP"; + + file_extension.push_back(".c"); + file_extension.push_back(".cc"); + file_extension.push_back(".cpp"); + // file_extension.push_back(".cxx"); // Legacy C++ Enable if you want + file_extension.push_back(".inl"); // inline files for example see Boost libraries + + // These file types are not included when doing Cyclomatic Complexity calculations + file_extension.push_back(".h"); + file_extension.push_back(".hh"); + file_extension.push_back(".hpp"); + //file_extension.push_back(".hxx"); // Legacy C++ Enable if you want + file_extension.push_back(".inc"); // include files for example see Linux + + directive.push_back("#define"); + directive.push_back("#dictionary"); + directive.push_back("#error"); + directive.push_back("#if"); + directive.push_back("#ifdef"); + directive.push_back("#ifndef"); + directive.push_back("#else"); + directive.push_back("#elif"); + directive.push_back("#endif"); + directive.push_back("#import"); + directive.push_back("#include"); + directive.push_back("#line"); + directive.push_back("#module"); + directive.push_back("#pragma"); + directive.push_back("#undef"); + directive.push_back("#using"); + + data_name_list.push_back("asm"); + data_name_list.push_back("auto"); + data_name_list.push_back("bool"); + data_name_list.push_back("char"); + data_name_list.push_back("class"); + data_name_list.push_back("const"); + data_name_list.push_back("double"); + data_name_list.push_back("enum"); + data_name_list.push_back("explicit"); + data_name_list.push_back("extern"); + data_name_list.push_back("FILE"); + data_name_list.push_back("float"); + data_name_list.push_back("friend"); + data_name_list.push_back("inline"); + data_name_list.push_back("int"); + data_name_list.push_back("long"); + data_name_list.push_back("mutable"); + data_name_list.push_back("namespace"); + data_name_list.push_back("operator"); + data_name_list.push_back("register"); + 
data_name_list.push_back("short"); + data_name_list.push_back("static"); + data_name_list.push_back("string"); + data_name_list.push_back("struct"); + data_name_list.push_back("template"); + data_name_list.push_back("typedef"); + data_name_list.push_back("union"); + data_name_list.push_back("unsigned"); + data_name_list.push_back("using"); + data_name_list.push_back("virtual"); + data_name_list.push_back("void"); + data_name_list.push_back("volatile"); + data_name_list.push_back("wchar_t"); + + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("cerr"); + exec_name_list.push_back("cin"); + exec_name_list.push_back("clog"); + exec_name_list.push_back("const_cast"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("cout"); + exec_name_list.push_back("default"); + exec_name_list.push_back("delete"); + exec_name_list.push_back("do"); + exec_name_list.push_back("dynamic_cast"); + exec_name_list.push_back("else"); + exec_name_list.push_back("entry"); + exec_name_list.push_back("for"); + exec_name_list.push_back("goto"); + exec_name_list.push_back("if"); + exec_name_list.push_back("new"); + exec_name_list.push_back("reinterpret_cast"); + exec_name_list.push_back("return"); + exec_name_list.push_back("sizeof"); + exec_name_list.push_back("stderr"); + exec_name_list.push_back("stdin"); + exec_name_list.push_back("stdout"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("static_cast"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("try"); + exec_name_list.push_back("typeid"); + exec_name_list.push_back("while"); + + math_func_list.push_back("abs"); + math_func_list.push_back("cbrt"); + math_func_list.push_back("ceil"); + math_func_list.push_back("copysign"); + math_func_list.push_back("erf"); + math_func_list.push_back("erfc"); + math_func_list.push_back("exp"); + math_func_list.push_back("exp2"); + math_func_list.push_back("expm1"); + 
math_func_list.push_back("fabs"); + math_func_list.push_back("fdim"); + math_func_list.push_back("floor"); + math_func_list.push_back("fma"); + math_func_list.push_back("fmax"); + math_func_list.push_back("fmin"); + math_func_list.push_back("fmod"); + math_func_list.push_back("frexp"); + math_func_list.push_back("hypot"); + math_func_list.push_back("ilogb"); + math_func_list.push_back("ldexp"); + math_func_list.push_back("lgamma"); + math_func_list.push_back("llrint"); + math_func_list.push_back("lrint"); + math_func_list.push_back("llround"); + math_func_list.push_back("lround"); + math_func_list.push_back("modf"); + math_func_list.push_back("nan"); + math_func_list.push_back("nearbyint"); + math_func_list.push_back("nextafter"); + math_func_list.push_back("nexttoward"); + math_func_list.push_back("pow"); + math_func_list.push_back("remainder"); + math_func_list.push_back("remquo"); + math_func_list.push_back("rint"); + math_func_list.push_back("round"); + math_func_list.push_back("scalbln"); + math_func_list.push_back("scalbn"); + math_func_list.push_back("sqrt"); + math_func_list.push_back("tgamma"); + math_func_list.push_back("trunc"); + + trig_func_list.push_back("cos"); + trig_func_list.push_back("cosh"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("sinh"); + trig_func_list.push_back("tan"); + trig_func_list.push_back("tanh"); + trig_func_list.push_back("acos"); + trig_func_list.push_back("acosh"); + trig_func_list.push_back("asinh"); + trig_func_list.push_back("atanh"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + log_func_list.push_back("log1p"); + log_func_list.push_back("log2"); + log_func_list.push_back("logb"); + + cmplx_preproc_list.push_back("#define"); + cmplx_preproc_list.push_back("#dictionary"); + cmplx_preproc_list.push_back("#elif"); + cmplx_preproc_list.push_back("#else"); + 
cmplx_preproc_list.push_back("#endif"); + cmplx_preproc_list.push_back("#error"); + cmplx_preproc_list.push_back("#if"); + cmplx_preproc_list.push_back("#ifdef"); + cmplx_preproc_list.push_back("#ifndef"); + cmplx_preproc_list.push_back("#import"); + cmplx_preproc_list.push_back("#include"); + cmplx_preproc_list.push_back("#line"); + cmplx_preproc_list.push_back("#module"); + cmplx_preproc_list.push_back("#pragma"); + cmplx_preproc_list.push_back("#undef"); + cmplx_preproc_list.push_back("#using"); + + cmplx_pointer_list.push_back("->"); + + cmplx_cyclomatic_list.push_back("if"); + cmplx_cyclomatic_list.push_back("case"); + cmplx_cyclomatic_list.push_back("while"); + cmplx_cyclomatic_list.push_back("for"); + cmplx_cyclomatic_list.push_back("catch"); + cmplx_cyclomatic_list.push_back("?"); + + skip_cmplx_cyclomatic_file_extension_list.push_back(".h"); + skip_cmplx_cyclomatic_file_extension_list.push_back(".hh"); + skip_cmplx_cyclomatic_file_extension_list.push_back(".hpp"); + //skip_cmplx_cyclomatic_file_extension_list.push_back(".hxx"); // Legacy C++ Enable if you want + skip_cmplx_cyclomatic_file_extension_list.push_back(".inc"); +} diff --git a/src/CCCounter.h b/src/CCCounter.h new file mode 100644 index 0000000..33085ba --- /dev/null +++ b/src/CCCounter.h @@ -0,0 +1,25 @@ +//! Code counter class definition for the C/C++ languages. +/*! +* \file CCCounter.h +* +* This file contains the code counter class definition for the C/C++ languages. +*/ + +#ifndef CCCounter_h +#define CCCounter_h + +#include "CCJavaCsCounter.h" + +//! C/C++ code counter class. +/*! +* \class CCCounter +* +* Defines the C/C++ code counter class. +*/ +class CCCounter : public CCJavaCsCounter +{ +public: + CCCounter(); +}; + +#endif diff --git a/src/CCFScriptCounter.cpp b/src/CCFScriptCounter.cpp new file mode 100644 index 0000000..ac0b332 --- /dev/null +++ b/src/CCFScriptCounter.cpp @@ -0,0 +1,516 @@ +//! Code counter class methods for the CFScript language. +/*! 
+* \file CCFScriptCounter.cpp +* +* This file contains the code counter class methods for the CFScript language. +*/ + +#include "CCFScriptCounter.h" + +/*! +* Constructs a CCFScriptCounter object. +*/ +CCFScriptCounter::CCFScriptCounter() +{ + classtype = CFSCRIPT; + language_name = "CFScript"; + casesensitive = false; + + file_extension.push_back(".*cfs"); + + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + LineCommentStart.push_back("//"); + BlockCommentStart.push_back("/*"); + BlockCommentEnd.push_back("*/"); + + data_name_list.push_back("function"); + data_name_list.push_back("import"); + data_name_list.push_back("include"); + data_name_list.push_back("interface"); + data_name_list.push_back("property"); + data_name_list.push_back("var"); + + exec_name_list.push_back("abort"); + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("component"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("createobject"); + exec_name_list.push_back("default"); + exec_name_list.push_back("else"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("finally"); + exec_name_list.push_back("for"); + exec_name_list.push_back("if"); + exec_name_list.push_back("location"); + exec_name_list.push_back("lock"); + exec_name_list.push_back("new"); + exec_name_list.push_back("param"); + exec_name_list.push_back("pageecoding"); + exec_name_list.push_back("rethrow"); + exec_name_list.push_back("return"); + exec_name_list.push_back("savecontent"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("thread"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("trace"); + exec_name_list.push_back("transaction"); + exec_name_list.push_back("try"); + exec_name_list.push_back("while"); + exec_name_list.push_back("writedump"); + exec_name_list.push_back("writelog"); + exec_name_list.push_back("writeoutput"); + + math_func_list.push_back("abs"); + 
math_func_list.push_back("arrayavg"); + math_func_list.push_back("arraysum"); + math_func_list.push_back("ceiling"); + math_func_list.push_back("decrementvalue"); + math_func_list.push_back("exp"); + math_func_list.push_back("fix"); + math_func_list.push_back("incrementvalue"); + math_func_list.push_back("int"); + math_func_list.push_back("max"); + math_func_list.push_back("min"); + math_func_list.push_back("mod"); + math_func_list.push_back("pi"); + math_func_list.push_back("precisionevaluate"); + math_func_list.push_back("rand"); + math_func_list.push_back("randomize"); + math_func_list.push_back("randrange"); + math_func_list.push_back("round"); + math_func_list.push_back("sgn"); + math_func_list.push_back("sqr"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("atn"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("tan"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("**"); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("switch"); + cmplx_cond_list.push_back("while"); + cmplx_cond_list.push_back("?"); + + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back("eq"); + cmplx_logic_list.push_back("neq"); + cmplx_logic_list.push_back("gt"); + cmplx_logic_list.push_back("lt"); + cmplx_logic_list.push_back("gte"); + cmplx_logic_list.push_back("lte"); + + cmplx_assign_list.push_back("="); +} + +/*! 
+* Processes physical and logical lines according to language specific rules.
+* NOTE: all the blank lines +
+*               whole line comments +
+*               lines with compiler directives
+*       should have been blanked from filemap by previous processing
+*       before reaching this function
+*
+* \param fmap list of processed file lines
+* \param result counter results (receives the physical executable/data line totals)
+* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings)
+*
+* \return method status (always 1)
+*/
+int CCFScriptCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak)
+{
+	unsigned int	paren_count			= 0;
+	bool			for_flag			= false;
+	bool			found_forifwhile	= false;
+	bool			found_while			= false;
+	char			prev_char			= 0;
+	bool			data_continue		= false;
+	bool			inArrayDec			= false;
+	string			strLSLOC			= "";
+	string			strLSLOCBak			= "";
+	unsigned int	openBrackets		= 0;
+
+	filemap::iterator fit, fitbak;
+	string line, lineBak;
+	StringVector loopLevel;
+
+	unsigned int phys_exec_lines = 0;
+	unsigned int phys_data_lines = 0;
+	unsigned int temp_lines = 0;
+	unsigned int cnt = 0;
+
+	// characters that may not adjoin a keyword match (identifier characters)
+	string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";
+
+	for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++)
+	{
+		line = fit->line;
+
+		// insert blank at the beginning (for searching keywords)
+		line = ' ' + line;
+		lineBak = ' ' + fitbak->line;
+
+		// do not process blank lines
+		// blank line means blank_line/comment_line/directive
+		if (!CUtil::CheckBlank(line))
+		{
+			LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, paren_count, for_flag, found_forifwhile, found_while,
+				prev_char, data_continue, temp_lines, phys_exec_lines, phys_data_lines, inArrayDec,
+				openBrackets, loopLevel);
+
+			if (print_cmplx)
+			{
+				cnt = 0;
+				CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count, false);
+			}
+
+			result->exec_lines[PHY] += phys_exec_lines;	// LSLOC reports per-call counts; fold them in and reset
+			phys_exec_lines = 0;
+
+			result->data_lines[PHY] += phys_data_lines;
+			phys_data_lines = 0;
+
+		}
+	}
+	return 1;
+}
+
+/*!
+* Extracts and stores logical lines of code.
+* Determines and extracts logical SLOC to place in the result variable
+* using addSLOC function. Each time the addSLOC function is called,
+* a new logical SLOC is added. This function assumes that the directive
+* is handled before it is called.
+*
+* \param result counter results
+* \param line processed physical line of code
+* \param lineBak original physical line of code
+* \param strLSLOC processed logical string (carried over between calls)
+* \param strLSLOCBak original logical string (carried over between calls)
+* \param paren_cnt count of open parentheses inside a for/while/if condition
+* \param forflag true while scanning the parenthesized part of a for, while, or if
+* \param found_forifwhile true right after the ')' that closes a for, if, or while condition
+* \param found_while found while flag
+* \param prev_char previous non-blank character
+* \param data_continue continuation of a data declaration line
+* \param temp_lines tracks physical line count
+* \param phys_exec_lines number of physical executable lines
+* \param phys_data_lines number of physical data lines
+* \param inArrayDec marks an array declaration
+* \param openBrackets number of open brackets (no matching close bracket)
+* \param loopLevel nested loop level
+*/
+void CCFScriptCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt,
+							 bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue,
+							 unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, bool &inArrayDec,
+							 unsigned int &openBrackets, StringVector &loopLevel)
+{
+	// paren_cnt is used with 'for' statement only
+	size_t start = 0; //starting index of the working string
+	size_t i = 0, strSize;
+	bool found_do, found_try, found_else, trunc_flag = false;
+	string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:";
+	unsigned int cnt = 0;
+
+	string tmp = CUtil::TrimString(strLSLOC);
+
+	// do, try (looked up in the text carried over from previous physical lines)
+	found_do = (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) != string::npos);
+	found_try = (CUtil::FindKeyword(tmp, "try", 0, TO_END_OF_STRING, false) != string::npos);
+	// else is treated differently, else is included in SLOC, do and try are not
+	found_else = (CUtil::FindKeyword(tmp, "else", 0, TO_END_OF_STRING, false) != string::npos);
+
+	while (i < line.length()) // there may be more than 1 logical SLOC in this line
+	{
+		switch (line[i])
+		{
+		case ';': case '{': // LSLOC terminators
+			// ';' for normal executable or declaration statement
+			// '{' for starting a function or 'do' statement or a block (which is counted)
+
+			// get the previous logical mark until i-1 index is the new LSLOC
+			// except 'do' precedes '{'
+			// except '}' precedes ';' ??
+			if (paren_cnt > 0 && line[i] == ';') // do nothing inside 'for' statement
+				break;
+
+			// record open bracket for nested loop processing
+			if (print_cmplx)
+			{
+				if (line[i] == '{')
+				{
+					openBrackets++;
+					if ((unsigned int)loopLevel.size() < openBrackets)
+						loopLevel.push_back("");
+				}
+				else
+				{
+					if ((unsigned int)loopLevel.size() > openBrackets && openBrackets > 0)
+						loopLevel.pop_back();
+				}
+			}
+
+			// case 'while(...);', 'while(...) {', and '} while(...);'
+			// this case is handled in case ')'
+			if (found_while && found_forifwhile)
+			{
+				found_while = false;
+				found_forifwhile = false;
+				start = i + 1;
+				break;
+			}
+
+			if (line[i] == '{')
+			{
+				if (prev_char == '=') inArrayDec = true;
+				if (inArrayDec) break; // continue until seeing ';'
+
+				// case for(...); and if (...) {
+				if (found_forifwhile) // these specials are handled
+				{
+					found_forifwhile = false;
+					start = i + 1;
+					break;
+				}
+
+				// check if 'do' precedes '{'
+				if (!found_do && !found_try && !found_else)
+				{
+					// look for 'do', 'try', or 'else' in the text that precedes '{' on this line
+					tmp = CUtil::TrimString(line.substr(start, i - start));
+					found_do = (tmp == "do");		// found 'do' statement
+					found_try = (tmp == "try");		// found 'try' statement
+					// same as else
+					found_else = (tmp == "else");	// found 'else' statement
+				}
+				if (found_do || found_try || found_else)
+				{
+					if (found_do && print_cmplx)
+					{
+						if (loopLevel.size() > 0) loopLevel.pop_back();
+						loopLevel.push_back("do");
+					}
+					found_do = false;
+					found_try = false;
+					if (!found_else)
+					{
+						// everything before 'do', 'try' are cleared
+						strLSLOC = "";
+						strLSLOCBak = "";
+						start = i + 1;
+					}
+					break; // do not store '{' following 'do'
+				}
+			}
+
+			if (line[i] == ';' && prev_char == '}') // '};' ends an LSLOC only for an array initializer, e.g., a[]={1,2,3};
+			{
+				// check if in array declaration or not
+				// if no, skip, otherwise, complete the SLOC containing array declaration
+				if (!inArrayDec)
+				{
+					start = i + 1;
+					break;
+				}
+			}
+
+			inArrayDec = false;
+
+			// check for empty statement (=1 LSLOC)
+			if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1)
+			{
+				strLSLOC = ";";
+				strLSLOCBak = ";";
+			}
+			else
+			{
+				strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+				if (strSize > 0)
+				{
+					strLSLOC += line.substr(start, strSize);
+					strLSLOCBak += lineBak.substr(start, strSize);
+				}
+			}
+			if (result->addSLOC(strLSLOCBak, trunc_flag))
+			{
+				cnt = 0;
+				CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count, false);
+
+				temp_lines++;
+				if (data_continue == true && line[i] == ';')
+				{
+					result->data_lines[LOG]++;
+					phys_data_lines = temp_lines;
+				}
+				else
+				{
+					if (cnt > 0 && line[i] == ';')	// statement contains a data keyword: count it as data
+					{
+						result->data_lines[LOG]++;
+						phys_data_lines = temp_lines;
+					}
+					else
+					{
+						result->exec_lines[LOG]++;
+						phys_exec_lines = temp_lines;
+					}
+				}
+			}
+			else if (data_continue == true && line[i] == ';')
+				phys_data_lines = temp_lines;
+			else
+				phys_exec_lines = temp_lines;
+			data_continue = false;
+			temp_lines = 0;
+			strLSLOC = strLSLOCBak = "";
+			start = i + 1;
+
+			break;
+		case '(':
+			if (forflag)
+				paren_cnt++;
+			else
+			{
+				// handle 'for', 'while', 'if' the same way
+				tmp = CUtil::TrimString(line.substr(start, i - start));	// length is i - start: only the text before '('
+				if (CUtil::FindKeyword(tmp, "for", 0, TO_END_OF_STRING, false) != string::npos
+					|| CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false)!= string::npos
+					|| CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) != string::npos)
+				{
+					forflag = true;
+					paren_cnt++;
+
+					if (print_cmplx && (unsigned int)loopLevel.size() > openBrackets && openBrackets > 0)
+						loopLevel.pop_back();
+
+					if (CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false)!= string::npos)
+					{
+						if (print_cmplx)
+							loopLevel.push_back("while");
+						found_while = true;
+					}
+					else if (print_cmplx && CUtil::FindKeyword(tmp, "for", 0, TO_END_OF_STRING, false) != string::npos)
+					{
+						loopLevel.push_back("for");
+					}
+
+					// record nested loop level
+					if (print_cmplx)
+					{
+						if (CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) == string::npos)
+						{
+							unsigned int loopCnt = 0;
+							for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++)
+							{
+								if ((*lit) != "")	// "" marks a plain block, not a loop
+									loopCnt++;
+							}
+							if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt)
+								result->cmplx_nestloop_count.push_back(1);
+							else
+								result->cmplx_nestloop_count[loopCnt-1]++;
+						}
+					}
+				}
+			}
+			break;
+		case ')':
+			if (forflag)
+			{
+				if (paren_cnt > 0)
+					paren_cnt--;
+				if (paren_cnt == 0)
+				{
+					// handle 'for', 'while', 'if': the closed condition is stored as one executable LSLOC
+					strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+					if (strSize > 0)
+					{
+						strLSLOC += line.substr(start, strSize);
+						strLSLOCBak += lineBak.substr(start, strSize);
+					}
+					if (result->addSLOC(strLSLOCBak, trunc_flag))
+						result->exec_lines[LOG]++;
+					strLSLOCBak = strLSLOC = "";
+					phys_exec_lines = temp_lines;
+					temp_lines = 0;
+					start = i + 1;
+					found_forifwhile = true;
+					forflag = false;
+				}
+			}
+			break;
+		case '}':
+			// skip '}' when found ';' and then '}' because '{' is counted already
+			// also, {} is also skipped, counted
+			if (prev_char == ';' || prev_char == '{' || prev_char == '}')
+				if (!inArrayDec) start = i + 1;
+
+			// record close bracket for nested loop processing
+			if (print_cmplx)
+			{
+				// guard both counters so an unmatched '}' cannot underflow them
+				if (openBrackets > 0)
+					openBrackets--;
+				if (loopLevel.size() > 0)
+					loopLevel.pop_back();
+			}
+
+			break;
+		}
+
+		if (line[i] != ' ' && line[i] != '\t')
+		{
+			// if ;}}} --> don't count }}} at all
+			// also, if {}}} --> don't count }}} at all
+			//if ( !(line[i] == '}' && (prev_char == ';' || prev_char == '{'))) // see case '}' above
+			prev_char = line[i];
+
+			// change to not found if a char appears before
+			if (line[i] != ')' && found_forifwhile)
+				found_forifwhile = false;
+		}
+		i++;
+
+	}
+
+	tmp = CUtil::TrimString(line.substr(start, i - start));	// unterminated remainder of this physical line
+	strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+	if (strSize > 0)
+	{
+		strLSLOC += tmp.substr(0, strSize);
+		tmp = CUtil::TrimString(lineBak.substr(start, i - start));
+		strLSLOCBak += tmp.substr(0, strSize);
+	}
+
+	// make sure that we are not beginning to process a new data line
+	cnt = 0;
+	CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL, false);
+
+	if (cnt > 0)
+		data_continue = true;
+	if (data_continue)
+		temp_lines++;
+	if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0)
+		phys_exec_lines = 1;
+}
diff --git a/src/CCFScriptCounter.h b/src/CCFScriptCounter.h
new file mode 100644
index 0000000..2a0b748
--- /dev/null
+++ b/src/CCFScriptCounter.h
@@ -0,0 +1,32 @@
+//! Code counter class definition for the CFScript language.
+/*!
+* \file CCFScriptCounter.h
+*
+* This file contains the code counter class definition for the CFScript language.
+*/
+
+#ifndef CCFScriptCounter_h
+#define CCFScriptCounter_h
+
+#include "CCodeCounter.h"
+
+//! CFScript code counter class.
+/*!
+* \class CCFScriptCounter
+*
+* Defines the CFScript code counter class (a CCodeCounter subclass).
+*/
+class CCFScriptCounter : public CCodeCounter
+{
+public:
+	CCFScriptCounter();
+
+protected:
+	virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL);	// per-file driver: passes each non-blank line to LSLOC
+	void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt,
+		bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue,
+		unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, bool &inArrayDec,
+		unsigned int &openBrackets, StringVector &loopLevel);	// reference parameters carry parser state from one physical line to the next
+};
+
+#endif
diff --git a/src/CCJavaCsCounter.cpp b/src/CCJavaCsCounter.cpp
new file mode 100644
index 0000000..2aa37a6
--- /dev/null
+++ b/src/CCJavaCsCounter.cpp
@@ -0,0 +1,676 @@
+//! Code counter class methods for the C/C++, Java, and C# languages.
+/*!
+* \file CCJavaCsCounter.cpp
+*
+* This file contains the code counter class methods for the C/C++, Java, and C# languages.
+*/
+
+#include "CCJavaCsCounter.h"
+#include	// NOTE(review): the <header> name appears to have been stripped from this line -- restore it from the original file
+
+/*!
+* Constructs a CCJavaCsCounter object.
+*/
+CCJavaCsCounter::CCJavaCsCounter()
+{
+	QuoteStart = "\"'";			// both double and single quotes open a literal
+	QuoteEnd = "\"'";
+	QuoteEscapeFront = '\\';
+	ContinueLine = "\\";
+	BlockCommentStart.push_back("/*");
+	BlockCommentEnd.push_back("*/");
+
+	LineCommentStart.push_back("//");
+
+	// these properties are common for C/C++, Java, C#: arithmetic operators
+	cmplx_calc_list.push_back("%");
+	cmplx_calc_list.push_back("^");
+	cmplx_calc_list.push_back("++");
+	cmplx_calc_list.push_back("+");
+	cmplx_calc_list.push_back("--");
+	cmplx_calc_list.push_back("-");
+	cmplx_calc_list.push_back("*");
+	cmplx_calc_list.push_back("/");
+	cmplx_calc_list.push_back(">>");
+	cmplx_calc_list.push_back("<<");
+
+	cmplx_cond_list.push_back("case");	// conditional keywords
+	cmplx_cond_list.push_back("else");
+	cmplx_cond_list.push_back("else if");
+	cmplx_cond_list.push_back("for");
+	cmplx_cond_list.push_back("if");
+	cmplx_cond_list.push_back("switch");
+	cmplx_cond_list.push_back("while");
+	cmplx_cond_list.push_back("?");
+
+	cmplx_logic_list.push_back("&&");	// logical and relational operators
+	cmplx_logic_list.push_back("||");
+	cmplx_logic_list.push_back("==");
+	cmplx_logic_list.push_back("!");
+	cmplx_logic_list.push_back(">");
+	cmplx_logic_list.push_back("<");
+	cmplx_logic_list.push_back(">=");
+	cmplx_logic_list.push_back("<=");	// was "=<", which is not an operator in C/C++, Java, or C#
+
+	cmplx_assign_list.push_back("=");
+}
+
+/*!
+* Counts directive lines of code.
+*
+* \param fmap list of processed file lines (directive lines are blanked once counted)
+* \param result counter results
+* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings)
+*
+* \return method status (always 1)
+*/
+int CCJavaCsCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak)
+{
+	bool contd = false, trunc_flag = false;
+	size_t idx, strSize;
+	unsigned int cnt = 0;
+	string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";
+	string strDirLine = "";
+
+	filemap::iterator itfmBak = fmapBak->begin();
+	for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++)
+	{
+		if (CUtil::CheckBlank(iter->line))
+			continue;
+
+		if (print_cmplx)
+		{
+			cnt = 0;
+			CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count);
+		}
+
+		if (!contd)
+		{
+			// if not a continuation of a previous directive
+			for (vector<string>::iterator viter = directive.begin(); viter != directive.end(); viter++)	// template argument restored
+			{
+				// ensures the keyword stands alone, avoid, e.g., #ifabc
+				if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0)
+				{
+					contd = true;
+					break;
+				}
+			}
+			if (contd)
+			{
+				strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag);
+				if (strSize > 0)
+					strDirLine = itfmBak->line.substr(0, strSize);
+				result->directive_lines[PHY]++;
+			}
+		}
+		else
+		{
+			// continuation of a previous directive
+			strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag);
+			if (strSize > 0)
+				strDirLine += "\n" + itfmBak->line.substr(0, strSize);
+			result->directive_lines[PHY]++;
+		}
+
+		if (contd)
+		{
+			// drop continuation symbol (guarded: indexing length()-1 of an empty string is out of range)
+			if (!strDirLine.empty() && strDirLine[strDirLine.length()-1] == '\\')
+				strDirLine = strDirLine.substr(0, strDirLine.length()-1);
+
+			// if a directive or continuation of a directive (no continuation symbol found)
+			if (iter->line[iter->line.length()-1] != ',' && iter->line[iter->line.length()-1] != '\\')
+			{
+				contd = false;
+				if (result->addSLOC(strDirLine, trunc_flag))
+					result->directive_lines[LOG]++;
+			}
+			iter->line = "";	// blank the line so later passes skip it
+		}
+	}
+	return 1;
+}
+
+/*!
+* Processes physical and logical lines according to language specific rules.
+* NOTE: all the blank lines +
+*               whole line comments +
+*               lines with compiler directives
+*       should have been blanked from filemap by previous processing
+*       before reaching this function
+*
+* \param fmap list of processed file lines
+* \param result counter results (receives the physical executable/data line totals)
+* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings)
+*
+* \return method status (always 1)
+*/
+int CCJavaCsCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak)
+{
+	unsigned int	paren_count			= 0;
+	bool			for_flag			= false;
+	bool			found_for			= false;
+	bool			found_forifwhile	= false;
+	bool			found_while			= false;
+	char			prev_char			= 0;
+	bool			data_continue		= false;
+	bool			inArrayDec			= false;
+	string			strLSLOC			= "";
+	string			strLSLOCBak			= "";
+	unsigned int	openBrackets		= 0;
+
+	filemap::iterator fit, fitbak;
+	string line, lineBak;
+	StringVector loopLevel;
+
+	unsigned int phys_exec_lines = 0;
+	unsigned int phys_data_lines = 0;
+	unsigned int temp_lines = 0;
+	unsigned int cnt = 0;
+	string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";
+
+	for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++)
+	{
+		line = fit->line;
+
+		// insert blank at the beginning (for searching keywords)
+		line = ' ' + line;
+		lineBak = ' ' + fitbak->line;
+
+		// do not process blank lines
+		// blank line means blank_line/comment_line/directive
+		if (!CUtil::CheckBlank(line))
+		{
+			LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, paren_count, for_flag, found_forifwhile, found_while,
+				prev_char, data_continue, temp_lines, phys_exec_lines, phys_data_lines, inArrayDec, found_for,
+				openBrackets, loopLevel);
+
+			if (print_cmplx)
+			{
+				cnt = 0;
+				CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count);
+			}
+
+			result->exec_lines[PHY] += phys_exec_lines;	// LSLOC reports per-call counts; fold them in and reset
+			phys_exec_lines = 0;
+
+			result->data_lines[PHY] += phys_data_lines;
+			phys_data_lines = 0;
+		}
+	}
+	return 1;
+}
+
+/*!
+* Extracts and stores logical lines of code.
+* Determines and extracts logical SLOC to place in the result variable
+* using addSLOC function. Each time the addSLOC function is called,
+* a new logical SLOC is added. This function assumes that the directive
+* is handled before it is called.
+*
+* \param result counter results
+* \param line processed physical line of code
+* \param lineBak original physical line of code
+* \param strLSLOC processed logical string (carried over between calls)
+* \param strLSLOCBak original logical string (carried over between calls)
+* \param paren_cnt count of open parentheses inside a for/foreach/while/if condition
+* \param forflag true while scanning the parenthesized part of a for, foreach, while, or if
+* \param found_forifwhile true right after the ')' that closes a for, if, or while condition
+* \param found_while found while flag
+* \param prev_char previous non-blank character
+* \param data_continue continuation of a data declaration line
+* \param temp_lines tracks physical line count
+* \param phys_exec_lines number of physical executable lines
+* \param phys_data_lines number of physical data lines
+* \param inArrayDec marks an array declaration
+* \param found_for found for loop
+* \param openBrackets number of open brackets (no matching close bracket)
+* \param loopLevel nested loop level
+*/
+void CCJavaCsCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt,
+							bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue,
+							unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines,
+							bool &inArrayDec, bool &found_for, unsigned int &openBrackets, StringVector &loopLevel)
+{
+	// paren_cnt is used with 'for' statement only
+	size_t start = 0; //starting index of the working string
+	size_t i = 0, strSize;
+	bool found_do, found_try, found_else, trunc_flag = false;
+	string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:";
+	string dataExclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:().";	// avoid double count of casts as data and executable lines (e.g. set { m_uiValue = (uint)value; }
+
+	unsigned int cnt = 0;
+
+	string tmp = CUtil::TrimString(strLSLOC);
+
+	// do, try (looked up in the text carried over from previous physical lines)
+	found_do = (CUtil::FindKeyword(tmp, "do") != string::npos);
+	found_try = (CUtil::FindKeyword(tmp, "try") != string::npos);
+	// else is treated differently, else is included in SLOC, do and try are not
+	found_else = (CUtil::FindKeyword(tmp, "else") != string::npos);
+
+	// there may be more than 1 logical SLOC in this line
+	while (i < line.length())
+	{
+		switch (line[i])
+		{
+		case ';': case '{': // LSLOC terminators
+			// ';' for normal executable or declaration statement
+			// '{' for starting a function or 'do' stmt or a block (which is counted)
+			// get the previous logical mark until i-1 index is the new LSLOC
+			// except 'do' precedes '{'
+			// except '}' precedes ';' ??
+			// do nothing inside 'for' statement
+			if (found_for == true && paren_cnt > 0 && line[i] == ';')
+				break;
+
+			// record open bracket for nested loop processing
+			if (print_cmplx)
+			{
+				if (line[i] == '{')
+				{
+					openBrackets++;
+					if ((unsigned int)loopLevel.size() < openBrackets)
+						loopLevel.push_back("");
+				}
+				else
+				{
+					if ((unsigned int)loopLevel.size() > openBrackets && openBrackets > 0)
+						loopLevel.pop_back();
+				}
+			}
+
+			// case 'while(...);', 'while(...) {', and '} while(...);'
+			// this case is handled in case ')'
+			if (found_while && found_forifwhile)
+			{
+				found_while = false;
+				found_forifwhile = false;
+				start = i + 1;
+				break;
+			}
+
+			if (line[i] == '{')
+			{
+				if (prev_char == '=')
+					inArrayDec = true;
+
+				// continue until seeing ';'
+				if (inArrayDec)
+					break;
+
+				// case for(...); and if (...) {
+				// these specials are handled
+				if (found_forifwhile)
+				{
+					found_forifwhile = false;
+					start = i + 1;
+					break;
+				}
+
+				// check if 'do' precedes '{'
+				if (!found_do && !found_try && !found_else)
+				{
+					// look for 'do', 'try', or 'else' in the text that precedes '{' on this line
+					tmp = CUtil::TrimString(line.substr(start, i - start));
+					found_do = (tmp == "do");		// found 'do' statement
+					found_try = (tmp == "try");		// found 'try' statement
+					// same as else
+					found_else = (tmp == "else");	// found 'else' statement
+				}
+				if (found_do || found_try || found_else)
+				{
+					if (found_do && print_cmplx)
+					{
+						if (loopLevel.size() > 0) loopLevel.pop_back();
+						loopLevel.push_back("do");
+					}
+					found_do = false;
+					found_try = false;
+					if (!found_else)
+					{
+						// everything before 'do', 'try' are cleared
+						strLSLOC = "";
+						strLSLOCBak = "";
+						start = i + 1;
+					}
+					break; // do not store '{' following 'do'
+				}
+			}
+
+			// '};' ends an LSLOC only for an array initializer, e.g., a[]={1,2,3};
+			if (line[i] == ';' && prev_char == '}')
+			{
+				// check if in array declaration or not
+				// if no, skip, otherwise, complete the SLOC containing array declaration
+				if (!inArrayDec)
+				{
+					start = i + 1;
+					break;
+				}
+			}
+
+			inArrayDec = false;
+
+			// check for empty statement (=1 LSLOC)
+			if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1)
+			{
+				strLSLOC = ";";
+				strLSLOCBak = ";";
+			}
+			else
+			{
+				strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+				if (strSize > 0)
+				{
+					strLSLOC += line.substr(start, strSize);
+					strLSLOCBak += lineBak.substr(start, strSize);
+				}
+			}
+			if (result->addSLOC(strLSLOCBak, trunc_flag))
+			{
+				cnt = 0;
+				CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, dataExclude, "", "", &result->data_name_count);
+
+				temp_lines++;
+				if (data_continue == true && line[i] == ';')
+				{
+					result->data_lines[LOG]++;
+					phys_data_lines = temp_lines;
+				}
+				else
+				{
+					if (cnt > 0 && line[i] == ';')	// statement contains a data keyword: count it as data
+					{
+						result->data_lines[LOG]++;
+						phys_data_lines = temp_lines;
+					}
+					else
+					{
+						result->exec_lines[LOG]++;
+						phys_exec_lines = temp_lines;
+					}
+				}
+			}
+			else if (data_continue == true && line[i] == ';')
+				phys_data_lines = temp_lines;
+			else
+				phys_exec_lines = temp_lines;
+			data_continue = false;
+			temp_lines = 0;
+			strLSLOC = strLSLOCBak = "";
+			start = i + 1;
+
+			// reset some flagging parameters
+			forflag = false;
+			paren_cnt = 0;
+			found_while = false;
+			found_forifwhile = false;
+			found_for = false;
+
+			break;
+		case '(':
+			if (forflag)
+				paren_cnt++;
+			else
+			{
+				// handle 'for', 'foreach', 'while', 'if' the same way
+				tmp = CUtil::TrimString(line.substr(start, i - start));	// length is i - start: only the text before '('
+				if (CUtil::FindKeyword(tmp, "for") != string::npos
+					|| CUtil::FindKeyword(tmp, "foreach") != string::npos
+					|| CUtil::FindKeyword(tmp, "while") != string::npos
+					|| CUtil::FindKeyword(tmp, "if") != string::npos)
+				{
+					forflag = true;
+					paren_cnt++;
+
+					if (print_cmplx && (unsigned int)loopLevel.size() > openBrackets && openBrackets > 0)
+						loopLevel.pop_back();
+
+					if (CUtil::FindKeyword(tmp, "for") != string::npos)
+					{
+						if (print_cmplx)
+							loopLevel.push_back("for");
+						found_for = true;
+					}
+					else if (CUtil::FindKeyword(tmp, "while") != string::npos)
+					{
+						if (print_cmplx)
+							loopLevel.push_back("while");
+						found_while = true;
+					}
+					else if (print_cmplx && CUtil::FindKeyword(tmp, "foreach") != string::npos)
+						loopLevel.push_back("foreach");
+
+					// record nested loop level
+					if (print_cmplx)
+					{
+						if (CUtil::FindKeyword(tmp, "if") == string::npos)
+						{
+							unsigned int loopCnt = 0;
+							for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++)
+							{
+								if ((*lit) != "")	// "" marks a plain block, not a loop
+									loopCnt++;
+							}
+							if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt)
+								result->cmplx_nestloop_count.push_back(1);
+							else
+								result->cmplx_nestloop_count[loopCnt-1]++;
+						}
+					}
+				}
+			}
+			break;
+		case ')':
+			if (forflag)
+			{
+				if (paren_cnt > 0)
+					paren_cnt--;
+				if (paren_cnt == 0)
+				{
+					// handle 'for', 'foreach', 'while', 'if': the closed condition is stored as one executable LSLOC
+					strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+					if (strSize > 0)
+					{
+						strLSLOC += line.substr(start, strSize);
+						strLSLOCBak += lineBak.substr(start, strSize);
+					}
+					if (result->addSLOC(strLSLOCBak, trunc_flag))
+						result->exec_lines[LOG]++;
+					strLSLOCBak = strLSLOC = "";
+					phys_exec_lines = temp_lines;
+					temp_lines = 0;
+					start = i + 1;
+					found_forifwhile = true;
+					forflag = false;
+					found_for = false;
+				}
+			}
+			break;
+		case '}':
+			// skip '}' when found ';' and then '}' because '{' is counted already
+			// also, {} is also skipped, counted
+			if (prev_char == ';' || prev_char == '{' || prev_char == '}')
+				if (!inArrayDec) start = i + 1;
+
+			// record close bracket for nested loop processing (guarded against underflow)
+			if (print_cmplx)
+			{
+				if (openBrackets > 0)
+					openBrackets--;
+				if (loopLevel.size() > 0)
+					loopLevel.pop_back();
+			}
+			break;
+		}
+
+		if (line[i] != ' ' && line[i] != '\t')
+		{
+			// if ;}}} --> don't count }}} at all
+			// also, if {}}} --> don't count }}} at all
+			// if ( !(line[i] == '}' && (prev_char == ';' || prev_char == '{'))) // see case '}' above
+			prev_char = line[i];
+
+			// change to not found if a char appears before
+			if (line[i] != ')' && found_forifwhile)
+				found_forifwhile = false;
+		}
+		i++;
+	}
+
+	tmp = CUtil::TrimString(line.substr(start, i - start));	// unterminated remainder of this physical line
+	strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag);
+	if (strSize > 0)
+	{
+		strLSLOC += tmp.substr(0, strSize);
+		tmp = CUtil::TrimString(lineBak.substr(start, i - start));
+		strLSLOCBak += tmp.substr(0, strSize);
+
+		// drop continuation symbol
+		if (strLSLOC[strLSLOC.length()-1] == '\\')
+		{
+			strLSLOC = strLSLOC.substr(0, strLSLOC.length()-1);
+			strLSLOCBak = strLSLOCBak.substr(0, strLSLOCBak.length()-1);
+		}
+	}
+
+	// make sure that we are not beginning to process a new data line
+	cnt = 0;
+	CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL);
+
+	if (cnt > 0)
+		data_continue = true;
+	if (data_continue)
+		temp_lines++;
+	if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0)
+		phys_exec_lines = 1;
+}
+
+/*!
+* Parses lines for function/method names.
+*
+* \param line line to be processed
+* \param lastline last line processed (accumulates a candidate header that spans several lines)
+* \param functionStack stack of functions (one entry pushed per '{', popped on '}')
+* \param functionName function name found
+* \param functionCount function count found
+*
+* \return 1 if function name is found, 0 otherwise
+*/
+int CCJavaCsCounter::ParseFunctionName(const string &line, string &lastline,
+	filemap &functionStack, string &functionName, unsigned int &functionCount)
+{
+	string tline, str;
+	size_t idx, tidx, cnt, cnt2;
+	unsigned int fcnt, cyclomatic_cnt = 0;
+	string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";
+
+	tline = CUtil::TrimString(line);
+	idx = tline.find('{');
+	if (idx != string::npos)
+	{
+		// check whether it is at first index, if yes then function name is at above line
+		if (idx == 0)
+		{
+			lineElement element(++functionCount, lastline);
+			functionStack.push_back(element);
+			lastline.erase();
+		}
+		else
+		{
+			str = tline.substr(0, idx);
+			tidx = cnt = cnt2 = 0;
+			if (str[0] != '(' && str[0] != ':' && (lastline.length() < 1 || lastline[lastline.length() - 1] != ':'))
+			{
+				while (tidx != string::npos)	// cnt = number of '(' before the '{'
+				{
+					tidx = str.find('(', tidx);
+					if (tidx != string::npos)
+					{
+						cnt++;
+						tidx++;
+					}
+				}
+				if (cnt > 0)
+				{
+					tidx = 0;
+					while (tidx != string::npos)	// cnt2 = number of ')' before the '{'
+					{
+						tidx = str.find(')', tidx);
+						if (tidx != string::npos)
+						{
+							cnt2++;
+							tidx++;
+						}
+					}
+				}
+			}
+			// make sure parentheses are closed and no parent class listed
+			if ((cnt > 0 && cnt == cnt2) || (lastline.length() > 0 && lastline[lastline.length() - 1] == ';'))
+				lastline = str;
+			else
+				lastline += " " + str;
+			lineElement element(++functionCount, CUtil::TrimString(lastline));
+			functionStack.push_back(element);
+			lastline.erase();
+		}
+	}
+	else if (tline.length() > 0 && tline[tline.length() - 1] != ';' &&
+		lastline.length() > 0 && lastline[lastline.length() - 1] != ';')
+	{
+		// append until all parentheses are closed
+		tidx = lastline.find('(');
+		if (tidx != string::npos)
+		{
+			cnt = 1;
+			while (tidx != string::npos)
+			{
+				tidx = lastline.find('(', tidx + 1);
+				if (tidx != string::npos)
+					cnt++;
+			}
+			tidx = lastline.find(')');
+			while (tidx != string::npos)
+			{
+				cnt++;
+				tidx = lastline.find(')', tidx + 1);
+			}
+			if (cnt % 2 != 0)	// odd total of '(' and ')': treated as still open
+				lastline += " " + tline;
+			else
+				lastline = tline;
+		}
+		else
+			lastline = tline;
+	}
+	else
+		lastline = tline;
+
+	idx = line.find('}');
+	if (idx != string::npos && !functionStack.empty())
+	{
+		str = functionStack.back().line;
+		fcnt = functionStack.back().lineNumber;
+		functionStack.pop_back();
+		idx = str.find('(');
+
+		if (idx != string::npos)
+		{
+			// search for cyclomatic complexity keywords and other possible keywords
+			CUtil::CountTally(str, cmplx_cyclomatic_list, cyclomatic_cnt, 1, exclude, "", "", 0, casesensitive);
+			if (cyclomatic_cnt <= 0 && CUtil::FindKeyword(str, "switch") == string::npos &&
+				CUtil::FindKeyword(str, "try") == string::npos && CUtil::FindKeyword(str, "finally") == string::npos &&
+				CUtil::FindKeyword(str, "return") == string::npos && str.find('=') == string::npos)
+			{
+				functionName = CUtil::ClearRedundantSpaces(str.substr(0, idx));	// text before the first '('
+				functionCount = fcnt;
+				lastline.erase();
+				return 1;
+			}
+		}
+		lastline.erase();
+	}
+	return 0;
+}
diff --git a/src/CCJavaCsCounter.h b/src/CCJavaCsCounter.h
new file mode 100644
index 0000000..e03f91c
--- /dev/null
+++ b/src/CCJavaCsCounter.h
@@ -0,0 +1,35 @@
+//! Code counter class definition for the C/C++, Java, and C# languages.
+/*!
+* \file CCJavaCsCounter.h
+*
+* This file contains the code counter class definition for the C/C++, Java, and C# languages.
+*/
+
+#ifndef CCJavaCsCounter_h
+#define CCJavaCsCounter_h
+
+#include "CCodeCounter.h"
+
+//! C/C++, Java, and C# code counter class.
+/*!
+* \class CCJavaCsCounter
+*
+* Defines the C/C++, Java, and C# code counter class.
+*/ +class CCJavaCsCounter : public CCodeCounter +{ +public: + CCJavaCsCounter(); + +protected: + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapmBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapmBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + bool &inArrayDec, bool &found_for, unsigned int &openBrackets, StringVector &loopLevel); + virtual int ParseFunctionName(const string &line, string &lastline, + filemap &functionStack, string &functionName, unsigned int &functionCount); +}; + +#endif diff --git a/src/CCodeCounter.cpp b/src/CCodeCounter.cpp new file mode 100644 index 0000000..e7db713 --- /dev/null +++ b/src/CCodeCounter.cpp @@ -0,0 +1,801 @@ +//! Common code counter methods for sub-classing individual languages. +/*! +* \file CCodeCounter.h +* +* This contains the base class code counter methods for inherited classes individual languages. +* +* Changed from UCC 2013_04 release by Randy Maxwell +* Changes started on 2015_06_06 +* Changes ended on 2015_06_06 +*/ + +#include + +#include "CCodeCounter.h" +#include "UCCGlobals.h" + +/*! +* Constructs a CCodeCounter object. +*/ +CCodeCounter::CCodeCounter() +{ + print_cmplx = false; + lsloc_truncate = DEFAULT_TRUNCATE; + QuoteStart = ""; + QuoteEnd = ""; + QuoteEscapeFront = 0; + QuoteEscapeRear = 0; + ContinueLine = ""; + classtype = UNKNOWN; + language_name = DEF_LANG_NAME; + casesensitive = true; + total_filesA = 0; + total_filesB = 0; + total_dupFilesA = 0; + total_dupFilesB = 0; +} + +/*! +* Destroys a CCodeCounter object. +*/ +CCodeCounter::~CCodeCounter() +{ +} + +/*! +* Initializes the count vectors. 
+* This method removes the existing content of the vectors and assigns them all zeros +*/ +void CCodeCounter::InitializeCounts() +{ + unsigned int i = 0; + counted_files = 0; + counted_dupFiles = 0; + + directive_count.assign(directive.size(), make_pair(i, i)); + data_name_count.assign(data_name_list.size(), make_pair(i, i)); + exec_name_count.assign(exec_name_list.size(), make_pair(i, i)); + math_func_count.assign(math_func_list.size(), make_pair(i, i)); + trig_func_count.assign(trig_func_list.size(), make_pair(i, i)); + log_func_count.assign(log_func_list.size(), make_pair(i, i)); + + cmplx_calc_count.assign(cmplx_calc_list.size(), make_pair(i, i)); + cmplx_cond_count.assign(cmplx_cond_list.size(), make_pair(i, i)); + cmplx_logic_count.assign(cmplx_logic_list.size(), make_pair(i, i)); + cmplx_preproc_count.assign(cmplx_preproc_list.size(), make_pair(i, i)); + cmplx_assign_count.assign(cmplx_assign_list.size(), make_pair(i, i)); + cmplx_pointer_count.assign(cmplx_pointer_list.size(), make_pair(i, i)); +} + +/*! +* Initializes the count vectors for a result. +* This method removes the existing content of the vectors and assigns them all zeros +*/ +void CCodeCounter::InitializeResultsCounts(results* result) +{ + result->directive_count.assign(directive.size(), 0); + result->data_name_count.assign(data_name_list.size(), 0); + result->exec_name_count.assign(exec_name_list.size(), 0); + result->math_func_count.assign(math_func_list.size(), 0); + result->trig_func_count.assign(trig_func_list.size(), 0); + result->log_func_count.assign(log_func_list.size(), 0); + + result->cmplx_calc_count.assign(cmplx_calc_list.size(), 0); + result->cmplx_cond_count.assign(cmplx_cond_list.size(), 0); + result->cmplx_logic_count.assign(cmplx_logic_list.size(), 0); + result->cmplx_preproc_count.assign(cmplx_preproc_list.size(), 0); + result->cmplx_assign_count.assign(cmplx_assign_list.size(), 0); + result->cmplx_pointer_count.assign(cmplx_pointer_list.size(), 0); +} + +/*! 
+* Processes and counts the source file. +* +* \param fmap list of file lines +* \param result counter results +* +* \return method status +*/ +int CCodeCounter::CountSLOC(filemap* fmap, results* result) +{ + // backup file content before modifying it (comments and directive lines are cleared) + // fmapBak is same as fmap except that it stores unmodified quoted strings + // fmap has quoted strings replaced with '$' + filemap fmapMod = *fmap; + filemap fmapModBak = *fmap; + + InitializeResultsCounts(result); + + PreCountProcess(&fmapMod); + + CountBlankSLOC(&fmapMod, result); + + CountCommentsSLOC(&fmapMod, result, &fmapModBak); + + if (print_cmplx) + CountComplexity(&fmapMod, result); + + CountDirectiveSLOC(&fmapMod, result, &fmapModBak); + + LanguageSpecificProcess(&fmapMod, result, &fmapModBak); + + return 0; +} + +/*! +* Checks whether the file extension is supported by the language counter. +* +* \param file_name file name +* +* \return whether file extension is supported +*/ +bool CCodeCounter::IsSupportedFileExtension(const string &file_name) +{ + // if Makefile, check whether name equals MAKEFILE since no extension exists + if (classtype == MAKEFILE && file_name.size() >= 8) + { + if (CUtil::ToLower(file_name.substr(file_name.size() - 8)) == "makefile") + return true; + } + size_t idx = file_name.find_last_of("."); + if (idx == string::npos) + return false; + string file_ext = file_name.substr(idx); + file_ext = CUtil::ToLower(file_ext); + if (find(file_extension.begin(), file_extension.end(), file_ext) != file_extension.end()) + { + // if X-Midas/NeXtMidas, parse file to check for startmacro or endmacro (needed since Midas can use .txt or .mm) + if (classtype == XMIDAS || classtype == NEXTMIDAS) + { + string oneline; + ifstream fr(file_name.c_str(), ios::in); + if (!fr.is_open()) + return false; + + // search for "startmacro" (optional) or "endmacro" (required) + while (fr.good() || fr.eof()) + { + getline(fr, oneline); + if ((!fr.good() && !fr.eof()) || 
(fr.eof() && oneline.empty())) + break; + oneline = CUtil::ToLower(CUtil::TrimString(oneline)); + if (oneline.compare(0, 10, "startmacro") == 0 || oneline.compare(0, 8, "endmacro") == 0) + { + fr.clear(); + fr.close(); + return true; + } + if (!fr.good()) + break; + } + fr.clear(); + fr.close(); + } + else + return true; + } + return false; +} + +/*! +* Retrieves the language output file stream. +* Opens a new stream if it has not been opened already. +* +* \param outputFileNamePrePend name to prepend to the output file +* \param cmd current command line string +* \param csvOutput CSV file stream? (otherwise ASCII text file) +* \param legacyOutput legacy format file stream? (otherwise standard text file) +* +* \return output file stream +*/ +ofstream* CCodeCounter::GetOutputStream(const string &outputFileNamePrePend, const string &cmd, bool csvOutput, bool legacyOutput) +{ + if (csvOutput) + { + if (!output_file_csv.is_open()) + { + string fname = outputFileNamePrePend + language_name + OUTPUT_FILE_NAME_CSV; + output_file_csv.open(fname.c_str(), ofstream::out); + + if (!output_file_csv.is_open()) return NULL; + + CUtil::PrintFileHeader(output_file_csv, "SLOC COUNT RESULTS", cmd); + + CUtil::PrintFileHeaderLine(output_file_csv, "RESULTS FOR " + language_name + " FILES"); + output_file_csv << endl; + output_file_csv << "Total,Blank,Comments,,Compiler,Data,Exec.,Logical,Physical,File,Module" << endl; + output_file_csv << "Lines,Lines,Whole,Embedded,Direct.,Decl.,Instr.,SLOC,SLOC,Type,Name" << endl; + } + return &output_file_csv; + } + else + { + if (!output_file.is_open()) + { + string fname = outputFileNamePrePend + language_name + OUTPUT_FILE_NAME; + output_file.open(fname.c_str(), ofstream::out); + + if (!output_file.is_open()) return NULL; + + CUtil::PrintFileHeader(output_file, "SLOC COUNT RESULTS", cmd); + + CUtil::PrintFileHeaderLine(output_file, "RESULTS FOR " + language_name + " FILES"); + output_file << endl; + if (legacyOutput) + { + output_file << " Total 
Blank | Comments | Compiler Data Exec. | Logical | File Module" << endl; + output_file << " Lines Lines | Whole Embedded | Direct. Decl. Instr. | SLOC | Type Name" << endl; + output_file << "-----------------+------------------+-------------------------+---------+---------------------------" << endl; + } + else + { + output_file << " Total Blank | Comments | Compiler Data Exec. | Logical Physical | File Module" << endl; + output_file << " Lines Lines | Whole Embedded | Direct. Decl. Instr. | SLOC SLOC | Type Name" << endl; + output_file << "-----------------+------------------+-------------------------+------------------+---------------------------" << endl; + } + } + return &output_file; + } +} + +/*! +* Closes the language output file stream. +*/ +void CCodeCounter::CloseOutputStream() +{ + if (output_file.is_open()) + output_file.close(); + if (output_file_csv.is_open()) + output_file_csv.close(); +} + +/*! +* Finds the first index of one of the characters of strQuote in strline. +* +* \param strline string line +* \param strQuote string of character(s) to find in strline +* \param idx_start index of line character to start search +* \param QuoteEscapeFront quote escape character +* +* \return index of strQuote character in strline +*/ +size_t CCodeCounter::FindQuote(string const &strline, string const &strQuote, size_t idx_start, char QuoteEscapeFront) +{ + size_t min_idx, idx; + min_idx = strline.length(); + for (size_t i = 0; i < strQuote.length(); i++) + { + idx = CUtil::FindCharAvoidEscape(strline, strQuote[i], idx_start, QuoteEscapeFront); + if (idx != string::npos && idx < min_idx) + min_idx = idx; + } + if (min_idx < strline.length()) + return min_idx; + return string::npos; +} + +/*! +* Replaces up to ONE quoted string inside a string starting at idx_start. 
+* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote character of the current status +* +* \return method status +*/ +int CCodeCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd) +{ + size_t idx_end, idx_quote; + + if (contd) + { + idx_start = 0; + if (strline[0] == CurrentQuoteEnd) + { + idx_start = 1; + contd = false; + return 1; + } + strline[0] = '$'; + } + else + { + // handle two quote chars in some languages, both " and ' may be accepted + idx_start = FindQuote(strline, QuoteStart, idx_start, QuoteEscapeFront); + if (idx_start != string::npos) + { + idx_quote = QuoteStart.find_first_of(strline[idx_start]); + CurrentQuoteEnd = QuoteEnd[idx_quote]; + } + else + { + idx_start = strline.length(); + return 0; + } + } + + idx_end = CUtil::FindCharAvoidEscape(strline, CurrentQuoteEnd, idx_start + 1, QuoteEscapeFront); + if (idx_end == string::npos) + { + idx_end = strline.length() - 1; + strline.replace(idx_start + 1, idx_end - idx_start, idx_end - idx_start, '$'); + contd = true; + idx_start = idx_end + 1; + } + else + { + if ((QuoteEscapeRear) && (strline.length() > idx_end + 1) && (strline[idx_end+1] == QuoteEscapeRear)) + { + strline[idx_end] = '$'; + strline[idx_end+1] = '$'; + } + else + { + contd = false; + strline.replace(idx_start + 1, idx_end - idx_start - 1, idx_end - idx_start - 1, '$'); + idx_start = idx_end + 1; + } + } + return 1; +} + +/*! +* Counts blank lines in a file. +* +* \param fmap list of file lines +* \param result counter results +* +* \return method status +*/ +int CCodeCounter::CountBlankSLOC(filemap* fmap, results* result) +{ + for (filemap::iterator i = fmap->begin(); i != fmap->end(); i++) + { + if (CUtil::CheckBlank(i->line)) + result->blank_lines++; + } + return 1; +} + +/*! 
+* Counts the number of comment lines, removes comments, and +* replaces quoted strings by special chars, e.g., $ +* All arguments are modified by the method. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CCodeCounter::CountCommentsSLOC(filemap* fmap, results* result, filemap *fmapBak) +{ + if (BlockCommentStart.empty() && LineCommentStart.empty()) + return 0; + if (classtype == UNKNOWN || classtype == DATAFILE) + return 0; + + bool contd = false; + bool contd_nextline; + int comment_type = 0; + /* + comment_type: + 0 : not a comment + 1 : line comment, whole line + 2 : line comment, embedded + 3 : block comment, undecided + 4 : block comment, embedded + */ + + size_t idx_start, idx_end, comment_start; + size_t quote_idx_start; + string curBlckCmtStart, curBlckCmtEnd; + char CurrentQuoteEnd = 0; + bool quote_contd = false; + filemap::iterator itfmBak = fmapBak->begin(); + + quote_idx_start = 0; + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + contd_nextline = false; + + quote_idx_start = 0; + idx_start = 0; + + if (CUtil::CheckBlank(iter->line)) + continue; + if (quote_contd) + { + // Replace quote until next character + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_contd) + continue; + } + + if (contd) + comment_type = 3; + + while (!contd_nextline && idx_start < iter->line.length()) + { + // need to handle multiple quote chars in some languages, both " and ' may be accepted + quote_idx_start = FindQuote(iter->line, QuoteStart, quote_idx_start, QuoteEscapeFront); + comment_start = idx_start; + if (!contd) + FindCommentStart(iter->line, comment_start, comment_type, curBlckCmtStart, curBlckCmtEnd); + + if (comment_start == string::npos && quote_idx_start == string::npos) + break; + + if (comment_start != 
string::npos) + idx_start = comment_start; + + // if found quote before comment, e.g., "this is quote");//comment + if (quote_idx_start != string::npos && (comment_start == string::npos || quote_idx_start < comment_start)) + { + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_idx_start > idx_start && quote_idx_start != iter->line.length()) + { + // comment delimiter inside quote + idx_start = quote_idx_start; + continue; + } + } + else if (comment_start != string::npos) + { + // comment delimiter starts first + switch (comment_type) + { + case 1: // line comment, definitely whole line + iter->line = ""; + itfmBak->line = ""; + result->comment_lines++; + contd_nextline = true; + break; + case 2: // line comment, possibly embedded + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + contd_nextline = true; + break; + case 3: // block comment + case 4: + if (contd) + idx_end = iter->line.find(curBlckCmtEnd); + else + idx_end = iter->line.find(curBlckCmtEnd, idx_start + curBlckCmtStart.length()); + + if (idx_end == string::npos) + { + if (comment_type == 3) + { + iter->line = ""; + itfmBak->line = ""; + result->comment_lines++; + } + else if (comment_type == 4) + { + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + } + contd = true; + contd_nextline = true; + break; + } + else + { + contd = false; + iter->line.erase(idx_start, idx_end - idx_start + 
curBlckCmtEnd.length()); + itfmBak->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length()); + if (iter->line.empty()) + result->comment_lines++; + else + { + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + } + + // quote chars found may be erased as it is inside comment + quote_idx_start = idx_start; + } + break; + default: + cout << "Error in CountCommentsSLOC()" << endl; + break; + } + } + } + } + return 1; +} + +/*! +* Finds a starting position of a comment in a string starting at idx_start. +* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param comment_type comment type (0=not a comment, 1=whole line, 2=embedded line, 3=whole line block, 4=embedded block) +* \param curBlckCmtStart current block comment start string +* \param curBlckCmtEnd current block comment end string +* +* \return method status +*/ +int CCodeCounter::FindCommentStart(string strline, size_t &idx_start, int &comment_type, + string &curBlckCmtStart, string &curBlckCmtEnd) +{ + size_t idx_line, idx_tmp, idx_block; + string line = strline; + comment_type = 0; + + if (!casesensitive) + line = CUtil::ToLower(line); + + // searching for starting of line comment + idx_line = string::npos; + for (StringVector::iterator i = LineCommentStart.begin(); i != LineCommentStart.end(); i++) + { + idx_tmp = line.find((casesensitive ? 
(*i) : CUtil::ToLower(*i)), idx_start); + if (idx_tmp < idx_line) idx_line = idx_tmp; + } + + // searching for starting of block comment + idx_block = string::npos; + for (StringVector::iterator i = BlockCommentStart.begin(); i != BlockCommentStart.end(); i++) + { + idx_tmp = strline.find(*i, idx_start); + if (idx_tmp < idx_block) + { + idx_block = idx_tmp; + curBlckCmtStart = *i; + curBlckCmtEnd = *(BlockCommentEnd.begin() + (i - BlockCommentStart.begin())); + } + } + + // see what kind of comment appears first + if (idx_line == string::npos && idx_block == string::npos) + { + comment_type = 0; + idx_start = idx_line; + } + else if (idx_block > idx_line) + { + idx_start = idx_line; + comment_type = idx_start == 0 ? 1 : 2; + } + else + { + idx_start = idx_block; + comment_type = idx_start == 0 ? 3 : 4; + } + return 1; +} + +/*! +* Counts file language complexity based on specified language keywords/characters. +* +* \param fmap list of processed file lines +* \param result counter results +* +* \return method status +*/ +int CCodeCounter::CountComplexity(filemap* fmap, results* result) +{ + if (classtype == UNKNOWN || classtype == DATAFILE) + return 0; + filemap::iterator fit; + size_t idx; + unsigned int cnt, ret, cyclomatic_cnt = 0, ignore_cyclomatic_cnt = 0, main_cyclomatic_cnt = 0, function_count = 0; + string line, lastline, file_ext, function_name = ""; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + filemap function_stack; + stack cyclomatic_stack; + map function_map; + bool process_cyclomatic_complexity = false; + + // check whether to process cyclomatic complexity + if (cmplx_cyclomatic_list.size() > 0) + { + process_cyclomatic_complexity = true; + if (skip_cmplx_cyclomatic_file_extension_list.size() > 0) + { + idx = result->file_name.find_last_of("."); + if (idx != string::npos) + { + file_ext = result->file_name.substr(idx); + file_ext = CUtil::ToLower(file_ext); + if 
(find(skip_cmplx_cyclomatic_file_extension_list.begin(), skip_cmplx_cyclomatic_file_extension_list.end(), file_ext) != skip_cmplx_cyclomatic_file_extension_list.end()) + process_cyclomatic_complexity = false; + } + } + } + + // process each line + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + line = fit->line; + + if (CUtil::CheckBlank(line)) + continue; + + line = " " + line; + + // mathematical functions + cnt = 0; + CUtil::CountTally(line, math_func_list, cnt, 1, exclude, "", "", &result->math_func_count, casesensitive); + result->cmplx_math_lines += cnt; + + // trigonometric functions + cnt = 0; + CUtil::CountTally(line, trig_func_list, cnt, 1, exclude, "", "", &result->trig_func_count, casesensitive); + result->cmplx_trig_lines += cnt; + + // logarithmic functions + cnt = 0; + CUtil::CountTally(line, log_func_list, cnt, 1, exclude, "", "", &result->log_func_count, casesensitive); + result->cmplx_logarithm_lines += cnt; + + // calculations + cnt = 0; + CUtil::CountTally(line, cmplx_calc_list, cnt, 1, exclude, "", "", &result->cmplx_calc_count, casesensitive); + result->cmplx_calc_lines += cnt; + + // conditionals + cnt = 0; + CUtil::CountTally(line, cmplx_cond_list, cnt, 1, exclude, "", "", &result->cmplx_cond_count, casesensitive); + result->cmplx_cond_lines += cnt; + + // logical operators + cnt = 0; + CUtil::CountTally(line, cmplx_logic_list, cnt, 1, exclude, "", "", &result->cmplx_logic_count, casesensitive); + result->cmplx_logic_lines += cnt; + + // preprocessor directives + cnt = 0; + CUtil::CountTally(line, cmplx_preproc_list, cnt, 1, exclude, "", "", &result->cmplx_preproc_count, casesensitive); + result->cmplx_preproc_lines += cnt; + + // assignments + cnt = 0; + CUtil::CountTally(line, cmplx_assign_list, cnt, 1, exclude, "", "", &result->cmplx_assign_count, casesensitive); + result->cmplx_assign_lines += cnt; + + // pointers + cnt = 0; + CUtil::CountTally(line, cmplx_pointer_list, cnt, 1, exclude, "", "", &result->cmplx_pointer_count, 
casesensitive); + result->cmplx_pointer_lines += cnt; + + // cyclomatic complexity + if (process_cyclomatic_complexity) + { + // search for cyclomatic complexity keywords + CUtil::CountTally(line, cmplx_cyclomatic_list, cyclomatic_cnt, 1, exclude, "", "", 0, casesensitive); + + // search for keywords to exclude + if (ignore_cmplx_cyclomatic_list.size() > 0) + CUtil::CountTally(line, ignore_cmplx_cyclomatic_list, ignore_cyclomatic_cnt, 1, exclude, "", "", 0, casesensitive); + + // parse function name if found + ret = ParseFunctionName(line, lastline, function_stack, function_name, function_count); + if (ret != 1 && !cyclomatic_stack.empty() && cyclomatic_stack.size() == function_stack.size()) + { + // remove count stack entry for non-function names + cyclomatic_cnt += cyclomatic_stack.top(); + ignore_cyclomatic_cnt = 0; + cyclomatic_stack.pop(); + } + if (ret == 1) + { + // capture count at end of function + lineElement element(cyclomatic_cnt - ignore_cyclomatic_cnt + 1, function_name); + function_map[function_count] = element; + + if (!function_stack.empty()) + { + // grab previous function from stack to continue + if (!cyclomatic_stack.empty()) + { + cyclomatic_cnt = cyclomatic_stack.top(); + cyclomatic_stack.pop(); + } + } + else + cyclomatic_cnt = 0; + function_name = ""; + ignore_cyclomatic_cnt = 0; + } + else if (ret == 2) + { + // some code doesn't belong to any function + main_cyclomatic_cnt += cyclomatic_cnt - ignore_cyclomatic_cnt; + if (main_cyclomatic_cnt < 1) + main_cyclomatic_cnt = 1; // add 1 for main function here in case no other decision points are found in main + cyclomatic_cnt = ignore_cyclomatic_cnt = 0; + } + else if (!function_stack.empty() && (function_stack.size() > cyclomatic_stack.size() + 1 || (cyclomatic_stack.empty() && function_stack.size() > 1))) + { + // capture previous complexity count from open function + cyclomatic_stack.push(cyclomatic_cnt - ignore_cyclomatic_cnt); + cyclomatic_cnt = ignore_cyclomatic_cnt = 0; + } + } + } + + // 
done with a file + if (main_cyclomatic_cnt > 0) + { + // add "main" code + lineElement element(main_cyclomatic_cnt, "main"); + function_map[0] = element; + } + else + { + // finish the first function if not closed + while (!function_stack.empty()) + { + function_name = function_stack.back().line; + function_count = function_stack.back().lineNumber; + function_stack.pop_back(); + + if (!function_stack.empty()) + { + // grab previous function from stack to continue + if (!cyclomatic_stack.empty()) + { + cyclomatic_cnt = cyclomatic_stack.top(); + cyclomatic_stack.pop(); + } + } + else + { + // capture count at end of function + lineElement element(cyclomatic_cnt + 1, function_name); + function_map[0] = element; + } + } + } + + // process ordered functions + for (map::iterator it = function_map.begin(); it != function_map.end(); ++it) + result->cmplx_cycfunct_count.push_back(it->second); + + return 1; +} + +/*! +* Processes physical and logical lines. +* This method is typically implemented in the specific language sub-class. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CCodeCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* /*fmapBak*/) +{ + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++) + { + if (!CUtil::CheckBlank(iter->line)) + result->exec_lines[PHY]++; + } + return 1; +} diff --git a/src/CCodeCounter.h b/src/CCodeCounter.h new file mode 100644 index 0000000..4e8ebb4 --- /dev/null +++ b/src/CCodeCounter.h @@ -0,0 +1,109 @@ +//! Common code counter class for sub-classing individual languages. +/*! +* \file CCodeCounter.h +* +* This file contains the common code counter class for sub-classing individual languages. +*/ + +#ifndef CCodeCounter_h +#define CCodeCounter_h + +#include "cc_main.h" +#include "CUtil.h" + +//! 
//! Common code counter class.
/*!
* \class CCodeCounter
*
* Defines the common code counter class.
* Language counters derive from this class, fill in the keyword lists and
* quote/comment delimiters, and override the virtual processing steps.
*/
class CCodeCounter
{
public:
    CCodeCounter();
    virtual ~CCodeCounter();
    virtual void InitializeCounts();
    virtual int CountSLOC(filemap* fmap, results* result);
    bool IsSupportedFileExtension(const string &file_name);
    virtual ofstream* GetOutputStream(const string &outputFileNamePrePend = "",
        const string &cmd = "", bool csvOutput = false, bool legacyOutput = false);
    virtual void CloseOutputStream();

    StringVector directive;                                  //!< Directive statement keywords
    StringVector data_name_list;                             //!< Data statement keywords
    StringVector exec_name_list;                             //!< Executable statement keywords
    StringVector file_extension;                             //!< File extension

    StringVector math_func_list;                             //!< Math functions
    StringVector trig_func_list;                             //!< Trigonometric functions
    StringVector log_func_list;                              //!< Logarithmic functions
    StringVector cmplx_calc_list;                            //!< Calculations (complexity)
    StringVector cmplx_cond_list;                            //!< Conditionals (complexity)
    StringVector cmplx_logic_list;                           //!< Logicals (complexity)
    StringVector cmplx_preproc_list;                         //!< Preprocessor directives (complexity)
    StringVector cmplx_assign_list;                          //!< Assignments (complexity)
    StringVector cmplx_pointer_list;                         //!< Pointers (complexity)
    StringVector cmplx_cyclomatic_list;                      //!< Cyclomatic complexity decision keywords (complexity)
    StringVector ignore_cmplx_cyclomatic_list;               //!< Cyclomatic complexity decision keywords to ignore (for example End If)
    StringVector skip_cmplx_cyclomatic_file_extension_list;  //!< Cyclomatic complexity file extensions to skip

    UIntPairVector directive_count;                          //!< Count of each directive statement keyword
    UIntPairVector data_name_count;                          //!< Count of each data statement keyword
    UIntPairVector exec_name_count;                          //!< Count of each executable statement keyword

    UIntPairVector math_func_count;                          //!< Count of math functions
    UIntPairVector trig_func_count;                          //!< Count of trigonometric functions
    UIntPairVector log_func_count;                           //!< Count of logarithmic functions
    UIntPairVector cmplx_calc_count;                         //!< Count of calculations
    UIntPairVector cmplx_cond_count;                         //!< Count of conditionals
    UIntPairVector cmplx_logic_count;                        //!< Count of logicals
    UIntPairVector cmplx_preproc_count;                      //!< Count of preprocessor directives
    UIntPairVector cmplx_assign_count;                       //!< Count of assignments
    UIntPairVector cmplx_pointer_count;                      //!< Count of pointers

    bool print_cmplx;                                        //!< Print complexity and keyword counts
    size_t lsloc_truncate;                                   //!< # of characters allowed in LSLOC for differencing (0=no truncation)
    string language_name;                                    //!< Counter language name
    ClassType classtype;                                     //!< Language class type
    unsigned int counted_files;                              //!< Number of files counted
    unsigned int counted_dupFiles;                           //!< Number of duplicate files counted
    unsigned int total_filesA;                               //!< Total number of files in baseline A
    unsigned int total_filesB;                               //!< Total number of files in baseline B
    unsigned int total_dupFilesA;                            //!< Total number of duplicate files in baseline A
    unsigned int total_dupFilesB;                            //!< Total number of duplicate files in baseline B

protected:
    virtual void InitializeResultsCounts(results* result);
    static size_t FindQuote(string const &strline, string const &QuoteStart, size_t idx_start, char QuoteEscapeFront);
    virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd);
    virtual int PreCountProcess(filemap* /*fmap*/) { return 0; }
    int CountBlankSLOC(filemap* fmap, results* result);
    virtual int CountCommentsSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL);
    int FindCommentStart(string strline, size_t &idx_start, int &comment_type,
        string &curBlckCmtStart, string &curBlckCmtEnd);
    virtual int CountComplexity(filemap* fmap, results* result);
    virtual int CountDirectiveSLOC(filemap* /*fmap*/, results* /*result*/, filemap* /*fmapBak = NULL*/) { return 0; }
    virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL);
    virtual int ParseFunctionName(const string & /*line*/, string & /*lastline*/,
        filemap & /*functionStack*/, string & /*functionName*/, unsigned int & /*functionCount*/) { return 0; }

    StringVector exclude_keywords;                           //!< List of keywords to exclude from counts

    // if language supports multiple quote marks such as javascript, you can put all of them here, ex. "\"'"
    // (the end mark for a given start mark is the QuoteEnd character at the same index)
    string QuoteStart;                                       //!< Starting quotation mark(s)
    string QuoteEnd;                                         //!< Ending quotation mark(s)

    char QuoteEscapeFront;                                   //!< Escape character for front quote (ex. '\' in C++)
    char QuoteEscapeRear;                                    //!< Escape character for rear quote
    string ContinueLine;                                     //!< Line continuation character(s) (ex. \\ in C++)
    StringVector BlockCommentStart;                          //!< Block comment start character(s) (ex. /* in C++)
    StringVector BlockCommentEnd;                            //!< Block comment end character(s) (ex. */ in C++)
    StringVector LineCommentStart;                           //!< Single line or embedded comment character(s)

    bool casesensitive;                                      //!< Is the language case sensitive?

    ofstream output_file;                                    //!< Output file stream
    ofstream output_file_csv;                                //!< Output CSV file stream
};

#endif
diff --git a/src/CColdFusionCounter.cpp b/src/CColdFusionCounter.cpp
new file mode 100644
index 0000000..ece2d4e
--- /dev/null
+++ b/src/CColdFusionCounter.cpp
@@ -0,0 +1,175 @@
//! Code counter class methods for the ColdFusion language.
/*!
* \file CColdFusionCounter.cpp
*
* This file contains the code counter class methods for the ColdFusion language.
*/

#include "CColdFusionCounter.h"

/*!
* Constructs a CColdFusionCounter object.
+*/ +CColdFusionCounter::CColdFusionCounter() +{ + classtype = COLDFUSION; + language_name = "ColdFusion"; + + file_extension.push_back(".*cfm"); + + BlockCommentStart.push_back(""); + + data_name_list.push_back("cfapplication"); + data_name_list.push_back("cfapplet"); + data_name_list.push_back("cfargument"); + data_name_list.push_back("cfcomponent"); + data_name_list.push_back("cffunction"); + data_name_list.push_back("cfimport"); + data_name_list.push_back("cfinclude"); + data_name_list.push_back("cfinterface"); + data_name_list.push_back("cfproperty"); + data_name_list.push_back("cfset"); + + exec_name_list.push_back("cfabort"); + exec_name_list.push_back("cfassociate"); + exec_name_list.push_back("cfbreak"); + exec_name_list.push_back("cfcache"); + exec_name_list.push_back("cfcase"); + exec_name_list.push_back("cfcatch"); + exec_name_list.push_back("cfcontent"); + exec_name_list.push_back("cfcontinue"); + exec_name_list.push_back("cfcookie"); + exec_name_list.push_back("cfdbinfo"); + exec_name_list.push_back("cfdefaultcase"); + exec_name_list.push_back("cfdirectory"); + exec_name_list.push_back("cfdiv"); + exec_name_list.push_back("cfdocument"); + exec_name_list.push_back("cfdump"); + exec_name_list.push_back("cfelse"); + exec_name_list.push_back("cfelseif"); + exec_name_list.push_back("cferror"); + exec_name_list.push_back("cfexchange"); + exec_name_list.push_back("cfexecute"); + exec_name_list.push_back("cfexit"); + exec_name_list.push_back("cffeed"); + exec_name_list.push_back("cffile"); + exec_name_list.push_back("cffinally"); + exec_name_list.push_back("cfflush"); + exec_name_list.push_back("cfform"); + exec_name_list.push_back("cfftp"); + exec_name_list.push_back("cfgrid"); + exec_name_list.push_back("cfheader"); + exec_name_list.push_back("cfhtmlhead"); + exec_name_list.push_back("cfhttp"); + exec_name_list.push_back("cfif"); + exec_name_list.push_back("cfimage"); + exec_name_list.push_back("cfindex"); + exec_name_list.push_back("cfinput"); + 
exec_name_list.push_back("cfinsert"); + exec_name_list.push_back("cfinvoke"); + exec_name_list.push_back("cflayout"); + exec_name_list.push_back("cfldap"); + exec_name_list.push_back("cflocation"); + exec_name_list.push_back("cflock"); + exec_name_list.push_back("cflog"); + exec_name_list.push_back("cflogin"); + exec_name_list.push_back("cflogout"); + exec_name_list.push_back("cfloop"); + exec_name_list.push_back("cfmail"); + exec_name_list.push_back("cfobject"); + exec_name_list.push_back("cfoutput"); + exec_name_list.push_back("cfparam"); + exec_name_list.push_back("cfpod"); + exec_name_list.push_back("cfpop"); + exec_name_list.push_back("cfpresentation"); + exec_name_list.push_back("cfprint"); + exec_name_list.push_back("cfprocessdirective"); + exec_name_list.push_back("cfprocparam"); + exec_name_list.push_back("cfprocresult"); + exec_name_list.push_back("cfquery"); + exec_name_list.push_back("cfregistry"); + exec_name_list.push_back("cfrethrow"); + exec_name_list.push_back("cfreturn"); + exec_name_list.push_back("cfsavecontent"); + exec_name_list.push_back("cfschedule"); + exec_name_list.push_back("cfscript"); + exec_name_list.push_back("cfsearch"); + exec_name_list.push_back("cfselect"); + exec_name_list.push_back("cfsetting"); + exec_name_list.push_back("cfsilent"); + exec_name_list.push_back("cfstoredproc"); + exec_name_list.push_back("cfswitch"); + exec_name_list.push_back("cfthread"); + exec_name_list.push_back("cfthrow"); + exec_name_list.push_back("cftimer"); + exec_name_list.push_back("cftrace"); + exec_name_list.push_back("cftransaction"); + exec_name_list.push_back("cftry"); + exec_name_list.push_back("cfupdate"); + + math_func_list.push_back("abs"); + math_func_list.push_back("arrayavg"); + math_func_list.push_back("arraysum"); + math_func_list.push_back("ceiling"); + math_func_list.push_back("decrementvalue"); + math_func_list.push_back("exp"); + math_func_list.push_back("fix"); + math_func_list.push_back("incrementvalue"); + 
math_func_list.push_back("int"); + math_func_list.push_back("max"); + math_func_list.push_back("min"); + math_func_list.push_back("mod"); + math_func_list.push_back("pi"); + math_func_list.push_back("precisionevaluate"); + math_func_list.push_back("rand"); + math_func_list.push_back("randomize"); + math_func_list.push_back("randrange"); + math_func_list.push_back("round"); + math_func_list.push_back("sgn"); + math_func_list.push_back("sqr"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("atn"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("tan"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("++"); + cmplx_calc_list.push_back("--"); + cmplx_calc_list.push_back("mod"); + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("^"); + + cmplx_cond_list.push_back("cfcase"); + cmplx_cond_list.push_back("cfelse"); + cmplx_cond_list.push_back("cfelseif"); + cmplx_cond_list.push_back("cfif"); + cmplx_cond_list.push_back("cfloop"); + + cmplx_logic_list.push_back("eq"); + cmplx_logic_list.push_back("neq"); + cmplx_logic_list.push_back("gt"); + cmplx_logic_list.push_back("gte"); + cmplx_logic_list.push_back("lt"); + cmplx_logic_list.push_back("lte"); + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("xor"); + cmplx_logic_list.push_back("equiv"); + cmplx_logic_list.push_back("imp"); + cmplx_logic_list.push_back("is"); + + cmplx_assign_list.push_back("="); +} diff --git a/src/CColdFusionCounter.h b/src/CColdFusionCounter.h new file mode 100644 index 0000000..ee4a4c5 --- /dev/null +++ b/src/CColdFusionCounter.h @@ -0,0 
+1,25 @@ +//! Code counter class definition for the ColdFusion language. +/*! +* \file CColdFusionCounter.h +* +* This file contains the code counter class definition for the ColdFusion language. +*/ + +#ifndef CColdFusionCounter_h +#define CColdFusionCounter_h + +#include "CTagCounter.h" + +//! ColdFusion code counter class. +/*! +* \class CColdFusionCounter +* +* Defines the ColdFusion code counter class. +*/ +class CColdFusionCounter : public CTagCounter +{ +public: + CColdFusionCounter(); +}; + +#endif diff --git a/src/CCshCounter.cpp b/src/CCshCounter.cpp new file mode 100644 index 0000000..ddeaacd --- /dev/null +++ b/src/CCshCounter.cpp @@ -0,0 +1,442 @@ +//! Code counter class methods for the C shell script language. +/*! +* \file CCshCounter.cpp +* +* This file contains the code counter class methods for the C shell script language. +* This also includes the Tcsh language.*/ + +#include "CCshCounter.h" + +/*! +* Constructs a CCshCounter object. +*/ +CCshCounter::CCshCounter() +{ + classtype = CSH; + language_name = "C-Shell"; + + file_extension.push_back(".csh"); + file_extension.push_back(".tcsh"); + + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + QuoteEscapeFront = '\\'; + ContinueLine = "\\"; + LineCommentStart.push_back("#"); + + exclude_keywords.push_back("end"); + exclude_keywords.push_back("endif"); + exclude_keywords.push_back("endsw"); + + continue_keywords.push_back("case"); + continue_keywords.push_back("default"); + continue_keywords.push_back("else"); + + exec_name_list.push_back("alias"); + exec_name_list.push_back("break"); + exec_name_list.push_back("breaksw"); + exec_name_list.push_back("builtins"); + exec_name_list.push_back("case"); + exec_name_list.push_back("cd"); + exec_name_list.push_back("chdir"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("dirs"); + exec_name_list.push_back("echo"); + exec_name_list.push_back("eval"); + exec_name_list.push_back("exec"); + exec_name_list.push_back("exit"); + 
exec_name_list.push_back("foreach"); + exec_name_list.push_back("glob"); + exec_name_list.push_back("goto"); + exec_name_list.push_back("if"); + exec_name_list.push_back("onintr"); + exec_name_list.push_back("popd"); + exec_name_list.push_back("pushd"); + exec_name_list.push_back("rehash"); + exec_name_list.push_back("repeat"); + exec_name_list.push_back("set"); + exec_name_list.push_back("setenv"); + exec_name_list.push_back("shift"); + exec_name_list.push_back("source"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("time"); + exec_name_list.push_back("umask"); + exec_name_list.push_back("unalias"); + exec_name_list.push_back("unhash"); + exec_name_list.push_back("unset"); + exec_name_list.push_back("unsetenv"); + exec_name_list.push_back("while"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("++"); + cmplx_calc_list.push_back("--"); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("foreach"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("switch"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back("=~"); + cmplx_logic_list.push_back("!~"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + + cmplx_assign_list.push_back("="); +} + +/*! +* Perform preprocessing of file lines before counting. 
+* +* \param fmap list of file lines +* +* \return method status +*/ +int CCshCounter::PreCountProcess(filemap* fmap) +{ + filemap::iterator fit; + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + if (fit->line.empty()) + continue; + for (size_t i = fit->line.length() - 1; i > 0; i--) + { + // replace $# and ${# with $ to avoid determination of a comment + if (fit->line[i] == '#' && (fit->line[i-1] == '$' || fit->line[i-1] == '{')) + fit->line[i] = '$'; + } + } + return 0; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CCshCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + filemap::iterator fit, fitbak; + string line, lineBak; + + bool data_continue = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + string str; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + unsigned int loopLevel = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + lineBak = fitbak->line; + + // do not process blank lines (blank_line/comment_line/directive) + if (!CUtil::CheckBlank(line)) + { + // process logical SLOC + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, data_continue, + temp_lines, phys_exec_lines, phys_data_lines, loopLevel); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", 
&result->exec_name_count); + } + + // update physical SLOC lines + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param loopLevel nested loop level +*/ +void CCshCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, unsigned int &loopLevel) +{ + size_t start, end; + size_t i, j, m, strSize; + bool trunc_flag = false, found; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string str, spc; + unsigned int cnt = 0; + + string tmp = CUtil::TrimString(line); + string tmpBak = CUtil::TrimString(lineBak); + start = 0; + + // there may be more than 1 logical SLOC in this line + while (start < tmp.length()) + { + // check for semicolon to denote end of SLOC + end = tmp.find(";", start); + if (end != string::npos) + { + // handle empty statement + if (CUtil::TrimString(tmp.substr(start, end - start + 1)) == ";") + { + start = end + 1; + strLSLOC = strLSLOCBak = ""; + temp_lines = 0; + if (tmp == ";") + phys_exec_lines++; + 
continue; + } + } + else + end = tmp.length() - 1; + + // check for label + if (tmp[end] == ':' && tmp.substr(start, end - start + 1) != "default:") + { + i = tmp.find_first_of("\t ", start); + if (i == string::npos || i > end) + { + // skip label + start = end + 1; + if (start >= tmp.length()) + { + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + continue; + } + } + } + + // process nested loops + if (print_cmplx) + { + str = CUtil::TrimString(tmp.substr(start, end - start + 1)); + if (CUtil::FindKeyword(str, "foreach") != string::npos + || CUtil::FindKeyword(str, "while") != string::npos) + { + loopLevel++; + + // record nested loop level + if ((unsigned int)result->cmplx_nestloop_count.size() < loopLevel) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopLevel-1]++; + } + else if (CUtil::FindKeyword(str, "end") != string::npos && loopLevel > 0) + loopLevel--; + } + + // check for line containing excluded keywords + for (StringVector::iterator it = exclude_keywords.begin(); it != exclude_keywords.end(); it++) + { + i = CUtil::FindKeyword(tmp, (*it), start, end); + if (i != string::npos) + { + // strip specified keyword and skip if empty + start = i + (*it).length(); + if (CUtil::CheckBlank(CUtil::TrimString(tmp.substr(start, end - start)))) + start = end + 1; + break; + } + } + if (start > end) + { + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + continue; + } + + // check for continuation words + found = false; + if (tmp[end] == ';') + str = CUtil::TrimString(tmp.substr(start, end - start)); + else + str = CUtil::TrimString(tmp.substr(start, end - start + 1)); + for (StringVector::iterator it = continue_keywords.begin(); it != continue_keywords.end(); it++) + { + i = str.find((*it)); + if (i == 0) + { + // process else if + if ((*it) != "else" || str.length() < 7 || str.substr(0, 7) != "else if") + { + found = true; + strLSLOC += str 
+ " "; + if (tmp[end] == ';') + str = CUtil::TrimString(tmpBak.substr(start, end - start)); + else + str = CUtil::TrimString(tmpBak.substr(start, end - start + 1)); + strLSLOCBak += str + " "; + start = end + 1; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + temp_lines = 0; + } + } + } + if (found) + continue; + + // check for inline if + if (CUtil::FindKeyword(tmp, "if", start, end) == start) + { + i = CUtil::FindKeyword(tmp, "then", start, end); + if (i == string::npos) + { + // get end of if SLOC + found = false; + cnt = 0; + for (j = start + 2; j <= end; j++) + { + if (tmp[j] == '(') + { + found = true; + cnt++; + } + else if (tmp[j] == ')') + cnt--; + if (found && cnt < 1) + break; + } + + // save LSLOC for if statement, then process in-line action + strSize = CUtil::TruncateLine(j - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(start, strSize); + strLSLOCBak += tmpBak.substr(start, strSize); + } + start = j + 1; + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + phys_exec_lines++; + temp_lines = 0; + continue; + } + } + + // check for line continuation + if (tmp[end] == '\\') + { + // strip off trailing (\) + strSize = CUtil::TruncateLine(end - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + spc = ""; + str = tmp.substr(start, strSize); + for (m = str.length() - 1; m > 0; m--) + { + if (str[m] == ' ') + spc += " "; + else + break; + } + if (m == 0) + { + if (str[0] == ' ') + spc += " "; + } + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)) + spc; + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)) + spc; + } + start = end + 1; + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + + if (cnt > 0) + data_continue = true; + 
if (data_continue == true) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + } + else + { + // save LSLOC + if (tmp[end] == ';') + strSize = CUtil::TruncateLine(end - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + else + strSize = CUtil::TruncateLine(end - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)); + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)); + } + start = end + 1; + if (strLSLOCBak.length() > 0) + { + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + // add a logical SLOC + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + temp_lines++; + if (data_continue == true || cnt > 0) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + else if (data_continue == true) + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + } + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + } + } +} diff --git a/src/CCshCounter.h b/src/CCshCounter.h new file mode 100644 index 0000000..bf9c466 --- /dev/null +++ b/src/CCshCounter.h @@ -0,0 +1,35 @@ +//! Code counter class definition for the C shell script language. +/*! +* \file CCshCounter.h +* +* This file contains the code counter class definition for the C shell script language. +* This also includes the Tcsh language. +*/ + +#ifndef CCshCounter_h +#define CCshCounter_h + +#include "CCodeCounter.h" + +//! C shell script code counter class. +/*! +* \class CCshCounter +* +* Defines the C shell script code counter class. 
+*/ +class CCshCounter : public CCodeCounter +{ +public: + CCshCounter(); + +protected: + virtual int PreCountProcess(filemap* fmap); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, unsigned int &loopLevel); + + StringVector continue_keywords; //!< List of keywords to continue to next line +}; + +#endif diff --git a/src/CCsharpCounter.cpp b/src/CCsharpCounter.cpp new file mode 100644 index 0000000..d40960e --- /dev/null +++ b/src/CCsharpCounter.cpp @@ -0,0 +1,339 @@ +//! Code counter class methods for the C# language. +/*! +* \file CCsharpCounter.cpp +* +* This file contains the code counter class methods for the C# language. +*/ + +#include "CCsharpCounter.h" + +/*! +* Constructs a CCsharpCounter object. +*/ +CCsharpCounter::CCsharpCounter() +{ + classtype = CSHARP; + language_name = "C#"; + + isVerbatim = false; + + file_extension.push_back(".cs"); + + directive.push_back("#define"); + directive.push_back("#else"); + directive.push_back("#elif"); + directive.push_back("#endif"); + directive.push_back("#endregion"); + directive.push_back("#error"); + directive.push_back("#if"); + directive.push_back("#line"); + directive.push_back("#region"); + directive.push_back("#undef"); + directive.push_back("#warning"); + + data_name_list.push_back("abstract"); + data_name_list.push_back("bool"); + data_name_list.push_back("byte"); + data_name_list.push_back("char"); + data_name_list.push_back("class"); + data_name_list.push_back("const"); + data_name_list.push_back("decimal"); + data_name_list.push_back("delegate"); + data_name_list.push_back("double"); + data_name_list.push_back("enum"); + data_name_list.push_back("event"); + data_name_list.push_back("explicit"); + data_name_list.push_back("extern"); + 
data_name_list.push_back("float"); + data_name_list.push_back("implicit"); + data_name_list.push_back("int"); + data_name_list.push_back("interface"); + data_name_list.push_back("internal"); + data_name_list.push_back("long"); + data_name_list.push_back("namespace"); + data_name_list.push_back("object"); + data_name_list.push_back("operator"); + data_name_list.push_back("override"); + data_name_list.push_back("private"); + data_name_list.push_back("protected"); + data_name_list.push_back("public"); + data_name_list.push_back("readonly"); + data_name_list.push_back("sbyte"); + data_name_list.push_back("sealed"); + data_name_list.push_back("short"); + data_name_list.push_back("static"); + data_name_list.push_back("string"); + data_name_list.push_back("struct"); + data_name_list.push_back("uint"); + data_name_list.push_back("ulong"); + data_name_list.push_back("unsafe"); + data_name_list.push_back("ushort"); + data_name_list.push_back("using"); + data_name_list.push_back("virtual"); + data_name_list.push_back("void"); + data_name_list.push_back("volatile"); + + exec_name_list.push_back("as"); + exec_name_list.push_back("base"); + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("checked"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("default"); + exec_name_list.push_back("do"); + exec_name_list.push_back("else"); + exec_name_list.push_back("finally"); + exec_name_list.push_back("fixed"); + exec_name_list.push_back("for"); + exec_name_list.push_back("foreach"); + exec_name_list.push_back("goto"); + exec_name_list.push_back("if"); + exec_name_list.push_back("lock"); + exec_name_list.push_back("new"); + exec_name_list.push_back("return"); + exec_name_list.push_back("sizeof"); + exec_name_list.push_back("stackalloc"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("this"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("try"); + 
exec_name_list.push_back("typeof"); + exec_name_list.push_back("unchecked"); + exec_name_list.push_back("while"); + + math_func_list.push_back("abs"); + math_func_list.push_back("cbrt"); + math_func_list.push_back("ceil"); + math_func_list.push_back("copysign"); + math_func_list.push_back("erf"); + math_func_list.push_back("erfc"); + math_func_list.push_back("exp"); + math_func_list.push_back("exp2"); + math_func_list.push_back("expm1"); + math_func_list.push_back("fabs"); + math_func_list.push_back("floor"); + math_func_list.push_back("fdim"); + math_func_list.push_back("fma"); + math_func_list.push_back("fmax"); + math_func_list.push_back("fmin"); + math_func_list.push_back("fmod"); + math_func_list.push_back("frexp"); + math_func_list.push_back("hypot"); + math_func_list.push_back("ilogb"); + math_func_list.push_back("ldexp"); + math_func_list.push_back("lgamma"); + math_func_list.push_back("llrint"); + math_func_list.push_back("lrint"); + math_func_list.push_back("llround"); + math_func_list.push_back("lround"); + math_func_list.push_back("modf"); + math_func_list.push_back("nan"); + math_func_list.push_back("nearbyint"); + math_func_list.push_back("nextafter"); + math_func_list.push_back("nexttoward"); + math_func_list.push_back("pow"); + math_func_list.push_back("remainder"); + math_func_list.push_back("remquo"); + math_func_list.push_back("rint"); + math_func_list.push_back("round"); + math_func_list.push_back("scalbln"); + math_func_list.push_back("scalbn"); + math_func_list.push_back("sqrt"); + math_func_list.push_back("tgamma"); + math_func_list.push_back("trunc"); + + trig_func_list.push_back("cos"); + trig_func_list.push_back("cosh"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("sinh"); + trig_func_list.push_back("tan"); + trig_func_list.push_back("tanh"); + trig_func_list.push_back("acos"); + trig_func_list.push_back("acosh"); + trig_func_list.push_back("asinh"); + trig_func_list.push_back("atanh"); + trig_func_list.push_back("asin"); 
+ trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + log_func_list.push_back("log1p"); + log_func_list.push_back("log2"); + log_func_list.push_back("logb"); + + cmplx_preproc_list.push_back("define"); + cmplx_preproc_list.push_back("elif"); + cmplx_preproc_list.push_back("else"); + cmplx_preproc_list.push_back("endif"); + cmplx_preproc_list.push_back("endregion"); + cmplx_preproc_list.push_back("error"); + cmplx_preproc_list.push_back("if"); + cmplx_preproc_list.push_back("import"); + cmplx_preproc_list.push_back("line"); + cmplx_preproc_list.push_back("region"); + cmplx_preproc_list.push_back("undef"); + cmplx_preproc_list.push_back("warning"); + + cmplx_cyclomatic_list.push_back("if"); + cmplx_cyclomatic_list.push_back("case"); + cmplx_cyclomatic_list.push_back("while"); + cmplx_cyclomatic_list.push_back("for"); + cmplx_cyclomatic_list.push_back("foreach"); + cmplx_cyclomatic_list.push_back("catch"); + cmplx_cyclomatic_list.push_back("?"); +} + +/*! +* Perform preprocessing of file lines before counting. +* +* \param fmap list of file lines +* +* \return method status +*/ +int CCsharpCounter::PreCountProcess(filemap* fmap) +{ + size_t i; + bool found; + filemap::iterator fit; + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + if (fit->line.empty()) + continue; + // check for parenthesis within attribute brackets [...()] + found = false; + for (i = 0; i < fit->line.length(); i++) + { + if (fit->line[i] == '[') + found = true; + else if (found) + { + if (fit->line[i] == ']') + found = false; + else if (fit->line[i] == '(' || fit->line[i] == ')') + fit->line[i] = '$'; + } + } + } + return 0; +} + +/*! +* Replaces up to ONE quoted string inside a string starting at idx_start. 
/*!
* Replaces up to ONE quoted string inside a string starting at idx_start.
* The characters between the quote delimiters are overwritten with '$'.
* C# verbatim strings (@"...") are recognized: inside them the search for the
* closing quote is made with a 0x00 escape character instead of
* QuoteEscapeFront, and a doubled quote ("") is masked instead of ending the
* string.
*
* \param strline string to be processed (modified in place)
* \param idx_start index of line character to start search; on return, the
*                  index just past the processed quote, or the line length
*                  when no quote is found or the quote runs off the line
* \param contd specifies the quote string is continued from the previous line;
*              set to true on return when the closing quote was not found
* \param CurrentQuoteEnd end quote character of the current status
*
* \return method status (0 when no opening quote is found, otherwise 1)
*/
int CCsharpCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd)
{
	size_t idx_end, idx_quote, idx_verbatim;
	char noQuoteEscapeFront = 0x00;	// escape character passed to the search for verbatim strings

	if (contd)
	{
		// quote continued from the previous line: processing restarts at column 0
		idx_start = 0;
		if (strline[0] == CurrentQuoteEnd)
		{
			// the line opens with the closing quote; inside a verbatim string a
			// doubled quote ("") does not terminate the string
			if (!isVerbatim || (strline.length() < 2) || (strline[1] != '"'))
			{
				idx_start = 1;
				contd = false;
				return 1;
			}
		}
		else
			strline[0] = '$';	// first character belongs to the quoted text, mask it
	}
	else
	{
		// accommodate C# verbatim string (e.g. @"\")
		// NOTE(review): only the FIRST '@' on the line is examined, and it must sit
		// immediately before idx_start - callers appear to pass the quote position; confirm
		isVerbatim = false;
		idx_verbatim = strline.find_first_of("@");
		if (idx_verbatim != string::npos && idx_verbatim + 1 == idx_start)
			isVerbatim = true;

		// handle two quote chars in some languages, both " and ' may be accepted
		idx_start = FindQuote(strline, QuoteStart, idx_start, QuoteEscapeFront);
		if (idx_start != string::npos)
		{
			// choose the end quote that pairs with the start quote actually found
			idx_quote = QuoteStart.find_first_of(strline[idx_start]);
			CurrentQuoteEnd = QuoteEnd[idx_quote];
		}
		else
		{
			// no quote on this line
			idx_start = strline.length();
			return 0;
		}
	}

	// accommodate C# verbatim string (e.g. @"\")
	if (isVerbatim) // verbatim string
		idx_end = CUtil::FindCharAvoidEscape(strline, CurrentQuoteEnd, idx_start + 1, noQuoteEscapeFront);
	else
		idx_end = CUtil::FindCharAvoidEscape(strline, CurrentQuoteEnd, idx_start + 1, QuoteEscapeFront);
	if (idx_end == string::npos)
	{
		// closing quote not on this line: mask through the end of the line and
		// flag the quote as continued
		idx_end = strline.length() - 1;
		strline.replace(idx_start + 1, idx_end - idx_start, idx_end - idx_start, '$');
		contd = true;
		idx_start = idx_end + 1;
	}
	else
	{
		if ((isVerbatim && (strline.length() > idx_end + 1) && (strline[idx_end+1] == '"')) ||
			((QuoteEscapeRear) && (strline.length() > idx_end + 1) && (strline[idx_end+1] == QuoteEscapeRear)))
		{
			// the quote found is an escaped (doubled) quote: mask the pair;
			// idx_start and contd are left unchanged, so the same string is
			// presumably rescanned on the caller's next call - confirm against callers
			strline[idx_end] = '$';
			strline[idx_end+1] = '$';
		}
		else
		{
			// genuine closing quote: mask the text between the delimiters
			isVerbatim = false;
			contd = false;
			strline.replace(idx_start + 1, idx_end - idx_start - 1, idx_end - idx_start - 1, '$');
			idx_start = idx_end + 1;
		}
	}
	return 1;
}

/*!
* Constructs a CCsharpHtmlCounter object.
* Replaces the file extension list set up by the CCsharpCounter constructor
* with the single entry ".*cshtm".
*/
CCsharpHtmlCounter::CCsharpHtmlCounter()
{
	classtype = CSHARP_HTML;
	language_name = "C#/HTML";

	// NOTE(review): ".*cshtm" looks like an internal pseudo-extension for embedded code - confirm
	file_extension.clear();
	file_extension.push_back(".*cshtm");
}

/*!
* Constructs a CCsharpXmlCounter object.
* Replaces the file extension list set up by the CCsharpCounter constructor
* with the single entry ".*csxml".
*/
CCsharpXmlCounter::CCsharpXmlCounter()
{
	classtype = CSHARP_XML;
	language_name = "C#/XML";

	file_extension.clear();
	file_extension.push_back(".*csxml");
}

/*!
* Constructs a CCsharpAspCounter object.
* Replaces the file extension list set up by the CCsharpCounter constructor
* with the single entry ".*csasps".
*/
CCsharpAspCounter::CCsharpAspCounter()
{
	classtype = CSHARP_ASP_S;
	language_name = "C#/ASPNET";

	file_extension.clear();
	file_extension.push_back(".*csasps");
}
//! C# code counter class.
/*!
* \class CCsharpCounter
*
* Defines the C# code counter class.
* Overrides line preprocessing and quote replacement of CCJavaCsCounter.
*/
class CCsharpCounter : public CCJavaCsCounter
{
public:
	CCsharpCounter();

protected:
	//! Preprocesses file lines before counting.
	virtual int PreCountProcess(filemap* fmap);
	//! Replaces up to one quoted string in a line, with C# verbatim string support.
	virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd);

private:
	// true while the quote being processed by ReplaceQuote is a verbatim string (@"...");
	// the value is carried between ReplaceQuote calls for quotes continued across lines
	bool isVerbatim;
};

//! C# in HTML code counter class.
/*!
* \class CCsharpHtmlCounter
*
* Defines the C# in HTML code counter class.
*/
class CCsharpHtmlCounter : public CCsharpCounter
{
public:
	CCsharpHtmlCounter();
};

//! C# in XML code counter class.
/*!
* \class CCsharpXmlCounter
*
* Defines the C# in XML code counter class.
*/
class CCsharpXmlCounter : public CCsharpCounter
{
public:
	CCsharpXmlCounter();
};

//! C# in ASP code counter class.
/*!
* \class CCsharpAspCounter
*
* Defines the C# in ASP code counter class.
*/
class CCsharpAspCounter : public CCsharpCounter
{
public:
	CCsharpAspCounter();
};
+*/ +CCssCounter::CCssCounter() +{ + classtype = CSS; + language_name = "CSS"; + + file_extension.push_back(".css"); + + BlockCommentStart.push_back("/*"); + BlockCommentEnd.push_back("*/"); + + exec_name_list.push_back("azimuth"); + exec_name_list.push_back("background"); + exec_name_list.push_back("background-attachment"); + exec_name_list.push_back("background-color"); + exec_name_list.push_back("background-image"); + exec_name_list.push_back("background-repeat"); + exec_name_list.push_back("background-position"); + exec_name_list.push_back("border"); + exec_name_list.push_back("border-bottom"); + exec_name_list.push_back("border-collapse"); + exec_name_list.push_back("border-bottom-color"); + exec_name_list.push_back("border-bottom-style"); + exec_name_list.push_back("border-bottom-width"); + exec_name_list.push_back("border-left"); + exec_name_list.push_back("border-left-color"); + exec_name_list.push_back("border-left-style"); + exec_name_list.push_back("border-left-width"); + exec_name_list.push_back("border-right"); + exec_name_list.push_back("border-right-color"); + exec_name_list.push_back("border-right-style"); + exec_name_list.push_back("border-right-width"); + exec_name_list.push_back("border-spacing"); + exec_name_list.push_back("border-style"); + exec_name_list.push_back("border-top"); + exec_name_list.push_back("border-top-color"); + exec_name_list.push_back("border-top-style"); + exec_name_list.push_back("border-top-width"); + exec_name_list.push_back("border-width"); + exec_name_list.push_back("bottom"); + exec_name_list.push_back("caption-side"); + exec_name_list.push_back("clear"); + exec_name_list.push_back("clip"); + exec_name_list.push_back("content"); + exec_name_list.push_back("counter-decrement"); + exec_name_list.push_back("counter-increment"); + exec_name_list.push_back("counter-reset"); + exec_name_list.push_back("cue"); + exec_name_list.push_back("cue-after"); + exec_name_list.push_back("cue-before"); + 
exec_name_list.push_back("cursor"); + exec_name_list.push_back("direction"); + exec_name_list.push_back("empty-cells"); + exec_name_list.push_back("float"); + exec_name_list.push_back("font"); + exec_name_list.push_back("font-family"); + exec_name_list.push_back("font-size"); + exec_name_list.push_back("font-style"); + exec_name_list.push_back("font-variant"); + exec_name_list.push_back("font-weight"); + exec_name_list.push_back("height"); + exec_name_list.push_back("left"); + exec_name_list.push_back("letter-spacing"); + exec_name_list.push_back("line-height"); + exec_name_list.push_back("list-style"); + exec_name_list.push_back("list-style-image"); + exec_name_list.push_back("list-style-position"); + exec_name_list.push_back("list-style-type"); + exec_name_list.push_back("line-width"); + exec_name_list.push_back("margin"); + exec_name_list.push_back("margin-bottom"); + exec_name_list.push_back("margin-left"); + exec_name_list.push_back("margin-right"); + exec_name_list.push_back("margin-top"); + exec_name_list.push_back("max-height"); + exec_name_list.push_back("max-width"); + exec_name_list.push_back("min-height"); + exec_name_list.push_back("min-width"); + exec_name_list.push_back("orphans"); + exec_name_list.push_back("outline"); + exec_name_list.push_back("outline-color"); + exec_name_list.push_back("outline-style"); + exec_name_list.push_back("outline-width"); + exec_name_list.push_back("padding"); + exec_name_list.push_back("padding-bottom"); + exec_name_list.push_back("padding-left"); + exec_name_list.push_back("padding-right"); + exec_name_list.push_back("padding-top"); + exec_name_list.push_back("page-break-after"); + exec_name_list.push_back("page-break-before"); + exec_name_list.push_back("pause"); + exec_name_list.push_back("pause-after"); + exec_name_list.push_back("pitch"); + exec_name_list.push_back("play"); + exec_name_list.push_back("play-during"); + exec_name_list.push_back("pitch-range"); + exec_name_list.push_back("position"); + 
exec_name_list.push_back("quptes"); + exec_name_list.push_back("richness"); + exec_name_list.push_back("right"); + exec_name_list.push_back("speak"); + exec_name_list.push_back("speak-header"); + exec_name_list.push_back("speak-numeral"); + exec_name_list.push_back("speak-punctuation"); + exec_name_list.push_back("speech-rate"); + exec_name_list.push_back("stress"); + exec_name_list.push_back("table-layout"); + exec_name_list.push_back("text"); + exec_name_list.push_back("text-transform"); + exec_name_list.push_back("text-indent"); + exec_name_list.push_back("text-decoration"); + exec_name_list.push_back("text-align"); + exec_name_list.push_back("top"); + exec_name_list.push_back("unicode-bidi"); + exec_name_list.push_back("vertical-align"); + exec_name_list.push_back("visibility"); + exec_name_list.push_back("voice-family"); + exec_name_list.push_back("volume"); + exec_name_list.push_back("white-space"); + exec_name_list.push_back("width"); + exec_name_list.push_back("word-spacing"); + exec_name_list.push_back("z-index"); +} + +/*! +* Processes physical and logical lines according to language specific rules. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CCssCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* /*fmapBak*/) +{ + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + char prev_char = ' '; + string strLSLOC = ""; + string line = ""; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int cnt = 0; + + // The fmap may not contain any lines (e.g. when the file was not found). + // If this check is not performed, when the for loop starts on the second + // line (see +1 below) an exception will occur. 
Ideally this exception + // will be passed up to CCodeCounter::CountSLOC and then be caught in + // MainObject::ProcessSourceList resulting in the output "Unable to count + // file". However this exception causes a program crash in Microsoft + // Visual Studio 2008. Note that this check causes the exception not to + // occur, and therefore the "Unable to count file" message not be be + // displayed, but the more appropriate "unable to open file" message is + // displayed with or without this check. + if (fmap->size() == 0) + return 1; + + // iterating line-by-line of the file + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++) + { + line = iter->line; + + // checking for a blank line + if (!CUtil::CheckBlank(line)) + { + // LSLOC call for the current line, which is not blank and does not contain non-css keywords + LSLOC(result, line, strLSLOC, prev_char, phys_exec_lines, phys_data_lines); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(iter->line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + + // update physical SLOC + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results +* \param line processed physical line of code +* \param strLSLOC processed logical string +* \param lastLinesLastChar last character on previous line +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +*/ +void CCssCounter::LSLOC(results* result, string &line, string &strLSLOC, + char &lastLinesLastChar, unsigned int &phys_exec_lines, unsigned int &phys_data_lines) +{ + size_t index = 0, strSize; + bool trunc_flag = false; + string tmp = CUtil::TrimString(line); + + // iterating character-by-character for the current line + while (index < tmp.length()) + { + // checking current character + switch (line[index]) + { + case ';': case '}': + // adding the current exec strLSLOC to results + if (result->addSLOC(strLSLOC, trunc_flag)) + result->exec_lines[LOG]++; + + // reinitialize strLSLOC to null + strLSLOC = ""; + + break; + case '{': + // adding the current data strLSLOC to results + if (result->addSLOC(strLSLOC, trunc_flag)) + result->data_lines[LOG]++; + + // reinitialize strLSLOC to null + strLSLOC = ""; + + break; + default: + // by default for other characters than '{' or ';' or '}' this case is executed + // append character to strLSLOC + strSize = CUtil::TruncateLine(1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strLSLOC += tmp[index]; + break; + } + + // incrementing index to point to next character + index++; + } + + if (tmp[tmp.length() - 1] == ';' || + (lastLinesLastChar == ';' && tmp.find_first_of('{') == string::npos && tmp.find_first_of('}') == string::npos)) + phys_exec_lines++; + else + phys_data_lines++; + + // extract the last line's last character + lastLinesLastChar = tmp[tmp.length() - 1]; +} diff --git a/src/CCssCounter.h b/src/CCssCounter.h new file mode 100644 index 0000000..3f3a239 --- /dev/null +++ b/src/CCssCounter.h @@ -0,0 +1,30 @@ +//! 
// File: src/CCssCounter.h

//! CSS code counter class.
/*!
* \class CCssCounter
*
* Defines the CSS code counter class.
*/
class CCssCounter : public CCodeCounter
{
public:
	CCssCounter();

protected:
	//! Processes physical and logical lines; fmapBak is optional and defaults to NULL.
	virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL);
	//! Extracts logical SLOC from one physical line and classifies the line as executable or data.
	void LSLOC(results* result, string &line, string &strLSLOC, char &lastLinesLastChar,
		unsigned int &phys_exec_lines, unsigned int &phys_data_lines);
};

// File: src/CDataCounter.cpp

/*!
* Constructs a CDataCounter object.
* Only the class type and language name are set; no extensions, comment
* delimiters or keyword lists are registered here.
*/
CDataCounter::CDataCounter()
{
	classtype = DATAFILE;
	language_name = "Datafile";
}

// File: src/CDataCounter.h

//! Data file code counter class.
/*!
* \class CDataCounter
*
* Defines the data file code counter class.
* No counting methods are overridden; behavior comes from CCodeCounter.
*/
class CDataCounter : public CCodeCounter
{
public:
	CDataCounter();
};
Code counter class methods for the Fortran language. +/*! +* \file CFortranCounter.cpp +* +* This file contains the code counter class methods for the Fortran language. +* This includes F77, F90, F95, and F03 including fixed and free formats. +*/ + +#include "CFortranCounter.h" + +/*! +* Constructs a CFortranCounter object. +*/ +CFortranCounter::CFortranCounter() +{ + classtype = FORTRAN; + language_name = "Fortran"; + casesensitive = false; + + file_extension.push_back(".f"); + file_extension.push_back(".for"); + file_extension.push_back(".f77"); + file_extension.push_back(".f90"); + file_extension.push_back(".f95"); + file_extension.push_back(".f03"); + file_extension.push_back(".hpf"); + + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + ContinueLine = "&"; + + LineCommentStart.push_back("!"); + + exclude_keywords.push_back("case default"); + exclude_keywords.push_back("else"); + exclude_keywords.push_back("end"); + exclude_keywords.push_back("endblockdata"); + exclude_keywords.push_back("enddo"); + exclude_keywords.push_back("endfile"); + exclude_keywords.push_back("endfunction"); + exclude_keywords.push_back("endif"); + exclude_keywords.push_back("endinterface"); + exclude_keywords.push_back("endmodule"); + exclude_keywords.push_back("endprogram"); + exclude_keywords.push_back("endselect"); + exclude_keywords.push_back("endsubroutine"); + exclude_keywords.push_back("endtype"); + exclude_keywords.push_back("endwhere"); + exclude_keywords.push_back("endforall"); // f95+ only + exclude_keywords.push_back("endassociate"); // f03+ only + exclude_keywords.push_back("endenum"); // f03+ only + + c_keywords.push_back("call"); + c_keywords.push_back("case default"); + c_keywords.push_back("class default"); // f03+ only + c_keywords.push_back("character"); + c_keywords.push_back("common"); + c_keywords.push_back("complex"); + c_keywords.push_back("contains"); + c_keywords.push_back("continue"); + c_keywords.push_back("cycle"); + + directive.push_back("dictionary"); + 
directive.push_back("include"); + directive.push_back("options"); + + data_name_list.push_back("allocate"); + data_name_list.push_back("assign"); + data_name_list.push_back("associate"); // f03+ only + data_name_list.push_back("common"); + data_name_list.push_back("complex"); + data_name_list.push_back("character"); + data_name_list.push_back("contains"); + data_name_list.push_back("data"); + data_name_list.push_back("deallocate"); + data_name_list.push_back("dimension"); + data_name_list.push_back("double precision"); + data_name_list.push_back("enum"); // f03+ only + data_name_list.push_back("equivalence"); + data_name_list.push_back("external"); + data_name_list.push_back("final"); // f03+ only + data_name_list.push_back("function"); + data_name_list.push_back("generic"); // f03+ only + data_name_list.push_back("implicit"); + data_name_list.push_back("import"); // f03+ only + data_name_list.push_back("integer"); + data_name_list.push_back("interface"); + data_name_list.push_back("intrinsic"); + data_name_list.push_back("logical"); + data_name_list.push_back("module"); + data_name_list.push_back("namelist"); + data_name_list.push_back("nullify"); + data_name_list.push_back("optional"); + data_name_list.push_back("parameter"); + data_name_list.push_back("program"); + data_name_list.push_back("real"); + data_name_list.push_back("reallocate"); + data_name_list.push_back("recursive"); + data_name_list.push_back("save"); + data_name_list.push_back("select type"); // f03+ only + data_name_list.push_back("subroutine"); + data_name_list.push_back("type"); + data_name_list.push_back("use"); + + exec_name_list.push_back("backspace"); + exec_name_list.push_back("call"); + exec_name_list.push_back("close"); + exec_name_list.push_back("cycle"); + exec_name_list.push_back("do"); + exec_name_list.push_back("elseif"); + exec_name_list.push_back("entry"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("forall"); // f95+ only + exec_name_list.push_back("format"); + 
exec_name_list.push_back("goto"); + exec_name_list.push_back("if"); + exec_name_list.push_back("inquire"); + exec_name_list.push_back("open"); + exec_name_list.push_back("pause"); + exec_name_list.push_back("print"); + exec_name_list.push_back("read"); + exec_name_list.push_back("return"); + exec_name_list.push_back("rewind"); + exec_name_list.push_back("select case"); + exec_name_list.push_back("stop"); + exec_name_list.push_back("where"); + exec_name_list.push_back("write"); + + math_func_list.push_back("abs"); + math_func_list.push_back("ceiling"); + math_func_list.push_back("dim"); + math_func_list.push_back("dot_product"); + math_func_list.push_back("dprod"); + math_func_list.push_back("exp"); + math_func_list.push_back("floor"); + math_func_list.push_back("matmul"); + math_func_list.push_back("max"); + math_func_list.push_back("min"); + math_func_list.push_back("mod"); + math_func_list.push_back("modulo"); + math_func_list.push_back("sign"); + math_func_list.push_back("sqrt"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("acosh"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("asinh"); + trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + trig_func_list.push_back("atanh"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("cosh"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("sinh"); + trig_func_list.push_back("tan"); + trig_func_list.push_back("tanh"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("**"); + + cmplx_cond_list.push_back("do"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("else if"); + cmplx_cond_list.push_back("elseif"); + cmplx_cond_list.push_back("forall"); // f95+ only + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("select case"); + 
cmplx_cond_list.push_back("select type"); // f03+ only + + cmplx_logic_list.push_back(".and."); + cmplx_logic_list.push_back(".or."); + cmplx_logic_list.push_back(".not."); + cmplx_logic_list.push_back(".eqv."); + cmplx_logic_list.push_back(".neqv."); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("/="); + cmplx_logic_list.push_back(".eq."); + cmplx_logic_list.push_back(".ne."); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back(".gt."); + cmplx_logic_list.push_back(".lt."); + cmplx_logic_list.push_back(".ge."); + cmplx_logic_list.push_back(".le."); + cmplx_logic_list.push_back(".true."); + cmplx_logic_list.push_back(".false."); + + cmplx_preproc_list.push_back("dictionary"); + cmplx_preproc_list.push_back("include"); + cmplx_preproc_list.push_back("options"); + + cmplx_assign_list.push_back("="); + + cmplx_pointer_list.push_back("=>"); +} + +/*! +* Perform preprocessing of file lines before counting. +* Replace quote stuffing in literals '' or "" to avoid quote matching problems. +* +* \param fmap list of file lines +* +* \return method status +*/ +int CFortranCounter::PreCountProcess(filemap* fmap) +{ + size_t i; + filemap::iterator fit; + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + if (fit->line.empty()) + continue; + for (i = fit->line.length() - 1; i > 0; i--) + { + if ((fit->line[i] == '\'' && fit->line[i-1] == '\'') || (fit->line[i] == '"' && fit->line[i-1] == '"')) + { + fit->line[i] = '$'; + fit->line[i-1] = '$'; + } + } + } + return 0; +} + +/*! +* Counts the number of comment lines, removes comments, and +* replaces quoted strings by special chars, e.g., $ +* All arguments are modified by the method. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CFortranCounter::CountCommentsSLOC(filemap* fmap, results* result, filemap *fmapBak) +{ + if (LineCommentStart.empty()) + return 0; + if (classtype == UNKNOWN || classtype == DATAFILE) + return 0; + + bool contd_nextline; + int comment_type = 0; + /* + comment_type: + 0 : not comment + 1 : line comment, whole line + 2 : line comment, embedded + */ + + size_t i, idx_start, comment_start; + size_t quote_idx_start; + string curBlckCmtStart, curBlckCmtEnd, prevLine; + char CurrentQuoteEnd = 0; + bool quote_contd = false, found, foundSpc; + filemap::iterator itfmBak = fmapBak->begin(); + + quote_idx_start = 0; + prevLine = ""; + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + contd_nextline = false; + + quote_idx_start = 0; + idx_start = 0; + + if (CUtil::CheckBlank(iter->line)) + continue; + if (quote_contd) + { + // replace quote until next character + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + prevLine = itfmBak->line; + if (quote_contd) + continue; + } + + while (!contd_nextline && idx_start < iter->line.length()) + { + quote_idx_start = FindQuote(iter->line, QuoteStart, quote_idx_start, QuoteEscapeFront); + comment_start = idx_start; + + // check for comment delimiters 'C', 'c' in col 1 (works for most cases) + found = false; + if ((iter->line[0] == 'C' || iter->line[0] == 'c') && + (prevLine.length() < 1 || prevLine[prevLine.length() - 1] != '&')) + { + // check for reserved 'c' words + for (vector::iterator viter = c_keywords.begin(); viter != c_keywords.end(); viter++) + { + if (CUtil::FindKeyword(iter->line, *viter, 0, TO_END_OF_STRING, false) == 0) + { + found = true; + break; + } + } + if (!found) + { + // check for function or assignment (check for 'c__()' or 'c__ 
=') + foundSpc = false; + for (i = 1; i < iter->line.length(); i++) + { + if (iter->line[i] == '(') + { + found = true; + break; + } + else if (iter->line[i] == '=') + { + if (i >= iter->line.length() - 1 || iter->line[i + 1] != '=') + found = true; + break; + } + else if (iter->line[i] == ' ') + foundSpc = true; + else if (foundSpc) + break; + } + } + found = !found; + } + + // check for comment delimiters '*', '!' in col 1 + if (found || ((iter->line[0] == '*' || iter->line[0] == '!') && + (prevLine.length() < 1 || prevLine[prevLine.length() - 1] != '&'))) + { + comment_start = 0; + comment_type = 1; + } + // commented out to favor Fortran 90+ (in Fortran 77 any character in column 6 indicates continuation, not comment) + // else if (iter->line.length() > 6 && iter->line[5] == '!' && CUtil::CheckBlank(iter->line.substr(0, 5))) + // comment_start = string::npos; + else + { + FindCommentStart(iter->line, comment_start, comment_type, curBlckCmtStart, curBlckCmtEnd); + if (comment_start != string::npos) + { + // check for characters before comment + for (i = 0; i < comment_start; i++) + { + if (iter->line[i] != ' ') + { + comment_type = 2; + break; + } + } + } + } + + if (comment_start == string::npos && quote_idx_start == string::npos) + { + prevLine = itfmBak->line; + break; + } + + if (comment_start != string::npos) + idx_start = comment_start; + + // if found quote before comment + if (quote_idx_start != string::npos && (comment_start == string::npos || quote_idx_start < comment_start)) + { + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_idx_start > idx_start) + { + if (quote_contd) + { + if (itfmBak->line[itfmBak->line.length() - 1] == '&') + { + iter->line[iter->line.length() - 1] = '&'; + if (itfmBak->line.length() > 2 && itfmBak->line[itfmBak->line.length() - 2] == ' ') + iter->line[iter->line.length() - 2] = ' '; + } + } + idx_start = quote_idx_start; + prevLine = itfmBak->line; + continue; // comment delimiter inside 
quote + } + } + else if (idx_start != string::npos) + { + // comment delimiter starts first + switch(comment_type) + { + case 1: // line comment, definitely whole line + case 3: + prevLine = ""; + iter->line = ""; + itfmBak->line = ""; + result->comment_lines++; + contd_nextline = true; + break; + case 2: // line comment, possibly embedded + case 4: + result->e_comm_lines++; + prevLine = ""; + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + contd_nextline = true; + break; + default: + cout << "Error in CountCommentsSLOC()"; + break; + } + } + } + } + return 1; +} + +/*! +* Counts directive lines of code. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CFortranCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = "", tmp, str; + filemap::iterator itfmBak = fmapBak->begin(); + + for (filemap::iterator iter = fmap->begin(); iter!=fmap->end(); iter++, itfmBak++) + { + tmp = CUtil::TrimString(iter->line); + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count, false); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + if (((idx = 
CUtil::FindKeyword(iter->line, *viter, 0, TO_END_OF_STRING, false)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = CUtil::TrimString(itfmBak->line.substr(0, strSize)); + if (strDirLine[strDirLine.length() - 1] == '&') + strDirLine = CUtil::TrimString(strDirLine.substr(0, strDirLine.length() - 1)); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + str = CUtil::TrimString(itfmBak->line.substr(0, strSize)); + if (str[0] == '&') + strDirLine += CUtil::TrimString(str.substr(1, str.length() - 1)); + else + strDirLine += str; + } + result->directive_lines[PHY]++; + } + + if (contd) + { + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length() - 1] != '&') + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. 
+* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CFortranCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + filemap::iterator fit, fitbak, fitNext = fmap->begin(); + string line, lineBak, lineNext; + + bool data_continue = false, fixed_continue = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + string str; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + StringVector loopEnd; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + if (fitNext != fmap->end()) + fitNext++; + line = fit->line; + lineBak = fitbak->line; + + // do not process blank lines (blank_line/comment_line/directive) + if (!CUtil::CheckBlank(line)) + { + // get next line to check for fixed format continuation (non-blank and non-0 in col 6) + lineNext = ""; + fixed_continue = false; + if (fitNext != fmap->end()) + { + lineNext = fitNext->line; + if (!CUtil::CheckBlank(lineNext)) + { + if (lineNext.length() > 6) + { + str = lineNext.substr(0, 5); + if ((CUtil::CheckBlank(str) || CUtil::IsInteger(str)) && + lineNext[5] != ' ' && lineNext[5] != '0') + { + // fixed format continuation + fixed_continue = true; + } + } + } + } + + // process logical SLOC + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, fixed_continue, + data_continue, temp_lines, phys_exec_lines, phys_data_lines, loopEnd); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", 
&result->exec_name_count, false); + } + + // update physical SLOC lines + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param fixed_continue fixed format line continues on next line +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param loopEnd nested loop end string(s) +*/ +void CFortranCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &fixed_continue, bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, StringVector &loopEnd) +{ + size_t start, end; + size_t i, j, k, m, strSize; + bool found_exclusion = false, trunc_flag = false, found; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string str, spc; + unsigned int cnt = 0; + unsigned int paren_cnt; + + string tmp = CUtil::TrimString(line); + string tmpBak = CUtil::TrimString(lineBak); + start = 0; + + // if continuation, prepend previous line for correct processing + if (strLSLOC.length() > 0) + { + found = false; + if (tmpBak[0] == '&' && tmpBak.length() > 1) + found = true; + else + { + if (line.length() > 6) + { + str = line.substr(0, 5); + if 
((CUtil::CheckBlank(str) || CUtil::IsInteger(str)) && + line[5] != ' ' && line[5] != '0') + found = true; + } + } + if (found) + { + tmp = tmp.substr(1, tmp.length() - 1); + tmpBak = tmpBak.substr(1, tmpBak.length() - 1); + } + tmp = strLSLOC + tmp; + tmpBak = strLSLOCBak + tmpBak; + strLSLOC = strLSLOCBak = ""; + } + + // there may be more than 1 logical SLOC in this line + while (start < tmp.length()) + { + // check for semicolon to denote end of SLOC + end = tmp.find(";", start); + if (end == string::npos) + end = tmp.length() - 1; + else + { + // skip empty ";" + str = CUtil::TrimString(tmp.substr(start, end - start + 1)); + if (str == ";") + { + start = end + 1; + continue; + } + } + + // record nested loops + if (print_cmplx) + { + bool new_loop = false; + i = CUtil::FindKeyword(tmp, "end do", start, end, false); + if (i != string::npos) + i = string::npos; + else + i = CUtil::FindKeyword(tmp, "do", start, end, false); + if (i != string::npos) + { + // check for label after do + found = false; + if (i + 2 < end && tmp[i+2] == ' ') + { + for (j = i + 3; j <= end; j++) + { + if (tmp[j] != ' ') + { + for (k = j; k <= end; k++) + { + if (tmp[k] == ' ' || tmp[k] == ',') + break; + } + k--; + str = CUtil::TrimString(tmp.substr(j, k - j + 1)); + if (CUtil::IsInteger(str)) + { + loopEnd.push_back(str); + found = true; + } + break; + } + } + } + if (!found) + loopEnd.push_back("end do"); + new_loop = true; + } + else + { + i = CUtil::FindKeyword(tmp, "end forall", start, end, false); + if (i != string::npos) + i = string::npos; + else + i = CUtil::FindKeyword(tmp, "forall", start, end, false); + if (i != string::npos) + { + loopEnd.push_back("end forall"); + new_loop = true; + } + else if (loopEnd.size() > 0) + { + str = loopEnd.back(); + if (CUtil::FindKeyword(tmp, str, start, end, false) != string::npos) + { + loopEnd.pop_back(); + if (CUtil::IsInteger(str)) + { + // remove additional label terminators + if (loopEnd.size() > 0) + { + for (m = loopEnd.size() - 1; m > 
0; m--) + { + if (loopEnd[m] == str) + loopEnd.pop_back(); + else + break; + } + if (m == 0) + { + if (loopEnd[0] == str) + loopEnd.pop_back(); + } + } + } + } + else if (loopEnd.back() == "end do") + { + if (CUtil::FindKeyword(tmp, "enddo", start, end, false) != string::npos) + loopEnd.pop_back(); + } + else if (loopEnd.back() == "end forall") + { + if (CUtil::FindKeyword(tmp, "endforall", start, end, false) != string::npos) + loopEnd.pop_back(); + } + } + } + if (new_loop) + { + if (result->cmplx_nestloop_count.size() < loopEnd.size()) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopEnd.size()-1]++; + } + } + + // check for line containing excluded keywords (don't count as LSLOC) + found_exclusion = false; + for (StringVector::iterator it = exclude_keywords.begin(); it != exclude_keywords.end(); it++) + { + i = CUtil::FindKeyword(tmp, (*it), start, end, false); + if (i != string::npos) + { + found_exclusion = true; + + // exceptions + if ((*it) == "else") + { + // make sure not else if + if (CUtil::FindKeyword(tmp, "else if", i, i + 6, false) == string::npos) + break; + } + if ((*it) == "elsewhere") + { + // make sure elsewhere does not have condition + str = CUtil::TrimString(tmp.substr(i + 9, end - (i + 9) + 1)); + if (str[0] != '(') + break; + } + else + break; + } + } + if (found_exclusion) + { + strLSLOC = strLSLOCBak = ""; + phys_exec_lines++; + temp_lines = 0; + return; + } + + // check for inline if, where, forall + found = false; + i = CUtil::FindKeyword(tmp, "if", start, end, false); + if (i != string::npos) + { + if (CUtil::FindKeyword(tmp, "then", start, end, false) == string::npos) + found = true; + } + if (!found) + { + i = CUtil::FindKeyword(tmp, "where", start, end, false); + if (i != string::npos) + found = true; + } + if (!found) + { + i = CUtil::FindKeyword(tmp, "forall", start, end, false); + if (i != string::npos) + found = true; + } + if (found) + { + // check if in-line action exists after if statement 
(past ()) + found = false; + paren_cnt = 0; + for (j = i + 2; j <= end; j++) + { + if (tmp[j] == '(') + { + found = true; + paren_cnt++; + } + else if (tmp[j] == ')' && paren_cnt > 0) + paren_cnt--; + if (found && paren_cnt == 0) + { + if (j < end) + { + str = CUtil::TrimString(tmp.substr(j + 1, end - j + 1)); + if (!CUtil::CheckBlank(str) && str != ";" && str != "&" && !fixed_continue) + { + // save LSLOC for if statement, then process in-line action + strSize = CUtil::TruncateLine(j - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)); + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + start = j + 1; + } + } + break; + } + } + } + + // check for fixed continuation or free continuation (&) + if (tmp[end] == '&' || (fixed_continue && end >= tmp.length() - 1)) + { + // strip off trailing (&) + if (tmp[end] == '&') + strSize = CUtil::TruncateLine(end - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + else + strSize = CUtil::TruncateLine(end - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + spc = ""; + str = tmp.substr(start, strSize); + for (m = str.length() - 1; m > 0; m--) + { + if (str[m] == ' ') + spc += " "; + else + break; + } + if (m == 0) + { + if (str[0] == ' ') + spc += " "; + } + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)) + spc; + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)) + spc; + } + start = end + 1; + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL, false); + + if (cnt > 0) + data_continue = true; + if (data_continue == true) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; + 
} + else + { + // save LSLOC + if (tmp[end] == ';') + strSize = CUtil::TruncateLine(end - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + else + strSize = CUtil::TruncateLine(end - start + 1, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += CUtil::TrimString(tmp.substr(start, strSize)); + strLSLOCBak += CUtil::TrimString(tmpBak.substr(start, strSize)); + } + start = end + 1; + if (strLSLOCBak.length() > 0) + { + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + // add a logical SLOC + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count, false); + + temp_lines++; + if (data_continue == true || cnt > 0) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + else if (data_continue == true) + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + } + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + } + } +} diff --git a/src/CFortranCounter.h b/src/CFortranCounter.h new file mode 100644 index 0000000..e0a1edf --- /dev/null +++ b/src/CFortranCounter.h @@ -0,0 +1,37 @@ +//! Code counter class definition for the Fortran language. +/*! +* \file CFortranCounter.h +* +* This file contains the code counter class definition for the Fortran language. +* This includes F77, F90, F95, and F03 including fixed and free formats. +*/ + +#ifndef CFortranCounter_h +#define CFortranCounter_h + +#include "CCodeCounter.h" + +//! Fortran code counter class. +/*! +* \class CFortranCounter +* +* Defines the Fortran code counter class. 
+*/ +class CFortranCounter : public CCodeCounter +{ +public: + CFortranCounter(); + +protected: + StringVector c_keywords; //!< List of keywords starting with 'c' with flexible formats + + virtual int PreCountProcess(filemap* fmap); + virtual int CountCommentsSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &fixed_continue, bool &data_continue, unsigned int &temp_lines, + unsigned int &phys_exec_lines, unsigned int &phys_data_lines, StringVector &loopEnd); +}; + +#endif diff --git a/src/CHtmlCounter.cpp b/src/CHtmlCounter.cpp new file mode 100644 index 0000000..b255154 --- /dev/null +++ b/src/CHtmlCounter.cpp @@ -0,0 +1,140 @@ +//! Code counter class methods for the HTML language. +/*! +* \file CHtmlCounter.cpp +* +* This file contains the code counter class methods for the HTML language. +*/ + +#include "CHtmlCounter.h" + +/*! +* Constructs a CHtmlCounter object. 
+*/ +CHtmlCounter::CHtmlCounter() +{ + classtype = HTML; + language_name = "HTML"; + + file_extension.push_back(".*htm"); + + BlockCommentStart.push_back(""); + + exec_name_list.push_back("address"); + exec_name_list.push_back("applet"); + exec_name_list.push_back("area"); + exec_name_list.push_back("a"); + exec_name_list.push_back("base"); + exec_name_list.push_back("basefont"); + exec_name_list.push_back("big"); + exec_name_list.push_back("blockquote"); + exec_name_list.push_back("body"); + exec_name_list.push_back("br"); + exec_name_list.push_back("b"); + exec_name_list.push_back("caption"); + exec_name_list.push_back("center"); + exec_name_list.push_back("cite"); + exec_name_list.push_back("code"); + exec_name_list.push_back("dd"); + exec_name_list.push_back("dfn"); + exec_name_list.push_back("dir"); + exec_name_list.push_back("div"); + exec_name_list.push_back("dl"); + exec_name_list.push_back("dt"); + exec_name_list.push_back("em"); + exec_name_list.push_back("font"); + exec_name_list.push_back("form"); + exec_name_list.push_back("h1"); + exec_name_list.push_back("h2"); + exec_name_list.push_back("h3"); + exec_name_list.push_back("h4"); + exec_name_list.push_back("h5"); + exec_name_list.push_back("h6"); + exec_name_list.push_back("head"); + exec_name_list.push_back("hr"); + exec_name_list.push_back("html"); + exec_name_list.push_back("img"); + exec_name_list.push_back("input"); + exec_name_list.push_back("isindex"); + exec_name_list.push_back("i"); + exec_name_list.push_back("jsp"); + exec_name_list.push_back("kbd"); + exec_name_list.push_back("link"); + exec_name_list.push_back("li"); + exec_name_list.push_back("map"); + exec_name_list.push_back("menu"); + exec_name_list.push_back("meta"); + exec_name_list.push_back("ol"); + exec_name_list.push_back("option"); + exec_name_list.push_back("param"); + exec_name_list.push_back("pre"); + exec_name_list.push_back("p"); + exec_name_list.push_back("samp"); + exec_name_list.push_back("script"); + 
exec_name_list.push_back("select"); + exec_name_list.push_back("small"); + exec_name_list.push_back("span"); + exec_name_list.push_back("strike"); + exec_name_list.push_back("strong"); + exec_name_list.push_back("style"); + exec_name_list.push_back("sub"); + exec_name_list.push_back("sup"); + exec_name_list.push_back("table"); + exec_name_list.push_back("td"); + exec_name_list.push_back("textarea"); + exec_name_list.push_back("th"); + exec_name_list.push_back("title"); + exec_name_list.push_back("tr"); + exec_name_list.push_back("tt"); + exec_name_list.push_back("ul"); + exec_name_list.push_back("u"); + exec_name_list.push_back("var"); +} + +/*! +* Constructs a CHtmlPhpCounter object. +*/ +CHtmlPhpCounter::CHtmlPhpCounter() +{ + classtype = HTML_PHP; + language_name = "HTML/PHP"; + + file_extension.clear(); + file_extension.push_back(".*htmphp"); +} + +/*! +* Constructs a CHtmlJspCounter object. +*/ +CHtmlJspCounter::CHtmlJspCounter() +{ + classtype = HTML_JSP; + language_name = "HTML/JSP"; + + file_extension.clear(); + file_extension.push_back(".*htmjsp"); +} + +/*! +* Constructs a CHtmlAspCounter object. +*/ +CHtmlAspCounter::CHtmlAspCounter() +{ + classtype = HTML_ASP; + language_name = "HTML/ASP"; + + file_extension.clear(); + file_extension.push_back(".*htmasp"); +} + +/*! +* Constructs a CHtmlColdFusionCounter object. +*/ +CHtmlColdFusionCounter::CHtmlColdFusionCounter() +{ + classtype = HTML_CFM; + language_name = "HTML/ColdFusion"; + + file_extension.clear(); + file_extension.push_back(".*htmcfm"); +} diff --git a/src/CHtmlCounter.h b/src/CHtmlCounter.h new file mode 100644 index 0000000..6466fd9 --- /dev/null +++ b/src/CHtmlCounter.h @@ -0,0 +1,73 @@ +//! Code counter class definition for the HTML language. +/*! +* \file CHtmlCounter.h +* +* This file contains the code counter class definition for the HTML language. +*/ + +#ifndef CHtmlCounter_h +#define CHtmlCounter_h + +#include "CTagCounter.h" + +//! HTML code counter class. +/*! 
+* \class CHtmlCounter +* +* Defines the HTML code counter class. +*/ +class CHtmlCounter : public CTagCounter +{ +public: + CHtmlCounter(); +}; + +//! HTML in PHP code counter class. +/*! +* \class CHtmlPhpCounter +* +* Defines the HTML in PHP code counter class. +*/ +class CHtmlPhpCounter : public CHtmlCounter +{ +public: + CHtmlPhpCounter(); +}; + +//! HTML in JSP code counter class. +/*! +* \class CHtmlJspCounter +* +* Defines the HTML in JSP code counter class. +*/ +class CHtmlJspCounter : public CHtmlCounter +{ +public: + CHtmlJspCounter(); +}; + +//! HTML in ASP code counter class. +/*! +* \class CHtmlAspCounter +* +* Defines the HTML in ASP code counter class. +*/ +class CHtmlAspCounter : public CHtmlCounter +{ +public: + CHtmlAspCounter(); +}; + +//! HTML in ColdFusion code counter class. +/*! +* \class CHtmlColdFusionCounter +* +* Defines the HTML in ColdFusion code counter class. +*/ +class CHtmlColdFusionCounter : public CHtmlCounter +{ +public: + CHtmlColdFusionCounter(); +}; + +#endif diff --git a/src/CJavaCounter.cpp b/src/CJavaCounter.cpp new file mode 100644 index 0000000..1189884 --- /dev/null +++ b/src/CJavaCounter.cpp @@ -0,0 +1,132 @@ +//! Code counter class methods for the Java language. +/*! +* \file CJavaCounter.cpp +* +* This file contains the code counter class methods for the Java language. +*/ + +#include "CJavaCounter.h" + +/*! +* Constructs a CJavaCounter object. 
+*/ +CJavaCounter::CJavaCounter() +{ + classtype = JAVA; + language_name = "Java"; + + file_extension.push_back(".java"); + + directive.push_back("import"); + directive.push_back("package"); + + data_name_list.push_back("abstract"); + data_name_list.push_back("ArrayList"); + data_name_list.push_back("boolean"); + data_name_list.push_back("byte"); + data_name_list.push_back("char"); + data_name_list.push_back("class"); + data_name_list.push_back("double"); + data_name_list.push_back("extends"); + data_name_list.push_back("float"); + data_name_list.push_back("HashMap"); + data_name_list.push_back("HashSet"); + data_name_list.push_back("implements"); + data_name_list.push_back("int"); + data_name_list.push_back("interface"); + data_name_list.push_back("LinkedHashMap"); + data_name_list.push_back("LinkedList"); + data_name_list.push_back("long"); + data_name_list.push_back("native"); + data_name_list.push_back("private"); + data_name_list.push_back("protected"); + data_name_list.push_back("public"); + data_name_list.push_back("short"); + data_name_list.push_back("static"); + data_name_list.push_back("String"); + data_name_list.push_back("TreeMap"); + data_name_list.push_back("Vector"); + data_name_list.push_back("void"); + data_name_list.push_back("volatile"); + + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("default"); + exec_name_list.push_back("do"); + exec_name_list.push_back("else"); + exec_name_list.push_back("finally"); + exec_name_list.push_back("for"); + exec_name_list.push_back("if"); + exec_name_list.push_back("new"); + exec_name_list.push_back("return"); + exec_name_list.push_back("super"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("this"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("throws"); + exec_name_list.push_back("try"); + exec_name_list.push_back("while"); + + 
math_func_list.push_back("Math.abs"); + math_func_list.push_back("Math.cbrt"); + math_func_list.push_back("Math.ceil"); + math_func_list.push_back("Math.copySign"); + math_func_list.push_back("Math.E"); + math_func_list.push_back("Math.exp"); + math_func_list.push_back("Math.expm1"); + math_func_list.push_back("Math.floor"); + math_func_list.push_back("Math.getExponent"); + math_func_list.push_back("Math.hypot"); + math_func_list.push_back("Math.IEEEremainder"); + math_func_list.push_back("Math.max"); + math_func_list.push_back("Math.min"); + math_func_list.push_back("Math.nextAfter"); + math_func_list.push_back("Math.nextUp"); + math_func_list.push_back("Math.PI"); + math_func_list.push_back("Math.pow"); + math_func_list.push_back("Math.random"); + math_func_list.push_back("Math.rint"); + math_func_list.push_back("Math.round"); + math_func_list.push_back("Math.scalb"); + math_func_list.push_back("Math.signum"); + math_func_list.push_back("Math.sqrt"); + math_func_list.push_back("Math.toRadians"); + math_func_list.push_back("Math.toDegrees"); + math_func_list.push_back("Math.ulp"); + + trig_func_list.push_back("Math.acos"); + trig_func_list.push_back("Math.asin"); + trig_func_list.push_back("Math.atan"); + trig_func_list.push_back("Math.atan2"); + trig_func_list.push_back("Math.cos"); + trig_func_list.push_back("Math.cosh"); + trig_func_list.push_back("Math.sin"); + trig_func_list.push_back("Math.sinh"); + trig_func_list.push_back("Math.tan"); + trig_func_list.push_back("Math.tanh"); + + log_func_list.push_back("Math.log"); + log_func_list.push_back("Math.log10"); + log_func_list.push_back("Math.log1p"); + + cmplx_cyclomatic_list.push_back("if"); + cmplx_cyclomatic_list.push_back("case"); + cmplx_cyclomatic_list.push_back("while"); + cmplx_cyclomatic_list.push_back("for"); + cmplx_cyclomatic_list.push_back("catch"); + cmplx_cyclomatic_list.push_back("?"); +} + +/*! +* Constructs a CJavaJspCounter object. 
+*/ +CJavaJspCounter::CJavaJspCounter() +{ + classtype = JAVA_JSP; + language_name = "Java/JSP"; + + file_extension.clear(); + file_extension.push_back(".*java"); +} diff --git a/src/CJavaCounter.h b/src/CJavaCounter.h new file mode 100644 index 0000000..baf24eb --- /dev/null +++ b/src/CJavaCounter.h @@ -0,0 +1,37 @@ +//! Code counter class definition for the Java language. +/*! +* \file CJavaCounter.h +* +* This file contains the code counter class definition for the Java language. +*/ + +#ifndef CJavaCounter_h +#define CJavaCounter_h + +#include "CCJavaCsCounter.h" + +//! Java code counter class. +/*! +* \class CJavaCounter +* +* Defines the Java code counter class. +*/ +class CJavaCounter : public CCJavaCsCounter +{ +public: + CJavaCounter(); +}; + +//! Java in JSP code counter class. +/*! +* \class CJavaJspCounter +* +* Defines the Java in JSP code counter class. +*/ +class CJavaJspCounter : public CJavaCounter +{ +public: + CJavaJspCounter(); +}; + +#endif diff --git a/src/CJavascriptCounter.cpp b/src/CJavascriptCounter.cpp new file mode 100644 index 0000000..a49de9a --- /dev/null +++ b/src/CJavascriptCounter.cpp @@ -0,0 +1,808 @@ +//! Code counter class methods for the JavaScript language. +/*! +* \file CJavascriptCounter.cpp +* +* This file contains the code counter class methods for the JavaScript language. +*/ + +#include "CJavascriptCounter.h" + +/*! +* Constructs a CJavascriptCounter object. 
+*/ +CJavascriptCounter::CJavascriptCounter() +{ + classtype = JAVASCRIPT; + language_name = "JavaScript"; + + file_extension.push_back(".js"); + + QuoteStart = "\"'/"; + QuoteEnd = "\"'/"; + QuoteEscapeFront = '\\'; + LineCommentStart.push_back("//"); + BlockCommentStart.push_back("/*"); + BlockCommentEnd.push_back("*/"); + + data_name_list.push_back("abstract"); + data_name_list.push_back("boolean"); + data_name_list.push_back("byte"); + data_name_list.push_back("char"); + data_name_list.push_back("class"); + data_name_list.push_back("double"); + data_name_list.push_back("enum"); + data_name_list.push_back("float"); + data_name_list.push_back("implements"); + data_name_list.push_back("instanceOf"); + data_name_list.push_back("int"); + data_name_list.push_back("interface"); + data_name_list.push_back("long"); + data_name_list.push_back("private"); + data_name_list.push_back("protected"); + data_name_list.push_back("public"); + data_name_list.push_back("short"); + data_name_list.push_back("static"); + data_name_list.push_back("void"); + + exec_name_list.push_back("alert"); + exec_name_list.push_back("arguments"); + exec_name_list.push_back("assign"); + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("close"); + exec_name_list.push_back("comment"); + exec_name_list.push_back("constructor"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("default"); + exec_name_list.push_back("debugger"); + exec_name_list.push_back("delete"); + exec_name_list.push_back("do"); + exec_name_list.push_back("else"); + exec_name_list.push_back("escape"); + exec_name_list.push_back("eval"); + exec_name_list.push_back("export"); + exec_name_list.push_back("extends"); + exec_name_list.push_back("false"); + exec_name_list.push_back("find"); + exec_name_list.push_back("final"); + exec_name_list.push_back("finally"); + exec_name_list.push_back("focus"); + exec_name_list.push_back("for"); + 
exec_name_list.push_back("function"); + exec_name_list.push_back("if"); + exec_name_list.push_back("import"); + exec_name_list.push_back("label"); + exec_name_list.push_back("length"); + exec_name_list.push_back("location"); + exec_name_list.push_back("native"); + exec_name_list.push_back("new"); + exec_name_list.push_back("null"); + exec_name_list.push_back("open"); + exec_name_list.push_back("package"); + exec_name_list.push_back("print"); + exec_name_list.push_back("prompt"); + exec_name_list.push_back("prototype"); + exec_name_list.push_back("ref"); + exec_name_list.push_back("return"); + exec_name_list.push_back("self"); + exec_name_list.push_back("status"); + exec_name_list.push_back("stop"); + exec_name_list.push_back("super"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("synchronized"); + exec_name_list.push_back("taint"); + exec_name_list.push_back("this"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("throws"); + exec_name_list.push_back("transient"); + exec_name_list.push_back("true"); + exec_name_list.push_back("try"); + exec_name_list.push_back("typeof"); + exec_name_list.push_back("untaint"); + exec_name_list.push_back("var"); + exec_name_list.push_back("watch"); + exec_name_list.push_back("while"); + exec_name_list.push_back("with"); + + math_func_list.push_back("abs"); + math_func_list.push_back("ceil"); + math_func_list.push_back("exp"); + math_func_list.push_back("floor"); + math_func_list.push_back("max"); + math_func_list.push_back("min"); + math_func_list.push_back("pow"); + math_func_list.push_back("random"); + math_func_list.push_back("round"); + math_func_list.push_back("sqrt"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("tan"); + + log_func_list.push_back("log"); + + cmplx_calc_list.push_back("+"); + 
cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("++"); + cmplx_calc_list.push_back("--"); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("else if"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("switch"); + cmplx_cond_list.push_back("while"); + cmplx_cond_list.push_back("?"); + + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("==="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back("!=="); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + + cmplx_assign_list.push_back("="); +} + +/*! +* Handles special case for quote characters within regexp operators. +* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote character of the current status +* +* \return method status +*/ +int CJavascriptCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd) +{ + static bool inRegexp = false; + size_t idx = idx_start; + size_t i = idx; + + if (inRegexp || strline[idx] == '/') + { + if (!inRegexp) + { + // check for open parenthesis to indicate a regexp (match(//), replace(//, ""), split(//)) + while (i > 0) + { + if (strline[i-1] == '(') + break; + else if (strline[i-1] != ' ' && strline[i-1] != '\t') + { + idx_start++; + return 1; + } + i--; + } + if (i <= 0) + { + idx_start++; + return 1; + } + i = idx; + } + + // replace all "\\" by "$$" + size_t start = idx_start; + while ((start = strline.find("\\\\", start)) != string::npos) + { + strline.replace(start, 2, "$$"); + start += 2; + } + + while (i < strline.length()) + { + if (inRegexp) + 
{ + if ((strline[i] == '/' && (i == 0 || (i > 0 && strline[i - 1] != '\\'))) + || (contd && strline[i] == ';')) + { + // replace everything in the regexp + strline.replace(idx, i - idx + 1, i - idx + 1, '$'); + inRegexp = false; + contd = false; + idx = i + 1; + idx_start = idx; + return 1; + } + } + else if (strline[i] == '/') + { + idx = i; + inRegexp = true; + } + i++; + } + + if (inRegexp) + { + strline.replace(idx, i - idx, i - idx, '$'); + contd = true; + } + } + idx_start = idx; + + if (!inRegexp) + return CCodeCounter::ReplaceQuote(strline, idx_start, contd, CurrentQuoteEnd); + + return 1; +} + +/*! +* Counts directive lines of code. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CJavascriptCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + filemap::iterator itfmBak = fmapBak->begin(); + + for (filemap::iterator iter = fmap->begin(); iter!=fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + // ensures the keyword stands alone, avoid, e.g., #ifabc + if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = 
itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // drop continuation symbol + if (strDirLine[strDirLine.length()-1] == '\\') + strDirLine = strDirLine.substr(0, strDirLine.length()-1); + + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length()-1] != ',' && iter->line[iter->line.length()-1] != '\\') + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CJavascriptCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + unsigned int paren_count = 0; + bool for_flag = false; + bool found_forifwhile = false; + bool found_while = false; + char prev_char = 0; + bool data_continue = false; + bool inArrayDec = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + unsigned int openBrackets = 0; + + filemap::iterator fit, fitbak; + string line, lineBak; + StringVector loopLevel; + + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + string exclude = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + + // insert blank at the beginning (for searching keywords + line = ' ' + line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, paren_count, for_flag, found_forifwhile, found_while, + prev_char, data_continue, temp_lines, phys_exec_lines, phys_data_lines, inArrayDec, + openBrackets, loopLevel); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param paren_cnt count of parenthesis +* \param forflag found for flag +* \param found_forifwhile found for, if, or while flag +* \param found_while found while flag +* \param prev_char previous character +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param inArrayDec marks an array declaration +* \param openBrackets number of open brackets (no matching close bracket) +* \param loopLevel nested loop level +*/ +void CJavascriptCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, bool &inArrayDec, + unsigned int &openBrackets, StringVector &loopLevel) +{ + // paren_cnt is used with 'for' statement only + size_t start = 0; //starting index of the working string + size_t i = 0, strSize; + bool found_do, found_try, found_else, trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:"; + unsigned int cnt = 0; + unsigned int loopCnt = 0; + StringVector::iterator lit; + + string tmp = CUtil::TrimString(strLSLOC); + + // do, try + found_do = (CUtil::FindKeyword(tmp, "do") != string::npos); + found_try = (CUtil::FindKeyword(tmp, "try") != string::npos); + // else is treated differently, else is included in SLOC, do and try are not + found_else = (CUtil::FindKeyword(tmp, "else") != string::npos); + + // there may be more than 1 logical SLOC in this line + while (i < 
line.length()) + { + switch (line[i]) + { + case ';': case '{': // LSLOC terminators + // ';' for normal executable or declaration statement + // '{' for starting a function or 'do' stmt or a block (which is counted) + + // get the previous logical mark until i-1 index is the new LSLOC + // except 'do' precedes '{' + // except '}' precedes ';' ?? + // do nothing inside 'for' statement + if (paren_cnt > 0 && line[i] == ';') + break; + + // record open bracket for nested loop processing + if (print_cmplx) + { + if (line[i] == '{') + { + openBrackets++; + if ((unsigned int)loopLevel.size() < openBrackets) + loopLevel.push_back(""); + } + else + { + if ((unsigned int)loopLevel.size() > openBrackets && openBrackets > 0) + loopLevel.pop_back(); + } + } + + // case 'while(...);', 'while(...) {', and '} while(...);' + // this case is handled in case ')' + if (found_while && found_forifwhile) + { + found_while = false; + found_forifwhile = false; + start = i + 1; + break; + } + + if (line[i] == '{') + { + if (prev_char == '=') + inArrayDec = true; + + // continue until seeing ';' + if (inArrayDec) + break; + + // case for(...); and if (...) 
{ + // these specials are handled + if (found_forifwhile) + { + found_forifwhile = false; + start = i + 1; + break; + } + + // check if 'do' precedes '{' + if (!found_do && !found_try && !found_else) + { + // find for 'do' in string before tmp string + tmp = CUtil::TrimString(line.substr(start, i - start)); + found_do = (tmp == "do"); // found 'do' statement + found_try = (tmp == "try"); // found 'try' statement + // same as else + found_else = (tmp == "else"); // found 'else' statement + } + if (found_do || found_try || found_else) + { + if (found_do && print_cmplx) + { + if (loopLevel.size() > 0) + loopLevel.pop_back(); + loopLevel.push_back("do"); + } + found_do = false; + found_try = false; + if (!found_else) + { + // everything before 'do', 'try' are cleared + strLSLOC = ""; + strLSLOCBak = ""; + start = i + 1; + } + break; // do not store '{' following 'do' + } + } + + // wrong, e.g., a[]={1,2,3}; + if (line[i] == ';' && prev_char == '}') + { + // check if in array declaration or not + // if no, skip, otherwise, complete the SLOC containing array declaration + if (!inArrayDec) + { + start = i + 1; + break; + } + } + + inArrayDec = false; + + // check for empty statement (=1 LSLOC) + if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1) + { + strLSLOC = ";"; + strLSLOCBak = ";"; + } + else + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + temp_lines++; + if (data_continue == true && line[i] == ';') + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + if (cnt > 0 && line[i] == ';') + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { 
+ result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + } + else if (data_continue == true && line[i] == ';') + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + start = i + 1; + + break; + case '(': + if (forflag) + paren_cnt++; + else + { + // handle 'for', 'foreach', 'while', 'if' the same way + tmp = CUtil::TrimString(line.substr(start,i)); + if (CUtil::FindKeyword(tmp, "for") != string::npos + || CUtil::FindKeyword(tmp, "foreach") != string::npos + || CUtil::FindKeyword(tmp, "while")!= string::npos + || CUtil::FindKeyword(tmp, "if") != string::npos) + { + forflag = true; + paren_cnt++; + + if (print_cmplx && (unsigned int)loopLevel.size() > openBrackets && openBrackets > 0) + loopLevel.pop_back(); + + if (CUtil::FindKeyword(tmp, "while")!= string::npos) + { + if (print_cmplx) + loopLevel.push_back("while"); + found_while = true; + } + else if (print_cmplx) + { + if (CUtil::FindKeyword(tmp, "for") != string::npos) + loopLevel.push_back("for"); + else if (CUtil::FindKeyword(tmp, "foreach") != string::npos) + loopLevel.push_back("foreach"); + + // record nested loop level + if (CUtil::FindKeyword(tmp, "if") == string::npos) + { + loopCnt = 0; + for (lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + } + } + } + break; + case ')': + if (forflag) + { + if (paren_cnt > 0) + paren_cnt--; + if (paren_cnt == 0) + { + // handle 'for', 'foreach', 'while', 'if' + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; 
+ strLSLOCBak = strLSLOC = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + start = i + 1; + found_forifwhile = true; + forflag = false; + } + } + break; + case '}': + // skip '}' when found ';' and then '}' because '{' is counted already + // also, {} is also skipped, counted + if (prev_char == ';' || prev_char == '{' || prev_char == '}') + if (!inArrayDec) start = i + 1; + + // record close bracket for nested loop processing + if (print_cmplx) + { + if (openBrackets > 0) + openBrackets--; + if (loopLevel.size() > 0) + loopLevel.pop_back(); + } + break; + } + + if (line[i] != ' ' && line[i] != '\t') + { + // if ;}}} --> don't count }}} at all + // also, if {}}} --> don't count }}} at all + // if ( !(line[i] == '}' && (prev_char == ';' || prev_char == '{'))) // see case '}' above + prev_char = line[i]; + + // change to not found if a char appears before + if (line[i] != ')' && found_forifwhile) + found_forifwhile = false; + } + + i++; + } + + tmp = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp.substr(0, strSize); + + // drop continuation symbol + if (strLSLOC[strLSLOC.length()-1] == '\\') + { + strLSLOC = strLSLOC.substr(0, strLSLOC.length()-1); + strLSLOCBak = strLSLOCBak.substr(0, strLSLOCBak.length()-1); + } + } + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + + if (cnt > 0) + data_continue = true; + if (data_continue) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; +} + +/*! +* Constructs a CJavascriptPhpCounter object. 
+*/ +CJavascriptPhpCounter::CJavascriptPhpCounter() +{ + classtype = JAVASCRIPT_PHP; + language_name = "JavaScript/PHP"; + + file_extension.clear(); + file_extension.push_back(".*jsphp"); +} + +/*! +* Constructs a CJavascriptHtmlCounter object. +*/ +CJavascriptHtmlCounter::CJavascriptHtmlCounter() +{ + classtype = JAVASCRIPT_HTML; + language_name = "JavaScript/HTML"; + + file_extension.clear(); + file_extension.push_back(".*jshtm"); +} + +/*! +* Constructs a CJavascriptXmlCounter object. +*/ +CJavascriptXmlCounter::CJavascriptXmlCounter() +{ + classtype = JAVASCRIPT_XML; + language_name = "JavaScript/XML"; + + file_extension.clear(); + file_extension.push_back(".*jsxml"); +} + +/*! +* Constructs a CJavascriptJspCounter object. +*/ +CJavascriptJspCounter::CJavascriptJspCounter() +{ + classtype = JAVASCRIPT_JSP; + language_name = "JavaScript/JSP"; + + file_extension.clear(); + file_extension.push_back(".*jsjsp"); +} + +/*! +* Constructs a CJavascriptAspServerCounter object. +*/ +CJavascriptAspServerCounter::CJavascriptAspServerCounter() +{ + classtype = JAVASCRIPT_ASP_S; + language_name = "JavaScript/ASP Server"; + + file_extension.clear(); + file_extension.push_back(".*jsasps"); +} + +/*! +* Constructs a CJavascriptAspClientCounter object. +*/ +CJavascriptAspClientCounter::CJavascriptAspClientCounter() +{ + classtype = JAVASCRIPT_ASP_C; + language_name = "JavaScript/ASP Client"; + + file_extension.clear(); + file_extension.push_back(".*jsaspc"); +} + +/*! +* Constructs a CJavascriptColdFusionCounter object. +*/ +CJavascriptColdFusionCounter::CJavascriptColdFusionCounter() +{ + classtype = JAVASCRIPT_CFM; + language_name = "JavaScript/ColdFusion"; + + file_extension.clear(); + file_extension.push_back(".*jscfm"); +} diff --git a/src/CJavascriptCounter.h b/src/CJavascriptCounter.h new file mode 100644 index 0000000..6480c15 --- /dev/null +++ b/src/CJavascriptCounter.h @@ -0,0 +1,118 @@ +//! Code counter class definition for the JavaScript language. +/*! 
+* \file CJavascriptCounter.h +* +* This file contains the code counter class definition for the JavaScript language. +*/ + +#ifndef CJavascriptCounter_h +#define CJavascriptCounter_h + +#include "CCodeCounter.h" + +//! JavaScript code counter class. +/*! +* \class CJavascriptCounter +* +* Defines the JavaScript code counter class. +*/ +class CJavascriptCounter : public CCodeCounter +{ +public: + CJavascriptCounter(); + +protected: + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd); + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, bool &inArrayDec, + unsigned int &openBrackets, StringVector &loopLevel); +}; + +//! JavaScript in PHP code counter class. +/*! +* \class CJavascriptPhpCounter +* +* Defines the JavaScript in PHP code counter class. +*/ +class CJavascriptPhpCounter : public CJavascriptCounter +{ +public: + CJavascriptPhpCounter(); +}; + +//! JavaScript in HTML code counter class. +/*! +* \class CJavascriptHtmlCounter +* +* Defines the JavaScript in HTML code counter class. +*/ +class CJavascriptHtmlCounter : public CJavascriptCounter +{ +public: + CJavascriptHtmlCounter(); +}; + +//! JavaScript in XML code counter class. +/*! +* \class CJavascriptXmlCounter +* +* Defines the JavaScript in XML code counter class. +*/ +class CJavascriptXmlCounter : public CJavascriptCounter +{ +public: + CJavascriptXmlCounter(); +}; + +//! JavaScript in JSP code counter class. +/*! +* \class CJavascriptJspCounter +* +* Defines the JavaScript in JSP code counter class. 
+*/ +class CJavascriptJspCounter : public CJavascriptCounter +{ +public: + CJavascriptJspCounter(); +}; + +//! JavaScript in ASP server code counter class. +/*! +* \class CJavascriptAspServerCounter +* +* Defines the JavaScript in ASP server code counter class. +*/ +class CJavascriptAspServerCounter : public CJavascriptCounter +{ +public: + CJavascriptAspServerCounter(); +}; + +//! JavaScript in ASP client code counter class. +/*! +* \class CJavascriptAspClientCounter +* +* Defines the JavaScript in ASP client code counter class. +*/ +class CJavascriptAspClientCounter : public CJavascriptCounter +{ +public: + CJavascriptAspClientCounter(); +}; + +//! JavaScript in ColdFusion code counter class. +/*! +* \class CJavascriptColdFusionCounter +* +* Defines the JavaScript in ColdFusion code counter class. +*/ +class CJavascriptColdFusionCounter : public CJavascriptCounter +{ +public: + CJavascriptColdFusionCounter(); +}; + +#endif diff --git a/src/CMakefileCounter.cpp b/src/CMakefileCounter.cpp new file mode 100644 index 0000000..775c583 --- /dev/null +++ b/src/CMakefileCounter.cpp @@ -0,0 +1,208 @@ +//! Code counter class methods for Makefiles. +/*! +* \file CMakefileCounter.cpp +* +* This file contains the code counter class methods for Makefiles. +*/ + +#include "CMakefileCounter.h" + +/*! +* Constructs a CMakefileCounter object. 
+*/ +CMakefileCounter::CMakefileCounter() +{ + classtype = MAKEFILE; + language_name = "Makefile"; + + file_extension.push_back(".make"); + file_extension.push_back(".makefile"); + + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + LineCommentStart.push_back("#"); + + directive.push_back("include"); + directive.push_back("-include"); + directive.push_back("sinclude"); + + cmplx_assign_list.push_back("="); + cmplx_assign_list.push_back("?="); + cmplx_assign_list.push_back(":="); + cmplx_assign_list.push_back("+="); + + cmplx_preproc_list.push_back("include"); + cmplx_preproc_list.push_back("-include"); + cmplx_preproc_list.push_back("sinclude"); +} + +/*! +* Counts directive lines of code. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CMakefileCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + + filemap::iterator itfmBak = fmapBak->begin(); + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + // ensures the keyword stands alone + if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine 
= itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // drop continuation symbol + if (strDirLine[strDirLine.length()-1] == '\\') + strDirLine = strDirLine.substr(0, strDirLine.length()-1); + + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length()-1] != '\\') + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CMakefileCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + filemap::iterator fit, fitbak; + string line, lineBak; + string strLSLOC = ""; + string strLSLOCBak = ""; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + + // insert blank at the beginning (for searching keywords) + line = ' ' + line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + LSLOC(result, line, lineBak, 
strLSLOC, strLSLOCBak); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + result->exec_lines[PHY]++; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +*/ +void CMakefileCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak) +{ + size_t strSize; + bool trunc_flag = false; + string tmp = CUtil::TrimString(line); + string tmpBak = CUtil::TrimString(line); + string tmpLower = CUtil::ToLower(tmp); + + // check for continuation and skip ending condition strings + if (tmp[tmp.length()-1] == '\\') + { + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 1) + { + strLSLOC += CUtil::TrimString(tmp.substr(0, strSize - 1)) + " "; + strLSLOCBak += CUtil::TrimString(tmpBak.substr(0, strSize - 1)) + " "; + return; + } + } + if (tmpLower == "endef" || tmpLower == "else" || tmpLower == "endif" || tmpLower == "done") + return; + + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; +} diff --git a/src/CMakefileCounter.h b/src/CMakefileCounter.h new file mode 100644 index 0000000..3e785e3 --- /dev/null +++ b/src/CMakefileCounter.h @@ -0,0 +1,30 @@ 
+//! Code counter class definition for Makefiles. +/*! +* \file CMakefileCounter.h +* +* This file contains the code counter class definition for Makefiles. +*/ + +#ifndef CMakefileCounter_h +#define CMakefileCounter_h + +#include "CCodeCounter.h" + +//! Makefile code counter class. +/*! +* \class CMakefileCounter +* +* Defines the Makefile code counter class. +*/ +class CMakefileCounter : public CCodeCounter +{ +public: + CMakefileCounter(); + +protected: + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak); +}; + +#endif diff --git a/src/CMatlabCounter.cpp b/src/CMatlabCounter.cpp new file mode 100644 index 0000000..6c99a32 --- /dev/null +++ b/src/CMatlabCounter.cpp @@ -0,0 +1,425 @@ +//! Code counter class methods for the Matlab language. +/*! +* \file CMatlabCounter.cpp +* +* This file contains the code counter class methods for the Matlab language. +*/ + +#include "CMatlabCounter.h" + +/*! +* Constructs a CMatlab object. 
+*/ +CMatlabCounter::CMatlabCounter() +{ + classtype = MATLAB; + language_name = "MATLAB"; + + file_extension.push_back(".m"); + + QuoteStart = "'"; + QuoteEnd = "'"; + QuoteEscapeRear = '\''; + ContinueLine = "..."; + + BlockCommentStart.push_back("%{"); + BlockCommentEnd.push_back("%}"); + LineCommentStart.push_back("%"); + + directive.push_back("import"); + + exec_name_list.push_back("all"); + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("for"); + exec_name_list.push_back("if"); + exec_name_list.push_back("else"); + exec_name_list.push_back("elseif"); + exec_name_list.push_back("end"); + exec_name_list.push_back("otherwise"); + exec_name_list.push_back("parfor"); + exec_name_list.push_back("return"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("try"); + exec_name_list.push_back("while"); + + math_func_list.push_back("ceil"); + math_func_list.push_back("eps"); + math_func_list.push_back("exp"); + math_func_list.push_back("factor"); + math_func_list.push_back("factorial"); + math_func_list.push_back("fix"); + math_func_list.push_back("floor"); + math_func_list.push_back("idivide"); + math_func_list.push_back("Inf"); + math_func_list.push_back("intmax"); + math_func_list.push_back("intmin"); + math_func_list.push_back("max"); + math_func_list.push_back("mod"); + math_func_list.push_back("NaN"); + math_func_list.push_back("pi"); + math_func_list.push_back("pow2"); + math_func_list.push_back("power"); + math_func_list.push_back("realmax"); + math_func_list.push_back("realmin"); + math_func_list.push_back("rem"); + math_func_list.push_back("round"); + math_func_list.push_back("sqrt"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("acot"); + trig_func_list.push_back("acsc"); + trig_func_list.push_back("asec"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("atan"); + 
trig_func_list.push_back("atan2"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("cot"); + trig_func_list.push_back("csc"); + trig_func_list.push_back("sec"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("tan"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + log_func_list.push_back("log1p"); + log_func_list.push_back("log2"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("^"); + cmplx_calc_list.push_back("\\"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("\'"); + cmplx_calc_list.push_back(".'"); + cmplx_calc_list.push_back(".*"); + cmplx_calc_list.push_back(".^"); + cmplx_calc_list.push_back(".\\"); + cmplx_calc_list.push_back("./"); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("elseif"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("switch"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("<="); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("~="); + + cmplx_preproc_list.push_back("import"); + + cmplx_assign_list.push_back("="); +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CMatlabCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + + filemap::iterator itfmBak = fmapBak->begin(); + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + // ensures the keyword stands alone, avoid, e.g., #ifabc + if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // drop continuation symbol + if (strDirLine.length() > 3 && strDirLine.substr(strDirLine.length()-3, 3) == "...") + strDirLine = strDirLine.substr(0, strDirLine.length()-3); + + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line.length() < 3 || 
iter->line.substr(iter->line.length()-4, 3) != "...") + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CMatlabCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + bool cont_str = false; + unsigned int openBrackets = 0; + string strLSLOC = ""; + string strLSLOCBak = ""; + filemap::iterator fit, fitbak; + string line, lineBak; + StringVector loopLevel; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + + // insert blank at the beginning (for searching keywords) + line = ' ' + line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, cont_str, openBrackets, loopLevel); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + result->exec_lines[PHY]++; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. 
This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param cont_str continue string +* \param openBrackets number of open brackets (no matching close bracket) +* \param loopLevel nested loop level +*/ +void CMatlabCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &cont_str, unsigned int &openBrackets, StringVector &loopLevel) +{ + size_t start = 0, len; + size_t i = 0, strSize; + bool trunc_flag = false; + string tmp, tmpBak, str; + + // check exclusions/continuation + tmp = CUtil::TrimString(line); + tmpBak = CUtil::TrimString(lineBak); + if (CUtil::FindKeyword(tmp, "end") == 0) + { + if (loopLevel.size() > 0) + loopLevel.pop_back(); + return; + } + else if (CUtil::FindKeyword(tmp, "case") == 0 || CUtil::FindKeyword(tmp, "else") == 0 || CUtil::FindKeyword(tmp, "otherwise") == 0) + { + strLSLOC += tmp + " "; + strLSLOCBak += tmpBak + " "; + return; + } + + // process nested loops + if (print_cmplx) + { + if (CUtil::FindKeyword(tmp, "for") != string::npos || + CUtil::FindKeyword(tmp, "while") != string::npos || + CUtil::FindKeyword(tmp, "parfor")!= string::npos) + { + loopLevel.push_back("loop"); + + // record nested loop level + unsigned int loopCnt = 0; + for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + else if (CUtil::FindKeyword(tmp, "if") != string::npos || + CUtil::FindKeyword(tmp, "switch") != string::npos || + CUtil::FindKeyword(tmp, "try") != string::npos) + loopLevel.push_back(""); + } + + // there may be more than 
1 logical SLOC in this line + while (i < line.length()) + { + switch (line[i]) + { + case ';': case ',': // LSLOC terminators + + if (openBrackets > 0) + { + i++; + continue; + } + + tmp = CUtil::TrimString(line.substr(start, i - start + 1)); + tmpBak = CUtil::TrimString(lineBak.substr(start, i - start + 1)); + + if (cont_str && strLSLOC.length() > 0) + { + // check for string continuation + if (tmp[0] == '\'') + { + tmp = tmp.substr(1, tmp.length() - 1); + tmpBak = tmpBak.substr(1, tmpBak.length() - 1); + } + } + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + } + result->addSLOC(strLSLOCBak, trunc_flag); + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + cont_str = false; + start = i + 1; + + break; + case '[': case '(': case '{': + openBrackets++; + break; + case ']': case ')': case '}': + openBrackets--; + break; + } + i++; + } + + // check for line continuation + tmp = CUtil::TrimString(line.substr(start, i - start)); + tmpBak = CUtil::TrimString(lineBak.substr(start, i - start)); + if (tmp.length() > 3 && tmp.substr(tmp.length()-3, 3) == "...") + { + // strip off trailing (...) 
+ tmp = tmp.substr(0, tmp.length()-3); + tmpBak = tmpBak.substr(0, tmpBak.length()-3); + + // strip off trailing (') to continue string + str = CUtil::TrimString(tmp, 1); + if (str[str.length()-1] == '\'') + { + len = str.length() - 1; + cont_str = true; + } + else + len = tmp.length(); + + strSize = CUtil::TruncateLine(len, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + if (cont_str) + { + strLSLOC += CUtil::TrimString(tmp.substr(0, strSize), -1); + strLSLOCBak += CUtil::TrimString(tmpBak.substr(0, strSize), -1); + } + else + { + strLSLOC += CUtil::TrimString(tmp.substr(0, strSize)) + " "; + strLSLOCBak += CUtil::TrimString(tmpBak.substr(0, strSize)) + " "; + } + } + } + else + { + // save LSLOC + if (cont_str && strLSLOC.length() > 0) + { + // check for string continuation + if (tmp[0] == '\'') + { + tmp = tmp.substr(1, tmp.length() - 1); + tmpBak = tmpBak.substr(1, tmpBak.length() - 1); + } + } + + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + + result->addSLOC(strLSLOCBak, trunc_flag); + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + cont_str = false; + } + } +} diff --git a/src/CMatlabCounter.h b/src/CMatlabCounter.h new file mode 100644 index 0000000..0ee7078 --- /dev/null +++ b/src/CMatlabCounter.h @@ -0,0 +1,31 @@ +//! Code counter class definition for the Matlab language. +/*! +* \file CMatlabCounter.h +* +* This file contains the code counter class definition for the Matlab Language. +*/ + +#ifndef CMatlabCounter_h +#define CMatlabCounter_h + +#include "CCodeCounter.h" + +//! Matlab code counter class. +/*! +* \class CMatlabCounter +* +* Defines the Matlab code counter class. 
+*/ +class CMatlabCounter : public CCodeCounter +{ +public: + CMatlabCounter(); + +protected: + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &cont_str, unsigned int &openBrackets, StringVector &loopLevel); +}; + +#endif diff --git a/src/CMidasCounter.cpp b/src/CMidasCounter.cpp new file mode 100644 index 0000000..411179c --- /dev/null +++ b/src/CMidasCounter.cpp @@ -0,0 +1,462 @@ +//! Code counter class methods for the Midas macro languages. +/*! +* \file CMidasCounter.cpp +* +* This file contains the code counter class methods for the Midas macro languages. +*/ + +#include "CMidasCounter.h" + +/*! +* Constructs a CMidasCounter object. +*/ +CMidasCounter::CMidasCounter() +{ + casesensitive = false; + + QuoteStart = "\""; + QuoteEnd = "\""; + QuoteEscapeFront = '\"'; + ContinueLine = "&"; + + LineCommentStart.push_back("!"); + + exclude_keywords.push_back("endcontrols"); + exclude_keywords.push_back("endif"); + exclude_keywords.push_back("endloop"); + exclude_keywords.push_back("endl"); // abbreviation for endloop + exclude_keywords.push_back("endmacro"); + exclude_keywords.push_back("endsubroutine"); + exclude_keywords.push_back("endwhile"); + exclude_keywords.push_back("endw"); // abbreviation for endwhile + exclude_keywords.push_back("else"); + exclude_keywords.push_back("label"); + + directive.push_back("include"); + + exec_name_list.push_back("break"); + exec_name_list.push_back("call"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("else"); + exec_name_list.push_back("elseif"); + exec_name_list.push_back("forall"); + exec_name_list.push_back("goto"); + exec_name_list.push_back("if"); + exec_name_list.push_back("loop"); + exec_name_list.push_back("pipe"); + exec_name_list.push_back("procedure"); + 
exec_name_list.push_back("return"); + exec_name_list.push_back("subroutine"); + exec_name_list.push_back("trap"); + exec_name_list.push_back("while"); + + math_func_list.push_back("calc"); + math_func_list.push_back("fcalc"); + math_func_list.push_back("fft"); + math_func_list.push_back("firwind"); + math_func_list.push_back("histogram"); + math_func_list.push_back("maxmin"); + math_func_list.push_back("peakpick"); + math_func_list.push_back("ramp"); + + trig_func_list.push_back("waveform"); + + cmplx_calc_list.push_back("**"); + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("elseif"); + cmplx_cond_list.push_back("forall"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("loop"); + cmplx_cond_list.push_back("trap"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("gt"); + cmplx_logic_list.push_back("lt"); + cmplx_logic_list.push_back("ge"); + cmplx_logic_list.push_back("le"); + cmplx_logic_list.push_back("eq"); + cmplx_logic_list.push_back("eqs"); + cmplx_logic_list.push_back("eqss"); + cmplx_logic_list.push_back("ngt"); + cmplx_logic_list.push_back("nlt"); + cmplx_logic_list.push_back("nge"); + cmplx_logic_list.push_back("nle"); + cmplx_logic_list.push_back("neq"); + cmplx_logic_list.push_back("neqs"); + cmplx_logic_list.push_back("neqss"); + + cmplx_preproc_list.push_back("include"); + + cmplx_assign_list.push_back("results"); +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CMidasCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = "", tmp, tmpBak; + filemap::iterator itfmBak = fmapBak->begin(); + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + tmp = CUtil::TrimString(iter->line); + tmpBak = CUtil::TrimString(itfmBak->line); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + tmp, directive, cnt, 1, exclude, "", "", &result->directive_count, false); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + if (((idx = CUtil::FindKeyword(tmp, *viter, 0, TO_END_OF_STRING, false)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + // strip off trailing (&) + if (tmpBak[tmpBak.length()-1] == '&') + tmpBak = tmpBak.substr(0, tmpBak.length() - 1); + strSize = CUtil::TruncateLine(tmpBak.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = tmpBak.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + // strip off trailing (&) + if (tmpBak[tmpBak.length()-1] == '&') + tmpBak = tmpBak.substr(0, tmpBak.length() - 1); + strSize = CUtil::TruncateLine(tmpBak.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += tmpBak.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // if a directive or 
continuation of a directive + if (tmp[tmp.length()-1] != '&') + { + // add another logical directive line, should also have type + // if no continuation symbol found + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CMidasCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + filemap::iterator fit, fitbak; + string line, lineBak; + + bool data_continue = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + StringVector loopEnd; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + + // insert blank at the beginning (for searching keywords) + line = ' ' + line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines (blank_line/comment_line/directive) + if (!CUtil::CheckBlank(line)) + { + // process logical SLOC + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, + data_continue, temp_lines, phys_exec_lines, phys_data_lines, loopEnd); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count, false); + } + + // update physical SLOC lines + result->exec_lines[PHY] += phys_exec_lines; + 
phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param loopEnd nested loop end string(s) +*/ +void CMidasCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, StringVector &loopEnd) +{ + size_t start = 0; //starting index of the working string + size_t i = 0, strSize; + bool found_exclusion = false, trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:"; + unsigned int cnt = 0; + + string tmp = CUtil::TrimString(line); + string tmpBak = CUtil::TrimString(lineBak); + + // record nested loops + if (print_cmplx) + { + bool new_loop = false; + if (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) == 0) + { + loopEnd.push_back("enddo"); + new_loop = true; + } + else if (CUtil::FindKeyword(tmp, "forall", 0, TO_END_OF_STRING, false) == 0 || + CUtil::FindKeyword(tmp, "foreach", 0, TO_END_OF_STRING, false) == 0 || + CUtil::FindKeyword(tmp, "for", 0, TO_END_OF_STRING, false) == 0) + { + loopEnd.push_back("endfor"); + new_loop = true; + } + else if 
(CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) == 0) + { + loopEnd.push_back("endloop"); + new_loop = true; + } + else if (CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false) == 0) + { + loopEnd.push_back("endwhile"); + new_loop = true; + } + else if (loopEnd.size() > 0) + { + if (CUtil::FindKeyword(tmp, loopEnd.back(), 0, TO_END_OF_STRING, false) == 0) + loopEnd.pop_back(); + } + if (new_loop) + { + if (result->cmplx_nestloop_count.size() < loopEnd.size()) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopEnd.size()-1]++; + + // drop end keyword on forall since it is a single line loop + if (CUtil::FindKeyword(tmp, "forall", 0, TO_END_OF_STRING, false) == 0) + loopEnd.pop_back(); + } + } + + // check for line starting with excluded keywords (don't count as LSLOC) + for (StringVector::iterator it = exclude_keywords.begin(); it != exclude_keywords.end(); it++) + { + found_exclusion = (CUtil::FindKeyword(tmp, (*it), 0, TO_END_OF_STRING, false) == 0); + if (found_exclusion) + { + // process else + if ((*it) == "else") + { + if (CUtil::FindKeyword(tmp, "elseif", 0, TO_END_OF_STRING, false) != 0) + break; + } + else + break; + } + } + if (found_exclusion) + { + strLSLOC = strLSLOCBak = ""; + phys_exec_lines++; + temp_lines = 0; + return; + } + + // check for inline if + if (CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) == 0) + { + i = CUtil::FindKeyword(tmp, "then", 0, TO_END_OF_STRING, false); + if (i != string::npos) + { + // check if anything exists past the "then" + start = i + 4; + if (start < tmp.length()) + { + // save LSLOC for if statement, then process in-line action + strSize = CUtil::TruncateLine(start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + tmp = CUtil::TrimString(tmp.substr(start, tmp.length() - start)); + tmpBak = CUtil::TrimString(tmpBak.substr(start, 
tmpBak.length() - start)); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + phys_exec_lines++; + temp_lines = 0; + } + } + } + // check for "forall" or "for" + else if (CUtil::FindKeyword(tmp, "forall", 0, TO_END_OF_STRING, false) == 0 || + CUtil::FindKeyword(tmp, "for", 0, TO_END_OF_STRING, false) == 0) + { + // split after forall/for + if (CUtil::FindKeyword(tmp, "forall", 0, TO_END_OF_STRING, false) == 0) + start = 6; + else + start = 3; + if (tmp.length() > start) + { + // save LSLOC for if statement, then process loop action + strSize = CUtil::TruncateLine(start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + tmp = CUtil::TrimString(tmp.substr(start, tmp.length() - start)); + tmpBak = CUtil::TrimString(tmpBak.substr(start, tmpBak.length() - start)); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + phys_exec_lines++; + temp_lines = 0; + } + } + + // check for continuation (&); exception is (&&) to use literal (&) + if (tmpBak.substr(tmpBak.length() - 1, 1) == "&" && + (tmpBak.length() < 2 || tmpBak.substr(tmpBak.length() - 2, 1) != "&")) + { + // strip off trailing (&) + if (tmp.length() > 1) + { + tmp = tmp.substr(start, tmp.length() - 1); + tmpBak = tmpBak.substr(start, tmpBak.length() - 1); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + } + } + else + tmp = tmpBak = ""; + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL, false); + + if (cnt > 0) + data_continue = true; + if (data_continue) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && 
phys_exec_lines == 0) + phys_exec_lines = 1; + } + else + { + // save LSLOC + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + strLSLOCBak += tmpBak.substr(0, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count, false); + + temp_lines++; + if (data_continue == true || cnt > 0) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + else if (data_continue == true) + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + } +} diff --git a/src/CMidasCounter.h b/src/CMidasCounter.h new file mode 100644 index 0000000..9e598e2 --- /dev/null +++ b/src/CMidasCounter.h @@ -0,0 +1,32 @@ +//! Code counter class definition for the Midas macro languages. +/*! +* \file CMidasCounter.h +* +* This file contains the code counter class definition for the Midas macro languages. +*/ + +#ifndef CMidasCounter_h +#define CMidasCounter_h + +#include "CCodeCounter.h" + +//! Midas code counter class. +/*! +* \class CMidasCounter +* +* Defines the Midas code counter class. 
+*/ +class CMidasCounter : public CCodeCounter +{ +public: + CMidasCounter(); + +protected: + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, + unsigned int &phys_data_lines, StringVector &loopEnd); +}; + +#endif diff --git a/src/CNeXtMidasCounter.cpp b/src/CNeXtMidasCounter.cpp new file mode 100644 index 0000000..8c912e6 --- /dev/null +++ b/src/CNeXtMidasCounter.cpp @@ -0,0 +1,40 @@ +//! Code counter class methods for the NeXtMidas macro language. +/*! +* \file CNeXtMidasCounter.cpp +* +* This file contains the code counter class methods for the NeXtMidas macro language. +*/ + +#include "CNeXtMidasCounter.h" + +/*! +* Constructs a CNeXtMidasCounter object. +*/ +CNeXtMidasCounter::CNeXtMidasCounter() +{ + classtype = NEXTMIDAS; + language_name = "NeXtMidas"; + + file_extension.push_back(".mm"); + + exclude_keywords.push_back("enddo"); + exclude_keywords.push_back("endfor"); + + data_name_list.push_back("global"); + + exec_name_list.push_back("do"); + exec_name_list.push_back("foreach"); + + math_func_list.push_back("filter"); + math_func_list.push_back("ifft"); + + cmplx_cond_list.push_back("do"); + cmplx_cond_list.push_back("foreach"); + + cmplx_logic_list.push_back("isTrue"); + cmplx_logic_list.push_back("isFalse"); + cmplx_logic_list.push_back("oeq"); + cmplx_logic_list.push_back("feq"); + cmplx_logic_list.push_back("noeq"); + cmplx_logic_list.push_back("nfeq"); +} diff --git a/src/CNeXtMidasCounter.h b/src/CNeXtMidasCounter.h new file mode 100644 index 0000000..4ffb5fa --- /dev/null +++ b/src/CNeXtMidasCounter.h @@ -0,0 +1,25 @@ +//! Code counter class definition for the NeXtMidas macro language. +/*! 
+* \file CNeXtMidasCounter.h +* +* This file contains the code counter class definition for the NeXtMidas macro language. +*/ + +#ifndef CNeXtMidasCounter_h +#define CNeXtMidasCounter_h + +#include "CMidasCounter.h" + +//! NeXtMidas code counter class. +/*! +* \class CNeXtMidasCounter +* +* Defines the NeXtMidas code counter class. +*/ +class CNeXtMidasCounter : public CMidasCounter +{ +public: + CNeXtMidasCounter(); +}; + +#endif diff --git a/src/CPascalCounter.cpp b/src/CPascalCounter.cpp new file mode 100644 index 0000000..ca8610f --- /dev/null +++ b/src/CPascalCounter.cpp @@ -0,0 +1,838 @@ +//! Code counter class methods for the Pascal language. +/*! +* \file CPascalCounter.cpp +* +* This file contains the code counter class methods for the Pascal language. +*/ + +#include "CPascalCounter.h" + +/*! +* Constructs a CPascalCounter object. +*/ +CPascalCounter::CPascalCounter() +{ + classtype = PASCAL; + language_name = "Pascal"; + casesensitive = false; + + QuoteStart = "'"; + QuoteEnd = "'"; + QuoteEscapeFront = '\''; + BlockCommentStart.push_back("(*"); + BlockCommentEnd.push_back("*)"); + BlockCommentStart.push_back("{"); + BlockCommentEnd.push_back("}"); + LineCommentStart.push_back("//"); + + file_extension.push_back(".pas"); + file_extension.push_back(".p"); + file_extension.push_back(".pp"); + file_extension.push_back(".pa3"); + file_extension.push_back(".pa4"); + file_extension.push_back(".pa5"); + + data_name_list.push_back("ansistring"); + data_name_list.push_back("array"); + data_name_list.push_back("boolean"); + data_name_list.push_back("byte"); + data_name_list.push_back("bytebool"); + data_name_list.push_back("cardinal"); + data_name_list.push_back("char"); + data_name_list.push_back("class"); + data_name_list.push_back("comp"); + data_name_list.push_back("complex"); + data_name_list.push_back("const"); + data_name_list.push_back("double"); + data_name_list.push_back("extended"); + data_name_list.push_back("file"); + 
data_name_list.push_back("integer"); + data_name_list.push_back("interface"); + data_name_list.push_back("int64"); + data_name_list.push_back("longbool"); + data_name_list.push_back("longint"); + data_name_list.push_back("longword"); + data_name_list.push_back("object"); + data_name_list.push_back("pchar"); + data_name_list.push_back("qword"); + data_name_list.push_back("real"); + data_name_list.push_back("record"); + data_name_list.push_back("set"); + data_name_list.push_back("shortint"); + data_name_list.push_back("shortstring"); + data_name_list.push_back("single"); + data_name_list.push_back("smallint"); + data_name_list.push_back("string"); + data_name_list.push_back("type"); + data_name_list.push_back("widestring"); + data_name_list.push_back("word"); + data_name_list.push_back("wordbool"); + + exec_name_list.push_back("absolute"); + exec_name_list.push_back("assembler"); + exec_name_list.push_back("case"); + exec_name_list.push_back("const"); + exec_name_list.push_back("constructor"); + exec_name_list.push_back("destructor"); + exec_name_list.push_back("dispose"); + exec_name_list.push_back("downto"); + exec_name_list.push_back("else"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("far"); + exec_name_list.push_back("for"); + exec_name_list.push_back("forward"); + exec_name_list.push_back("freemem"); + exec_name_list.push_back("function"); + exec_name_list.push_back("getmem"); + exec_name_list.push_back("goto"); + exec_name_list.push_back("if"); + exec_name_list.push_back("implementation"); + exec_name_list.push_back("inline"); + exec_name_list.push_back("interrupt"); + exec_name_list.push_back("label"); + exec_name_list.push_back("mark"); + exec_name_list.push_back("near"); + exec_name_list.push_back("new"); + exec_name_list.push_back("nil"); + exec_name_list.push_back("packed"); + exec_name_list.push_back("private"); + exec_name_list.push_back("procedure"); + exec_name_list.push_back("program"); + exec_name_list.push_back("protected"); + 
exec_name_list.push_back("public"); + exec_name_list.push_back("repeat"); + exec_name_list.push_back("unit"); + exec_name_list.push_back("uses"); + exec_name_list.push_back("var"); + exec_name_list.push_back("virtual"); + exec_name_list.push_back("while"); + exec_name_list.push_back("with"); + + math_func_list.push_back("abs"); + math_func_list.push_back("arg"); + math_func_list.push_back("cmplx"); + math_func_list.push_back("dec"); + math_func_list.push_back("exp"); + math_func_list.push_back("im"); + math_func_list.push_back("inc"); + math_func_list.push_back("min"); + math_func_list.push_back("max"); + math_func_list.push_back("polar"); + math_func_list.push_back("pow"); + math_func_list.push_back("re"); + math_func_list.push_back("round"); + math_func_list.push_back("sqr"); + math_func_list.push_back("sqrt"); + + trig_func_list.push_back("cos"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("arccos"); + trig_func_list.push_back("arcsin"); + trig_func_list.push_back("arctan"); + + log_func_list.push_back("ln"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("**"); + cmplx_calc_list.push_back("div"); + cmplx_calc_list.push_back("mod"); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("repeat"); + cmplx_cond_list.push_back("while"); + cmplx_cond_list.push_back("with"); + + cmplx_logic_list.push_back("="); + cmplx_logic_list.push_back("<>"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("xor"); + cmplx_logic_list.push_back("shl"); + cmplx_logic_list.push_back("shr"); + + 
cmplx_assign_list.push_back(":="); + + cmplx_pointer_list.push_back("^"); +} + +/*! +* Counts the number of comment lines, removes comments, and +* replaces quoted strings by special chars, e.g., $ +* All arguments are modified by the method. +* Since Pascal compiler directives are block comments starting with '$' +* this method also captures directive SLOC. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPascalCounter::CountCommentsSLOC(filemap* fmap, results* result, filemap *fmapBak) +{ + if (BlockCommentStart.empty() && LineCommentStart.empty()) + return 0; + if (classtype == DATAFILE) + return 0; + + bool contd = false; + bool contd_nextline; + int comment_type = 0; + /* + comment_type: + 0 : not a comment + 1 : line comment, whole line + 2 : line comment, embedded + 3 : block comment, undecided + 4 : block comment, embedded + */ + + size_t idx_start, idx_end, comment_start; + size_t quote_idx_start; + string curBlckCmtStart, curBlckCmtEnd; + char CurrentQuoteEnd = 0; + bool quote_contd = false; + filemap::iterator itfmBak = fmapBak->begin(); + + string strDirLine; + size_t strSize; + bool isDirective = false, trunc_flag = false; + + quote_idx_start = 0; + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + contd_nextline = false; + + quote_idx_start = 0; + idx_start = 0; + + if (CUtil::CheckBlank(iter->line)) + continue; + if (quote_contd) + { + // Replace quote until next character + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_contd) + continue; + } + if (contd) + comment_type = 3; + + while (!contd_nextline && idx_start < iter->line.length()) + { + // need to handle multiple quote chars in some languages, both " and ' may be accepted + quote_idx_start = FindQuote(iter->line, QuoteStart, quote_idx_start, 
QuoteEscapeFront); + comment_start = idx_start; + if (!contd) + FindCommentStart(iter->line, comment_start, comment_type, curBlckCmtStart, curBlckCmtEnd); + + if (comment_start == string::npos && quote_idx_start == string::npos) + break; + + if (comment_start != string::npos) + idx_start = comment_start; + + // if found quote before comment, e.g., "this is quote");//comment + if (quote_idx_start != string::npos && (comment_start == string::npos || quote_idx_start < comment_start)) + { + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_idx_start > idx_start) + { + // comment delimiter inside quote + idx_start = quote_idx_start; + continue; + } + } + else if (comment_start != string::npos) + { + // comment delimiter starts first + switch (comment_type) + { + case 1: // line comment, definitely whole line + iter->line = ""; + itfmBak->line = ""; + result->comment_lines++; + contd_nextline = true; + break; + case 2: // line comment, possibly embedded + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + contd_nextline = true; + break; + case 3: // block comment + case 4: + if (contd) + idx_end = iter->line.find(curBlckCmtEnd); + else + { + idx_end = iter->line.find(curBlckCmtEnd, idx_start + curBlckCmtStart.length()); + + // check whether comment is a directive (starts with '$') + isDirective = false; + for (size_t i = 0; i < BlockCommentStart.size(); i++) + { + if (iter->line.substr(idx_start, BlockCommentStart[i].length() + 1) == BlockCommentStart[i] + "$") + { + strDirLine = ""; + isDirective = true; + break; + } + } + } + + if (idx_end == string::npos) + { + if (comment_type == 3) + { + if (isDirective) + { + strSize = 
CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + if (contd) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + else + strDirLine = itfmBak->line.substr(0, strSize); + } + result->directive_lines[PHY]++; + } + else + result->comment_lines++; + iter->line = ""; + itfmBak->line = ""; + } + else if (comment_type == 4) + { + if (isDirective) + { + strSize = CUtil::TruncateLine(itfmBak->line.length() - idx_start, 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = itfmBak->line.substr(idx_start, strSize); + result->directive_lines[PHY]++; + } + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (!isDirective) + { + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + } + } + contd = true; + contd_nextline = true; + break; + } + else + { + if (isDirective) + { + strSize = CUtil::TruncateLine(idx_end - idx_start + curBlckCmtEnd.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + if (contd) + strDirLine += "\n" + itfmBak->line.substr(idx_start, strSize); + else + strDirLine = itfmBak->line.substr(idx_start, strSize); + } + result->directive_lines[PHY]++; + } + contd = false; + iter->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length()); + itfmBak->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length()); + if (isDirective) + { + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + strDirLine = ""; + } + else + { + if (iter->line.empty()) + result->comment_lines++; + else + { + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + 
result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + } + } + + // quote chars found may be erased as it is inside comment + quote_idx_start = idx_start; + } + break; + default: + cout << "Error in CountCommentsSLOC()" << endl; + break; + } + } + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPascalCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + bool found_block = false; + bool found_forifwhile = false; + bool found_end = false; + bool found_loop = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + + filemap::iterator fit, fitbak; + string line, lineBak; + StringVector loopLevel; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + // insert blank at the beginning (for searching keywords) + line = ' ' + fit->line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + // blank line means blank_line/comment_line/directive + // call SLOC function to detect logical SLOC and add to result + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, found_block, + found_forifwhile, found_end, found_loop, loopLevel); + + cnt = 0; + CUtil::CountTally(line, data_name_list, cnt, 1, exclude, "", "", NULL, false); + + // need to check also if the data line continues + if (cnt > 0) + 
result->data_lines[PHY] += 1; + else + result->exec_lines[PHY] += 1; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count, false); + } + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param found_block found block flag +* \param found_forifwhile found for, if, or while flag +* \param found_end found end flag +* \param found_loop found loop flag +* \param loopLevel nested loop level +*/ +void CPascalCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, bool &found_block, + bool &found_forifwhile, bool &found_end, bool &found_loop, StringVector &loopLevel) +{ + size_t start = 0; // starting index of the working string + size_t i, tempi, strSize; + string templine = CUtil::TrimString(line); + string tmp; + bool trunc_flag = false; + unsigned int loopCnt; + StringVector::iterator lit; + string keywordchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + // there may be more than 1 logical SLOC in a line + found_end = false; + for (i = 0; i < line.length(); i++) + { + if (line[i] == ';') + { + if (!found_end) + { + strSize = CUtil::TruncateLine(i - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + tmp = strLSLOC; + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, 
trunc_flag); + + // record end loop for nested loop processing + if (print_cmplx) + { + if (found_loop) + { + found_loop = false; + loopLevel.push_back("do"); + + loopCnt = 0; + for (lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + + loopLevel.pop_back(); + } + else if (CUtil::FindKeyword(tmp, "end", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "until", 0, TO_END_OF_STRING, false) != string::npos) + { + if (loopLevel.size() > 0) + loopLevel.pop_back(); + } + } + } + else + { + if (strLSLOC.size() > 0) + { + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + } + found_end = false; // end xxx + found_block = false; + found_forifwhile = false; + strLSLOC = ""; + strLSLOCBak = ""; + } + found_loop = false; + start = i + 1; + } + + // if it ends in xxx, then it has already been counted, so ignore it + tmp = "xxx " + CUtil::TrimString(line.substr(start, i + 1 - start)); + if (CUtil::FindKeyword(tmp, "end", 0, TO_END_OF_STRING, false) != string::npos) + { + // check for 'end,' and skip + if ((line.length() > i + 1 && line[i + 1] == ',') || line[i] == ',') + continue; + + found_end = true; + found_block = false; + found_loop = false; + + // capture SLOC + strSize = CUtil::TruncateLine(i - 2 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + + // if end is followed by a period 'end.' 
record SLOC if any + if (line.length() > i + 1 && line[i + 1] == '.') + { + // record end loop for nested loop processing + if (print_cmplx) + { + while (loopLevel.size() > 0) + loopLevel.pop_back(); + } + start = i + 2; + continue; + } + else + { + // record end loop for nested loop processing + if (print_cmplx) + { + if (loopLevel.size() > 0) + loopLevel.pop_back(); + } + start = i + 1; + } + } + + // continue the following processing only if line[i] is not in a middle of a word + if (keywordchars.find(line[i]) != string::npos && i < line.length() - 1) + continue; + + if (!found_end) + { + if (!found_forifwhile) + { + if (CUtil::FindKeyword(tmp, "for", 0 , TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "repeat", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "with", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) != string::npos) + { + found_forifwhile = true; + } + + if (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "repeat", 0, TO_END_OF_STRING, false) != string::npos) + { + if (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) != string::npos) + { + // found a SLOC + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + start = i + 1; + + found_loop = true; + } + else + { + // record nested loop level + if (print_cmplx) + { + loopLevel.push_back("repeat"); + + loopCnt = 0; + for (lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + 
result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + } + continue; + } + } + else if (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "then", 0, TO_END_OF_STRING, false) != string::npos) + { + // found a SLOC + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + start = i + 1; + + if (CUtil::FindKeyword(tmp, "do", 0, TO_END_OF_STRING, false) != string::npos) + found_loop = true; + + continue; + } + + // process else since no ';' is allowed before else + if (CUtil::FindKeyword(tmp, "else", 0, TO_END_OF_STRING, false) != string::npos) + { + strSize = CUtil::TruncateLine(i - 4 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + strLSLOC = strLSLOCBak = "else "; + start = i + 1; + continue; + } + + // process until since ';' is optional before else + if (CUtil::FindKeyword(tmp, "until", 0, TO_END_OF_STRING, false) != string::npos) + { + strSize = CUtil::TruncateLine(i - 5 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + strLSLOC = strLSLOCBak = "until "; + start = i + 1; + continue; + } + + if (!found_block) + { + if (CUtil::FindKeyword(tmp, "begin", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "asm", 0, 
TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "case",0, TO_END_OF_STRING, false) != string::npos) + { + found_block = true; + + // record nested loop level + if (print_cmplx) + { + if (found_loop) + { + found_loop = false; + loopLevel.push_back("do"); + + loopCnt = 0; + for (lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + else + loopLevel.push_back(""); + } + } + } + else + { + // only add new SLOC if 'of' is at the end of line and follows 'case', etc. + tempi = CUtil::FindKeyword(templine, "of", 0, TO_END_OF_STRING, false); + if (tempi == templine.length() - 2) + { + strSize = CUtil::TruncateLine(line.length() - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + start = line.length(); + found_block = false; + continue; + } + } + + // check for '= record' + if (CUtil::FindKeyword(tmp, "= array", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "= record", 0, TO_END_OF_STRING, false) != string::npos) + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, + found_block, found_forifwhile, found_end, trunc_flag); + start = i + 1; + + if (print_cmplx) + loopLevel.push_back(""); + + continue; + } + } + } + + tmp = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize 
> 0) + { + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp.substr(0, strSize); + } + if (tmp == "") + { + found_forifwhile = false; + } +} + +/*! +* Processes a logical line of code. +* This method is called after a logical SLOC is determined. +* The method adds LSLOC to the result, increases counts, and resets variables. +* +* \param result counter results +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param found_block found block flag +* \param found_forifwhile found for, if, or while flag +* \param found_end found end flag +* \param trunc_flag truncate lines? +*/ +void CPascalCounter::FoundSLOC(results* result, string &strLSLOC, string &strLSLOCBak, bool &found_block, + bool &found_forifwhile, bool &found_end, bool &trunc_flag) +{ + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + // add to the list for comparison purpose + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + // determine logical type, data declaration or executable + unsigned int cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count, false); + if (cnt > 0) + result->data_lines[LOG] += 1; + else + result->exec_lines[LOG] += 1; + + // reset all variables whenever a new statement/logical SLOC is found + strLSLOC = ""; + strLSLOCBak = ""; + found_block = false; + found_forifwhile = false; + found_end = false; + } +} diff --git a/src/CPascalCounter.h b/src/CPascalCounter.h new file mode 100644 index 0000000..719c18f --- /dev/null +++ b/src/CPascalCounter.h @@ -0,0 +1,33 @@ +//! Code counter class definition for the Pascal language. +/*! +* \file CPascalCounter.h +* +* This file contains the code counter class definition for the Pascal language. +*/ + +#ifndef CPascalCounter_h +#define CPascalCounter_h + +#include "CCodeCounter.h" + +//! Pascal code counter class. +/*! 
+* \class CPascalCounter +* +* Defines the Pascal counter class. +*/ +class CPascalCounter : public CCodeCounter +{ +public: + CPascalCounter(); + +protected: + virtual int CountCommentsSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapmBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, bool &found_block, + bool &found_forifwhile, bool &found_end, bool &found_loop, StringVector &loopLevel); + void FoundSLOC(results* result, string &strLSLOC, string &strLSLOCBak, bool &found_block, + bool &found_forifwhile, bool &found_end, bool &trunc_flag); +}; + +#endif diff --git a/src/CPerlCounter.cpp b/src/CPerlCounter.cpp new file mode 100644 index 0000000..d3a7955 --- /dev/null +++ b/src/CPerlCounter.cpp @@ -0,0 +1,976 @@ +//! Code counter class methods for the Perl language. +/*! +* \file CPerlCounter.cpp +* +* This file contains the code counter class methods for the Perl language. +*/ + +#include "CPerlCounter.h" + +/*! +* Constructs a CPerlCounter object. 
+*/ +CPerlCounter::CPerlCounter() +{ + classtype = PERL; + language_name = "Perl"; + + file_extension.push_back(".pl"); + file_extension.push_back(".pm"); + + LineCommentStart.push_back("#"); + + QuoteStart = "\"'/"; + QuoteEnd = "\"'/"; + QuoteEscapeRear = '\"'; + + directive.push_back("import"); + directive.push_back("no"); + directive.push_back("package"); + directive.push_back("require"); + directive.push_back("use"); + + data_name_list.push_back("AUTOLOAD"); + data_name_list.push_back("BEGIN"); + data_name_list.push_back("CHECK"); + data_name_list.push_back("CORE"); + data_name_list.push_back("DESTROY"); + data_name_list.push_back("END"); + data_name_list.push_back("INIT"); + data_name_list.push_back("NULL"); + + exec_name_list.push_back("catch"); + exec_name_list.push_back("elsif"); + exec_name_list.push_back("eval"); + exec_name_list.push_back("for"); + exec_name_list.push_back("foreach"); + exec_name_list.push_back("if"); + exec_name_list.push_back("sub"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("try"); + exec_name_list.push_back("unless"); + exec_name_list.push_back("until"); + exec_name_list.push_back("while"); + + math_func_list.push_back("abs"); + math_func_list.push_back("exp"); + math_func_list.push_back("int"); + math_func_list.push_back("rand"); + math_func_list.push_back("sqrt"); + math_func_list.push_back("srand"); + math_func_list.push_back("time"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("acosh"); + trig_func_list.push_back("asinh"); + trig_func_list.push_back("atanh"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("cosh"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("sinh"); + trig_func_list.push_back("tan"); + trig_func_list.push_back("tanh"); + + log_func_list.push_back("log"); + + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("^"); + 
cmplx_calc_list.push_back("++"); + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("--"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back(">>"); + cmplx_calc_list.push_back("<<"); + + cmplx_cond_list.push_back("elsif"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("foreach"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("unless"); + cmplx_cond_list.push_back("until"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("<=>"); + cmplx_logic_list.push_back("!"); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("xor"); + cmplx_logic_list.push_back("~"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back("lt"); + cmplx_logic_list.push_back("gt"); + cmplx_logic_list.push_back("ge"); + cmplx_logic_list.push_back("le"); + cmplx_logic_list.push_back("eq"); + cmplx_logic_list.push_back("ne"); + cmplx_logic_list.push_back("cmp"); + + cmplx_preproc_list.push_back("import"); + cmplx_preproc_list.push_back("no"); + cmplx_preproc_list.push_back("package"); + cmplx_preproc_list.push_back("require"); + cmplx_preproc_list.push_back("use"); + + cmplx_assign_list.push_back("="); + + cmplx_cyclomatic_list.push_back("if"); + cmplx_cyclomatic_list.push_back("elsif"); + cmplx_cyclomatic_list.push_back("case"); + cmplx_cyclomatic_list.push_back("while"); + cmplx_cyclomatic_list.push_back("until"); + cmplx_cyclomatic_list.push_back("for"); + cmplx_cyclomatic_list.push_back("foreach"); + cmplx_cyclomatic_list.push_back("catch"); + cmplx_cyclomatic_list.push_back("unless"); + cmplx_cyclomatic_list.push_back("?"); +} + +/*! 
+* Perform preprocessing of file lines before counting. +* +* \param fmap list of file lines +* +* \return method status +*/ +int CPerlCounter::PreCountProcess(filemap* fmap) +{ + size_t i, j; + filemap::iterator fit; + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + if (fit->line.empty()) + continue; + // check for $#, $', $", $`, {$...} + for (i = fit->line.length() - 1; i > 0; i--) + { + if (fit->line[i-1] == '{' && fit->line[i] == '$') + { + fit->line[i-1] = '$'; + for (j = i+1; j < fit->line.length(); j++) + { + if (fit->line[j] == '}') + { + fit->line[j] = '$'; + break; + } + } + } + if (fit->line[i-1] == '$' && + (fit->line[i] == '#' || fit->line[i] == '\'' || fit->line[i] == '"' || fit->line[i] == '`')) + fit->line[i] = '$'; + } + } + return 0; +} + +/*! +* Handles special case for Perl regexp operators m// s/// tr///. +* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote character of the current status +* +* \return method status +*/ +int CPerlCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd) +{ + static bool inRegexp = false; + static int slashCount = 1; // =1 for match m//, =1 for s/// and tr/// + size_t idx = idx_start; + size_t i = idx; + + if (inRegexp || strline[idx] == '/') + { + // replace all "\\" by "$$" + size_t start = idx_start; + while ((start = strline.find("\\\\", start)) != string::npos) + { + strline.replace(start, 2, "$$"); + start += 2; + } + + while (i < strline.length()) + { + if (inRegexp) + { + if ((strline[i] == '/' && (i == 0 || (i > 0 && strline[i - 1] != '\\'))) + || (contd && strline[i] == ';')) + { + // replace everything in the regexp + slashCount--; + if (slashCount == 0) + { + strline.replace(idx, i - idx + 1, i - idx + 1, '$'); + inRegexp = false; + contd = false; + idx = i + 1; + idx_start = idx; + return 
1; + } + } + } + else if (strline[i] == '/') + { + if ((i > 0 && strline[i-1] == 's') || (i > 1 && strline[i-1] == 'r' && strline[i-2] == 't')) + slashCount = 2; + else + slashCount = 1; + idx = i; + inRegexp = true; + } + + // quick fix to replace '#' + if (i > 0 && strline[i] == '#' && strline[i-1] == '$') + strline[i] = '$'; + + i++; + } + + if (inRegexp) + { + strline.replace(idx, i - idx, i - idx, '$'); + contd = true; + } + } + idx_start = idx; + + if (!inRegexp) + return CCodeCounter::ReplaceQuote(strline, idx_start, contd, CurrentQuoteEnd); + + return 1; +} + +/*! +* Counts directive lines of code. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPerlCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$/\\<>.+?!@#$%^&*()-+*"; + string strDirLine = ""; + + filemap::iterator itfmBak = fmapBak->begin(); + for (filemap::iterator iter = fmap->begin(); iter!=fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + // ensures the keyword stands alone, avoid, e.g., pack instead of package + if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = 
itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length()-1] == ';') + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPerlCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + string strLSLOC; + string strLSLOCBak; + unsigned int l_paren_cnt = 0; + bool l_forflag, found_forifwhile, found_while; + char prev_char = 0; + l_forflag = found_forifwhile = found_while = false; + + bool comment = false; + unsigned int cnt = 0; + size_t comPos = string::npos; + size_t p; + filemap::iterator fit, fitbak; + string line, lineBak; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"; + bool data_continue = false; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int openBrackets = 0; + StringVector loopLevel; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + // insert blank at the 
beginning(for searching keywords) + line = ' ' + fit->line; + lineBak = ' ' + fitbak->line; + + if (!CUtil::CheckBlank(line)) + { + if (comment == true) + { + p = line.find("\n"); + if (p != string::npos) + { + line.replace(0, p + 2, " "); + lineBak.replace(0, p + 2, " "); + } + else + continue; + } + + comPos = line.find("#"); + if (comPos == string::npos) + comment = false; + else + { + comment = true; + p = line.substr(comPos + 3).find(""); + if (p != string::npos) + { + line.replace(comPos, p - comPos + 1, " "); + lineBak.replace(comPos, p - comPos + 1, " "); + comment = false; + } + else + { + line.replace(comPos, line.size() - comPos, " "); + lineBak.replace(comPos, lineBak.size() - comPos, " "); + } + } + + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, l_paren_cnt, l_forflag, found_forifwhile, + found_while, prev_char, data_continue, temp_lines, phys_exec_lines, phys_data_lines, openBrackets, loopLevel); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param paren_cnt count of parenthesis +* \param forflag found for flag +* \param found_forifwhile found for, if, or while flag +* \param found_while found while flag +* \param prev_char previous character +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param openBrackets number of open brackets (no matching close bracket) +* \param loopLevel nested loop level +*/ +void CPerlCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + unsigned int &openBrackets, StringVector &loopLevel) +{ + size_t start = 0; // starting index of the working string + size_t i = 0, strSize, pos; + bool do_boolean, trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"; + unsigned int cnt = 0; + unsigned int loopCnt = 0; + StringVector::iterator lit; + string tmp = CUtil::TrimString(strLSLOC); + string tmp2; + + // check for the keyword do + do_boolean = (CUtil::FindKeyword(tmp, "do") == tmp.length() - 2); + + // check the entire line for SLOC present in it + while (i < line.length()) + { + if (line[i] == ';' || line[i] == '{') + { + // LSLOC terminators + // ';' any valid perl command ends with a ';' terminator + // do statements start with a '{' and ends with '}' + + if (line[i] == ';' && paren_cnt > 0) + { + // for a 'for' statement counter is incremented. 
+ i++; + continue; + } + + // record open bracket for nested loop processing + if (print_cmplx) + { + if (line[i] == '{') + { + openBrackets++; + if ((unsigned int)loopLevel.size() < openBrackets) + loopLevel.push_back(""); + } + else + { + if ((unsigned int)loopLevel.size() > openBrackets && openBrackets > 0) + loopLevel.pop_back(); + } + } + + if (found_while && found_forifwhile) + { + found_while = false; + found_forifwhile = false; + start = i + 1; + i++; + continue; + } + + if (line[i] == '{') + { + // case for(...); and if (...) { + // these specials are handled + if (found_forifwhile) + { + found_forifwhile = false; + start = i + 1; + i++; + continue; + } + + // check if 'do' precedes '{' + if (!do_boolean) + { + // find for 'do' in string before tmp string + tmp = CUtil::TrimString(line.substr(start, i - start)); + + // check for 'do' statement + do_boolean = (tmp == "do"); + } + if (do_boolean) + { + if (print_cmplx) + { + if (loopLevel.size() > 0) loopLevel.pop_back(); + loopLevel.push_back("do"); + } + + do_boolean = false; + start = i + 1; + i++; + continue; // do not store '{' following 'do' + } + } + + if (line[i] == ';' && prev_char == '}') + { + i++; + continue; + } + + // the 'for(...)' or 'while(..)' or anything with the '{' on the next line gets counted as an extra SLOC + // so to avoid that increment the counter and continue + if (line[i] == '{' && prev_char == ')') + { + i++; + continue; + } + + // check for expression modifiers using 'foreach', 'while', 'if', 'unless', 'until' (for example, statement unless condition;) + pos = string::npos; + if (line[i] == ';') + { + // check for empty statement (=1 LSLOC) + if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1) + { + strLSLOC = ";"; + strLSLOCBak = ";"; + } + else + { + tmp = CUtil::TrimString(strLSLOC + line.substr(start, i + 1 - start)); + pos = CUtil::FindKeyword(tmp, "foreach"); + if (pos == string::npos) + { + pos = CUtil::FindKeyword(tmp, "while"); + 
if (pos == string::npos) + + pos = CUtil::FindKeyword(tmp, "if"); + if (pos == string::npos) + { + pos = CUtil::FindKeyword(tmp, "unless"); + if (pos == string::npos) + pos = CUtil::FindKeyword(tmp, "until"); + } + } + } + } + if (pos != string::npos) + { + // capture statement before modifier + tmp2 = CUtil::TrimString(strLSLOCBak + lineBak.substr(start, i - start)); + strSize = CUtil::TruncateLine(pos, 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC = tmp.substr(0, strSize); + strLSLOCBak = tmp2.substr(0, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = ""; + strLSLOCBak = ""; + + strSize = CUtil::TruncateLine(tmp.length() - pos, 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC = tmp.substr(pos, strSize); + strLSLOCBak = tmp2.substr(pos, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + found_forifwhile = false; + } + else + { + strSize = CUtil::TruncateLine(i - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + tmp = strLSLOC; + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + temp_lines++; + if (data_continue == true) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + if (cnt > 0) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + } + else if (data_continue == true) + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + } + data_continue = false; + temp_lines = 0; + strLSLOC = ""; + strLSLOCBak = ""; + start = i + 1; + } + else if (line[i] == '}') + { + // also, {} is also skipped, empty block is not counted + if (prev_char == ';' 
|| prev_char == '{') + start = i + 1; + + // record close bracket for nested loop processing + if (print_cmplx) + { + if (openBrackets > 0) + openBrackets--; + if (loopLevel.size() > 0) + loopLevel.pop_back(); + } + } + else if (line[i] == '(') + { + if (!forflag) + { + // handle 'for', 'foreach', 'while', 'if', 'elsif, and 'unless' + tmp = "xxx " + CUtil::TrimString(line.substr(start, i)); + if ((CUtil::FindKeyword(tmp, "for") != string::npos) || (CUtil::FindKeyword(tmp, "foreach") != string::npos) || + (CUtil::FindKeyword(tmp, "while")!= string::npos) || (CUtil::FindKeyword(tmp, "if") != string::npos) || + (CUtil::FindKeyword(tmp, "elsif") != string::npos) || (CUtil::FindKeyword(tmp, "unless") != string::npos) || + (CUtil::FindKeyword(tmp, "until") != string::npos)) + { + forflag = true; + paren_cnt++; + + if (print_cmplx && (loopLevel.size() > openBrackets) && (openBrackets > 0)) + loopLevel.pop_back(); + + if (CUtil::FindKeyword(tmp, "while")!= string::npos) + { + if (print_cmplx) + loopLevel.push_back("while"); + found_while = true; + } + else if (CUtil::FindKeyword(tmp, "until")!= string::npos) + { + if (print_cmplx) + loopLevel.push_back("until"); + found_while = true; + } + else if (print_cmplx) + { + if (CUtil::FindKeyword(tmp, "for") != string::npos) + loopLevel.push_back("for"); + else if (CUtil::FindKeyword(tmp, "foreach") != string::npos) + loopLevel.push_back("foreach"); + + // record nested loop level + if (CUtil::FindKeyword(tmp, "if") == string::npos && CUtil::FindKeyword(tmp, "elsif") == string::npos && + CUtil::FindKeyword(tmp, "unless") == string::npos) + { + loopCnt = 0; + for (lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + } + } + } + else + paren_cnt++; + } + else if (line[i] == ')') + { + /* + cases + 'while(...);', + 
'while(...) {' and + '} while(...);' + is handled in this case + */ + if (forflag) + { + if (paren_cnt > 0) + paren_cnt--; + if (paren_cnt == 0) + { + // handling 'for', 'foreach', 'while', 'if', 'elsif', 'unless', 'until' + // check for expression modifiers using 'foreach', 'while', 'if', 'unless', 'until' (for example, statement unless (condition);) + tmp = CUtil::TrimString(strLSLOC + line.substr(start, i + 1 - start)); + pos = CUtil::FindKeyword(tmp, "foreach"); + if (pos == string::npos) + { + pos = CUtil::FindKeyword(tmp, "while"); + if (pos == string::npos) + { + pos = CUtil::FindKeyword(tmp, "if"); + if (pos == string::npos) + { + pos = CUtil::FindKeyword(tmp, "unless"); + if (pos == string::npos) + pos = CUtil::FindKeyword(tmp, "until"); + } + } + } + if (pos != string::npos) + { + // capture statement before modifier + tmp2 = CUtil::TrimString(strLSLOCBak + lineBak.substr(start, i + 1 - start)); + strSize = CUtil::TruncateLine(pos, 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC = tmp.substr(0, strSize); + strLSLOCBak = tmp2.substr(0, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = ""; + strLSLOCBak = ""; + + strSize = CUtil::TruncateLine(tmp.length() - pos, 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC = tmp.substr(pos, strSize); + strLSLOCBak = tmp2.substr(pos, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + found_forifwhile = false; + + // skip trailing ';' + tmp = CUtil::TrimString(line.substr(i + 1)); + if (tmp.length() > 0 && tmp[0] == ';') + i++; + } + else + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + tmp = strLSLOC; + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + found_forifwhile = true; + } 
+ strLSLOC = ""; + strLSLOCBak = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + start = i + 1; + forflag = false; + } + } + } + + if (line[i] != ' ' && line[i] != '\t') + { + // if ;}}} --> don't count }}} at all + // also, if {}}} --> don't count }}} at all + if (!(line[i] == '}' && (prev_char == ';' || prev_char == '{'))) // see case '}' above + prev_char = line[i]; + + // change to not found if a char appears before + if (line[i] != ')' && found_forifwhile) + found_forifwhile = false; + + if (CUtil::FindKeyword(line, "or", i, i + 2, true) == i) + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + tmp = strLSLOC; + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + temp_lines = 0; + strLSLOC = ""; + strLSLOCBak = ""; + start = i; + } + } + i++; + } + + tmp2 = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp2.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp2.substr(0, strSize); + tmp2 = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp2.substr(0, strSize); + } + if (tmp == "") + found_forifwhile = found_while = false; + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + + if (cnt > 0) + data_continue = true; + if (data_continue) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; +} + +/*! +* Parses lines for function/method names. 
+* +* \param line line to be processed +* \param lastline last line processed +* \param functionStack stack of functions +* \param functionName function name found +* \param functionCount function count found +* +* \return 1 if function name is found +*/ +int CPerlCounter::ParseFunctionName(const string &line, string &lastline, + filemap &functionStack, string &functionName, unsigned int &functionCount) +{ + string tline, str; + size_t i, idx, tidx, cnt; + unsigned int fcnt; + + tline = CUtil::TrimString(line); + idx = tline.find('{'); + if (idx != string::npos) + { + // check whether it is at first index, if yes then function name is at above line + if (idx == 0) + { + lineElement element(++functionCount, lastline); + functionStack.push_back(element); + lastline.erase(); + } + else + { + str = tline.substr(0, idx); + if (str.find('(') != string::npos && str[0] != '(') + lastline = str; + else + lastline += " " + str; + lineElement element(++functionCount, CUtil::TrimString(lastline)); + functionStack.push_back(element); + lastline.erase(); + } + } + else if (tline.length() > 0 && tline[tline.length() - 1] != ';' && + lastline.length() > 0 && lastline[lastline.length() - 1] != ';') + { + // append until all parenthesis are closed + tidx = lastline.find('('); + if (tidx != string::npos) + { + cnt = 1; + while (tidx != string::npos) + { + tidx = lastline.find('(', tidx + 1); + if (tidx != string::npos) + cnt++; + } + tidx = lastline.find(')'); + while (tidx != string::npos) + { + cnt++; + tidx = lastline.find(')', tidx + 1); + } + if (cnt % 2 != 0) + lastline += " " + tline; + else + lastline = tline; + } + else + lastline = tline; + } + else + lastline = tline; + + idx = line.find('}'); + if (idx != string::npos && !functionStack.empty()) + { + str = functionStack.back().line; + fcnt = functionStack.back().lineNumber; + functionStack.pop_back(); + idx = CUtil::FindKeyword(str, "sub"); + if (idx != string::npos && idx + 4 < str.length()) + { + functionName = 
CUtil::ClearRedundantSpaces(str.substr(idx + 4)); + functionCount = fcnt; + lastline.erase(); + return 1; + } + lastline.erase(); + } + + // check stack for any "sub" + idx = string::npos; + if (!functionStack.empty()) + { + for (i = 0; i < functionStack.size(); i++) + { + idx = CUtil::FindKeyword(functionStack[i].line, "sub"); + if (idx != string::npos) + break; + } + } + if (idx == string::npos) + { + // dealing with some code out of any subroutines, it a "main" code + return 2; + } + return 0; +} diff --git a/src/CPerlCounter.h b/src/CPerlCounter.h new file mode 100644 index 0000000..a323ca7 --- /dev/null +++ b/src/CPerlCounter.h @@ -0,0 +1,37 @@ +//! Code counter class definition for the Perl language. +/*! +* \file CPerlCounter.h +* +* This file contains the code counter class definition for the Perl language. +*/ + +#ifndef CPerlCounter_h +#define CPerlCounter_h + +#include "CCodeCounter.h" + +//! Perl code counter class. +/*! +* \class CPerlCounter +* +* Defines the Perl code counter class. 
+*/ +class CPerlCounter : public CCodeCounter +{ +public: + CPerlCounter(); + +protected: + virtual int PreCountProcess(filemap* fmap); + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd); + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + unsigned int &openBrackets, StringVector &loopLevel); + int ParseFunctionName(const string &line, string &lastline, + filemap &functionStack, string &functionName, unsigned int &functionCount); +}; + +#endif diff --git a/src/CPhpCounter.cpp b/src/CPhpCounter.cpp new file mode 100644 index 0000000..dfaae9e --- /dev/null +++ b/src/CPhpCounter.cpp @@ -0,0 +1,734 @@ +//! Code counter class methods for the PHP language. +/*! +* \file CPhpCounter.cpp +* +* This file contains the code counter class methods for the PHP language. +*/ + +#include "CPhpCounter.h" + +/*! +* Constructs a CPhpCounter object. 
+*/ +CPhpCounter::CPhpCounter() +{ + classtype = PHP; + language_name = "PHP"; + + file_extension.push_back(".*php"); + + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + QuoteEscapeFront = '\\'; + ContinueLine = "\\"; + BlockCommentStart.push_back("/*"); + BlockCommentEnd.push_back("*/"); + + LineCommentStart.push_back("//"); + LineCommentStart.push_back("#"); + + exclude_keywords.push_back("endif"); + exclude_keywords.push_back("endfor"); + exclude_keywords.push_back("endforeach"); + exclude_keywords.push_back("endswitch"); + exclude_keywords.push_back("endwhile"); + + exclude_loop.push_back("endfor"); + exclude_loop.push_back("endforeach"); + exclude_loop.push_back("endwhile"); + + directive.push_back("define"); + directive.push_back("include"); + directive.push_back("include_once"); + directive.push_back("require"); + directive.push_back("require_once"); + + data_name_list.push_back("array"); + data_name_list.push_back("bool"); + data_name_list.push_back("class"); + data_name_list.push_back("const"); + data_name_list.push_back("declare"); + data_name_list.push_back("extends"); + data_name_list.push_back("float"); + data_name_list.push_back("function"); + data_name_list.push_back("global"); + data_name_list.push_back("int"); + data_name_list.push_back("interface"); + data_name_list.push_back("NULL"); + data_name_list.push_back("object"); + data_name_list.push_back("private"); + data_name_list.push_back("protected"); + data_name_list.push_back("public"); + data_name_list.push_back("string"); + data_name_list.push_back("var"); + + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("default"); + exec_name_list.push_back("die"); + exec_name_list.push_back("do"); + exec_name_list.push_back("echo"); + exec_name_list.push_back("else"); + exec_name_list.push_back("exception"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("for"); + 
exec_name_list.push_back("foreach"); + exec_name_list.push_back("if"); + exec_name_list.push_back("isset"); + exec_name_list.push_back("new"); + exec_name_list.push_back("print"); + exec_name_list.push_back("return"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("this"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("try"); + exec_name_list.push_back("while"); + + math_func_list.push_back("abs"); + math_func_list.push_back("base_convert"); + math_func_list.push_back("bindec"); + math_func_list.push_back("ceil"); + math_func_list.push_back("decbin"); + math_func_list.push_back("dechex"); + math_func_list.push_back("decoct"); + math_func_list.push_back("deg2rad"); + math_func_list.push_back("exp"); + math_func_list.push_back("expm1"); + math_func_list.push_back("floor"); + math_func_list.push_back("fmod"); + math_func_list.push_back("getrandmax"); + math_func_list.push_back("hexdec"); + math_func_list.push_back("hypot"); + math_func_list.push_back("is_finite"); + math_func_list.push_back("is_infinite"); + math_func_list.push_back("is_nan"); + math_func_list.push_back("lcg_value"); + math_func_list.push_back("max"); + math_func_list.push_back("min"); + math_func_list.push_back("mt_getrandmax"); + math_func_list.push_back("mt_rand"); + math_func_list.push_back("mt_srand"); + math_func_list.push_back("octdec"); + math_func_list.push_back("pi"); + math_func_list.push_back("pow"); + math_func_list.push_back("rad2deg"); + math_func_list.push_back("rand"); + math_func_list.push_back("round"); + math_func_list.push_back("sqrt"); + math_func_list.push_back("srand"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("acosh"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("asinh"); + trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + trig_func_list.push_back("atanh"); + trig_func_list.push_back("cos"); + trig_func_list.push_back("cosh"); + trig_func_list.push_back("sin"); + 
trig_func_list.push_back("sinh"); + trig_func_list.push_back("tan"); + trig_func_list.push_back("tanh"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + log_func_list.push_back("log1p"); + + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("++"); + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("--"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back(">>"); + cmplx_calc_list.push_back("<<"); + + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("else if"); + cmplx_cond_list.push_back("elseif"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("switch"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("&"); + cmplx_logic_list.push_back("|"); + cmplx_logic_list.push_back("^"); + cmplx_logic_list.push_back("~"); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("==="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back("!=="); + cmplx_logic_list.push_back("<>"); + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("!"); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("xor"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + + cmplx_assign_list.push_back("="); + cmplx_assign_list.push_back("=>"); +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPhpCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + filemap::iterator itfmBak = fmapBak->begin(); + + for (filemap::iterator iter = fmap->begin(); iter!=fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // drop continuation symbol + if (strDirLine[strDirLine.length()-1] == '\\') + strDirLine = strDirLine.substr(0, strDirLine.length()-1); + + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length()-1] != ',' && iter->line[iter->line.length()-1] != '\\') + { + contd = false; + if 
(result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPhpCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + unsigned int paren_count = 0; + bool for_flag = false; + bool found_for = false; + bool found_forifwhile = false; + bool found_while = false; + char prev_char = 0; + bool data_continue = false; + bool inArrayDec = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + + filemap::iterator fit, fitbak; + string line, lineBak; + StringVector loopLevel; + + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + // insert blank at the beginning (for searching keywords) + line = ' ' + fit->line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, paren_count, for_flag, found_forifwhile, found_while, + prev_char, data_continue, temp_lines, phys_exec_lines, phys_data_lines, inArrayDec, found_for, loopLevel); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + + 
result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param paren_cnt count of parenthesis +* \param forflag found for flag +* \param found_forifwhile found for, if, or while flag +* \param found_while found while flag +* \param prev_char previous character +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param inArrayDec marks an array declaration +* \param found_for found for loop +* \param loopLevel nested loop level +*/ +void CPhpCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + bool &inArrayDec, bool &found_for, StringVector &loopLevel) +{ + size_t start = 0; + size_t i = 0, strSize; + bool found_do, found_try, found_else, found_declare, trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:"; + unsigned int cnt = 0; + + string tmp = CUtil::TrimString(strLSLOC); + + // do, try + found_do = (CUtil::FindKeyword(tmp, "do") != string::npos); + 
found_try = (CUtil::FindKeyword(tmp, "try") != string::npos); + // else, declare are treated differently, else and declare are included in SLOC, do and try are not + found_else = (CUtil::FindKeyword(tmp, "else") != string::npos); + found_declare = (CUtil::FindKeyword(tmp, "declare") != string::npos); + + while (i < line.length()) // there may be more than 1 logical SLOC in this line + { + switch (line[i]) + { + case ';': case '{': case ':': // LSLOC terminators + // ';' for normal executable or declaration statement + // '{' or ':' for starting a function or 'do' stmt or a block (which is counted) + // get the previous logical mark until i-1 index is the new LSLOC + // except 'do' precedes '{' + // except '}' precedes ';' ?? + // do nothing inside 'for' statement + if (found_for == true && paren_cnt > 0 && line[i] == ';') + break; + + // record open bracket for nested loop processing + // check for excluded loop keywords for alternate control syntax + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exclude_loop, cnt, 1, exclude, "", ""); + if (cnt > 0) + { + if ((unsigned int)loopLevel.size() > 0) + loopLevel.pop_back(); + } + else if (line[i] == '{') + loopLevel.push_back(""); + else if (line[i] == ';') + { + if ((unsigned int)loopLevel.size() > 0 && loopLevel.back() != "") + { + tmp = loopLevel.back(); + if (tmp[tmp.length()-1] != ':') + loopLevel.pop_back(); + } + } + else if (line[i] == ':') + { + if ((unsigned int)loopLevel.size() > 0 && loopLevel.back() != "") + { + tmp = CUtil::TrimString(line.substr(0, i)); + if (CUtil::FindKeyword(tmp, loopLevel.back()) != string::npos) + { + tmp = loopLevel.back() + ":"; + loopLevel.pop_back(); + loopLevel.push_back(tmp); + } + } + } + } + + // check for excluded keywords for alternate control syntax (don't count as LSLOC) + cnt = 0; + CUtil::CountTally(line, exclude_keywords, cnt, 1, exclude, "", ""); + if (cnt > 0) + { + start = i + 1; + break; + } + + // case 'if(...):', 'while(...):, for(...):, 
foreach(...):, switch(...):' + // this case is handled in case ')' + // skip other ':' + if (line[i] == ':') + { + if (found_forifwhile) + { + found_forifwhile = false; + start = i + 1; + } + break; + } + + // case 'while(...);', 'while(...) {', '} while(...);' + // this case is handled in case ')' + if (found_while && found_forifwhile) + { + found_while = false; + found_forifwhile = false; + start = i + 1; + break; + } + + if (line[i] == '{' || line[i] == ':') + { + if (prev_char == '=') + inArrayDec = true; + + // continue until seeing ';' + if (inArrayDec) + break; + + // case for(...);, if (...) {, and if (...):, elseif (...) {, and elseif (...): + // these specials are handled + if (found_forifwhile) + { + found_forifwhile = false; + start = i + 1; + break; + } + + // check if 'do' precedes '{' or ':' + if (!found_do && !found_try && !found_else && !found_declare) + { + // find 'do' in string before tmp string + tmp = CUtil::TrimString(line.substr(start, i - start)); + found_do = (tmp == "do"); // found 'do' statement + found_try = (tmp == "try"); // found 'try' statement + found_else = (tmp == "else"); // found 'else' statement + found_declare = (CUtil::FindKeyword(tmp, "declare") != string::npos); // found 'declare' statement + } + if (found_do || found_try || found_else) + { + if (found_do && print_cmplx) + { + if (loopLevel.size() > 0) + loopLevel.pop_back(); + loopLevel.push_back("do"); + } + found_do = false; + found_try = false; + if (!found_else) + { + // everything before 'do', 'try' are cleared + strLSLOC = ""; + strLSLOCBak = ""; + start = i + 1; + } + break; // do not store '{' or ':' following 'do' + } + } + + // wrong, e.g., a[]={1,2,3}; + if (line[i] == ';' && prev_char == '}') + { + // check if in array declaration or not + // if no, skip, otherwise, complete the SLOC containing array declaration + if (!inArrayDec) + { + start = i + 1; + break; + } + } + + inArrayDec = false; + + // check for empty statement (=1 LSLOC) + if 
(CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1) + { + strLSLOC = ";"; + strLSLOCBak = ";"; + } + else + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + temp_lines++; + if (data_continue == true && line[i] == ';') + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + if (cnt > 0 && line[i] == ';') + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + if (found_declare) + { + result->directive_lines[PHY] += temp_lines; + result->directive_lines[LOG]++; + } + else + { + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + } + } + else if (data_continue == true && line[i] == ';') + phys_data_lines = temp_lines; + else if (found_declare) + result->directive_lines[PHY] += temp_lines; + else + phys_exec_lines = temp_lines; + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + start = i + 1; + + // reset some flagging parameters + forflag = false; + paren_cnt = 0; + found_while = false; + found_forifwhile = false; + found_for = false; + found_declare = false; + + break; + case '(': + if (forflag) + paren_cnt++; + else + { + // handle 'for', 'foreach', 'while', 'if' the same way + tmp = CUtil::TrimString(line.substr(start, i)); + if (CUtil::FindKeyword(tmp, "for") != string::npos + || CUtil::FindKeyword(tmp, "foreach") != string::npos + || CUtil::FindKeyword(tmp, "while")!= string::npos + || CUtil::FindKeyword(tmp, "if") != string::npos + || CUtil::FindKeyword(tmp, "elseif") != string::npos + || CUtil::FindKeyword(tmp, "switch") != string::npos) + { + forflag = true; + paren_cnt++; + + if 
(CUtil::FindKeyword(tmp, "for") != string::npos) + { + if (print_cmplx) + loopLevel.push_back("for"); + found_for = true; + } + else if (CUtil::FindKeyword(tmp, "while")!= string::npos) + { + if (print_cmplx) + loopLevel.push_back("while"); + found_while = true; + } + else if (CUtil::FindKeyword(tmp, "foreach") != string::npos) + loopLevel.push_back("foreach"); + + else if (print_cmplx && CUtil::FindKeyword(tmp, "foreach") != string::npos) + loopLevel.push_back("foreach"); + + // record nested loop level + if (print_cmplx) + { + if (CUtil::FindKeyword(tmp, "if") == string::npos && + CUtil::FindKeyword(tmp, "elseif") == string::npos && + CUtil::FindKeyword(tmp, "switch") == string::npos) + { + unsigned int loopCnt = 0; + for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + } + } + } + break; + case ')': + if (forflag) + { + if (paren_cnt > 0) + paren_cnt--; + if (paren_cnt == 0) + { + // handle 'for', 'foreach', 'while', 'if', 'elseif', 'switch' + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOCBak = strLSLOC = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + start = i + 1; + found_forifwhile = true; + forflag = false; + found_for = false; + } + } + break; + case '}': + // skip '}' when found ';' and then '}' because '{' is counted already + // also, {} is also skipped, counted + if (prev_char == ';' || prev_char == '{' || prev_char == '}') + { + if (!inArrayDec) + start = i + 1; + } + + // record close bracket for nested loop processing + if (print_cmplx) + { + if 
((unsigned int)loopLevel.size() > 0) + loopLevel.pop_back(); + if ((unsigned int)loopLevel.size() > 0 && loopLevel.back() != "") + { + tmp = loopLevel.back(); + if (tmp[tmp.length()-1] != ':') + loopLevel.pop_back(); + } + } + break; + } + + if (line[i] != ' ' && line[i] != '\t') + { + // if ;}}} --> don't count }}} at all + // also, if {}}} --> don't count }}} at all + // if ( !(line[i] == '}' && (prev_char == ';' || prev_char == '{'))) // see case '}' above + prev_char = line[i]; + + // change to not found if a char appears before + if (line[i] != ')' && found_forifwhile) + found_forifwhile = false; + } + + i++; + } + + tmp = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp.substr(0, strSize); + + // drop continuation symbol + if (strLSLOC[strLSLOC.length()-1] == '\\') + { + strLSLOC = strLSLOC.substr(0, strLSLOC.length()-1); + strLSLOCBak = strLSLOCBak.substr(0, strLSLOCBak.length()-1); + } + } + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + + if (cnt > 0) + data_continue = true; + if (data_continue) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; +} diff --git a/src/CPhpCounter.h b/src/CPhpCounter.h new file mode 100644 index 0000000..e1046e6 --- /dev/null +++ b/src/CPhpCounter.h @@ -0,0 +1,36 @@ +//! Code counter class definition for the PHP language. +/*! +* \file CPhpCounter.h +* +* This file contains the code counter class definition for the PHP language. +*/ + +#ifndef CPhpCounter_h +#define CPhpCounter_h + +#include "CCodeCounter.h" + +//! PHP code counter class. +/*! +* \class CPhpCounter +* +* Defines the PHP code counter class. 
+* NOTE: PHP variables are case sensitive, but PHP functions are case insensitive. +*/ +class CPhpCounter : public CCodeCounter +{ +public: + CPhpCounter(); + +protected: + StringVector exclude_loop; //!< List of keywords to exclude for loops + + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + bool &inArrayDec, bool &found_for, StringVector &loopLevel); +}; + +#endif diff --git a/src/CPythonCounter.cpp b/src/CPythonCounter.cpp new file mode 100644 index 0000000..cd4cd26 --- /dev/null +++ b/src/CPythonCounter.cpp @@ -0,0 +1,743 @@ +//! Code counter class methods for the Python language. +/*! +* \file CPythonCounter.cpp +* +* This file contains the code counter class methods for the Python language. +*/ + +#include "CPythonCounter.h" + +/*! +* Constructs a CPythonCounter object. 
+*/ +CPythonCounter::CPythonCounter() +{ + classtype = PYTHON; + language_name = "Python"; + + file_extension.push_back(".py"); + + BlockCommentStart.push_back("\"\"\""); + BlockCommentEnd.push_back("\"\"\""); + BlockCommentStart.push_back("'''"); + BlockCommentEnd.push_back("'''"); + LineCommentStart.push_back("#"); + QuoteStart = "\"\'"; + QuoteEnd = "\"\'"; + QuoteEscapeRear = '\\'; + + loop_keywords.push_back("for"); + loop_keywords.push_back("while"); + + directive.push_back("do"); + directive.push_back("from"); + directive.push_back("import"); + directive.push_back("no"); + directive.push_back("package"); + directive.push_back("use"); + directive.push_back("require"); + + exec_name_list.push_back("and"); + exec_name_list.push_back("as"); + exec_name_list.push_back("assert"); + exec_name_list.push_back("break"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("def"); + exec_name_list.push_back("del"); + exec_name_list.push_back("elif"); + exec_name_list.push_back("else"); + exec_name_list.push_back("except"); + exec_name_list.push_back("exec"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("finally"); + exec_name_list.push_back("for"); + exec_name_list.push_back("global"); + exec_name_list.push_back("if"); + exec_name_list.push_back("in"); + exec_name_list.push_back("is"); + exec_name_list.push_back("lambda"); + exec_name_list.push_back("not"); + exec_name_list.push_back("or"); + exec_name_list.push_back("pass"); + exec_name_list.push_back("print"); + exec_name_list.push_back("raise"); + exec_name_list.push_back("return"); + exec_name_list.push_back("try"); + exec_name_list.push_back("while"); + exec_name_list.push_back("with"); + exec_name_list.push_back("yield"); + + math_func_list.push_back("math.ceil"); + math_func_list.push_back("math.copysign"); + math_func_list.push_back("math.degrees"); + math_func_list.push_back("math.e"); + math_func_list.push_back("math.exp"); + math_func_list.push_back("math.fabs"); + 
math_func_list.push_back("math.factorial"); + math_func_list.push_back("math.floor"); + math_func_list.push_back("math.fmod"); + math_func_list.push_back("math.frexp"); + math_func_list.push_back("math.fsum"); + math_func_list.push_back("math.hypot"); + math_func_list.push_back("math.ldexp"); + math_func_list.push_back("math.modf"); + math_func_list.push_back("math.pi"); + math_func_list.push_back("math.pow"); + math_func_list.push_back("math.radians"); + math_func_list.push_back("math.sqrt"); + math_func_list.push_back("math.trunc"); + math_func_list.push_back("cmath.phase"); + math_func_list.push_back("cmath.polar"); + math_func_list.push_back("cmath.rect"); + + trig_func_list.push_back("math.acos"); + trig_func_list.push_back("math.acosh"); + trig_func_list.push_back("math.asinh"); + trig_func_list.push_back("math.atanh"); + trig_func_list.push_back("math.asin"); + trig_func_list.push_back("math.atan"); + trig_func_list.push_back("math.atan2"); + trig_func_list.push_back("math.cos"); + trig_func_list.push_back("math.cosh"); + trig_func_list.push_back("math.sin"); + trig_func_list.push_back("math.sinh"); + trig_func_list.push_back("math.tan"); + trig_func_list.push_back("math.tanh"); + + log_func_list.push_back("math.log"); + log_func_list.push_back("math.log10"); + log_func_list.push_back("math.log1p"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("**"); + + cmplx_cond_list.push_back("elif"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("except"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("try"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + 
cmplx_logic_list.push_back("=<"); + + cmplx_assign_list.push_back("="); +} + +/*! +* Replaces up to ONE quoted string inside a string starting at idx_start. +* Uses a string instead of a character to allow processing multi-line +* literals """ and '''. +* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote string of the current status +* +* \return method status +*/ +int CPythonCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, string &CurrentQuoteEnd) +{ + size_t idx_end, idx_quote; + + if (contd) + { + // python: use string instead of character to check for """ and ''' + idx_start = 0; + if (strline.length() >= CurrentQuoteEnd.length() && + strline.substr(0, CurrentQuoteEnd.length()) == CurrentQuoteEnd) + { + idx_start = CurrentQuoteEnd.length(); + contd = false; + return 1; + } + strline[0] = '$'; + } + else + { + // handle two quote chars in some languages, both " and ' may be accepted + idx_start = FindQuote(strline, QuoteStart, idx_start, QuoteEscapeFront); + if (idx_start != string::npos) + { + idx_quote = QuoteStart.find_first_of(strline[idx_start]); + CurrentQuoteEnd = QuoteEnd[idx_quote]; + // python: check for """ or ''' + if (strline.length() >= idx_start + 3) + { + if (CurrentQuoteEnd == "\"") + { + if (strline.substr(idx_start, 3) == "\"\"\"") + CurrentQuoteEnd = "\"\"\""; + } + else if (CurrentQuoteEnd == "'") + { + if (strline.substr(idx_start, 3) == "'''") + CurrentQuoteEnd = "'''"; + } + } + } + else + { + idx_start = strline.length(); + return 0; + } + } + + // python: handle """ and ''' + if (CurrentQuoteEnd.length() == 3) + { + if (idx_start + 3 >= strline.length()) + idx_end = string::npos; + else + { + idx_end = strline.find(CurrentQuoteEnd, idx_start + 3); + if (idx_end != string::npos) + idx_end += 2; // shift to last quote character + } + } + else + idx_end = 
CUtil::FindCharAvoidEscape(strline, CurrentQuoteEnd[0], idx_start + 1, QuoteEscapeFront); + if (idx_end == string::npos) + { + idx_end = strline.length() - 1; + strline.replace(idx_start + 1, idx_end - idx_start, idx_end - idx_start, '$'); + contd = true; + idx_start = idx_end + 1; + } + else + { + if (CurrentQuoteEnd.length() != 3 && (QuoteEscapeRear) && (strline.length() > idx_end + 1) && (strline[idx_end+1] == QuoteEscapeRear)) + { + strline[idx_end] = '$'; + strline[idx_end+1] = '$'; + } + else + { + contd = false; + strline.replace(idx_start + 1, idx_end - idx_start - 1, idx_end - idx_start - 1, '$'); + idx_start = idx_end + 1; + } + } + return 1; +} + +/*! +* Counts the number of comment lines, removes comments, and +* replaces quoted strings by special chars, e.g., $ +* All arguments are modified by the method. +* Special processing for """ and ''' which can be multi-line literal +* or a multi-line comment if it stands alone. +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPythonCounter::CountCommentsSLOC(filemap* fmap, results* result, filemap *fmapBak) +{ + if (BlockCommentStart.empty() && LineCommentStart.empty()) + return 0; + if (classtype == UNKNOWN || classtype == DATAFILE) + return 0; + + bool contd = false; + bool contd_nextline; + int comment_type = 0; + /* + comment_type: + 0 : not a comment + 1 : line comment, whole line + 2 : line comment, embedded + 3 : block comment, undecided + 4 : block comment, embedded + */ + + size_t idx_start, idx_end, comment_start; + size_t quote_idx_start; + string curBlckCmtStart, curBlckCmtEnd, tmp; + string CurrentQuoteEnd = ""; + bool quote_contd = false; + filemap::iterator itfmBak = fmapBak->begin(); + + quote_idx_start = 0; + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + contd_nextline = 
false; + + quote_idx_start = 0; + idx_start = 0; + + if (CUtil::CheckBlank(iter->line)) + continue; + if (quote_contd) + { + // Replace quote until next character + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_contd) + continue; + } + + if (contd) + comment_type = 3; + + while (!contd_nextline && idx_start < iter->line.length()) + { + // need to handle multiple quote chars in some languages, both " and ' may be accepted + quote_idx_start = FindQuote(iter->line, QuoteStart, quote_idx_start, QuoteEscapeFront); + comment_start = idx_start; + if (!contd) + { + FindCommentStart(iter->line, comment_start, comment_type, curBlckCmtStart, curBlckCmtEnd); + if (comment_start != string::npos && comment_type > 2) + { + // python: check whether this is a multi-line literal or a block comment + tmp = CUtil::TrimString(iter->line, -1); + if (iter->line.length() - tmp.length() != comment_start) + { + quote_idx_start = comment_start; + comment_start = string::npos; + } + } + } + + if (comment_start == string::npos && quote_idx_start == string::npos) + break; + + if (comment_start != string::npos) + idx_start = comment_start; + + // if found quote before comment, e.g., "this is quote");//comment + if (quote_idx_start != string::npos && (comment_start == string::npos || quote_idx_start < comment_start)) + { + ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_idx_start > idx_start && quote_idx_start != iter->line.length()) + { + // comment delimiter inside quote + idx_start = quote_idx_start; + continue; + } + } + else if (comment_start != string::npos) + { + // comment delimiter starts first + switch (comment_type) + { + case 1: // line comment, definitely whole line + iter->line = ""; + itfmBak->line = ""; + result->comment_lines++; + contd_nextline = true; + break; + case 2: // line comment, possibly embedded + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, 
idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + contd_nextline = true; + break; + case 3: // block comment + case 4: + if (contd) + idx_end = iter->line.find(curBlckCmtEnd); + else + idx_end = iter->line.find(curBlckCmtEnd, idx_start + curBlckCmtStart.length()); + + if (idx_end == string::npos) + { + if (comment_type == 3) + { + iter->line = ""; + itfmBak->line = ""; + result->comment_lines++; + } + else if (comment_type == 4) + { + iter->line = iter->line.substr(0, idx_start); + itfmBak->line = itfmBak->line.substr(0, idx_start); + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + } + contd = true; + contd_nextline = true; + break; + } + else + { + contd = false; + iter->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length()); + itfmBak->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length()); + if (iter->line.empty()) + result->comment_lines++; + else + { + // trim trailing space + iter->line = CUtil::TrimString(iter->line, 1); + itfmBak->line = CUtil::TrimString(itfmBak->line, 1); + if (iter->line.empty()) + result->comment_lines++; // whole line + else + result->e_comm_lines++; // embedded + } + + // quote chars found may be erased as it is inside comment + quote_idx_start = idx_start; + } + break; + default: + cout << "Error in CountCommentsSLOC()" << endl; + break; + } + } + } + } + return 1; +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPythonCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + + filemap::iterator itfmBak = fmapBak->begin(); + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + if ((idx = iter->line.find((*viter), 0)) != string::npos && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // drop continuation symbol + if (strDirLine[strDirLine.length()-1] == '\\') + strDirLine = strDirLine.substr(0, strDirLine.length()-1); + + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length()-1] != '_') + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + 
result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CPythonCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + unsigned int paren_count = 0; + string strLSLOC = ""; + string strLSLOCBak = ""; + + filemap::iterator fit, fitbak; + string line, lineBak; + UIntVector loopWhiteSpace; + + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + lineBak = fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + // does the ReplaceQuote get the continuation character \ replaced? + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, paren_count, loopWhiteSpace); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + } + } + return 1; +} + +// Logical Counting Consideration +/* +Not counted line: +else: +\ (continuation char) +in () +in [] +in {} +end of line preceding by operator characters + - * / = < > | & is in % ^ \ ~ not , : +compound statement, with : and ; +esp. compound statement with : in middle line, not in () [] {} or in else: +*/ + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. 
Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param paren_cnt count of parenthesis +* \param loopWhiteSpace count of white space to determine loop ends +*/ +void CPythonCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + unsigned int &paren_cnt, UIntVector &loopWhiteSpace) +{ +#define CONT_STR_LENGTH 18 + string continuation_str[] = {"is", "in", "not", "+", "-", "*", "/", "=", "<", ">", "|", "&", "%", "^", "\\", "~", ",", "$"}; + + size_t start = 0; // starting index of the working string + size_t i = 0, idx, strSize; + int n; + bool trunc_flag = false; + unsigned int cnt = 0, numWS; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + string tmp; + + // process: + // paren_cnt is used with {} [] () + // 1. check if the current char is in one of the parentheses + // 2. if no, check if the line has : or ; (statement separators), except else: + // 3. if yes, count and put the statement in the result + // 4. if the line does not ends with a continuation string or a statement separator (handled) + // and the line is not in one of the parentheses + // then count and put the statement in the result + // 5. 
physical count considers all lines executables (or directives, no declarations) + + // check for loop ends, new loops, and record white space in order to determine ends + if (print_cmplx) + { + // check white space for loop ends + if (loopWhiteSpace.size() > 0) + { + // get white space + tmp = line; + tmp = CUtil::TrimString(tmp, -1); + numWS = (unsigned)(line.length() - tmp.length()); + + // check for loop ends + for (n = (int)loopWhiteSpace.size() - 1; n >= 0; n--) + { + if (loopWhiteSpace.at(n) != numWS) + break; + else + loopWhiteSpace.pop_back(); + } + } + + // check for loop keywords (for, while) + cnt = 0; + CUtil::CountTally(line, loop_keywords, cnt, 1, exclude, "", "", NULL); + if (cnt > 0) + { + if (loopWhiteSpace.size() < 1) + { + // get white space + tmp = line; + tmp = CUtil::TrimString(tmp, -1); + numWS = (unsigned)(line.length() - tmp.length()); + } + + // add nested loop white space and record nested loop level + for (i = 0; i < cnt; i++) + { + loopWhiteSpace.push_back(numWS); + + if ((unsigned int)result->cmplx_nestloop_count.size() < loopWhiteSpace.size()) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopWhiteSpace.size()-1]++; + } + } + } + + line = CUtil::TrimString(line); + lineBak = CUtil::TrimString(lineBak); + size_t line_length = line.length(); + bool lineContinued = false; + + while (i < line_length) + { + switch (line[i]) + { + case '{': case '[': case '(': // parentheses opener + paren_cnt++; + break; + case '}': case ']': case ')': // parentheses closer + if (paren_cnt > 0) + paren_cnt--; + break; + } + + // 2. if no parentheses enclosing, and if the char is a statement separator + if (paren_cnt == 0 && (line[i] == ';' || line[i] == ':')) + { + tmp = CUtil::ClearRedundantSpaces(line); + // if line[..i] is else: then exit the outer if + if (tmp.rfind("else:") != tmp.length() - 5) + { + // 3. 
+ strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, i); + strLSLOCBak += lineBak.substr(start, i); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + // increase logical SLOC here + result->exec_lines[LOG]++; + } + strLSLOC = strLSLOCBak = ""; + start = i + 1; + } + else + lineContinued = true; + } + i++; + } + + if (paren_cnt == 0) + { + // add logical SLOC if the line does not end with a continuation string/char + if (!lineContinued) + { + for (i = 0; i < CONT_STR_LENGTH; i++) + { + if (continuation_str[i].length() == 1) + { + if (line[line_length - 1] == continuation_str[i][0]) + { + lineContinued = true; + break; + } + } + else + { + idx = CUtil::FindKeyword(line, continuation_str[i]); + if (idx != string::npos && idx == line_length - continuation_str[i].length() - 1) + { + lineContinued = true; + break; + } + } + } + } + + if (!lineContinued) + { + strSize = CUtil::TruncateLine(line_length - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, line_length); + strLSLOCBak += lineBak.substr(start, line_length); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + // increase logical SLOC here + result->exec_lines[LOG]++; + } + strLSLOC = strLSLOCBak = ""; + } + else + { + tmp = CUtil::TrimString(line.substr(start, line_length - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, line_length - start)); + strLSLOCBak += tmp.substr(0, strSize); + } + } + } + result->exec_lines[PHY]++; +} diff --git a/src/CPythonCounter.h b/src/CPythonCounter.h new file mode 100644 index 0000000..7ac532f --- /dev/null +++ b/src/CPythonCounter.h @@ -0,0 +1,35 @@ +//! Code counter class definition for the Python language. +/*! 
+* \file CPythonCounter.h +* +* This file contains the code counter class definition for the Python language. +*/ + +#ifndef CPythonCounter_h +#define CPythonCounter_h + +#include "CCodeCounter.h" + +//! Python code counter class. +/*! +* \class CPythonCounter +* +* Defines the Python code counter class. +*/ +class CPythonCounter : public CCodeCounter +{ +public: + CPythonCounter(); + +protected: + StringVector loop_keywords; //!< List of keywords to indicate the beginning of a loop + + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, string &CurrentQuoteEnd); + virtual int CountCommentsSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, + unsigned int &paren_cnt, UIntVector &loopWhiteSpace); +}; + +#endif diff --git a/src/CRubyCounter.cpp b/src/CRubyCounter.cpp new file mode 100644 index 0000000..c1c2a53 --- /dev/null +++ b/src/CRubyCounter.cpp @@ -0,0 +1,553 @@ +//! Code counter class methods for the Ruby language. +/*! +* \file CRubyCounter.cpp +* +* This file contains the code counter class methods for the Ruby language. +*/ + +#include "CRubyCounter.h" + +/*! +* Constructs a CRubyCounter object. 
+*/ +CRubyCounter::CRubyCounter() +{ + classtype = RUBY; + language_name = "Ruby"; + + file_extension.push_back(".rb"); + + QuoteStart = "\"'%/<"; + QuoteEnd = "\"'/"; + QuoteEscapeFront = '\\'; + ContinueLine = ".,\\+-*/"; + + BlockCommentStart.push_back("=begin"); + BlockCommentEnd.push_back("=end"); + + LineCommentStart.push_back("#"); + delimiter = ""; + + exec_name_list.push_back("alias"); + exec_name_list.push_back("begin"); + exec_name_list.push_back("break"); + exec_name_list.push_back("case"); + exec_name_list.push_back("catch"); + exec_name_list.push_back("collect"); + exec_name_list.push_back("continue"); + exec_name_list.push_back("default"); + exec_name_list.push_back("die"); + exec_name_list.push_back("do"); + exec_name_list.push_back("each"); + exec_name_list.push_back("else"); + exec_name_list.push_back("elsif"); + exec_name_list.push_back("end"); + exec_name_list.push_back("ensure"); + exec_name_list.push_back("exception"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("for"); + exec_name_list.push_back("if"); + exec_name_list.push_back("module"); + exec_name_list.push_back("new"); + exec_name_list.push_back("next"); + exec_name_list.push_back("puts"); + exec_name_list.push_back("print"); + exec_name_list.push_back("redo"); + exec_name_list.push_back("rescue"); + exec_name_list.push_back("retry"); + exec_name_list.push_back("return"); + exec_name_list.push_back("switch"); + exec_name_list.push_back("throw"); + exec_name_list.push_back("try"); + exec_name_list.push_back("undef"); + exec_name_list.push_back("unless"); + exec_name_list.push_back("until"); + exec_name_list.push_back("when"); + exec_name_list.push_back("while"); + exec_name_list.push_back("yield"); + + math_func_list.push_back("atan2"); + math_func_list.push_back("cos"); + math_func_list.push_back("exp"); + math_func_list.push_back("frexp"); + math_func_list.push_back("ldexp"); + math_func_list.push_back("rand"); + math_func_list.push_back("sin"); + 
math_func_list.push_back("sqrt"); + math_func_list.push_back("srand"); + math_func_list.push_back("tan"); + + log_func_list.push_back("log"); + log_func_list.push_back("log10"); + + cmplx_calc_list.push_back("%"); + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("**"); + + cmplx_cond_list.push_back("begin"); + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("elsif"); + cmplx_cond_list.push_back("ensure"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("rescue"); + cmplx_cond_list.push_back("unless"); + cmplx_cond_list.push_back("until"); + cmplx_cond_list.push_back("when"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("<=>"); + cmplx_logic_list.push_back("!"); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("~"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back("==="); + + cmplx_assign_list.push_back("="); + cmplx_assign_list.push_back("+="); + cmplx_assign_list.push_back("-="); + cmplx_assign_list.push_back("*="); + cmplx_assign_list.push_back("/="); + cmplx_assign_list.push_back("%="); + cmplx_assign_list.push_back("**="); +} + +/*! +* Replaces quoted strings inside a string starting at idx_start with '$'. +* Handles special cases for Ruby string literals. 
+* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote character of the current status +* +* \return method status +*/ +int CRubyCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd) +{ + //%DStrD # multiple lines allowed + //%QDStrD # multiple lines allowed + //%qDStrD # multiple lines allowed + //< + // blank is also used as a delimiter for string literal started with % + size_t i = 0, startpos = 0, endpos = string::npos; + bool foundQuote = false; + size_t strlen = strline.length(); + unsigned int paren = 1; + + if (contd) + { + string tstrline = CUtil::TrimString(strline, 1); + if ((tstrline.length() == 0 && delimiter == "\n") || // end of string literal with the new line as delimiter, e.g., % + delimiter == tstrline) + { + // end of here document + for (i = 0; i < ContinueLine.length(); i++) + { + if (delimiter[0] == ContinueLine[i]) + { + // avoid continuation + strline = "$"; + break; + } + } + contd = false; + delimiter = ""; + idx_start = strline.length(); + return 0; + } + else + { + // replace the whole line + strline.replace(idx_start, strlen - idx_start, strlen - idx_start, '$'); + if (strline.length() > 0) + strline[strline.length() - 1] = ContinueLine[0]; // continue the line + else + strline = string(1, ContinueLine[0]); + return 0; + } + } + else + { + while (i < strlen && !contd) + { + switch (strline[i]) + { + case '%': + if (!foundQuote) + { + foundQuote = true; + startpos = i; + if (i + 1 < strlen) + { + if (strline[i + 1] == 'Q' || + strline[i + 1] == 'q' || + strline[i + 1] == 'r' || + strline[i + 1] == 's' || + strline[i + 1] == 'W' || + strline[i + 1] == 'w' || + strline[i + 1] == 'x') + i++; + } + if (i + 1 == strlen) + { + contd = true; // continued in the next line + idx_start = i; + delimiter = "\n"; + strline[i] = ContinueLine[0]; + 
return 0; + } + else + { + delimiter = string(1, strline[i + 1]); + paren = 1; + } + i++; + } + break; + case '<': + if (!foundQuote) + { + if (i + 2 < strlen && strline[i + 1] == '<' && strline[i + 2] != ' ' && strline[i + 2] != '\t') + { + // here document + // foundQuote = true; + // get the last delimiter specifying the end of the 'here document' + + // find the last delimiter, check for the last delimiter word not in a comment + idx = strline.find_last_of("#"); + if (idx == string::npos) + idx = strlen; + idx = idx - 1; + string tstrline = CUtil::TrimString(strline.substr(i + 2, idx + 1 - i - 2)); + + strline[idx] = ContinueLine[0]; // indicate that the string will continue in the next line, so one SLOC is counted + idx_start = idx + 1; + + idx = tstrline.find_last_of("<<"); // another here-doc delimiter? + + // if yes, get the last delimiter + if (idx != string::npos) + tstrline = tstrline.substr(idx + 1, tstrline.length()); + + tstrline = CUtil::TrimString(tstrline); + + size_t i1 = 0; + if (tstrline.length() > 2 && (tstrline.substr(0, 2) == "-\"" || tstrline.substr(0, 2) == "-'")) + { + i1 = 2; + char qChar = tstrline[1]; + while (i1 < tstrline.length() && tstrline[i1] != qChar) + i1++; + } + else + { + while (i1 < tstrline.length() && heredoc_deli.find(tstrline[i1]) != string::npos) + i1++; + } + + delimiter = tstrline.substr(0, i1); + delimiter = CUtil::TrimString(delimiter); + + delimiter = CUtil::EraseString(delimiter, "\""); + delimiter = CUtil::EraseString(delimiter, "\'"); + delimiter = CUtil::EraseString(delimiter, "-"); + + contd = true; + return 0; + } + else + { + idx_start = strline.length(); + return 1; + } + } + break; + case '/': + if (!foundQuote) + { + delimiter = "/"; + foundQuote = true; + startpos = i; + } + else + { + strline.replace(startpos, i - startpos + 1, i - startpos + 1, '$'); + delimiter = ""; + foundQuote = false; + idx_start = i + 1; + } + break; + default: + if (foundQuote) + { + if (delimiter.length() == 1 && delimiter[0] 
== strline[i] && + openParentheses.find(delimiter) != string::npos) + { + paren++; + } + + // check if the char is a closing bracket + if ((idx = closedParentheses.find(strline[i])) != string::npos) + { + if (delimiter.length() > 0 && openParentheses[idx] == delimiter[0] && paren > 0) + paren--; + if (paren == 0) + { + // here we go, replace string starting from startpos? + endpos = i; + } + } + else + { + if (delimiter.length() > 0 && strline[i] == delimiter[0] && paren == 1) + { + // here we go, replace string starting from startpos + endpos = i; + } + } + + if (endpos != string::npos) + { + foundQuote = false; + // now, replace string from startpos to endpos + strline.replace(startpos, endpos - startpos + 1, endpos - startpos + 1, '$'); + contd = false; + delimiter = ""; + endpos = string::npos; + idx_start = i + 1; + } + } + break; + } + i++; + if (i > idx_start && !foundQuote) + break; + } + } + int ret = CCodeCounter::ReplaceQuote(strline, idx_start, contd, CurrentQuoteEnd); + if (contd && !foundQuote) + { + strline[strline.length() - 1] = ContinueLine[0]; + if (delimiter == "") + delimiter = CurrentQuoteEnd; + } + return ret; +} + +/*! +* Processes physical and logical lines according to language specific rules. 
+* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CRubyCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + string strLSLOC = ""; + string strLSLOCBak = ""; + + filemap::iterator fit, fitbak; + string line, lineBak; + StringVector loopLevel; + + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + delimiter = ""; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + lineBak = fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + + // no data declaration in Ruby + result->exec_lines[PHY]++; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +*/ +void CRubyCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak) +{ + static string control_modifiers[] = {"if", "for", "unless", "while", "until", "when", "close", "elsif", "else", "then"}; + static size_t control_mod_cnt = 10; + size_t start = 0; // starting index of the working string + size_t i, j, strSize = string::npos; + size_t idx = string::npos; + bool trunc_flag = false; + + string tmp = CUtil::TrimString(strLSLOC); + string tline = CUtil::TrimString(line); + bool line_skipped = false; + + if (tline == "end" || tline == "else" || tline == "}" || tline == "]") + { + if (tmp.length() > 0) + line_skipped = true; + else + return; + } + + while (start < line.length()) + { + strSize = string::npos; + + // get first control modifier + idx = line.find(';', start); + i = line.find(':', start); + if (i != string::npos && (idx == string::npos || i < idx)) + { + // avoid processing :: and splitting ternary operator x ? 
y : z + if (i >= line.length() - 1 || line[i + 1] != ':') + { + for (j = i - 1; j > start; j--) + { + if (line[j] == '?') + break; + } + if (j <= start) + idx = i; + } + } + for (j = 0; j < control_mod_cnt; j++) + { + i = CUtil::FindKeyword(line, control_modifiers[j], start); + if (i != string::npos && (idx == string::npos || i < idx)) + { + if (CUtil::FindKeyword(tline, control_modifiers[j]) != 0) + idx = i; + } + } + + // process modifier + if (idx != string::npos) + { + if (line[idx] == ';') + { + strSize = CUtil::TruncateLine(idx + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 1 && CUtil::TrimString(line.substr(start, strSize - 1)) != ";") + { + // only include ';' if it stands alone + strSize--; + } + idx++; + } + else if (line[idx] == ':') + { + strSize = CUtil::TruncateLine(idx + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + idx++; + } + else if (line.length() >= idx + 4 && line.substr(idx, 4) == "then") + { + idx += 4; + strSize = CUtil::TruncateLine(idx - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + } + else + strSize = CUtil::TruncateLine(idx - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + } + + if (strSize > 0 && strSize != string::npos) // only if (idx != 0 && idx != string::npos) returns true + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + else if (!line_skipped) + { + strSize = CUtil::TruncateLine(line.length() - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + + if (tline.length() > 0) + { + for (i = 0; i < ContinueLine.length(); i++) + { + if (tline.find_last_of(ContinueLine[i]) == tline.length() - 1) + { + // continued to the next line + if (tline[tline.length() - 1] == '\\') + { + i = strLSLOC.find_last_of('\\'); + if (i != string::npos) + { + strLSLOC[i] = ' '; + 
strLSLOCBak[i] = ' '; + } + } + return; + } + } + } + } + if (CUtil::TrimString(strLSLOC).length() == 0) + return; + + // add SLOC + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOC = strLSLOCBak = ""; + + if (idx != 0 && idx != string::npos) + { + start = idx; + tline = CUtil::TrimString(line.substr(start)); + } + else + start = string::npos; + } +} diff --git a/src/CRubyCounter.h b/src/CRubyCounter.h new file mode 100644 index 0000000..be9b4d0 --- /dev/null +++ b/src/CRubyCounter.h @@ -0,0 +1,32 @@ +//! Code counter class definition for the Ruby language. +/*! +* \file CRubyCounter.h +* +* This file contains the code counter class definition for the Ruby language. +*/ + +#ifndef CRubyCounter_h +#define CRubyCounter_h + +#include "CCodeCounter.h" + +//! Ruby code counter class. +/*! +* \class CRubyCounter +* +* Defines the Ruby code counter class. +*/ +class CRubyCounter : public CCodeCounter +{ +public: + CRubyCounter(); + +protected: + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak); + + string delimiter; // used to store delimiter of string literals across lines +}; + +#endif diff --git a/src/CSqlCounter.cpp b/src/CSqlCounter.cpp new file mode 100644 index 0000000..801afc6 --- /dev/null +++ b/src/CSqlCounter.cpp @@ -0,0 +1,671 @@ +//! Code counter class methods for the SQL language. +/*! +* \file CSqlCounter.cpp +* +* This file contains the code counter class methods for the SQL language. +*/ + +#include "CSqlCounter.h" +#include +#include +#include +#include + +/*! +* Constructs a CSqlCounter object. 
+*/ +CSqlCounter::CSqlCounter() +{ + classtype = SQL; + language_name = "SQL"; + casesensitive = false; + + file_extension.push_back(".sql"); + QuoteStart = "\"'"; + QuoteEnd = "\"'"; + LineCommentStart.push_back("--"); + + BlockCommentStart.push_back("/*"); + BlockCommentEnd.push_back("*/"); + + data_name_list.push_back("bigint"); + data_name_list.push_back("binary"); + data_name_list.push_back("bit"); + data_name_list.push_back("blob"); + data_name_list.push_back("boolean"); + data_name_list.push_back("byte"); + data_name_list.push_back("char"); + data_name_list.push_back("character"); + data_name_list.push_back("date"); + data_name_list.push_back("datetime"); + data_name_list.push_back("decimal"); + data_name_list.push_back("double"); + data_name_list.push_back("enum"); + data_name_list.push_back("float"); + data_name_list.push_back("image"); + data_name_list.push_back("int"); + data_name_list.push_back("integer"); + data_name_list.push_back("interval"); + data_name_list.push_back("long"); + data_name_list.push_back("longblob"); + data_name_list.push_back("longtext"); + data_name_list.push_back("mediumblob"); + data_name_list.push_back("mediumint"); + data_name_list.push_back("mediumtext"); + data_name_list.push_back("memo"); + data_name_list.push_back("money"); + data_name_list.push_back("nchar"); + data_name_list.push_back("ntext"); + data_name_list.push_back("nvarchar"); + data_name_list.push_back("numeric"); + data_name_list.push_back("real"); + data_name_list.push_back("single"); + data_name_list.push_back("smalldatetime"); + data_name_list.push_back("smallint"); + data_name_list.push_back("smallmoney"); + data_name_list.push_back("text"); + data_name_list.push_back("time"); + data_name_list.push_back("timestamp"); + data_name_list.push_back("tinyint"); + data_name_list.push_back("tinytext"); + data_name_list.push_back("uniqueidentifier"); + data_name_list.push_back("varbinary"); + data_name_list.push_back("varchar"); + data_name_list.push_back("year"); + + 
exec_name_list.push_back("alter"); + exec_name_list.push_back("close"); + exec_name_list.push_back("comment"); + exec_name_list.push_back("commit"); + exec_name_list.push_back("create"); + exec_name_list.push_back("declare"); + exec_name_list.push_back("delete"); + exec_name_list.push_back("deny"); + exec_name_list.push_back("drop"); + exec_name_list.push_back("except"); + exec_name_list.push_back("fetch"); + exec_name_list.push_back("grant"); + exec_name_list.push_back("group by"); + exec_name_list.push_back("having"); + exec_name_list.push_back("insert"); + exec_name_list.push_back("intersect"); + exec_name_list.push_back("join"); + exec_name_list.push_back("limit"); + exec_name_list.push_back("order by"); + exec_name_list.push_back("rename"); + exec_name_list.push_back("replace"); + exec_name_list.push_back("revoke"); + exec_name_list.push_back("rollback"); + exec_name_list.push_back("select"); + exec_name_list.push_back("set"); + exec_name_list.push_back("truncate"); + exec_name_list.push_back("union"); + exec_name_list.push_back("update"); + exec_name_list.push_back("where"); + + math_func_list.push_back("abs"); + math_func_list.push_back("avg"); + math_func_list.push_back("ceil"); + math_func_list.push_back("count"); + math_func_list.push_back("exp"); + math_func_list.push_back("floor"); + math_func_list.push_back("max"); + math_func_list.push_back("min"); + math_func_list.push_back("mod"); + math_func_list.push_back("power"); + math_func_list.push_back("round"); + math_func_list.push_back("sign"); + math_func_list.push_back("sqrt"); + math_func_list.push_back("stddev"); + math_func_list.push_back("sum"); + math_func_list.push_back("trunc"); + math_func_list.push_back("variance"); + + trig_func_list.push_back("acos"); + trig_func_list.push_back("acosh"); + trig_func_list.push_back("asin"); + trig_func_list.push_back("asinh"); + trig_func_list.push_back("atan"); + trig_func_list.push_back("atan2"); + trig_func_list.push_back("atanh"); + 
trig_func_list.push_back("cos"); + trig_func_list.push_back("cosh"); + trig_func_list.push_back("sin"); + trig_func_list.push_back("sinh"); + trig_func_list.push_back("tan"); + trig_func_list.push_back("tanh"); + + log_func_list.push_back("ln"); + log_func_list.push_back("log"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + + cmplx_cond_list.push_back("except"); + cmplx_cond_list.push_back("group by"); + cmplx_cond_list.push_back("having"); + cmplx_cond_list.push_back("intersect"); + cmplx_cond_list.push_back("join"); + cmplx_cond_list.push_back("limit"); + cmplx_cond_list.push_back("order by"); + cmplx_cond_list.push_back("union"); + cmplx_cond_list.push_back("where"); + + cmplx_logic_list.push_back("="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back("<>"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("<="); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("like"); + + cmplx_preproc_list.push_back("dictionary"); + + cmplx_assign_list.push_back("="); +} + +/*! +* Replaces quoted strings inside a string starting at idx_start with '$'. +* Handles special cases for SQL literal strings. 
+* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote character of the current status +* +* \return method status +*/ +int CSqlCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd) +{ + size_t idx = string::npos; + while (true) + { + idx = strline.find("''", idx_start); + if (idx != string::npos) + strline.replace(idx, 2, 2, '$'); + else + break; + } + while (true) + { + idx = strline.find("\"\"", idx_start); + if (idx != string::npos) + strline.replace(idx, 2, 2, '$'); + else + break; + } + return CCodeCounter::ReplaceQuote(strline, idx_start, contd, CurrentQuoteEnd); +} + +/*! +* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CSqlCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + size_t i, j, k, ind, nextInd; + unsigned int sloc_count, dsloc_count, lineNum; + string stmt, stmtBak, exec_keyword, data_keyword; + filemap::iterator fit, fitbak; + + unsigned int data_count = 0; + unsigned int exec_count = 0; + bool trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string line = ""; + string lineBak = ""; + bool data_continue = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + + vector nestedSql, nestedSqlBak; + stack pistack, ppstack; + size_t mapi = 0, pi, pp; + + // process physical SLOC and capture embedded SLOC + for (fit = 
fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + // insert blank at the beginning(for searching keywords) + line = ' ' + fit->line; + lineBak = ' ' + fitbak->line; + + if (CUtil::CheckBlank(line)) + { + mapi++; + continue; + } + + // check physical lines + exec_count = 0; + CUtil::CountTally(line, exec_name_list, exec_count, 1, exclude, "", "", &result->exec_name_count, false); + data_count = 0; + CUtil::CountTally(line, data_name_list, data_count, 1, exclude, "", "", &result->data_name_count, false); + if (exec_count > 0 || (data_count <= 0 && !data_continue)) + { + data_continue = false; + result->exec_lines[PHY]++; + } + else + { + data_continue = true; + result->data_lines[PHY]++; + } + + for (i = 1; i < line.length(); i++) + { + if (line[i] == '(') + { + pistack.push(mapi); + ppstack.push(i - 1); + } + else if (line[i] == ')' && pistack.size() > 0) + { + // capture embedded SLOC + pi = pistack.top(); + pp = ppstack.top(); + pistack.pop(); + ppstack.pop(); + if (pi == mapi) + { + stmt = fit->line.substr(pp, i - pp); + stmtBak = fitbak->line.substr(pp, i - pp); + } + else + { + stmt = fmap->at(pi).line.substr(pp); + stmtBak = fmapBak->at(pi).line.substr(pp); + for (j = pi + 1; j < mapi; j++) + { + stmt += ' ' + fmap->at(j).line; + stmtBak += ' ' + fmapBak->at(j).line; + } + stmt += ' ' + fit->line.substr(0, i); + stmtBak += ' ' + fitbak->line.substr(0, i); + } + + // check for executable statement keywords + sloc_count = 0; + dsloc_count = 0; + CUtil::CountTally(stmt, exec_name_list, sloc_count, 1, exclude, "", "", NULL, false); + if (sloc_count > 0) + { + // extract embedded SLOC from main content + if (pi == mapi) + { + fit->line.erase(pp, i - pp); + fitbak->line.erase(pp, i - pp); + line.erase(pp + 1, i - pp - 1); + lineBak.erase(pp + 1, i - pp - 1); + i = pp + 1; + } + else + { + fmap->at(pi).line.erase(pp); + fmapBak->at(pi).line.erase(pp); + for (j = pi + 1; j < mapi; j++) + { + fmap->at(j).line.clear(); + 
fmapBak->at(j).line.clear(); + } + fit->line.erase(0, i); + fitbak->line.erase(0, i); + line.erase(1, i - 1); + lineBak.erase(1, i - 1); + i = 1; + } + stmt = CUtil::TrimString(stmt.substr(1, stmt.length() - 2)); + stmtBak = CUtil::TrimString(stmtBak.substr(1, stmtBak.length() - 2)); + nestedSql.push_back(stmt); + nestedSqlBak.push_back(stmtBak); + } + else + { + CUtil::CountTally(stmt, data_name_list, dsloc_count, 1, exclude, "", "", NULL, false); + if (dsloc_count > 0) + { + // mark data keywords (not counted as LSLOC) + for (j = 0; j < data_name_list.size(); j++) + { + ind = 0; + nextInd = 0; + data_keyword = data_name_list.at(j); + while (ind != string::npos) + { + ind = CUtil::FindKeyword(stmt, data_keyword, nextInd, TO_END_OF_STRING, false); + if (ind != string::npos) + { + stmt.replace(ind, 1, "$"); + nextInd = ind + 1; + } + } + ind = pp; + nextInd = pp; + if (pi == mapi) + { + while (ind != string::npos) + { + ind = CUtil::FindKeyword(fit->line, data_keyword, nextInd, i - 1, false); + if (ind != string::npos) + { + fit->line.replace(ind, 1, "$"); + nextInd = ind + 1; + } + } + } + else + { + while (ind != string::npos) + { + ind = CUtil::FindKeyword(fmap->at(pi).line, data_keyword, nextInd, TO_END_OF_STRING, false); + if (ind != string::npos) + { + fmap->at(pi).line.replace(ind, 1, "$"); + nextInd = ind + 1; + } + } + for (k = pi + 1; k < mapi; k++) + { + ind = 0; + nextInd = 0; + while (ind != string::npos) + { + ind = CUtil::FindKeyword(fmap->at(k).line, data_keyword, nextInd, TO_END_OF_STRING, false); + if (ind != string::npos) + { + fmap->at(k).line.replace(ind, 1, "$"); + nextInd = ind + 1; + } + } + } + ind = 0; + nextInd = 0; + while (ind != string::npos) + { + ind = CUtil::FindKeyword(fit->line, data_keyword, nextInd, i - 1, false); + if (ind != string::npos) + { + fit->line.replace(ind, 1, "$"); + nextInd = ind + 1; + } + } + } + } + } + } + } + } + mapi++; + } + data_continue = false; + + // add embedded SLOC to file maps + if (nestedSql.size() > 
0) + { + lineNum = fmap->back().lineNumber; + for (i = 0; i < nestedSql.size(); i++) + { + lineNum++; + lineElement element(lineNum, nestedSql.at(i)); + fmap->push_back(element); + lineElement elementBak(lineNum, nestedSqlBak.at(i)); + fmapBak->push_back(elementBak); + } + nestedSql.clear(); + nestedSqlBak.clear(); + } + + // process logical SLOC + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + // insert blank at the beginning(for searching keywords) + line = ' ' + fit->line; + lineBak = ' ' + fitbak->line; + + if (CUtil::CheckBlank(line)) + continue; + + // process logical SLOC + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, data_continue); + } + if (strLSLOC.length() > 0) + { + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + if (data_continue) + result->data_lines[LOG]++; + else + result->exec_lines[LOG]++; + } + strLSLOC = strLSLOCBak = ""; + data_continue = false; + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param data_continue continuation of a data declaration line +*/ +void CSqlCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, bool &data_continue) +{ + size_t i, ind, nextInd, startInd, endInd, strSize; + ptrdiff_t j; + bool trunc_flag = false, found; + string exec_keyword, data_keyword; + list slocIndices, eslocIndices, dslocIndices, gslocIndices; + + // find locations of executable keywords + for (i = 0; i < exec_name_list.size(); i++) + { + ind = 0; + nextInd = 0; + exec_keyword = exec_name_list.at(i); + while (ind != string::npos) + { + ind = CUtil::FindKeyword(line, exec_keyword, nextInd, TO_END_OF_STRING, false); + if (ind != string::npos) + { + // check for grant, revoke, deny + if (exec_keyword == "grant" || exec_keyword == "revoke" || exec_keyword == "deny") + { + // ignore GRANT OPTION + if (line.length() <= ind + 12 || CUtil::ToLower(line.substr(ind, 12)) != "grant option") + gslocIndices.push_back(ind); + } + nextInd = ind + 1; + slocIndices.push_back(ind); + eslocIndices.push_back(ind); + } + } + } + eslocIndices.sort(); + eslocIndices.unique(); + + // process grant, revoke, deny + if (gslocIndices.size() > 0) + { + slocIndices.sort(); + slocIndices.unique(); + } + while (gslocIndices.size() > 0) + { + ind = gslocIndices.front() + 1; + gslocIndices.pop_front(); + + // search for ON, TO, FROM until first non-exec keyword found and clear slocIndices in between + // (ideally this check would span multiple lines, but we avoid this for now) + if (ind < line.length()) + { + nextInd = CUtil::FindKeyword(line, "on", ind, TO_END_OF_STRING, false); + if (nextInd == string::npos) + { + nextInd = CUtil::FindKeyword(line, "to", ind, TO_END_OF_STRING, false); + if (nextInd == string::npos) + { + 
nextInd = CUtil::FindKeyword(line, "from", ind, TO_END_OF_STRING, false); + if (nextInd == string::npos) + nextInd = line.length(); + } + } + // clear any slocIndices between these values + list::iterator it, it2; + for (it = eslocIndices.begin(), it2 = slocIndices.begin(); it != eslocIndices.end(); ++it, ++it2) + { + if (*it >= ind && *it < nextInd) + { + *it = INVALID_POSITION; + *it2 = INVALID_POSITION; + } + } + eslocIndices.remove(INVALID_POSITION); + slocIndices.remove(INVALID_POSITION); + } + } + + // find locations of data keywords + for (i = 0; i < data_name_list.size(); i++) + { + ind = 0; + nextInd = 0; + data_keyword = data_name_list.at(i); + while (ind != string::npos) + { + ind = CUtil::FindKeyword(line, data_keyword, nextInd, TO_END_OF_STRING, false); + if (ind != string::npos) + { + // try to get variable name (var name is listed before type in SQL) + nextInd = ind + 1; + found = false; + for (j = ind - 1; j >= 0; j--) + { + if (line[j] != ' ' && line[j] != '\t') + found = true; + else if (found && (line[j] == ' ' || line[j] == '\t' || line[j] == ',')) + { + ind = j + 1; + break; + } + } + if (found && j <= 0) + ind = 1; + slocIndices.push_back(ind); + dslocIndices.push_back(ind); + } + } + } + dslocIndices.sort(); + dslocIndices.unique(); + + // process total set of keywords + slocIndices.sort(); + slocIndices.unique(); + startInd = 0; + while (slocIndices.size() > 0) + { + // get current keyword index + startInd = slocIndices.front(); + slocIndices.pop_front(); + if (slocIndices.size() > 0) + endInd = slocIndices.front(); + else + endInd = string::npos; + + // process continuation + if (strLSLOCBak.length() > 0) + { + strSize = CUtil::TruncateLine(startInd, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(0, strSize); + strLSLOCBak += lineBak.substr(0, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + if (data_continue) + result->data_lines[LOG]++; + else + 
result->exec_lines[LOG]++; + } + strLSLOC = strLSLOCBak = ""; + } + data_continue = false; + + // determine keyword type + if (eslocIndices.size() > 0 && eslocIndices.front() == startInd) + eslocIndices.pop_front(); + else + { + dslocIndices.pop_front(); + data_continue = true; + } + + // process LSLOC + if (endInd != string::npos) + { + strSize = CUtil::TruncateLine(endInd - startInd, 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC = line.substr(startInd, strSize); + strLSLOCBak = lineBak.substr(startInd, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + if (data_continue) + result->data_lines[LOG]++; + else + result->exec_lines[LOG]++; + } + strLSLOC = strLSLOCBak = ""; + startInd = endInd; + } + } + + // capture continuing LSLOC + if (startInd < line.length()) + { + strSize = CUtil::TruncateLine(line.length() - startInd, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(startInd, strSize); + strLSLOCBak += lineBak.substr(startInd, strSize); + } + } +} + +/*! +* Constructs a CSqlColdFusionCounter object. +*/ +CSqlColdFusionCounter::CSqlColdFusionCounter() +{ + classtype = SQL_CFM; + language_name = "SQL/ColdFusion"; + + file_extension.clear(); + file_extension.push_back(".*sqlcfm"); +} diff --git a/src/CSqlCounter.h b/src/CSqlCounter.h new file mode 100644 index 0000000..82dffdb --- /dev/null +++ b/src/CSqlCounter.h @@ -0,0 +1,42 @@ +//! Code counter class definition for the SQL language. +/*! +* \file CSqlCounter.h +* +* This file contains the code counter class definition for the SQL language. +*/ + +#ifndef CSqlCounter_h +#define CSqlCounter_h + +#include "CCodeCounter.h" + +//! SQL code counter class. +/*! +* \class CSqlCounter +* +* Defines the SQL code counter class. 
+*/ +class CSqlCounter : public CCodeCounter +{ +public: + CSqlCounter(); + +protected: + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, bool &data_continue); +}; + +//! SQL in ColdFusion code counter class. +/*! +* \class CSqlColdFusionCounter +* +* Defines the SQL in ColdFusion code counter class. +*/ +class CSqlColdFusionCounter : public CSqlCounter +{ +public: + CSqlColdFusionCounter(); +}; + +#endif diff --git a/src/CTagCounter.cpp b/src/CTagCounter.cpp new file mode 100644 index 0000000..ae8f8ec --- /dev/null +++ b/src/CTagCounter.cpp @@ -0,0 +1,341 @@ +//! Code counter class methods for tag languages including HTML, XML, and ColdFusion. +/*! +* \file CTagCounter.cpp +* +* This file contains the code counter class methods for tag languages including HTML, XML, and ColdFusion. +*/ + +#include "CTagCounter.h" + +/*! +* Constructs a CTagCounter object. +*/ +CTagCounter::CTagCounter() +{ + casesensitive = false; +} + +/*! +* Processes physical and logical lines according to language specific rules. 
+* NOTE: all the blank lines + +* whole line comments +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CTagCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + unsigned int cnt = 0; + filemap::iterator fit, fitBak; + string line, lineBak; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strLSLOC, strLSLOCBak; + char prev_char = 0; + bool ok, data_continue = false; + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + + size_t idx_start, quote_idx_start; + char CurrentQuoteEnd = 0; + bool quote_contd = false; + QuoteStart = ">"; + QuoteEnd = "<"; + + for (fit = fmap->begin(), fitBak = fmapBak->begin(); fit != fmap->end(); fit++, fitBak++) + { + if (!CUtil::CheckBlank(fit->line)) + { + // replace "quotes" - string between close and open tags + // must be processed after comments since comments start/end with same character as tag + quote_idx_start = 0; + idx_start = 0; + if (quote_contd) + { + // replace quote until next character + ReplaceQuote(fit->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + } + if (!quote_contd) + { + while (idx_start < fit->line.length()) + { + quote_idx_start = FindQuote(fit->line, QuoteStart, quote_idx_start, QuoteEscapeFront); + + if (quote_idx_start == string::npos) + break; + + ReplaceQuote(fit->line, quote_idx_start, quote_contd, CurrentQuoteEnd); + if (quote_idx_start > idx_start) + { + // comment delimiter inside quote + idx_start = quote_idx_start; + continue; + } + } + } + + line = fit->line; + lineBak = fitBak->line; + + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, prev_char, + data_continue, temp_lines, phys_exec_lines, 
phys_data_lines); + + if (print_cmplx) + { + cnt = 0; + CountTagTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count, false); + } + + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + phys_data_lines = 0; + } + } + QuoteStart = ""; + QuoteEnd = ""; + + // capture closing tag + if (strLSLOC.length() > 0) + { + bool trunc_flag = false; + if (strLSLOCBak.length() == this->lsloc_truncate) + trunc_flag = true; + ok = result->addSLOC(strLSLOCBak, trunc_flag); + + cnt = 0; + if (data_name_list.size() > 0) + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + if (data_continue || cnt > 0) + { + if (ok) + result->data_lines[LOG]++; + result->data_lines[PHY]++; + } + else + { + if (ok) + result->exec_lines[LOG]++; + + // since physical data lines are recorded at next LSLOC, check if first line was a data line + if (data_name_list.size() > 0) + { + fit = fmap->begin(); + cnt = 0; + CUtil::CountTally(fit->line, data_name_list, cnt, 1, exclude, "", "", NULL); + if (cnt > 0) + result->exec_lines[PHY]++; + } + } + } + return 0; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param prev_char previous character +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +*/ +void CTagCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, char &prev_char, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines) +{ + size_t start = 0; + size_t i = 0, strSize; + unsigned int cnt = 0; + string tmp; + bool trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + while (i < line.length()) + { + if (line[i] == '<') + { + if (line.length() - 1 > i && (line[i+1] == '/' || line[i+1] == '?' 
|| line[i+1] == '!')) + { + i++; + continue; + } + + strSize = CUtil::TruncateLine(i - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + if (strLSLOC.length() > 0 && strLSLOC[strLSLOC.length()-1] != '>' && line[0] != '<') + { + strLSLOC += " "; + strLSLOCBak += " "; + } + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + if (strLSLOC.length() > 0) + { + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + if (data_name_list.size() > 0) + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + + if (data_continue || cnt > 0) + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + temp_lines++; + result->exec_lines[LOG]++; + phys_exec_lines = temp_lines; + } + } + else if (data_continue == true) + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + start = i; + } + } + else if (line[i] == '>') + { + // also, <> is also skipped, empty block is not counted + if (prev_char == '/' || prev_char == '<') + start = i + 1; + else if (exclude_keywords.size() > 0) + { + // skip excluded keywords + cnt = 0; + CUtil::CountTally(line.substr(start, i - start), exclude_keywords, cnt, 1, exclude, "", "", NULL); + if (cnt > 0) + start = i + 1; + } + } + i++; + } + + tmp = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + if (strLSLOC.length() > 0 && strLSLOC[strLSLOC.length()-1] != '>' && line[0] != '<') + { + strLSLOC += " "; + strLSLOCBak += " "; + } + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp.substr(0, strSize); + } + + // verify not beginning to process a new data line + cnt = 0; + if (!data_continue && data_name_list.size() > 0) + { + 
CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + if (cnt > 0) + data_continue = true; + } + if (data_continue == true) + temp_lines++; + if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; +} + +/*! +* Counts HTML keywords in string. +* +* \param base string to search +* \param container set of keywords +* \param count number of keywords found +* \param mode allowable leading/trailing character mode (1=exclude keywords surrounded by exclude characters, 2=include only keywords preceded by include1 characters and followed by include2 characters) +* \param exclude list of characters that may not surround keywords found (if mode=1) +* \param include1 list of characters that must precede keywords found (if mode=2) +* \param include2 list of characters that must follow keywords found (if mode=2) +* \param counter_container stores list of individual keyword counts (if specified) +* \param case_sensitive language is case sensitive? 
+*/ +void CTagCounter::CountTagTally(string base, StringVector& container, unsigned int &count, int mode, string exclude, + string include1, string include2, UIntVector* counter_container, bool case_sensitive) +{ + base = " " + base + " "; + string::size_type idx; + string base1, temp; + StringVector::iterator vit; + UIntVector::iterator cit; + unsigned int single_count = 0; + + if (counter_container) + cit = counter_container->begin(); + + if (case_sensitive == false) + { + base1 = CUtil::ToLower(base); + for (vit = container.begin(); vit != container.end(); vit++) + (*vit) = CUtil::ToLower((*vit)); + } + else + base1 = base; + + if (mode == 1) + { + // exclude mode + for (vit = container.begin(); vit != container.end(); vit++) + { + temp = "<" + *vit; + idx = base1.find(temp); + while (idx != string::npos) + { + if ((exclude.find(base1[idx+(temp.size())] ) == string::npos) && + (exclude.find(base1[idx-1]) == string::npos)) + { + count++; + single_count++; + } + idx = base1.find(temp, idx + (temp.size())); + } + if (counter_container) + { + (*cit) += single_count; + single_count = 0; + cit++; + } + } + } + else if (mode == 2) + { + // include mode + for (vit = container.begin(); vit != container.end(); vit++) + { + temp = "<" + *vit; + idx = base1.find(temp); + while (idx != string::npos) + { + if ((include1.find(base1[idx-1]) != string::npos) && + (include2.find(base1[idx+(temp.size())]) != string::npos)) + count++; + idx = base1.find(temp, idx + (temp.size())); + } + } + } +} diff --git a/src/CTagCounter.h b/src/CTagCounter.h new file mode 100644 index 0000000..9893e52 --- /dev/null +++ b/src/CTagCounter.h @@ -0,0 +1,32 @@ +//! Code counter class definition for tag languages including HTML, XML, and ColdFusion. +/*! +* \file CHtmlCounter.h +* +* This file contains the code counter class definition for for tag languages including HTML, XML, and ColdFusion. +*/ + +#ifndef CTagCounter_h +#define CTagCounter_h + +#include "CCodeCounter.h" + +//! 
Tag language code counter class. +/*! +* \class CTagCounter +* +* Defines the tag language code counter class. +*/ +class CTagCounter : public CCodeCounter +{ +public: + CTagCounter(); + +protected: + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, char &prev_char, + bool &data_continue, unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines); + void CountTagTally(string base, StringVector& container, unsigned int &count, int mode, string exclude, + string include1, string include2, UIntVector* counter_container = 0, bool case_sensitive = true); +}; + +#endif diff --git a/src/CUtil.cpp b/src/CUtil.cpp new file mode 100644 index 0000000..958e924 --- /dev/null +++ b/src/CUtil.cpp @@ -0,0 +1,1000 @@ +//! Utility class static methods. +/*! +* \file CUtil.cpp +* +* This file contains the utility class static methods. +* +* Changed from UCC 2013_04 release by Randy Maxwell +* Changes started on 2015_04_22 +* Changes ended on 2015_06_06 +* Refactored ToUpper to use table lookup (char array) for much faster performance +* Cleanup of MS Visual Studio 2010 Express edition Warning level 4 warnings +*/ + +#include "CUtil.h" +#include +#include +#include +#include +#include + +#ifdef UNIX + #include + #include +#else + #include + #include +#endif + +#include "UCCGlobals.h" + +/*! +* Returns a string without leading/trailing spaces or tabs. 
+* +* \param str original string +* \param mode trim mode (-1=left, 0=both, 1=right) +* +* \return trimmed string +*/ +string CUtil::TrimString(const string &str, int mode) +{ + size_t idx; + string str1 = str; + bool done = false; + if (mode <= 0) + { + idx = str1.find_first_not_of(" \t\n\r\f"); + if (idx != string::npos) + str1 = str1.substr(idx); + else + { + // Nothing but whitespace in the string + str1 = ""; + done = true; // no need to check other direction + } + } + if ( ( false == done ) + && ( mode >= 0 ) ) + { + idx = str1.find_last_not_of(" \t\n\r\f"); + if (idx != string::npos) + str1 = str1.substr(0, idx + 1); + else + str1 = ""; + } + return str1; +} + +/*! +* Erases all strings specified by erasedstr in srcstr. +* +* \param srcstr original string +* \param erasedstr substring to be erased +* +* \return modified string +*/ +string CUtil::EraseString(const string &srcstr, const string &erasedstr) +{ + size_t idx = 0; + string srcstr1 = srcstr; + while ((idx = srcstr1.find(erasedstr, idx)) != string::npos) + { + srcstr1.erase(idx, erasedstr.length()); + } + return srcstr1; +} + +/*! +* Initialize an array of lower case chars used by ToLower. Performance improvement. +* +* \Globals lowerChars char array is initialized +*/ +static char lowerChars[256]; +void CUtil::InitToLower() +{ + for ( unsigned int j = 0; j < sizeof( lowerChars ); j++ ) + { + lowerChars[ j ] = (char)tolower( j ); + } +} + +/*! +* Returns a string in lower case. +* +* \param string_to_lower original string +* +* \return lower case string +*/ +string CUtil::ToLower(const string &string_to_lower) +{ + string string_to_lower1 = string_to_lower; + for (size_t i = 0; i < string_to_lower1.size(); i++) + { + string_to_lower1[ i ] = lowerChars[ string_to_lower1[ i ] ]; + } + return string_to_lower1; +} + +/*! +* Checks whether the string passed is blank. +* +* \param str original string +* +* \return blank string? 
+*/
+bool CUtil::CheckBlank(const string &str)
+{
+    // An empty string has no non-whitespace character either, so it counts as blank.
+    string::size_type idx;
+    idx = str.find_first_not_of("\n\t\r\f "); // searches for chars other than specified
+    if (idx != string::npos)
+        return false;
+    else
+        return true;
+}
+
+/*!
+* Checks whether the string passed is an integer.
+* Surrounding whitespace is trimmed first; after that only the decimal digits
+* 0-9 are accepted (no sign). A string that is empty after trimming contains
+* no offending character and therefore also yields true.
+*
+* \param str original string
+*
+* \return integer string?
+*/
+bool CUtil::IsInteger(const string &str)
+{
+    string str1 = TrimString(str);
+    if (str1.find_first_not_of("0123456789") != string::npos)
+        return false;
+    else
+        return true;
+}
+
+/*!
+* Finds the first appearance of each element of table's first value.
+* The target is converted to lower case before searching while the table keys
+* are used as they are, so only lower case keys can match. Of all keys found,
+* the one at the smallest position wins; on equal positions the key that comes
+* first in the table's iteration order is kept.
+*
+* NOTE(review): the template arguments of "map" are missing in this text,
+* presumably lost during extraction - confirm against the repository.
+*
+* \param target string
+* \param table map table
+* \param pos position of string (string::npos if nothing was found)
+* \param preLang previous language (for web languages)
+*
+* \return second value of the map (INVALID_POSITION if nothing was found or the
+*         winning end marker does not belong to preLang)
+*/
+size_t CUtil::FindStringsCaseInsensitive(const string &target, map &table, size_t &pos, size_t preLang)
+{
+    string target1 = ToLower(target);
+    size_t tmp_pos;
+    pos = string::npos;
+    size_t ret = INVALID_POSITION;
+    for (map::iterator iter = table.begin(); iter != table.end(); iter++)
+    {
+        tmp_pos = target1.find(iter->first);
+        // keep this key only if it occurs and lies in front of the best one so far
+        if (tmp_pos != string::npos && (pos == string::npos || pos > tmp_pos))
+        {
+            // handle special case "width=100%>"
+            ret = iter->second;
+            // an ASP/JSP or PHP end marker is only valid after its own start marker;
+            // otherwise the value is discarded, but pos is still moved to this match
+            if ((ret == WEB_ASP_JSP_END && preLang != WEB_ASP_JSP_START) ||
+                (ret == WEB_PHP_END && (preLang != WEB_PHP_START && preLang != WEB_PHP_START2)))
+                ret = INVALID_POSITION;
+            pos = tmp_pos;
+        }
+    }
+    return ret;
+}
+
+/*!
+* Finds the position of the "target" char in "source" string.
+* Starting from "start_idx" but ignoring escape chars,
+* for example, '\n' is different from 'n' in "this is not nothing"
+* An occurrence counts as escaped when it is preceded by an odd number of
+* consecutive escape characters; an even number (including none) leaves it
+* unescaped.
+*
+* \param source string that is searched
+* \param target character to look for
+* \param start_idx index at which the search begins
+* \param escape character that escapes the target
+*
+* \return index of the first unescaped target, string::npos if there is none
+*/
+size_t CUtil::FindCharAvoidEscape(const string &source, char target, size_t start_idx, char escape)
+{
+    const size_t len = source.length();
+    if (start_idx >= len)
+        return string::npos;
+
+    for (size_t hit = source.find(target, start_idx); hit != string::npos; hit = source.find(target, hit + 1))
+    {
+        // count the escape characters directly in front of the hit, e.g. "adfd\\\\"
+        // (the run may reach back beyond start_idx)
+        size_t escapes = 0;
+        while (escapes < hit && source[hit - 1 - escapes] == escape)
+            escapes++;
+
+        // case \\" is a real hit, case something\" is not
+        if (escapes % 2 == 0)
+            return hit;
+    }
+    return string::npos;
+}
+
+/*!
+* Finds the keyword in the string starting from start to end.
+*
+* \param str string
+* \param keyword keyword to find
+* \param start starting index for search
+* \param end ending index for search
+* \param case_sensitive is case sensitive?
+*
+* \return index of keyword in string
+*/
+size_t CUtil::FindKeyword(const string &str, const string &keyword, size_t start, size_t end, bool case_sensitive)
+{
+    // characters that may sit directly next to a stand-alone keyword
+    const char *const delimiters = " \t;[]()+/-*<>=,&~!^?:%{}|";
+
+    const size_t kwLen = keyword.length();
+    string haystack = str;
+    string needle = keyword;
+    if (end == TO_END_OF_STRING)
+        end = haystack.length() - 1;    // inclusive
+
+    if (!case_sensitive)
+    {
+        haystack = CUtil::ToLower(haystack);
+        needle = CUtil::ToLower(needle);
+    }
+
+    for (size_t from = start; from <= end; )
+    {
+        const size_t hit = haystack.find(needle, from);
+
+        // no further occurrence, or the occurrence runs past the inclusive end
+        if (hit == string::npos || hit + kwLen - 1 > end)
+            break;
+
+        // each side is fine when it is the string border or a delimiter
+        const bool leftOk = (hit == 0) || (strchr(delimiters, haystack[hit - 1]) != NULL);
+        const bool rightOk = (hit + kwLen >= haystack.length()) || (strchr(delimiters, haystack[hit + kwLen]) != NULL);
+        if (leftOk && rightOk)
+            return hit;
+
+        // part of a longer word: continue right behind the start of this hit
+        from = hit + 1;
+    }
+
+    return string::npos;    // not found
+}
+
+/*!
+* Counts keywords in string.
+*
+* \param base string to search
+* \param container set of keywords
+* \param count number of keywords found
+* \param mode allowable leading/trailing character mode (1=exclude keywords surrounded by exclude characters, 2=include only keywords preceded by include1 characters and followed by include2 characters)
+* \param exclude list of characters that may not surround keywords found (if mode=1)
+* \param include1 list of characters that must precede keywords found (if mode=2)
+* \param include2 list of characters that must follow keywords found (if mode=2)
+* \param counter_container stores list of individual keyword counts (if specified)
+* \param case_sensitive language is case sensitive?
+*/
+void CUtil::CountTally(const string &base, StringVector &container, unsigned int &count, int mode, const string &exclude,
+    const string &include1, const string &include2, UIntVector* counter_container, bool case_sensitive)
+{
+    string::size_type idx;
+    string base1;
+    StringVector::iterator vit;
+    UIntVector::iterator cit;
+    unsigned int single_count = 0;
+
+    // pad both ends so a keyword at the very start or end of base still has a
+    // neighbouring character on each side to test
+    base1 = " " + base + " ";
+
+    if (counter_container)
+        cit = counter_container->begin();
+
+    if (case_sensitive == false)
+    {
+        base1 = CUtil::ToLower(base1);
+        // note: the keywords are lower-cased in the caller's container itself
+        for (vit = container.begin(); vit != container.end(); vit++)
+            (*vit) = CUtil::ToLower((*vit));
+    }
+
+    if (mode == 1)
+    {
+        // exclude mode
+        for (vit = container.begin(); vit != container.end(); vit++)
+        {
+            // an empty keyword is found at every position and the search could
+            // never advance past it, so it is treated as "not found"
+            idx = (*vit).empty() ? string::npos : base1.find((*vit));
+            while (idx != string::npos)
+            {
+                if ((exclude.find(base1[idx+((*vit).size())]) == string::npos) &&
+                    (exclude.find(base1[idx-1]) == string::npos))
+                {
+                    count++;
+                    single_count++;
+                }
+                idx = base1.find((*vit), idx + ((*vit).size()));
+            }
+            if (counter_container)
+            {
+                // one counter per keyword, in container order
+                (*cit) += single_count;
+                single_count = 0;
+                cit++;
+            }
+        }
+    }
+    else if (mode == 2)
+    {
+        // include mode (only the total is counted here, counter_container is not updated)
+        for (vit = container.begin(); vit != container.end(); vit++)
+        {
+            // see exclude mode: skip empty keywords
+            idx = (*vit).empty() ? string::npos : base1.find((*vit));
+            while (idx != string::npos)
+            {
+                if ((include1.find(base1[idx-1]) != string::npos) &&
+                    (include2.find(base1[idx+((*vit).size())]) != string::npos))
+                    count++;
+                idx = base1.find((*vit), idx + ((*vit).size()));
+            }
+        }
+    }
+}
+
+/*!
+* Extracts the filename (without path) from the filepath.
+* ex. abc\xyz.cpp --> xyz.cpp
+* Both '\' and '/' are accepted as separators, also mixed within one path;
+* everything behind the last separator is returned.
+*
+* \param filepath file path
+*
+* \return file name (the whole filepath if it contains no separator)
+*/
+string CUtil::ExtractFilename(const string &filepath)
+{
+    // Look for whichever separator comes last. Searching for '\' first and for
+    // '/' only as a fallback returned "sub/xyz.cpp" for "C:\abc/sub/xyz.cpp".
+    size_t idx = filepath.find_last_of("\\/");
+    if (idx != string::npos)
+        return filepath.substr(idx + 1);
+    return filepath;
+}
+
+/*!
+* For a given directory name, extract all the files from that directory as well as
+* from all its sub-directories and store the filenames in the fileList vector.
+* Matching files are appended to fileList; each file is added at most once even
+* if it matches several patterns.
+*
+* \param folder folder to list (leading/trailing whitespace is trimmed in place)
+* \param fileExtList list of file extensions to search; a first entry of "*.*" or "*"
+*        disables filtering, an empty list matches no file
+* \param fileList list of files in folder
+* \param symLinks follow Unix links?
+*
+* \return path exists and is a directory
+*/
+bool CUtil::ListAllFiles(string &folder, StringVector &fileExtList, StringVector &fileList, bool symLinks)
+{
+    StringVector tmpList;
+    size_t i, n;
+
+    folder = CUtil::TrimString(folder);
+
+#ifdef UNIX
+    // skip links if user specified
+    struct stat inodeData;
+    if (!symLinks && (lstat(folder.c_str(), &inodeData) < 0 || S_ISLNK(inodeData.st_mode)))
+        return(false);
+#endif
+
+    // process folder
+    if (!GetFileList(tmpList, folder, symLinks))
+        return(false);
+
+    // The filter mode is the same for every file, so decide it once. Testing
+    // empty() first keeps at(0) from throwing when no pattern was given.
+    const bool noFilter = !fileExtList.empty() &&
+        (fileExtList.at(0) == "*.*" || fileExtList.at(0) == "*");
+
+    // read through tmpList and get the names of all the files in the directory mentioned
+    for (n = 0; n < tmpList.size(); n++)
+    {
+        const string &file = tmpList.at(n);
+
+        // if no-extension filtering, each file is pushed into the fileList
+        if (noFilter)
+        {
+            fileList.push_back(file);
+            continue;
+        }
+
+        // otherwise the file is pushed as soon as one extension pattern matches;
+        // stopping there keeps a file that matches several patterns from being listed twice
+        for (i = 0; i < fileExtList.size(); i++)
+        {
+            if (MatchFilename(ExtractFilename(file), fileExtList.at(i)))
+            {
+                fileList.push_back(file);
+                break;
+            }
+        }
+    }
+    return(true);
+}
+
+/*!
+* For a given path, this method lists all files, directories and
+* sub-directories it contains and stores the filenames in the fileList vector.
+*
+* \param fileList list of files in folder
+* \param path folder's path to list
+* \param symLinks follow Unix links?
+*
+* \return path exists and is a directory
+*/
+bool CUtil::GetFileList(StringVector &fileList, const string &path, bool symLinks)
+{
+    string fullPath;
+#ifdef UNIX
+    DIR *dir;
+    struct dirent *fileRead;
+    struct stat inodeData;
+
+    // opening the given path
+    dir = opendir(path.c_str());
+
+    // If the dir doesn't exist
+    if (dir == NULL)
+        return(false);
+
+    // each file is processed until the last one
+    while ((fileRead = readdir(dir)) != NULL)
+    {
+        // '.' & '..' are omitted
+        if ((strcmp(fileRead->d_name, ".") != 0) && (strcmp(fileRead->d_name, "..") != 0))
+        {
+            // fullPath contains the path + the file name.
+            fullPath = path + '/' + fileRead->d_name;
+            if (symLinks)
+            {
+                // stat() looks at the target of a link, so links are followed
+                if (stat(fullPath.c_str(), &inodeData) >= 0)
+                {
+                    // for each file, store the fullPath into the fileList
+                    if (!S_ISDIR(inodeData.st_mode))
+                        fileList.push_back(fullPath);
+                    else
+                    {
+                        // for each directory, its file list is obtained
+                        GetFileList(fileList, fullPath, symLinks);
+                    }
+                }
+            }
+            else
+            {
+                // lstat() looks at the link itself, so links can be recognized and skipped
+                if (lstat(fullPath.c_str(), &inodeData) >= 0)
+                {
+                    // for each file, store the fullPath into the fileList
+                    if (!S_ISLNK(inodeData.st_mode))
+                    {
+                        if (!S_ISDIR(inodeData.st_mode))
+                            fileList.push_back(fullPath);
+                        else
+                        {
+                            // for each directory, its file list is obtained
+                            GetFileList(fileList, fullPath, symLinks);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    // close the directory
+    closedir(dir);
+#else
+    struct _finddata_t c_file;
+    ptrdiff_t hFile;
+    string findPath = path + "\\*.*";
+
+    // the first file is obtained
+    hFile = _findfirst(findPath.c_str(), &c_file);
+
+    // If the dir doesn't exist
+    if (hFile == -1)
+        return(false);
+
+    // _findfirst() has already stored the first entry in c_file, so that entry
+    // must be handled before _findnext() is called. Calling _findnext() first
+    // dropped it, which loses a real file wherever the listing does not begin
+    // with the "." entry.
+    do
+    {
+        // for each file (not a directory (_A_SUBDIR)), store its name into the fileList
+        fullPath = path + "\\" + c_file.name;
+        if (!(c_file.attrib & _A_SUBDIR))
+            fileList.push_back(fullPath);
+        else if ((strcmp(".", c_file.name) != 0) && (strcmp("..", c_file.name) != 0))
+        {
+            // for each directory, except '.' and '..', its file list is obtained
+            GetFileList(fileList, fullPath, symLinks);
+        }
+    } while (_findnext(hFile, &c_file) == 0);
+    // close the directory
+    _findclose(hFile);
+#endif
+    return(true);
+}
+
+/*!
+* For a given filename, this method checks whether the file matches
+* a given match string containing wildcards (*) and placeholders (?).
+* A '?' stands for exactly one character, a '*' for any run of characters.
+* Literal characters are compared case-sensitively when UNIX is defined and
+* case-insensitively otherwise.
+*
+* \param filename filename to be checked
+* \param matchstr string pattern to match
+*
+* \return filename matches pattern
+*/
+bool CUtil::MatchFilename(const string &filename, const string &matchstr)
+{
+    // f = current index in filename, m = current index in matchstr,
+    // fl / ml = their lengths; s..e-1 = literal segment following a '*' (length sl)
+    int i, j, k, f, m, fl, ml, s, e, sl, lim;
+
+    fl = (int)filename.length();
+    ml = (int)matchstr.length();
+    if (ml == 0)
+        return(fl == 0);    // an empty pattern only matches an empty name
+    if (fl == 0)
+        return(false);
+
+    f = 0;
+    for (m = 0; m < ml; m++)
+    {
+        if (matchstr[m] == '?')
+        {
+            // consume one character of the filename, whatever it is
+            f++;
+            if (f > fl)
+                return(false);
+            continue;
+        }
+        else if (matchstr[m] == '*')
+        {
+            // search for next non-wild card character
+            s = m + 1;
+            while (s < ml && matchstr[s] == '*')
+                s++;
+            if (s >= ml)
+                break;    // pattern ends with '*': the rest of the filename is accepted
+            e = s + 1;
+            while (e < ml && matchstr[e] != '*')
+                e++;
+            sl = e - s;
+            lim = fl - sl - f;    // last offset at which the segment still fits
+            if (e >= ml)
+            {
+                // check the end of the filename
+                if (fl - f < sl)
+                    return(false);
+                for (j = fl - sl, k = s; j < fl; j++, k++)
+                {
+                    if (matchstr[k] != '?')
+                    {
+#ifdef UNIX
+                        // case-sensitive match
+                        if (matchstr[k] != filename[j])
+                            break;
+#else
+                        // case-insensitive match
+                        if (tolower(matchstr[k]) != tolower(filename[j]))
+                            break;
+#endif
+                    }
+                }
+                return(j >= fl);
+            }
+            // segment is followed by another '*': take its first occurrence
+            for (i = 0; i <= lim; i++)
+            {
+                for (j = f + i, k = s; j < f + i + sl; j++, k++)
+                {
+                    if (matchstr[k] != '?')
+                    {
+#ifdef UNIX
+                        // case-sensitive match
+                        if (matchstr[k] != filename[j])
+                            break;
+#else
+                        // case-insensitive match
+                        if (tolower(matchstr[k]) != tolower(filename[j]))
+                            break;
+#endif
+                    }
+                }
+                if (j >= f + i + sl)
+                {
+                    // whole segment matched at offset i
+                    f += i + sl;
+                    break;
+                }
+            }
+            if (i > lim)
+                return(false);
+            m = e - 1;    // the loop increment moves m onto the next '*'
+            continue;
+        }
+#ifdef UNIX
+        // case-sensitive match
+        if (matchstr[m] != filename[f])
+            return(false);
+#else
+        // case-insensitive match
+        if (tolower(matchstr[m]) != tolower(filename[f]))
+            return(false);
+#endif
+        f++;
+    }
+    // characters left over in the filename are only fine behind a trailing '*'
+    if (f < fl && matchstr[ml - 1] != '*')
+        return(false);
+    return(true);
+}
+
+/*!
+* For a given path, this method creates the specified directory path
+* including all required sub-directories.
+* Afterwards a temporary file ("___temp.dat") is written to the directory and
+* removed again to verify that the directory is writable.
+*
+* \param path path to create
+*
+* \return file status (1 = directory exists and is writable, 0 = failure)
+*/
+int CUtil::MkPath(const string &path)
+{
+    size_t i = 1;
+    string tpath;
+#ifdef UNIX
+    struct stat st;
+    if (stat(path.c_str(), &st) != 0)
+    {
+        // create every missing parent, one path component at a time
+        while (i < path.size())
+        {
+            if (path[i] == '/')
+            {
+                tpath = path.substr(0, i);
+                if (stat(tpath.c_str(), &st) != 0)
+                {
+                    if (mkdir(tpath.c_str(), 0777) != 0)
+                        return 0;
+                }
+            }
+            i++;
+        }
+        if (mkdir(path.c_str(), 0777) != 0)
+            return 0;
+    }
+    string tempfile = path + "/___temp.dat";
+#else
+    if (_access(path.c_str(), 0) != 0)
+    {
+        // create every missing parent; a separator right behind ':' is skipped
+        while (i < path.size())
+        {
+            if ((path[i] == '\\' || path[i] == '/') && path[i-1] != ':')
+            {
+                tpath = path.substr(0, i);
+                if (_access(tpath.c_str(), 0) != 0)
+                {
+                    if (_mkdir(tpath.c_str()) != 0)
+                        return 0;
+                }
+            }
+            i++;
+        }
+        if (_mkdir(path.c_str()) != 0)
+            return 0;
+    }
+    string tempfile = path + "\\___temp.dat";
+#endif
+
+    // attempt to write a temporary file to the directory
+    ofstream outfile;
+    outfile.open(tempfile.c_str(), ofstream::out);
+    outfile.close();
+    if (outfile.fail())
+    {
+        // file could not be opened
+        return 0;
+    }
+    else
+    {
+        // delete the temporary file
+        remove(tempfile.c_str());
+    }
+    return 1;
+}
+
+/*!
+* Prints the standardized file header for all output files.
+*
+* \param pout output file stream
+* \param title custom title
+* \param cmd current command line string (no line is printed for an empty string)
+*
+* \return file status
+*/
+int CUtil::PrintFileHeader(ofstream &pout, const string &title, const string &cmd)
+{
+    // current local time, fetched with the call available on this platform
+    time_t now;
+    struct tm *localNow;
+    time(&now);
+#if defined UNIX || defined MINGW
+    localNow = localtime(&now);
+#else
+    struct tm localBuf;
+    localtime_s(&localBuf, &now);
+    localNow = &localBuf;
+#endif
+
+    // product and copyright lines, followed by an empty line
+    PrintFileHeaderLine(pout, "USC Unified CodeCount (UCC)");
+    PrintFileHeaderLine(pout, "(c) Copyright 1998 - 2013 University of Southern California");
+    pout << endl;
+
+    PrintFileHeaderLine(pout, title);
+
+    // revision and date (month day year)
+    ostringstream stamp;
+    stamp << "Generated by UCC v." << PRODUCT_REVISION << " on " << localNow->tm_mon + 1
+        << " " << localNow->tm_mday << " " << localNow->tm_year + 1900;
+    PrintFileHeaderLine(pout, stamp.str());
+
+    // print command line if given
+    if (!cmd.empty())
+        PrintFileHeaderLine(pout, cmd);
+    pout << endl;
+
+    return 1;
+}
+
+/*!
+* Prints one line of the standardized file header.
+* A line shorter than 100 characters is centered by putting the same number of
+* blanks in front of and behind it; longer lines are printed as they are.
+*
+* \param pout output file stream
+* \param line custom line
+*
+* \return file status
+*/
+int CUtil::PrintFileHeaderLine(ofstream &pout, const string &line)
+{
+    const size_t lineLength = 100;
+    string centered = line;
+
+    if (lineLength > centered.length())
+    {
+        // half of the free space on each side (rounded down)
+        const string pad((lineLength - centered.length()) / 2, ' ');
+        centered = pad + centered + pad;
+    }
+    pout << centered << endl;
+
+    return 1;
+}
+
+/*!
+* Removes extra text added after @@ by ClearCase.
+* The original implementation was provided by NGC.
+*
+* \param fileName file name
+*
+* \return file name cut off at the last "@@" (unchanged if there is none)
+*/
+string CUtil::ConvertClearCaseFile(const string &fileName)
+{
+    // remove @@ and anything after
+    const size_t marker = fileName.rfind("@@");
+    if (marker != string::npos)
+        return fileName.substr(0, marker);
+    return fileName;
+}
+
+/*!
+* Checks for line truncation.
+*
+* \param length length of current line
+* \param totalLength length of current SLOC
+* \param truncate allowable number of characters per SLOC (0 = no limit)
+* \param trunc_flag line truncated?
+*
+* \return size of string to keep after truncation
+*/
+size_t CUtil::TruncateLine(size_t length, size_t totalLength, size_t truncate, bool &trunc_flag)
+{
+    // nothing is cut when there is no limit or the line still fits below the limit
+    if (truncate == 0 || (totalLength < truncate && totalLength + length <= truncate))
+    {
+        trunc_flag = false;
+        return(length);
+    }
+
+    // limit already reached: keep nothing; otherwise keep what is left up to the limit
+    trunc_flag = true;
+    if (totalLength >= truncate)
+        return(0);
+    return(truncate - totalLength);
+}
+
+/*!
+* Clear redundant/unnecessary white spaces in a string.
+* A blank or tab is dropped when it is the first or last character or when one
+* of its neighbours in the original string is a special character; every other
+* blank or tab is written as a single blank.
+* NOTE(review): blank and tab are special characters themselves, so a run of
+* two or more of them between two words is dropped completely - confirm this
+* is intended.
+*
+* \param str string to be processed
+*
+* \return new string
+*/
+string CUtil::ClearRedundantSpaces(const string &str)
+{
+    const char *const specials = " \t;[]()+/-*<>=,&~!^?:%{}|";
+    const size_t len = str.length();
+    string cleaned;
+    cleaned.reserve(len);
+
+    for (size_t pos = 0; pos < len; pos++)
+    {
+        const char ch = str[pos];
+        if (ch != ' ' && ch != '\t')
+        {
+            // everything but blank/tab is copied unchanged
+            cleaned += ch;
+            continue;
+        }
+
+        // the neighbours are only looked at when both of them exist
+        if (pos == 0 || pos + 1 == len
+            || strchr(specials, str[pos - 1]) != NULL
+            || strchr(specials, str[pos + 1]) != NULL)
+        {
+            continue;
+        }
+        cleaned += ' ';    // a kept tab becomes a blank
+    }
+
+    return cleaned;
+}
+
+/*!
+* Returns a string without smart quotes.
+* +* \param str original string +* +* \return updated string +*/ +string CUtil::ReplaceSmartQuotes(const string &str) +{ + string str1 = str; + std::replace(str1.begin(), str1.end(), char((unsigned char)145), '\''); + std::replace(str1.begin(), str1.end(), char((unsigned char)146), '\''); + std::replace(str1.begin(), str1.end(), char((unsigned char)147), '\"'); + std::replace(str1.begin(), str1.end(), char((unsigned char)148), '\"'); + return str1; +} + +/*! +* Adds an uncounted file log if wanted. +* +* \Global cmdLine IN program command line +* \Global isDiff IN are we doing file Differencing (-d) +* \Global g_no_uncounted IN true will disable this +* +* \param msg IN message string +* \param uncFile IN uncounted file path +* \param useListA IN use file list A? (otherwise use list B) +* \param csvOutput IN CSV file stream? (otherwise ASCII text file) +* \param outDir IN output directory string +* +* return retMsg string of message to send to UI if not empty string +*/ +string CUtil::WriteUncountedFileUtil(const string &msg, const string &uncFile, bool useListA, bool csvOutput, string outDir) +{ + if ( true == g_no_uncounted ) + return ""; + + string retMsg = ""; + + static ofstream uncFileA; + static ofstream uncFileB; + static string filePathA; + static string filePathB; + ofstream *uncFS; + string fileName, filePath; + + if (uncFile.empty()) + return retMsg; + + // open the uncounted files output file if not already opened + if (useListA) + { + filePath = filePathA; + uncFS = &uncFileA; + } + else + { + filePath = filePathB; + uncFS = &uncFileB; + } + if (filePath.empty()) + { + fileName = ""; + if (isDiff) // Read global variable + { + if (useListA) + fileName = "Baseline-A-"; + else + fileName = "Baseline-B-"; + } + if (csvOutput) + fileName += UNCOUNTED_FILES_CSV; + else + fileName += UNCOUNTED_FILES; + + if (outDir.empty()) + filePath = fileName; + else + filePath = outDir + fileName; + if (useListA) + filePathA = filePath; + else + filePathB = filePath; + 
} + if (!(*uncFS).is_open()) + { + (*uncFS).open(filePath.c_str(), ofstream::out); + if (!(*uncFS).is_open()) + { + string err = "Error: Failed to open uncounted files output file ("; + err += filePath; + err += ")"; + // userIF->AddError(err); + retMsg = err; + return retMsg; + } + CUtil::PrintFileHeader(*uncFS, "UNCOUNTED FILES", cmdLine); + if (csvOutput) + (*uncFS) << "Message,Uncounted File" << endl; + else + { + (*uncFS).setf(ofstream::left); + (*uncFS).width(45); + (*uncFS) << "Message"; + (*uncFS).unsetf(ofstream::left); + (*uncFS).width(5); + (*uncFS) << " | "; + (*uncFS).width(3); + (*uncFS).setf(ofstream::left); + (*uncFS).width(45); + (*uncFS) << "Uncounted File"; + (*uncFS) << endl; + for (int y = 0; y < 9; y++) + (*uncFS) << "----------"; // 10 - chars + } + } + if (csvOutput) + (*uncFS) << msg << "," << uncFile << endl; + else + { + (*uncFS) << endl; + (*uncFS).setf(ofstream::left); + (*uncFS).width(45); + (*uncFS) << msg; + (*uncFS).unsetf(ofstream::left); + (*uncFS).width(5); + (*uncFS) << " | "; + (*uncFS).setf(ofstream::left); + (*uncFS).width(45); + (*uncFS) << uncFile; + } + return retMsg; +} diff --git a/src/CUtil.h b/src/CUtil.h new file mode 100644 index 0000000..bfaac32 --- /dev/null +++ b/src/CUtil.h @@ -0,0 +1,74 @@ +//! Utility class definition for static methods. +/*! +* \file CUtil.h +* +* This file contains the utility class definition for static methods. +* +* Changed from UCC 2013_04 release by Randy Maxwell +* Changes started on 2015_06_06 +* Changes ended on 2015_06_06 +* Refactored for faster performance +*/ + +#ifndef CUTIL_H +#define CUTIL_H + +#include "cc_main.h" + +using namespace std; + +#define INVALID_POSITION ((unsigned int)-1) +#define TO_END_OF_STRING INVALID_POSITION + +//! Vector containing a list of strings. +/*! +* \typedef vectorString +* +* Defines a vector containing a list of strings. +*/ +typedef vector vectorString; + +//! Utility class. +/*! +* \class CUtil +* +* Defines a utility class. 
+*/ +class CUtil +{ +public: + static string TrimString(const string &str, int mode = 0); + static string EraseString(const string &srcstr, const string &erasedstr); + + /*! + * Initialize an array of lower case chars used by ToLower. Much faster performance. + * + * \Globals lowerChars char array is initialized + */ + static void InitToLower(); + + static string ToLower(const string &string_to_lower); + static bool CheckBlank(const string &str); + static bool IsInteger(const string &str); + static size_t FindStringsCaseInsensitive(const string &target, map &table, size_t &pos, size_t preLang = INVALID_POSITION); + static size_t FindCharAvoidEscape(const string &source, char target, size_t start_idx, char escape); + static size_t FindKeyword(const string &str, const string &keyword, size_t start = 0, size_t end = TO_END_OF_STRING, bool case_sensitive = true); + static void CountTally(const string &base, StringVector &container, unsigned int &count, int mode, const string &exclude, + const string &include1, const string &include2, UIntVector *counter_container = 0, bool case_sensitive = true); + static string ExtractFilename(const string &filepath); + static bool ListAllFiles(string &folder, StringVector &fileExtList, StringVector &fileList, bool symLinks); + static bool GetFileList(StringVector &fileList, const string &path, bool symLinks); + static bool MatchFilename(const string &filename, const string &matchStr); + static int MkPath(const string &path); + static int PrintFileHeader(ofstream &pout, const string &title, const string &cmd = ""); + static int PrintFileHeaderLine(ofstream &pout, const string &line); + static string ConvertClearCaseFile(const string &fileName); + static size_t TruncateLine(size_t length, size_t totalLength, size_t truncate, bool &trunc_flag); + static string ClearRedundantSpaces(const string &str); + static string ReplaceSmartQuotes(const string &str); + + // Returns string of message to send to UI if not empty string + static string 
WriteUncountedFileUtil(const string &msg, const string &uncFile, bool useListA, bool csvOutput, string outDir); +}; + +#endif diff --git a/src/CVHDLCounter.cpp b/src/CVHDLCounter.cpp new file mode 100644 index 0000000..efb41b6 --- /dev/null +++ b/src/CVHDLCounter.cpp @@ -0,0 +1,1449 @@ +//! Code counter class methods for the VHDL language. +/*! +* \file CVHDLCounter.cpp +* +* This file contains the code counter class methods for the VHDL hardware definition language (used in FPGA programming). +*/ + +#include "CVHDLCounter.h" +#include + +/*! +* Constructs a CVHDLCounter object. +*/ +CVHDLCounter::CVHDLCounter() +{ + classtype = VHDL; + language_name = "VHDL"; + + file_extension.push_back(".vhd"); + file_extension.push_back(".vhdl"); + + LineCommentStart.push_back("--"); + QuoteStart = "\""; + QuoteEnd = "\""; + QuoteEscapeFront = '\"'; + + directive.push_back("pragma"); + + data_name_list.push_back("access"); + data_name_list.push_back("alias"); + data_name_list.push_back("attribute"); + data_name_list.push_back("buffer"); + data_name_list.push_back("bus"); + data_name_list.push_back("constant"); + data_name_list.push_back("file"); + data_name_list.push_back("generic"); + data_name_list.push_back("group"); + data_name_list.push_back("label"); + data_name_list.push_back("linkage"); + data_name_list.push_back("literal"); + data_name_list.push_back("new"); + data_name_list.push_back("range"); + data_name_list.push_back("record"); + data_name_list.push_back("register"); + data_name_list.push_back("shared"); + data_name_list.push_back("signal"); + data_name_list.push_back("subtype"); + data_name_list.push_back("type"); + data_name_list.push_back("units"); + data_name_list.push_back("variable"); + + exec_name_list.push_back("after"); + exec_name_list.push_back("architecture"); + exec_name_list.push_back("assert"); + exec_name_list.push_back("begin"); + exec_name_list.push_back("block"); + exec_name_list.push_back("body"); + exec_name_list.push_back("case"); + 
exec_name_list.push_back("component"); + exec_name_list.push_back("configuration"); + exec_name_list.push_back("disconnect"); + exec_name_list.push_back("else"); + exec_name_list.push_back("elsif"); + exec_name_list.push_back("end"); + exec_name_list.push_back("entity"); + exec_name_list.push_back("exit"); + exec_name_list.push_back("for"); + exec_name_list.push_back("function"); + exec_name_list.push_back("generate"); + exec_name_list.push_back("if"); + exec_name_list.push_back("inertial"); + exec_name_list.push_back("library"); + exec_name_list.push_back("loop"); + exec_name_list.push_back("map"); + exec_name_list.push_back("next"); + exec_name_list.push_back("null"); + exec_name_list.push_back("on"); + exec_name_list.push_back("others"); + exec_name_list.push_back("package"); + exec_name_list.push_back("port"); + exec_name_list.push_back("procedure"); + exec_name_list.push_back("process"); + exec_name_list.push_back("reject"); + exec_name_list.push_back("report"); + exec_name_list.push_back("return"); + exec_name_list.push_back("select"); + exec_name_list.push_back("then"); + exec_name_list.push_back("transport"); + exec_name_list.push_back("use"); + exec_name_list.push_back("wait"); + exec_name_list.push_back("while"); + exec_name_list.push_back("with"); + + cmplx_preproc_list.push_back("pragma"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("**"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("mod"); + cmplx_calc_list.push_back("abs"); + cmplx_calc_list.push_back("rem"); + + cmplx_logic_list.push_back("="); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("/="); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("&"); + cmplx_logic_list.push_back("not"); + cmplx_logic_list.push_back("and"); + cmplx_logic_list.push_back("or"); + cmplx_logic_list.push_back("nand"); + 
cmplx_logic_list.push_back("nor"); + cmplx_logic_list.push_back("xor"); + cmplx_logic_list.push_back("xnor"); + cmplx_logic_list.push_back("sll"); + cmplx_logic_list.push_back("srl"); + cmplx_logic_list.push_back("sla"); + cmplx_logic_list.push_back("sra"); + cmplx_logic_list.push_back("rol"); + cmplx_logic_list.push_back("ror"); + cmplx_logic_list.push_back("<="); + + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("elseif"); + cmplx_cond_list.push_back("loop"); + cmplx_cond_list.push_back("next"); + cmplx_cond_list.push_back("when"); + cmplx_cond_list.push_back("while"); + + cmplx_assign_list.push_back("=>"); + cmplx_assign_list.push_back(":="); + cmplx_assign_list.push_back("<="); +} +/*! +* Replaces quoted strings inside a string starting at idx_start with '$'. +* Handles special cases for VHDL literal strings. +* +* \param strline string to be processed +* \param idx_start index of line character to start search +* \param contd specifies the quote string is continued from the previous line +* \param CurrentQuoteEnd end quote character of the current status +* +* \return method status +*/ +int CVHDLCounter::ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd) +{ + size_t idx = 0; + while (true) + { + idx = strline.find("\"", idx); // replace all '"' by '$' + if (idx != string::npos) + strline.replace(idx, 1, 1, '$'); + else + break; + } + return CCodeCounter::ReplaceQuote(strline, idx_start, contd, CurrentQuoteEnd); +} + +/*! +* Counts the number of comment lines, removes comments, and +* replaces quoted strings by special chars, e.g., $ +* All arguments are modified by the method. 
+*
+* \param fmap list of processed file lines
+* \param result counter results
+* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings)
+*
+* \return method status: 0 when no comment delimiters are configured or the
+*         file class is UNKNOWN/DATAFILE (nothing is processed), 1 otherwise
+*/
+int CVHDLCounter::CountCommentsSLOC(filemap* fmap, results* result, filemap *fmapBak)
+{
+	// nothing to do when the language defines no comment delimiters at all
+	if (BlockCommentStart.empty() && LineCommentStart.empty())
+		return 0;
+	if (classtype == UNKNOWN || classtype == DATAFILE)
+		return 0;
+
+	// contd: true while a block comment opened on an earlier line is still open
+	bool contd = false;
+	// contd_nextline: true once the remainder of the current line has been consumed by a comment
+	bool contd_nextline;
+	int comment_type = 0;
+	/*
+	comment_type:
+	0 : not a comment
+	1 : line comment, whole line
+	2 : line comment, embedded
+	3 : block comment, undecided
+	4 : block comment, embedded
+	*/
+
+	size_t idx_start, idx_end, comment_start;
+	size_t quote_idx_start;
+	string curBlckCmtStart, curBlckCmtEnd;
+	char CurrentQuoteEnd = 0;
+	// quote_contd is maintained by ReplaceQuote; presumably it reports a quoted string
+	// that is still open at the end of the line -- TODO confirm against ReplaceQuote
+	bool quote_contd = false, directiveComment =false;
+	// fmap and fmapBak are advanced in lock step; assumes both hold the same number of lines
+	filemap::iterator itfmBak = fmapBak->begin();
+
+	quote_idx_start = 0;
+
+	for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++)
+	{
+		directiveComment = false;
+		contd_nextline = false;
+
+		quote_idx_start = 0;
+		idx_start = 0;
+
+		if (CUtil::CheckBlank(iter->line))
+			continue;
+
+		if (quote_contd)
+		{
+			// Replace quote until next character
+			ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd);
+			if (quote_contd)
+				continue;
+		}
+
+		// still inside a block comment from a previous line: treat the line start as block comment
+		if (contd)
+			comment_type = 3;
+
+		while (!contd_nextline && idx_start < iter->line.length())
+		{
+			// need to handle multiple quote chars in some languages, both " and ' may be accepted
+			quote_idx_start = FindQuote(iter->line, QuoteStart, quote_idx_start, QuoteEscapeFront);
+			comment_start = idx_start;
+			// only look for a new comment start when not already inside a block comment
+			if (!contd)
+				FindCommentStart(iter->line, comment_start, comment_type, curBlckCmtStart, curBlckCmtEnd);
+
+			// neither a quote nor a comment is left on this line
+			if (comment_start == string::npos && quote_idx_start == string::npos)
+				break;
+
+			if (comment_start != string::npos)
+				idx_start = comment_start;
+
+			// if found quote before comment, e.g., "this is quote");//comment
+			if (quote_idx_start != string::npos && (comment_start == string::npos || quote_idx_start < comment_start))
+			{
+				ReplaceQuote(iter->line, quote_idx_start, quote_contd, CurrentQuoteEnd);
+				if (quote_idx_start > idx_start && quote_idx_start != iter->line.length())
+				{
+					// comment delimiter inside quote
+					idx_start = quote_idx_start;
+					continue;
+				}
+			}
+			else if (comment_start != string::npos)
+			{
+				// check if next word is pragma or synopsis; this can turn out to be a declaration
+				// NOTE(review): the search offset always uses LineCommentStart.size(), also when
+				// the comment found is a block comment -- confirm this is intended
+				StringVector::iterator itDirective = directive.begin();
+				for (; itDirective != directive.end(); ++itDirective)
+				{
+					if ((CUtil::FindKeyword(iter->line, *itDirective, comment_start+LineCommentStart.size(), TO_END_OF_STRING, false)) != string::npos)
+					{
+						// the processed line is replaced by the bare directive keyword and is
+						// not counted as a comment; the backup line is left unchanged
+						directiveComment = true;
+						iter->line = *itDirective;
+						break;
+					}
+				}
+				if (directiveComment)
+					break;
+
+				// comment delimiter starts first
+				switch (comment_type)
+				{
+				case 1:	// line comment, definitely whole line
+					iter->line = "";
+					itfmBak->line = "";
+					result->comment_lines++;
+					contd_nextline = true;
+					break;
+				case 2:	// line comment, possibly embedded
+					// keep only the code in front of the comment, in both maps
+					iter->line = iter->line.substr(0, idx_start);
+					itfmBak->line = itfmBak->line.substr(0, idx_start);
+					// trim trailing space
+					iter->line = CUtil::TrimString(iter->line, 1);
+					itfmBak->line = CUtil::TrimString(itfmBak->line, 1);
+					if (iter->line.empty())
+						result->comment_lines++;	// whole line
+					else
+						result->e_comm_lines++;	// embedded
+					contd_nextline = true;
+					break;
+				case 3:	// block comment
+				case 4:
+					// a continued comment may end anywhere on the line; a new one can only
+					// end after its own start delimiter
+					if (contd)
+						idx_end = iter->line.find(curBlckCmtEnd);
+					else
+						idx_end = iter->line.find(curBlckCmtEnd, idx_start + curBlckCmtStart.length());
+
+					if (idx_end == string::npos)
+					{
+						// the block comment does not end on this line
+						if (comment_type == 3)
+						{
+							iter->line = "";
+							itfmBak->line = "";
+							result->comment_lines++;
+						}
+						else if (comment_type == 4)
+						{
+							iter->line = iter->line.substr(0, idx_start);
+							itfmBak->line = itfmBak->line.substr(0, idx_start);
+							// trim trailing space
+							iter->line = CUtil::TrimString(iter->line, 1);
+							itfmBak->line = CUtil::TrimString(itfmBak->line, 1);
+							if (iter->line.empty())
+								result->comment_lines++;	// whole line
+							else
+								result->e_comm_lines++;	// embedded
+						}
+						contd = true;
+						contd_nextline = true;
+						break;
+					}
+					else
+					{
+						// the block comment ends on this line: cut it out of both maps and
+						// keep scanning the remainder of the line
+						contd = false;
+						iter->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length());
+						itfmBak->line.erase(idx_start, idx_end - idx_start + curBlckCmtEnd.length());
+						if (iter->line.empty())
+							result->comment_lines++;
+						else
+						{
+							// trim trailing space
+							iter->line = CUtil::TrimString(iter->line, 1);
+							itfmBak->line = CUtil::TrimString(itfmBak->line, 1);
+							if (iter->line.empty())
+								result->comment_lines++;	// whole line
+							else
+								result->e_comm_lines++;	// embedded
+						}
+
+						// quote chars found may be erased as it is inside comment
+						quote_idx_start = idx_start;
+					}
+					break;
+				default:
+					cout << "Error in CountCommentsSLOC()" << endl;
+					break;
+				}
+			}
+		}
+		// last statement of the loop body, so this 'continue' has no further effect
+		if (directiveComment)
+			continue;
+	}
+	return 1;
+}
+
+/*!
+* Counts file language complexity based on specified language keywords/characters.
+* +* \param fmap list of processed file lines +* \param result counter results +* +* \return method status +*/ +int CVHDLCounter::CountComplexity(filemap* fmap, results* result) +{ + if (classtype == UNKNOWN || classtype == DATAFILE) + return 0; + filemap::iterator fit; + filemap fitBak; + filemap::iterator fitForw, fitBack;//used to check prior an later lines for semicolons + unsigned int cnt; + string line, line2; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$><=:"; + tokLocVect conditionalVector; + tokLocVect::reverse_iterator r_tlvIter; + StringVector::iterator strIter = this->cmplx_cond_list.begin(); + string buf; // have a buffer string + stringstream ss; // insert the string into a stream + tokenLocation tl; + int count; + bool whenCont; + + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + line = fit->line; + + if (CUtil::CheckBlank(line)) + continue; + + line = " " + line; + + // mathematical functions + cnt = 0; + CUtil::CountTally(line, math_func_list, cnt, 1, exclude, "", "", &result->math_func_count, casesensitive); + result->cmplx_math_lines += cnt; + + // trigonometric functions + cnt = 0; + CUtil::CountTally(line, trig_func_list, cnt, 1, exclude, "", "", &result->trig_func_count, casesensitive); + result->cmplx_trig_lines += cnt; + + // logarithmic functions + cnt = 0; + CUtil::CountTally(line, log_func_list, cnt, 1, exclude, "", "", &result->log_func_count, casesensitive); + result->cmplx_logarithm_lines += cnt; + + // calculations + cnt = 0; + CUtil::CountTally(line, cmplx_calc_list, cnt, 1, exclude, "", "", &result->cmplx_calc_count, casesensitive); + result->cmplx_calc_lines += cnt; + + // conditionals + cnt = 0; + CUtil::CountTally(line, cmplx_cond_list, cnt, 1, exclude, "", "", &result->cmplx_cond_count, casesensitive); + result->cmplx_cond_lines += cnt; + + // logical operators + cnt = 0; + StringVector tmpList = cmplx_logic_list;//making a temporary list with the '<=' operator removed from the 
list; counting it on another pass; + tmpList.pop_back(); + CUtil::CountTally(line, tmpList, cnt, 1, exclude, "", "", &result->cmplx_logic_count, casesensitive); + result->cmplx_logic_lines += cnt; + + // preprocessor directives + cnt = 0; + CUtil::CountTally(line, cmplx_preproc_list, cnt, 1, exclude, "", "", &result->cmplx_preproc_count, casesensitive); + result->cmplx_preproc_lines += cnt; + + // assignments + cnt = 0; + tmpList.clear(); + tmpList = cmplx_assign_list;//making a temporary list with the '<=' operator removed from the list; counting it on another pass; + tmpList.pop_back(); + CUtil::CountTally(line, tmpList, cnt, 1, exclude, "", "", &result->cmplx_assign_count, casesensitive); + result->cmplx_assign_lines += cnt; + + // pointers + cnt = 0; + CUtil::CountTally(line, cmplx_pointer_list, cnt, 1, exclude, "", "", &result->cmplx_pointer_count, casesensitive); + result->cmplx_pointer_lines += cnt; + } + + // do a single pass to mark and replace logical operator lessThan or equal "<=" + // these appear only in conditional statements + // the remaining are signal assignment operators + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + line = fit->line; + line = CUtil::ToLower(line); + + if (CUtil::CheckBlank(line)) + continue; + ss.clear(); + ss.str(""); + ss << line; + count = -1; + while (ss >> buf) + { + ++count; + if (!buf.compare("if")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("then")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("elsif")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("wait")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + 
conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("until")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("assert")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("while")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("loop")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("next")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("when")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("exit")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("return")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (!buf.compare("case")) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = buf; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (buf.find_last_of(";") != string::npos) + { + tl.lineNumber= fit->lineNumber; + tl.position = count; + tl.token = ";"; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + if (buf.find("<=") != string::npos) + { + whenCont = false; + // iterate up the vector an look for the first conditional statement + r_tlvIter = conditionalVector.rbegin(); + while (r_tlvIter != 
conditionalVector.rend()) + { + if (!r_tlvIter->token.compare(";")) + { + result->cmplx_assign_count.back()++; + result->cmplx_assign_lines++; + tl.token = "assign"; + break; + } + else + { + if ((!r_tlvIter->token.compare("if") || !r_tlvIter->token.compare("elsif") || !r_tlvIter->token.compare("assert") || + !r_tlvIter->token.compare("while") || !r_tlvIter->token.compare("return") || !r_tlvIter->token.compare("until") ) && !whenCont) + { + result->cmplx_logic_count.back()++; + result->cmplx_logic_lines++; + tl.token = "lte"; + break; + } + if (!r_tlvIter->token.compare("when")) + { + whenCont = true; + r_tlvIter++; + continue; + } + if (!r_tlvIter->token.compare("case") || !r_tlvIter->token.compare("next") || !r_tlvIter->token.compare("exit")) + { + result->cmplx_assign_count.back()++; + result->cmplx_assign_lines++; + tl.token = "assign"; + whenCont = false; + break; + } + result->cmplx_assign_count.back()++; + result->cmplx_assign_lines++; + tl.token = "assign"; + break; + } + r_tlvIter++; + } + tl.lineNumber= fit->lineNumber; + tl.position = count; + buf.clear(); + conditionalVector.push_back(tl); + continue; + } + } + } + return 1; +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CVHDLCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + size_t idx; + unsigned int cnt = 0; + string strDirLine = ""; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$><=:"; + + filemap::iterator itfmBak = fmapBak->begin(); + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count, false); + } + + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + // merged bug fix for considering only stand-alone keywords + // e.g. package should not be considered a directive (only 'pack' is) + if (((idx = CUtil::FindKeyword(iter->line, *viter, 0, TO_END_OF_STRING, false)) != string::npos) && idx == 0) + { + iter->line = ""; + result->directive_lines[PHY]++; + break; + } + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. 
+* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CVHDLCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + bool blank_flag = false; + bool found_unit = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + unsigned int cnt = 0; + unsigned int loopLevel = 0; + + filemap::iterator fit, fitbak; + string line, lineBak, tmp; + string special = "[]()+/-*<>=,@&~!^?:%{}"; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$><=:"; + + unsigned int l_paren_cnt = 0; + bool l_foundblock, found_forifwhile, found_end, found_type, found_is, processSignatureFound, found_withSelect, found_whenConditional, foundWait, blockSignatureFound, found_record; + l_foundblock = found_forifwhile = found_end = found_is = processSignatureFound = found_withSelect = found_whenConditional = foundWait = blockSignatureFound = found_record = false; + vectorString currentBlock; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + // insert blank at the beginning (for searching keywords) + line = ' ' + fit->line; + lineBak = ' ' + fitbak->line; + + if (CUtil::CheckBlank(line)) + { + // the line is either blank/whole line comment/compiler directive + blank_flag = true; + continue; + } + else + blank_flag = false; + + if (!blank_flag) + { + // blank line means blank_line/comment_line/directive + // call SLOC function to detect logical SLOC and add to result + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, l_paren_cnt, l_foundblock, + found_forifwhile, found_end, found_type, found_is, found_unit, loopLevel, currentBlock, 
processSignatureFound, found_withSelect, found_whenConditional, foundWait, + blockSignatureFound, found_record); + + cnt = 0; + CUtil::CountTally(line, data_name_list, cnt, 1, exclude, "", "", NULL, false); //tie breaker in favor of physical data rather than physical exec + + // need to check also if the data line continues + if ((cnt > 0 && currentBlock.size() == 0 ) || (currentBlock.size()>0 && (currentBlock.back().compare("record") == 0 || currentBlock.back().compare("units") == 0))) + result->data_lines[PHY]++; + else + result->exec_lines[PHY]++; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count, false); + } + } + } + return 1; +} + +/*! +* Processes a logical line of code. +* This method is called after a logical SLOC is determined. +* The method adds LSLOC to the result, increases counts, and resets variables. +* +* \param result counter results +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param found_block found block flag +* \param found_forifwhile found for, if, or while flag +* \param found_end found end flag +* \param found_type found type flag +* \param found_is found is flag +* \param found_unit found accept flag +* \param trunc_flag truncate lines? 
+* \param currentBlock current block vector +*/ +void CVHDLCounter::FoundSLOC(results* result, string &strLSLOC, string &strLSLOCBak, bool &found_block, bool &found_forifwhile, + bool &found_end, bool &found_type, bool &found_is, bool &found_unit, bool &trunc_flag, StringVector currentBlock) +{ + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:"; + + // add to the list for comparison purpose + if (result->addSLOC(CUtil::TrimString(strLSLOCBak), trunc_flag)) + { + // determine logical type, data declaration or executable + unsigned int cnt = 0; + + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", &result->data_name_count, false); + if ((cnt > 0 && currentBlock.size() == 0) || (currentBlock.size()>0 && (currentBlock.back().compare("record") == 0 || currentBlock.back().compare("units") == 0)) ) + result->data_lines[LOG]++; + else + result->exec_lines[LOG]++; + } + + // reset all variables whenever a new statement/logical SLOC is found + strLSLOC = ""; + strLSLOCBak = ""; + found_block = false; + found_forifwhile = false; + found_end = false; + found_type = false; + found_is = false; + found_unit = false; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. 
+* +* \param result counter results28186 +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param paren_cnt count of parenthesis +* \param found_block found block flag +* \param found_forifwhile found for, if, or while flag +* \param found_end found end flag +* \param found_type found type flag +* \param found_is found is flag +* \param found_unit found unit flag +* \param loopLevel nested loop level +* \param currentBlock current block vector +* \param processSignatureStartFound found process signature start flag +* \param found_withSelect found with/select flag +* \param found_whenConditional found when/conditional flag +* \param foundWait found wait flag +* \param blockSignatureStartFound found block signature start flag +* \param found_recort found record flag +*/ +void CVHDLCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &found_block, bool &found_forifwhile, bool &found_end, bool &found_type, bool &found_is, bool &found_unit, + unsigned int &loopLevel, vectorString ¤tBlock, bool &processSignatureStartFound, bool &found_withSelect, bool &found_whenConditional, bool &foundWait, + bool &blockSignatureStartFound, bool &found_record) +{ + size_t start = 0, forstart = 0; //starting index of the working string + size_t i = 0, tempi, strSize; + string templine = CUtil::TrimString(line); + string tmp, fortmp; + bool trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:"; + string keywordchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + // there may be more than 1 logical SLOC in a line + for (i = 0; i < line.length(); i++) + { + switch (line[i]) + { + case ';': + if (paren_cnt > 0) + break; + if (currentBlock.size() != 0) + { + if (((string) 
currentBlock.back()).compare("port") == 0) + { + // paren_cnt should be zero here + currentBlock.pop_back(); + } + if (((string) currentBlock.back()).compare("generic") == 0) + { + // paren_cnt should be zero here + currentBlock.pop_back(); + } + if (found_withSelect && CUtil::FindKeyword(line.substr(start, i + 1 - start), "others", 0, TO_END_OF_STRING, false) != string::npos) + { + // found the end of with ... select statement + found_withSelect = false; + currentBlock.pop_back(); + } + if (found_whenConditional) + { + //found the end of when ... conditional statement + found_whenConditional = false; + currentBlock.pop_back(); + } + } + if (!found_end) + { + if (foundWait) foundWait = false; + + // check for empty statement (=1 LSLOC) + if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1) + { + strLSLOC = ";"; + strLSLOCBak = ";"; + } + else + { + strSize = CUtil::TruncateLine(i - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + if (currentBlock.size() != 0 && (currentBlock.front().compare("configuration") == 0) && (currentBlock.back().compare("entity") == 0)) + { + //pop the element in the vector matching the END that was just found + if (currentBlock.size() != 0) + currentBlock.pop_back(); + } + } + else + { + if (currentBlock.size() != 0 && ((string) currentBlock.back()).compare("for") == 0 && ((string) currentBlock.front()).compare("configuration") == 0) + { + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + } + found_end = false; + found_block = false; + found_is = false; + found_forifwhile = false; + found_type = false; + found_unit = false; + 
strLSLOC = ""; + strLSLOCBak = ""; + foundWait = false; + if (found_record) + found_record = false; + if (found_unit) + found_unit = false; + + //pop the element in the vector matching the END that was just found + if (currentBlock.size() != 0) + currentBlock.pop_back(); + } + start = i + 1; + break; + case '(': + if (found_type) + found_type = false; + if (currentBlock.size() != 0) + { + if (((string) currentBlock.back()).compare("process") == 0 && !found_is) + processSignatureStartFound = true; + else if (((string) currentBlock.back()).compare("block") == 0 && !found_is) + blockSignatureStartFound = true; + } + paren_cnt++; + break; + case ')': + if (paren_cnt > 0) + paren_cnt--; + break; + case ',': + if (found_withSelect) + { + strSize = CUtil::TruncateLine(i+1-start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + continue; + } + break; + default: + if (currentBlock.size() != 0) + { + // check for is here first + tmp = "xxx " + CUtil::TrimString(line.substr(start, i + 1 - start)); + + //try to locate the is at the end of the string if a block has been found + if (found_block) + { + tempi = CUtil::FindKeyword(tmp, "is", 0, TO_END_OF_STRING, false); + if (tempi != string::npos) + { + strSize = CUtil::TruncateLine(tempi+3-start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + found_is = true; + if (currentBlock.size() > 0 && (currentBlock.back().compare("block") == 0 || 
currentBlock.back().compare("process") == 0 )) + processSignatureStartFound = true; + continue; + } + else + { + // now see if there is anything else other than a space or an 'i' for the start of the word is + if (((string) currentBlock.back()).compare("process") == 0 && line[i] != ' ' && line[i] != 'i' && paren_cnt == 0 && !found_is) + { + strSize = CUtil::TruncateLine(i - 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i; + processSignatureStartFound = false; + continue; + } + else if (((string) currentBlock.back()).compare("block") == 0 && line[i] != ' ' && line[i] != 'i' && paren_cnt == 0 && !found_is) + { + strSize = CUtil::TruncateLine(i - 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i; + blockSignatureStartFound = false; + continue; + } + } + } + } + } + + // continue the following processing only if line[i] is not in a middle of a word + if (keywordchars.find(line[i]) != string::npos && i < line.length() - 1) + continue; + + // if it ends in xxx, then it has already been counted, so ignore it + tmp = "xxx " + CUtil::TrimString(line.substr(start, i + 1 - start)); + fortmp = "xxx " + CUtil::TrimString(line.substr(forstart, i + 1 - start)); + + if (found_block) + { + // try to locate the is at the end of the string if a block has been found + tempi = CUtil::FindKeyword(tmp, "is", 0, TO_END_OF_STRING, false); + if (tempi != string::npos) + { + strSize = 
CUtil::TruncateLine(tempi+3-start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + found_is = true; + if (currentBlock.size() > 0 && (currentBlock.back().compare("block") == 0 || currentBlock.back().compare("process") == 0 )) + processSignatureStartFound = true; + continue; + } + } + + tempi = CUtil::FindKeyword(tmp, "end", 0, TO_END_OF_STRING, false); + if (tempi != string::npos) + { + found_end = true; + + // record end loop for nested loop processing + if (print_cmplx) + { + tmp = CUtil::TrimString(line.substr(start, i + 5 - start)); + if (CUtil::FindKeyword(tmp, "end loop", 0, TO_END_OF_STRING, false) != string::npos) + { + if (loopLevel > 0) + loopLevel--; + } + tmp = CUtil::TrimString(line.substr(start, i + 9 - start)); + if (CUtil::FindKeyword(tmp, "end generate", 0, TO_END_OF_STRING, false) != string::npos) + { + if (loopLevel > 0) + loopLevel--; + } + } + start = i + 1; + } + + if (!found_end) + { + // 'begin' is ignored because it's counted with procedure, function, etc. 
already + // this may ignore the 'standalone' block that starts with 'declare' or only 'begin' + if (CUtil::FindKeyword(tmp, "begin", 0, TO_END_OF_STRING, false) != string::npos) + { + // found a SLOC + strLSLOC += line.substr(start, i - start + 1); + strLSLOCBak += lineBak.substr(start, i - start + 1); + start = i + 1; + continue; + } + if (!foundWait) + { + if (CUtil::FindKeyword(tmp, "wait", 0, TO_END_OF_STRING, false) != string::npos) + foundWait = true; + } + + if (!found_forifwhile || (currentBlock.size() != 0 && ((string) currentBlock.front()).compare("configuration") == 0)) + { + if (CUtil::FindKeyword(tmp, "for", 0 , TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "elsif", 0, TO_END_OF_STRING, false) != string::npos) + { + if (CUtil::FindKeyword(tmp, "if", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("if"); + found_forifwhile = true; + } + else if (CUtil::FindKeyword(tmp, "elsif", 0, TO_END_OF_STRING, false) != string::npos) + { + found_forifwhile = true; + } + else if (CUtil::FindKeyword(tmp, "while", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("while"); + found_forifwhile = true; + } + else if (CUtil::FindKeyword(tmp, "for", 0, TO_END_OF_STRING, false) != string::npos && !foundWait) + { + found_forifwhile = true; + + if (currentBlock.size() != 0 && ((string) currentBlock.front()).compare("configuration") == 0) + { + //inside a configuration block + if (CUtil::FindKeyword(fortmp, "for", 0, TO_END_OF_STRING, false) != string::npos) + { + if (((string) currentBlock.back()).compare("for") == 0 ) + { + // add sloc if this for loop is nested + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + currentBlock.push_back("for"); + 
forstart = i + 1; + } + else + { + currentBlock.push_back("for"); + forstart = i + 1; + } + } + } + else + { + // just a regular for + currentBlock.push_back("for"); + } + } + } + + if (currentBlock.size() != 0 && ((string) currentBlock.front()).compare("configuration") == 0) + { + // inside a configuration block + if ((CUtil::FindKeyword(fortmp, "use", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(fortmp, "group", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(fortmp, "attribute", 0, TO_END_OF_STRING, false) != string::npos) && + ((string) currentBlock.back()).compare("for") == 0) + { + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + forstart = i + 1; + } + } + + // 'exception' is removed because it is not counted + if (CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) != string::npos) + { + // found a SLOC + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + + // record nested loop level + if (print_cmplx) + { + loopLevel++; + if ((unsigned int)result->cmplx_nestloop_count.size() < loopLevel) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopLevel-1]++; + } + continue; + } + } + else if (CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "then", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "record", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "generate", 0, TO_END_OF_STRING, false) != string::npos ) // for..use..record + { + // found a SLOC 
+ strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + + // record nested loop level + if (print_cmplx) + { + if (CUtil::FindKeyword(tmp, "loop", 0, TO_END_OF_STRING, false) != string::npos || CUtil::FindKeyword(tmp, "generate", 0, TO_END_OF_STRING, false) != string::npos) + { + loopLevel++; + if ((unsigned int)result->cmplx_nestloop_count.size() < loopLevel) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopLevel-1]++; + } + } + continue; + } + + // similarly, check for procedure, task, function - it ends with 'is' keyword + // procedure ... is... + // package ... is ... + if (!found_block) + { + if (CUtil::FindKeyword(tmp, "procedure", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "function", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "package", 0, TO_END_OF_STRING, false) !=string::npos || + CUtil::FindKeyword(tmp, "component", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "case",0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "process", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "entity", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "architecture", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "configuration", 0, TO_END_OF_STRING, false) != string::npos || + CUtil::FindKeyword(tmp, "block", 0, TO_END_OF_STRING, false) != string::npos ) + { + if (CUtil::FindKeyword(tmp, "entity", 0, TO_END_OF_STRING, false) != string::npos) + { + if (((currentBlock.size() != 0) && ! 
(((string) currentBlock.front()).compare("configuration") == 0))) + { + currentBlock.push_back("entity"); + found_block = true; + } + else if (currentBlock.size() == 0) + { + currentBlock.push_back("entity"); + found_block = true; + } + } + else if (CUtil::FindKeyword(tmp, "architecture", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("architecture"); + found_block = true; + } + else if (CUtil::FindKeyword(tmp, "case", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("case"); + found_block = true; + } + else if (CUtil::FindKeyword(tmp, "configuration", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("configuration"); + found_block = true; + } + else if (CUtil::FindKeyword(tmp, "block", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("block"); + blockSignatureStartFound = false; + found_block = true; + } + else if (CUtil::FindKeyword(tmp, "component", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("component"); + found_block = true; + } + else if (CUtil::FindKeyword(tmp, "process", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("process"); + processSignatureStartFound = false; + found_block = true; + } + else + found_block = true; + } + } + + // check for end of a when statement within a case statement + tempi = CUtil::FindKeyword(templine, "=>", 0, TO_END_OF_STRING, false); + if ((tempi == templine.length() - 2) && currentBlock.size() != 0 && ((string) currentBlock.back()).compare("case") == 0) + { + strSize = CUtil::TruncateLine(tempi + 3 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = templine.length() + 1; + continue; + 
} + + if (!found_type) + { + if (CUtil::FindKeyword(tmp, "type", 0, TO_END_OF_STRING, false) != string::npos) + found_type = true; + } + if ((currentBlock.size() != 0) && ! (((string) currentBlock.back()).compare("port") == 0)) + { + if (CUtil::FindKeyword(tmp, "port", 0, TO_END_OF_STRING, false) != string::npos) + { + if (currentBlock.back().compare("component") == 0) + { + strSize = CUtil::TruncateLine(1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + } + currentBlock.push_back("port"); + } + } + if ((currentBlock.size() != 0) && ! (((string) currentBlock.back()).compare("generic") == 0)) + { + if (CUtil::FindKeyword(tmp, "generic", 0, TO_END_OF_STRING, false) != string::npos) + { + if (currentBlock.back().compare("component") == 0) + { + strSize = CUtil::TruncateLine(1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + } + currentBlock.push_back("generic"); + } + } + + if ((currentBlock.size() != 0) && ! (((string) currentBlock.back()).compare("component") == 0)) + { + if (CUtil::FindKeyword(tmp, "component", 0, TO_END_OF_STRING, false) != string::npos) + currentBlock.push_back("component"); + } + + // process 'select...end select;', 'accept ... end accept;' + // 'record ... end record;' is handled via 'type' + // select ... end select; --> only one word statement 'select' + // accept id... do ... 
end [id]; --> SLOC starting from 'accept' to 'do' + // find 'do' only already found 'accept' + if (!found_withSelect) + { + if (CUtil::FindKeyword(tmp, "with", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("with"); + found_withSelect = true; + } + } + else if (CUtil::FindKeyword(tmp, "select", 0, TO_END_OF_STRING, false) != string::npos) + { + // found 'select' statement, one SLOC + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + continue; + } + if (currentBlock.size() != 0) + { + string currentString = ((string) currentBlock.back()); + if (!(currentString.compare("case") == 0) && !(currentString.compare("with") == 0) && !(currentString.compare("when") == 0 ) && + CUtil::FindKeyword(tmp, "when", 0, TO_END_OF_STRING, false) != string::npos) + { + currentBlock.push_back("when"); + found_whenConditional = true; + } + else if (found_whenConditional) + { + if (CUtil::FindKeyword(tmp, "else", 0, TO_END_OF_STRING, false) != string::npos) + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag, currentBlock); + start = i + 1; + continue; + } + } + } + + if (!found_unit) + { + if (CUtil::FindKeyword(tmp, "units", 0, TO_END_OF_STRING, false) != string::npos) + { + found_unit = true; + currentBlock.push_back("units"); + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, 
found_is, found_unit, trunc_flag, currentBlock); + continue; + } + } + if (!found_record) + { + if (CUtil::FindKeyword(tmp, "record", 0, TO_END_OF_STRING, false) != string::npos) + { + found_record = true; + currentBlock.push_back("record"); + FoundSLOC(result, strLSLOC, strLSLOCBak, found_block, found_forifwhile, + found_end, found_type, found_is, found_unit, trunc_flag,currentBlock); + continue; + } + } + + } + } + + tmp = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp.substr(0, strSize); + + // drop continuation symbol + if (strLSLOC[strLSLOC.length()-1] == '\\') + { + strLSLOC = strLSLOC.substr(0, strLSLOC.length()-1); + strLSLOCBak = strLSLOCBak.substr(0, strLSLOCBak.length()-1); + } + } + if (tmp == "") + found_forifwhile = false; +} diff --git a/src/CVHDLCounter.h b/src/CVHDLCounter.h new file mode 100644 index 0000000..254bb7e --- /dev/null +++ b/src/CVHDLCounter.h @@ -0,0 +1,48 @@ +//! Code counter class definition for the VHDL language. +/*! +* \file CVHDLCounter.h +* +* This file contains the code counter class definition for the VHDL hardware definition language (used in FPGA programming). +*/ + +#ifndef CVHDLCounter_h +#define CVHDLCounter_h + +#include "CCodeCounter.h" + +class tokenLocation +{ +public: + string token; + int lineNumber; + int position; +}; + +typedef vector tokLocVect; + +//! VHDL code counter class. +/*! +* \class CVHDLCounter +* +* Defines the VHDL code counter class. 
+*/ +class CVHDLCounter : public CCodeCounter +{ +public: + CVHDLCounter(); + +protected: + virtual int ReplaceQuote(string &strline, size_t &idx_start, bool &contd, char &CurrentQuoteEnd); + virtual int CountCommentsSLOC(filemap* fmap, results* result, filemap *fmapBak); + virtual int CountComplexity(filemap* fmap, results* result); + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak); + void FoundSLOC(results* result, string &strLSLOC, string &strLSLOCBak, bool &found_block, bool &found_forifwhile, + bool &found_end, bool &found_type, bool &found_is, bool &found_unit, bool &trunc_flag, StringVector currentBlock); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, bool &found_type, bool &found_is, bool &found_unit, + unsigned int &loopLevel, vectorString ¤tBlock, bool &processSignatureFound, bool &found_withSelect, bool &found_whenConditional, bool &foundWait, + bool &blockSignatureFound, bool &found_record); +}; + +#endif diff --git a/src/CVbCounter.cpp b/src/CVbCounter.cpp new file mode 100644 index 0000000..0607b9e --- /dev/null +++ b/src/CVbCounter.cpp @@ -0,0 +1,588 @@ +//! Code counter class methods for the Visual Basic language. +/*! +* \file CVbCounter.cpp +* +* This file contains the code counter class methods for the Visual Basic language. +*/ + +#include "CVbCounter.h" + +#define CONTINUATION_CHAR '_' + +/*! +* Constructs a CVbCounter object. 
+*/ +CVbCounter::CVbCounter() +{ + classtype = VB; + language_name = "Visual_Basic"; + + file_extension.push_back(".vb"); + file_extension.push_back(".frm"); + file_extension.push_back(".mod"); + file_extension.push_back(".cls"); + file_extension.push_back(".bas"); + + QuoteStart = "\""; + QuoteEnd = "\""; + QuoteEscapeRear = '\"'; + casesensitive = false; + + exclude_keywords.push_back("Do"); // must be alone, e.g., not Do i = 5 + exclude_keywords.push_back("Else"); + exclude_keywords.push_back("Loop"); + exclude_keywords.push_back("Wend"); + + exclude_start_keywords.push_back("End"); + exclude_start_keywords.push_back("Next"); + + LineCommentStart.push_back("'"); + LineCommentStart.push_back("REM "); + + directive.push_back("#Const"); + directive.push_back("#Else"); + directive.push_back("#ElseIf"); + directive.push_back("#End"); + directive.push_back("#ExternalSource"); + directive.push_back("#If"); + directive.push_back("#Region"); + + data_name_list.push_back("Boolean"); + data_name_list.push_back("Byte"); + data_name_list.push_back("Collection"); + data_name_list.push_back("Const"); + data_name_list.push_back("Currency"); + data_name_list.push_back("Date"); + data_name_list.push_back("Dim"); + data_name_list.push_back("Double"); + data_name_list.push_back("Integer"); + data_name_list.push_back("Item"); + data_name_list.push_back("Long"); + data_name_list.push_back("New"); + data_name_list.push_back("Object"); + data_name_list.push_back("Option"); + data_name_list.push_back("Private"); + data_name_list.push_back("Public"); + data_name_list.push_back("ReDim"); + data_name_list.push_back("Single"); + data_name_list.push_back("Static"); + data_name_list.push_back("String"); + data_name_list.push_back("Time"); + data_name_list.push_back("Variant"); + + exec_name_list.push_back("Add"); + exec_name_list.push_back("AppActivate"); + exec_name_list.push_back("Asc"); + exec_name_list.push_back("Beep"); + exec_name_list.push_back("Call"); + 
exec_name_list.push_back("CBool"); + exec_name_list.push_back("CByte"); + exec_name_list.push_back("CCur"); + exec_name_list.push_back("CDate"); + exec_name_list.push_back("CDbl"); + exec_name_list.push_back("CDec"); + exec_name_list.push_back("CInt"); + exec_name_list.push_back("CStr"); + exec_name_list.push_back("CVar"); + exec_name_list.push_back("ChDir"); + exec_name_list.push_back("Clear"); + exec_name_list.push_back("Close"); + exec_name_list.push_back("Command"); + exec_name_list.push_back("CreateObject"); + exec_name_list.push_back("CurDir"); + exec_name_list.push_back("Dir"); + exec_name_list.push_back("Do"); + exec_name_list.push_back("DoEvents"); + exec_name_list.push_back("Else"); + exec_name_list.push_back("End"); + exec_name_list.push_back("Environ"); + exec_name_list.push_back("Erase"); + exec_name_list.push_back("Error"); + exec_name_list.push_back("Exit"); + exec_name_list.push_back("FileAttr"); + exec_name_list.push_back("FileCopy"); + exec_name_list.push_back("FileDateTime"); + exec_name_list.push_back("FileLen"); + exec_name_list.push_back("Fix"); + exec_name_list.push_back("For"); + exec_name_list.push_back("Format"); + exec_name_list.push_back("FreeFile"); + exec_name_list.push_back("Function"); + exec_name_list.push_back("Get"); + exec_name_list.push_back("GetAttr"); + exec_name_list.push_back("GetObject"); + exec_name_list.push_back("GoSub"); + exec_name_list.push_back("GoTo"); + exec_name_list.push_back("If"); + exec_name_list.push_back("Input"); + exec_name_list.push_back("InStr"); + exec_name_list.push_back("IsError"); + exec_name_list.push_back("Kill"); + exec_name_list.push_back("Left"); + exec_name_list.push_back("Len"); + exec_name_list.push_back("Line"); + exec_name_list.push_back("Loc"); + exec_name_list.push_back("Lock"); + exec_name_list.push_back("LOF"); + exec_name_list.push_back("Loop"); + exec_name_list.push_back("Mid"); + exec_name_list.push_back("MkDir"); + exec_name_list.push_back("Name"); + 
exec_name_list.push_back("Next"); + exec_name_list.push_back("Now"); + exec_name_list.push_back("On"); + exec_name_list.push_back("Open"); + exec_name_list.push_back("Print"); + exec_name_list.push_back("Put"); + exec_name_list.push_back("Raise"); + exec_name_list.push_back("Randomize"); + exec_name_list.push_back("Remove"); + exec_name_list.push_back("Reset"); + exec_name_list.push_back("Resume"); + exec_name_list.push_back("Return"); + exec_name_list.push_back("Right"); + exec_name_list.push_back("RmDir"); + exec_name_list.push_back("Rnd"); + exec_name_list.push_back("Seek"); + exec_name_list.push_back("Select"); + exec_name_list.push_back("SendKeys"); + exec_name_list.push_back("Server"); + exec_name_list.push_back("SetAttr"); + exec_name_list.push_back("Shell"); + exec_name_list.push_back("Spc"); + exec_name_list.push_back("Stop"); + exec_name_list.push_back("Str"); + exec_name_list.push_back("Switch"); + exec_name_list.push_back("Sub"); + exec_name_list.push_back("Tab"); + exec_name_list.push_back("Timer"); + exec_name_list.push_back("Unlock"); + exec_name_list.push_back("Val"); + exec_name_list.push_back("Wend"); + exec_name_list.push_back("While"); + exec_name_list.push_back("Width"); + exec_name_list.push_back("With"); + exec_name_list.push_back("Write"); + + math_func_list.push_back("Abs"); + math_func_list.push_back("Exp"); + math_func_list.push_back("Round"); + math_func_list.push_back("Rnd"); + math_func_list.push_back("Randomize"); + math_func_list.push_back("Sign"); + math_func_list.push_back("Sqrt"); + + trig_func_list.push_back("Atan"); + trig_func_list.push_back("Cos"); + trig_func_list.push_back("Sin"); + trig_func_list.push_back("Tan"); + + log_func_list.push_back("Log"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("\\"); + cmplx_calc_list.push_back("^"); + + cmplx_cond_list.push_back("Case"); + 
cmplx_cond_list.push_back("Do"); + cmplx_cond_list.push_back("Else"); + cmplx_cond_list.push_back("ElseIf"); + cmplx_cond_list.push_back("For"); + cmplx_cond_list.push_back("If"); + cmplx_cond_list.push_back("Select"); + cmplx_cond_list.push_back("While"); + + cmplx_logic_list.push_back("<>"); + cmplx_logic_list.push_back(">"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=<"); + cmplx_logic_list.push_back("And"); + cmplx_logic_list.push_back("Not"); + cmplx_logic_list.push_back("Or"); + cmplx_logic_list.push_back("Xor"); + cmplx_logic_list.push_back("AndAlso"); + cmplx_logic_list.push_back("OrElse"); + cmplx_logic_list.push_back("IsFalse"); + cmplx_logic_list.push_back("IsTrue"); + + cmplx_preproc_list.push_back("#Const"); + cmplx_preproc_list.push_back("#Else"); + cmplx_preproc_list.push_back("#ElseIf"); + cmplx_preproc_list.push_back("#End"); + cmplx_preproc_list.push_back("#ExternalSource"); + cmplx_preproc_list.push_back("#If"); + cmplx_preproc_list.push_back("#Region"); + + cmplx_assign_list.push_back("="); + + cmplx_cyclomatic_list.push_back("If"); + cmplx_cyclomatic_list.push_back("ElseIf"); + cmplx_cyclomatic_list.push_back("IIf"); + cmplx_cyclomatic_list.push_back("For"); + cmplx_cyclomatic_list.push_back("While"); + cmplx_cyclomatic_list.push_back("Until"); + cmplx_cyclomatic_list.push_back("Catch"); + cmplx_cyclomatic_list.push_back("When"); + cmplx_cyclomatic_list.push_back("Case"); + + ignore_cmplx_cyclomatic_list.push_back("Select Case"); + ignore_cmplx_cyclomatic_list.push_back("Case Else"); + ignore_cmplx_cyclomatic_list.push_back("End If"); + ignore_cmplx_cyclomatic_list.push_back("End While"); +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CVbCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* /*fmapBak*/) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + + for (filemap::iterator iter = fmap->begin(); iter!=fmap->end(); iter++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for(vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + if ((idx = iter->line.find((*viter), 0)) != string::npos && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + result->directive_lines[PHY]++; + strSize = CUtil::TruncateLine(iter->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = iter->line.substr(0, strSize); + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(iter->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + iter->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + // drop continuation symbol + if (strDirLine[strDirLine.length()-1] == '\\') + strDirLine = strDirLine.substr(0, strDirLine.length()-1); + + // if a directive or continuation of a directive (no continuation symbol found) + if (iter->line[iter->line.length()-1] != '_') + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + } + iter->line = ""; + } + } + return 1; +} + +/*! 
+* Processes physical and logical lines according to language specific rules. +* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CVbCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + string strLSLOC = ""; + string strLSLOCBak = ""; + + filemap::iterator fit, fitbak; + string line, lineBak; + size_t i, pos, prev_pos, strSize, tmpLoc; + unsigned int cnt = 0; + StringVector loopEnd; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string special = "[]()+/-*<>=,@&~!^?:%{}"; + + string tmp, tmpstr; + bool isDataLine = false; + bool line_continued = false; + bool line_skipped; + bool trunc_flag = false; + bool new_loop = false; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + lineBak = fitbak->line; + + if (CUtil::CheckBlank(line)) + continue; + + // delete the line number + pos = line.find_first_of(":"); + if ((pos == 0) || (pos != string::npos && CUtil::IsInteger(line.substr(0,pos)))) + { + line = line.substr(pos + 1); + lineBak = lineBak.substr(pos + 1); + } + + prev_pos = 0; + + // check for inline If..Then + tmpstr = CUtil::TrimString(line); + tmpLoc = CUtil::FindKeyword(tmpstr, "Then"); + if (tmpLoc != string::npos) + { + if (tmpLoc < tmpstr.length() - 4) + tmpLoc += 3; + else + tmpLoc = string::npos; + } + + // record nested loops + if (print_cmplx) + { + new_loop = false; + if (CUtil::FindKeyword(tmpstr, "Do") == 0) + { + loopEnd.push_back("Loop"); + new_loop = true; + } + else if (CUtil::FindKeyword(tmpstr, "For") == 0) + { + loopEnd.push_back("Next"); + new_loop = 
true; + } + else if (CUtil::FindKeyword(tmpstr, "While") == 0) + { + loopEnd.push_back("Wend"); + new_loop = true; + } + else if (loopEnd.size() > 0) + { + if (CUtil::FindKeyword(tmpstr, loopEnd.back()) == 0) + loopEnd.pop_back(); + } + if (new_loop) + { + if ((unsigned int)result->cmplx_nestloop_count.size() < loopEnd.size()) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopEnd.size()-1]++; + } + } + + tmp = line; + for (i = 0; i < tmp.size(); i++) + { + if ((tmp[i] == ':') || (i == tmp.size() - 1) || tmpLoc != string::npos) + { + if (tmpLoc != string::npos) + { + i = tmpLoc; + tmpLoc = string::npos; + } + else + tmpstr = CUtil::TrimString(tmp.substr(prev_pos, i - prev_pos + 1)); + + // exclude SLOC defined in the exclude_keywords + line_skipped = false; + for (vector::iterator stri = exclude_keywords.begin(); stri != exclude_keywords.end(); stri++) + { + if (tmpstr.compare(*stri) == 0) + { + line_skipped = true; + break; + } + } + if (line_skipped) + continue; + + // exclude SLOC starting with Next, End + for (StringVector::iterator stri = exclude_start_keywords.begin(); stri != exclude_start_keywords.end(); stri++) + { + if (CUtil::FindKeyword(tmpstr, *stri) == 0) + { + line_skipped = true; + break; + } + } + if (line_skipped) + continue; + + strSize = CUtil::TruncateLine(i + 1 - prev_pos, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(prev_pos, strSize); + strLSLOCBak += lineBak.substr(prev_pos, strSize); + } + + line_continued = ((i == tmp.size() - 1) && tmpstr[tmpstr.length() - 1] == CONTINUATION_CHAR); + if (line_continued) + { + // drop continuation symbol + if (strLSLOC[strLSLOC.length()-1] == '_') + { + strLSLOC = strLSLOC.substr(0, strLSLOC.length()-1); + strLSLOCBak = strLSLOCBak.substr(0, strLSLOCBak.length()-1); + } + continue; + } + + isDataLine = false; + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + CUtil::CountTally(strLSLOC, 
data_name_list, cnt, 1, exclude, "", "", &result->data_name_count); + if (cnt > 0) + { + isDataLine = true; + result->data_lines[LOG]++; + } + else + { + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(strLSLOC, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + result->exec_lines[LOG]++; + } + } + strLSLOCBak = ""; + strLSLOC = ""; + prev_pos = i + 1; + + if (tmpLoc != string::npos) + { + tmpLoc = string::npos; + prev_pos++; + } + } + + if (special.find_first_of(tmp[i]) != string::npos) + tmp[i] = ' '; + } + + if (isDataLine) + result->data_lines[PHY]++; + else + result->exec_lines[PHY]++; + } + return 1; +} + +/*! +* Parses lines for function/method names. +* +* \param line line to be processed +* \param functionStack stack of functions +* \param functionName function name found +* \param functionCount function count found +* +* \return 1 if function name is found +*/ +int CVbCounter::ParseFunctionName(const string &line, string &/*lastline*/, + filemap &functionStack, string &functionName, unsigned int &functionCount) +{ + string str; + size_t idx; + unsigned int fcnt; + + idx = CUtil::FindKeyword(line, "Sub"); + if (idx != string::npos) + { + if (idx + 4 < line.length()) + { + str = line.substr(idx + 4); + lineElement element(++functionCount, str); + functionStack.push_back(element); + } + } + else + { + idx = CUtil::FindKeyword(line, "Function"); + if (idx != string::npos) + { + if (idx + 9 < line.length()) + { + str = line.substr(idx + 9); + lineElement element(++functionCount, str); + functionStack.push_back(element); + } + } + } + + if (functionStack.empty()) + { + // dealing with some code out of any subroutines, it a "main" code + return 2; + } + + idx = CUtil::FindKeyword(line, "End Sub"); + if (idx != string::npos) + { + str = functionStack.back().line; + fcnt = functionStack.back().lineNumber; + functionStack.pop_back(); + idx = str.find("("); + if (idx != string::npos) + { + functionName = 
CUtil::ClearRedundantSpaces(str.substr(0, idx)); + functionCount = fcnt; + return 1; + } + } + else + { + idx = CUtil::FindKeyword(line, "End Function"); + if (idx != string::npos) + { + str = functionStack.back().line; + fcnt = functionStack.back().lineNumber; + functionStack.pop_back(); + idx = str.find("("); + if (idx != string::npos) + { + functionName = CUtil::ClearRedundantSpaces(str.substr(0, idx)); + functionCount = fcnt; + return 1; + } + } + } + return 0; +} diff --git a/src/CVbCounter.h b/src/CVbCounter.h new file mode 100644 index 0000000..ccffe18 --- /dev/null +++ b/src/CVbCounter.h @@ -0,0 +1,33 @@ +//! Code counter class definition for the Visual Basic language. +/*! +* \file CVbCounter.h +* +* This file contains the code counter class definition for the Visual Basic language. +*/ + +#ifndef CVbCounter_h +#define CVbCounter_h + +#include "CCodeCounter.h" + +//! Visual Basic code counter class. +/*! +* \class CVbCounter +* +* Defines the Visual Basic code counter class. +*/ +class CVbCounter : public CCodeCounter +{ +public: + CVbCounter(); + +protected: + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak = NULL); + int ParseFunctionName(const string &line, string &lastline, + filemap &functionStack, string &functionName, unsigned int &functionCount); + + StringVector exclude_start_keywords; //!< SLOC lines excluded from counts starting with keywords +}; + +#endif diff --git a/src/CVbscriptCounter.cpp b/src/CVbscriptCounter.cpp new file mode 100644 index 0000000..de3bb7c --- /dev/null +++ b/src/CVbscriptCounter.cpp @@ -0,0 +1,106 @@ +//! Code counter class methods for the VBScript language. +/*! +* \file CVbscriptCounter.cpp +* +* This file contains the code counter class methods for the VBScript language. +*/ + +#include "CVbscriptCounter.h" + +#define CONTINUATION_CHAR '_' + +/*! +* Constructs a CVbscriptCounter object. 
+*/ +CVbscriptCounter::CVbscriptCounter() +{ + classtype = VBSCRIPT; + language_name = "VBScript"; + + file_extension.clear(); + file_extension.push_back(".vbs"); +} + +/*! +* Constructs a CVbsPhpCounter object. +*/ +CVbsPhpCounter::CVbsPhpCounter() +{ + classtype = VBS_PHP; + language_name = "VBScript/PHP"; + + file_extension.clear(); + file_extension.push_back(".*vbsphp"); +} + +/*! +* Constructs a CVbsHtmlCounter object. +*/ +CVbsHtmlCounter::CVbsHtmlCounter() +{ + classtype = VBS_HTML; + language_name = "VBScript/HTML"; + + file_extension.clear(); + file_extension.push_back(".*vbshtm"); +} + +/*! +* Constructs a CVbsXmlCounter object. +*/ +CVbsXmlCounter::CVbsXmlCounter() +{ + classtype = VBS_XML; + language_name = "VBScript/XML"; + + file_extension.clear(); + file_extension.push_back(".*vbsxml"); +} + +/*! +* Constructs a CVbsJspCounter object. +*/ +CVbsJspCounter::CVbsJspCounter() +{ + classtype = VBS_JSP; + language_name = "VBScript/JSP"; + + file_extension.clear(); + file_extension.push_back(".*vbsjsp"); +} + +/*! +* Constructs a CVbsAspServerCounter object. +*/ +CVbsAspServerCounter::CVbsAspServerCounter() +{ + classtype = VBS_ASP_S; + language_name = "VBScript/ASP Server"; + + file_extension.clear(); + file_extension.push_back(".*vbsasps"); +} + +/*! +* Constructs a CVbsAspClientCounter object. +*/ +CVbsAspClientCounter::CVbsAspClientCounter() +{ + classtype = VBS_ASP_C; + language_name = "VBScript/ASP Client"; + + file_extension.clear(); + file_extension.push_back(".*vbsaspc"); +} + +/*! +* Constructs a CVbsColdFusionCounter object. +*/ +CVbsColdFusionCounter::CVbsColdFusionCounter() +{ + classtype = VBS_CFM; + language_name = "VBScript/ColdFusion"; + + file_extension.clear(); + file_extension.push_back(".*vbscfm"); +} diff --git a/src/CVbscriptCounter.h b/src/CVbscriptCounter.h new file mode 100644 index 0000000..1120c93 --- /dev/null +++ b/src/CVbscriptCounter.h @@ -0,0 +1,109 @@ +//! Code counter class definition for the VBScript language. +/*! 
* \file CVbscriptCounter.h
*
* This file contains the code counter class definition for the VBScript language.
*/

#ifndef CVbscriptCounter_h
#define CVbscriptCounter_h

#include "CVbCounter.h"

//! VBScript code counter class.
/*!
* \class CVbscriptCounter
*
* Defines the VBScript code counter class.
* Derives from CVbCounter and declares only a constructor.
*/
class CVbscriptCounter : public CVbCounter
{
public:
	CVbscriptCounter();
};

//! VBScript in PHP code counter class.
/*!
* \class CVbsPhpCounter
*
* Defines the VBScript in PHP code counter class.
* Derives from CVbscriptCounter and declares only a constructor.
*/
class CVbsPhpCounter : public CVbscriptCounter
{
public:
	CVbsPhpCounter();
};

//! VBScript in HTML code counter class.
/*!
* \class CVbsHtmlCounter
*
* Defines the VBScript in HTML code counter class.
* Derives from CVbscriptCounter and declares only a constructor.
*/
class CVbsHtmlCounter : public CVbscriptCounter
{
public:
	CVbsHtmlCounter();
};

//! VBScript in XML code counter class.
/*!
* \class CVbsXmlCounter
*
* Defines the VBScript in XML code counter class.
* Derives from CVbscriptCounter and declares only a constructor.
*/
class CVbsXmlCounter : public CVbscriptCounter
{
public:
	CVbsXmlCounter();
};

//! VBScript in JSP code counter class.
/*!
* \class CVbsJspCounter
*
* Defines the VBScript in JSP code counter class.
* Derives from CVbscriptCounter and declares only a constructor.
*/
class CVbsJspCounter : public CVbscriptCounter
{
public:
	CVbsJspCounter();
};

//! VBScript in ASP server code counter class.
/*!
* \class CVbsAspServerCounter
*
* Defines the VBScript in ASP server code counter class.
* Derives from CVbscriptCounter and declares only a constructor.
*/
class CVbsAspServerCounter : public CVbscriptCounter
{
public:
	CVbsAspServerCounter();
};

//! VBScript in ASP client code counter class.
/*!
* \class CVbsAspClientCounter
*
* Defines the VBScript in ASP client code counter class.
* Derives from CVbscriptCounter and declares only a constructor.
*/
class CVbsAspClientCounter : public CVbscriptCounter
{
public:
	CVbsAspClientCounter();
};

//! VBScript in ColdFusion code counter class.
/*!
* \class CVbsColdFusionCounter
*
* Defines the VBScript in ColdFusion code counter class.
+*/ +class CVbsColdFusionCounter : public CVbscriptCounter +{ +public: + CVbsColdFusionCounter(); +}; + +#endif diff --git a/src/CVerilogCounter.cpp b/src/CVerilogCounter.cpp new file mode 100644 index 0000000..c5fcec2 --- /dev/null +++ b/src/CVerilogCounter.cpp @@ -0,0 +1,889 @@ +//! Code counter class methods for the Verilog language. +/*! +* \file CVerilogCounter.cpp +* +* This file contains the code counter class methods for the Verilog hardware definition language (used in FPGA programming). +*/ + +#include "CVerilogCounter.h" +#include + +/*! +* Constructs a CCCounter object. +*/ +CVerilogCounter::CVerilogCounter() +{ + classtype = VERILOG; + language_name = "Verilog"; + + file_extension.push_back(".v"); + + LineCommentStart.push_back("//"); + BlockCommentStart.push_back("/*"); + BlockCommentEnd.push_back("*/"); + QuoteStart = "\""; + QuoteEnd = "\""; + QuoteEscapeFront = '\"'; + + directive.push_back("`define"); + directive.push_back("`include"); + directive.push_back("`ifdef"); + directive.push_back("`else"); + directive.push_back("`endif"); + directive.push_back("`timescale"); + + data_name_list.push_back("endfunction"); + data_name_list.push_back("endmodule"); + data_name_list.push_back("endtask"); + data_name_list.push_back("event"); + data_name_list.push_back("function"); + data_name_list.push_back("genvar"); + data_name_list.push_back("inout"); + data_name_list.push_back("input"); + data_name_list.push_back("integer"); + data_name_list.push_back("localparam"); + data_name_list.push_back("module"); + data_name_list.push_back("output"); + data_name_list.push_back("parameter"); + data_name_list.push_back("reg"); + data_name_list.push_back("specparam"); + data_name_list.push_back("supply0"); + data_name_list.push_back("supply1"); + data_name_list.push_back("task"); + data_name_list.push_back("time"); + data_name_list.push_back("tri"); + data_name_list.push_back("tri0"); + data_name_list.push_back("tri1"); + data_name_list.push_back("triand"); + 
data_name_list.push_back("trior"); + data_name_list.push_back("trireg"); + data_name_list.push_back("wand"); + data_name_list.push_back("wire"); + data_name_list.push_back("wor"); + + exec_name_list.push_back("always"); + exec_name_list.push_back("assign"); + exec_name_list.push_back("begin"); + exec_name_list.push_back("case"); + exec_name_list.push_back("casex"); + exec_name_list.push_back("casez"); + exec_name_list.push_back("deassign"); + exec_name_list.push_back("defparam"); + exec_name_list.push_back("disable"); + exec_name_list.push_back("end"); + exec_name_list.push_back("endcase"); + exec_name_list.push_back("for"); + exec_name_list.push_back("forever"); + exec_name_list.push_back("fork"); + exec_name_list.push_back("generate"); + exec_name_list.push_back("if"); + exec_name_list.push_back("else if"); + exec_name_list.push_back("else"); + exec_name_list.push_back("initial"); + exec_name_list.push_back("join"); + exec_name_list.push_back("posedge"); + exec_name_list.push_back("repeat"); + exec_name_list.push_back("wait"); + exec_name_list.push_back("while"); + + exec_name_list.push_back("$bitstoreal"); + exec_name_list.push_back("$display"); + exec_name_list.push_back("$dumpall"); + exec_name_list.push_back("$dumpfile"); + exec_name_list.push_back("$dumpflush"); + exec_name_list.push_back("$dumplimit"); + exec_name_list.push_back("$dumpoff"); + exec_name_list.push_back("$dumpon"); + exec_name_list.push_back("$dumpvar"); + exec_name_list.push_back("$dumpvars"); + exec_name_list.push_back("$fclose"); + exec_name_list.push_back("$fdisplay"); + exec_name_list.push_back("$finish"); + exec_name_list.push_back("$fmonitor"); + exec_name_list.push_back("$fopen"); + exec_name_list.push_back("$fstrobe"); + exec_name_list.push_back("$fwrite"); + exec_name_list.push_back("$itor"); + exec_name_list.push_back("$monitor"); + exec_name_list.push_back("$monitoroff"); + exec_name_list.push_back("$monitoron"); + exec_name_list.push_back("$printtimescale"); + 
exec_name_list.push_back("$random"); + exec_name_list.push_back("$readmemb"); + exec_name_list.push_back("$readmemh"); + exec_name_list.push_back("$realtime"); + exec_name_list.push_back("$realtobits"); + exec_name_list.push_back("$rtoi"); + exec_name_list.push_back("$scale"); + exec_name_list.push_back("$shm_open"); + exec_name_list.push_back("$shm_probe"); + exec_name_list.push_back("$stime"); + exec_name_list.push_back("$stop"); + exec_name_list.push_back("$strobe"); + exec_name_list.push_back("$time"); + exec_name_list.push_back("$timeformat"); + exec_name_list.push_back("$write"); + exec_name_list.push_back("@"); + + exec_name_list.push_back("buf"); + exec_name_list.push_back("not"); + exec_name_list.push_back("and"); + exec_name_list.push_back("or"); + exec_name_list.push_back("nand"); + exec_name_list.push_back("nor"); + exec_name_list.push_back("xor"); + exec_name_list.push_back("xnor"); + exec_name_list.push_back("bufif0"); + exec_name_list.push_back("bufif1"); + exec_name_list.push_back("notif0"); + exec_name_list.push_back("notif1"); + + cmplx_calc_list.push_back("+"); + cmplx_calc_list.push_back("-"); + cmplx_calc_list.push_back("*"); + cmplx_calc_list.push_back("/"); + cmplx_calc_list.push_back("%"); + + cmplx_cond_list.push_back("?"); + cmplx_cond_list.push_back("case"); + cmplx_cond_list.push_back("if"); + cmplx_cond_list.push_back("else"); + cmplx_cond_list.push_back("else if"); + cmplx_cond_list.push_back("for"); + cmplx_cond_list.push_back("forever"); + cmplx_cond_list.push_back("repeat"); + cmplx_cond_list.push_back("while"); + + cmplx_logic_list.push_back("!"); + cmplx_logic_list.push_back("~"); + cmplx_logic_list.push_back("&"); + cmplx_logic_list.push_back("|"); + cmplx_logic_list.push_back("^"); + cmplx_logic_list.push_back("~&"); + cmplx_logic_list.push_back("~|"); + cmplx_logic_list.push_back("~^"); + cmplx_logic_list.push_back("<<"); + cmplx_logic_list.push_back(">>"); + cmplx_logic_list.push_back("<"); + cmplx_logic_list.push_back(">"); + 
cmplx_logic_list.push_back(">="); + cmplx_logic_list.push_back("=="); + cmplx_logic_list.push_back("!="); + cmplx_logic_list.push_back("==="); + cmplx_logic_list.push_back("!=="); + cmplx_logic_list.push_back("^~"); + cmplx_logic_list.push_back("&&"); + cmplx_logic_list.push_back("||"); + cmplx_logic_list.push_back("<="); + + cmplx_preproc_list.push_back("`define"); + cmplx_preproc_list.push_back("`include"); + cmplx_preproc_list.push_back("`ifdef"); + cmplx_preproc_list.push_back("`else"); + cmplx_preproc_list.push_back("`endif"); + cmplx_preproc_list.push_back("`timescale"); + + cmplx_assign_list.push_back("="); + cmplx_assign_list.push_back("<="); +} + +/*! +* Counts file language complexity based on specified language keywords/characters. +* +* \param fmap list of processed file lines +* \param result counter results +* +* \return method status +*/ +int CVerilogCounter::CountComplexity(filemap* fmap, results* result) +{ + if (classtype == UNKNOWN || classtype == DATAFILE) + return 0; + filemap::iterator fit; + filemap fitBak; + filemap::iterator fitForw, fitBack; // used to check prior an later lines for semicolons + unsigned int cnt; + string line, line2; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$><=:"; + tokLocVect conditionalVector; + tokLocVect::reverse_iterator r_tlvIter; + StringVector::iterator strIter = this->cmplx_cond_list.begin(); + string buf; // have a buffer string + stringstream ss; // insert the string into a stream + int count = 0; + int condCount = 0; + StringVector tmpLogicList = cmplx_logic_list; // making a temporary list with the '<=' operator removed from the list; counting it on another pass; + tmpLogicList.pop_back(); + StringVector tmpAssignList = cmplx_assign_list; // making a temporary list with the '<=' operator removed from the list; counting it on another pass; + tmpAssignList.pop_back(); + string::iterator it; + + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + line = 
fit->line; + + if (CUtil::CheckBlank(line)) + continue; + + line = " " + line; + + // mathematical functions + cnt = 0; + CUtil::CountTally(line, math_func_list, cnt, 1, exclude, "", "", &result->math_func_count, casesensitive); + result->cmplx_math_lines += cnt; + + // trigonometric functions + cnt = 0; + CUtil::CountTally(line, trig_func_list, cnt, 1, exclude, "", "", &result->trig_func_count, casesensitive); + result->cmplx_trig_lines += cnt; + + // logarithmic functions + cnt = 0; + CUtil::CountTally(line, log_func_list, cnt, 1, exclude, "", "", &result->log_func_count, casesensitive); + result->cmplx_logarithm_lines += cnt; + + // calculations + cnt = 0; + CUtil::CountTally(line, cmplx_calc_list, cnt, 1, exclude, "", "", &result->cmplx_calc_count, casesensitive); + result->cmplx_calc_lines += cnt; + + // conditionals + cnt = 0; + CUtil::CountTally(line, cmplx_cond_list, cnt, 1, exclude, "", "", &result->cmplx_cond_count, casesensitive); + result->cmplx_cond_lines += cnt; + + // logical operators + cnt = 0; + //using tmpLogicList + CUtil::CountTally(line, tmpLogicList, cnt, 1, exclude, "", "", &result->cmplx_logic_count, casesensitive); + result->cmplx_logic_lines += cnt; + + // preprocessor directives + cnt = 0; + CUtil::CountTally(line, cmplx_preproc_list, cnt, 1, exclude, "", "", &result->cmplx_preproc_count, casesensitive); + result->cmplx_preproc_lines += cnt; + + // assignments + cnt = 0; + //using tmpAssignList + CUtil::CountTally(line, tmpAssignList, cnt, 1, exclude, "", "", &result->cmplx_assign_count, casesensitive); + result->cmplx_assign_lines += cnt; + + // pointers + cnt = 0; + CUtil::CountTally(line, cmplx_pointer_list, cnt, 1, exclude, "", "", &result->cmplx_pointer_count, casesensitive); + result->cmplx_pointer_lines += cnt; + } + + // do another pass since we ignored every less than or equal to symbol + // if the <= symbol appears inside parentheses count it as a less a comparison operator + // otherwise it is a signal assignment operator + 
count = 0; + condCount = 0; + for (fit = fmap->begin(); fit != fmap->end(); fit++) + { + line = fit->line; + line = CUtil::ToLower(line); + + if (CUtil::CheckBlank(line)) + continue; + if (line.find("(") != string::npos || line.find(")") != string::npos || line.find("<=") != string::npos || line.find("?") != string::npos || line.find(";") != string::npos) + { + // iterate through each character looking for parentheses or the conditional operator + for (it = line.begin(); it < line.end(); it++) + { + if (*it == '(') + count++; + else if (*it == ')') + count--; + else if (*it == '?') + condCount++; // found the conditional operator + else if (condCount > 0 && *it == ';') + condCount = 0; // found the end of the conditional operator + else if (*it == '=') + { + // looking for the <= operator + if (it != line.begin()) + { + if (*(it-1) == '<') + { + if (count == 0 && condCount == 0) + { + // no conditional operator and no parentheses meets conditions for this to be assignment operator + result->cmplx_assign_count.back()++; + result->cmplx_assign_lines++; + } + else + { + //anything else defaults to comparison operator + result->cmplx_logic_count.back()++; + result->cmplx_logic_lines++; + } + } + } + } + } + } + } + return 1; +} + +/*! +* Counts directive lines of code. 
+* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CVerilogCounter::CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapBak) +{ + bool contd = false, trunc_flag = false; + size_t idx, strSize; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + string strDirLine = ""; + + filemap::iterator itfmBak = fmapBak->begin(); + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++, itfmBak++) + { + if (CUtil::CheckBlank(iter->line)) + continue; + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(" " + iter->line, directive, cnt, 1, exclude, "", "", &result->directive_count); + } + + if (!contd) + { + // if not a continuation of a previous directive + for (vector::iterator viter = directive.begin(); viter != directive.end(); viter++) + { + // ensures the keyword stands alone, avoid, e.g., #ifabc + if (((idx = CUtil::FindKeyword(iter->line, *viter)) != string::npos) && idx == 0) + { + contd = true; + break; + } + } + if (contd) + { + strSize = CUtil::TruncateLine(itfmBak->line.length(), 0, this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine = itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + } + else + { + // continuation of a previous directive + strSize = CUtil::TruncateLine(itfmBak->line.length(), strDirLine.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + strDirLine += "\n" + itfmBak->line.substr(0, strSize); + result->directive_lines[PHY]++; + } + + if (contd) + { + contd = false; + if (result->addSLOC(strDirLine, trunc_flag)) + result->directive_lines[LOG]++; + iter->line = ""; + } + } + return 1; +} + +/*! +* Processes physical and logical lines according to language specific rules. 
+* NOTE: all the blank lines + +* whole line comments + +* lines with compiler directives +* should have been blanked from filemap by previous processing +* before reaching this function +* +* \param fmap list of processed file lines +* \param result counter results +* \param fmapBak list of original file lines (same as fmap except it contains unmodified quoted strings) +* +* \return method status +*/ +int CVerilogCounter::LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapBak) +{ + unsigned int paren_count = 0; + bool for_flag = false; + bool always_flag = false; + bool case_flag = false; + bool repeat_flag = false; + bool found_for = false; + bool found_forifwhile = false; + bool found_while = false; + char prev_char = 0; + bool data_continue = false; + string strLSLOC = ""; + string strLSLOCBak = ""; + + filemap::iterator fit, fitbak; + string line, lineBak; + StringVector loopLevel; + + unsigned int phys_exec_lines = 0; + unsigned int phys_data_lines = 0; + unsigned int temp_lines = 0; + unsigned int cnt = 0; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$"; + + for (fit = fmap->begin(), fitbak = fmapBak->begin(); fit != fmap->end(); fit++, fitbak++) + { + line = fit->line; + + // insert blank at the beginning (for searching keywords) + line = ' ' + line; + lineBak = ' ' + fitbak->line; + + // do not process blank lines + // blank line means blank_line/comment_line/directive + if (!CUtil::CheckBlank(line)) + { + LSLOC(result, line, lineBak, strLSLOC, strLSLOCBak, paren_count, for_flag, found_forifwhile, found_while, + prev_char, data_continue, temp_lines, phys_exec_lines, phys_data_lines, found_for, + loopLevel, always_flag, case_flag, repeat_flag); + + if (print_cmplx) + { + cnt = 0; + CUtil::CountTally(line, exec_name_list, cnt, 1, exclude, "", "", &result->exec_name_count); + } + + result->exec_lines[PHY] += phys_exec_lines; + phys_exec_lines = 0; + + result->data_lines[PHY] += phys_data_lines; + 
phys_data_lines = 0; + } + } + return 1; +} + +/*! +* Extracts and stores logical lines of code. +* Determines and extract logical SLOC to place in the result variable +* using addSLOC function. Each time the addSLOC function is called, +* a new logical SLOC is added. This function assumes that the directive +* is handled before it is called. +* +* \param result counter results +* \param line processed physical line of code +* \param lineBak original physical line of code +* \param strLSLOC processed logical string +* \param strLSLOCBak original logical string +* \param paren_cnt count of parenthesis +* \param forflag found for flag +* \param found_forifwhile found for, if, or while flag +* \param found_while found while flag +* \param prev_char previous character +* \param data_continue continuation of a data declaration line +* \param temp_lines tracks physical line count +* \param phys_exec_lines number of physical executable lines +* \param phys_data_lines number of physical data lines +* \param found_for found for loop +* \param loopLevel nested loop level +* \param always_flag found always +* \param case_flag found case +* \param repeat_flag found repeat +*/ +void CVerilogCounter::LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + bool &found_for, StringVector &loopLevel, bool &always_flag, bool &case_flag, bool &repeat_flag) +{ + // paren_cnt is used with 'for' statement only + size_t start = 0, startmax = 0; // starting index of the working string + size_t i = 0, strSize; + bool trunc_flag = false; + string exclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:"; + string dataExclude = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$:()."; // avoid double count of 
casts as data and executable lines (e.g. set { m_uiValue = (uint)value; } + bool found_end = false, found_endcase = false; + unsigned int cnt = 0; + bool newline = true; + + string tmp = CUtil::TrimString(strLSLOC); + size_t tmpi; + + // there may be more than 1 logical SLOC in this line + while (i < line.length()) + { + tmp = CUtil::TrimString(line.substr(start, i + 1 - start)); + if (CUtil::FindKeyword(tmp, "end") != string::npos && loopLevel.size() > 0 && loopLevel.back().compare("begin") == 0) + { + loopLevel.pop_back(); // pop begin + loopLevel.pop_back(); // pop looping + start = i + 1; + } + if ((tmpi = CUtil::FindKeyword(line.substr(start, i + 1 - start), "generate")) != string::npos) + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOCBak = strLSLOC = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + start = start + 7 + tmpi; + found_forifwhile = false; + forflag = false; + found_for = false; + always_flag = false; + case_flag = false; + repeat_flag = false; + } + + if ((tmpi = CUtil::FindKeyword(line.substr(start, i + 1 - start), "forever")) != string::npos) + { + if (print_cmplx) + { + tmp = CUtil::TrimString(line.substr(start, i + 1 - start)); + tmp = strLSLOC + " " + tmp; + if (CUtil::FindKeyword(tmp, "begin") != string::npos && loopLevel.size() > 0 && loopLevel.back().compare("looping") == 0) + { + loopLevel.push_back("begin"); + } + else if (loopLevel.size() > 0) + { + // didn't find begin, so pop off since no longer in a looping block + loopLevel.pop_back(); + } + loopLevel.push_back("looping"); + // forever doesn't have any conditions so just add it to sloc + unsigned int loopCnt = 0; + for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != 
"begin") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOCBak = strLSLOC = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + start = start + 7 + tmpi; + found_forifwhile = true; + forflag = false; + found_for = false; + always_flag = false; + case_flag = false; + repeat_flag = false; + } + + switch (line[i]) + { + case ';': // LSLOC terminators + // ';' for normal executable or declaration statement + // '{' for starting a function or 'do' stmt or a block (which is counted) + // get the previous logical mark until i-1 index is the new LSLOC + // except 'do' precedes '{' + // except '}' precedes ';' ?? + // do nothing inside 'for' statement + if (found_for == true && paren_cnt > 0 && line[i] == ';') + break; + + // record open bracket for nested loop processing + if (print_cmplx) + { + tmp = CUtil::TrimString(line.substr(start, i + 1 - start)); + tmp = strLSLOC + " " + tmp; + + if (CUtil::FindKeyword(tmp, "begin") != string::npos && loopLevel.size() > 0 && loopLevel.back().compare("looping") == 0) + { + loopLevel.push_back("begin"); + } + else if (loopLevel.size() > 0 && loopLevel.back().compare("begin") != 0) // check that this isn't already in a begin block...if it is leave it alone + { + // didn't find begin, so pop off since no longer in a looping block + loopLevel.pop_back(); + } + } + // case 'while(...);', 'while(...) 
{', and '} while(...);' + // this case is handled in case ')' + if (found_while && found_forifwhile) + { + found_while = false; + found_forifwhile = false; + start = i + 1; + break; + } + + // check for empty statement (=1 LSLOC) + if (CUtil::TrimString(line.substr(start, i + 1 - start)) == ";" && strLSLOC.length() < 1) + { + strLSLOC = ";"; + strLSLOCBak = ";"; + } + else + { + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + { + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, dataExclude, "", "", &result->data_name_count); + + temp_lines++; + if (data_continue == true && line[i] == ';') + { + result->data_lines[LOG]++; + phys_data_lines = temp_lines; + } + else + { + if (cnt > 0 && line[i] == ';' ) + { + result->data_lines[LOG]++; + if (newline) + { + // only add a physical line once per line, otherwise a line of code might have multiple physical data and exec lines + phys_data_lines = temp_lines; + newline = false; + } + } + else + { + result->exec_lines[LOG]++; + if (newline) + { + // only add a physical line once per line, otherwise a line of code might have multiple physical data and exec lines + phys_exec_lines = temp_lines; + newline = false; + } + } + } + } + else if (data_continue == true && line[i] == ';') + phys_data_lines = temp_lines; + else + phys_exec_lines = temp_lines; + data_continue = false; + temp_lines = 0; + strLSLOC = strLSLOCBak = ""; + start = i + 1; + + // reset some flagging parameters + forflag = false; + paren_cnt = 0; + found_while = false; + found_forifwhile = false; + found_for = false; + + break; + case '(': + tmp = CUtil::TrimString(line.substr(start, i)); + if (CUtil::FindKeyword(tmp, "always") != string::npos) + { + // found always + paren_cnt++; + always_flag = true; + } + if 
(CUtil::FindKeyword(tmp, "case") != string::npos || CUtil::FindKeyword(tmp, "casex") != string::npos || CUtil::FindKeyword(tmp, "casez") != string::npos) + { + // found case + paren_cnt++; + case_flag = true; + } + if (forflag) + paren_cnt++; + else + { + // handle 'for', 'while', 'if', 'repeat' the same way + + if (CUtil::FindKeyword(tmp, "for") != string::npos + || CUtil::FindKeyword(tmp, "while")!= string::npos + || CUtil::FindKeyword(tmp, "if") != string::npos + || CUtil::FindKeyword(tmp, "repeat") != string::npos) + { + forflag = true; + paren_cnt++; + + if (print_cmplx) + { + tmp = CUtil::TrimString(line.substr(start, i + 1 - start)); + tmp = strLSLOC + " " + tmp; + if (CUtil::FindKeyword(tmp, "begin") != string::npos && loopLevel.size() > 0 && loopLevel.back().compare("looping") == 0) + { + loopLevel.push_back("begin"); + } + else if (loopLevel.size() > 0 && loopLevel.back().compare("begin") != 0) + { + // didn't find begin, so pop off since no longer in a looping block + loopLevel.pop_back(); + } + } + + if (CUtil::FindKeyword(tmp, "for") != string::npos) + { + if (print_cmplx) + loopLevel.push_back("looping"); + found_for = true; + } + else if (CUtil::FindKeyword(tmp, "while")!= string::npos) + { + if (print_cmplx) + loopLevel.push_back("looping"); + found_while = true; + } + else if (CUtil::FindKeyword(tmp, "repeat")!= string::npos) + { + if (print_cmplx) + loopLevel.push_back("looping"); + repeat_flag = true; + } + + + // record nested loop level + if (print_cmplx) + { + if (CUtil::FindKeyword(tmp, "if") == string::npos) + { + unsigned int loopCnt = 0; + for (StringVector::iterator lit = loopLevel.begin(); lit < loopLevel.end(); lit++) + { + if ((*lit) != "begin") + loopCnt++; + } + if ((unsigned int)result->cmplx_nestloop_count.size() < loopCnt) + result->cmplx_nestloop_count.push_back(1); + else + result->cmplx_nestloop_count[loopCnt-1]++; + } + } + } + } + break; + case ')': + if (always_flag || case_flag || repeat_flag || forflag) + { + if (paren_cnt 
> 0) + paren_cnt--; + if (paren_cnt == 0) + { + // handle always @ + strSize = CUtil::TruncateLine(i + 1 - start, strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0) + { + strLSLOC += line.substr(start, strSize); + strLSLOCBak += lineBak.substr(start, strSize); + } + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOCBak = strLSLOC = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + start = i + 1; + found_forifwhile = true; + forflag = false; + found_for = false; + always_flag = false; + case_flag = false; + repeat_flag = false; + } + } + break; + } + + if (line[i] != ' ' && line[i] != '\t') + { + // if ;}}} --> don't count }}} at all + // also, if {}}} --> don't count }}} at all + // if ( !(line[i] == '}' && (prev_char == ';' || prev_char == '{'))) // see case '}' above + prev_char = line[i]; + + // change to not found if a char appears before + if (line[i] != ')' && found_forifwhile) + found_forifwhile = false; + } + i++; + } + + // don't save end statements to add to next sloc, they will be counted as physical sloc but not logical + tmp = CUtil::TrimString(line.substr(start, i - start)); + if ((tmpi = CUtil::FindKeyword(tmp, "endcase")) != string::npos) + { + startmax = max((start + tmpi + 8), startmax); + found_endcase = true; + } + if ((tmpi = CUtil::FindKeyword(tmp, "endmodule")) != string::npos) + { + startmax = max((start + tmpi + 10), startmax); + } + if ((tmpi = CUtil::FindKeyword(tmp, "endtask")) != string::npos) + { + startmax = max((start + tmpi + 8), startmax); + } + if ((tmpi = CUtil::FindKeyword(tmp, "endfunction")) != string::npos) + { + startmax = max((start + tmpi + 12), startmax); + } + if ((tmpi = CUtil::FindKeyword(tmp, "end")) != string::npos) + { + startmax = max((start + tmpi + 4), startmax); + found_end = true; // this is to catch any empty begin and end statements + } + if (startmax != 0) start = min(i, startmax); // if we found and end statement update start to be the max of i 
and startmax + + tmp = CUtil::TrimString(line.substr(start, i - start)); + strSize = CUtil::TruncateLine(tmp.length(), strLSLOC.length(), this->lsloc_truncate, trunc_flag); + if (strSize > 0 || (strLSLOC.size() > 0 && found_end)) + { + strLSLOC += tmp.substr(0, strSize); + tmp = CUtil::TrimString(lineBak.substr(start, i - start)); + strLSLOCBak += tmp.substr(0, strSize); + + if (found_end) + { + found_end = false; + if (strLSLOC.compare(strLSLOCBak) != 0) + { + if (result->addSLOC(strLSLOCBak, trunc_flag)) + result->exec_lines[LOG]++; + strLSLOCBak = strLSLOC = ""; + phys_exec_lines = temp_lines; + temp_lines = 0; + } + } + } + + // make sure that we are not beginning to process a new data line + cnt = 0; + CUtil::CountTally(strLSLOC, data_name_list, cnt, 1, exclude, "", "", NULL); + + if (cnt > 0) + data_continue = true; + if (data_continue) + temp_lines++; + if (startmax > 0 && !found_endcase && !found_end) + phys_data_lines = 1; + else if (temp_lines == 0 && phys_data_lines == 0 && phys_exec_lines == 0) + phys_exec_lines = 1; +} diff --git a/src/CVerilogCounter.h b/src/CVerilogCounter.h new file mode 100644 index 0000000..86620bd --- /dev/null +++ b/src/CVerilogCounter.h @@ -0,0 +1,36 @@ +//! Code counter class definition for the Verilog language. +/*! +* \file CVerilogCounter.h +* +* This file contains the code counter class definition for the Verilog hardware definition language (used in FPGA programming). +*/ + +#ifndef CVerilogCounter_h +#define CVerilogCounter_h + +#include "CCodeCounter.h" +#include "CVHDLCounter.h" + +//! Verilog code counter class. +/*! +* \class CVerilogCounter +* +* Defines the Verilog code counter class. 
+*/ + +class CVerilogCounter : public CCodeCounter +{ +public: + CVerilogCounter(); + +protected: + virtual int CountComplexity(filemap* fmap, results* result); + virtual int CountDirectiveSLOC(filemap* fmap, results* result, filemap* fmapmBak = NULL); + virtual int LanguageSpecificProcess(filemap* fmap, results* result, filemap* fmapmBak = NULL); + void LSLOC(results* result, string line, string lineBak, string &strLSLOC, string &strLSLOCBak, unsigned int &paren_cnt, + bool &forflag, bool &found_forifwhile, bool &found_while, char &prev_char, bool &data_continue, + unsigned int &temp_lines, unsigned int &phys_exec_lines, unsigned int &phys_data_lines, + bool &found_for, StringVector &loopLevel, bool &always_flag, bool &case_flag, bool &repeat_flag); +}; + +#endif diff --git a/src/CWebCounter.cpp b/src/CWebCounter.cpp new file mode 100644 index 0000000..1c5316f --- /dev/null +++ b/src/CWebCounter.cpp @@ -0,0 +1,1098 @@ +//! Code counter class methods for web languages. +/*! +* \file CWebCounter.cpp +* +* This file contains the code counter class methods for web languages. +*/ + +#include + +#include "CWebCounter.h" + +/*! +* Constructs a CWebCounter object. 
+*/ +CWebCounter::CWebCounter() +{ + classtype = WEB; + + // initialize list of web language names + web_lang_names.push_back("PHP"); + web_lang_names.push_back("ASP"); + web_lang_names.push_back("JSP"); + web_lang_names.push_back("HTML"); + web_lang_names.push_back("XML"); + web_lang_names.push_back("ColdFusion"); + + // initialize language file counters + total_php_filesA = 0; + total_asp_filesA = 0; + total_jsp_filesA = 0; + total_htm_filesA = 0; + total_xml_filesA = 0; + total_cfm_filesA = 0; + total_php_filesB = 0; + total_asp_filesB = 0; + total_jsp_filesB = 0; + total_htm_filesB = 0; + total_xml_filesB = 0; + total_cfm_filesB = 0; + total_php_dupFilesA = 0; + total_asp_dupFilesA = 0; + total_jsp_dupFilesA = 0; + total_htm_dupFilesA = 0; + total_xml_dupFilesA = 0; + total_cfm_dupFilesA = 0; + total_php_dupFilesB = 0; + total_asp_dupFilesB = 0; + total_jsp_dupFilesB = 0; + total_htm_dupFilesB = 0; + total_xml_dupFilesB = 0; + total_cfm_dupFilesB = 0; + + // initialize web language file extensions + file_exten_htm.push_back(".html"); + file_exten_htm.push_back(".htm"); + file_exten_htm.push_back(".shtml"); + file_exten_htm.push_back(".shtm"); + file_exten_htm.push_back(".stm"); + file_exten_htm.push_back(".sht"); + file_exten_htm.push_back(".oth"); + file_exten_htm.push_back(".xhtml"); + file_exten_xml.push_back(".xml"); + file_exten_php.push_back(".php"); + file_exten_asp.push_back(".asp"); + file_exten_asp.push_back(".aspx"); + file_exten_jsp.push_back(".jsp"); + file_exten_cfm.push_back(".cfm"); + file_exten_cfm.push_back(".cfml"); + file_exten_cfm.push_back(".cfc"); + UpdateWebFileExt(); + + // the space to save the separated file + SourceFileElement tmp; + tmp.second.file_name = "*.*htm"; + Separation.insert(map::value_type(HTML ,tmp)); + tmp.second.file_name = "*.*htmphp"; + Separation.insert(map::value_type(HTML_PHP ,tmp)); + tmp.second.file_name = "*.*htmjsp"; + Separation.insert(map::value_type(HTML_JSP ,tmp)); + tmp.second.file_name = "*.*htmasp"; + 
Separation.insert(map::value_type(HTML_ASP ,tmp)); + tmp.second.file_name = "*.*htmcfm"; + Separation.insert(map::value_type(HTML_CFM ,tmp)); + tmp.second.file_name = "*.*xml"; + Separation.insert(map::value_type(XML ,tmp)); + tmp.second.file_name = "*.*jshtm"; + Separation.insert(map::value_type(JAVASCRIPT_HTML ,tmp)); + tmp.second.file_name = "*.*jsxml"; + Separation.insert(map::value_type(JAVASCRIPT_XML ,tmp)); + tmp.second.file_name = "*.*jsphp"; + Separation.insert(map::value_type(JAVASCRIPT_PHP ,tmp)); + tmp.second.file_name = "*.*jsjsp"; + Separation.insert(map::value_type(JAVASCRIPT_JSP ,tmp)); + tmp.second.file_name = "*.*jsasps"; + Separation.insert(map::value_type(JAVASCRIPT_ASP_S ,tmp)); + tmp.second.file_name = "*.*jsaspc"; + Separation.insert(map::value_type(JAVASCRIPT_ASP_C ,tmp)); + tmp.second.file_name = "*.*jscfm"; + Separation.insert(map::value_type(JAVASCRIPT_CFM ,tmp)); + tmp.second.file_name = "*.*php"; + Separation.insert(map::value_type(PHP ,tmp)); + tmp.second.file_name = "*.*sqlcfm"; + Separation.insert(map::value_type(SQL_CFM ,tmp)); + tmp.second.file_name = "*.*java"; + Separation.insert(map::value_type(JAVA_JSP ,tmp)); + tmp.second.file_name = "*.*cshtm"; + Separation.insert(map::value_type(CSHARP_HTML ,tmp)); + tmp.second.file_name = "*.*csxml"; + Separation.insert(map::value_type(CSHARP_XML ,tmp)); + tmp.second.file_name = "*.*csasps"; + Separation.insert(map::value_type(CSHARP_ASP_S ,tmp)); + tmp.second.file_name = "*.*vbshtm"; + Separation.insert(map::value_type(VBS_HTML ,tmp)); + tmp.second.file_name = "*.*vbsxml"; + Separation.insert(map::value_type(VBS_XML ,tmp)); + tmp.second.file_name = "*.*vbsphp"; + Separation.insert(map::value_type(VBS_PHP ,tmp)); + tmp.second.file_name = "*.*vbsjsp"; + Separation.insert(map::value_type(VBS_JSP ,tmp)); + tmp.second.file_name = "*.*vbsasps"; + Separation.insert(map::value_type(VBS_ASP_S ,tmp)); + tmp.second.file_name = "*.*vbsaspc"; + Separation.insert(map::value_type(VBS_ASP_C ,tmp)); + 
tmp.second.file_name = "*.*vbscfm"; + Separation.insert(map::value_type(VBS_CFM ,tmp)); + tmp.second.file_name = "*.*cfm"; + Separation.insert(map::value_type(COLDFUSION ,tmp)); + tmp.second.file_name = "*.*cfs"; + Separation.insert(map::value_type(CFSCRIPT ,tmp)); + + // the space to save the separated line + lineElement tmp2; + SeparatedLine.insert(map::value_type(HTML ,tmp2)); + SeparatedLine.insert(map::value_type(HTML_PHP ,tmp2)); + SeparatedLine.insert(map::value_type(HTML_JSP ,tmp2)); + SeparatedLine.insert(map::value_type(HTML_ASP ,tmp2)); + SeparatedLine.insert(map::value_type(HTML_CFM ,tmp2)); + SeparatedLine.insert(map::value_type(XML ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_HTML ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_XML ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_PHP ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_JSP ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_ASP_S ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_ASP_C ,tmp2)); + SeparatedLine.insert(map::value_type(JAVASCRIPT_CFM ,tmp2)); + SeparatedLine.insert(map::value_type(PHP ,tmp2)); + SeparatedLine.insert(map::value_type(SQL_CFM ,tmp2)); + SeparatedLine.insert(map::value_type(JAVA_JSP ,tmp2)); + SeparatedLine.insert(map::value_type(CSHARP_HTML ,tmp2)); + SeparatedLine.insert(map::value_type(CSHARP_XML ,tmp2)); + SeparatedLine.insert(map::value_type(CSHARP_ASP_S ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_HTML ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_XML ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_PHP ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_JSP ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_ASP_S ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_ASP_C ,tmp2)); + SeparatedLine.insert(map::value_type(VBS_CFM ,tmp2)); + SeparatedLine.insert(map::value_type(COLDFUSION ,tmp2)); + SeparatedLine.insert(map::value_type(CFSCRIPT ,tmp2)); + + // supported script tags + 
// this container contains the tags that may appear in html code to specify the script language + TagTable.insert(map::value_type(string("::value_type(string("::value_type(string("?>") ,WEB_PHP_END)); + TagTable.insert(map::value_type(string("::value_type(string("::value_type(string("<%") ,WEB_ASP_JSP_START)); + TagTable.insert(map::value_type(string("%>") ,WEB_ASP_JSP_END)); + TagTable.insert(map::value_type(string("::value_type(string(" + ScriptTable.insert(map::value_type(string("javascript"),JAVASCRIPT)); + ScriptTable.insert(map::value_type(string("jscript") ,JAVASCRIPT)); + ScriptTable.insert(map::value_type(string("c#") ,CSHARP_HTML)); + ScriptTable.insert(map::value_type(string("php") ,PHP)); + ScriptTable.insert(map::value_type(string("vb") ,VB)); +} + +/*! +* Cleans up separated language file objects. +* +* Clears the first member of every entry in Separation, then pops +* PreviousLanguage until it is empty. +*/ +void CWebCounter::CleanSeparatedFile() +{ + for (map::iterator iter = Separation.begin(); iter != Separation.end(); iter++) + iter->second.first.clear(); + while (!PreviousLanguage.empty()) + PreviousLanguage.pop(); +} + +/*! +* Cleans up separated language file lines. +* +* Resets every entry in SeparatedLine: line becomes the empty string +* and lineNumber becomes 0. +*/ +void CWebCounter::CleanSeparatedLine() +{ + for (map::iterator iter = SeparatedLine.begin(); iter != SeparatedLine.end(); iter++) + { + iter->second.line = ""; + iter->second.lineNumber = 0; + } +} + +/*! +* Separate web files into different languages and count these +* languages using their corresponding counters. +* Names the newly created file with its embedding file's filename. +* The filename is used in the matching function for the comparison purpose. +* new file's filename = embedding file's filename + its default filename +* NOTE: the filename contains *.* in its name. 
+* +* \param fmap list of file lines +* \param result counter results +* +* \return method status +*/ +int CWebCounter::CountSLOC(filemap* fmap, results* result) +{ + CleanSeparatedFile(); + lineElement current_line; + lineElement separate_line; + size_t pos, close_pos, search_result, tempi, lng; + char c; + int current_language, cf_mode = -1, tag_open_mode = -1; + int script_language_of_asp = VBS_ASP_S; + int script_language_of_cfm = VBS_CFM; + bool blank_line, found; + bool first_line_of_asp = true; + size_t preLang = INVALID_POSITION; // used for writing to files + string language, tempstr; + WebType webType; + + webType = GetWebType(result->file_name); + language_name = GetWebLangName(webType); + switch (webType) + { + case WEB_ASP: + current_language = HTML_ASP; + break; + case WEB_JSP: + current_language = HTML_JSP; + break; + case WEB_PHP: + current_language = HTML_PHP; + break; + case WEB_CFM: + current_language = COLDFUSION; + cf_mode = current_language; + break; + case WEB_XML: + current_language = XML; + break; + default: + current_language = HTML; + break; + } + + for (filemap::iterator iter = fmap->begin(); iter != fmap->end(); iter++) + { + current_line.line = iter->line; + current_line.lineNumber = iter->lineNumber; + + if (current_line.line.length() == 0) + blank_line = true; + else + blank_line = false; + + while (current_line.line.length() > 0) + { + search_result = CUtil::FindStringsCaseInsensitive(current_line.line, TagTable, pos, preLang); + if (search_result != INVALID_POSITION) + preLang = search_result; + + switch (search_result) + { + case WEB_PHP_START: + { + SeparatedLine[current_language].line += current_line.line.substr(0, pos + 5); + PreviousLanguage.push(current_language); + current_language = PHP; + current_line.line = current_line.line.substr(pos + 5); + } + break; + case WEB_PHP_START2: + { + // check for = pos + 5) + { + tempstr = CUtil::ToLower(current_line.line); + if (tempstr.substr(pos + 2, 3) == "php") + tempi = 5; + } + 
SeparatedLine[current_language].line += current_line.line.substr(0, pos + tempi); + PreviousLanguage.push(current_language); + current_language = PHP; + current_line.line = current_line.line.substr(pos + tempi); + } + break; + case WEB_PHP_END: + { + // check for line termination (not required at an end tag), add semi-colon if none + tempstr = CUtil::TrimString(current_line.line.substr(0, pos)); + if (CUtil::CheckBlank(tempstr)) + { + filemap::iterator iter2 = Separation[current_language].first.end(); + while (CUtil::CheckBlank(tempstr)) + { + if (iter2 == Separation[current_language].first.begin()) + break; + iter2--; + tempstr = CUtil::TrimString(iter2->line); + if (iter2 == Separation[current_language].first.begin()) + break; + } + if (!CUtil::CheckBlank(tempstr)) + { + c = tempstr[tempstr.length() - 1]; + if (c != ';' && c != '{' && c != '}' && c != ':') + iter->line += ";"; + } + } + else + { + c = tempstr[tempstr.length() - 1]; + if (c != ';' && c != '{' && c != '}' && c != ':') + { + current_line.line.insert(pos, ";"); + pos++; + } + } + if (cf_mode >= 0) + current_language = cf_mode; + else + { + current_language = PreviousLanguage.top(); + PreviousLanguage.pop(); + } + } + break; + case WEB_ASP_JSP_END: + { + SeparatedLine[current_language].line += current_line.line.substr(0, pos); + if (current_language == JAVA_JSP || current_language == CSHARP_ASP_S) + { + // special process for JSP; add semicolon at the end of line if there is no semicolon + SeparatedLine[current_language].line = CUtil::TrimString(SeparatedLine[current_language].line); + lng = SeparatedLine[current_language].line.length(); + if (lng > 0 && SeparatedLine[current_language].line[lng-1] != ';') + SeparatedLine[current_language].line += ";"; + } + if (cf_mode >= 0) + current_language = cf_mode; + else + { + current_language = PreviousLanguage.top(); + PreviousLanguage.pop(); + } + + // remove %>, next line + current_line.line = current_line.line.substr(pos + 2); + } + break; + case 
WEB_SCRIPT_START: + { + close_pos = current_line.line.find_first_of(">", pos + 7); + SeparatedLine[current_language].line += current_line.line.substr(0, close_pos + 1); + string language = CUtil::ToLower(current_line.line.substr(pos + 7, close_pos-pos + 7)); + if (close_pos < current_line.line.size()) + current_line.line = current_line.line.substr(close_pos + 1); + else + current_line.line = ""; + + PreviousLanguage.push(current_language); + current_language = (int)CUtil::FindStringsCaseInsensitive(language, ScriptTable, pos); + + // check for