diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 9baa23f4..91cc43d6 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,6 +1,6 @@ -META MD5 67039fe64aba3375bbcf27f16984acc5 -DATA MD5 4833f5614e463ecc7989b00a29499240 -DATA: 16345157 interested lines. MARKUP: 62644 items +META MD5 a563fa1445f92b235930b2a4b82a379f +DATA MD5 ffb6d9cefac6009b3f35b77e7edf63df +DATA: 16345157 interested lines. MARKUP: 62651 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 66 414 85 @@ -29,7 +29,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .cmd 4 401 2 3 .cnf 8 858 15 36 16 .coffee 1 585 2 -.conf 60 4945 53 67 53 +.conf 60 4945 55 64 53 .config 20 492 16 38 1 .cpp 15 5688 2 61 .creds 1 10 1 1 @@ -63,7 +63,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .gd 1 37 1 .gml 3 3075 16 .gni 3 5017 19 -.go 1080 566476 692 4117 739 +.go 1080 566476 692 4123 738 .golden 5 1168 1 13 29 .gradle 45 3265 4 90 100 .graphql 7 420 13 @@ -82,7 +82,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .ipynb 1 134 5 .j 1 241 2 2 .j2 30 5530 6 186 10 -.java 621 134132 360 1366 171 +.java 621 134132 358 1368 171 .jenkinsfile 1 58 2 6 .jinja2 1 64 2 .js 659 536413 535 2489 330 @@ -91,7 +91,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .jsx 7 857 19 .jwt 1 1 2 .key 83 2737 70 14 -.kt 123 20774 67 379 3 +.kt 123 20774 65 381 3 .l 1 982 1 .las 1 6656 35 .lasso 1 230 7 @@ -124,7 +124,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .mqh 1 1023 2 .msg 1 26644 1 1 .mysql 1 36 2 -.ndjson 2 5006 69 237 2 +.ndjson 2 5006 72 239 2 .nix 4 211 12 .nolint 1 2 1 .odd 1 1281 43 @@ -134,7 +134,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .patch 4 109405 4 27 .pbxproj 1 941 2 .pem 48 1169 47 8 -.php 371 75710 128 1619 79 +.php 371 75710 129 1618 79 .pl 16 14727 6 34 .pm 3 744 7 .po 3 2994 15 @@ -158,7 +158,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .pyx 2 1094 23 .r 4 62 6 3 1 .rake 2 51 2 -.rb 860 131838 258 3311 613 +.rb 860 131838 259 3311 613 .re 1 31 1 .red 1 159 1 .release 1 13 4 @@ -171,7 +171,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .rs 31 9855 2 233 11 .rsc 1 691 1 .rsp 16 7101 19 10 28 -.rst 86 33980 70 321 68 +.rst 86 33980 69 321 68 .rules 1 6 2 .sample 2 25 3 4 4 .sbt 3 570 5 2 @@ -222,16 +222,16 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36162 545 892 380 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10259 16345157 12150 50325 5111 +TOTAL: 10259 16345157 12152 50333 5110 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- -API 128 3161 189 0 0 3350 128 0.000000 1.000000 0.963197 0.000000 +API 128 3162 189 0 0 3351 128 0.000000 1.000000 0.963208 0.000000 AWS Client ID 167 21 0 0 0 21 167 0.000000 1.000000 0.111702 0.000000 AWS Multi 75 16 0 0 0 16 75 0.000000 1.000000 0.175824 0.000000 AWS S3 Bucket 66 24 0 0 0 24 66 0.000000 1.000000 0.266667 0.000000 Atlassian Old PAT token 27 308 3 0 0 311 27 0.000000 1.000000 0.920118 0.000000 -Auth 418 2727 76 0 0 2803 418 0.000000 1.000000 0.870227 0.000000 +Auth 420 2730 76 0 0 2806 420 0.000000 1.000000 0.869808 0.000000 Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 BASE64 Private Key 7 4 0 0 0 4 7 0.000000 1.000000 0.363636 0.000000 BASE64 encoded PEM Private Key 7 0 0 0 0 0 7 1.000000 0.000000 0.000000 @@ -253,17 +253,17 @@ Grafana Provisioned API Key 22 1 0 JSON Web Token 170 61 0 0 0 61 170 0.000000 1.000000 0.264069 0.000000 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 15 6 1 0 0 7 15 0.000000 1.000000 0.318182 0.000000 -Key 3918 15693 482 0 0 16175 3918 0.000000 1.000000 0.805007 0.000000 +Key 3918 15695 482 0 0 16177 3918 0.000000 1.000000 0.805026 0.000000 Nonce 91 49 0 0 0 49 91 0.000000 1.000000 0.350000 0.000000 Other 0 8291 1 0 0 8292 0 0.000000 1.000000 PEM Private Key 1019 1483 0 0 0 1483 1019 0.000000 1.000000 0.592726 0.000000 -Password 1843 7527 2711 0 0 10238 1843 0.000000 1.000000 0.847446 0.000000 +Password 1843 7529 2711 0 0 10240 1843 0.000000 1.000000 0.847472 0.000000 Salt 45 76 2 0 0 78 45 0.000000 1.000000 0.634146 0.000000 Secret 1297 1575 799 0 0 2374 1297 0.000000 1.000000 0.646690 0.000000 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 0 0 1 4 0.000000 1.000000 0.200000 0.000000 -Token 648 4177 438 0 0 4615 648 0.000000 1.000000 0.876876 0.000000 +Token 648 4176 438 0 0 4614 648 0.000000 1.000000 0.876853 0.000000 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 -URL Credentials 208 145 225 0 0 370 208 0.000000 1.000000 0.640138 0.000000 +URL Credentials 208 146 224 0 0 370 208 0.000000 1.000000 0.640138 0.000000 UUID 1069 265 0 0 0 265 1069 0.000000 1.000000 0.198651 0.000000 - 12150 50325 5111 0 0 0 50325 12150 0.000000 1.000000 0.805522 0.000000 + 12152 50333 5110 0 0 0 50333 12152 0.000000 1.000000 0.805521 0.000000 diff --git a/download_data.py b/download_data.py index fae7462e..9f589e31 100644 --- a/download_data.py +++ b/download_data.py @@ -398,6 +398,9 @@ def gen_random_value(value): if '-' == i and len(value) in (18, 36) and n in (8, 13, 18, 23): # UUID separator or something like this continue + if ':' == i and 2 == n % 3: + # wifi key like 7f:44:52:fe: ... + continue if hex_upper and i not in "0123456789ABCDEF": hex_upper = False if hex_lower and i not in "0123456789abcdef": diff --git a/meta/1ce69180.csv b/meta/1ce69180.csv index 3bb76f7b..7ae31a8a 100644 --- a/meta/1ce69180.csv +++ b/meta/1ce69180.csv @@ -629,3 +629,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 114065,5e754a57,GitHub,1ce69180,data/1ce69180/src/5e754a57.rb,30,30,T,F,50,80,F,F,,,,,0.0,-1,F,F,F,Auth 114066,fcf0ceac,GitHub,1ce69180,data/1ce69180/src/fcf0ceac.txt,2,2,F,F,,,F,F,,,,,0.00,,F,F,F,Auth 131923,1e5305b3,GitHub,1ce69180,data/1ce69180/src/1e5305b3.rb,36,36,F,F,90,330,F,F,,,,,0.0,-1,F,F,F,JSON Web Token +1479368,560ba91b,GitHub,1ce69180,data/1ce69180/src/560ba91b.rb,14,14,T,F,116,124,F,F,,,,,0.0,0,F,F,F,Auth diff --git a/meta/28728ab4.csv b/meta/28728ab4.csv index d535ae39..628dd4b5 100644 --- a/meta/28728ab4.csv +++ b/meta/28728ab4.csv @@ -838,3 +838,5 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 1341508,76a6fc48,GitHub,28728ab4,data/28728ab4/other/76a6fc48.md,83,83,F,F,40,83,F,F,,,,,0.0,0,F,F,F,Credential 1341509,d3e2cec1,GitHub,28728ab4,data/28728ab4/src/d3e2cec1.toml,41,41,F,F,40,83,F,F,,,,,0.0,0,F,F,F,Credential 1341513,56575ef0,GitHub,28728ab4,data/28728ab4/test/56575ef0.go,89,89,F,F,17,85,F,F,,,,,0.0,0,F,F,F,API +1479369,2ac10c69,GitHub,28728ab4,data/28728ab4/src/2ac10c69.go,70,70,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth +1479370,bf78cace,GitHub,28728ab4,data/28728ab4/src/bf78cace.go,30,30,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth diff --git a/meta/2df212a2.csv b/meta/2df212a2.csv index 6ce9325c..9af53953 100644 --- a/meta/2df212a2.csv +++ b/meta/2df212a2.csv @@ -61,7 +61,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 30545,6ac10689,GitHub,2df212a2,data/2df212a2/src/6ac10689.rst,120,120,Template,F,26,30,F,F,Any,,,Secret,2.0,4,F,F,F,Password:Key:Secret 30547,6ac10689,GitHub,2df212a2,data/2df212a2/src/6ac10689.rst,124,124,Template,F,26,30,F,F,Any,,,Secret,2.0,4,F,F,F,Password:Key:Secret 30548,7824b4fc,GitHub,2df212a2,data/2df212a2/test/7824b4fc.py,28,28,Template,F,62,68,F,F,Any,,,Secret,2.58,6,F,F,F,Credential:Key -31125,6f0a498c,GitHub,2df212a2,data/2df212a2/test/6f0a498c.rst,25,25,T,T,14,34,F,F,Any,,AWS Client ID,Unknown,3.68,20,F,F,F,AWS Client ID:Gitlab Feed Token:Key +31125,6f0a498c,GitHub,2df212a2,data/2df212a2/test/6f0a498c.rst,25,25,T,T,14,34,F,F,Any,,AWS Client ID,Unknown,3.68,20,F,F,F,AWS Client ID:Gitlab Feed Token 32689,7824b4fc,GitHub,2df212a2,data/2df212a2/test/7824b4fc.py,68,68,Template,T,35,40,F,F,CharsOnly,,,Token,2.32,5,F,F,F,Token 32690,6ac10689,GitHub,2df212a2,data/2df212a2/src/6ac10689.rst,62,62,F,F,26,39,F,F,,,,,0.0,0,F,F,F,Token 36742,7824b4fc,GitHub,2df212a2,data/2df212a2/test/7824b4fc.py,93,93,F,F,30,35,F,F,,,,,0.0,0,F,F,F,Token diff --git a/meta/35c273e2.csv b/meta/35c273e2.csv index 70de45c3..a434fc24 100644 --- a/meta/35c273e2.csv +++ b/meta/35c273e2.csv @@ -15,7 +15,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 84944,7849399f,GitHub,35c273e2,data/35c273e2/src/7849399f.java,155,155,F,F,36,40,F,F,,,,,2.0,4,F,F,F,Credential 84945,9c5fb3d1,GitHub,35c273e2,data/35c273e2/src/9c5fb3d1.xml,12,12,F,F,169,189,F,F,,,,,3.17,20,F,F,F,Credential 84946,29edcedd,GitHub,35c273e2,data/35c273e2/test/29edcedd.java,27,27,T,F,45,81,F,F,,,,,3.51,36,F,F,F,Token:UUID -84947,470893ef,GitHub,35c273e2,data/35c273e2/test/470893ef.java,27,27,F,F,26,37,F,F,,,,,2.6,11,F,F,F,Password +84947,470893ef,GitHub,35c273e2,data/35c273e2/test/470893ef.java,27,27,F,F,26,35,F,F,,,,,2.6,11,F,F,F,Password 84948,6e4997a7,GitHub,35c273e2,data/35c273e2/test/6e4997a7.groovy,34,34,T,F,48,84,F,F,,,,,3.29,36,F,F,F,Token:UUID 84949,805bb559,GitHub,35c273e2,data/35c273e2/test/805bb559.java,124,124,T,F,54,67,F,F,,,,,2.74,13,F,F,F,Credential 84950,d1839abf,GitHub,35c273e2,data/35c273e2/test/d1839abf.java,90,90,F,F,29,34,F,F,,,,,1.46,5,F,F,F,Auth diff --git a/meta/48fd3902.csv b/meta/48fd3902.csv index 570a1042..39f18a8e 100644 --- a/meta/48fd3902.csv +++ b/meta/48fd3902.csv @@ -129,8 +129,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 86772,6ae2e7e4,GitHub,48fd3902,data/48fd3902/test/6ae2e7e4.java,30,30,F,F,42,53,F,F,,,,,2.96,11,F,F,F,Password 86773,f23339e5,GitHub,48fd3902,data/48fd3902/test/f23339e5.java,41,41,F,F,13,23,F,F,,,,,2.59,10,F,F,F,Nonce 86774,73bf4bdd,GitHub,48fd3902,data/48fd3902/test/73bf4bdd.java,15,15,F,F,17,21,F,F,,,,,2.0,4,F,F,F,Key -86775,73bf4bdd,GitHub,48fd3902,data/48fd3902/test/73bf4bdd.java,336,336,T,F,11,70,F,F,,,,,2.99,59,F,F,F,Key -86776,73bf4bdd,GitHub,48fd3902,data/48fd3902/test/73bf4bdd.java,383,383,T,F,11,70,F,F,,,,,2.83,59,F,F,F,Key +86775,73bf4bdd,GitHub,48fd3902,data/48fd3902/test/73bf4bdd.java,336,336,F,F,11,70,F,F,,,,,2.99,59,F,F,F,Key +86776,73bf4bdd,GitHub,48fd3902,data/48fd3902/test/73bf4bdd.java,383,383,F,F,11,70,F,F,,,,,2.83,59,F,F,F,Key 86777,73bf4bdd,GitHub,48fd3902,data/48fd3902/test/73bf4bdd.java,420,420,F,F,23,31,F,F,,,,,2.5,8,F,F,F,Key 86778,7cd2d29d,GitHub,48fd3902,data/48fd3902/test/7cd2d29d.java,812,812,F,F,81,90,F,F,,,,,2.6,9,F,F,F,Credential:Certificate 86779,839267fb,GitHub,48fd3902,data/48fd3902/test/839267fb.kt,11,11,F,F,57,66,F,F,,,,,2.6,9,F,F,F,Credential @@ -153,8 +153,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 86796,c3d109ca,GitHub,48fd3902,data/48fd3902/test/c3d109ca.java,359,359,F,F,70,82,F,F,,,,,2.79,12,F,F,F,Password 86797,c6175528,GitHub,48fd3902,data/48fd3902/test/c6175528.kt,296,296,F,F,31,50,F,F,,,,,3.17,19,F,F,F,Password 86798,c709b566,GitHub,48fd3902,data/48fd3902/test/c709b566.kt,15,15,F,F,23,27,F,F,,,,,2.0,4,F,F,F,Key -86799,c709b566,GitHub,48fd3902,data/48fd3902/test/c709b566.kt,321,321,T,F,17,76,F,F,,,,,2.99,59,F,F,F,Key -86800,c709b566,GitHub,48fd3902,data/48fd3902/test/c709b566.kt,367,367,T,F,17,76,F,F,,,,,2.83,59,F,F,F,Key +86799,c709b566,GitHub,48fd3902,data/48fd3902/test/c709b566.kt,321,321,F,F,17,76,F,F,,,,,2.99,59,F,F,F,Key +86800,c709b566,GitHub,48fd3902,data/48fd3902/test/c709b566.kt,367,367,F,F,17,76,F,F,,,,,2.83,59,F,F,F,Key 86801,c709b566,GitHub,48fd3902,data/48fd3902/test/c709b566.kt,402,402,F,F,29,37,F,F,,,,,2.5,8,F,F,F,Key 86802,cc3a23ed,GitHub,48fd3902,data/48fd3902/test/cc3a23ed.kt,68,68,F,F,33,46,F,F,,,,,2.9,13,F,F,F,Password 86803,d4ef4fe5,GitHub,48fd3902,data/48fd3902/test/d4ef4fe5.java,119,119,F,F,35,44,F,F,,,,,2.24,9,F,F,F,Credential diff --git a/meta/60f9915d.csv b/meta/60f9915d.csv index f0f7d2a9..a23a531b 100644 --- a/meta/60f9915d.csv +++ b/meta/60f9915d.csv @@ -86,7 +86,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 82149,4b667fea,GitHub,60f9915d,data/60f9915d/src/4b667fea.go,1176,1176,F,F,22,41,F,F,,,,,0.0,0,F,F,F,Password 82150,4b667fea,GitHub,60f9915d,data/60f9915d/src/4b667fea.go,1316,1316,F,F,22,41,F,F,,,,,0.0,0,F,F,F,Password 82151,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,186,186,Template,T,40,46,F,F,CharsOnly,,,Secret,2.25,6,F,F,F,Password -82152,373f5578,GitHub,60f9915d,data/60f9915d/test/373f5578.go,51,51,Template,F,22,25,T,F,CharsOnly,,,Secret,1.58,3,F,F,F,URL Credentials +82152,373f5578,GitHub,60f9915d,data/60f9915d/test/373f5578.go,51,51,F,F,,,T,F,CharsOnly,,,Secret,1.58,3,F,F,F,URL Credentials:Password 100788,9870bda5,GitHub,60f9915d,data/60f9915d/src/9870bda5.sum,174,174,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Other 100789,9870bda5,GitHub,60f9915d,data/60f9915d/src/9870bda5.sum,175,175,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Other 110372,9870bda5,GitHub,60f9915d,data/60f9915d/src/9870bda5.sum,241,241,F,F,,,F,F,,,,,0.00,,F,F,F,Auth diff --git a/meta/6c73b80a.csv b/meta/6c73b80a.csv index 51571279..3faa71d3 100644 --- a/meta/6c73b80a.csv +++ b/meta/6c73b80a.csv @@ -350,8 +350,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 35752,728925ac,GitHub,6c73b80a,data/6c73b80a/test/728925ac.go,2913,2913,F,F,17,25,F,F,,,,,0.0,0,F,F,F,Other 35770,94ae447a,GitHub,6c73b80a,data/6c73b80a/src/94ae447a.go,696,696,F,F,,,F,F,,,,,0,0,F,F,F,Password:URL Credentials 35884,edc1c568,GitHub,6c73b80a,data/6c73b80a/test/edc1c568.go,540,540,T,F,14,70,F,F,Any,,,Unknown,4.63,56,F,F,F,Key -35885,7f0ec905,GitHub,6c73b80a,data/6c73b80a/src/7f0ec905.conf,26,26,F,F,16,72,F,F,Any,,,Unknown,4.6,56,F,F,F,Token:Key -35886,7f0ec905,GitHub,6c73b80a,data/6c73b80a/src/7f0ec905.conf,41,41,F,F,16,72,F,F,Any,,,Unknown,4.57,56,F,F,F,Key +35885,7f0ec905,GitHub,6c73b80a,data/6c73b80a/src/7f0ec905.conf,26,26,T,F,16,72,F,F,Any,,,Unknown,4.6,56,F,F,F,Key +35886,7f0ec905,GitHub,6c73b80a,data/6c73b80a/src/7f0ec905.conf,41,41,T,F,16,72,F,F,Any,,,Unknown,4.57,56,F,F,F,Key 35891,edc1c568,GitHub,6c73b80a,data/6c73b80a/test/edc1c568.go,573,573,T,F,18,74,F,F,Any,,,Unknown,4.6,56,F,F,F,Key 35896,88763956,GitHub,6c73b80a,data/6c73b80a/test/88763956.go,3587,3587,T,T,51,55,T,F,CharsOnly,,,Secret,1.5,4,F,F,F,Password:URL Credentials 35897,7708ebf0,GitHub,6c73b80a,data/6c73b80a/test/7708ebf0.go,5119,5119,T,T,47,55,T,F,CharsOnly,,,Secret,2.75,8,F,F,F,URL Credentials diff --git a/meta/81cd05d0.csv b/meta/81cd05d0.csv index 8742a828..ca70d2d4 100644 --- a/meta/81cd05d0.csv +++ b/meta/81cd05d0.csv @@ -5238,7 +5238,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 1341473,970588b6,GitHub,81cd05d0,data/81cd05d0/src/970588b6.json,52671,52671,T,F,23,59,F,F,,,,,0.0,0,F,F,F,UUID:Key 1341474,970588b6,GitHub,81cd05d0,data/81cd05d0/src/970588b6.json,52674,52674,T,F,23,59,F,F,,,,,0.0,0,F,F,F,UUID:Key 1341475,970588b6,GitHub,81cd05d0,data/81cd05d0/src/970588b6.json,52677,52677,T,F,23,59,F,F,,,,,0.0,0,F,F,F,UUID:Key -1341476,970588b6,GitHub,81cd05d0,data/81cd05d0/src/970588b6.json,52680,52680,T,F,23,59,F,F,,,,,0.0,0,F,F,F,UUID:Key +1341476,970588b6,GitHub,81cd05d0,data/81cd05d0/src/970588b6.json,52680,52680,T,F,23,59,F,F,,,,id_only,0.0,0,F,F,F,UUID:Key 1341477,a84a47e9,GitHub,81cd05d0,data/81cd05d0/src/a84a47e9.json,3031,3031,T,F,1076,1112,F,F,,,,,0.0,0,F,F,F,UUID:Token 1341478,a84a47e9,GitHub,81cd05d0,data/81cd05d0/src/a84a47e9.json,3080,3080,T,F,1152,1188,F,F,,,,,0.0,0,F,F,F,UUID:Token 1341479,a84a47e9,GitHub,81cd05d0,data/81cd05d0/src/a84a47e9.json,4518,4518,T,F,1076,1112,F,F,,,,,0.0,0,F,F,F,UUID:Token diff --git a/meta/894e3377.csv b/meta/894e3377.csv index 46a24725..ed930dd6 100644 --- a/meta/894e3377.csv +++ b/meta/894e3377.csv @@ -187,7 +187,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 36114,40a8ca9e,GitHub,894e3377,data/894e3377/test/40a8ca9e.php,647,647,Template,F,17,21,F,F,CharsOnly,,,Unknown,2.0,4,F,F,F,Password 36170,40a8ca9e,GitHub,894e3377,data/894e3377/test/40a8ca9e.php,1050,1050,F,T,16,581,F,F,Any,,,Public,5.86,557,F,F,F,Key 36175,ac94d981,GitHub,894e3377,data/894e3377/test/ac94d981.php,504,504,Template,T,16,109,F,F,Any,,,Unknown,4.96,85,F,F,F,Key -36180,dc238167,GitHub,894e3377,data/894e3377/test/dc238167.php,84,84,F,F,16,68,F,F,,,,,0.0,0,F,F,F,Key +36180,dc238167,GitHub,894e3377,data/894e3377/test/dc238167.php,84,84,T,F,16,68,F,F,,,,,0.0,0,F,F,F,Key 36183,ac94d981,GitHub,894e3377,data/894e3377/test/ac94d981.php,484,484,Template,T,16,189,F,F,Any,,,Unknown,5.48,165,F,F,F,Key 37575,40a8ca9e,GitHub,894e3377,data/894e3377/test/40a8ca9e.php,690,690,Template,F,17,21,F,F,CharsOnly,,,Unknown,2.0,4,F,F,F,Password 37576,40a8ca9e,GitHub,894e3377,data/894e3377/test/40a8ca9e.php,731,731,Template,F,17,21,F,F,CharsOnly,,,Unknown,2.0,4,F,F,F,Password diff --git a/meta/8cda00f3.csv b/meta/8cda00f3.csv index c5474f1a..e9a64b2f 100644 --- a/meta/8cda00f3.csv +++ b/meta/8cda00f3.csv @@ -914,3 +914,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 135314,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,300,300,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth 1340770,4cf2897e,GitHub,8cda00f3,data/8cda00f3/src/4cf2897e.go,16,16,T,F,22,58,F,F,,,,,0.0,0,F,F,F,UUID 1479363,0a7921b3,GitHub,8cda00f3,data/8cda00f3/test/0a7921b3.go,155,155,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth +1479364,8bb83972,GitHub,8cda00f3,data/8cda00f3/src/8bb83972.go,146,146,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth diff --git a/meta/a09d9e50.csv b/meta/a09d9e50.csv index fb195bc8..92a5dc35 100644 --- a/meta/a09d9e50.csv +++ b/meta/a09d9e50.csv @@ -191,7 +191,9 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 75547,9c72cd5b,GitHub,a09d9e50,data/a09d9e50/src/9c72cd5b.ts,8361,8361,F,F,,,F,F,,,,,0,0,F,F,F,Token 76913,c747d0a2,GitHub,a09d9e50,data/a09d9e50/src/c747d0a2.js,264,264,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key 76914,c747d0a2,GitHub,a09d9e50,data/a09d9e50/src/c747d0a2.js,248,248,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key -76957,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,4554,4554,F,F,,,F,F,,,,,0,0,F,F,F,Secret:Key +76957,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,4554,4554,T,F,367,379,F,F,,,,,0,0,F,F,F,Key +1076957,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,4554,4554,T,F,1013,1025,F,F,,,,,0,0,F,F,F,Key +1276957,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,4554,4554,F,F,1733,,F,F,,,,,0,0,F,F,F,Key:Secret 76958,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,257,257,F,F,,,F,F,,,,,0,0,F,F,F,Password 76959,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,1100,1100,F,F,,,F,F,,,,,0,0,F,F,F,Atlassian Old PAT token 76960,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,441,441,F,F,,,F,F,,,,,0,0,F,F,F,API @@ -362,8 +364,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 1138239,c419ffed,GitHub,a09d9e50,data/a09d9e50/test/c419ffed.js,798,798,T,F,55,67,F,F,,,,,0.0,0,F,F,F,Auth 1138240,c419ffed,GitHub,a09d9e50,data/a09d9e50/test/c419ffed.js,809,809,T,F,56,68,F,F,,,,,0.0,0,F,F,F,Auth 1138241,c419ffed,GitHub,a09d9e50,data/a09d9e50/test/c419ffed.js,820,820,T,F,56,68,F,F,,,,,0.0,0,F,F,F,Auth -1130094,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,3391,3391,T,F,1349,1393,F,F,,,,,0.0,0,F,F,F,Token -1130095,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,2435,2435,T,F,5075,5111,F,F,,,,,0.0,0,F,F,F,Auth:Token +1130094,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,3391,3391,T,F,1349,1393,F,F,,,,,0.0,0,F,F,F,Auth:Token +1130095,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,2435,2435,T,F,5075,5172,F,F,,,,,0.0,0,F,F,F,Auth:Token 1341516,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,2435,2435,F,F,5256,5264,F,F,,,,,0.0,0,F,F,F,Password 1238223,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,355,355,Template,F,324,364,F,F,,,,,0.0,0,F,F,F,Password 1338370,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,3413,3413,F,F,1918,1926,F,F,,,,,0.0,0,F,F,F,Password @@ -401,3 +403,5 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 1341503,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,2123,2123,T,F,1248,1284,F,F,,,,,0.0,0,F,F,F,UUID 1341538,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,4301,4301,T,F,401,437,F,F,,,,,0.0,0,F,F,F,UUID 1341539,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,4301,4301,T,F,542,578,F,F,,,,,0.0,0,F,F,F,UUID +1479367,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,727,727,F,F,,,F,F,,,,,0.0,0,F,F,F,Password +1479371,054074fe,GitHub,a09d9e50,data/a09d9e50/test/054074fe.ndjson,1675,1675,F,F,,,F,F,,,,,0.0,0,F,F,F,API diff --git a/meta/a0cd6261.csv b/meta/a0cd6261.csv index 27e68a01..61551a27 100644 --- a/meta/a0cd6261.csv +++ b/meta/a0cd6261.csv @@ -125,7 +125,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 31483,c7caa3f3,GitHub,a0cd6261,data/a0cd6261/other/c7caa3f3.md,227,227,T,F,13,45,F,F,CharsOnly,,,Secret,0.0,32,F,F,F,Credential 31515,3b98604a,GitHub,a0cd6261,data/a0cd6261/other/3b98604a.md,2179,2179,F,F,,,F,F,,,,,0,0,F,F,F,Bitbucket Client ID:Bitbucket Client Secret 31732,919b979a,GitHub,a0cd6261,data/a0cd6261/other/919b979a.md,299,299,Template,T,109,117,F,F,Any,,,Secret,2.75,8,F,F,F,Password -31738,e955e6ae,GitHub,a0cd6261,data/a0cd6261/other/e955e6ae.md,84,84,T,F,127,132,F,F,Any,,,Secret,1.39,5,F,F,F,Password +31738,e955e6ae,GitHub,a0cd6261,data/a0cd6261/other/e955e6ae.md,84,84,T,F,127,145,F,F,Any,,,Secret,1.39,5,F,F,F,Password 31749,3b98604a,GitHub,a0cd6261,data/a0cd6261/other/3b98604a.md,1261,1261,T,T,62,72,F,F,Any,,,Secret,2.79,10,F,F,F,Password 32342,c4cf4dfc,GitHub,a0cd6261,data/a0cd6261/other/c4cf4dfc.md,98,98,T,F,27,62,T,F,Any,,,Token,3.59,35,F,F,F,Key:Token 32624,837f5751,GitHub,a0cd6261,data/a0cd6261/other/837f5751.md,418,418,T,T,138,159,F,F,Any,,,Secret,3.38,21,F,F,F,Key:Password diff --git a/meta/b356a0cd.csv b/meta/b356a0cd.csv index db9b39e7..a9919a8d 100644 --- a/meta/b356a0cd.csv +++ b/meta/b356a0cd.csv @@ -129,4 +129,5 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 134325,3ec69825,GitHub,b356a0cd,data/b356a0cd/src/3ec69825.go,120,120,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key 138054,7b7ea72b,GitHub,b356a0cd,data/b356a0cd/src/7b7ea72b.sh,39,39,F,F,64,84,F,F,,,,,0.0,0,F,F,F,Password 1340836,25426921,GitHub,b356a0cd,data/b356a0cd/other/25426921.md,1,1,T,F,73,109,F,F,,,,,0.0,0,F,F,F,UUID -1381101,41240813,GitHub,b356a0cd,data/b356a0cd/src/41240813.go,435,435,F,F,18,37,F,F,,,,,0.0,0,F,F,F,Key +1381101,41240813,GitHub,b356a0cd,data/b356a0cd/src/41240813.go,435,435,F,F,,,F,F,,,,,0.0,0,F,F,F,Key +1479365,41240813,GitHub,b356a0cd,data/b356a0cd/src/41240813.go,432,432,F,F,,,F,F,,,,,0.0,0,F,F,F,Key diff --git a/meta_cred.py b/meta_cred.py index d1175101..b96cf871 100644 --- a/meta_cred.py +++ b/meta_cred.py @@ -12,10 +12,11 @@ def __init__(self, cs_cred: dict): line_data_list = cs_cred["line_data_list"] line_data_list.sort(key=lambda x: (x["line_num"], x["value_start"], x["value_end"])) path = Path(line_data_list[0]["path"]) - self.path = '/'.join([str(x) for x in path.parts[-4:]]) - if not self.path.startswith('data/'): - # license files ... - self.path = '/'.join([str(x) for x in path.parts[-3:]]) + assert path.parts.count("data") == 1, f"Only one 'data' dir must be in path:{path}" + for n, i in enumerate(path.parts): + if "data" == i: + self.path = '/'.join([str(x) for x in path.parts[n:]]) + break # path for benchmark must start from "data/" assert self.path.startswith('data/'), cs_cred self.valid_path = bool(self.valid_path_regex.match(self.path)) # to skip license files