-
Notifications
You must be signed in to change notification settings - Fork 4
/
stats.py
executable file
·27 lines (25 loc) · 1004 Bytes
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python3
import sys
import pandas as pd
from io import StringIO
import glob
import subprocess
def compute_stats(dataset: str, extension: str) -> None:
    """Write per-file diff statistics for a dataset to ``{dataset}-sizes.csv``.

    Every file matching ``{dataset}/before/**/*.{extension}`` is paired with
    the file at the same relative path under ``{dataset}/after``.  The unified
    diff of each pair is piped through ``diffstat -t`` and the CSV it prints
    is parsed into a DataFrame whose ``FILENAME`` column is overwritten with
    the path of the "before" file.  All rows are written to one CSV file.

    Requires the external ``diff`` and ``diffstat`` commands on ``PATH``.

    Args:
        dataset: Directory containing the ``before`` and ``after`` trees.
        extension: File extension to match, without the leading dot.

    Raises:
        subprocess.CalledProcessError: If ``diffstat`` exits with a non-zero
            status.
    """
    before_root = f"{dataset}/before"
    after_root = f"{dataset}/after"

    frames = []
    for before_file in glob.glob(f"{before_root}/**/*." + extension, recursive=True):
        # Same relative path, but rooted in the "after" tree.
        after_file = after_root + before_file[len(before_root):]

        # Equivalent of `diff -u before after | diffstat -t`.  The context
        # manager closes the pipe and reaps the diff process even when
        # diffstat fails, so no file descriptor leaks per processed file.
        with subprocess.Popen(
            ("diff", "-u", before_file, after_file), stdout=subprocess.PIPE
        ) as ps:
            output = subprocess.check_output(("diffstat", "-t"), stdin=ps.stdout)

        line = pd.read_csv(StringIO(output.decode("UTF-8")), sep=",")
        line["FILENAME"] = before_file
        frames.append(line)

    # Concatenate once instead of growing a DataFrame inside the loop, which
    # copies every previously collected row on each iteration.
    non_empty = [frame for frame in frames if not frame.empty]
    if non_empty:
        all_lines = pd.concat(non_empty, ignore_index=True)
    elif frames:
        # Every diff was empty: keep a header-only frame so the CSV still
        # carries the column names.
        all_lines = frames[-1]
    else:
        # No file matched the pattern at all.
        all_lines = pd.DataFrame()

    all_lines.to_csv(f"{dataset}-sizes.csv", index=False)
if __name__ == '__main__':
    # Two positional arguments are required: the dataset directory and the
    # file extension (without the dot).  Exit with a usage line rather than
    # an IndexError traceback when they are missing.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} DATASET EXTENSION")
    dataset = sys.argv[1]
    extension = sys.argv[2]
    compute_stats(dataset, extension)