-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathget_sample_features.py
68 lines (53 loc) · 2.21 KB
/
get_sample_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding: utf-8 -*-
import argparse
from lib.io_utils import *
from lib.math_utils import *
from lib.processing_utils import *
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
import numpy as np
import os
import sys
# input
parser = argparse.ArgumentParser()
parser.add_argument('-in', dest="INPUT_FILE", default="tmp/ia_fedflixnara.csv", help="Input file")
parser.add_argument('-dir', dest="SAMPLE_FILE_DIRECTORY", default="tmp/ia_fedflixnara_samples/", help="Directory to where the .csv files with sample data is found")
parser.add_argument('-out', dest="OUTPUT_FILE", default="", help="File to write results to. Leave blank to update the input file")
parser.add_argument('-threads', dest="THREADS", default=3, type=int, help="Number of concurrent threads, -1 for all available")
a = parser.parse_args()

# When no output file is given, the results are written back to the input file
OUTPUT_FILE = a.OUTPUT_FILE if len(a.OUTPUT_FILE) > 0 else a.INPUT_FILE
# getThreadCount is a project helper; presumably it resolves -1 to the number
# of available workers, as the -threads help text describes
THREADS = getThreadCount(a.THREADS)

# get files
fieldNames, rows = readCsv(a.INPUT_FILE)
rowCount = len(rows)

# Columns this script computes; any that are missing from the input header are
# appended so they are included when the rows are written out
FEATURES_TO_ADD = ["samples", "medianPower", "medianHz", "medianClarity", "medianDur"]
for f in FEATURES_TO_ADD:
    if f not in fieldNames:
        fieldNames.append(f)

# One (row index, sample csv path) pair per input row. os.path.join keeps the
# path valid whether or not -dir was given with a trailing separator.
filenames = [(i, os.path.join(a.SAMPLE_FILE_DIRECTORY, r["filename"] + ".csv")) for i, r in enumerate(rows)]

# Number of sample files processed so far; used for progress reporting
progress = 0
def getSampleFeatures(p):
    """Compute the summary features for one row's sample file.

    p is an (index, path) pair: the index of the row in the input file and
    the path of the .csv file holding that row's samples.

    Returns a tuple (index, features) where features maps every name in
    FEATURES_TO_ADD to a value. A value of -1 means the feature could not be
    computed: all features stay -1 when the sample file does not exist, and
    the median features stay -1 when the file contains no samples.
    """
    # NOTE: this counter is shared by all workers of the thread pool and is
    # incremented without a lock, so the reported progress may be approximate
    global progress

    features = {name: -1 for name in FEATURES_TO_ADD}
    index, samplePath = p

    if os.path.isfile(samplePath):
        _, sampleRows = readCsv(samplePath, verbose=False)
        features["samples"] = len(sampleRows)
        if len(sampleRows) > 0:
            # (output feature, sample column) pairs; each feature is the
            # median of that column over all samples, rounded to 2 decimals.
            # Assumes readCsv yields numeric values for these columns.
            medianColumns = (
                ("medianPower", "power"),
                ("medianHz", "hz"),
                ("medianClarity", "clarity"),
                ("medianDur", "dur"),
            )
            for featureName, column in medianColumns:
                values = [row[column] for row in sampleRows]
                features[featureName] = round(np.median(values), 2)

    progress += 1
    printProgress(progress, rowCount)

    return (index, features)
# Compute the features for every sample file using a pool of worker threads
pool = ThreadPool(THREADS)
try:
    results = pool.map(getSampleFeatures, filenames)
finally:
    # Always shut the pool down, even if a worker raised an exception
    pool.close()
    pool.join()

# Update rows
# Each result carries the index of the row it belongs to
for i, r in results:
    rows[i].update(r)

writeCsv(OUTPUT_FILE, rows, headings=fieldNames)