-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsamples_to_features.py
113 lines (94 loc) · 3.23 KB
/
samples_to_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
import argparse
import csv
from lib.audio_utils import *
from lib.io_utils import *
from lib.math_utils import *
from lib.processing_utils import *
import librosa
from matplotlib import pyplot as plt
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
import os
import numpy as np
from pprint import pprint
import sys
# Command-line interface
parser = argparse.ArgumentParser()
parser.add_argument('-in', dest="INPUT_FILE", default="tmp/samples.csv", help="Input file")
# The directory is prefixed to each row's "filename" value to locate the media.
parser.add_argument('-dir', dest="MEDIA_DIRECTORY", default="media/sample/", help="Media directory")
parser.add_argument('-out', dest="OUTPUT_FILE", default="", help="CSV output file")
# -append is an integer flag (any value > 0 enables it) and is on by default.
parser.add_argument('-append', dest="APPEND", default=1, type=int, help="Append to existing data?")
parser.add_argument('-overwrite', dest="OVERWRITE", action="store_true", help="Overwrite existing data?")
parser.add_argument('-plot', dest="PLOT", action="store_true", help="Show plot?")
parser.add_argument('-threads', dest="THREADS", default=4, type=int, help="Number of threads")
args = parser.parse_args()

# Expose the parsed arguments as module-level settings
INPUT_FILE = args.INPUT_FILE
MEDIA_DIRECTORY = args.MEDIA_DIRECTORY
# With no explicit -out, the results are written back to the input file.
OUTPUT_FILE = args.OUTPUT_FILE or args.INPUT_FILE
APPEND = args.APPEND > 0
OVERWRITE = args.OVERWRITE
PLOT = args.PLOT
THREADS = args.THREADS

# Columns this script adds to the CSV headings when they are not already present
FEATURES_TO_ADD = ["power", "hz", "clarity", "note", "octave"]
# Read the sample rows (a list of dict-like rows) and the existing column names
fieldNames, rows = readCsv(INPUT_FILE)
rowCount = len(rows)
print("Found %s rows" % rowCount)

# Nothing to do: the output exists and we may neither overwrite nor append
if os.path.isfile(OUTPUT_FILE) and not OVERWRITE and not APPEND:
    print("%s already exists. Skipping." % OUTPUT_FILE)
    sys.exit()

# Nothing to do: every feature column is already present.
# NOTE: the check inspects the columns read from INPUT_FILE, while the message
# names OUTPUT_FILE; the two only differ when -out is given explicitly.
if APPEND and set(FEATURES_TO_ADD).issubset(set(fieldNames)) and not OVERWRITE:
    print("Headers already exists in %s. Skipping." % OUTPUT_FILE)
    sys.exit()

# Make sure output dirs exist
makeDirectories(OUTPUT_FILE)

# Resolve each row's media path and group the rows by file in a single pass.
# os.path.join keeps the result correct whether or not MEDIA_DIRECTORY ends
# with a path separator. File paths are kept in first-seen order so the order
# of the work list (and therefore of the output rows) is the same on every run.
filepaths = []
samplesByPath = {}
for row in rows:
    row["path"] = os.path.join(MEDIA_DIRECTORY, row["filename"])
    if row["path"] not in samplesByPath:
        samplesByPath[row["path"]] = []
        filepaths.append(row["path"])
    samplesByPath[row["path"]].append(row)

# One work item per unique media file, carrying all of that file's samples
params = [{
    "samples": samplesByPath[fp],
    "path": fp
} for fp in filepaths]
fileCount = len(params)
def samplesToFeatures(p):
    """Worker: compute the features for all samples of a single media file.

    `p` is one work item with a "path" entry (the media file path) and a
    "samples" entry (the rows that reference that file). Returns whatever
    getFeaturesFromSamples returns for that file and its samples.
    """
    return getFeaturesFromSamples(p["path"], p["samples"])
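# Debugging aid: uncomment the lines below to process only the first file
# serially (without the thread pool) and then exit.
# params = params[:1]
# for p in params:
#     samplesToFeatures(p)
# sys.exit(1)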
# Process the files concurrently; pool.map keeps results in the order of params
threads = getThreadCount(THREADS)
pool = ThreadPool(threads)
try:
    data = pool.map(samplesToFeatures, params)
finally:
    # Always shut the workers down, even when a worker raised, so the
    # pool's threads are not left behind on failure.
    pool.close()
    pool.join()

# flatten data: one result per file -> a single list of items
data = [item for sublist in data for item in sublist]

# Keep the existing columns and add only the feature columns that are missing
headings = fieldNames[:]
for feature in FEATURES_TO_ADD:
    if feature not in headings:
        headings.append(feature)

writeCsv(OUTPUT_FILE, data, headings)
# Optionally show side-by-side histograms of the computed features
if PLOT:
    plt.figure(figsize=(10, 6))

    # Gather every series up front, then draw one subplot per series
    histograms = [
        ("Power distribution", [d["power"] for d in data]),
        ("Frequency distribution", [d["hz"] for d in data]),
        ("Clarity distribution", [d["clarity"] for d in data]),
    ]
    for column, (title, values) in enumerate(histograms, start=1):
        ax = plt.subplot(1, 3, column)
        ax.set_title(title)
        plt.hist(values, bins=50)

    plt.tight_layout()
    plt.show()