-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathhist
executable file
·81 lines (73 loc) · 2.42 KB
/
hist
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/awk -f
#
# Copyright (c) 2012,2014 Douglas G. Scofield, Uppsala University
#
# This code is covered by the Gnu GPLv2, please see LICENSE.
# Bugs and suggestions to https://github.com/douglasgscofield/tinyutils/issues.
#
# Calculates a histogram of input values placed into integer bins. Bins
# are half-open [i,i+1) so values are *floored down* to the nearest integer.
#
#
# CHANGELOG
# 2014-05-27 : don't quit on missing-column error
# 2013-05-15 : make FS and OFS be "\t" by default
# 2012-12-02 : clean the script up a bit
# 2012-11-27 : create the script
BEGIN {
    # --- field handling -------------------------------------------------
    # Input and output are tab-delimited by default.
    OFS = "\t";
    FS = "\t";

    # --- user-settable parameters ---------------------------------------
    col = 1;            # column holding the values to be binned
    header = 0;         # number of leading input lines to skip as a header (0 = none)
    skip_comment = 1;   # when set, input lines beginning with '#' are ignored
    print_header = 0;   # when set, emit a "bin", "count" header line before the output
    sparse = 0.01;      # occupancy threshold: below this fraction the histogram is withheld
    override = 0;       # set to 1 to print the histogram even when it is sparse
    drop_zero = 0;      # set to 1 to omit empty bins; also bypasses the sparse check

    # --- internal state -------------------------------------------------
    n_total = 0.0;      # running count of values
    n_a = 0.0;          # number of distinct occupied bins
    # Sentinels for the observed bin range; real data must lie strictly
    # between -1.0e+20 and 1.0e+20 for min/max tracking to work.
    the_max = -1.0e+20;
    the_min = 1.0e+20;
}
# floor(v): largest integer that is <= v.
# int() truncates toward zero, so for a negative non-integer v the truncated
# value lands above v and must be stepped down by one.
# 'ans' is a local scratch variable (awk's extra-parameter idiom), not an
# argument callers pass.
function floor(v, ans) {
    ans = int(v);
    if (ans > v)
        ans = ans - 1;
    return ans;
}
# Per-record rule: bin the value found in column 'col' and update the
# histogram state (a[], n_a, n_total, the_min, the_max).
{
    # Skip the first 'header' lines when a header line count was given.
    if (header && NR <= header) next;
    # Skip comment lines unless skip_comment was turned off.
    if (skip_comment && $0 ~ /^#/) next;
    # Report, but do not abort on, records that lack the requested column.
    if (NF < col) {
        print "on input line " NR " missing requested column" > "/dev/stderr";
        next;
    }

    # Force numeric interpretation, then floor into the integer bin [val, val+1).
    val = floor($(col) + 0.0);

    if (! (val in a)) {  # first value in this bin; counts are kept as floats
        a[val] = 1.0;
        n_a += 1.0;
    } else {
        a[val] += 1.0;
    }
    # Count every binned value, not only the repeat hits on a bin.
    n_total += 1.0;

    # Track the range with two independent tests. With an else-if, a record
    # that raises the maximum (always true for the first record) would never
    # be considered for the minimum, leaving the_min at its sentinel for
    # single-valued or strictly increasing input.
    if (val > the_max) the_max = val;
    if (val < the_min) the_min = val;
}
# Final rule: decide whether the histogram is dense enough to print, then
# emit one "bin<OFS>count" line per integer bin from the_min to the_max.
END {
    if (print_header)
        print "bin","count";

    if (n_a == 0) {
        # No value was binned, so the_min/the_max still hold their sentinels
        # and there is no range to report on.
        print "No input values found, no histogram produced" > "/dev/stderr";
    } else {
        # The inclusive integer range [the_min, the_max] holds max-min+1 bins.
        # Using max-min alone divides by zero when all data share one bin and
        # can report an occupancy above 1.
        n_bins = the_max - the_min + 1;
        # Guard the division in case the range is degenerate.
        occupancy = (n_bins > 0) ? n_a / n_bins : 0;

        if (occupancy < sparse && ! override && ! drop_zero) {
            printf("Less than %f of the input range is occupied (%f). Use override=1 or drop_zero=1 to print the histogram anyway\n", sparse, occupancy) > "/dev/stderr";
        } else {
            for (i = the_min; i <= the_max; i += 1.0) {
                # Membership test avoids creating an array entry for every
                # empty bin merely by looking at it.
                if (! (i in a)) {
                    if (drop_zero) continue;
                    printf("%.0f%s%.0f\n", i, OFS, 0.0);
                } else {
                    printf("%.0f%s%.0f\n", i, OFS, a[i]);
                }
            }
        }
    }
}