-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzfs-pool-stats.py
436 lines (320 loc) · 19.5 KB
/
zfs-pool-stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
#! python3
import subprocess
import math
import argparse
import time
import curses
import random
""" TODO:
* Restore shell_cmd() lines commented out and remove placeholder lists
* Change shell_cmd() to run locally instead of remotely
* Write a basic document of how data flows from start to finish.
* From which arguments
* From which system commands
* Through which functions, to which functions
* At what stage each conv_X() function is called, and what it does
* ...to have mercy on my future self and others.
* I tried to avoid the "nesting china doll" approach, and with the exception of implementing curses,
mostly succeeded in keeping everything separate and modular. However, many functions are not
input-agnostic and will error if input is not specially formatted.
That's fine though, I wrote a script, not a framework.
* When an invalid column is passed to --columns, exit with an error instead of continuing silently.
* An -c+ flag to specify additional columns to append to the default set, rather than re-specifying the whole set.
* Add a third sub-argument to --columns for decimal places
* Add a fourth sub-argument to --columns for custom column names printed at runtime. Also include
default column names which are more sane. Or maybe scrap this idea and use sane column names to begin with.
* Shorten length of key names in {zpool_keys} in preparation for column output, where width is at a premium
* Strip whitespace from args.COLUMNS input, to prevent dictionary key mis-matches.
* Standardize all input flags to lowercase handling, to prevent mismatches
* Improve conv_microseconds() and conv_bytes() to round up by length (99s = 1.65m != 100s), instead of time (59s > 1m)
* Add a sticky header with ["stateHealth"] and ["stateText"]
* Use first line of `zpool iostat` (without -y) to get statistics since boot, and display as the last line (sticky)
and with some special stylization (bold, underlined, etc.)
* Implement text coloring for pool state health
* A brief warning if the user specifies a REPEAT_DELAY < 1, that stats will be less precise.
* Try removing the need for modules: math, random
* Implement threading or async so that any delay inherent to get_stats() isn't added to --interval
* Implement multiple simultaneous pool outputs (this would take a gigantic effort)
* Add internal delays in get_stats() of between 5 and 15 seconds (to save CPU) for any relatively stable values,
which are extremely unlikely to change +/- 1%. These are:
* Name, LogicCapUsed, LogicCapFree, VirtCapUsed, VirtCapFree, VirtCompRatio,
VirtCapUsedByChilds, VirtCapUsedBySnaps, StateHealth, StateFragPerc, StateText
* Use time module to either calculate or pass based on the current second of the current minute.
* Experiment with curses in print_columns() to remove the weird header and scrolling behavior on init
"""
##### Accept arguments #####
# Define arguments parser
def parse_complex_arg(string):
    """Parse a flag value in format 'Main:SubArg1:SubArg2,Main2,...' into a dictionary.

    Used as the argparse ``type=`` callable to handle complex/structured flags.

    Args:
        string: The raw flag (string) to be parsed and split.
            Primary arguments are split by ',' and Sub-arguments are split by ':'.
            Whitespace around each Primary argument and Sub-argument is ignored,
            and empty Primary arguments (e.g. from a trailing ',') are skipped,
            so that they cannot produce keys that never match a column name.

    Returns:
        A dictionary comprised of Primary arguments (as keys) and Sub-arguments (as a list of values).
        If no Sub-arguments were passed for a Primary argument, its value is None.

    Raises:
        argparse.ArgumentTypeError: If string is not a string, or if it contains
            no Primary arguments at all.
    """
    invalid = argparse.ArgumentTypeError("ERROR: Invalid format for --columns. Use Column1,Column2, ... ")
    if not isinstance(string, str):
        raise invalid

    arguments = {}
    for primary in string.split(','):  # Iterate over Primary arguments
        # The first ':'-separated field is the key; any remaining fields are Sub-arguments.
        key, *value = (field.strip() for field in primary.split(':'))
        if not key:
            continue
        # No ':' separators means no Sub-arguments: store None rather than an empty list,
        # because the conv_X() functions test `notation is None` for automatic scaling.
        arguments[key] = value or None

    if not arguments:
        raise invalid
    return arguments
# Build the command-line parser. Parsing happens right here at module level,
# so `args` exists before any of the functions below are defined or called.
parser = argparse.ArgumentParser()
# --columns (-c): converted by parse_complex_arg() and stored as args.COLUMNS,
# a dictionary of {column name: list of sub-arguments or None}.
# NOTE: the help strings below use backslash line continuations inside the string
# literal; the continuation lines are kept flush-left as in SOURCE, because any
# leading whitespace there would become part of the help text.
parser.add_argument('--columns', '-c', dest="COLUMNS", type=parse_complex_arg,
                    # Default columns if none specified. argparse applies `type` to string
                    # defaults, so this string is also run through parse_complex_arg().
                    # TODO: VirtCapUsedPerc,VirtCompPerc are being ignored?
                    default="PoolName,VirtCapUsed,VirtCapFree,VirtCapTot,VirtCapUsedPerc,BwRead:M,BwWrite:M,TotalwaitBoth,StateFragPerc,VirtCompPerc,VirtCapUsedBySnaps:G",
                    help='A comma-separated list of columns to output. Optionally specify :scale. \
For example: --columns PoolName,StateHealth,VirtCapFree:T ')
# --interval (-t): stored as args.INTERVAL, a float number of seconds.
parser.add_argument('--interval', '-t', dest="INTERVAL", type=float,
                    default=1.0, # Default delay interval of 1 second, if not specified.
                    help='The frequency of time (in seconds) to output statistics. Accepts whole or decimal numbers. \
This also affects the sampling of some delay measurements; the recommendation is 1 second \
or more, to allow a sufficient sampling window for collecting i/o timing statistics. \
For example: --interval 1.5 ')
# --pool (-p): stored as args.POOL, a string. This flag is mandatory.
parser.add_argument('--pool', '-p', dest="POOL", type=str,
                    required=True,
                    help='The name of the pool to report statistics for. For example: --pool tank ')
args = parser.parse_args() # Expose args.COLUMNS, args.INTERVAL, etc. for use
##### Define functions #####
def shell_cmd(cmdline):
    """Run a shell command via SSH and return its output.

    Args:
        cmdline: The shell command to run on the system.

    Returns:
        A string representing either stdout or stderr,
        depending on the exit code of cmdline: stdout when the exit code
        is 0, otherwise stderr.
    """
    # check=False on purpose: with check=True a non-zero exit code raises
    # subprocess.CalledProcessError, so the stderr branch below could never run.
    # NOTE(review): the SSH target literal is kept exactly as found in SOURCE; it
    # looks like a redacted placeholder -- confirm the real user@host.
    result = subprocess.run(["ssh", "[email protected]", cmdline],
                            capture_output=True, text=True, check=False)
    return result.stdout if result.returncode == 0 else result.stderr
def get_stats(pool):
    """Build the dictionary of ZFS pool statistics used by the rest of the script.

    The values are meant to come from the `zpool iostat`, `zfs get`, `zpool list`
    and `zpool status` system commands. At present every shell_cmd() call is
    commented out and hard-coded placeholder values are used instead, so `pool`
    is not yet read.

    As a side effect, the module-level globals `zpool_keys_types` and
    `zpool_keys_map` are (re)assigned; conv_dict_notation() relies on them.

    Args:
        pool: The name of the ZFS pool to collect statistics on.

    Returns:
        A dictionary of ZFS pool statistics. Each key is a (name, type) tuple and
        each value is a float where conversion was possible, otherwise a string.
    """
    global zpool_keys_types
    global zpool_keys_map

    # NOTE: In case the output sequence from any of the underlying commands ever changes in a
    #       future version, these keys and the gathered values will be misaligned, requiring
    #       source code adjustment. The keys are listed in the order the values are gathered:
    #       from "PoolName" the values of `zpool iostat`, from "VirtCapUsed" the values of
    #       `zfs get`, from "StateHealth" the values of `zpool list`, and finally "StateText"
    #       from `zpool status`.
    stat_keys = [
        ("PoolName", "label"), ("LogicCapUsed", "size"), ("LogicCapFree", "size"),
        ("OpsRead", "size"), ("OpsWrite", "size"), ("BwRead", "size"), ("BwWrite", "size"),
        ("TotalwaitRead", "time"), ("TotalwaitWrite", "time"),
        ("DiskwaitRead", "time"), ("DiskwaitWrite", "time"),
        ("SyncqwaitRead", "time"), ("SyncqwaitWrite", "time"),
        ("AsyncqwaitRead", "time"), ("AsyncqwaitWrite", "time"),
        ("ScrubWait", "time"), ("TrimWait", "time"),
        ("VirtCapUsed", "size"), ("VirtCapFree", "size"), ("VirtCompRatio", "label"),
        ("VirtCapUsedByChilds", "size"), ("VirtCapUsedBySnaps", "size"),
        ("StateHealth", "label"), ("StateFragPerc", "perc"), ("StateText", "label"),
    ]

    # The second element of every key tuple names its type; each type maps to the
    # conv_X() function that later renders values of that type for display.
    zpool_keys_types = ('size', 'time', 'label', 'perc')
    zpool_keys_map = {'size': conv_bytes, 'time': conv_microseconds, 'label': conv_str, 'perc': conv_perc}

    # Placeholder for:
    # shell_cmd("zpool iostat -Hypl " + pool + " " + REPEAT_DELAY + " " + "1").split()
    raw_values = ["amalgm", "51567724367872", "16344298516480", "16", "0", "8468325", "0",
                  "15682379", "-", "15682379", "-", "3532", "-", "3510", "-", "-", "-"]

    # Placeholder for:
    # shell_cmd("zfs get used,available,compressratio,usedbychildren " + pool + " -Hp -d 0 -o value | tr '\n' ' '").split()
    raw_values += ["54866186481664", "12908397449216", "1.01", "54700434006016"]

    # TODO: This `zfs get usedbysnapshots` command is very slow, because it's recursively checking
    #       all snapshots sizes (`-r`) and summing them (`awk`) before returning. Alternative?
    #       Also, this method with `grep` and `awk` is very clunky and may break with future `zfs` versions.
    #       Parsing and addition should be performed locally.
    # Placeholder for:
    # shell_cmd("zfs get usedbysnapshots " + pool + " -Hp -r -o value | grep -v '-' | awk '{s+=$1} END {printf \"%.0f\", s}'").split())
    raw_values += ["1381425606656"]

    # Placeholder for:
    # shell_cmd("zpool list -H -o health,frag " + pool)
    raw_values += ["ONLINE", "20%"]

    # TODO: Clean up this `zpool status` command and perform text parsing locally instead.
    # Placeholder for:
    # shell_cmd("zpool status " + pool + " | sed -n '3,$p' | tr '\n' ' ' | tr -d '\011\012' | sed -e 's/^[ \t]*//' | " + "sed --regexp-extended 's/ config\:.*//g'")
    raw_values += [
        "scan: scrub repaired 0B in 1 days 12:59:37 with 0 errors on Sat Jan 27 22:59:39 2024 remove: Removal of mirror canceled on Tue Jan 9 08:30:58 2024"]

    # Pair every key with its value, converting eligible values to floats so we can do math.
    stats = {}
    for stat_key, raw in zip(stat_keys, raw_values):
        stats[stat_key] = conv_float(raw)

    # Derived entries, referenced by their full tuple names.
    virt_used = stats[('VirtCapUsed', 'size')]
    stats[('VirtCapTot', 'size')] = virt_used + stats[('VirtCapFree', 'size')]
    stats[('VirtCapUsedPerc', 'perc')] = virt_used / stats[('VirtCapTot', 'size')]
    stats[('VirtCompPerc', 'perc')] = stats[('VirtCompRatio', 'label')] - 1
    stats[('TotalwaitBoth', 'time')] = stats[('TotalwaitRead', 'time')] + stats[('TotalwaitWrite', 'time')]
    # Rescale the fragmentation figure (e.g. 20.0) to a fraction (0.2) for conv_perc().
    stats[('StateFragPerc', 'perc')] = stats[('StateFragPerc', 'perc')] * 0.01
    return stats
def conv_float(value):
    """Try to convert a value to float, otherwise pass through the original input.

    Args:
        value: The value (string, int or float) to convert to float.
            For strings, leading/trailing '-' and '%' characters are ignored
            during conversion, so "20%" becomes 20.0 and a bare "-" becomes 0.

    Returns:
        The float representation of value; 0 if value is a string that is empty
        once stripped; or value itself, unmodified, if it cannot be converted.
    """
    if isinstance(value, str):
        stripped = value.strip('-%')  # Remove undesired chars
        if not stripped:  # Convert empty strings to 0.
            return 0
        try:
            return float(stripped)
        except ValueError:
            # Return the ORIGINAL string, not the stripped one, so labels that
            # merely begin or end with '-' or '%' are not silently altered.
            return value
    # Non-string input (e.g. int) has no .strip(); convert it directly.
    try:
        return float(value)
    except (TypeError, ValueError):
        return value
def conv_str(input, notation=None):
    """Return the string form of any input.

    Stands in for the built-in str() in the conversion dispatch table, because
    str() would raise when handed the extra notation argument.

    Args:
        input: The value to render as a string.
        notation: Ignored. Accepted only so the signature matches the other conv_X() functions.

    Returns:
        str(input).
    """
    text = str(input)
    return text
def conv_perc(input, notation=None):
    """Render a fractional number as a whole-number percentage string.

    Args:
        input: The numeric value to render, where 1 corresponds to 100%.
        notation: Ignored. Accepted only so the signature matches the other conv_X() functions.

    Returns:
        input multiplied by 100, with no decimal places, followed by '%'.
    """
    # The '.0%' format spec scales by 100, drops the decimals and appends the '%' sign.
    percent_spec = ".0%"
    return format(input, percent_spec)
def conv_bytes(bytes, notation=None):
    """Convert byte values to a specified notation. Uses powers of 1024 as output by `zfs get`.

    Args:
        bytes: The byte value (int or float) to convert.
        notation: The desired notation ('B', 'K', 'M', 'G', 'T', 'P', 'E'), case-insensitive.
            May be the notation itself or a sequence (the Sub-arguments list built by
            the argument parser) whose first element is the notation.
            If unspecified, empty or invalid, automatically chooses the highest
            notation that keeps the value at 1 or above.

    Returns:
        A string representing the byte value, rounded to a whole number, expressed
        in the chosen notation. 0 and string inputs are returned unmodified.
    """
    # Handle 0 and strings. Return them unmodified.
    if bytes == 0 or isinstance(bytes, str):
        return bytes
    notations = ("B", "K", "M", "G", "T", "P", "E")

    if notation:  # Manual unit scaling, if specified.
        unit = str(notation[0])
        if unit.upper() in notations:
            divisor = 1024 ** notations.index(unit.upper())  # Byte value to divide by
            return f"{round(bytes / divisor)}{unit}"
        # Report the bad notation, then fall through to automatic scaling so the
        # caller always gets a printable string back instead of None.
        print(f"ValueError: {unit} is not one of: {notations}")

    # Automatic unit scaling. Compare against exact integer powers of 1024 rather
    # than using floor(log()): that is immune to float rounding at exact powers,
    # works for magnitudes below 1 and for negative values, and cannot run past
    # the largest notation.
    magnitude = abs(bytes)
    index = 0
    while index < len(notations) - 1 and magnitude >= 1024 ** (index + 1):
        index += 1
    return f"{round(bytes / 1024 ** index)}{notations[index]}"
def conv_microseconds(microseconds, notation=None):
    """Convert microsecond values to a specified notation. Uses microseconds to align with `zpool iostat -p`.

    Args:
        microseconds: The microsecond value (int or float) to convert.
        notation: The desired notation ('d', 'h', 'm', 's', 'ms', 'us').
            May be the notation string itself, or a sequence (the Sub-arguments list
            built by the argument parser) whose first element is the notation.
            If unspecified, empty or invalid, automatically chooses the highest
            notation that keeps the value at 1 or above.

    Returns:
        A string representing the time value, rounded to a whole number, expressed
        in the chosen notation. 0 and string inputs are returned unmodified.
    """
    # Handle 0 and strings. Return them unmodified.
    if microseconds == 0 or isinstance(microseconds, str):
        return microseconds
    # Ordered from the largest unit to the smallest; automatic scaling relies on this order.
    notations = {"d": 86400000000, "h": 3600000000, "m": 60000000, "s": 1000000, "ms": 1000, "us": 1}

    if notation:  # Manual unit scaling, if specified.
        # A plain string IS the notation: indexing it would truncate 'ms' to 'm'.
        unit = notation if isinstance(notation, str) else str(notation[0])
        if unit in notations:
            return f"{round(microseconds / notations[unit])}{unit}"
        # Report the bad notation, then fall through to automatic scaling so the
        # caller always gets a printable string back instead of None.
        print(f"ValueError: {unit} is not one of: {notations}")

    # Automatic unit scaling. Start from the smallest unit so that values below
    # 1us (or negative values) still resolve to a unit.
    unit, divisor = "us", 1
    for name, size in notations.items():
        if microseconds >= (size - 0.0001):  # Subtract a small rounding tolerance
            unit, divisor = name, size
            break  # Exit the loop once the appropriate unit is found
    return f"{round(microseconds / divisor)}{unit}"
def conv_dict_notation(ref_keys, conv_keys):
    """Render selected raw statistics in human-readable notation.

    Args:
        ref_keys: A dictionary of raw values. Its keys must be (key_name, key_type)
            tuples, as returned by get_stats().
        conv_keys: A dictionary mapping plain key names to the notation to use for
            that key (or None). Only names that also occur in {ref_keys} are used.

    Returns:
        A new dictionary keyed by the plain names common to both inputs, holding
        the corresponding {ref_keys} values after conversion.
    """
    converted = {}
    for key_name, notation in conv_keys.items():
        # {ref_keys} is keyed by (key_name, key_type) but callers only know key_name,
        # so probe every known key_type until the tuple matches.
        # {zpool_keys_types} and {zpool_keys_map} are globals defined in get_stats().
        for key_type in zpool_keys_types:
            candidate = (key_name, key_type)
            if candidate not in ref_keys:
                continue
            # The key_type selects which conv_X() function performs the conversion.
            convert = zpool_keys_map.get(key_type)
            converted[key_name] = convert(ref_keys[candidate], notation)
    return converted
def get_keys_width(input_dict):
    """Work out how wide each column must be to fit both its name and its value.

    Args:
        input_dict: The dictionary whose keys and values are measured.

    Returns:
        A new dictionary with the same keys as input_dict. Each value is an integer:
        the length in characters of str(key) or str(value), whichever is greater,
        plus 2.
    """
    padding = 2
    return {
        name: max(len(str(name)), len(str(cell))) + padding
        for name, cell in input_dict.items()
    }
def print_columns(input_dict=None, interval=None):
    """On a loop, print out a dictionary in columns format.

    Runs until interrupted (e.g. KeyboardInterrupt); it never returns normally.

    Args:
        input_dict: The input dictionary to be output at each interval.
            If omitted, it is built once, when the function is called, from
            conv_dict_notation(get_stats(args.POOL), args.COLUMNS).
        interval: The delay in seconds (float) between outputs.
            If omitted, max(args.INTERVAL, 0.01) is used.
    """
    # Resolve the defaults at call time. Computing them in the signature would run
    # the statistics collection as a side effect of merely defining this function.
    if input_dict is None:
        input_dict = conv_dict_notation(get_stats(args.POOL), args.COLUMNS)
    if interval is None:
        interval = max(args.INTERVAL, 0.01)

    # Determine the minimum width of each column.
    column_widths = get_keys_width(input_dict)

    # Assemble the header and values rows as left-aligned, fixed-width columns.
    header = "".join(f"{key:<{column_widths[key]}}" for key in input_dict)
    # str() first: a None value has no format-spec support and would raise TypeError.
    values = "".join(f"{str(value):<{column_widths[key]}}" for key, value in input_dict.items())

    # Cast some curses
    def draw(stdscr, header, values):
        """Paint the header and append one values row per interval, forever."""
        stdscr.clear()  # Clear the screen
        stdscr.scrollok(True)  # Enable scrolling
        curses.echo()  # Echo stdin

        scr_height, _ = stdscr.getmaxyx()  # Get screen dimensions
        header_row = 1
        # Start at the header row; the increment below puts the first values row
        # on the line after it, so it no longer overwrites the header.
        scr_row = header_row

        while True:
            # Keep track of screen height to avoid errors with printing out of bounds
            scr_row += 1
            if scr_row >= scr_height:
                scr_row = scr_height - 1
            # Repaint the header on every pass so it is still shown after the window scrolls.
            stdscr.addstr(header_row, 0, header + "\n")
            # TODO: Remove random.randint() value used for debugging
            stdscr.addstr(scr_row, 0, f"{values} {random.randint(90, 110)} \n")  # Print latest values
            stdscr.refresh()
            time.sleep(interval)

    # curses.wrapper() initialises the screen, calls draw(), and restores the terminal on exit.
    curses.wrapper(draw, header, values)
##### Print output #####
try:
    print_columns()
except KeyboardInterrupt:  # Exit gracefully on ^C (SIGINT)
    # A bare `exit` only names the builtin without calling it, which does nothing.
    # Raise SystemExit explicitly so the script really terminates with status 0.
    raise SystemExit(0) from None