-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcondor_job_metrics.py
35 lines (30 loc) · 1.99 KB
/
condor_job_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from prometheus_client import Counter, Histogram
class JobMetrics:
    """Wrapper class for holding prometheus job metrics.

    All metrics share one label set and are exposed as instance attributes:
    three counters (walltime hours, resource hours, job count) and three
    histograms (memory requested, memory used, resources requested).

    The metrics are created without an explicit ``registry`` argument, so
    prometheus_client attaches them to its default registry. Metric names
    must be unique per registry, so create a single instance per process.
    """

    def __init__(self):
        """Create every job metric with the shared label names."""
        labels = [
            'owner',
            'site',
            'schedd',
            'GPUDeviceName',
            'usage',
            'kind',
            'IceProdDataset',
            'IceProdTaskName',
            'MATCH_EXP_JOBGLIDEIN_ResourceName',
        ]
        # Histogram upper bounds; the last bucket catches everything above.
        # NOTE(review): units are not visible here (presumably GB for memory
        # and a device/core count for resources) -- confirm against the
        # caller that observes values.
        memory_buckets = (1, 2, 3, 4, 6, 8, 12, 20, 40, float('inf'))
        resource_buckets = (1, 2, 3, 4, 8, 16, float('inf'))

        self.condor_job_walltime_hours = Counter(
            'condor_job_walltime_hours',
            'Total job hours',
            labels)
        self.condor_job_resource_hours = Counter(
            'condor_job_resource_hours',
            'Total job resource kind hours',
            labels)
        self.condor_job_count = Counter(
            'condor_job_count',
            'Total job count with good exit status',
            labels)

        # NOTE(review): the "good"/"bad exit status" qualifiers in the help
        # texts below are carried over from the original; nothing in this
        # class filters on exit status, so verify them against the caller.
        self.condor_job_mem_req = Histogram(
            'condor_job_mem_req',
            'Total memory request with good exit status',
            labels,
            buckets=memory_buckets)
        # Help text previously said "memory request" (copy-paste from
        # condor_job_mem_req); this histogram tracks memory used.
        self.condor_job_mem_used = Histogram(
            'condor_job_mem_used',
            'Total memory used with good exit status',
            labels,
            buckets=memory_buckets)
        # Help text previously said "memory request"; this histogram tracks
        # the generic resource request and uses the resource buckets.
        self.condor_job_resource_req = Histogram(
            'condor_job_resource_req',
            'Total resource request with bad exit status',
            labels,
            buckets=resource_buckets)