-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathperfservmon.py
811 lines (727 loc) · 38.5 KB
/
perfservmon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
#!/usr/bin/python
"""
@author: varounisdi
@contributor: atterdag
"""
import argparse
import shelve
from xml.etree.ElementTree import parse
try:
from urllib.parse import urlparse, urlencode
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
except ImportError:
from urlparse import urlparse
from urllib import urlencode
from urllib2 import urlopen, Request, HTTPError, URLError
import sys
import datetime
import platform
import os
import time
import base64
import ssl
import socket
class GenericServer:
    """Base prototype of a WAS server: its identity plus JVM heap figures."""

    def __init__(self, name, nodename):
        """
        :param name: WAS Server Name
        :param nodename: Name of the WAS Node the server belongs to
        """
        self.name, self.nodename = name, nodename
        # Heap figures stay None until a caller assigns them
        self.maxheapMB = self.heapusedMB = None

    def printserver(self):
        """Dump the generic server attributes to stdout (debugging aid)."""
        report = (
            ('Name:', self.name),
            ('NodeName:', self.nodename),
            ('MaxHeap:', self.maxheapMB),
            ('HeapUsed:', self.heapusedMB),
        )
        for label, value in report:
            print(label + str(value))

    def serverfullname(self):
        """Return ``<nodename>.<name>``, the key that tells servers of a Cell apart."""
        parts = [self.nodename, self.name]
        return '.'.join(parts)
# ###################################################################################
class SIBDestination:
    """Generic WAS SIB destination.

    A concrete destination is either a Topic Space or a Queue.
    """

    def __init__(self, name, mename, totalmessagesconsumed, availablemessages):
        """
        :param name: The Destination Name
        :param mename: The Message Engine Name
        :param totalmessagesconsumed: PMI Metric -> Total Messages Consumed since restart of Message Engine
        :param availablemessages: PMI Metric -> No of available msgs in Destination
        """
        self.Name, self.MEName = name, mename
        self.TotalMessagesConsumed = totalmessagesconsumed
        self.AvailableMessages = availablemessages

    def printsibdest(self):
        """Dump the destination attributes to stdout (debugging aid)."""
        report = (
            ('SIB Destination Name:', self.Name),
            ('SIB Message Engine Name:', self.MEName),
            ('SIB Dest Messages Consumed:', self.TotalMessagesConsumed),
            ('SIB Dest Available Messages:', self.AvailableMessages),
        )
        for label, value in report:
            print(label + str(value))
class SIBQueue(SIBDestination):
    """Queue Destination: a SIBDestination that adds no state of its own."""

    def __init__(self, name, mename, totalmessagesconsumed, availablemessages):
        """
        :param name: The Destination Name
        :param mename: The Message Engine Name
        :param totalmessagesconsumed: PMI Metric -> Total Messages Consumed since restart of Message Engine
        :param availablemessages: PMI Metric -> No of available msgs in Destination
        """
        SIBDestination.__init__(
            self,
            name=name,
            mename=mename,
            totalmessagesconsumed=totalmessagesconsumed,
            availablemessages=availablemessages,
        )
class SIBTopicSpace(SIBDestination):
    """Pub/Sub Destination that also records the names of its durable subscribers."""

    def __init__(self, name, mename, totalmessagesconsumed, availablemessages):
        """
        :param name: The Destination Name
        :param mename: The Message Engine Name
        :param totalmessagesconsumed: PMI Metric -> Total Messages Consumed since restart of Message Engine
        :param availablemessages: PMI Metric -> No of available msgs in Destination
        """
        SIBDestination.__init__(
            self,
            name=name,
            mename=mename,
            totalmessagesconsumed=totalmessagesconsumed,
            availablemessages=availablemessages,
        )
        # Subscriber names, stored as strings in the order they were added
        self.subscribers = []

    def adddurablesubscriber(self, subscrname):
        """Append the string form of a durable subscriber name to the list."""
        self.subscribers.append(str(subscrname))

    def printsibdest(self):
        """Print the common destination attributes, then the subscribers when there are any."""
        SIBDestination.printsibdest(self)
        if self.subscribers:
            print('SIB Topic Subscribers:' + str(self.subscribers))
# ######################################################################################
class TypicalApplicationServer(GenericServer):
    """Typical WAS Class - Recommended for use in most cases.

    Stores the metrics collected for one application server and answers Nagios
    queries about them. Every ``query*`` method returns a ``(status, message)``
    tuple: ``status`` is one of the module-level OK / WARNING / CRITICAL / UNKNOWN
    codes and ``message`` is the text followed by ``|`` and the perfdata.
    """

    def __init__(self, name, nodename):
        """
        :param name: WAS Server Name
        :param nodename: WAS Node Name the Server belongs
        """
        GenericServer.__init__(self, name, nodename)
        # WebContainer thread pool
        self.wcpoolsize = None
        self.wcactive = None
        self.wcthreadshung = None
        # ORB thread pool
        self.orbpoolsize = None
        self.orbactive = None
        # JDBC connection pool metrics, each dict keyed by connection pool name
        self.connpoolspercentused = {}
        self.connpoolsusetime = {}
        self.connpoolswaittime = {}
        self.connpoolswaitingthreadcount = {}
        # HTTP sessions: server totals plus per web module dicts
        self.totalactivesessions = None
        self.totallivesessions = None
        self.activesessions = {}
        self.livesessions = {}
        # SIB destinations keyed by destination name, and message engine names
        # (reported as inactive by querysibdestination when no destination is known)
        self.destinations = {}
        self.messageengines = []
        # Web security timings
        self.webSecAuthenTime = None
        self.webSecAuthorTime = None

    def printserver(self):
        """
        Print Typical Server Attributes(for debug purposes)
        """
        print('****************************')
        GenericServer.printserver(self)
        print('WebContainerActive:' + str(self.wcactive))
        print('WebContainerPoolSize:' + str(self.wcpoolsize))
        print('WebContainerConcurrentHungThreadCount:' + str(self.wcthreadshung))
        print('ORBActive:' + str(self.orbactive))
        print('ORBPoolSize:' + str(self.orbpoolsize))
        print('JDBC Conn Pools Percent Used:' + str(self.connpoolspercentused))
        print('JDBC Conn Pools Use Time:' + str(self.connpoolsusetime))
        print('JDBC Conn Pools Wait Time:' + str(self.connpoolswaittime))
        print('JDBC Conn Pools Waiting Thread Count:' + str(self.connpoolswaitingthreadcount))
        print('Total Active Http Sessions:' + str(self.totalactivesessions))
        print('Total Live Http Sessions:' + str(self.totallivesessions))
        print('Http Active Sessions:' + str(self.activesessions))
        print('Http Live Sessions:' + str(self.livesessions))
        for dest in self.destinations:
            self.destinations[dest].printsibdest()
        print('****************************')

    def addjdbcconnpoolpercentused(self, name, value):
        """Record the percent-used value of connection pool ``name``."""
        self.connpoolspercentused[name] = value

    def addjdbcconnpoolusetime(self, name, value):
        """Record the use time of connection pool ``name``."""
        self.connpoolsusetime[name] = value

    def addjdbcconnpoolwaittime(self, name, value):
        """Record the wait time of connection pool ``name``."""
        self.connpoolswaittime[name] = value

    def addjdbcconnpoolwaitingthreadcount(self, name, value):
        """Record the waiting thread count of connection pool ``name``."""
        self.connpoolswaitingthreadcount[name] = value

    def addactivehttpsessions(self, modname, nosessions):
        """Record the number of active HTTP sessions of web module ``modname``."""
        self.activesessions[modname] = nosessions

    def addlivehttpsessions(self, modname, nosessions):
        """Record the number of live HTTP sessions of web module ``modname``."""
        self.livesessions[modname] = nosessions

    def adddestination(self, sibdest):
        """Store a SIB destination under its ``Name``."""
        self.destinations[sibdest.Name] = sibdest

    def addsibme(self, sibmename):
        """Remember the name of a SIB message engine."""
        self.messageengines.append(sibmename)

    @staticmethod
    def _thresholdstatus(value, warning, critical):
        """Map a numeric value to OK / WARNING / CRITICAL.

        CRITICAL when ``value >= critical``, WARNING when ``value > warning``,
        OK otherwise.
        """
        if value >= critical:
            return CRITICAL
        if value > warning:
            return WARNING
        return OK

    @staticmethod
    def _percentused(used, total):
        """Return ``used / total`` as a truncated integer percentage, or None when total is 0."""
        total = float(total)
        if total == 0:
            return None
        return int(float(used) / total * 100)

    def _querysingleconnpool(self, values, jndiname, warning, critical, metriclabel, template):
        """Shared body of the per-pool DB connection pool checks.

        :param values: dict of pool name -> metric value
        :param jndiname: the pool to report on (mandatory for these checks)
        :param warning: Warning threshold
        :param critical: Critical threshold
        :param metriclabel: metric name used in the "could not find" message
        :param template: message template with {jndi}, {val}, {warn} and {crit} fields
        :return: (status, message)
        """
        if not values:
            return UNKNOWN, 'Could not find DB Connection Pool {} metrics for server {}' \
                .format(metriclabel, self.name)
        if jndiname is None:
            return UNKNOWN, 'Please set datasource JNDI name using -j JndiName'
        if jndiname not in values:
            return UNKNOWN, 'No DB Connection Pool for {jndi} was found'.format(jndi=jndiname)
        value = int(values[jndiname])
        msg = template.format(jndi=jndiname, val=value, warn=warning, crit=critical)
        return self._thresholdstatus(value, warning, critical), msg

    def querymetric(self, metric, warning, critical, destination=None, jndi=None):
        """
        Delegate the metric query to the appropriate function

        :param metric: metric name, one of the keys of the dispatch table below
        :param warning: Warning threshold, None to use the default of the metric
        :param critical: Critical threshold, None to use the default of the metric
        :param destination: SIB destination name, only used by SIBDestinations
        :param jndi: connection pool JNDI name, only used by the DBConnectionPool* metrics
        :return: (status, message)
        """
        # metric -> (query method, name of the extra keyword the method accepts)
        handlers = {
            'WebContainer': (self.querywebcontainer, None),
            'WebContainerThreadHung': (self.querywebcontainerhungthreads, None),
            'ORB': (self.queryorb, None),
            'DBConnectionPoolPercentUsed': (self.querydbconnpoolpercentused, 'jndiname'),
            'DBConnectionPoolUseTime': (self.querydbconnpoolusetime, 'jndiname'),
            'DBConnectionPoolWaitTime': (self.querydbconnpoolwaittime, 'jndiname'),
            'DBConnectionPoolWaitingThreadCount': (self.querydbconnpoolwaitingthreadcount, 'jndiname'),
            'WebAuthenticationTime': (self.querysecauthen, None),
            'WebAuthorizationTime': (self.querysecauthor, None),
            'Heap': (self.queryheapusage, None),
            'LiveSessions': (self.querylivesessions, None),
            'SIBDestinations': (self.querysibdestination, 'destname'),
        }
        if metric not in handlers:
            return UNKNOWN, 'Unsupported metric {}'.format(metric)
        handler, extraarg = handlers[metric]
        queryargs = {}
        # Forward only the thresholds that were actually supplied; passing None would
        # override the per-metric defaults and break the numeric comparisons
        if warning is not None:
            queryargs['warning'] = warning
        if critical is not None:
            queryargs['critical'] = critical
        # Forward the destination / JNDI name only to the methods that accept it
        if extraarg == 'destname' and destination is not None:
            queryargs['destname'] = destination
        elif extraarg == 'jndiname' and jndi is not None:
            queryargs['jndiname'] = jndi
        return handler(**queryargs)

    def querywebcontainer(self, warning=75, critical=90):
        """Report the WebContainer thread pool usage as a percentage of the pool size."""
        if self.wcactive is None or self.wcpoolsize is None:
            return UNKNOWN, 'Could not find WebContainer Usage metrics for server {}'.format(self.name)
        percentused = self._percentused(self.wcactive, self.wcpoolsize)
        if percentused is None:
            return UNKNOWN, 'WebContainer Thread Pool size is 0 for server {}'.format(self.name)
        msg = ('WebContainer Thread Pool: {actv}/{sz} ({pc}%)|'
               'wcthreadpoolusage={pc}%;{warn};{crit} wcthreadpoolused={actv};;;0;{sz}') \
            .format(actv=self.wcactive, sz=self.wcpoolsize, pc=percentused, warn=warning, crit=critical)
        return self._thresholdstatus(percentused, warning, critical), msg

    def querywebcontainerhungthreads(self, warning=75, critical=90):
        """Report the number of WebContainer threads declared hung."""
        if self.wcthreadshung is None:
            return UNKNOWN, 'Could not find WebContainer Thread Hung metrics for server {}'.format(self.name)
        wcthreadshung = int(self.wcthreadshung)
        msg = 'WebContainer Declared Thread Hung: {thrh}|wcthreadhung={thrh};{warn};{crit};0' \
            .format(thrh=self.wcthreadshung, warn=warning, crit=critical)
        return self._thresholdstatus(wcthreadshung, warning, critical), msg

    def queryorb(self, warning=75, critical=90):
        """Report the ORB thread pool usage as a percentage of the pool size."""
        if self.orbactive is None or self.orbpoolsize is None:
            return UNKNOWN, 'Could not find ORB metrics for server {}'.format(self.name)
        percentused = self._percentused(self.orbactive, self.orbpoolsize)
        if percentused is None:
            return UNKNOWN, 'ORB Thread Pool size is 0 for server {}'.format(self.name)
        msg = ('ORB Thread Pool: {actv}/{sz} ({pc}%)|'
               'orbthreadpoolusage={pc}%;{warn};{crit} orbthreadpoolused={actv};;;0;{sz}') \
            .format(actv=self.orbactive, sz=self.orbpoolsize, pc=percentused, warn=warning, crit=critical)
        return self._thresholdstatus(percentused, warning, critical), msg

    def querydbconnpoolpercentused(self, jndiname=None, warning=75, critical=90):
        """Report the percent-used value of one connection pool, or of all pools when no JNDI name is given."""
        if not self.connpoolspercentused:
            return UNKNOWN, 'Could not find DB Connection Pool Percent Used metrics for server {}'.format(self.name)
        if jndiname is None:
            # If no jndi name is given, show all Connection Pools
            # alert if ANY is above Warn, Crit
            statuscode = OK
            summary = ['DB Connection Pool Percent Used']
            perfdata = []
            for connpool in self.connpoolspercentused:
                percentused = int(self.connpoolspercentused[connpool])
                summary.append(' - {connpool} {pc}%'.format(connpool=connpool, pc=percentused))
                perfdata.append('{connpool}_usage={pc}%;{warn};{crit} '
                                .format(connpool=connpool, pc=percentused, warn=warning, crit=critical))
                # Keep the worst status seen so far; relies on OK < WARNING < CRITICAL
                statuscode = max(statuscode, self._thresholdstatus(percentused, warning, critical))
            return statuscode, ''.join(summary) + '|' + ''.join(perfdata)
        if jndiname not in self.connpoolspercentused:
            return UNKNOWN, 'No DB Connection Pool for {jndi} was found'.format(jndi=jndiname)
        percentused = int(self.connpoolspercentused[jndiname])
        msg = 'DB Connection Pool Percent Used - {jndi} {pc}%|{jndi}_usage={pc}%;{warn};{crit}' \
            .format(jndi=jndiname, pc=percentused, warn=warning, crit=critical)
        return self._thresholdstatus(percentused, warning, critical), msg

    def querydbconnpoolusetime(self, jndiname=None, warning=10, critical=30):
        """Report the use time of the connection pool ``jndiname``."""
        return self._querysingleconnpool(
            self.connpoolsusetime, jndiname, warning, critical, 'Use Time',
            'DB Connection Pool Use Time - {jndi} {val} seconds|'
            '{jndi}_usetime={val}s;{warn};{crit};0')

    def querydbconnpoolwaittime(self, jndiname=None, warning=5, critical=10):
        """Report the wait time of the connection pool ``jndiname``."""
        return self._querysingleconnpool(
            self.connpoolswaittime, jndiname, warning, critical, 'Wait Time',
            'DB Connection Pool Wait Time - {jndi} {val} seconds|'
            '{jndi}_waittime={val}s;{warn};{crit};0')

    def querydbconnpoolwaitingthreadcount(self, jndiname=None, warning=5, critical=10):
        """Report the number of threads waiting on the connection pool ``jndiname``."""
        return self._querysingleconnpool(
            self.connpoolswaitingthreadcount, jndiname, warning, critical, 'Waiting Threads Count',
            'DB Connection Pool Waiting Threads Count - {jndi} {val}|'
            '{jndi}_waitthreads={val};{warn};{crit};0')

    def queryheapusage(self, warning=75, critical=90):
        """Report the used heap as a percentage of the maximum heap."""
        if self.heapusedMB is None or self.maxheapMB is None:
            return UNKNOWN, 'Could not find Heap Usage metrics for server {}'.format(self.name)
        percentused = self._percentused(self.heapusedMB, self.maxheapMB)
        if percentused is None:
            return UNKNOWN, 'Max Heap size is 0 for server {}'.format(self.name)
        msg = ('Heap Usage: {heapused}/{maxheap} MB ({heappc}%)|'
               'heapusage={heappc}%;{warn};{crit} usedheap={heapused}MB;;;0;{maxheap}') \
            .format(heapused=self.heapusedMB, maxheap=self.maxheapMB, heappc=percentused, warn=warning,
                    crit=critical)
        return self._thresholdstatus(percentused, warning, critical), msg

    def querysecauthen(self, warning=2, critical=5):
        """Report the Web Authentication Time."""
        if self.webSecAuthenTime is None:
            return UNKNOWN, 'Could not find Web Authentication Time metrics for server {}'.format(self.name)
        websecauthentime = int(self.webSecAuthenTime)
        msg = 'Web Authentication Time: {wsecauthtime} seconds|websecauthentime={wsecauthtime}s;{warn};{crit}' \
            .format(wsecauthtime=self.webSecAuthenTime, warn=warning, crit=critical)
        return self._thresholdstatus(websecauthentime, warning, critical), msg

    def querysecauthor(self, warning=2, critical=5):
        """Report the Web Authorization Time."""
        if self.webSecAuthorTime is None:
            return UNKNOWN, 'Could not find Web Authorization Time metrics for server {}'.format(self.name)
        websecauthortime = int(self.webSecAuthorTime)
        msg = 'Web Authorization Time: {wsecauthortime} seconds|websecauthortime={wsecauthortime}s;{warn};{crit}' \
            .format(wsecauthortime=self.webSecAuthorTime, warn=warning, crit=critical)
        return self._thresholdstatus(websecauthortime, warning, critical), msg

    def querylivesessions(self, warning=None, critical=None):
        """Report the total and per-module live HTTP sessions.

        The thresholds are accepted but not evaluated; the status is OK whenever
        session metrics are available.
        """
        # TODO Implement threshold checking
        if not self.livesessions or self.totallivesessions is None:
            return UNKNOWN, 'Could not find Live Session metrics for server {}'.format(self.name)
        msg = 'live sessions: total {totalsessions}'.format(totalsessions=self.totallivesessions)
        perfdata = '|totallivesessions={totalsessions};;;0'.format(totalsessions=self.totallivesessions)
        for appmodule in self.livesessions:
            msg += ' , {mod} {livesessions!s}'.format(mod=appmodule, livesessions=self.livesessions[appmodule])
            perfdata += " '{mod}_sessions'={livesessions!s};;;0" \
                .format(mod=appmodule, livesessions=self.livesessions[appmodule])
        return OK, msg + perfdata

    def querysibdestination(self, destname=None, warning=10, critical=100):
        """Report the available and consumed message counts of the SIB destination ``destname``."""
        if not self.destinations:
            if self.messageengines:
                return OK, 'Inactive SIB Message Engine'
            return UNKNOWN, 'Could not find requested Destination metrics for server {}'.format(self.name)
        if destname is None:
            return UNKNOWN, 'Please set Destination name using -d DestName'
        if destname not in self.destinations:
            return UNKNOWN, 'Destination {} was not found for server {}'.format(destname, self.name)
        destination = self.destinations[destname]
        msg = 'Destination:{dname} - Available Messages:{davail} , Messages Consumed:{dtotalmsgcon} ' \
            .format(dname=destination.Name, davail=destination.AvailableMessages,
                    dtotalmsgcon=destination.TotalMessagesConsumed)
        if isinstance(destination, SIBTopicSpace) and destination.subscribers:
            msg += ' , Durable Subscribers:'
            msg += ''.join('%s ' % subscriber for subscriber in destination.subscribers)
        msg += '|{dname}_AvailMsgs={davail};{warn};{crit};0 {dname}_ConsumMsgs={dtotalmsgcon};;;0' \
            .format(dname=destination.Name,
                    davail=destination.AvailableMessages,
                    dtotalmsgcon=destination.TotalMessagesConsumed,
                    warn=warning,
                    crit=critical)
        # Same rule as the other metrics: a value equal to the critical threshold is CRITICAL
        return self._thresholdstatus(int(destination.AvailableMessages), warning, critical), msg
# ############################################################################################################
def parseperfxml(path, cellname):
    """
    Parse the perfservlet xml and store the needed metrics (defined in the metrics dict)
    for all WAS servers of the Cell in a python shelve file.

    One TypicalApplicationServer is built per Server element and stored in the shelve
    under its ``serverfullname()``.

    :param path: Where the perfserv xml and the python shelve file are stored
    :param cellname: The name of the WAS Cell
    :raise: whatever the XML parser, the metric parsers or shelve raise; nothing is caught here
    """
    xmlfilename = path + cellname + '.xml'
    shelvefilename = path + cellname + '.dbm'
    # Name of a Stat element -> function that copies its values into the server object
    metrics = {'Security Authentication': parsesecauthen,
               'Security Authorization': parsesecauthor,
               'JVM Runtime': parsejvmstats,
               'WebContainer': parsewebcontstats,
               'Object Request Broker': parseorbtpstats,
               'JDBC Connection Pools': parseconnpoolsstats,
               'Servlet Session Manager': parsesessionstats,
               'SIB Service': parsesibstats
               }
    # Parse first so that an unreadable XML file does not leave a freshly created shelve behind
    tree = parse(xmlfilename)
    with shelve.open(shelvefilename, flag='c') as pfile:
        for node in tree.iter('Node'):
            nodename = node.attrib['name']
            for server in node.iter('Server'):
                was = TypicalApplicationServer(server.attrib['name'], nodename)
                for stat in server.iter('Stat'):
                    # .get() so that a Stat without a name is skipped instead of raising KeyError
                    handler = metrics.get(stat.attrib.get('name'))
                    if handler is not None:
                        # For each metric call the appropriate method
                        handler(was, stat)
                # Uncomment for debug purposes
                # was.printserver()
                pfile[was.serverfullname()] = was
def parsejvmstats(was, stat):
    """
    Copy the JVM heap figures found under ``stat`` into the server object.

    :param was: server object receiving ``maxheapMB`` and ``heapusedMB``
    :param stat: the Stat element to scan (itself and every descendant)
    """
    # NOTE(review): dividing by 1024 presumably turns kilobytes into megabytes - confirm the PMI unit
    divisor = 1024
    for element in stat.iter():
        statname = element.attrib['name']
        if statname == 'HeapSize':
            was.maxheapMB = int(element.attrib['upperBound']) // divisor
        elif statname == 'UsedMemory':
            was.heapusedMB = int(element.attrib['count']) // divisor
def parsesecauthen(was, stat):
    """
    Copy the maximum Web Authentication Time found under ``stat`` into the server object.

    :param was: server object receiving ``webSecAuthenTime``
    :param stat: the Stat element to scan (itself and every descendant)
    """
    matching = (element for element in stat.iter()
                if element.attrib['name'] == 'WebAuthenticationTime')
    for element in matching:
        # NOTE(review): // 1000 presumably converts milliseconds to seconds - confirm the PMI unit
        was.webSecAuthenTime = int(element.attrib['max']) // 1000
def parsesecauthor(was, stat):
    """
    Copy the maximum Web Authorization Time found under ``stat`` into the server object.

    :param was: server object receiving ``webSecAuthorTime``
    :param stat: the Stat element to scan (itself and every descendant)
    """
    matching = (element for element in stat.iter()
                if element.attrib['name'] == 'WebAuthorizationTime')
    for element in matching:
        # NOTE(review): // 1000 presumably converts milliseconds to seconds - confirm the PMI unit
        was.webSecAuthorTime = int(element.attrib['max']) // 1000
def parsewebcontstats(was, stat):
    """
    Copy the WebContainer thread pool figures found under ``stat`` into the server object.

    :param was: server object receiving ``wcactive``, ``wcpoolsize`` and ``wcthreadshung``
    :param stat: the Stat element whose descendants are scanned
    """
    # statistic name -> (attribute set on the server, XML attribute the value is read from)
    bounded = {'ActiveCount': ('wcactive', 'value'),
               'PoolSize': ('wcpoolsize', 'upperBound')}
    for element in stat.iter('BoundedRangeStatistic'):
        target = bounded.get(element.attrib['name'])
        if target is not None:
            field, source = target
            setattr(was, field, element.attrib[source])
    for element in stat.iter('CountStatistic'):
        if element.attrib['name'] != 'DeclaredThreadHungCount':
            continue
        was.wcthreadshung = element.attrib['count']
def parseorbtpstats(was, stat):
    """
    Copy the ORB thread pool figures found under ``stat`` into the server object.

    :param was: server object receiving ``orbactive`` and ``orbpoolsize``
    :param stat: the Stat element whose descendants are scanned
    """
    # statistic name -> (attribute set on the server, XML attribute the value is read from)
    bounded = {'ActiveCount': ('orbactive', 'value'),
               'PoolSize': ('orbpoolsize', 'upperBound')}
    for element in stat.iter('BoundedRangeStatistic'):
        target = bounded.get(element.attrib['name'])
        if target is not None:
            field, source = target
            setattr(was, field, element.attrib[source])
def parseconnpoolsstats(was, stat):
    """
    Copy the JDBC connection pool statistics found under ``stat`` into the server object.

    The pools are the Stat elements two levels below ``stat`` (provider, then pool);
    each value is registered under the pool's ``name`` attribute.

    :param was: server object offering the ``addjdbcconnpool*`` methods
    :param stat: the 'JDBC Connection Pools' Stat element
    """
    for provider in stat.findall('./Stat'):
        for pool in provider.findall('./Stat'):
            percentused = pool.find(".//RangeStatistic[@name='PercentUsed']")
            if percentused is not None:
                was.addjdbcconnpoolpercentused(pool.attrib['name'], percentused.attrib['value'])

            waitingthreads = pool.find(".//RangeStatistic[@name='WaitingThreadCount']")
            if waitingthreads is not None:
                was.addjdbcconnpoolwaitingthreadcount(pool.attrib['name'], waitingthreads.attrib['value'])

            # The following values are measured in ms, convert to seconds
            usetime = pool.find(".//TimeStatistic[@name='UseTime']")
            if usetime is not None:
                seconds = int(usetime.attrib['max']) // 1000
                was.addjdbcconnpoolusetime(pool.attrib['name'], seconds)

            waittime = pool.find(".//TimeStatistic[@name='WaitTime']")
            if waittime is not None:
                seconds = int(waittime.attrib['max']) // 1000
                was.addjdbcconnpoolwaittime(pool.attrib['name'], seconds)
def parsesessionstats(was, stat):
    """
    Copy the HTTP session counters found under ``stat`` into the server object.

    Per-module counters come from the direct Stat children (modules whose name starts
    with 'perfServletApp' are skipped); the totals come from the RangeStatistic elements
    directly under ``stat``.

    :param was: server object receiving the per-module and total session counters
    :param stat: the 'Servlet Session Manager' Stat element
    """
    for module in stat.findall('./Stat'):
        modname = module.attrib['name']
        if modname.startswith('perfServletApp'):
            continue
        active = module.find(".//RangeStatistic[@name='ActiveCount']")
        live = module.find(".//RangeStatistic[@name='LiveCount']")
        if active is not None:
            was.addactivehttpsessions(modname, active.attrib['value'])
        if live is not None:
            was.addlivehttpsessions(modname, live.attrib['value'])

    # statistic name -> attribute of the server object holding the total
    totalfields = {'ActiveCount': 'totalactivesessions', 'LiveCount': 'totallivesessions'}
    for total in stat.findall('./RangeStatistic'):
        field = totalfields.get(total.attrib['name'])
        if field is not None:
            setattr(was, field, total.attrib['value'])
def parsesibstats(was, stat):
    """
    Parse SIB Statistics found in perfservlet xml and attach them in WAS Object instance

    Queues and topic spaces that carry both a consumed and an available message count
    are added as destinations. When neither a 'Queues' nor a 'Topicspaces' node exists,
    the message engine name is recorded instead (inactive message engine). A missing
    or empty 'SIB Messaging Engines' node is tolerated: destinations are then created
    with a message engine name of None and no engine is recorded.

    :param was: Current Typical Application Server instance
    :param stat: Stat tags in perfservlet xml under the specific Server tag
    """
    sibmes = stat.find(".//Stat[@name='SIB Messaging Engines']")
    engines = sibmes.findall('./Stat') if sibmes is not None else []
    # Assume 1-to-1 relationship of ME and WAS JVM
    sibmename = engines[0].attrib['name'] if engines else None

    queuesnode = stat.find(".//Stat[@name='Queues']")
    if queuesnode is not None:
        for queue in queuesnode.findall('./Stat'):
            queuename = queue.attrib['name']
            totalmsgsconsumed = queue.find(
                "./CountStatistic[@name='QueueStats.TotalMessagesConsumedCount']")
            availablemsgs = queue.find("./CountStatistic[@name='QueueStats.AvailableMessageCount']")
            if totalmsgsconsumed is not None and availablemsgs is not None:
                sibqueue = SIBQueue(queuename, sibmename, totalmsgsconsumed.attrib['count'],
                                    availablemsgs.attrib['count'])
                was.adddestination(sibqueue)

    topicspacesnode = stat.find(".//Stat[@name='Topicspaces']")
    if topicspacesnode is not None:
        # Loop over each topic space
        for topicspace in topicspacesnode.findall('./Stat'):
            topicspname = topicspace.attrib['name']
            totalmsgsconsumed = topicspace.find(
                "./Stat/CountStatistic[@name='DurableSubscriptionStats.TotalMessagesConsumedCount']")
            availablemsgs = topicspace.find(
                "./Stat/CountStatistic[@name='DurableSubscriptionStats.AvailableMessageCount']")
            if totalmsgsconsumed is not None and availablemsgs is not None:
                sibtopic = SIBTopicSpace(topicspname, sibmename, totalmsgsconsumed.attrib['count'],
                                         availablemsgs.attrib['count'])
                for durablesub in topicspace.findall("./Stat[@name='Durable Subscriptions']/Stat"):
                    sibtopic.adddurablesubscriber(durablesub.attrib['name'])
                was.adddestination(sibtopic)

    # Case of inactive SIB Message Engine
    if queuesnode is None and topicspacesnode is None and sibmename is not None:
        was.addsibme(sibmename)
# #################################################################################################################\
def retrieveperfxml(path, cellname, ip, port, username, password, httpprotocol='http', ignorecert=False):
    """
    Perfservlet XML Retrieval Method

    Downloads the perfservlet XML, stores it as ``<path><cellname>.xml`` and reports the
    ``responseStatus`` attribute found on the XML root element.

    :param path: The file path where perfserv xml and shelve output is stored
    :param cellname: The Name of the WAS Cell
    :param ip: The ip of the perfserv application
    :param port: The port of the perfserv application
    :param username: An user which is authorized to access perfservlet
    :param password: perfservlet authorized user password
    :param httpprotocol: The http protocol to access the perfservlet, can be http or https, default http
    :param ignorecert: Ignore TLS Certificate, default False
    :return: The nagios (status, message) tuple
    """
    # Local import: ParseError is not among the names imported at the top of the file
    from xml.etree.ElementTree import ParseError

    urlopentimeout = 30
    if httpprotocol not in ('http', 'https'):
        return UNKNOWN, 'Invalid Perfserv URL'
    url = setperfservurl(ip, port, path, cellname, httpprotocol)
    xmlfilename = path + cellname + '.xml'

    req = Request(url)
    # if Basic Auth is enabled
    if username and password:
        credentials = '%s:%s' % (username, password)
        # utf-8 yields the same bytes as ascii for ascii credentials and does not fail on others
        auth_encoded = base64.b64encode(credentials.encode('utf-8'))
        req.add_header('Authorization', 'Basic %s' % auth_encoded.decode('ascii'))

    urlopenargs = {'timeout': urlopentimeout}
    # Interpreters without ssl.create_default_context (pre Python 2.7.9) keep the plain urlopen call
    if httpprotocol == 'https' and hasattr(ssl, 'create_default_context'):
        # Default purpose is SERVER_AUTH: the server certificate chain and the hostname
        # are verified, so only trusted certificates are accepted
        ctx = ssl.create_default_context()
        if ignorecert:
            # On --ignorecert option accept any certificate.
            # check_hostname has to be disabled before verify_mode can be set to CERT_NONE
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        urlopenargs['context'] = ctx

    try:
        perfserv = urlopen(req, **urlopenargs)
        try:
            # Read inside the try block so that a timeout while reading is reported as well
            payload = perfserv.read()
        finally:
            perfserv.close()
    except HTTPError as error:
        return CRITICAL, 'Could not open perfservlet URL - Response Status Code {}'.format(error.code)
    except URLError as error:
        return CRITICAL, 'Could not open perfservlet URL - {}'.format(error.reason)
    # Handle HTTP Timeouts
    except socket.timeout:
        return CRITICAL, 'Could not open perfservlet URL: Socket Timeout'
    # Handle HTTPS Timeouts
    except ssl.SSLError:
        return CRITICAL, 'Could not open perfservlet URL: Generic SSL Error, possibly a timeout'

    with open(xmlfilename, 'wb') as xmlfile:
        xmlfile.write(payload)
    try:
        root = parse(xmlfilename).getroot()
    except ParseError as error:
        return CRITICAL, 'Perfservlet response is not valid XML - {}'.format(error)
    # .get() so that a root element without the attribute is reported instead of raising KeyError
    responsestatus = root.attrib.get('responseStatus')
    if responsestatus == 'failed':
        return CRITICAL, 'Error retrieving PMI data! Check your Cell status!'
    if responsestatus == 'success':
        return OK, 'PerfServlet Data refreshed on {}'.format(datetime.datetime.now().strftime('%c'))
    return UNKNOWN, 'Unknown Perfserv Status: {}'.format(responsestatus)
def touch(fullpath):
    """
    Create ``fullpath`` when it is missing and set its access/modification times to now,
    like the UNIX touch command.

    Used to time the refresh of the Perfservlet cache.
    """
    handle = open(fullpath, 'a')
    try:
        # None as the times argument means "current time"
        os.utime(fullpath, None)
    finally:
        handle.close()
def setperfservurl(ip, port, path, cellname, httpprotocol, refcacheinterval=3600):
    """Construct PerfServlet URL to call from Collector

    The ``<path><cellname>.lck`` file carries the time of the last cache refresh in its
    modification time. When it is older than ``refcacheinterval`` seconds the file is
    touched and ``?refreshConfig=true`` is appended to the URL; when it does not exist
    yet it is created and the plain URL is returned.

    :param ip: IP Addr of the Server where perfservl runs
    :param port: HTTP Port of the Server where perfservl runs (a string)
    :param path: Location of .lck file, used for determining the interval window for the specific Cell
    :param cellname: The Name of the WAS Cell, used in .lck file name
    :param httpprotocol: The http protocol to access the perfservlet, can be http or https
    :param refcacheinterval: Interval to Refresh Perfservlet cache
    :return: PerfServlet URL
    """
    cachereffile = ''.join((path, cellname, '.lck'))
    url = ''.join((httpprotocol, '://', ip, ':', port, '/wasPerfTool/servlet/perfservlet'))
    if not os.path.isfile(cachereffile):
        # First call for this Cell: start the interval window now
        touch(cachereffile)
        return url
    timeelapsed = time.time() - os.path.getmtime(cachereffile)
    if timeelapsed <= refcacheinterval:
        return url
    touch(cachereffile)
    return url + '?refreshConfig=true'
def parsecmdargs():
    """Parse Given Plugin Attributes

    Builds the parser with its ``retrieve`` and ``show`` sub-commands and parses the
    process command line.

    :return: the argparse namespace; ``command_name`` holds the chosen sub-command
    """
    metricchoices = ['WebContainer', 'WebContainerThreadHung', 'ORB', 'DBConnectionPoolPercentUsed',
                     'DBConnectionPoolUseTime', 'DBConnectionPoolWaitTime',
                     'DBConnectionPoolWaitingThreadCount', 'Heap', 'LiveSessions',
                     'SIBDestinations', 'WebAuthenticationTime', 'WebAuthorizationTime']
    # (flags, add_argument keywords), in the order the options are registered
    retrieveoptions = [
        (("-N",), dict(type=str, action="store", dest='IPAddress',
                       help="IP Address of perfservlet server", required=True)),
        (("-P",), dict(type=str, action="store", dest='Port',
                       help="Port of perfservlet server", required=True)),
        (("-H",), dict(type=str, action="store", dest='HttpProtocol', choices=['http', 'https'],
                       help="Perfservlet HTTP Protocol", default='http', required=False)),
        (("--ignorecert",), dict(action="store_true",
                                 help="Ignore TLS Server Certificate", required=False)),
        (("-u",), dict(type=str, action="store", dest='Username',
                       help="Perfservlet authorized user", default='', required=False)),
        (("-p",), dict(type=str, action="store", dest='Password',
                       help="Perfservlet user password", default='', required=False)),
    ]
    showoptions = [
        (("-n",), dict(type=str, action="store", dest='NodeName', help="Node Name", required=True)),
        (("-s",), dict(type=str, action="store", dest='ServerName', help="Server Name", required=True)),
        (("-M",), dict(type=str, action="store", dest='Metric', choices=metricchoices,
                       help="Metric Type", required=True)),
        (("-d",), dict(type=str, action="store", dest='Destination',
                       help="SIB Destination Name", required=False)),
        (("-j",), dict(type=str, action="store", dest='JndiName', help="JNDI Name", required=False)),
        (("-c",), dict(type=int, action="store", dest='Critical',
                       help="Critical Value for Metric", required=False)),
        (("-w",), dict(type=int, action="store", dest='Warning',
                       help="Warning Value for Metric", required=False)),
    ]
    commandspecs = (
        ('retrieve', 'Retrieve Data and Store them', retrieveoptions),
        ('show', 'Show metrics', showoptions),
    )

    parser = argparse.ArgumentParser(description='Nagios plugin on Websphere Cell Metrics. Uses the PerfServlet App')
    parser.add_argument("-C", type=str, action="store", dest='CellName', help="Cell name", required=True)
    commands = parser.add_subparsers(help='Commands', dest='command_name')
    for command, helptext, options in commandspecs:
        subparser = commands.add_parser(command, help=helptext)
        for flags, settings in options:
            subparser.add_argument(*flags, **settings)
    return parser.parse_args()
def queryperfdata(path, cellname, nodename, servername, metric, warning, critical, destination=None, jndiname=None):
    """Fundamental Perfservlet Data Query Method - Used by Nagios show Check

    Loads the server stored under ``<nodename>.<servername>`` from the shelve file
    ``<path><cellname>.dbm`` and delegates to its ``querymetric``. Failures are turned
    into an UNKNOWN result instead of being raised.

    :param path: Where the shelve file lies
    :param cellname: the WAS Cell Name
    :param nodename: the WAS Node Name
    :param servername: the WAS Server Name
    :param metric: Pick one of WebContainer, ORB, DBConnectionPoolPercentUsed, DBConnectionPoolUseTime,
        DBConnectionPoolWaitTime, DBConnectionPoolWaitingThreadCount, Heap, LiveSessions, SIBDestinations,
        WebAuthenticationTime, WebAuthorizationTime
    :param warning: Warning threshold
    :param critical: Critical threshold
    :param destination: Destination Name. Must be defined if Metric = SIBDestinations
    :param jndiname: JNDI Name. Must be defined if Metric = DBConnectionPool*
    :return: Nagios Message
    """
    shelvefilename = path + cellname + '.dbm'
    serverfullname = '.'.join((nodename, servername))
    try:
        with shelve.open(shelvefilename, flag='r') as perffile:
            if serverfullname not in perffile:
                return UNKNOWN, 'Not available statistics for server ' + serverfullname
            appsrv = perffile[serverfullname]
    except IOError as error:
        # str(error) works on every Python version; exceptions have no .message on Python 3
        return UNKNOWN, str(error)
    except Exception:
        return UNKNOWN, 'Error opening cached metrics file'
    # Evaluated outside the block above so that a failure here is not reported as a file problem
    try:
        return appsrv.querymetric(metric, warning, critical, destination, jndiname)
    except Exception as error:
        return UNKNOWN, 'Error evaluating metric {} for server {}: {}'.format(metric, serverfullname, error)
def show(alertstatus, alertmessage):
    """Print Nagios Msg and exit with appropriate Return Code

    :param alertstatus: OK, WARNING or CRITICAL; any other value is reported as UNKNOWN
    :param alertmessage: text printed after the status label
    """
    known = ((OK, 'OK'), (WARNING, 'WARNING'), (CRITICAL, 'CRITICAL'))
    for code, label in known:
        if alertstatus == code:
            break
    else:
        # No known status matched
        code, label = UNKNOWN, 'UNKNOWN'
    print('{} - {}'.format(label, alertmessage))
    sys.exit(code)
# Nagios return codes. Bound at import time (not only when run as a script) because
# the functions and classes of this module refer to them as globals.
OK = 0
WARNING = 1
CRITICAL = 2
UNKNOWN = 3

if __name__ == '__main__':
    startingpath = ''
    # Assume the Plugin/Nagios Server runs in Linux OS
    if 'Linux' == platform.system():
        startingpath = '/tmp/'
    arguments = parsecmdargs()
    if arguments.command_name == 'retrieve':
        # Perfservlet Data Collector Operation
        status, message = retrieveperfxml(path=startingpath, cellname=arguments.CellName, ip=arguments.IPAddress,
                                          port=arguments.Port, httpprotocol=arguments.HttpProtocol,
                                          ignorecert=arguments.ignorecert,
                                          username=arguments.Username, password=arguments.Password)
        if status == OK:
            parseperfxml(path=startingpath, cellname=arguments.CellName)
        show(status, message)
    elif arguments.command_name == 'show':
        # Nagios Check Perfservlet Data stored in Python shelve file
        status, message = queryperfdata(startingpath, arguments.CellName, arguments.NodeName, arguments.ServerName,
                                        arguments.Metric, arguments.Warning, arguments.Critical,
                                        destination=arguments.Destination, jndiname=arguments.JndiName)
        show(status, message)
    else:
        # On Python 3 the sub-command is optional; do not exit 0 (OK) silently without one
        show(UNKNOWN, 'No command given, use retrieve or show')