-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgoodman_kruskal.py
106 lines (94 loc) · 3.59 KB
/
goodman_kruskal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from base import BaseComparisons
class GoodmanKruskal(BaseComparisons):
    """Goodman-Kruskal similarity index over a set of fingerprints.

    For two fingerprints (n=2) the index is::

        (2 * min(a, d) - b - c) / (2 * min(a, d) + b + c)

    Creating an instance runs the ``BaseComparisons`` initializer and then
    evaluates both variants of the index, storing the results on the
    instance as ``GK_1sim_wdis`` and ``GK_1sim_dis``.

    Attributes
    ----------
    fingerprints : np.ndarray
        Numpy array with the fingerprints that will be compared.
        The fingerprints must be also given as Numpy arrays.
    c_threshold : {None, 'dissimilar', int}
        Coincidence threshold.
    GK_1sim_wdis
        Index computed with a weighted denominator (set by
        ``gk_1sim_wdis``).
    GK_1sim_dis
        Index computed with an unweighted denominator (set by
        ``gk_1sim_dis``).

    Properties
    ----------
    n_fingerprints : int
        Number of fingerprints that will be compared.

    Methods
    -------
    __init__(self, fingerprints, c_threshold=None, w_factor="fraction")
        Initialize the object.
    gk_1sim_wdis()
        Calculate the index with 1-sim-counters and with weighted
        denominator.
    gk_1sim_dis()
        Calculate the index with 1-sim-counters and with unweighted
        denominator.

    The remaining methods are not defined in this class; they are expected
    to be provided by ``BaseComparisons`` (verify against that class):

    assign_fingerprints(fingerprints)
        Assign fingerprints.
    assign_c_threshold(c_threshold)
        Assign coincidence threshold.
    matches()
        Calculate the matches between the fingerprints.
    set_d_vector()
        Calculate the d vector.
    set_w_factor(w_factor)
        Calculate weight factors.
    set_weighted_matches()
        Calculate weighted matches.
    set_a()
        Calculate the (unweighted) 1-similarity counter.
    set_d()
        Calculate the (unweighted) 0-similarity counter.
    set_weighted_a()
        Calculate the (weighted) 1-similarity counter.
    set_weighted_d()
        Calculate the (weighted) 0-similarity counter.
    set_dis_counters()
        Calculate the (unweighted) dissimilarity counters.
    set_weighted_dis_counters()
        Calculate the (weighted) dissimilarity counters.
    set_total_sim_counter()
        Calculate the total number of (unweighted) similarity counters.
    set_total_weighted_sim_counter()
        Calculate the total number of (weighted) similarity counters.
    total_dis_counters()
        Calculate total number of (unweighted) dissimilarity counters.
    total_weighted_dis_counters()
        Calculate total number of (weighted) dissimilarity counters.
    set_p()
        Calculate p.
    set_weighted_p()
        Calculate weighted p.
    """

    def __init__(self, fingerprints, c_threshold=None, w_factor="fraction"):
        """Set up the comparison and compute both index variants.

        Parameters
        ----------
        fingerprints : np.ndarray
            Numpy array with the fingerprints that will be compared.
            The fingerprints must be also given as Numpy arrays.
        c_threshold : {None, 'dissimilar', int}
            Coincidence threshold.
        w_factor : {"fraction", "power_n"}
            Type of weight function that will be used.
        """
        # The base initializer is expected to populate the counters read
        # below (w_a, w_d, a, d, total_w_dis, total_dis).
        super().__init__(fingerprints, c_threshold, w_factor)
        self.gk_1sim_wdis()
        self.gk_1sim_dis()

    def gk_1sim_wdis(self):
        """Compute the index with a weighted denominator.

        Stores the result in ``self.GK_1sim_wdis``.

        Notes
        -----
        (2 * min(w_a, w_d) - w_b - w_c) / (2 * min(w_a, w_d) + w_b + w_c)

        ``self.total_w_dis`` is used for the ``w_b + w_c`` term.
        """
        weighted_sim = 2 * min(self.w_a, self.w_d)
        weighted_dis = self.total_w_dis
        self.GK_1sim_wdis = (
            (weighted_sim - weighted_dis) / (weighted_sim + weighted_dis)
        )

    def gk_1sim_dis(self):
        """Compute the index with an unweighted denominator.

        Stores the result in ``self.GK_1sim_dis``.

        Notes
        -----
        (2 * min(w_a, w_d) - w_b - w_c) / (2 * min(a, d) + b + c)

        ``self.total_w_dis`` is used for ``w_b + w_c`` and
        ``self.total_dis`` for ``b + c``.
        """
        # Weighted counters go in the numerator, raw counters in the
        # denominator.
        weighted_part = 2 * min(self.w_a, self.w_d) - self.total_w_dis
        unweighted_part = 2 * min(self.a, self.d) + self.total_dis
        self.GK_1sim_dis = weighted_part / unweighted_part