-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathxyDist.h
104 lines (82 loc) · 3.79 KB
/
xyDist.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/* Open source system for classification learning from very large data
** Copyright (C) 2012 Geoffrey I Webb
** Class for handling a joint distribution between an Attribute and a class
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
** Please report any bugs to Geoff Webb <[email protected]>
*/
#pragma once
#include "instanceStream.h"
#include "smoothing.h"
// Models the joint distribution between each individual attribute (x) value and the class (y).
/// Read-only pointer to the first element of an array of InstanceCounts
/// holding a conditional class distribution (one count per class).
typedef const InstanceCount* ySubDist;
class xyDist
{
public:
xyDist(); ///< constructor without initialisation of InstanceStream specific data
xyDist(InstanceStream *is); ///< constructor that reads the distribution from the stream
~xyDist(void);
void reset(InstanceStream *is); ///< initialise with InstanceStream specific information but do not read the distribution
void update(const instance &inst); ///< update the distribution according to the given instance
void clear();
// p(a=v|Y=y) using M-estimate
inline double p(CategoricalAttribute a, CatValue v, CatValue y) {
return mEstimate(counts_[a][v*noOfClasses_+y], classCounts[y], metaData_->getNoValues(a));
}
// p(a=v, Y=y) using M-estimate
inline double jointP(CategoricalAttribute a, CatValue v, CatValue y) {
return (counts_[a][v*noOfClasses_+y]+M/(metaData_->getNoValues(a)*metaData_->getNoClasses()))/(count+M);
}
// p(a=v) using M-estimate
inline double p(CategoricalAttribute a, CatValue v) {
return (getCount(a,v)+M/(metaData_->getNoValues(a)))/(count+M);
}
inline double p(CatValue y) {
return (classCounts[y]+M/metaData_->getNoClasses())/(count+M);
}
// count[A=v,Y=y]
inline InstanceCount getCount(CategoricalAttribute a, CatValue v, CatValue y) const {
return counts_[a][v*noOfClasses_+y];
}
// count[A=v]
inline InstanceCount getCount(CategoricalAttribute a, CatValue v) const {
InstanceCount c = 0;
const ySubDist ySD = getYSubDist(a, v);
for (CatValue y = 0; y < noOfClasses_; y++) {
c+= ySD[y];
}
return c;
}
// count[Y=y]
inline InstanceCount getClassCount(CatValue y) const {
return classCounts[y];
}
inline unsigned int getNoClasses() const { return noOfClasses_; }
inline unsigned int getNoAtts() const { return counts_.size(); }
inline unsigned int getNoCatAtts() const { return counts_.size(); }
inline unsigned int getNoValues(CategoricalAttribute a) const { return metaData_->getNoValues(a); }
inline const ySubDist getYSubDist(CategoricalAttribute a, CatValue v) const {
return &counts_[a][v*noOfClasses_];
}
InstanceCount count;
std::vector<InstanceCount> classCounts;
private:
// InstanceStream *instanceStream_;
InstanceStream::MetaData* metaData_;
/// Instance counts indexed by attribute, then attribute value, then class.
/// The inner two vectors are flattened into a single vector, indexed by val*noOfClasses + class.
std::vector<std::vector<InstanceCount> > counts_;
unsigned int noOfClasses_; ///< store the number of classes for use in indexing the inner vector
};