-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature.hpp
135 lines (96 loc) · 3.46 KB
/
feature.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*
* feature.hpp
*
* Created on: May 3, 2011
* Author: Ethan L. Schreiber
* Adapted from java code written by Andrew McGregor
*/
#ifndef FEATURE_HPP_
#define FEATURE_HPP_
#include <cstdio>
#include <cstring>
#include <string>
using std::string;
// Debug switch: when non-zero, every training update is traced on stderr.
// Guarded so it can be enabled from the build line (e.g. -DFEATURE_DEBUG=1).
#ifndef FEATURE_DEBUG
#define FEATURE_DEBUG 0
#endif

/**
 * feature.hpp
 *
 * Stores counts and computes probabilities for one boolean feature.
 *
 * The feature is "the text contains the substring given at construction".
 * Training examples are recorded per class (0 or 1) together with whether
 * the feature was absent (0) or present (1); the recorded counts are then
 * turned into smoothed conditional probabilities P(feature value | class).
 */
class Feature {
protected:
    /* m_counts[c][f] is the number of training examples seen so far that
     * belong to class c (0 or 1) and for which the feature was absent
     * (f == 0) or present (f == 1).
     *
     * So m_counts[0][0] is the number of training examples that fall into
     * class 0 and do not have the feature.
     */
    int m_counts[2][2];

    /* m_totalPerClass[c] is the number of training examples seen for class c.
     * It is kept equal to m_counts[c][0] + m_counts[c][1].
     */
    int m_totalPerClass[2];

    // Substring whose occurrence in a text defines this feature.
    std::string m_toMatch;

public:
    /**
     * Creates a feature that tests for the substring toMatch.
     * All counters start at zero.
     */
    Feature(const std::string& toMatch) : m_toMatch(toMatch) {
        for (int classNumber = 0; classNumber < 2; ++classNumber) {
            m_counts[classNumber][0] = 0;
            m_counts[classNumber][1] = 0;
            m_totalPerClass[classNumber] = 0;
        }
    }

    /** Returns the substring this feature looks for. */
    std::string getFeatureMatch() const { return m_toMatch; }

// Index constants for "feature found" / "feature not found".
// Not referenced inside this class; kept because they are file-wide macros
// that other code including this header may rely on.
// NOTE(review): the original comment described them as "Class 1 = String
// Found, Class 0 = String Not found" -- confirm the intended use with callers.
#define FOUND_CLASS 1
#define NOT_FOUND_CLASS 0

    /**
     * Determines whether this feature occurs in the string s.
     *
     * @param s text to search
     * @return 1 if the feature's substring occurs anywhere in s; 0 otherwise
     */
    int isFeaturePresent(const std::string& s) const {
        return (s.find(m_toMatch) != std::string::npos) ? 1 : 0;
    }

    /**
     * Records one training example.
     *
     * @param featurePresence 0 if the feature was absent in the example,
     *                        1 if it was present
     * @param classNumber     class of the example (0 or 1)
     *
     * Both arguments must be 0 or 1; they are used directly as array
     * indices and are not range-checked.
     */
    void addTrainingExample(int featurePresence, int classNumber) {
        // Record the example under its (class, feature value) cell.
        m_counts[classNumber][featurePresence] += 1;

        if (FEATURE_DEBUG) {
            std::fprintf(stderr, "Feature.addTrainingExample: FEATURE: %s | m_count[%d][%d] = %d\n", m_toMatch.c_str(), classNumber, featurePresence, m_counts[classNumber][featurePresence]);
        }
        if (FEATURE_DEBUG) {
            std::fprintf(stderr, "Feature.addTrainingExample: BEFORE FEATURE: %s | m_totalPerClass[%d] = %d\n", m_toMatch.c_str(), classNumber, m_totalPerClass[classNumber]);
        }

        // Keep the per-class total in step with the cell counts.
        m_totalPerClass[classNumber] += 1;

        if (FEATURE_DEBUG) {
            std::fprintf(stderr, "Feature.addTrainingExample: AFTER FEATURE: %s | m_totalPerClass[%d] = %d\n", m_toMatch.c_str(), classNumber, m_totalPerClass[classNumber]);
        }
    }

    /**
     * Returns the smoothed probability that the feature takes the value
     * featurePresence (0 = absent, 1 = present) given class classNumber.
     *
     * Add-one smoothing is applied: 1 is added to the observed count and 2
     * (one per possible feature value) to the class total. This keeps the
     * result strictly between 0 and 1, makes the probabilities of "absent"
     * and "present" sum to 1, and yields 0.5 for a class with no training
     * examples instead of dividing by zero.
     *
     * Both arguments must be 0 or 1; they are not range-checked.
     */
    double getProbOfFeatureGivenClass(int featurePresence, int classNumber) const {
        // Work in double so the +1 / +2 cannot overflow an int.
        const double smoothedCount =
            static_cast<double>(m_counts[classNumber][featurePresence]) + 1.0;
        const double smoothedTotal =
            static_cast<double>(m_totalPerClass[classNumber]) + 2.0;
        return smoothedCount / smoothedTotal;
    }
};
#endif /* FEATURE_HPP_ */