41 "ARPAPredictor, a predictor relying on an ARPA language model",
42 "ARPAPredictor, long description." 63 logger << INFO <<
"VOCABFILENAME: " << value <<
endl;
69 logger << INFO <<
"ARPAFILENAME: " << value <<
endl;
75 logger << INFO <<
"TIMEOUT: " << value <<
endl;
81 std::ifstream vocabFile;
89 while(std::getline(vocabFile,row))
97 logger << DEBUG <<
"["<<row<<
"] -> "<< code<<
endl;
102 logger << DEBUG <<
"Loaded "<<code<<
" words from vocabulary" <<
endl;
108 std::ifstream arpaFile;
124 bool startData =
false;
126 while(std::getline(arpaFile,row))
135 if(row ==
"\\data\\")
142 if( startData ==
true && currOrder == 0)
144 if( row.find(
"ngram 1")==0 )
151 if( row.find(
"ngram 2")==0)
158 if( row.find(
"ngram 3")==0)
166 if( row ==
"\\1-grams:" && startData)
174 if( row ==
"\\2-grams:" && startData)
182 if( row ==
"\\3-grams:" && startData)
216 std::stringstream str(row);
232 logger << DEBUG <<
"adding unigram ["<<wd1Str<<
"] -> "<<logProb<<
" "<<logAlfa<<
endl;
243 std::stringstream str(row);
254 if(wd1Str !=
OOV && wd2Str !=
OOV)
261 logger << DEBUG <<
"adding bigram ["<<wd1Str<<
"] ["<<wd2Str<<
"] -> "<<logProb<<
" "<<logAlfa<<
endl;
270 std::stringstream str(row);
282 if(wd1Str !=
OOV && wd2Str !=
OOV && wd3Str !=
OOV)
289 logger << DEBUG <<
"adding trigram ["<<wd1Str<<
"] ["<<wd2Str<<
"] ["<<wd3Str<<
"] -> "<<logProb <<
endl;
308 return word.find(prefix)==0;
310 for(
int j = 0; filter[j] != 0; j++)
312 std::string pattern = prefix+std::string(filter[j]);
313 if(word.find(pattern)==0)
326 std::vector<std::string> tokens(cardinality);
332 std::multimap< float, std::string, cmp > result;
334 logger << DEBUG <<
"["<<wd1Str<<
"]"<<
" ["<<wd2Str<<
"] "<<
"["<<prefix<<
"]"<<
endl;
337 std::map<std::string,int>::const_iterator wd1It,wd2It;
387 std::pair<const float,std::string> p (
unigramMap.find(it->first)->second.logProb,
395 size_t numSuggestions = 0;
396 for(std::multimap< float, std::string, cmp >::const_iterator it = result.begin();
397 it != result.end() && numSuggestions < max_partial_prediction_size;
418 logger << DEBUG <<
"returning "<<trigramIt->second <<
endl;
419 return trigramIt->second;
423 std::map<BigramKey,ARPAData>::const_iterator bigramIt =
bigramMap.find(
BigramKey(wd1,wd2));
433 logger << DEBUG <<
"no bigram w1,w2 exist" <<
endl;
446 std::map<BigramKey,ARPAData>::const_iterator bigramIt =
bigramMap.find(
BigramKey(wd1,wd2));
448 return bigramIt->second.logProb;
457 logger << DEBUG <<
"learn() method called" <<
endl;
458 logger << DEBUG <<
"learn() method exited" <<
endl;
std::map< TrigramKey, float > trigramMap
ProgressBar< char > * unigramProg
Dispatcher< ARPAPredictor > dispatcher
void dispatch(const Observable *var)
ARPAPredictor(Configuration *, ContextTracker *, const char *)
bool matchesPrefixAndFilter(std::string, std::string, const char **) const
void update(const double percentage)
virtual Prediction predict(const size_t size, const char **filter) const
Generate prediction.
virtual void update(const Observable *variable)
virtual void learn(const std::vector< std::string > &change)
ProgressBar< char > * trigramProg
virtual void set_logger(const std::string &level)
void set_vocab_filename(const std::string &value)
const std::string PREDICTORS
void addBigram(std::string)
void set_arpa_filename(const std::string &value)
void addTrigram(std::string)
void set_timeout(const std::string &value)
ProgressBar< char > * bigramProg
virtual std::string get_name() const =0
std::map< std::string, int > vocabCode
float computeBigramBackoff(int, int) const
void map(Observable *var, const mbr_func_ptr_t &ptr)
static char * strtolower(char *)
std::map< int, std::string > vocabDecode
ContextTracker * contextTracker
void addSuggestion(Suggestion)
Tracks user interaction and context.
std::string VOCABFILENAME
virtual std::string get_value() const =0
void addUnigram(std::string)
std::string vocabFilename
std::map< int, ARPAData > unigramMap
float computeTrigramBackoff(int, int, int) const
std::map< BigramKey, ARPAData > bigramMap
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
std::string getToken(const int) const