presage  0.9.2~beta
ARPAPredictor.h
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #ifndef PRESAGE_ARPAPREDICTOR
26 #define PRESAGE_ARPAPREDICTOR
27 
28 #include "predictor.h"
29 #include "../core/logger.h"
30 #include "../core/progress.h"
31 #include "../core/dispatcher.h"
32 
33 #include <assert.h>
34 #include <fstream>
35 #include <iomanip>
36 
37 
/**
 * Ordering predicate for floats that ranks larger values first.
 *
 * operator() yields true only when the first argument is strictly greater
 * than the second; equal values compare as "not before" each other.
 */
class cmp {
public:
    bool operator()(const float& f1, const float& f2) const
    {
        return f1 > f2;
    }
};
44 
/**
 * Pair of float values stored per n-gram entry.
 *
 * NOTE(review): judging by the names, logProb is presumably the log
 * probability and logAlfa the log back-off weight read from the ARPA
 * file -- confirm against the loader implementation.
 */
class ARPAData
{
public:
    /**
     * Default constructor. Both fields are set to zero so that an entry
     * created implicitly (for example by std::map::operator[] on a missing
     * key) never exposes indeterminate values.
     */
    ARPAData() : logProb(0.0f), logAlfa(0.0f) {}

    /**
     * @param lp value stored in logProb
     * @param la value stored in logAlfa
     */
    ARPAData(float lp, float la) : logProb(lp), logAlfa(la) {}

    float logProb;
    float logAlfa;
};
53 
/**
 * Key made of three integer word codes, usable as a std::map key.
 *
 * Keys are ordered lexicographically: first by key1, then key2, then key3.
 */
class TrigramKey
{
public:
    /**
     * @param wd1 code stored in key1
     * @param wd2 code stored in key2
     * @param wd3 code stored in key3
     */
    TrigramKey(int wd1, int wd2, int wd3) : key1(wd1), key2(wd2), key3(wd3) {}

    /**
     * Strict lexicographic "less than" over (key1, key2, key3).
     *
     * @return true when this key sorts before @p right.
     */
    bool operator<(const TrigramKey &right) const
    {
        // The first component that differs decides the ordering.
        if (key1 != right.key1)
            return key1 < right.key1;

        if (key2 != right.key2)
            return key2 < right.key2;

        return key3 < right.key3;
    }

    /** @return true when all three components match. */
    bool operator==(const TrigramKey &right) const
    {
        return (key1 == right.key1 && key2 == right.key2 && key3 == right.key3);
    }

    int key1;
    int key2;
    int key3;
};
83 
/**
 * Key made of two integer word codes, usable as a std::map key.
 *
 * Keys are ordered lexicographically: first by key1, then by key2.
 */
class BigramKey
{
public:
    /**
     * @param wd1 code stored in key1
     * @param wd2 code stored in key2
     */
    BigramKey(int wd1, int wd2) : key1(wd1), key2(wd2) {}

    /**
     * Strict lexicographic "less than" over (key1, key2).
     *
     * @return true when this key sorts before @p right.
     */
    bool operator<(const BigramKey &right) const
    {
        // The first component that differs decides the ordering.
        if (key1 != right.key1)
            return key1 < right.key1;

        return key2 < right.key2;
    }

    /**
     * Equality between two bigram keys.
     *
     * The right-hand operand is a BigramKey; it was previously declared as a
     * TrigramKey, which made BigramKey == BigramKey impossible to compile and
     * allowed a bigram to be compared against a trigram.
     *
     * @return true when both components match.
     */
    bool operator==(const BigramKey &right) const
    {
        return (key1 == right.key1 && key2 == right.key2);
    }

    int key1;
    int key2;
};
108 
// Predictor that is also an Observer (see the two public base classes).
// NOTE(review): judging by the names, this predictor is presumably backed by
// an ARPA-format n-gram table plus a vocabulary file -- confirm in the
// implementation file, which is not visible here.
112 class ARPAPredictor : public Predictor, public Observer {
113 
114 public:
 // Parameters are unnamed in this declaration; their roles (configuration,
 // context tracker, and presumably a predictor name) should be confirmed
 // against the definition.
115  ARPAPredictor(Configuration*, ContextTracker*, const char*);
116  ~ARPAPredictor();
117 
 // Generate prediction.
 // NOTE(review): `size` is presumably the maximum number of suggestions and
 // `filter` a list of C strings used to restrict candidates -- confirm.
118  virtual Prediction predict(const size_t size, const char** filter) const;
119 
 // Receives a vector of strings describing a change.
 // NOTE(review): what is learnt from it is not visible in this header.
120  virtual void learn(const std::vector<std::string>& change);
121 
 // Takes a pointer to an Observable.
 // NOTE(review): presumably the Observer notification hook -- confirm
 // against the Observer base class.
122  virtual void update (const Observable* variable);
123 
 // Setters taking the new value as a string. Note that set_timeout also
 // takes a string although the `timeout` member below is an int, so any
 // conversion happens in the implementation.
124  void set_vocab_filename (const std::string& value);
125  void set_arpa_filename (const std::string& value);
126  void set_timeout (const std::string& value);
127 
128 private:
 // NOTE(review): the upper-case strings look like configuration variable
 // names matching the lower-case settings below -- confirm in the
 // constructor.
129  std::string LOGGER;
130  std::string ARPAFILENAME;
131  std::string VOCABFILENAME;
132  std::string TIMEOUT;
133 
134  std::string arpaFilename;
135  std::string vocabFilename;
136  int timeout;
137 
 // Two maps with mirrored key/value types: string -> int and int -> string.
138  std::map<std::string,int> vocabCode;
139  std::map<int,std::string> vocabDecode;
140 
 // One table per n-gram order. Unigram and bigram entries hold an ARPAData
 // (two floats); trigram entries hold a single float.
141  std::map<int,ARPAData> unigramMap;
142  std::map<BigramKey,ARPAData>bigramMap;
143  std::map<TrigramKey,float>trigramMap;
144 
145  void loadVocabulary();
146  void createARPATable();
147  bool matchesPrefixAndFilter(std::string , std::string , const char** ) const;
148 
 // Each takes one string by value.
 // NOTE(review): presumably one line of the ARPA file per call -- confirm.
149  void addUnigram(std::string);
150  void addBigram(std::string);
151  void addTrigram(std::string);
152 
 // Take three and two int arguments respectively and return a float.
 // NOTE(review): the ints are presumably word codes -- confirm.
153  inline float computeTrigramBackoff(int,int,int) const;
154  inline float computeBigramBackoff(int,int) const;
155 
 // NOTE(review): the listing jumps from line 155 to 169 with only the three
 // bare line numbers below in between, so member declarations are missing
 // from this view. The cross-reference index at the end of the page names
 // unigramProg, bigramProg, trigramProg (ProgressBar<char>*) and dispatcher
 // (Dispatcher<ARPAPredictor>); restore them from the original header.
159 
163 
167 
169 };
170 
171 #endif // PRESAGE_ARPAPREDICTOR
void loadVocabulary()
bool operator()(const float &f1, const float &f2) const
Definition: ARPAPredictor.h:40
std::map< TrigramKey, float > trigramMap
BigramKey(int wd1, int wd2)
Definition: ARPAPredictor.h:87
ProgressBar< char > * unigramProg
Dispatcher< ARPAPredictor > dispatcher
ARPAPredictor(Configuration *, ContextTracker *, const char *)
bool operator<(const BigramKey &right) const
Definition: ARPAPredictor.h:89
bool matchesPrefixAndFilter(std::string, std::string, const char **) const
virtual Prediction predict(const size_t size, const char **filter) const
Generate prediction.
std::string ARPAFILENAME
virtual void update(const Observable *variable)
virtual void learn(const std::vector< std::string > &change)
ProgressBar< char > * trigramProg
float logAlfa
Definition: ARPAPredictor.h:51
std::string LOGGER
void set_vocab_filename(const std::string &value)
float logProb
Definition: ARPAPredictor.h:49
void addBigram(std::string)
void set_arpa_filename(const std::string &value)
void addTrigram(std::string)
bool operator==(const TrigramKey &right) const
Definition: ARPAPredictor.h:75
void set_timeout(const std::string &value)
ProgressBar< char > * bigramProg
std::map< std::string, int > vocabCode
float computeBigramBackoff(int, int) const
bool operator<(const TrigramKey &right) const
Definition: ARPAPredictor.h:59
std::map< int, std::string > vocabDecode
TrigramKey(int wd1, int wd2, int wd3)
Definition: ARPAPredictor.h:57
std::string TIMEOUT
Tracks user interaction and context.
std::string VOCABFILENAME
void addUnigram(std::string)
std::string vocabFilename
std::map< int, ARPAData > unigramMap
float computeTrigramBackoff(int, int, int) const
std::map< BigramKey, ARPAData > bigramMap
bool operator==(const TrigramKey &right) const
void createARPATable()
std::string arpaFilename