presage  0.9.2~beta
databaseConnector.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "databaseConnector.h"
26 
27 #include "../../core/utility.h"
28 
29 #include <list>
30 #include <sstream>
31 #include <stdlib.h>
32 #include <assert.h>
33 
34 DatabaseConnector::DatabaseConnector(const std::string database_name,
35  const size_t cardinality,
36  const bool read_write)
37  : logger("DatabaseConnector", std::cerr)
38 {
39  set_database_filename (database_name);
41  set_read_write_mode (read_write);
42 }
43 
44 DatabaseConnector::DatabaseConnector(const std::string database_name,
45  const size_t cardinality,
46  const bool read_write,
47  const std::string& log_level)
48  : logger("DatabaseConnector", std::cerr, log_level)
49 {
50  set_database_filename (database_name);
52  set_read_write_mode (read_write);
53 }
54 
56 {}
57 
58 void DatabaseConnector::createNgramTable(const size_t n) const
59 {
60  if (n > 0) {
61  std::stringstream query;
62  std::stringstream unique;
63  query << "CREATE TABLE";
64 // This #ifdef does not belong here, but unfortunately SQLite 2.x does
65 // not support the IF NOT EXISTS SQL clause.
66 #ifndef HAVE_SQLITE_H
67  query << " IF NOT EXISTS";
68 #endif
69  query << " _" << n << "_gram (";
70  for (int i = n - 1; i >= 0; i--) {
71  if (i != 0) {
72  unique << "word_" << i << ", ";
73  query << "word_" << i << " TEXT, ";
74  } else {
75  unique << "word";
76  query << "word TEXT, count INTEGER, UNIQUE(" << unique.str() << ") );";
77  }
78  }
79 
80  executeSql(query.str());
81  } else {
82  // TODO
83  // throw exception
84  }
85 }
86 
88 {
89  std::string query = "SELECT SUM(count) FROM _1_gram;";
90 
91  NgramTable result = executeSql(query);
92 
93  logger << DEBUG << "NgramTable:";
94  for (size_t i = 0; i < result.size(); i++) {
95  for (size_t j = 0; j < result[i].size(); j++) {
96  logger << DEBUG << result[i][j] << '\t';
97  }
98  logger << DEBUG << endl;
99  }
100 
101  return extractFirstInteger(result);
102 }
103 
105 {
106  std::stringstream query;
107  query << "SELECT count "
108  << "FROM _" << ngram.size() << "_gram"
109  << buildWhereClause(ngram) << ";";
110 
111  NgramTable result = executeSql(query.str());
112 
113  logger << DEBUG << "NgramTable:";
114  for (size_t i = 0; i < result.size(); i++) {
115  for (size_t j = 0; j < result[i].size(); j++) {
116  logger << DEBUG << result[i][j] << '\t';
117  }
118  logger << DEBUG << endl;
119  }
120 
121  return extractFirstInteger(result);
122 }
123 
124 NgramTable DatabaseConnector::getNgramLikeTable(const Ngram ngram, const char** filter, const int count_threshold, int limit) const
125 {
126  std::stringstream query;
127  query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
128  << "FROM _" << ngram.size() << "_gram"
129  << buildWhereLikeClause(ngram, filter, count_threshold)
130  << " ORDER BY count DESC";
131  if (limit < 0) {
132  query << ";";
133  } else {
134  query << " LIMIT " << limit << ';';
135  }
136 
137  return executeSql(query.str());
138 }
139 
141 {
142  int count = getNgramCount(ngram);
143 
144  if (count > 0) {
145  // the ngram was found in the database
146  updateNgram(ngram, ++count);
147 
148  logger << DEBUG << "Updated ngram to " << count << endl;
149 
150  } else {
151  // the ngram was not found in the database
152  count = 1;
153  insertNgram(ngram, count);
154 
155  logger << DEBUG << "Inserted ngram" << endl;
156 
157  }
158  return count;
159 }
160 
161 void DatabaseConnector::removeNgram(const Ngram ngram) const
162 {}
163 
164 void DatabaseConnector::insertNgram(const Ngram ngram, const int count) const
165 {
166  std::stringstream query;
167 
168  query << "INSERT INTO _" << ngram.size() << "_gram "
169  << buildValuesClause(ngram, count)
170  << ";";
171 
172  executeSql(query.str());
173 }
174 
175 void DatabaseConnector::updateNgram(const Ngram ngram, const int count) const
176 {
177  std::stringstream query;
178 
179  query << "UPDATE _" << ngram.size() << "_gram "
180  << "SET count = " << count
181  << buildWhereClause(ngram) << ";";
182 
183  executeSql(query.str());
184 }
185 
186 std::string DatabaseConnector::buildWhereClause(const Ngram ngram) const
187 {
188  std::stringstream where_clause;
189  where_clause << " WHERE";
190  for (size_t i = 0; i < ngram.size(); i++) {
191  if (i < ngram.size() - 1) {
192  where_clause << " word_" << ngram.size() - i - 1 << " = '"
193  << sanitizeString(ngram[i]) << "' AND";
194  } else {
195  where_clause << " word = '" << sanitizeString(ngram[ngram.size() - 1]) << "'";
196  }
197  }
198  return where_clause.str();
199 }
200 
202  const char** filter,
203  const int count_threshold) const
204 {
205  std::stringstream where_clause;
206  where_clause << " WHERE";
207  for (size_t i = 0; i < ngram.size(); i++) {
208  if (i < ngram.size() - 1) {
209  where_clause << " word_" << ngram.size() - i - 1 << " = '"
210  << sanitizeString(ngram[i]) << "' AND";
211  } else {
212  if(filter == 0)
213  where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
214  else {
215  std::string true_prefix = sanitizeString(ngram[ngram.size() - 1]);
216  where_clause << " (";
217  for (int j = 0; filter[j] != 0; j++) {
218  if (j) {
219  where_clause << " OR ";
220  }
221  where_clause << " word LIKE '" << true_prefix << filter[j] << "%'";
222  }
223  where_clause << ')';
224  }
225  if (count_threshold > 0) {
226  where_clause << " AND count >= " << count_threshold;
227  }
228  }
229  }
230  return where_clause.str();
231 }
232 
233 
234 std::string DatabaseConnector::buildSelectLikeClause(const int cardinality) const
235 {
236  assert(cardinality > 0);
237 
238  std::stringstream result;
239  for (int i = cardinality - 1; i >= 0; i--) {
240  if (i != 0) {
241  result << "word_" << i << ", ";
242  } else {
243  result << "word, count";
244  }
245  }
246 
247  return result.str();
248 }
249 
250 std::string DatabaseConnector::buildValuesClause(const Ngram ngram, const int count) const
251 {
252  std::stringstream values_clause;
253  values_clause << "VALUES(";
254  for (size_t i = 0; i < ngram.size(); i++) {
255  if (i < ngram.size() - 1) {
256  values_clause << "'" << sanitizeString(ngram[i]) << "', ";
257  } else {
258  values_clause << "'" << sanitizeString(ngram[i]) << "', " << count << ")";
259  }
260  }
261  return values_clause.str();
262 }
263 
264 std::string DatabaseConnector::sanitizeString(const std::string str) const
265 {
266  // TODO
267  // just return the string for the time being
268  // REVISIT
269  // TO BE DONE
270  // TBD
271  return str;
272 }
273 
275 {
276  // Initialize count to zero and then check that we have at least
277  // an entry in the table of ngram counts returned by the
278  // executeSql() method. If so, convert it into an integer and
279  // return it.
280  //
281  // REVISIT: make conversion to integer more robust (strtol ??)
282  //
283  int count = 0;
284  if (table.size() > 0) {
285  if (table[0].size() > 0) {
286  count = atoi(table[0][0].c_str());
287  }
288  }
289 
290  logger << DEBUG << "table: ";
291  for (size_t i = 0; i < table.size(); i++) {
292  for (size_t j = 0; j < table[i].size(); j++) {
293  logger << DEBUG << table[i][j] << '\t';
294  }
295  logger << DEBUG << endl;
296  }
297 
298  return (count > 0 ? count : 0);
299 }
300 
302 {
303  executeSql("BEGIN TRANSACTION;");
304 }
305 
307 {
308  executeSql("END TRANSACTION;");
309 }
310 
312 {
313  executeSql("ROLLBACK TRANSACTION;");
314 }
315 
317 {
318  return database_filename;
319 }
320 
321 std::string DatabaseConnector::set_database_filename (const std::string& filename)
322 {
323  std::string prev_filename = database_filename;
324 
325  database_filename = expand_variables (filename);
326 
327  // make an attempt at determining whether directory where language
328  // model database is located exists and try to create it if it
329  // does not... only cater for one directory level to create it.
330  //
331  std::string dir = Utility::dirname (database_filename);
332  if (! dir.empty()) {
333  // check that specified directory exists and accessible
334  if (! Utility::is_directory_usable (dir)) {
335  // create it if not
337  }
338  }
339 
340  return prev_filename;
341 }
342 
343 std::string DatabaseConnector::expand_variables (std::string filepath) const
344 {
345  // scan the filepath for variables, which follow the same pattern
346  // as shell variables - strings enclosed in '${' and '}'
347  //
348  const std::string start_marker = "${";
349  const std::string end_marker = "}";
350 
351  std::list<std::string> variables;
352 
353  std::string::size_type pos_start = filepath.find (start_marker);
354  while (pos_start != std::string::npos)
355  {
356  std::string::size_type pos_end = filepath.find (end_marker, pos_start);
357  if (pos_end != std::string::npos) {
358  variables.push_back (filepath.substr(pos_start + start_marker.size(), pos_end - end_marker.size() - pos_start - 1));
359  }
360 
361  pos_start = filepath.find (start_marker, pos_end);
362  }
363 
364  for (std::list<std::string>::const_iterator it = variables.begin();
365  it != variables.end();
366  it++)
367  {
368  substitute_variable_in_string(*it, filepath);
369  }
370 
371  return filepath;
372 }
373 
374 void DatabaseConnector::substitute_variable_in_string (const std::string& variable_name, std::string& filepath) const
375 {
376  std::string variable_token = "${" + variable_name + "}";
377 
378  for (std::string::size_type pos = filepath.find (variable_token);
379  pos != std::string::npos;
380  pos = filepath.find (variable_token, pos))
381  {
382  const char* value = getenv(variable_name.c_str());
383  if (value)
384  {
385  filepath.replace (pos,
386  variable_token.size(),
387  value);
388  }
389  else
390  {
391  // handle "special" variables
392  if (variable_name == "HOME")
393  {
394  value = getenv("USERPROFILE");
395  if (value)
396  {
397  filepath.replace (pos,
398  variable_token.size(),
399  value);
400  }
401  }
402  else
403  {
404  // FIXME: maybe throw exception instead of leaving
405  // variable name in string?
406  //
407  filepath.replace (pos,
408  variable_token.size(),
409  variable_name);
410  }
411  }
412  }
413 }
414 
415 void DatabaseConnector::set_cardinality (const size_t card)
416 {
417  cardinality = card;
418 }
419 
421 {
422  return cardinality;
423 }
424 
425 void DatabaseConnector::set_read_write_mode (const bool read_write)
426 {
427  read_write_mode = read_write;
428 }
429 
431 {
432  return read_write_mode;
433 }
bool get_read_write_mode() const
size_t get_cardinality() const
std::string set_database_filename(const std::string &filename)
int getNgramCount(const Ngram ngram) const
void insertNgram(const Ngram ngram, const int count) const
std::string expand_variables(std::string filename) const
void set_read_write_mode(const bool read_write)
int getUnigramCountsSum() const
NgramTable getNgramLikeTable(const Ngram ngram, const char **filter, const int count_threshold, int limit=-1) const
void set_cardinality(const size_t cardinality)
std::string database_filename
DatabaseConnector(const std::string database_name, const size_t cardinality, const bool read_write)
void removeNgram(const Ngram ngram) const
std::string buildValuesClause(const Ngram ngram, const int count) const
int extractFirstInteger(const NgramTable &) const
static void create_directory(const std::string &dir)
Definition: utility.cpp:330
virtual NgramTable executeSql(const std::string query) const =0
std::vector< Ngram > NgramTable
int incrementNgramCount(const Ngram ngram) const
virtual void beginTransaction() const
void updateNgram(const Ngram ngram, const int count) const
virtual void rollbackTransaction() const
std::string buildWhereClause(const Ngram ngram) const
void createNgramTable(const size_t cardinality) const
std::string buildWhereLikeClause(const Ngram ngram, const char **filter, const int count_threshold) const
virtual void endTransaction() const
std::string buildSelectLikeClause(const int cardinality) const
std::string sanitizeString(const std::string) const
Logger< char > logger
void substitute_variable_in_string(const std::string &variable_name, std::string &filepath) const
static std::string dirname(const std::string &)
Definition: utility.cpp:275
std::string get_database_filename() const
Definition: ngram.h:33
static bool is_directory_usable(const std::string &dir)
Definition: utility.cpp:307
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
Definition: logger.h:278