presage  0.9.2~beta
reverseTokenizer.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "reverseTokenizer.h"
26 
28  const std::string blanks,
29  const std::string separs)
30  : Tokenizer(stream, blanks, separs)
31 {
32  offset = offend;
33  //assert( stream.good());
34  //assert(!stream.fail());
35  //assert(!stream.bad() );
36  //assert(!stream.eof() );
37  // stream clearing needed because offset is positioned at end
38  stream.clear();
39 
40  //std::cerr << "ReverseTokenizer::ReverseTokenizer() offbeg: " << offbeg
41  // << " offset: " << offset << " offend: " << offend << std::endl;
42 }
43 
45 {}
46 
48 {
49  StreamGuard guard(stream, offset);
50 
51  // store current seek pointer position
52  std::streamoff curroff = offset;
53 
54  // position get pointer at end of stream
55  offset = offend;
56 
57  int count = 0;
58  while (hasMoreTokens()) {
59  nextToken();
60  count++;
61  }
62 
63  // reposition seek get pointer to original position
64  offset = curroff;
65 
66  return count;
67 }
68 
70 {
71  //std::cerr << "ReverseTokenizer::hasMoreTokens() offbeg: " << offbeg
72  // << " offset: " << offset << " offend: " << offend << std::endl;
73  if (offbeg < offset) {
74  return true;
75  } else {
76  return false;
77  }
78 }
79 
81 {
82  StreamGuard guard(stream, offset);
83 
84  int current;
85  std::string str;
86 
87  if (stream.good()) {
88  while (offbeg < offset
89  && str.empty()) {
90  stream.seekg(offset - 1);
91  current = stream.peek();
92 
93  if (offset == offend &&
94  (isSeparator(current) || isBlankspace(current))) {
95  offset--;
96  return str;
97  }
98 
99  while ((isBlankspace(current) || isSeparator(current))
100  && offbeg < offset ) {
101  offset--;
102  stream.seekg(offset - 1);
103  current = stream.peek();
104  }
105 
106  while (!isBlankspace(current)
107  && !isSeparator(current)
108  && offbeg < offset) {
109 
110  if( lowercaseMode() ) {
111  current = tolower( current );
112  }
113 
114  // since the token is read backwards, the string
115  // needs to be reversed by inserting the char at
116  // the front
117  str.insert(str.begin(), current);
118 
119  offset--;
120  stream.seekg(offset - 1);
121  current = stream.peek();
122  }
123  }
124  }
125 
126 // if (stream.good()) {
127 // do {
128 // do {
129 // current = stream.peek();
130 // offset--;
131 // stream.seekg(offset);
132 //
133 // // handle case where last character is a separator by
134 // // returning an empty token
135 // if (offset == offend - 2
136 // && isSeparator(current)) {
137 // return "";
138 // }
139 //
140 // //std::cerr << "[DEBUG] read: "
141 // // << static_cast<char>(current)
142 // // << std::endl;
143 //
144 // if (!isBlankspace(current)
145 // && !isSeparator(current)
146 // && offset >= offbeg - 1) {
147 //
148 // if( lowercaseMode() ) {
149 // current = tolower( current );
150 // }
151 //
152 // // since the token is read backwards, the string
153 // // needs to be reversed by inserting the char at
154 // // the front
155 // str.insert(str.begin(), current);
156 //
157 // //std::cerr << "[DEBUG] pushed: "
158 // // << static_cast<char>(current)
159 // // << std::endl;
160 // //std::cerr << "[DEBUG] partial string: "
161 // // << str << std::endl;
162 // }
163 // } while (!isBlankspace(current)
164 // && !isSeparator(current)
165 // && (offset >= offbeg));
166 // } while (str.empty() && (offset >= offbeg));
167 // }
168 
169  //std::cerr << "[DEBUG] token: " << str << std::endl;
170 
171  return str;
172 }
173 
175 {
176  return static_cast<double>(offend - offset) / (offend - offbeg);
177 }
virtual ~ReverseTokenizer()
bool isBlankspace(const int character) const
Definition: tokenizer.cpp:91
ReverseTokenizer(std::istream &stream, const std::string blanks, const std::string separs)
bool lowercaseMode() const
Definition: tokenizer.cpp:86
virtual bool hasMoreTokens() const
std::streamoff offset
Definition: tokenizer.h:148
virtual int countTokens()
std::streamoff offend
Definition: tokenizer.h:147
virtual double progress() const
std::istream & stream
Definition: tokenizer.h:144
virtual std::string nextToken()
bool isSeparator(const int character) const
Definition: tokenizer.cpp:101
std::streamoff offbeg
Definition: tokenizer.h:146