presage  0.9.2~beta
forwardTokenizer.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "forwardTokenizer.h"
26 
28  const std::string blankspaces,
29  const std::string separators)
30  : Tokenizer(stream, blankspaces, separators)
31 {
    // Constructor body. stream, blankspaces and separators are forwarded
    // unchanged to the Tokenizer base-class constructor in the initializer
    // list above; the only work done here is to start the read position
    // (offset) at offbeg.
    // NOTE(review): the first line of this signature (listing line 27) is
    // not present in this listing.
32  //std::cerr << "ForwardTokenizer::ForwardTokenizer()" << std::endl;
33  offset = offbeg;
34 }
35 
37 {} // Empty body: nothing is done here. Presumably the body of ~ForwardTokenizer(); its signature line (listing line 36) is missing from this listing.
38 
40 {
    // Counts tokens by rewinding offset to offbeg and calling nextToken()
    // for as long as hasMoreTokens() is true, then returns that count as an
    // int. offset is put back to the value it had on entry before returning.
    // NOTE(review): the signature line (listing line 39) is missing from this
    // listing; the index at the end of the page lists
    // `virtual int countTokens()`, which is presumably this function.
    // NOTE(review): StreamGuard is defined elsewhere; presumably it saves and
    // restores the stream state for the duration of this scope -- confirm.
41  StreamGuard guard(stream, offset);
42 
43  // remember the offset on entry so it can be restored below
44  std::streamoff curroff = offset;
45 
46  // restart from the beginning of the tokenized range
47  offset = offbeg;
48 
    // Every nextToken() call is counted, whatever it returns -- including a
    // call that returns an empty string.
49  int count = 0;
50  while (hasMoreTokens()) {
51  count++;
52  nextToken();
53  }
54 
55  // put the offset back where it was on entry
56  offset = curroff;
57 
58  return count;
59 }
60 
62 {
    // Returns true while offset is still before offend, and false once
    // offset has reached or passed it. Only the two offsets are compared;
    // the stream is not read (the StreamGuard below is commented out).
    // NOTE(review): the signature line (listing line 61) is missing from this
    // listing; the index at the end of the page lists
    // `virtual bool hasMoreTokens() const`, which is presumably this function.
63  //StreamGuard guard(stream, offset);
64 
65  if (offset >= offend) {
66  return false;
67  } else {
68  return true;
69  }
70 }
71 
73 {
    // Reads one token starting at offset and returns it as a std::string.
    // Leading characters for which isBlankspace() or isSeparator() is true
    // are skipped; the following characters are appended to the token until
    // a blankspace/separator character is seen or offset reaches offend.
    // When lowercaseMode() is true each character is passed through
    // tolower() before being appended. offset is advanced past every
    // character that was skipped or consumed.
    // The returned string is empty when the stream is not good(), when
    // offset is already at or past offend, or when no token character is
    // found. If the stream is not good() a message is written to std::cerr.
    // NOTE(review): the signature line (listing line 72) is missing from this
    // listing; the index at the end of the page lists
    // `virtual std::string nextToken()`, which is presumably this function.
    // NOTE(review): StreamGuard is defined elsewhere; the first peek() below
    // is issued without a seekg() in this function, so the guard presumably
    // positions the stream at offset -- confirm.
74  StreamGuard guard(stream, offset);
75 
76  int current;
77  std::string str;
78 
79  if (stream.good()) { // good() is true only when none of badbit, failbit and eofbit is set
80  current = stream.peek();
81  if (offset < offend) {
82 
    // Skip leading blankspace/separator characters.
    // NOTE(review): this loop is not bounded by offend; it ends only when
    // peek() yields a value that is neither a blankspace nor a separator.
83  while (isBlankspace(current)
84  || isSeparator(current)) {
85  offset++;
86  stream.seekg(offset);
87  current = stream.peek();
88  }
89 
    // Collect token characters up to the next blankspace/separator or offend.
90  while (!isBlankspace(current)
91  && !isSeparator(current)
92  && offset < offend) {
93 
94  //std::cerr << "[DEBUG] read: "
95  // << static_cast<char>(current)
96  // << std::endl;
97 
98  if( lowercaseMode() ) {
99  current = tolower( current );
100  }
101 
102  str.push_back(current);
103 
104  //std::cerr << "[DEBUG] pushed: "
105  // << static_cast<char>(current)
106  // << std::endl;
107 
    // Advance by one character and look at the next one without extracting it.
108  offset++;
109  stream.seekg(offset);
110  current = stream.peek();
111  }
112  }
113 
    // The commented-out do/while block below is disabled code (it looks like
    // an earlier form of the two loops above).
114 // do {
115 // do {
116 // current = stream.peek();
117 // offset++;
118 // stream.seekg(offset);
119 //
120 // //std::cerr << "[DEBUG] read: "
121 // // << static_cast<char>(current)
122 // // << std::endl;
123 //
124 // if ( !isBlankspace(current)
125 // && !isSeparator(current)
126 // && offset <= offend) {
127 //
128 // if( lowercaseMode() ) {
129 // current = tolower( current );
130 // }
131 //
132 // str.push_back(current);
133 //
134 // //std::cerr << "[DEBUG] pushed: "
135 // // << static_cast<char>(current)
136 // // << std::endl;
137 // }
138 // } while ( !isBlankspace(current)
139 // && !isSeparator(current)
140 // && offset < offend);
141 // } while (str.empty() && (offset < offend));
142  } else {
143  std::cerr << "stream is NOT good!" << std::endl;
144  }
145 
146  //std::cerr << "[DEBUG] token: " << str << std::endl;
147 
148  return str;
149 }
150 
152 {
    // Returns offset divided by offend as a double. offset is cast to double
    // first so the division is carried out in floating point rather than as
    // an integer division. offbeg is not subtracted from either operand.
    // NOTE(review): there is no guard for offend == 0; in that case the
    // quotient is not a finite number.
    // NOTE(review): the signature line (listing line 151) is missing from
    // this listing; the index at the end of the page lists
    // `virtual double progress() const`, which is presumably this function.
153  return static_cast<double>(offset) / offend;
154 }
155 
bool isBlankspace(const int character) const
Definition: tokenizer.cpp:91
bool lowercaseMode() const
Definition: tokenizer.cpp:86
std::streamoff offset
Definition: tokenizer.h:148
virtual double progress() const
std::streamoff offend
Definition: tokenizer.h:147
virtual bool hasMoreTokens() const
virtual ~ForwardTokenizer()
std::istream & stream
Definition: tokenizer.h:144
bool isSeparator(const int character) const
Definition: tokenizer.cpp:101
std::streamoff offbeg
Definition: tokenizer.h:146
virtual std::string nextToken()
virtual int countTokens()
ForwardTokenizer(std::istream &stream, const std::string blankspaces, const std::string separators)
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
Definition: logger.h:278