presage  0.9.2~beta
tokenizer.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "tokenizer.h"
26 
28  std::istream& is,
29  const std::string blankspaces,
30  const std::string separators
31 )
32  : stream(is),
33  lowercase(false)
34 {
35  // this should be changed to deal with a !good() stream
36  // appropriately
37  //assert(stream.good());
38 
39  offset = stream.tellg();
40  sstate = stream.rdstate();
41 
43 
44  stream.seekg(0, std::ios::end);
45  offend = stream.tellg();
46  stream.seekg(0, std::ios::beg);
47  offbeg = stream.tellg();
48 
51 }
52 
54 {
55  // reset stream state to enable repeatability
56  // (see reverseTokenizerTest::testRepeatability())
57  stream.setstate(sstate);
58  stream.clear();
59 }
60 
61 void Tokenizer::blankspaceChars(const std::string chars)
62 {
63  blankspaces = chars;
64 }
65 
66 std::string Tokenizer::blankspaceChars() const
67 {
68  return blankspaces;
69 }
70 
71 void Tokenizer::separatorChars(const std::string chars)
72 {
73  separators = chars;
74 }
75 
76 std::string Tokenizer::separatorChars() const
77 {
78  return separators;
79 }
80 
81 void Tokenizer::lowercaseMode(const bool value)
82 {
83  lowercase = value;
84 }
85 
87 {
88  return lowercase;
89 }
90 
91 bool Tokenizer::isBlankspace(const int character) const
92 {
93  std::string::size_type ret = blankspaces.find(character);
94  if (ret == std::string::npos) {
95  return false;
96  } else {
97  return true;
98  }
99 }
100 
101 bool Tokenizer::isSeparator(const int character) const
102 {
103  std::string::size_type ret = separators.find(character);
104  if (ret == std::string::npos) {
105  return false;
106  } else {
107  return true;
108  }
109 }
Tokenizer(std::istream &stream, const std::string blankspaces, const std::string separators)
Definition: tokenizer.cpp:27
bool isBlankspace(const int character) const
Definition: tokenizer.cpp:91
bool lowercaseMode() const
Definition: tokenizer.cpp:86
std::string separators
Definition: tokenizer.h:155
std::string separatorChars() const
Definition: tokenizer.cpp:76
std::streamoff offset
Definition: tokenizer.h:148
std::streamoff offend
Definition: tokenizer.h:147
std::istream & stream
Definition: tokenizer.h:144
bool isSeparator(const int character) const
Definition: tokenizer.cpp:101
std::streamoff offbeg
Definition: tokenizer.h:146
std::string blankspaceChars() const
Definition: tokenizer.cpp:66
std::string blankspaces
Definition: tokenizer.h:154
bool lowercase
Definition: tokenizer.h:157
virtual ~Tokenizer()
Definition: tokenizer.cpp:53
std::ios::iostate sstate
Definition: tokenizer.h:145