presage  0.9.1
contextTracker.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "contextTracker.h"
26 #include "../utility.h"
27 #include "../predictorRegistry.h"
28 #include "../tokenizer/forwardTokenizer.h"
29 
30 #include <stdlib.h> // for atoi()
31 
32 const char* ContextTracker::LOGGER = "Presage.ContextTracker.LOGGER";
33 const char* ContextTracker::SLIDING_WINDOW_SIZE = "Presage.ContextTracker.SLIDING_WINDOW_SIZE";
34 const char* ContextTracker::LOWERCASE_MODE = "Presage.ContextTracker.LOWERCASE_MODE";
35 const char* ContextTracker::ONLINE_LEARNING = "Presage.ContextTracker.ONLINE_LEARNING";
36 
38  PredictorRegistry* registry,
39  PresageCallback* callback,
40  const char wChars[],
41  const char tChars[],
42  const char bChars[],
43  const char cChars[])
44  : wordChars (wChars),
45  separatorChars (tChars),
46  blankspaceChars(bChars),
47  controlChars (cChars),
48  predictorRegistry (registry),
49  logger ("ContextTracker", std::cerr),
50  //tokenizer (pastStream, blankspaceChars, separatorChars),
51  lowercase_mode (true),
52  dispatcher (this)
53 {
54  if (callback) {
56  } else {
57  throw new PresageException(PRESAGE_INVALID_CALLBACK_ERROR, "Invalid callback object");
58  }
59 
65 
66  // set pointer to this context tracker in predictor registry so that
67  // predictors can be constructed when next iterator is requested
68  //
69  if (predictorRegistry) {
71  }
72 
73  // build dispatch map
78 }
79 
81 {
82  delete contextChangeDetector;
83 }
84 
85 void ContextTracker::set_logger (const std::string& value)
86 {
87  logger << setlevel (value);
88  logger << INFO << "LOGGER: " << value << endl;
89 }
90 
91 void ContextTracker::set_sliding_window_size (const std::string& value)
92 {
94  logger << INFO << "SLIDING_WINDOWS_SIZE: " << value << endl;
95 }
96 
97 void ContextTracker::set_lowercase_mode (const std::string& value)
98 {
100  logger << INFO << "LOWERCASE_MODE: " << value << endl;
101 }
102 
103 void ContextTracker::set_online_learning(const std::string& value)
104 {
106  logger << INFO << "ONLINE_LEARNING: " << value << endl;
107 }
108 
110 {
112  if (new_callback) {
113  context_tracker_callback = new_callback;
114  }
115  return result;
116 }
117 
122 {
124 }
125 
127 {
128  // detect change that needs to be learned
129  std::string change = contextChangeDetector->change(getPastStream());
130 
131  if (online_learning)
132  {
133  learn (change);
134  }
135 
136  // update sliding window
138 }
139 
140 void ContextTracker::learn(const std::string& text) const
141 {
142  logger << INFO << "learn(): text: " << text << endl;
143 
144  std::stringstream stream_to_learn(text);
145 
146  // split stream up into tokens
147  std::vector<std::string> tokens;
148  ForwardTokenizer tok(stream_to_learn,
152  logger << INFO << "learn(): tokenized change: ";
153  while (tok.hasMoreTokens()) {
154  std::string token = tok.nextToken();
155  tokens.push_back(token);
156  logger << INFO << token << '|';
157  }
158  logger << INFO << endl;
159 
160  if (! tokens.empty()) {
161  // remove prefix (partially entered token or empty token)
162  tokens.pop_back();
163  }
164 
165  if ((logger << INFO).shouldLog())
166  {
167  logger << "learn(): sanitized change: ";
168  for (std::vector<std::string>::const_iterator it = tokens.begin();
169  it != tokens.end();
170  it++) {
171  logger << *it << '|';
172  }
173  logger << endl;
174  }
175 
176  // time to learn
178  Predictor* predictor = 0;
179 
180  while (it.hasNext()) {
181  predictor = it.next();
182  predictor->learn(tokens);
183  }
184 }
185 
186 std::string ContextTracker::getPrefix() const
187 {
188  return getToken(0);
189 }
190 
191 std::string ContextTracker::getToken(const int index) const
192 {
193  std::stringstream pastStringStream(context_tracker_callback->get_past_stream());
194  ReverseTokenizer tokenizer(pastStringStream, blankspaceChars, separatorChars);
195  tokenizer.lowercaseMode(lowercase_mode);
196 
197  std::string token;
198  int i = 0;
199  while (tokenizer.hasMoreTokens() && i <= index) {
200  token = tokenizer.nextToken();
201  i++;
202  }
203  if (i <= index) {
204  // in case the index points too far back
205  token = "";
206  }
207  return token;
208 
210 // "a b c"
211 // 2 1 0
212 // 0 1 2
213 // 1 2 3
214 //
215 // ForwardTokenizer tokenizer(pastStream, blankspaceChars, separatorChars);
216 // tokenizer.lowercaseMode(lowercase_mode);
217 // std::string result;
218 // int tokens = tokenizer.countTokens();
219 // // why oh why is this clear() required to get it to work???
220 // pastStream.clear();
221 // int j = 0;
222 // while (tokenizer.hasMoreTokens() && j < tokens - index) {
223 // result = tokenizer.nextToken();
224 // j++;
225 //
226 // std::cerr << "ContextTracker::getToken() current token: " << result << std::endl;
227 // }
228 // return result;
229 }
230 
231 std::string ContextTracker::getExtraTokenToLearn(const int index, const std::vector<std::string>& change) const
232 {
233  //logger << DEBUG
234  // << "past_stream : " << getPastStream() << endl
235  // << "change : " << contextChangeDetector->change(getPastStream()) << endl
236  // << "sliding_window: " << contextChangeDetector->get_sliding_window() + "\n" << endl;
237 
238 
239  // Extra tokens to learn are to be found in (past_stream - change)
240  //
241  // The change tokens are tokens that have not been seen or learnt
242  // before.
243  //
244  // The extra tokens to learn are tokens that have been seen and
245  // learn before, but that we need to reuse to fill out the n-gram
246  // of required cardinality that we are about to learn.
247  //
248  // To find the extra tokens to learn, we use the size of tokenized
249  // change vector to offset the index and extract the extra tokens
250  // to learn from the past stream.
251  //
252  // For example:
253  // past_stream : "The quick brown fox jumped over the "
254  // change : |over|the|
255  // extra_tokens: |The|quick|brown|fox|jumped|
256  //
257  return getToken(index + change.size());
258 }
259 
261 {
263 }
264 
265 std::string ContextTracker::getPastStream() const
266 {
267  std::string result = context_tracker_callback->get_past_stream();
268  return result;
269 }
270 
271 bool ContextTracker::isCompletionValid(const std::string& completion) const
272 {
273  bool result = false;
274 
275  std::string prefix = getPrefix();
276  prefix = Utility::strtolower(prefix); // no need to be case sensitive
277  if (completion.find(prefix) == 0) {
278  result = true;
279  }
280 
281  return result;
282 }
283 
284 bool ContextTracker::isWordChar(const char c) const
285 {
286  if(wordChars.find(c, 0) != std::string::npos)
287  return true;
288  else
289  return false;
290 }
291 
292 bool ContextTracker::isSeparatorChar(const char c) const
293 {
294  if(separatorChars.find(c, 0) != std::string::npos)
295  return true;
296  else
297  return false;
298 }
299 
300 bool ContextTracker::isBlankspaceChar(const char c) const
301 {
302  if(blankspaceChars.find(c, 0) != std::string::npos)
303  return true;
304  else
305  return false;
306 }
307 
308 bool ContextTracker::isControlChar(const char c) const
309 {
310  if(controlChars.find(c, 0) != std::string::npos)
311  return true;
312  else
313  return false;
314 }
315 
316 std::string ContextTracker::getWordChars() const
317 {
318  return wordChars;
319 }
320 
322 {
323  return separatorChars;
324 }
325 
327 {
328  return blankspaceChars;
329 }
330 
332 {
333  return controlChars;
334 }
335 
336 std::string ContextTracker::toString() const
337 {
339 }
340 
341 void ContextTracker::update (const Observable* variable)
342 {
343  logger << DEBUG << "Notification received: "
344  << variable->get_name () << " - " << variable->get_value () << endl;
345 
346  dispatcher.dispatch (variable);
347 }
ContextTracker::getToken
std::string getToken(const int) const
Definition: contextTracker.cpp:191
ContextTracker::getSeparatorChars
std::string getSeparatorChars() const
Definition: contextTracker.cpp:321
ReverseTokenizer::hasMoreTokens
virtual bool hasMoreTokens() const
Definition: reverseTokenizer.cpp:69
ContextTracker::getWordChars
std::string getWordChars() const
Definition: contextTracker.cpp:316
ContextTracker::set_online_learning
void set_online_learning(const std::string &value)
Definition: contextTracker.cpp:103
ContextTracker::getFutureStream
std::string getFutureStream() const
Definition: contextTracker.cpp:260
Tokenizer::lowercaseMode
void lowercaseMode(const bool)
Definition: tokenizer.cpp:81
ContextTracker::update
void update()
Definition: contextTracker.cpp:126
ContextTracker::getControlChars
std::string getControlChars() const
Definition: contextTracker.cpp:331
ContextTracker::callback
const PresageCallback * callback(const PresageCallback *callback)
Definition: contextTracker.cpp:109
ForwardTokenizer::hasMoreTokens
virtual bool hasMoreTokens() const
Definition: forwardTokenizer.cpp:61
ContextTracker::dispatcher
Dispatcher< ContextTracker > dispatcher
Definition: contextTracker.h:236
contextTracker.h
ContextTracker::isSeparatorChar
bool isSeparatorChar(const char) const
Definition: contextTracker.cpp:292
Observable::get_name
virtual std::string get_name() const =0
PresageCallback::get_past_stream
virtual std::string get_past_stream() const =0
Dispatcher::map
void map(Observable *var, const mbr_func_ptr_t &ptr)
Definition: dispatcher.h:62
ContextTracker::getPrefix
std::string getPrefix() const
Definition: contextTracker.cpp:186
ReverseTokenizer::nextToken
virtual std::string nextToken()
Definition: reverseTokenizer.cpp:80
ContextTracker::set_logger
void set_logger(const std::string &value)
Definition: contextTracker.cpp:85
ContextTracker::contextChange
bool contextChange()
Definition: contextTracker.cpp:121
Predictor
Definition: predictor.h:46
ForwardTokenizer::nextToken
virtual std::string nextToken()
Definition: forwardTokenizer.cpp:72
Utility::isYes
static bool isYes(const char *)
Definition: utility.cpp:185
PredictorRegistry::Iterator::hasNext
bool hasNext() const
Definition: predictorRegistry.cpp:248
ContextTracker::set_sliding_window_size
void set_sliding_window_size(const std::string &value)
Definition: contextTracker.cpp:91
ContextTracker::getExtraTokenToLearn
std::string getExtraTokenToLearn(const int index, const std::vector< std::string > &change) const
Definition: contextTracker.cpp:231
ContextTracker::isBlankspaceChar
bool isBlankspaceChar(const char) const
Definition: contextTracker.cpp:300
ContextTracker::online_learning
bool online_learning
Definition: contextTracker.h:219
ContextTracker::logger
Logger< char > logger
Definition: contextTracker.h:234
config
std::string config
Definition: presageDemo.cpp:70
Observable::get_value
virtual std::string get_value() const =0
ContextTracker::getPastStream
std::string getPastStream() const
Definition: contextTracker.cpp:265
ContextTracker::blankspaceChars
std::string blankspaceChars
Definition: contextTracker.h:215
PredictorRegistry
Definition: predictorRegistry.h:46
ContextTracker::context_tracker_callback
const PresageCallback * context_tracker_callback
Definition: contextTracker.h:231
ContextTracker::wordChars
std::string wordChars
Definition: contextTracker.h:213
ContextTracker::contextChangeDetector
ContextChangeDetector * contextChangeDetector
Definition: contextTracker.h:233
ContextTracker::separatorChars
std::string separatorChars
Definition: contextTracker.h:214
ContextTracker::LOGGER
static const char * LOGGER
Definition: contextTracker.h:207
ContextChangeDetector
Definition: contextChangeDetector.h:36
ContextTracker::learn
void learn(const std::string &text) const
Learn from text.
Definition: contextTracker.cpp:140
ContextTracker::getBlankspaceChars
std::string getBlankspaceChars() const
Definition: contextTracker.cpp:326
ContextTracker::lowercase_mode
bool lowercase_mode
Definition: contextTracker.h:218
Dispatcher::dispatch
void dispatch(const Observable *var)
Definition: dispatcher.h:73
ContextTracker::controlChars
std::string controlChars
Definition: contextTracker.h:216
PredictorRegistry::Iterator
Definition: predictorRegistry.h:53
ContextTracker::~ContextTracker
~ContextTracker()
Definition: contextTracker.cpp:80
ContextChangeDetector::update_sliding_window
void update_sliding_window(const std::string &str)
Definition: contextChangeDetector.cpp:63
endl
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
Definition: logger.h:278
Utility::strtolower
static char * strtolower(char *)
Definition: utility.cpp:42
PresageException
Definition: presageException.h:67
PredictorRegistry::iterator
Iterator iterator()
Definition: predictorRegistry.cpp:232
ContextChangeDetector::set_sliding_window_size
void set_sliding_window_size(const std::string &str)
Definition: contextChangeDetector.cpp:54
Predictor::learn
virtual void learn(const std::vector< std::string > &change)=0
PresageCallback::get_future_stream
virtual std::string get_future_stream() const =0
Configuration
Definition: configuration.h:36
ContextTracker::isWordChar
bool isWordChar(const char) const
Definition: contextTracker.cpp:284
PredictorRegistry::setContextTracker
void setContextTracker(ContextTracker *ct)
Definition: predictorRegistry.cpp:67
ContextTracker::predictorRegistry
PredictorRegistry * predictorRegistry
Definition: contextTracker.h:232
ContextTracker::isCompletionValid
bool isCompletionValid(const std::string &) const
Definition: contextTracker.cpp:271
ForwardTokenizer
Definition: forwardTokenizer.h:39
ContextTracker::ONLINE_LEARNING
static const char * ONLINE_LEARNING
Definition: contextTracker.h:210
ContextTracker::set_lowercase_mode
void set_lowercase_mode(const std::string &value)
Definition: contextTracker.cpp:97
ReverseTokenizer
Definition: reverseTokenizer.h:39
PRESAGE_INVALID_CALLBACK_ERROR
@ PRESAGE_INVALID_CALLBACK_ERROR
Definition: presageException.h:39
ContextTracker::SLIDING_WINDOW_SIZE
static const char * SLIDING_WINDOW_SIZE
Definition: contextTracker.h:208
Observable
Definition: observable.h:37
ContextTracker::ContextTracker
ContextTracker(Configuration *config, PredictorRegistry *predictorRegistry, PresageCallback *callback, const char[]=DEFAULT_WORD_CHARS, const char[]=DEFAULT_SEPARATOR_CHARS, const char[]=DEFAULT_BLANKSPACE_CHARS, const char[]=DEFAULT_CONTROL_CHARS)
Definition: contextTracker.cpp:37
ContextTracker::isControlChar
bool isControlChar(const char) const
Definition: contextTracker.cpp:308
PresageCallback
Definition: presageCallback.h:66
ContextTracker::LOWERCASE_MODE
static const char * LOWERCASE_MODE
Definition: contextTracker.h:209
setlevel
_SetLevel setlevel(std::string __l)
Manipulator for level.
Definition: logger.h:46
ContextChangeDetector::context_change
bool context_change(const std::string &past_stream) const
Definition: contextChangeDetector.cpp:75
ContextChangeDetector::change
std::string change(const std::string &past_stream) const
Definition: contextChangeDetector.cpp:203
ContextTracker::toString
std::string toString() const
Definition: contextTracker.cpp:336
PredictorRegistry::Iterator::next
Predictor * next()
Definition: predictorRegistry.cpp:255