+ // Split the incoming text into tokens, cutting at every sentence end (".")
+ // and every paragraph break ("\n\n"), whichever comes first. A token ends
+ // just before its delimiter, so the delimiter becomes the first characters
+ // of the following token; concatenating all tokens reproduces `text`.
+ string text = string(data->text, data->text_size);
+ vector<string> text_tokens;
+ size_t period_pos = 0;
+ size_t paragraph_pos = 0;
+ size_t start = 0;
+ while (start < text.size()) {
+     // Search for both delimiters from just past the current cut point.
+     // Starting at start + 1 guarantees progress and never yields an
+     // empty token.
+     period_pos = text.find('.', start + 1);
+     paragraph_pos = text.find("\n\n", start + 1);
+
+     // string::npos is the largest size_t value, so taking the smaller of
+     // the two positions picks the nearest delimiter and still works when
+     // only one kind of delimiter remains in the text.
+     const size_t cut = (paragraph_pos < period_pos) ? paragraph_pos : period_pos;
+     if (cut == string::npos) {
+         // No delimiter left: the remainder of the text is the last token.
+         text_tokens.push_back(text.substr(start));
+         break;
+     }
+
+     text_tokens.push_back(text.substr(start, cut - start));
+     start = cut;
+ }
+
+ // Tokens that appear only in the new text / only in the previously stored
+ // tokens, as reported by the diff below.
+ vector<string> additions;
+ vector<string> deletions;
+
+ // An empty data->last_tokens means there is nothing to compare against,
+ // so the diff is skipped and the current tokens are only stored.
+ if (!data->last_tokens.empty()) {
+     typedef vector<pair<string, dtl::elemInfo> > EditScript;
+
+     // Diff the stored tokens against the current ones.
+     // (An edit-distance-only run via onOnlyEditDistance() is left disabled.)
+     dtl::Diff< string, vector<string> > token_diff(data->last_tokens, text_tokens);
+     token_diff.compose();
+
+     // Walk the edit script; entries that are neither additions nor
+     // deletions are ignored.
+     EditScript edit_script = token_diff.getSes().getSequence();
+     for (EditScript::iterator entry = edit_script.begin(); entry != edit_script.end(); ++entry) {
+         if (entry->second.type == dtl::SES_ADD) {
+             cout << "ADD: \"" << entry->first << "\"" << endl;
+             additions.push_back(entry->first);
+         } else if (entry->second.type == dtl::SES_DELETE) {
+             cout << "DEL: \"" << entry->first << "\"" << endl;
+             deletions.push_back(entry->first);
+         }
+     }
+
+     // TODO: apply regex to the diff
+ }
+
+ // Store the current tokens as the baseline for the next comparison.
+ data->last_tokens = text_tokens;
+
+