projects
/
wikiq
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
61ee762
)
added code and docs to search for regex within the content of revisions
author
Benjamin Mako Hill
<mako@atdot.cc>
Fri, 11 Apr 2014 22:55:11 +0000
(15:55 -0700)
committer
Benjamin Mako Hill
<mako@atdot.cc>
Sat, 12 Apr 2014 02:52:54 +0000
(19:52 -0700)
wikiq.cpp
patch
|
blob
|
history
diff --git
a/wikiq.cpp
b/wikiq.cpp
index 0343dd381917bc205863a3337e104c0db924ed5d..18259ea836924c24dae060c91177d3829e81e5dd 100644
(file)
--- a/
wikiq.cpp
+++ b/
wikiq.cpp
@@
-60,6
+60,10
@@
typedef struct {
// title regexes
vector<pcrecpp::RE> title_regexes;
// title regexes
vector<pcrecpp::RE> title_regexes;
+ // regexes for checking within the content of revisions
+ vector<string> content_regex_names;
+ vector<pcrecpp::RE> content_regexes;
+
// regexes for looking within diffs
vector<string> diff_regex_names;
vector<pcrecpp::RE> diff_regexes;
// regexes for looking within diffs
vector<string> diff_regex_names;
vector<pcrecpp::RE> diff_regexes;
@@
-269,6
+273,15
@@
write_row(revisionData *data)
}
}
}
}
+ // search the content of the revision for any of the regexes
+ vector<bool> content_regex_matches;
+ if (!data->content_regexes.empty()) {
+ for (vector<pcrecpp::RE>::iterator r = data->content_regexes.begin(); r != data->content_regexes.end(); ++r) {
+ pcrecpp::RE& content_regex = *r;
+ content_regex_matches.push_back(content_regex.PartialMatch(data->text));
+ }
+ }
+
//vector<string> additions;
//vector<string> deletions;
string additions;
//vector<string> additions;
//vector<string> deletions;
string additions;
@@
-338,6
+351,11
@@
write_row(revisionData *data)
<< (int) additions.size() << "\t"
<< (int) deletions.size();
<< (int) additions.size() << "\t"
<< (int) deletions.size();
+ for (int n = 0; n < data->content_regex_names.size(); ++n) {
+ cout << "\t" << ((!content_regex_matches.empty()
+ && content_regex_matches.at(n)) ? "TRUE" : "FALSE");
+ }
+
for (int n = 0; n < data->diff_regex_names.size(); ++n) {
cout << "\t" << ((!diff_regex_matches_adds.empty() && diff_regex_matches_adds.at(n)) ? "TRUE" : "FALSE")
<< "\t" << ((!diff_regex_matches_dels.empty() && diff_regex_matches_dels.at(n)) ? "TRUE" : "FALSE");
for (int n = 0; n < data->diff_regex_names.size(); ++n) {
cout << "\t" << ((!diff_regex_matches_adds.empty() && diff_regex_matches_adds.at(n)) ? "TRUE" : "FALSE")
<< "\t" << ((!diff_regex_matches_dels.empty() && diff_regex_matches_dels.at(n)) ? "TRUE" : "FALSE");
@@
-512,6
+530,8
@@
void print_usage(char* argv[]) {
<< endl
<< "options:" << endl
<< " -v verbose mode prints text and comments after each line of tab separated data" << endl
<< endl
<< "options:" << endl
<< " -v verbose mode prints text and comments after each line of tab separated data" << endl
+ << " -n name of the following regex for contet (e.g. -n name -r \"...\")" << endl
+ << " -r regex to check against content of the revision" << endl
<< " -N name of the following regex for diffs (e.g. -N name -R \"...\")" << endl
<< " -R regex to check against diffs (i.e., additions and deletions)" << endl
<< " -t parse revisions only from pages whose titles match regex(es)" << endl
<< " -N name of the following regex for diffs (e.g. -N name -R \"...\")" << endl
<< " -R regex to check against diffs (i.e., additions and deletions)" << endl
<< " -t parse revisions only from pages whose titles match regex(es)" << endl
@@
-541,6
+561,7
@@
main(int argc, char *argv[])
output_type = SIMPLE;
char c;
string diff_regex_name;
output_type = SIMPLE;
char c;
string diff_regex_name;
+ string content_regex_name;
// the user data struct which is passed to callback functions
revisionData data;
// the user data struct which is passed to callback functions
revisionData data;
@@
-554,6
+575,16
@@
main(int argc, char *argv[])
case 'v':
output_type = FULL;
break;
case 'v':
output_type = FULL;
break;
+ case 'n':
+ content_regex_name = optarg;
+ break;
+ case 'r':
+ data.content_regexes.push_back(pcrecpp::RE(optarg, pcrecpp::UTF8()));
+ data.content_regex_names.push_back(content_regex_name);
+ if (!content_regex_name.empty()) {
+ content_regex_name.clear();
+ }
+ break;
case 'N':
diff_regex_name = optarg;
break;
case 'N':
diff_regex_name = optarg;
break;
@@
-618,6
+649,17
@@
main(int argc, char *argv[])
<< "deletions_size";
int n = 0;
<< "deletions_size";
int n = 0;
+ if (!data.content_regexes.empty()) {
+ for (vector<pcrecpp::RE>::iterator r = data.content_regexes.begin();
+ r != data.content_regexes.end(); ++r, ++n) {
+ if (data.content_regex_names.at(n).empty()) {
+ cout << "\t" << "regex" << n;
+ } else {
+ cout << "\t" << data.content_regex_names.at(n);
+ }
+ }
+ }
+
if (!data.diff_regexes.empty()) {
for (vector<pcrecpp::RE>::iterator r = data.diff_regexes.begin(); r != data.diff_regexes.end(); ++r, ++n) {
if (data.diff_regex_names.at(n).empty()) {
if (!data.diff_regexes.empty()) {
for (vector<pcrecpp::RE>::iterator r = data.diff_regexes.begin(); r != data.diff_regexes.end(); ++r, ++n) {
if (data.diff_regex_names.at(n).empty()) {
@@
-629,6
+671,7
@@
main(int argc, char *argv[])
}
}
}
}
}
}
+
cout << endl;
// shovel data into the parser
cout << endl;
// shovel data into the parser
Benjamin Mako Hill
||
Want to submit a patch?