projects
/
wikiq
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
31741a5
)
change diff regex options from -n and -r to -N and -R
author
Benjamin Mako Hill
<mako@atdot.cc>
Fri, 11 Apr 2014 22:29:40 +0000
(15:29 -0700)
committer
Benjamin Mako Hill
<mako@atdot.cc>
Fri, 11 Apr 2014 22:29:40 +0000
(15:29 -0700)
wikiq.cpp
patch
|
blob
|
history
diff --git a/wikiq.cpp b/wikiq.cpp
index dfa7251d5a9e0ea943be334480cc8539fb19094a..0343dd381917bc205863a3337e104c0db924ed5d 100644
(file)
--- a/wikiq.cpp
+++ b/wikiq.cpp
@@ -56,9 +56,14 @@
typedef struct {
char *comment;
char *text;
vector<string> last_text_tokens;
char *comment;
char *text;
vector<string> last_text_tokens;
+
+ // title regexes
vector<pcrecpp::RE> title_regexes;
vector<pcrecpp::RE> title_regexes;
+
+ // regexes for looking within diffs
vector<string> diff_regex_names;
vector<pcrecpp::RE> diff_regexes;
vector<string> diff_regex_names;
vector<pcrecpp::RE> diff_regexes;
+
map<string, string> revision_md5; // used for detecting reversions
// track string size of the elements, to prevent O(N^2) processing in charhndl
map<string, string> revision_md5; // used for detecting reversions
// track string size of the elements, to prevent O(N^2) processing in charhndl
@@ -507,8 +512,8 @@
void print_usage(char* argv[]) {
<< endl
<< "options:" << endl
<< " -v verbose mode prints text and comments after each line of tab separated data" << endl
<< endl
<< "options:" << endl
<< " -v verbose mode prints text and comments after each line of tab separated data" << endl
- << " -n name of the following regex (e.g. -n name -r \"...\")" << endl
- << " -r regex to check against additions and deletions" << endl
+ << " -N name of the following regex for diffs (e.g. -N name -R \"...\")" << endl
+ << " -R regex to check against diffs (i.e., additions and deletions)" << endl
<< " -t parse revisions only from pages whose titles match regex(es)" << endl
<< endl
<< "Takes a wikimedia data dump XML stream on standard in, and produces" << endl
<< " -t parse revisions only from pages whose titles match regex(es)" << endl
<< endl
<< "Takes a wikimedia data dump XML stream on standard in, and produces" << endl
@@ -549,10 +554,10 @@
main(int argc, char *argv[])
case 'v':
output_type = FULL;
break;
case 'v':
output_type = FULL;
break;
- case 'n':
+ case 'N':
diff_regex_name = optarg;
break;
diff_regex_name = optarg;
break;
- case 'r':
+ case 'R':
data.diff_regexes.push_back(pcrecpp::RE(optarg, pcrecpp::UTF8()));
data.diff_regex_names.push_back(diff_regex_name);
if (!diff_regex_name.empty()) {
data.diff_regexes.push_back(pcrecpp::RE(optarg, pcrecpp::UTF8()));
data.diff_regex_names.push_back(diff_regex_name);
if (!diff_regex_name.empty()) {
Benjamin Mako Hill
||
Want to submit a patch?