- fprintf(stderr, "usage: <wikimedia dump xml> | %s [options]\n", argv[0]);
- fprintf(stderr, "\n");
- fprintf(stderr, "options:\n");
- fprintf(stderr, " -t print text and comments after each line of tab separated data\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "Takes a wikimedia data dump XML stream on standard in, and produces\n");
- fprintf(stderr, "a tab-separated stream of revisions on standard out:\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "title, articleid, revid, timestamp, anon, editor, editorid, minor, revlength, reventropy, revmd5\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "author: Erik Garrison <erik@hypervolu.me>\n");
+ cerr << "usage: <wikimedia dump xml> | " << argv[0] << " [options]" << endl
+ << endl
+ << "options:" << endl
+ << " -v verbose mode prints text and comments after each line of tab separated data" << endl
+ << " -n name of the following regex for content (e.g. -n name -r \"...\")" << endl
+ << " -r regex to check against content of the revision" << endl
+ << " -N name of the following regex for diffs (e.g. -N name -R \"...\")" << endl
+ << " -R regex to check against diffs (i.e., additions and deletions)" << endl
+ << " -t parse revisions only from pages whose titles match regex(es)" << endl
+ << endl
+ << "Takes a wikimedia data dump XML stream on standard in, and produces" << endl
+ << "a tab-separated stream of revisions on standard out:" << endl
+ << endl
+ << "title, articleid, revid, timestamp, anon, editor, editorid, minor," << endl
+ << "text_length, text_entropy, text_md5, reversion, additions_size, deletions_size" << endl
+ << ".... and additional fields for each regex executed against add/delete diffs" << endl
+ << endl
+ << "Boolean fields are TRUE/FALSE except in the case of reversion, which is blank" << endl
+ << "unless the article is a revert to a previous revision, in which case, it" << endl
+ << "contains the revision ID of the revision which was reverted to." << endl
+ << endl
+ << "author: Erik Garrison <erik@hypervolu.me>" << endl;