X-Git-Url: https://projects.mako.cc/source/wikiq/blobdiff_plain/b685a77fb13d526f5b4b216844a216b0dbab1ed3..dab8600205c04124a3e576722e06fcce1afd4408:/wikiq.c diff --git a/wikiq.c b/wikiq.c index 554e145..ce61af7 100644 --- a/wikiq.c +++ b/wikiq.c @@ -24,7 +24,7 @@ enum elements { enum block { TITLE_BLOCK, REVISION_BLOCK, CONTRIBUTOR_BLOCK, SKIP }; -enum outtype { NORMAL, SIMPLE }; +enum outtype { FULL, SIMPLE }; typedef struct { @@ -38,17 +38,16 @@ typedef struct { char *anon; char *editor; char *editorid; - char *minor; + bool minor; char *comment; char *text; } rev; - char *dropstr; enum elements element; enum block position; enum outtype output_type; -} parseData; +} revisionData; /* free_data and clean_data @@ -57,7 +56,7 @@ typedef struct { * Also, frees memory dynamically allocated to store data. */ static void -clean_data(parseData *data, int title) +clean_data(revisionData *data, int title) { if (title) { data->rev.title = NULL; @@ -70,7 +69,7 @@ clean_data(parseData *data, int title) data->rev.anon = NULL; data->rev.editor = NULL; data->rev.editorid = NULL; - data->rev.minor = NULL; + data->rev.minor = false; data->rev.comment = NULL; data->rev.text = NULL; data->element = UNUSED; @@ -78,7 +77,7 @@ clean_data(parseData *data, int title) } static void -free_data(parseData *data, int title) +free_data(revisionData *data, int title) { if (title) { //printf("freeing article\n"); @@ -92,34 +91,32 @@ free_data(parseData *data, int title) free(data->rev.anon); free(data->rev.editor); free(data->rev.editorid); - free(data->rev.minor); free(data->rev.comment); free(data->rev.text); } -cleanup_revision(parseData *data) { +void cleanup_revision(revisionData *data) { free_data(data, 0); clean_data(data, 0); } -cleanup_article(parseData *data) { +void cleanup_article(revisionData *data) { free_data(data, 1); clean_data(data, 1); } static void -init_data(parseData *data, char *dropstr, int output_type) +init_data(revisionData *data, outtype output_type) { clean_data(data, 1); // sets every element to null... - data->dropstr = dropstr; data->output_type = output_type; } /* for debugging only, prints out the state of the data struct */ static void -print_state(parseData *data) +print_state(revisionData *data) { printf("element = %i\n", data->element); printf("output_type = %i\n", data->output_type); @@ -131,7 +128,7 @@ print_state(parseData *data) printf("anon = %s\n", data->rev.anon); printf("editor = %s\n", data->rev.editor); printf("editorid = %s\n", data->rev.editorid); - printf("minor = %s\n", data->rev.minor); + printf("minor = %s\n", (data->rev.minor ? "1" : "0")); printf("comment = %s\n", data->rev.comment); printf("text = %s\n", data->rev.text); printf("\n"); @@ -158,73 +155,31 @@ write_header() * it is called right before cleanup_revision() and cleanup_article() */ static void -write_row(parseData *data) +write_row(revisionData *data) { - // define temporary variables to hold output values: - char *title, *articleid; - char *revid, *date, *time, *anon, *editor, *editorid; - char *minor, *comment; - char *text; - // perform some simple logic to obtain correct output values - - if (data->rev.minor == NULL) - minor = "0"; - else minor = data->rev.minor; - - if (data->rev.editor == NULL) - anon = "1"; - else anon = "0"; - - if (data->rev.title == NULL) - title = ""; - else title = data->rev.title; - - if (data->rev.articleid == NULL) - articleid = ""; - else articleid = data->rev.articleid; - - if (data->rev.revid == NULL) - revid = ""; - else revid = data->rev.revid; - - if (data->rev.date == NULL) - date = ""; - else date = data->rev.date; - - if (data->rev.time == NULL) - time = ""; - else time = data->rev.time; - - if (data->rev.editor == NULL) - editor = ""; - else editor = data->rev.editor; - - if (data->rev.editorid == NULL) - editorid = ""; - else editorid = data->rev.editorid; - - if (data->rev.text == NULL) - text = ""; - else text = data->rev.text; - - - if (data->rev.comment == NULL) - comment = ""; - else comment = data->rev.comment; - // TODO: make it so you can specify fields to output // note that date and time are separated by a space, to match postgres's // timestamp format + printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%s\t%s", + (data->rev.title != NULL) ? data->rev.title : "", + (data->rev.articleid != NULL) ? data->rev.articleid : "", + (data->rev.revid != NULL) ? data->rev.revid : "", + (data->rev.date != NULL) ? data->rev.date : "", + (data->rev.time != NULL) ? data->rev.time : "", + (data->rev.editor != NULL) ? "0" : "1", + (data->rev.editor != NULL) ? data->rev.editor : "", + (data->rev.editorid != NULL) ? data->rev.editorid : "", + (data->rev.minor) ? "1" : "0"); switch (data->output_type) { - case NORMAL: - printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%s\t%s\t%s\t%s\n", - title,articleid,revid,date,time,anon,editor,editorid,minor,comment,text); - break; case SIMPLE: - printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%s\t%s\n", - title,articleid,revid,date,time,anon,editor,editorid,minor); + printf("\n"); + break; + case FULL: + printf("\t%s\t%s\n", + (data->rev.comment != NULL) ? data->rev.comment : "", + (data->rev.text != NULL) ? data->rev.text : ""); break; } @@ -247,41 +202,41 @@ static char } char -*append(char *entry, char *new) +*append(char *entry, char *newstr) { char *newbuff; int len; - len = (strlen(entry)+strlen(new))*sizeof(char) + 1; - newbuff = realloc(entry, len); - strcat(newbuff, new); + len = (strlen(entry)+strlen(newstr))*sizeof(char) + 1; + newbuff = (char*) realloc(entry, len); + strcat(newbuff, newstr); return newbuff; } char -*cache(char *entry, char *new) +*cache(char *entry, char *newstr) { char *newbuff; int len; - len = strlen(new)*sizeof(char) + 1; // include space for the '\0' ! - newbuff = malloc(len); - strcpy(newbuff,new); + len = strlen(newstr)*sizeof(char) + 1; // include space for the '\0' ! + newbuff = (char*) malloc(len); + strcpy(newbuff,newstr); return newbuff; } char -*store(char *entry, char *new) +*store(char *entry, char *newstr) { char *newbuff; if (entry == NULL) - newbuff = cache(entry, new); + newbuff = cache(entry, newstr); else - newbuff = append(entry, new); + newbuff = append(entry, newstr); return newbuff; } void -split_timestamp(parseData *data) +split_timestamp(revisionData *data) { char *t = data->rev.timestamp; char date_buffer[DATE_LENGTH+1]; @@ -327,8 +282,9 @@ contains(char *s, char *t) } static void -charhndl(parseData *data, char *s, int len) +charhndl(void* vdata, const XML_Char* s, int len) { + revisionData* data = (revisionData*) vdata; if (data->element != UNUSED && data->position != SKIP) { char t[len]; strncpy(t,s,len); @@ -338,10 +294,6 @@ charhndl(parseData *data, char *s, int len) { data->rev.title = store(data->rev.title, t); // skip any articles with bad characters in their titles - if (contains(t, data->dropstr)) { - data->position = SKIP; - //printf("found a baddie\n"); - } break; } case ARTICLEID: @@ -373,10 +325,14 @@ charhndl(parseData *data, char *s, int len) */ case COMMENT: // printf("row: comment is %s\n", t); - data->rev.comment = store(data->rev.comment, t); + if (data->output_type == FULL) { + data->rev.comment = store(data->rev.comment, t); + } break; case TEXT: - data->rev.text = store(data->rev.text, t); + if (data->output_type == FULL) { + data->rev.text = store(data->rev.text, t); + } break; default: break; } @@ -384,8 +340,9 @@ charhndl(parseData *data, char *s, int len) } static void -start(parseData *data, const char *name, const char **attr) +start(void* vdata, const XML_Char* name, const XML_Char** attr) { + revisionData* data = (revisionData*) vdata; if (strcmp(name,"title") == 0) { cleanup_article(data); // cleans up data from last article @@ -414,7 +371,7 @@ start(parseData *data, const char *name, const char **attr) // minor tag has no character data, so we parse here else if (strcmp(name,"minor") == 0) { data->element = MINOR; - data->rev.minor = store(data->rev.minor, "1"); + data->rev.minor = true; } else if (strcmp(name,"timestamp") == 0) data->element = TIMESTAMP; @@ -442,8 +399,9 @@ start(parseData *data, const char *name, const char **attr) static void -end(parseData *data, const char *name) +end(void* vdata, const XML_Char* name) { + revisionData* data = (revisionData*) vdata; if (strcmp(name, "revision") == 0 && data->position != SKIP) { write_row(data); // crucial... :) cleanup_revision(data); // also crucial @@ -472,24 +430,20 @@ int main(int argc, char *argv[]) { - char *dropstr = ""; enum outtype output_type; int dry_run = 0; // in "simple" output, we don't print text and comments output_type = SIMPLE; char c; - while ((c = getopt(argc, argv, "hr:sd")) != -1) + while ((c = getopt(argc, argv, "ht")) != -1) switch (c) { - case 'r': - dropstr = optarg; - break; case 'd': dry_run = 1; break; case 't': - output_type = NORMAL; + output_type = FULL; break; case 'h': print_usage(argv); @@ -499,7 +453,6 @@ main(int argc, char *argv[]) if (dry_run) { // lets us print initialization options printf("simple_output = %i\n", output_type); - printf("dropstr = %s\n", dropstr); exit(1); } @@ -507,17 +460,21 @@ main(int argc, char *argv[]) XML_Parser parser = XML_ParserCreate(NULL); // initialize the user data struct which is passed to callback functions - parseData data; + revisionData data; // initialize the elements of the struct to default values - init_data(&data, dropstr, output_type); + init_data(&data, output_type); // makes the parser pass "data" as the first argument to every callback XML_SetUserData(parser, &data); + void (*startFnPtr)(void*, const XML_Char*, const XML_Char**) = start; + void (*endFnPtr)(void*, const XML_Char*) = end; + void (*charHandlerFnPtr)(void*, const XML_Char*, int) = charhndl; + // sets start and end to be the element start and end handlers - XML_SetElementHandler(parser, (void *) start, (void *) end); + XML_SetElementHandler(parser, startFnPtr, endFnPtr); // sets charhndl to be the callback for raw character data - XML_SetCharacterDataHandler(parser, (void *) charhndl); + XML_SetCharacterDataHandler(parser, charHandlerFnPtr); int done; char buf[BUFSIZ];