projects.mako.cc - redirect-tools/blob - example/01-extract_redirects_wrapper.sh
e953c6182cf35878ba1bdc01266e5363c7d261c8
[redirect-tools] / example / 01-extract_redirects_wrapper.sh
#!/bin/bash
#
# Job wrapper: run extract_redirects.py over one 7z dump file and store the
# result as a bzip2-compressed TSV.
#
# Usage: 01-extract_redirects_wrapper.sh JOB_NUMBER
#
#   JOB_NUMBER  Zero-based job index (non-negative integer). Job N reads the
#               (N+1)-th '*7z' file found under ${DATA_DIR}/wp-enwiki-xml and
#               writes ${DATA_DIR}/wp-enwiki-redir/wp_edits_redir_<NNN>.tsv.bz2,
#               where NNN is N+1 zero-padded to three digits.
#
# Exit status: 0 on success, 1 on a runtime failure, 2 on a usage error.

# Abort on unhandled errors and unset variables; make a pipeline fail when
# ANY stage fails (otherwise only bzip2's status would be reported).
set -euo pipefail

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Require a numeric job number; extra arguments are ignored.
if (( $# < 1 )) || [[ ! $1 =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s JOB_NUMBER (non-negative integer)\n' "${0##*/}" >&2
  exit 2
fi

# The job number is zero-based, while sed line numbers and the output file
# index are one-based. Force base 10 so a zero-padded argument such as "08"
# is not rejected as invalid octal.
run=$(( 10#$1 + 1 ))
printf -v index '%03d' "$run"

readonly CUR_DIR="/nfs/home/B/bhill/condor_jobs/extract_redirects-20140412"
readonly DATA_DIR="/nfs/home/B/bhill/shared_space/barnstar"
readonly OUTPUT_FILE="${DATA_DIR}/wp-enwiki-redir/wp_edits_redir_${index}.tsv.bz2"

# find(1) lists entries in an unspecified order, so sort the listing to give
# every job the same, stable job-number -> file mapping.
input_file=$(
  find "${DATA_DIR}/wp-enwiki-xml" -name '*7z' \
    | LC_ALL=C sort \
    | sed -n "${run}p"
) || die "could not list dump files under ${DATA_DIR}/wp-enwiki-xml"

# A job number past the end of the listing selects nothing; stop before
# creating a bogus output file.
[[ -n "$input_file" ]] \
  || die "no input file for job $1 (entry ${run} of the dump listing)"

# Stream the decompressed dump through the extractor and recompress the
# result. On failure, drop the partial archive so it cannot be mistaken for
# a complete one.
if ! 7za x -so "$input_file" \
    | /usr/local/bin/python2.7 "${CUR_DIR}/extract_redirects.py" \
    | bzip2 -c - > "$OUTPUT_FILE"; then
  rm -f -- "$OUTPUT_FILE"
  die "extraction failed for ${input_file}"
fi

Benjamin Mako Hill || Want to submit a patch?