add two modified versions and documentation
[pyblosxom2wxr] / pyblosxom2wxr.sh
diff --git a/pyblosxom2wxr.sh b/pyblosxom2wxr.sh
deleted file mode 100755 (executable)
index 4e91ef5..0000000
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/bin/bash
-#
-# pyblosxom2wxr.sh
-# http://snarfed.org/pyblosxom2wxr
-# Ryan Barrett <pyblosxom2wxr@ryanb.org>
-# Version 0.2. This script is public domain.
-#
-# This script converts PyBlosxom posts and comments into a WXR (WordPress
-# eXtensible RSS) XML file that can be imported into a WordPress blog.
-#
-# Example usage:
-#
-# $ ./pyblosxom2wxr.sh post1.txt post2.txt > posts.xml
-#
-# pyblosxom2wxr has been tested with PyBlosxom 1.4 and WordPress 2.9 and 3.0. It
-# should work with other versions too, but your mileage may vary.
-#
-# TODO: comment ordering
-
-# exit on error
-set -e
-
-# check args
-if [[ $# = "0" || $1 = "--help" ]]; then
-  echo 'Usage: pyblosxom2wxr.sh FILES...'
-  exit 1
-fi
-
-# category
-all_categories=""
-all_tags=""
-
-# comment id sequence number
-commentid=1
-
-# output header
-now=`date --rfc-3339=seconds`
-cat << EOF
-<?xml version="1.0" encoding="UTF-8"?> 
-
-<!-- generator="pyblosxom2wxr/1.0" created="${now}" -->
-<rss version="2.0"
-  xmlns:excerpt="http://wordpress.org/export/1.0/excerpt/"
-  xmlns:content="http://purl.org/rss/1.0/modules/content/"
-  xmlns:wfw="http://wellformedweb.org/CommentAPI/"
-  xmlns:dc="http://purl.org/dc/elements/1.1/"
-  xmlns:wp="http://wordpress.org/export/1.0/">
-
-<channel>
-  <title></title>
-  <link></link>
-  <description></description>
-  <pubDate></pubDate>
-  <generator>http://snarfed.org/pyblosxom2wxr?v=1.0</generator>
-  <language>en</language>
-  <wp:wxr_version>1.0</wp:wxr_version>
-  <wp:base_site_url></wp:base_site_url>
-  <wp:base_blog_url></wp:base_blog_url>
-EOF
-
-
-# convert blog posts
-for file in "$@"; do
-  creator="mako"
-  fullname=`basename "$file" .txt`
-  dir=`dirname "$file"`
-  commentdir="$(dirname "$file")/comments"
-  title=`head -n 1 "$file"`
-  type="post"
-
-  name=${fullname}
-
-  pubDate=$(date -uR -r $file)
-  date=$(date -r "$file" +'%F %T')
-  dateGmt=$(date -u -r "$file" +'%F %T')
-
-  if grep -q ']]>' "$file"; then
-    echo "WARNING: $file contains the string ]]>, which makes its CDATA " \
-         "section invalid. WordPress handles this ok, but still, heads up." 1>&2
-  fi
-
-  cat << EOF
-<item>
-  <title>${title}</title>
-  <pubDate>${pubDate}</pubDate>
-  <guid isPermaLink="true">/${fullname}</guid>
-  <description></description>
-  <content:encoded><![CDATA[$(tail -n +4 "$file" | markdown_py)]]></content:encoded>
-  <wp:post_date>${date}</wp:post_date>
-  <wp:post_date_gmt>${dateGmt}</wp:post_date_gmt>
-  <wp:comment_status>open</wp:comment_status>
-  <wp:ping_status>open</wp:ping_status>
-  <wp:post_name>${name}</wp:post_name>
-  <wp:status>publish</wp:status>
-  <wp:post_parent>0</wp:post_parent>
-  <wp:menu_order>0</wp:menu_order>
-  <wp:post_type>${type}</wp:post_type>
-  <wp:post_password></wp:post_password>
-  <wp:is_sticky>0</wp:is_sticky>
-  <dc:creator>${creator}</dc:creator>
-EOF
-  # split the tags
-  raw_tags=$(grep '#tags' "$file" |perl -pe 's/.tags // '|tr ',' "\n")
-  for tag in $raw_tags; do
-    echo "<category domain=\"post_tag\" nicename=\"${tag}\">${tag}</category>"
-    #echo "<category domain=\"category\" nicename=\"$category\">$category</category>"
-  done;
-  all_tags="$all_tags$raw_tags"
-
-  # category="uncategorized"
-  # <category domain="category" nicename="$category">$category</category>
-
-  # other possible elements:
-#  <link>/${fullname}</link>
-#  <wp:post_id></wp:post_id>
-#  <excerpt:encoded></excerpt:encoded>
-
-
-  for cmtfile in ${commentdir}/"$fullname"-{all,[0-9]*}.cmt; do
-    if [[ -e "$cmtfile" ]]; then
-      set +e  # because the perl script below uses a non-zero exit code
-      tail -q -n +2 "$cmtfile" | \
-        sed -r '
-          s/^<item>$/<wp:comment>\n<wp:comment_id>X<\/wp:comment_id>/;
-          s/^<\/item>$/<wp:comment_approved>1<\/wp:comment_approved>\n<\/wp:comment>/;
-          s/<(\/)?author>/<\1wp:comment_author>/g;
-          s/<(\/)?link>/<\1wp:comment_author_url>/g;
-          s/<(\/)?ipaddress>/<\1wp:comment_author_IP>/g;
-          s/<(\/)?description>/<\1wp:comment_content>/g;
-          s/^<(ajax|cmt_date|email|openid_url|parent|post|secretToken|source|title|w3cdate)>.+$//;
-          s/^<\/?items>$//;
-          /^$/d' | \
-        perl -pe 'use HTML::Entities; decode_entities($_)' | \
-        perl -pe 'use POSIX qw(strftime);
-                  s/^<pubDate>(.+)<\/pubDate>$/"<wp:comment_date>" . (strftime "%Y-%m-%d %H:%M:%S", localtime($1)) . "<\/wp:comment_date>"/e;' | \
-        perl -e '
-          my $id = '${commentid}';
-          while (<STDIN>) {
-             s/^(<wp:comment_id>)X(<\/wp:comment_id>)$/$1 . $id++ . $2/e;
-             print $_;
-          }
-          exit $id - '${commentid}';'
-      # TODO: this is a hack since exit codes are only 8 bits unsigned.
-      # this will break on posts with >255 comments.
-      let commentid+=$?
-      set -e
-    fi
-  done
-
-  cat << EOF
-</item>
-
-EOF
-done
-
-index=2
-#for category in $(echo "$all_categories"|sort|uniq); do
-#    cat << EOF
-#    <wp:category><wp:term_id>${index}</wp:term_id><wp:category_nicename>${category}</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[${category}]]></wp:cat_name></wp:category>
-#EOF
-#    index=$(expr $index + 1)
-#done
-
-for tag in $(echo "$all_tags"|sort|uniq); do
-    cat << EOF
-    <wp:tag><wp:term_id>${index}</wp:term_id><wp:tag_slug>${tag}</wp:tag_slug><wp:tag_name>${tag}</wp:tag_name></wp:tag>
-EOF
-    index=$(expr $index + 1)
-done
-
-
-# output footer
-cat << EOF
-</channel>
-</rss>
-EOF

Benjamin Mako Hill || Want to submit a patch?