version used to do the (imperfect) export/import of revealing errors
author Benjamin Mako Hill <mako@atdot.cc>
Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)
committer Benjamin Mako Hill <mako@atdot.cc>
Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)
pyblosxom2wxr.sh

index c3218d7..4e91ef5 100755 (executable)
@@ -26,6 +26,10 @@ if [[ $# = "0" || $1 = "--help" ]]; then
   exit 1
 fi
 
+# category
+all_categories=""
+all_tags=""
+
 # comment id sequence number
 commentid=1
 
@@ -52,46 +56,23 @@ cat << EOF
   <wp:wxr_version>1.0</wp:wxr_version>
   <wp:base_site_url></wp:base_site_url>
   <wp:base_blog_url></wp:base_blog_url>
-  <wp:category></wp:category>
-
 EOF
 
-# convert comments
+
+# convert blog posts
 for file in "$@"; do
+  creator="mako"
   fullname=`basename "$file" .txt`
   dir=`dirname "$file"`
+  commentdir="$(dirname "$file")/comments"
   title=`head -n 1 "$file"`
+  type="post"
 
-  # TODO: make this easier to customize
-  date_re="[0-9]{4}-[0-9]{2}-[0-9]{2}"
-  time_re="([0-9]{2})-([0-9]{2})"
-
-  # my pyblosxom posts have a date prefix, e.g. 2010-03-13. my pages don't.
-  if [[ "$fullname" =~ ^${date_re} ]]; then
-    type=post
-    name=${fullname:11}
-    datestr="${fullname::10} 00:00:00 -0800"
-  else
-    type=page
-    name=${fullname}
-
-    timestamp_file=${dir}/../timestamps
-    datestr=`grep --max-count=1 -E \
-               "^${date_re}-${time_re} (.+/)?${fullname}.txt\$" ${timestamp_file} | \
-        cut -f1 -d' ' | \
-        sed -r "s/-${time_re}\$/ \1:\2 -0500/"`
-  
-    if [[ ${datestr} == '' ]]; then
-      datestr=`stat --format=%y "$file"`
-    fi
-  fi
-
-  pubDate=`date -uR -d "$datestr"`
-  date=`date -d "$datestr" +'%F %T'`
-  dateGmt=`date -u -d "$datestr" +'%F %T'`
+  name=${fullname}
 
-  # TODO: category support
-  category="uncategorized"
+  pubDate=$(date -uR -r $file)
+  date=$(date -r "$file" +'%F %T')
+  dateGmt=$(date -u -r "$file" +'%F %T')
 
   if grep -q ']]>' "$file"; then
     echo "WARNING: $file contains the string ]]>, which makes its CDATA " \
@@ -102,10 +83,9 @@ for file in "$@"; do
 <item>
   <title>${title}</title>
   <pubDate>${pubDate}</pubDate>
-  <category domain="category" nicename="$category">$category</category>
   <guid isPermaLink="true">/${fullname}</guid>
   <description></description>
-  <content:encoded><![CDATA[`tail -n +3 "$file"`]]></content:encoded>
+  <content:encoded><![CDATA[$(tail -n +4 "$file" | markdown_py)]]></content:encoded>
   <wp:post_date>${date}</wp:post_date>
   <wp:post_date_gmt>${dateGmt}</wp:post_date_gmt>
   <wp:comment_status>open</wp:comment_status>
@@ -117,16 +97,27 @@ for file in "$@"; do
   <wp:post_type>${type}</wp:post_type>
   <wp:post_password></wp:post_password>
   <wp:is_sticky>0</wp:is_sticky>
+  <dc:creator>${creator}</dc:creator>
 EOF
+  # split the tags
+  raw_tags=$(grep '#tags' "$file" |perl -pe 's/.tags // '|tr ',' "\n")
+  for tag in $raw_tags; do
+    echo "<category domain=\"post_tag\" nicename=\"${tag}\">${tag}</category>"
+    #echo "<category domain=\"category\" nicename=\"$category\">$category</category>"
+  done;
+  all_tags="$all_tags$raw_tags"
+
+  # category="uncategorized"
+  # <category domain="category" nicename="$category">$category</category>
 
   # other possible elements:
 #  <link>/${fullname}</link>
 #  <wp:post_id></wp:post_id>
 #  <excerpt:encoded></excerpt:encoded>
-#  <dc:creator>${creator}</dc:creator>
 
 
-  for cmtfile in ${dir}/"$fullname"-{all,[0-9]*}.cmt; do
+  for cmtfile in ${commentdir}/"$fullname"-{all,[0-9]*}.cmt; do
     if [[ -e "$cmtfile" ]]; then
       set +e  # because the perl script below uses a non-zero exit code
       tail -q -n +2 "$cmtfile" | \
@@ -163,6 +154,22 @@ EOF
 EOF
 done
 
+index=2
+#for category in $(echo "$all_categories"|sort|uniq); do
+#    cat << EOF
+#    <wp:category><wp:term_id>${index}</wp:term_id><wp:category_nicename>${category}</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[${category}]]></wp:cat_name></wp:category>
+#EOF
+#    index=$(expr $index + 1)
+#done
+
+for tag in $(echo "$all_tags"|sort|uniq); do
+    cat << EOF
+    <wp:tag><wp:term_id>${index}</wp:term_id><wp:tag_slug>${tag}</wp:tag_slug><wp:tag_name>${tag}</wp:tag_name></wp:tag>
+EOF
+    index=$(expr $index + 1)
+done
+
+
 # output footer
 cat << EOF
 </channel>

Benjamin Mako Hill || Want to submit a patch?