Version used to do the (imperfect) export/import of the "Revealing Errors" posts

author Benjamin Mako Hill <mako@atdot.cc>

Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)

committer Benjamin Mako Hill <mako@atdot.cc>

Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)
author Benjamin Mako Hill <mako@atdot.cc>
Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)
committer Benjamin Mako Hill <mako@atdot.cc>
Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)
diff --git a/pyblosxom2wxr.sh b/pyblosxom2wxr.sh

index c3218d7d7d77c9b955f3368895f02cab74b092fd..4e91ef54aa51e07f78a12b83c27a250176e504a4 100755 (executable)
--- a/pyblosxom2wxr.sh
+++ b/pyblosxom2wxr.sh
@@ -26,6 +26,10 @@ if [[ $# = "0" || $1 = "--help" ]]; then
    exit 1
  fi
  
+# accumulators for the category and tag names collected while converting posts
+all_categories=""
+all_tags=""
+
  # comment id sequence number
  commentid=1
  
@@ -52,46 +56,23 @@ cat << EOF
    <wp:wxr_version>1.0</wp:wxr_version>
    <wp:base_site_url></wp:base_site_url>
    <wp:base_blog_url></wp:base_blog_url>
-  <wp:category></wp:category>
-
  EOF
  
-# convert comments
+
+# convert blog posts
  for file in "$@"; do
+  creator="mako"
    fullname=`basename "$file" .txt`
    dir=`dirname "$file"`
+  commentdir="$(dirname "$file")/comments"
    title=`head -n 1 "$file"`
+  type="post"
  
-  # TODO: make this easier to customize
-  date_re="[0-9]{4}-[0-9]{2}-[0-9]{2}"
-  time_re="([0-9]{2})-([0-9]{2})"
-
-  # my pyblosxom posts have a date prefix, e.g. 2010-03-13. my pages don't.
-  if [[ "$fullname" =~ ^${date_re} ]]; then
-    type=post
-    name=${fullname:11}
-    datestr="${fullname::10} 00:00:00 -0800"
-  else
-    type=page
-    name=${fullname}
-
-    timestamp_file=${dir}/../timestamps
-    datestr=`grep --max-count=1 -E \
-               "^${date_re}-${time_re} (.+/)?${fullname}.txt\$" ${timestamp_file} | \
-        cut -f1 -d' ' | \
-        sed -r "s/-${time_re}\$/ \1:\2 -0500/"`
-  
-    if [[ ${datestr} == '' ]]; then
-      datestr=`stat --format=%y "$file"`
-    fi
-  fi
-
-  pubDate=`date -uR -d "$datestr"`
-  date=`date -d "$datestr" +'%F %T'`
-  dateGmt=`date -u -d "$datestr" +'%F %T'`
+  name=${fullname}
  
-  # TODO: category support
-  category="uncategorized"
+  pubDate=$(date -uR -r $file)
+  date=$(date -r "$file" +'%F %T')
+  dateGmt=$(date -u -r "$file" +'%F %T')
  
    if grep -q ']]>' "$file"; then
      echo "WARNING: $file contains the string ]]>, which makes its CDATA " \
@@ -102,10 +83,9 @@ for file in "$@"; do
  <item>
    <title>${title}</title>
    <pubDate>${pubDate}</pubDate>
-  <category domain="category" nicename="$category">$category</category>
    <guid isPermaLink="true">/${fullname}</guid>
    <description></description>
-  <content:encoded><![CDATA[`tail -n +3 "$file"`]]></content:encoded>
+  <content:encoded><![CDATA[$(tail -n +4 "$file" | markdown_py)]]></content:encoded>
    <wp:post_date>${date}</wp:post_date>
    <wp:post_date_gmt>${dateGmt}</wp:post_date_gmt>
    <wp:comment_status>open</wp:comment_status>
@@ -117,16 +97,27 @@ for file in "$@"; do
    <wp:post_type>${type}</wp:post_type>
    <wp:post_password></wp:post_password>
    <wp:is_sticky>0</wp:is_sticky>
+  <dc:creator>${creator}</dc:creator>
  EOF
+ 
+  # read the post's "#tags" line, split it on commas, and emit one post_tag <category> element per tag
+  raw_tags=$(grep '#tags' "$file" |perl -pe 's/.tags // '|tr ',' "\n")
+  for tag in $raw_tags; do
+    echo "<category domain=\"post_tag\" nicename=\"${tag}\">${tag}</category>"
+    #echo "<category domain=\"category\" nicename=\"$category\">$category</category>"
+  done;
+  all_tags="$all_tags$raw_tags"
+
+  # category="uncategorized"
+  # <category domain="category" nicename="$category">$category</category>
  
    # other possible elements:
  #  <link>/${fullname}</link>
  #  <wp:post_id></wp:post_id>
  #  <excerpt:encoded></excerpt:encoded>
-#  <dc:creator>${creator}</dc:creator>
  
  
-  for cmtfile in ${dir}/"$fullname"-{all,[0-9]*}.cmt; do
+  for cmtfile in ${commentdir}/"$fullname"-{all,[0-9]*}.cmt; do
      if [[ -e "$cmtfile" ]]; then
        set +e  # because the perl script below uses a non-zero exit code
        tail -q -n +2 "$cmtfile" | \
@@ -163,6 +154,22 @@ EOF
  EOF
  done
  
+index=2
+#for category in $(echo "$all_categories"|sort|uniq); do
+#    cat << EOF
+#    <wp:category><wp:term_id>${index}</wp:term_id><wp:category_nicename>${category}</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[${category}]]></wp:cat_name></wp:category>
+#EOF
+#    index=$(expr $index + 1)
+#done
+
+for tag in $(echo "$all_tags"|sort|uniq); do
+    cat << EOF
+    <wp:tag><wp:term_id>${index}</wp:term_id><wp:tag_slug>${tag}</wp:tag_slug><wp:tag_name>${tag}</wp:tag_name></wp:tag>
+EOF
+    index=$(expr $index + 1)
+done
+
+
  # output footer
  cat << EOF
  </channel>
author	Benjamin Mako Hill <mako@atdot.cc>
	Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)
committer	Benjamin Mako Hill <mako@atdot.cc>
	Thu, 10 Jan 2013 22:34:13 +0000 (17:34 -0500)