After figuring out the problem with the old dasBlog permalinks, I had to find a way to convert all existing links in my blog to the new format. Lately, whenever I need a script, I try to take the opportunity to learn a bit of Python, so it took an hour or two to write the conversion script.
Here it is. If you want to use it for your own copy of dasBlog, set the "domain" global variable to your blog's domain name and run the script from your ~/Content directory (you can also download the script here):
#!/usr/local/bin/python
#
# convert_permalinks.py
# Quick and dirty permalink converter for dasBlog content files
#
# Tomer Gabel, 22 June 2008
# http://www.tomergabel.com
#
# This code is placed in the public domain (see http://creativecommons.org/licenses/publicdomain/)
from __future__ import with_statement
import os
import glob
import re
import urllib
# Static constants
# Domain whose permalinks should be converted (matched with or without a leading "www.")
domain = 'tomergabel.com'
# Matches href attributes that point at the domain above and contain at least one '+' in the
# path; capture group 1 is the complete URL. Raw strings keep the regex escapes (\. and \+)
# from being interpreted as (invalid) string escapes.
href_lookup = re.compile( r'href="(http://(www\.)?' + re.escape( domain ) + r'/[^"]*\+[^"]*?)"' )
# Globals
# Cache of conversions already attempted: original URL -> result of convert() for that URL
conversion_map = {}
def convert( url ):
    """Strip every '+' from url and test whether the resulting URL can be fetched.

    The converted URL is requested once. It is reported as valid unless the server answers
    with HTTP 404, or the request fails before any status is received (unreachable host,
    malformed URL and the like).

    Returns a two-element list [ new_url, valid ]: the converted URL and a boolean flag
    indicating whether the converted URL is valid.
    """
    # Resolve urlopen locally so the function works on both Python 3 and Python 2
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    new_url = url.replace( "+", "" )

    try:
        resp = urlopen( new_url )
    except Exception as error:
        # Python 3 raises HTTPError (which carries the status in .code) for 4xx/5xx replies;
        # an exception without a status code means the request itself failed.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        status = getattr( error, "code", None )
        return [ new_url, status is not None and status != 404 ]

    # Python 2's urlopen returns a normal response even for a 404, so the status has to be
    # checked explicitly. Non-HTTP responses report no status (None) and count as valid.
    try:
        status = resp.getcode()
    finally:
        resp.close()
    return [ new_url, status != 404 ]
def process( source_file, target_file ):
    """Convert the permalinks found in source_file and write the result to target_file.

    Every href matched by href_lookup is passed through convert() (results are cached in
    conversion_map, so each distinct URL is tested only once). Links whose converted URL
    is valid are rewritten; links whose conversion failed are left exactly as they were.

    The target file is written only if the source contained at least one matching link,
    even when none of the conversions succeeded.
    """
    with open( source_file, "r" ) as source:
        source_text = source.read()

    # Called once per matched link; returns the text that should replace the match
    def rewrite_link( match ):
        original_url = match.group( 1 )
        print( "\tConverting permalink " + original_url )

        if original_url not in conversion_map:
            conversion_map[ original_url ] = convert( original_url )
        conversion = conversion_map[ original_url ]

        if not conversion[ 1 ]:
            print( "\tConversion failed!" )
            return match.group( 0 )

        print( "\tConversion successful, new URL: " + conversion[ 0 ] )
        # Splice the new URL in at the position of the captured group. Replacing only the
        # matched link (instead of every occurrence of the string in the file) keeps a URL
        # that is a prefix of another link from half-converting that other link.
        matched_text = match.group( 0 )
        offset = match.start( 0 )
        prefix = matched_text[ : match.start( 1 ) - offset ]
        suffix = matched_text[ match.end( 1 ) - offset : ]
        return prefix + conversion[ 0 ] + suffix

    conv_text, match_count = href_lookup.subn( rewrite_link, source_text )

    # Write out the target file
    if match_count > 0:
        with open( target_file, "w" ) as output:
            output.write( conv_text )
# Entry point: run every XML content file in the current directory through process(),
# writing the converted copy next to the original with a ".conv" suffix
for file_name in glob.iglob( "*.xml" ):
    print( "Processing " + file_name )
    converted_name = file_name + ".conv"
    process( file_name, converted_name )