constance

Scripts for generating (an earlier obsolete version of) my personal web site
git clone https://code.djc.id.au/git/constance/
commit 427df9f8882793896a2dcb0f0238a25791f8535b
parent 442ebd5d3475cbebe3f67dd267b3c32292369caa
Author: Dan Callaghan <djc@djc.id.au>
Date:   Sun, 31 Aug 2008 17:02:19 +1000

export tools: use optparse, expanded tabs, re-indented

Diffstat:
M export_readinglog_wp.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++---------------------
M export_wp.py | 163 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
2 files changed, 147 insertions(+), 84 deletions(-)
diff --git a/export_readinglog_wp.py b/export_readinglog_wp.py
@@ -1,28 +1,54 @@
-import os, time, re, urllib, uuid
+import os, time, re, urllib, uuid, sys
 import MySQLdb
 import yaml
 
-def export(f):
-	log_entries = []
+def export(options):
+    log_entries = []
 
-	cn = MySQLdb.connect(host='cruz', user='root', passwd='ELIDED', db='wordpress', use_unicode=True)
+    cn = MySQLdb.connect(host=options.host, user=options.username, 
+            passwd=options.password, db=options.db, use_unicode=True)
 
-	cur = cn.cursor()
-	cur.execute('SELECT id, post_title, post_date, guid FROM wp_posts INNER JOIN wp_term_relationships ON wp_term_relationships.object_id = wp_posts.id WHERE post_status = %s AND term_taxonomy_id = %s ORDER BY post_date ASC', ('publish', 14))
-	for row in cur.fetchall():
-		id, title, date, guid = row
-		entry = {'Title': title, 'Date': date, 'GUID': guid}
-		subcur = cn.cursor()
-		subcur.execute('SELECT meta_key, meta_value FROM wp_postmeta WHERE post_id = %s', (id,))
-		for key, value in subcur.fetchall():
-			if key == '_readinglog_url': entry['URL'] = value
-			elif key == '_readinglog_author': entry['Author'] = value
-			elif key == '_readinglog_rating': entry['Rating'] = float(value)
-		log_entries.append(entry)
-	
-	yaml.add_representer(unicode, lambda dumper, value: dumper.represent_scalar(u'tag:yaml.org,2002:str', value))
-	yaml.dump_all(log_entries, f, default_flow_style=False, allow_unicode=True)
+    cur = cn.cursor()
+    cur.execute('SELECT id, post_title, post_date, guid FROM wp_posts '
+            'INNER JOIN wp_term_relationships ON '
+            'wp_term_relationships.object_id = wp_posts.id '
+            'WHERE post_status = %s AND term_taxonomy_id = %s '
+            'ORDER BY post_date ASC', ('publish', 14))
+    for row in cur.fetchall():
+        id, title, date, guid = row
+        entry = {'Title': title, 'Date': date, 'GUID': guid}
+        subcur = cn.cursor()
+        subcur.execute('SELECT meta_key, meta_value FROM wp_postmeta '
+                'WHERE post_id = %s', (id,))
+        for key, value in subcur.fetchall():
+            if key == '_readinglog_url': entry['URL'] = value
+            elif key == '_readinglog_author': entry['Author'] = value
+            elif key == '_readinglog_rating': entry['Rating'] = float(value)
+        log_entries.append(entry)
+
+    if options.output is not None:
+        f = open(options.output, 'w')
+    else:
+        f = sys.stdout
+    
+    yaml.add_representer(unicode, lambda dumper, value: 
+            dumper.represent_scalar(u'tag:yaml.org,2002:str', value))
+    yaml.dump_all(log_entries, f, default_flow_style=False, allow_unicode=True)
 
 if __name__ == '__main__':
-	import sys
-	export(sys.stdout)
+    from optparse import OptionParser
+    parser = OptionParser()
+    parser.add_option('-H', '--host', 
+            help='connect to MySQL server HOST [default: %default]')
+    parser.add_option('-u', '--username', 
+            help='use USERNAME when connecting [default: %default]')
+    parser.add_option('-p', '--password', 
+            help='use PASSWORD when connecting [default: no password]')
+    parser.add_option('-d', '--db', 
+            help='name of the Wordpress database [default: %default]')
+    parser.set_defaults(host='localhost', username='root', 
+            password=None, db='wordpress')
+    parser.add_option('-o', '--output', metavar='FILE', 
+            help='write output to FILE instead of stdout')
+    options, args = parser.parse_args()
+    export(options)
diff --git a/export_wp.py b/export_wp.py
@@ -2,75 +2,112 @@ import os, time, re, urllib, uuid
 import MySQLdb
 
 def html2md(s):
-	s = s.replace('<p>', '')
-	s = s.replace('</p>', '')
-	# XXX
-	return s
+    s = s.replace('<p>', '')
+    s = s.replace('</p>', '')
+    # XXX
+    return s
 
-def export(base_dir):
-	if not os.path.exists(base_dir):
-		os.mkdir(base_dir)
+def export(options):
+    base_dir = options.base_dir
+    if not os.path.exists(base_dir):
+        os.mkdir(base_dir)
 
-	cn = MySQLdb.connect(host='cruz', user='root', passwd='ELIDED', db='wordpress')
+    cn = MySQLdb.connect(host=options.host, user=options.username, 
+            passwd=options.password, db=options.db, use_unicode=True)
 
-	cur = cn.cursor()
-	cur.execute('SELECT id, post_name, post_title, post_date, post_modified, guid, post_content FROM wp_posts WHERE post_status = %s', ('publish',))
-	for row in cur.fetchall():
-		id, post_name, post_title, post_date, post_modified, guid, post_content = row
-		
-		# Wordpress stores these URL-encoded
-		post_name = urllib.unquote(post_name)
-		guid = urllib.unquote(guid)
+    cur = cn.cursor()
+    cur.execute('SELECT id, post_name, post_title, post_date, post_modified, '
+            'guid, post_content FROM wp_posts WHERE post_status = %s', 
+            ('publish',))
+    for row in cur.fetchall():
+        id, post_name, post_title, post_date, post_modified, \
+                guid, post_content = row
+        
+        # Wordpress stores these URL-encoded
+        post_name = urllib.unquote(post_name)
+        guid = urllib.unquote(guid)
 
-		subcur = cn.cursor()
-		subcur.execute('SELECT wp_terms.name FROM wp_term_relationships INNER JOIN wp_term_taxonomy ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms ON wp_term_taxonomy.term_id = wp_terms.term_id WHERE taxonomy = %s AND object_id = %s', ('category', id,))
-		categories = [category for category, in subcur.fetchall()]
-		subcur = cn.cursor()
-		subcur.execute('SELECT wp_terms.name FROM wp_term_relationships INNER JOIN wp_term_taxonomy ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms ON wp_term_taxonomy.term_id = wp_terms.term_id WHERE taxonomy = %s AND object_id = %s', ('post_tag', id,))
-		tags = [tag for tag, in subcur.fetchall()]
+        subcur = cn.cursor()
+        subcur.execute('SELECT wp_terms.name FROM wp_term_relationships '
+                'INNER JOIN wp_term_taxonomy ON '
+                'wp_term_relationships.term_taxonomy_id = '
+                'wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms '
+                'ON wp_term_taxonomy.term_id = wp_terms.term_id '
+                'WHERE taxonomy = %s AND object_id = %s', ('category', id,))
+        categories = [category for category, in subcur.fetchall()]
+        subcur = cn.cursor()
+        subcur.execute('SELECT wp_terms.name FROM wp_term_relationships '
+                'INNER JOIN wp_term_taxonomy ON '
+                'wp_term_relationships.term_taxonomy_id = '
+                'wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms '
+                'ON wp_term_taxonomy.term_id = wp_terms.term_id '
+                'WHERE taxonomy = %s AND object_id = %s', ('post_tag', id,))
+        tags = [tag for tag, in subcur.fetchall()]
 
-		# XXX
-		if 'Reading' in categories: continue
+        # XXX
+        if 'Reading' in categories: continue
 
-		os.mkdir(os.path.join(base_dir, post_name))
-		f = open(os.path.join(base_dir, post_name, 'content.txt'), 'w')
-		f.write('Title: %s\n' % post_title)
-		f.write('Publication-Date: %s\n' % post_date.strftime('%Y-%m-%d %H:%M:%S'))
-		f.write('GUID: %s\n' % guid)
-		f.write('Categories: %s\n' % ', '.join(categories))
-		f.write('Tags: %s\n' % ', '.join(tags))
-		f.write('\n')
-		f.write(post_content)
-		del f
-		os.utime(os.path.join(base_dir, post_name, 'content.txt'), 
-				(time.mktime(post_modified.timetuple()), time.mktime(post_modified.timetuple())))
+        os.mkdir(os.path.join(base_dir, post_name))
+        f = open(os.path.join(base_dir, post_name, 'content.txt'), 'w')
+        f.write('Title: %s\n' % post_title)
+        f.write('Publication-Date: %s\n' % 
+                post_date.strftime('%Y-%m-%d %H:%M:%S'))
+        f.write('GUID: %s\n' % guid)
+        f.write('Categories: %s\n' % ', '.join(categories))
+        f.write('Tags: %s\n' % ', '.join(tags))
+        f.write('\n')
+        f.write(post_content)
+        del f
+        os.utime(os.path.join(base_dir, post_name, 'content.txt'), 
+                (time.mktime(post_modified.timetuple()), 
+                 time.mktime(post_modified.timetuple())))
 
-		# comments
-		subcur = cn.cursor()
-		subcur.execute('SELECT comment_author, comment_author_email, comment_author_url, comment_author_ip, comment_date, comment_agent, comment_content FROM wp_comments WHERE comment_post_id = %s AND comment_approved LIKE %s', (id, 1))
-		os.mkdir(os.path.join(base_dir, post_name, 'comments'))
-		# XXX dir perms
-		for subrow in subcur.fetchall():
-			author, email, url, ip_addr, date, user_agent, content = subrow
-			id = str(uuid.uuid4()).replace('-', '')
-			filename = os.path.join(base_dir, post_name, 'comments', id)
-			f = open(filename, 'w')
-			if author:
-				f.write('From: %s\n' % author)
-			if email:
-				f.write('Author-Email: %s\n' % email)
-			if url:
-				f.write('Author-URL: %s\n' % url)
-			if user_agent:
-				f.write('User-Agent: %s\n' % user_agent)
-			if ip_addr:
-				f.write('Received: from %s\n' % ip_addr)
-			f.write('\n')
-			f.write(html2md(content)) # Wordpress HTMLifies comments >_<
-			del f
-			os.utime(filename, 
-					(time.mktime(date.timetuple()), time.mktime(date.timetuple())))
+        # comments
+        subcur = cn.cursor()
+        subcur.execute('SELECT comment_author, comment_author_email, '
+                'comment_author_url, comment_author_ip, comment_date, '
+                'comment_agent, comment_content FROM wp_comments WHERE '
+                'comment_post_id = %s AND comment_approved LIKE %s', (id, 1))
+        os.mkdir(os.path.join(base_dir, post_name, 'comments'))
+        # XXX dir perms
+        for subrow in subcur.fetchall():
+            author, email, url, ip_addr, date, user_agent, content = subrow
+            id = str(uuid.uuid4()).replace('-', '')
+            filename = os.path.join(base_dir, post_name, 'comments', id)
+            f = open(filename, 'w')
+            if author:
+                f.write('From: %s\n' % author)
+            if email:
+                f.write('Author-Email: %s\n' % email)
+            if url:
+                f.write('Author-URL: %s\n' % url)
+            if user_agent:
+                f.write('User-Agent: %s\n' % user_agent)
+            if ip_addr:
+                f.write('Received: from %s\n' % ip_addr)
+            f.write('\n')
+            f.write(html2md(content)) # Wordpress HTMLifies comments >_<
+            del f
+            os.utime(filename, 
+                    (time.mktime(date.timetuple()), 
+                     time.mktime(date.timetuple())))
 
 if __name__ == '__main__':
-	import sys
-	export(sys.argv[1])
+    from optparse import OptionParser
+    parser = OptionParser()
+    parser.add_option('-H', '--host', 
+            help='connect to MySQL server HOST [default: %default]')
+    parser.add_option('-u', '--username', 
+            help='use USERNAME when connecting [default: %default]')
+    parser.add_option('-p', '--password', 
+            help='use PASSWORD when connecting [default: no password]')
+    parser.add_option('-d', '--db', 
+            help='name of the Wordpress database [default: %default]')
+    parser.set_defaults(host='localhost', username='root', 
+            password=None, db='wordpress')
+    parser.add_option('-b', '--base-dir', 
+            help='base directory into which entries will be written')
+    options, args = parser.parse_args()
+    if options.base_dir is None:
+        parser.error('--base-dir must be specified')
+    export(options)