commit 427df9f8882793896a2dcb0f0238a25791f8535b
parent 442ebd5d3475cbebe3f67dd267b3c32292369caa
Author: Dan Callaghan <djc@djc.id.au>
Date: Sun, 31 Aug 2008 17:02:19 +1000
export tools: use optparse, expanded tabs, re-indented
Diffstat:
M export_readinglog_wp.py |  68 +++++++++++++++++++++++++++++++++++++++++++++++--------------------
M export_wp.py            | 163 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------
2 files changed, 147 insertions(+), 84 deletions(-)
diff --git a/export_readinglog_wp.py b/export_readinglog_wp.py
@@ -1,28 +1,54 @@
-import os, time, re, urllib, uuid
+import os, time, re, urllib, uuid, sys
import MySQLdb
import yaml
-def export(f):
- log_entries = []
+def export(options):
+ log_entries = []
- cn = MySQLdb.connect(host='cruz', user='root', passwd='ELIDED', db='wordpress', use_unicode=True)
+ cn = MySQLdb.connect(host=options.host, user=options.username,
+ passwd=options.password, db=options.db, use_unicode=True)
- cur = cn.cursor()
- cur.execute('SELECT id, post_title, post_date, guid FROM wp_posts INNER JOIN wp_term_relationships ON wp_term_relationships.object_id = wp_posts.id WHERE post_status = %s AND term_taxonomy_id = %s ORDER BY post_date ASC', ('publish', 14))
- for row in cur.fetchall():
- id, title, date, guid = row
- entry = {'Title': title, 'Date': date, 'GUID': guid}
- subcur = cn.cursor()
- subcur.execute('SELECT meta_key, meta_value FROM wp_postmeta WHERE post_id = %s', (id,))
- for key, value in subcur.fetchall():
- if key == '_readinglog_url': entry['URL'] = value
- elif key == '_readinglog_author': entry['Author'] = value
- elif key == '_readinglog_rating': entry['Rating'] = float(value)
- log_entries.append(entry)
-
- yaml.add_representer(unicode, lambda dumper, value: dumper.represent_scalar(u'tag:yaml.org,2002:str', value))
- yaml.dump_all(log_entries, f, default_flow_style=False, allow_unicode=True)
+ cur = cn.cursor()
+ cur.execute('SELECT id, post_title, post_date, guid FROM wp_posts '
+ 'INNER JOIN wp_term_relationships ON '
+ 'wp_term_relationships.object_id = wp_posts.id '
+ 'WHERE post_status = %s AND term_taxonomy_id = %s '
+ 'ORDER BY post_date ASC', ('publish', 14))
+ for row in cur.fetchall():
+ id, title, date, guid = row
+ entry = {'Title': title, 'Date': date, 'GUID': guid}
+ subcur = cn.cursor()
+ subcur.execute('SELECT meta_key, meta_value FROM wp_postmeta '
+ 'WHERE post_id = %s', (id,))
+ for key, value in subcur.fetchall():
+ if key == '_readinglog_url': entry['URL'] = value
+ elif key == '_readinglog_author': entry['Author'] = value
+ elif key == '_readinglog_rating': entry['Rating'] = float(value)
+ log_entries.append(entry)
+
+ if options.output is not None:
+ f = open(options.output, 'w')
+ else:
+ f = sys.stdout
+
+ yaml.add_representer(unicode, lambda dumper, value:
+ dumper.represent_scalar(u'tag:yaml.org,2002:str', value))
+ yaml.dump_all(log_entries, f, default_flow_style=False, allow_unicode=True)
if __name__ == '__main__':
- import sys
- export(sys.stdout)
+ from optparse import OptionParser
+ parser = OptionParser()
+ parser.add_option('-H', '--host',
+ help='connect to MySQL server HOST [default: %default]')
+ parser.add_option('-u', '--username',
+ help='use USERNAME when connecting [default: %default]')
+ parser.add_option('-p', '--password',
+ help='use PASSWORD when connecting [default: no password]')
+ parser.add_option('-d', '--db',
+ help='name of the Wordpress database [default: %default]')
+ parser.set_defaults(host='localhost', username='root',
+ password=None, db='wordpress')
+ parser.add_option('-o', '--output', metavar='FILE',
+ help='write output to FILE instead of stdout')
+ options, args = parser.parse_args()
+ export(options)
diff --git a/export_wp.py b/export_wp.py
@@ -2,75 +2,112 @@ import os, time, re, urllib, uuid
import MySQLdb
def html2md(s):
- s = s.replace('<p>', '')
- s = s.replace('</p>', '')
- # XXX
- return s
+ s = s.replace('<p>', '')
+ s = s.replace('</p>', '')
+ # XXX
+ return s
-def export(base_dir):
- if not os.path.exists(base_dir):
- os.mkdir(base_dir)
+def export(options):
+ base_dir = options.base_dir
+ if not os.path.exists(base_dir):
+ os.mkdir(base_dir)
- cn = MySQLdb.connect(host='cruz', user='root', passwd='ELIDED', db='wordpress')
+ cn = MySQLdb.connect(host=options.host, user=options.username,
+ passwd=options.password, db=options.db, use_unicode=True)
- cur = cn.cursor()
- cur.execute('SELECT id, post_name, post_title, post_date, post_modified, guid, post_content FROM wp_posts WHERE post_status = %s', ('publish',))
- for row in cur.fetchall():
- id, post_name, post_title, post_date, post_modified, guid, post_content = row
-
- # Wordpress stores these URL-encoded
- post_name = urllib.unquote(post_name)
- guid = urllib.unquote(guid)
+ cur = cn.cursor()
+ cur.execute('SELECT id, post_name, post_title, post_date, post_modified, '
+ 'guid, post_content FROM wp_posts WHERE post_status = %s',
+ ('publish',))
+ for row in cur.fetchall():
+ id, post_name, post_title, post_date, post_modified, \
+ guid, post_content = row
+
+ # Wordpress stores these URL-encoded
+ post_name = urllib.unquote(post_name)
+ guid = urllib.unquote(guid)
- subcur = cn.cursor()
- subcur.execute('SELECT wp_terms.name FROM wp_term_relationships INNER JOIN wp_term_taxonomy ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms ON wp_term_taxonomy.term_id = wp_terms.term_id WHERE taxonomy = %s AND object_id = %s', ('category', id,))
- categories = [category for category, in subcur.fetchall()]
- subcur = cn.cursor()
- subcur.execute('SELECT wp_terms.name FROM wp_term_relationships INNER JOIN wp_term_taxonomy ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms ON wp_term_taxonomy.term_id = wp_terms.term_id WHERE taxonomy = %s AND object_id = %s', ('post_tag', id,))
- tags = [tag for tag, in subcur.fetchall()]
+ subcur = cn.cursor()
+ subcur.execute('SELECT wp_terms.name FROM wp_term_relationships '
+ 'INNER JOIN wp_term_taxonomy ON '
+ 'wp_term_relationships.term_taxonomy_id = '
+ 'wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms '
+ 'ON wp_term_taxonomy.term_id = wp_terms.term_id '
+ 'WHERE taxonomy = %s AND object_id = %s', ('category', id,))
+ categories = [category for category, in subcur.fetchall()]
+ subcur = cn.cursor()
+ subcur.execute('SELECT wp_terms.name FROM wp_term_relationships '
+ 'INNER JOIN wp_term_taxonomy ON '
+ 'wp_term_relationships.term_taxonomy_id = '
+ 'wp_term_taxonomy.term_taxonomy_id INNER JOIN wp_terms '
+ 'ON wp_term_taxonomy.term_id = wp_terms.term_id '
+ 'WHERE taxonomy = %s AND object_id = %s', ('post_tag', id,))
+ tags = [tag for tag, in subcur.fetchall()]
- # XXX
- if 'Reading' in categories: continue
+ # XXX
+ if 'Reading' in categories: continue
- os.mkdir(os.path.join(base_dir, post_name))
- f = open(os.path.join(base_dir, post_name, 'content.txt'), 'w')
- f.write('Title: %s\n' % post_title)
- f.write('Publication-Date: %s\n' % post_date.strftime('%Y-%m-%d %H:%M:%S'))
- f.write('GUID: %s\n' % guid)
- f.write('Categories: %s\n' % ', '.join(categories))
- f.write('Tags: %s\n' % ', '.join(tags))
- f.write('\n')
- f.write(post_content)
- del f
- os.utime(os.path.join(base_dir, post_name, 'content.txt'),
- (time.mktime(post_modified.timetuple()), time.mktime(post_modified.timetuple())))
+ os.mkdir(os.path.join(base_dir, post_name))
+ f = open(os.path.join(base_dir, post_name, 'content.txt'), 'w')
+ f.write('Title: %s\n' % post_title)
+ f.write('Publication-Date: %s\n' %
+ post_date.strftime('%Y-%m-%d %H:%M:%S'))
+ f.write('GUID: %s\n' % guid)
+ f.write('Categories: %s\n' % ', '.join(categories))
+ f.write('Tags: %s\n' % ', '.join(tags))
+ f.write('\n')
+ f.write(post_content)
+ del f
+ os.utime(os.path.join(base_dir, post_name, 'content.txt'),
+ (time.mktime(post_modified.timetuple()),
+ time.mktime(post_modified.timetuple())))
- # comments
- subcur = cn.cursor()
- subcur.execute('SELECT comment_author, comment_author_email, comment_author_url, comment_author_ip, comment_date, comment_agent, comment_content FROM wp_comments WHERE comment_post_id = %s AND comment_approved LIKE %s', (id, 1))
- os.mkdir(os.path.join(base_dir, post_name, 'comments'))
- # XXX dir perms
- for subrow in subcur.fetchall():
- author, email, url, ip_addr, date, user_agent, content = subrow
- id = str(uuid.uuid4()).replace('-', '')
- filename = os.path.join(base_dir, post_name, 'comments', id)
- f = open(filename, 'w')
- if author:
- f.write('From: %s\n' % author)
- if email:
- f.write('Author-Email: %s\n' % email)
- if url:
- f.write('Author-URL: %s\n' % url)
- if user_agent:
- f.write('User-Agent: %s\n' % user_agent)
- if ip_addr:
- f.write('Received: from %s\n' % ip_addr)
- f.write('\n')
- f.write(html2md(content)) # Wordpress HTMLifies comments >_<
- del f
- os.utime(filename,
- (time.mktime(date.timetuple()), time.mktime(date.timetuple())))
+ # comments
+ subcur = cn.cursor()
+ subcur.execute('SELECT comment_author, comment_author_email, '
+ 'comment_author_url, comment_author_ip, comment_date, '
+ 'comment_agent, comment_content FROM wp_comments WHERE '
+ 'comment_post_id = %s AND comment_approved LIKE %s', (id, 1))
+ os.mkdir(os.path.join(base_dir, post_name, 'comments'))
+ # XXX dir perms
+ for subrow in subcur.fetchall():
+ author, email, url, ip_addr, date, user_agent, content = subrow
+ id = str(uuid.uuid4()).replace('-', '')
+ filename = os.path.join(base_dir, post_name, 'comments', id)
+ f = open(filename, 'w')
+ if author:
+ f.write('From: %s\n' % author)
+ if email:
+ f.write('Author-Email: %s\n' % email)
+ if url:
+ f.write('Author-URL: %s\n' % url)
+ if user_agent:
+ f.write('User-Agent: %s\n' % user_agent)
+ if ip_addr:
+ f.write('Received: from %s\n' % ip_addr)
+ f.write('\n')
+ f.write(html2md(content)) # Wordpress HTMLifies comments >_<
+ del f
+ os.utime(filename,
+ (time.mktime(date.timetuple()),
+ time.mktime(date.timetuple())))
if __name__ == '__main__':
- import sys
- export(sys.argv[1])
+ from optparse import OptionParser
+ parser = OptionParser()
+ parser.add_option('-H', '--host',
+ help='connect to MySQL server HOST [default: %default]')
+ parser.add_option('-u', '--username',
+ help='use USERNAME when connecting [default: %default]')
+ parser.add_option('-p', '--password',
+ help='use PASSWORD when connecting [default: no password]')
+ parser.add_option('-d', '--db',
+ help='name of the Wordpress database [default: %default]')
+ parser.set_defaults(host='localhost', username='root',
+ password=None, db='wordpress')
+ parser.add_option('-b', '--base-dir',
+ help='base directory into which entries will be written')
+ options, args = parser.parse_args()
+ if options.base_dir is None:
+ parser.error('--base-dir must be specified')
+ export(options)