commit 3206cf08a5e9467baf28332a56ecd27422149486
parent ae6182286419cc0d1e4b21cc2130c57d2acdaf43
Author: Dan Callaghan <djc@djc.id.au>
Date: Wed, 17 Sep 2008 22:53:28 +1000
mostly ditched colubrid in favour of a hand-rolled WSGI callable; mostly cleaned up encoding issues
Diffstat:
5 files changed, 137 insertions(+), 75 deletions(-)
diff --git a/TODO b/TODO
@@ -17,14 +17,14 @@
- perhaps as interim step could export from Nokia and embed gmaps directly in constance?
- document on-disk format for the entry types
- tests!!!!!!!!
-- escape high bytes in URLs (should work properly since the page is UTF-8 anyway, but it seems to confuse at least MSN-Bot (and probably IE) sigh)
+- escape high bytes in URLs (should work properly, since pages are served in the same encoding we expect URLs to be in, but it seems to confuse at least MSN-Bot (and probably IE), sigh)
- better means of generating URLs?
-- fix unicode in comments
- - really, this means cleaning up the whole self.charset crap (move into config) and apply it consistently!
- - *really*, this means patching or ditching colubrid ergh ...
+- have almost completely ditched colubrid, just need to replace StaticExports for testing then rm it
+- use encoding from config for blog.py instead of hard-coding utf8
- handle reCAPTCHA errors (including no captcha fields submitted!!!)
- invalid offsets (displays every entry at max and 500's on invalid such as alpha)
- config option to add next/prev links to page (as well as the link rels)
- markdown typography/smartypants
- or even just, better markdown
- generate goog sitemaps
+- prettier error pages
diff --git a/app.py b/app.py
@@ -1,37 +1,67 @@
# vim:encoding=utf-8
-import os
+import os, cgi, re
from itertools import chain
import wsgiref.util
from genshi.template import TemplateLoader
-from colubrid import RegexApplication, HttpResponse
-from colubrid.exceptions import PageNotFound, AccessDenied, HttpFound
from colubrid.server import StaticExports
from recaptcha.client import captcha
import config
import blog
+class HTTPException(Exception):
+ status = '500 Internal Server Error'
+ headers = []
+
+class ForbiddenError(HTTPException):
+ status = '403 Forbidden'
+
+class NotFoundError(HTTPException):
+ status = '404 Not Found'
+
+class HTTPRedirect(HTTPException):
+ def __init__(self, location):
+ assert isinstance(location, str)
+ self.headers = [('Location', location)]
+
+class HTTPFound(HTTPRedirect):
+ status = '302 Found'
+
+class HTTPTemporaryRedirect(HTTPRedirect):
+ status = '307 Temporary Redirect'
+
+class HTTPPermanentRedirect(HTTPRedirect):
+ status = '301 Moved Permanently'
+
template_loader = TemplateLoader(
os.path.join(os.path.dirname(__file__), 'templates'),
variable_lookup='strict',
auto_reload=True)
-class Constance(RegexApplication):
+class Constance(object):
+
+ def __init__(self, environ, start_response):
+ self.environ = environ
+ self.start = start_response
+ # as with SCRIPT_NAME, we want APP_URI *not* to include trailing slash
+ self.environ['APP_URI'] = wsgiref.util.application_uri(self.environ).rstrip('/')
+
+ self.config = config.ConstanceConfigParser(self.environ['constance.config_filename'])
- urls = [(r'^$', 'index'),
- (r'^\+tags/$', 'tag_cloud'),
- (r'^\+tags/(.+)$', 'tag'),
- (r'^\+reading/?$', 'reading'),
- (r'^([^+/][^/]*)/?$', 'post'),
- (r'^([^+/][^/]*)/comments/\+new$', 'add_post_comment')]
- charset = 'utf-8'
+ self.encoding = self.config.get('global', 'encoding')
+ self.args = dict((k.decode(self.encoding, 'ignore'),
+ v.decode(self.encoding, 'ignore'))
+ for k, v in
+ cgi.parse_qsl(self.environ.get('QUERY_STRING', ''), True))
+ if self.environ['REQUEST_METHOD'] == 'POST':
+ maxlen = int(self.environ['CONTENT_LENGTH'])
+ self.post_data = self.environ['wsgi.input'].read(maxlen)
+ self.form = dict((k.decode(self.encoding, 'ignore'),
+ v.decode(self.encoding, 'ignore'))
+ for k, v in cgi.parse_qsl(self.post_data, True))
- def __init__(self, *args, **kwargs):
- super(Constance, self).__init__(*args, **kwargs)
- self.request.environ['APP_URI'] = wsgiref.util.application_uri(self.request.environ) # Colubrid ought to do this for us
- self.config = config.ConstanceConfigParser(self.request.environ['constance.config_filename'])
self.blog_entries = blog.BlogEntrySet(self.config.getunicode('blog', 'dir'))
readinglog_filename = self.config.getunicode('readinglog', 'filename')
if readinglog_filename:
@@ -39,30 +69,55 @@ class Constance(RegexApplication):
else:
self.readinglog_entries = frozenset()
+ def __iter__(self):
+ try:
+ for patt, method_name in self.urls:
+ match = patt.match(self.environ['PATH_INFO'])
+ if match:
+ response_body, response_headers = getattr(self, method_name)(
+ *[x.decode(self.encoding, 'ignore') for x in match.groups()])
+ status = '200 OK'
+ self.start(status, response_headers)
+ return iter([response_body])
+ # no matching URI found, so give a 404
+ raise NotFoundError()
+ except HTTPException, e:
+ # XXX make prettier errors
+ self.start(e.status, [('Content-type', 'text/plain')] + e.headers)
+ return iter([e.status])
+
+ urls = [(r'/$', 'index'),
+ (r'/\+tags/$', 'tag_cloud'),
+ (r'/\+tags/(.+)$', 'tag'),
+ (r'/\+reading/?$', 'reading'),
+ (r'/([^+/][^/]*)/?$', 'post'),
+ (r'/([^+/][^/]*)/comments/\+new$', 'add_post_comment')]
+ urls = [(re.compile(patt), method) for patt, method in urls]
+
def index(self):
- offset = int(self.request.args.get('offset', 0))
+ offset = int(self.args.get('offset', 0))
sorted_entries = sorted(chain(self.blog_entries, self.readinglog_entries),
key=lambda e: e.publication_date, reverse=True)
- format = self.request.args.get('format', 'html')
+ format = self.args.get('format', 'html')
if format == 'html':
rendered = template_loader.load('multiple.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
title=None,
sorted_entries=sorted_entries,
offset=offset,
- ).render('xhtml')
- return HttpResponse(rendered, [('Content-Type', 'text/html')], 200)
+ ).render('xhtml', encoding=self.encoding)
+ return (rendered, [('Content-Type', 'text/html')])
elif format == 'atom':
rendered = template_loader.load('multiple_atom.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
title=None,
- self_url='%s/' % self.request.environ['APP_URI'],
+ self_url='%s/' % self.environ['APP_URI'],
sorted_entries=sorted_entries[:self.config.getint('global', 'entries_in_feed')],
feed_updated=sorted_entries[0].modified_date
- ).render('xml')
- return HttpResponse(rendered, [('Content-Type', 'application/atom+xml')], 200)
+ ).render('xml', encoding=self.encoding)
+ return (rendered, [('Content-Type', 'application/atom+xml')])
else:
raise PageNotFound('Unknown format %r' % format)
@@ -73,109 +128,105 @@ class Constance(RegexApplication):
tag_freqs[tag] = tag_freqs.get(tag, 0) + 1
rendered = template_loader.load('tag_cloud.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
tag_freqs=tag_freqs
- ).render('xhtml')
- return HttpResponse(rendered, [('Content-Type', 'text/html')], 200)
+ ).render('xhtml', encoding=self.encoding)
+ return (rendered, [('Content-Type', 'text/html')])
def post(self, id):
- id = id.decode(self.charset) # shouldn't Colubrid do this?
try:
entry = self.blog_entries[id]
except KeyError:
- raise PageNotFound()
+ raise NotFoundError()
rendered = template_loader.load('single.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
entry=entry
- ).render('xhtml')
- return HttpResponse(rendered, [('Content-Type', 'text/html')], 200)
+ ).render('xhtml', encoding=self.encoding)
+ return (rendered, [('Content-Type', 'text/html')])
def add_post_comment(self, id):
- id = id.decode(self.charset) # shouldn't Colubrid do this?
entry = self.blog_entries[id]
- form_data = self.request.form.as_dict()
if self.config.getboolean('blog', 'require_captcha'):
# first verify the captcha
captcha_response = captcha.submit(
- form_data['recaptcha_challenge_field'],
- form_data['recaptcha_response_field'],
+ self.form['recaptcha_challenge_field'],
+ self.form['recaptcha_response_field'],
self.config.get('blog', 'recaptcha_privkey'),
- self.request.environ['REMOTE_ADDR'])
+ self.environ['REMOTE_ADDR'])
if not captcha_response.is_valid:
raise ValueError(captcha_response.error_code) # XXX handle better
try:
metadata = {}
- metadata['From'] = form_data['from'] or 'Anonymous'
- if form_data['author-url']:
- metadata['Author-URL'] = form_data['author-url']
- if form_data['author-email']:
- metadata['Author-Email'] = form_data['author-email']
- if self.request.environ['HTTP_USER_AGENT']:
- metadata['User-Agent'] = self.request.environ['HTTP_USER_AGENT']
- if self.request.environ['REMOTE_ADDR']:
- metadata['Received'] = 'from %s' % self.request.environ['REMOTE_ADDR']
- entry.add_comment(metadata, form_data['comment'])
- raise HttpFound('%s/%s/' % (self.request.environ.get('SCRIPT_NAME', ''),
- id.encode(self.charset)))
+ metadata['From'] = self.form['from'] or u'Anonymous'
+ if self.form['author-url']:
+ metadata['Author-URL'] = self.form['author-url']
+ if self.form['author-email']:
+ metadata['Author-Email'] = self.form['author-email']
+ if self.environ['HTTP_USER_AGENT']:
+ metadata['User-Agent'] = self.environ['HTTP_USER_AGENT']
+ if self.environ['REMOTE_ADDR']:
+ metadata['Received'] = u'from %s' % self.environ['REMOTE_ADDR']
+ entry.add_comment(metadata, self.form['comment'])
+ raise HTTPFound('%s/%s/' % (self.environ.get('APP_URI', ''),
+ id.encode(self.encoding)))
except blog.CommentingForbiddenError:
- raise AccessDenied()
+ raise ForbiddenError()
def tag(self, tag):
- tag = tag.decode(self.charset)
with_tag = [e for e in self.blog_entries if tag in e.tags]
if not with_tag:
- raise PageNotFound()
- offset = int(self.request.args.get('offset', 0))
+ raise NotFoundError()
+ offset = int(self.args.get('offset', 0))
sorted_entries = sorted(with_tag, key=lambda e: e.publication_date, reverse=True)
- format = self.request.args.get('format', 'html')
+ format = self.args.get('format', 'html')
if format == 'html':
rendered = template_loader.load('multiple.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
title=u'“%s” tag' % tag,
sorted_entries=sorted_entries,
offset=offset
).render('xhtml')
- return HttpResponse(rendered, [('Content-Type', 'text/html')], 200)
+ return (rendered, [('Content-Type', 'text/html')])
elif format == 'atom':
rendered = template_loader.load('multiple_atom.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
title=u'“%s” tag' % tag,
- self_url='%s/+tags/%s' % (self.request.environ['APP_URI'], tag.encode(self.charset)),
+ self_url='%s/+tags/%s' % (self.environ['APP_URI'], tag.encode(self.encoding)),
sorted_entries=sorted_entries[:self.config.getint('global', 'entries_in_feed')],
feed_updated=sorted_entries[0].modified_date
).render('xml')
- return HttpResponse(rendered, [('Content-Type', 'application/atom+xml')], 200)
+ return (rendered, [('Content-Type', 'application/atom+xml')])
else:
raise PageNotFound('Unknown format %r' % format)
def reading(self):
sorted_entries = sorted(self.readinglog_entries, key=lambda e: e.publication_date, reverse=True)
- format = self.request.args.get('format', 'html')
+ format = self.args.get('format', 'html')
if format == 'html':
rendered = template_loader.load('multiple.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
title=u'reading log',
sorted_entries=sorted_entries,
- ).render('xhtml')
- return HttpResponse(rendered, [('Content-Type', 'text/html')], 200)
+ ).render('xhtml', encoding=self.encoding)
+ return (rendered, [('Content-Type', 'text/html')])
elif format == 'atom':
rendered = template_loader.load('multiple_atom.xml').generate(
config=self.config,
- environ=self.request.environ,
+ environ=self.environ,
title=u'reading log',
- self_url='%s/+reading/' % self.request.environ['APP_URI'],
+ self_url='%s/+reading/' % self.environ['APP_URI'],
sorted_entries=sorted_entries[:self.config.getint('global', 'entries_in_feed')],
feed_updated=sorted_entries[0].modified_date
- ).render('xml')
- return HttpResponse(rendered, [('Content-Type', 'application/atom+xml')], 200)
+ ).render('xml', encoding=self.encoding)
+ return (rendered, [('Content-Type', 'application/atom+xml')])
else:
- raise PageNotFound('Unknown format %r' % format)
+ raise NotFoundError('Unknown format %r' % format)
application = Constance
diff --git a/blog.py b/blog.py
@@ -126,9 +126,9 @@ class BlogEntry(object):
guid = uuid.uuid4().get_hex()
f = open(os.path.join(self.comments_dir, guid), 'w')
for k, v in metadata.iteritems():
- f.write('%s: %s\n' % (k, v))
+ f.write('%s: %s\n' % (k, v.encode('utf8'))) # XXX encoding
f.write('\n')
- f.write(content)
+ f.write(content.encode('utf8')) # XXX encoding
class BlogEntrySet(DirectoryEntrySet):
diff --git a/config.defaults b/config.defaults
@@ -16,6 +16,17 @@ entries_per_page = 20
# The maximum number of entries to be included in feeds.
entries_in_feed = 20
+# Character encoding to be used everywhere. That is, for:
+# * all data read from disk (including this config)
+# * URL components and query string arguments
+# * POST data
+# * rendered templates
+# and anywhere else I have forgotten. In short, whenever we convert between
+# Unicode data and bytestrings, this is the encoding used (except comments
+# written by blog.py, which are still hard-coded to utf8). It is *highly*
+# recommended that you leave this value at its default of utf8!
+encoding = utf8
+
[blog]
# The directory containing blog entries.
diff --git a/config.py b/config.py
@@ -12,4 +12,4 @@ class ConstanceConfigParser(SafeConfigParser):
self.readfp(open(filename, 'r'))
def getunicode(self, section, option):
- return self.get(section, option).decode('utf8') # XXX make codec configurable?
+ return self.get(section, option).decode(self.get('global', 'encoding'))