commit e7eeabe3067c9fe2b881e975331772d73f6b8e37
parent 25738a80905ecd9cdd1d6bbece4986b08ec63e9c
Author: Dan Callaghan <djc@djc.id.au>
Date: Sat, 20 Sep 2008 21:12:30 +1000
sitemap generation
Diffstat:
3 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/TODO b/TODO
@@ -20,7 +20,6 @@
- use encoding from config for blog.py instead of hard-coding utf8
- markdown typography/smartypants
- or even just, better markdown
-- generate goog sitemaps
- change/add class names per http://microformats.org/wiki/hatom ?
- alternative entry type for tumblelog-style entries (unnamed)
- cache headers! (last-modified, e-tag)
diff --git a/app.py b/app.py
@@ -1,7 +1,7 @@
# vim:encoding=utf-8
-import os, cgi, re
+import os, cgi, re, datetime
from itertools import chain
import wsgiref.util
from genshi.template import TemplateLoader
@@ -103,10 +103,12 @@ class Constance(object):
self.start(error.status, [('Content-type', 'text/html')] + error.headers)
return iter([body.encode(self.encoding)])
+    # XXX keep sitemap in sync with these
+    # Route table: (regex, name) pairs, compiled on the last line.  The names
+    # presumably refer to methods of this class (e.g. 'sitemap' -> sitemap()).
+    # The sitemap route must stay ahead of the catch-all 'post' pattern, which
+    # would otherwise also match '/sitemap.xml' (assuming the first matching
+    # pattern wins -- the dispatch code is not visible here).
     urls = [(r'/$', 'index'),
             (r'/\+tags/$', 'tag_cloud'),
             (r'/\+tags/(.+)$', 'tag'),
             (r'/\+reading/?$', 'reading'),
+            # The dot is escaped so only the literal '/sitemap.xml' matches.
+            (r'/sitemap\.xml$', 'sitemap'),
             (r'/([^+/][^/]*)/?$', 'post'),
             (r'/([^+/][^/]*)/comments/\+new$', 'add_post_comment')]
     urls = [(re.compile(patt), method) for patt, method in urls]
@@ -259,6 +261,23 @@ class Constance(object):
else:
raise NotFoundError('Unknown format %r' % format)
+    def sitemap(self):
+        """Render the XML sitemap for the site.
+
+        Returns a ``(body, headers)`` tuple: the ``sitemap.xml`` template
+        rendered as UTF-8 encoded XML, and a list containing the
+        Content-Type header.
+
+        An empty reading log, or an empty set of index entries, does not
+        raise ``ValueError`` from ``max()``; the corresponding timestamp
+        falls back to the current time instead.
+        """
+        # For each tag, the newest modification time among the blog entries
+        # that carry it.
+        tags = {}
+        for entry in self.blog_entries:
+            for tag in entry.tags:
+                tags[tag] = max(entry.modified_date,
+                                tags.get(tag, datetime.datetime.min))
+
+        # max() raises ValueError on an empty sequence, so gather the
+        # candidates first.  The template calls .strftime() on both values
+        # unconditionally, so the fallback must be a datetime, not None.
+        now = datetime.datetime.now()
+
+        reading_dates = [e.date for e in self.readinglog_entries]
+        if reading_dates:
+            readinglog_updated = max(reading_dates)
+        else:
+            readinglog_updated = now
+
+        # Only the newest entries_per_page entries (by publication date, blog
+        # and reading log combined) count towards the index timestamp --
+        # presumably mirroring what the index page lists.
+        per_page = self.config.getint('global', 'entries_per_page')
+        newest = sorted(chain(self.blog_entries, self.readinglog_entries),
+                        key=lambda e: e.publication_date, reverse=True)[:per_page]
+        index_dates = [e.modified_date for e in newest]
+        if index_dates:
+            index_updated = max(index_dates)
+        else:
+            index_updated = now
+
+        rendered = template_loader.load('sitemap.xml').generate(
+            config=self.config,
+            environ=self.environ,
+            blog_entries=self.blog_entries,
+            tags=tags,
+            readinglog_updated=readinglog_updated,
+            index_updated=index_updated,
+        ).render('xml', encoding='utf8')  # sitemaps must be UTF-8
+        return (rendered, [('Content-Type', 'text/xml')])
+
+# Module-level alias for the Constance class; presumably the WSGI entry point
+# that the server looks up by name -- confirm against the deployment config.
application = Constance
diff --git a/templates/sitemap.xml b/templates/sitemap.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:py="http://genshi.edgewall.org/">
+<!--! Sitemap template, rendered by Constance.sitemap() in app.py, which
+      supplies config, environ, blog_entries, tags, readinglog_updated and
+      index_updated.  Comments of this "!" form are stripped by Genshi and
+      do not appear in the output. -->
+
+<!--! abs_uri() is not defined in this file; presumably it is provided by
+      _fragments.xml - confirm. -->
+<xi:include href="_fragments.xml" />
+
+<?python
+# strftime pattern used for every <lastmod> value.
+# NOTE(review): the UTC offset is hard-coded to +10:00, so the output is only
+# correct if the datetimes passed in are in that zone (no DST handling) -
+# confirm.
+# str() presumably forces a byte string for strftime under Python 2 - confirm.
+W3C_TIME_FORMAT = str('%Y-%m-%dT%H:%M:%S+10:00')
+?>
+
+<!--! Site index. -->
+<url>
+ <loc>${abs_uri('')}</loc>
+ <lastmod>${index_updated.strftime(W3C_TIME_FORMAT)}</lastmod>
+ <changefreq>daily</changefreq>
+ <priority>1.0</priority>
+</url>
+<!--! Tag cloud page; no lastmod or changefreq is emitted for it. -->
+<url>
+ <loc>${abs_uri('+tags', '')}</loc>
+ <priority>0.25</priority>
+</url>
+<!--! One entry per tag; modtime is the value stored for that tag in the
+      tags mapping. -->
+<url py:for="tag, modtime in tags.iteritems()">
+ <loc>${abs_uri('+tags', tag)}</loc>
+ <lastmod>${modtime.strftime(W3C_TIME_FORMAT)}</lastmod>
+ <changefreq>weekly</changefreq>
+ <priority>0.25</priority>
+</url>
+<!--! Reading log page. -->
+<url>
+ <loc>${abs_uri('+reading', '')}</loc>
+ <lastmod>${readinglog_updated.strftime(W3C_TIME_FORMAT)}</lastmod>
+ <changefreq>weekly</changefreq>
+ <priority>0.75</priority>
+</url>
+<!--! One entry per blog post, addressed by entry.id. -->
+<url py:for="entry in blog_entries">
+ <loc>${abs_uri(entry.id)}</loc>
+ <lastmod>${entry.modified_date.strftime(W3C_TIME_FORMAT)}</lastmod>
+ <changefreq>never</changefreq>
+ <priority>1.0</priority>
+</url>
+
+</urlset>