constance

Scripts for generating (an earlier obsolete version of) my personal web site
git clone https://code.djc.id.au/git/constance/
commit e7eeabe3067c9fe2b881e975331772d73f6b8e37
parent 25738a80905ecd9cdd1d6bbece4986b08ec63e9c
Author: Dan Callaghan <djc@djc.id.au>
Date:   Sat, 20 Sep 2008 21:12:30 +1000

sitemap generation

Diffstat:
M TODO | 1 -
M app.py | 21 ++++++++++++++++++++-
A templates/sitemap.xml | 41 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/TODO b/TODO
@@ -20,7 +20,6 @@
 - use encoding from config for blog.py instead of hard-coding utf8
 - markdown typography/smartypants
   - or even just, better markdown
-- generate goog sitemaps
 - change/add class names per http://microformats.org/wiki/hatom ?
 - alternative entry type for tumblelog-style entries (unnamed)
 - cache headers! (last-modified, e-tag)
diff --git a/app.py b/app.py
@@ -1,7 +1,7 @@
 
 # vim:encoding=utf-8
 
-import os, cgi, re
+import os, cgi, re, datetime
 from itertools import chain
 import wsgiref.util
 from genshi.template import TemplateLoader
@@ -103,10 +103,12 @@ class Constance(object):
         self.start(error.status, [('Content-type', 'text/html')] + error.headers)
         return iter([body.encode(self.encoding)])
 
+    # XXX keep sitemap in sync with these (templates/sitemap.xml lists its URLs by hand)
     urls = [(r'/$', 'index'), 
             (r'/\+tags/$', 'tag_cloud'), 
             (r'/\+tags/(.+)$', 'tag'), 
             (r'/\+reading/?$', 'reading'), 
+            (r'/sitemap.xml$', 'sitemap'), # NOTE(review): '.' is unescaped and matches any character; r'/sitemap\.xml$' would match only the literal path
             (r'/([^+/][^/]*)/?$', 'post'), 
             (r'/([^+/][^/]*)/comments/\+new$', 'add_post_comment')]
     urls = [(re.compile(patt), method) for patt, method in urls] # precompile: entries become (compiled pattern, method name)
@@ -259,6 +261,23 @@ class Constance(object):
         else:
             raise NotFoundError('Unknown format %r' % format)
 
+    def sitemap(self): # render the 'sitemap.xml' template; returns (rendered body, list of header tuples)
+        tags = {} # tag -> latest modified_date among the blog entries carrying that tag
+        for entry in self.blog_entries:
+            for tag in entry.tags:
+                tags[tag] = max(entry.modified_date, tags.get(tag, datetime.datetime.min)) # datetime.min is the floor for a tag seen for the first time
+        sorted_entries = sorted(chain(self.blog_entries, self.readinglog_entries), 
+                key=lambda e: e.publication_date, reverse=True) # newest publication first; NOTE(review): reading-log entries need .publication_date here and .modified_date for index_updated, yet only .date is read from them directly -- confirm they expose all three
+        rendered = template_loader.load('sitemap.xml').generate(
+                config=self.config, 
+                environ=self.environ, 
+                blog_entries=self.blog_entries, 
+                tags=tags, 
+                readinglog_updated=max(e.date for e in self.readinglog_entries), # NOTE(review): max() raises ValueError on an empty sequence -- confirm the reading log is never empty
+                index_updated=max(e.modified_date for e in sorted_entries[:self.config.getint('global', 'entries_per_page')]), # latest modification among the entries_per_page most recently published entries; same empty-sequence caveat
+                ).render('xml', encoding='utf8') # sitemaps must be UTF-8
+        return (rendered, [('Content-Type', 'text/xml')])
+
 application = Constance
 
 
diff --git a/templates/sitemap.xml b/templates/sitemap.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+        xmlns:xi="http://www.w3.org/2001/XInclude"
+        xmlns:py="http://genshi.edgewall.org/">
+
+<xi:include href="_fragments.xml" />
+
+<?python
+W3C_TIME_FORMAT = str('%Y-%m-%dT%H:%M:%S+10:00') # strftime pattern used for every lastmod value below; NOTE(review): the +10:00 offset is a literal, not derived from the datetime -- confirm all timestamps are in that zone
+?>
+
+<url>
+    <loc>${abs_uri('')}</loc>
+    <lastmod>${index_updated.strftime(W3C_TIME_FORMAT)}</lastmod>
+    <changefreq>daily</changefreq>
+    <priority>1.0</priority>
+</url>
+<url>
+    <loc>${abs_uri('+tags', '')}</loc>
+    <priority>0.25</priority>
+</url>
+<url py:for="tag, modtime in tags.iteritems()">
+    <loc>${abs_uri('+tags', tag)}</loc>
+    <lastmod>${modtime.strftime(W3C_TIME_FORMAT)}</lastmod>
+    <changefreq>weekly</changefreq>
+    <priority>0.25</priority>
+</url>
+<url>
+    <loc>${abs_uri('+reading', '')}</loc>
+    <lastmod>${readinglog_updated.strftime(W3C_TIME_FORMAT)}</lastmod>
+    <changefreq>weekly</changefreq>
+    <priority>0.75</priority>
+</url>
+<url py:for="entry in blog_entries">
+    <loc>${abs_uri(entry.id)}</loc>
+    <lastmod>${entry.modified_date.strftime(W3C_TIME_FORMAT)}</lastmod>
+    <changefreq>never</changefreq>
+    <priority>1.0</priority>
+</url>
+
+</urlset>