#! /usr/bin/python
"""Import events from a schedule XML file into the sputnik.event table.

Usage: pass the path of the schedule XML file as the first command-line
argument.  The file is streamed through a SAX handler that issues one
INSERT per <event> element; everything is committed once parsing has
finished successfully.
"""

import sys
import xml.sax
import xml.sax.handler

import psycopg2

# One row per event.  The 17 placeholders are, in order: the nine column
# values (finish is computed as start + duration), followed by four
# (text search configuration, text) pairs that build the weighted
# search vector from title (A), subtitle (B), abstract (C) and
# description (D).
query = """INSERT INTO sputnik.event
    (organizerid, track, language, name, place, description,
     start, finish, searchable)
VALUES
    (%s, %s, %s, %s, %s, %s,
     %s::TIMESTAMP WITH TIME ZONE,
     %s::TIMESTAMP WITH TIME ZONE+%s::INTERVAL,
     setweight(to_tsvector(%s, %s), 'A') ||
     setweight(to_tsvector(%s, %s), 'B') ||
     setweight(to_tsvector(%s, %s), 'C') ||
     setweight(to_tsvector(%s, %s), 'D'))"""


class ScheduleHandler(xml.sax.handler.ContentHandler):
    """SAX handler that inserts every <event> element into sputnik.event.

    The handler remembers the enclosing <day> date and <room> id, collects
    the text of selected child elements of each event, and executes
    ``query`` when the event element ends.  Nothing is committed until
    ``close()`` is called.
    """

    # Elements whose character data is collected into self._data.
    _TEXT_ELEMENTS = frozenset((
        'track', 'language', 'title', 'subtitle',
        'abstract', 'description', 'start', 'duration',
    ))

    # Fields cleared at the start of every event.  'start' and 'duration'
    # are only cleared when their own element opens, as before.
    _EVENT_FIELDS = (
        'title', 'subtitle', 'abstract', 'description', 'track', 'language',
    )

    # Language codes from the XML mapped to the configuration name that
    # is passed to to_tsvector().
    _LANGUAGE_CONFIGS = {'en': 'english', 'de': 'german'}

    def __init__(self, c):
        """Create a handler that writes through the open connection ``c``."""
        # Explicit base-class call (not super()) so this also works where
        # ContentHandler is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self._connection = c
        self._count = 0
        self._cursor = self._connection.cursor()
        self._track = ''
        self._language = ''
        self._day = ''
        self._room = ''
        self._element = ''   # name of the element currently being collected
        self._id = ''
        self._data = {}

    def startElement(self, name, attributes):
        """Track day/room context and start collecting event fields.

        Raises:
            ValueError: if a <room> names a room that has no row in
                sputnik.room.
        """
        if name == 'day':
            self._day = attributes['date']

        # 'in' replaces has_key(), which SAX attribute objects no longer
        # provide on Python 3.
        if name == 'room' and 'name' in attributes:
            room_name = attributes['name']
            cursor = self._cursor
            cursor.execute('SELECT id FROM sputnik.room WHERE name = %s',
                           (room_name, ))
            row = cursor.fetchone()
            if row is None:
                # Previously this surfaced as an opaque TypeError from
                # indexing None; fail with a message naming the room.
                raise ValueError(
                    'room %r not found in sputnik.room' % (room_name, ))
            self._room = int(row[0])

        if name == 'event':
            self._id = attributes['id']
            for field in self._EVENT_FIELDS:
                self._data[field] = ''

        if name in self._TEXT_ELEMENTS:
            self._element = name
            self._data[name] = ''

    def characters(self, data):
        """Append character data to the field currently being collected.

        The parser may deliver the text of one element in several chunks,
        so the data is appended rather than assigned.
        """
        if self._element:
            # Keep the text as delivered by the parser.  Appending
            # UTF-8-encoded bytes to the '' initial value raises TypeError
            # on Python 3; encoding is left to the database driver.
            self._data[self._element] += data

    def endElement(self, name):
        """Resolve the language on </language>; insert on </event>."""
        if name == 'language':
            code = self._data['language']
            # Known codes map to a configuration name; anything else is
            # passed through lower-cased.
            self._language = self._LANGUAGE_CONFIGS.get(code, code.lower())

        # Fall back to English whenever no language has been determined.
        if not self._language:
            self._language = 'english'

        if name == 'event':
            fields = self._data
            language = self._language
            start = self._day + ' ' + fields['start']
            # NOTE(review): in the reviewed source this separator literal
            # contained a raw line break after ' --- '; it is written as
            # an explicit escape here.  Confirm the intended separator.
            display_name = fields['title'] + ' --- \n' + fields['subtitle']
            full_text = fields['abstract'] + '\n---\n' + fields['description']
            self._cursor.execute(query, (
                self._id,
                fields['track'],
                language,
                display_name,
                self._room,
                full_text,
                start,                 # start
                start,                 # finish = start + duration
                fields['duration'],
                language, fields['title'],
                language, fields['subtitle'],
                language, fields['abstract'],
                language, fields['description'],
            ))

        # Stop collecting so text between elements is not appended to the
        # element that just ended.
        self._element = ''

    def close(self):
        """Commit all inserted events and release the cursor."""
        self._connection.commit()
        self._cursor.close()
        self._cursor = None


connection = psycopg2.connect("dbname=sputnik port=5432")
try:
    h = ScheduleHandler(connection)
    xml.sax.parse(sys.argv[1], h)
    h.close()
finally:
    # Close the connection even when parsing or an insert fails; nothing
    # has been committed in that case because h.close() was not reached.
    connection.close()
    connection = None