"""ifarchivexml: parse the IF Archive's Master-Index.xml file.

You can use this module like this:

    import ifarchivexml
    (root, dirs, files) = ifarchivexml.parse('Master-Index.xml')

root is an IFDir object representing the root directory ('if-archive').
dirs is a dictionary mapping directory names ('if-archive/games', for
example) to IFDir objects. files is a dictionary mapping file pathnames
('if-archive/games/playgame.FAQ', for example) to IFFile objects.

You can display the contents of either an IFDir or IFFile object with
the obj.dump() method.

The code is written to run unchanged on both Python 2.6+ and Python 3.
"""

import xml.sax
import xml.sax.handler

# Parser states: which enclosing element the SAX handler is currently inside.
CONTEXT_NONE = 0      # not inside a <directory> or <file> entry
CONTEXT_DIR = 1       # inside a top-level <directory> entry
CONTEXT_FILE = 2      # inside a top-level <file> entry
CONTEXT_DIRLINK = 3   # inside <symlink type="dir"> within a <file>
CONTEXT_FILELINK = 4  # inside any other <symlink> within a <file>


def _emit(label, *values):
    """Print label and values separated by single spaces.

    This reproduces the output of the Python 2 statement
    ``print label, value, ...`` while using only the single-argument
    ``print(...)`` form, which behaves identically on Python 2 and 3.
    """
    pieces = [label]
    pieces.extend('%s' % (value,) for value in values)
    print(' '.join(pieces))


class IFDir:
    """One directory entry from the index.

    The parser fills in name, xdir, parent, subdircount and filecount
    (and optionally description) from the XML; parentobj, subdirs and
    files are linked up once the whole document has been read.
    """

    description = None

    def __init__(self):
        self.subdirs = []
        self.files = []

    def __repr__(self):
        # getattr: the name is only assigned once the <name> element is seen.
        return '<IFDir %r>' % (getattr(self, 'name', None),)

    def dump(self):
        """Print every field of this directory to standard output."""
        _emit('name: ', self.name)
        _emit('xdir: ', self.xdir)
        _emit('parent: ', self.parent, '(%s)' % (self.parentobj,))
        _emit('subdircount:', self.subdircount)
        _emit('filecount: ', self.filecount)
        if self.description is not None:
            print('description:')
            print(self.description)
        print('subdirs:')
        for subdir in self.subdirs:
            _emit(' ', subdir)
        print('files:')
        for entry in self.files:
            _emit(' ', entry)


class IFFile:
    """One file entry from the index.

    Fields that the XML may omit default to None at class level.
    symlink is None for a regular file, or 'dir' / 'file' when the entry
    carries a <symlink> element (see symlinkname/symlinkxdir or
    symlinkpath respectively).
    """

    size = None
    date = None
    md5 = None
    rawdate = None
    symlink = None
    description = None

    def __repr__(self):
        # getattr: the name is only assigned once the <name> element is seen.
        return '<IFFile %r>' % (getattr(self, 'name', None),)

    def dump(self):
        """Print every field of this file to standard output."""
        _emit('path: ', self.path)
        _emit('name: ', self.name)
        _emit('directory: ', self.directory, '(%s)' % (self.directoryobj,))
        if self.symlink == 'dir':
            print('symlink to dir:')
            _emit(' name: ', self.symlinkname)
            _emit(' xdir: ', self.symlinkxdir)
        if self.symlink == 'file':
            print('symlink to file:')
            _emit(' path: ', self.symlinkpath)
        _emit('size: ', self.size)
        _emit('date: ', self.date)
        _emit('rawdate:', self.rawdate)
        _emit('md5: ', self.md5)
        _emit('orderindex:', self.orderindex)
        if self.description is not None:
            print('description:')
            print(self.description)


class IFAParser(xml.sax.handler.ContentHandler):
    """SAX content handler that builds IFDir / IFFile objects.

    After parsing, self.directories maps directory name -> IFDir and
    self.files maps file path -> IFFile. Elements not listed in
    self.elements are ignored.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.grabbeddata = ''
        self.curdir = None
        self.curfile = None
        self.directories = {}
        self.files = {}
        self.orderindex = 0
        self.context = CONTEXT_NONE
        # element name -> (start handler, end handler)
        self.elements = {
            'ifarchive': (self.ignore_start, self.ifarchive_end),
            'directory': (self.directory_start, self.directory_end),
            'file': (self.file_start, self.file_end),
            'name': (self.grabdata_start, self.name_end),
            'xdir': (self.grabdata_start, self.xdir_end),
            'filecount': (self.grabdata_start, self.filecount_end),
            'subdircount': (self.grabdata_start, self.subdircount_end),
            'parent': (self.grabdata_start, self.parent_end),
            'path': (self.grabdata_start, self.path_end),
            'size': (self.grabdata_start, self.size_end),
            'date': (self.grabdata_start, self.date_end),
            'rawdate': (self.grabdata_start, self.rawdate_end),
            'md5': (self.grabdata_start, self.md5_end),
            'description': (self.grabdata_start, self.description_end),
            'symlink': (self.symlink_start, self.symlink_end),
        }

    # -- SAX callbacks ---------------------------------------------------

    def characters(self, data):
        # SAX may deliver one text node in several chunks; accumulate them.
        self.grabbeddata = self.grabbeddata + data

    def startElement(self, name, attrs):
        handlers = self.elements.get(name)
        if handlers is None:
            return
        handlers[0](attrs)

    def endElement(self, name):
        handlers = self.elements.get(name)
        if handlers is None:
            return
        handlers[1]()

    # -- generic helpers -------------------------------------------------

    def ignore_start(self, dict):
        pass

    def ignore_end(self):
        pass

    def grabdata_start(self, dict):
        """Start collecting character data for the element just opened."""
        self.grabbeddata = ''

    def grabdata(self):
        """Return the collected character data and reset the buffer."""
        dat = self.grabbeddata
        self.grabbeddata = ''
        return dat

    def _store(self, target, attrname, convert=None):
        """Consume the collected text and assign it to target.attrname.

        The text is always consumed; it is assigned only if target is not
        None, after passing through convert (e.g. int) when one is given.
        """
        data = self.grabdata()
        if target is None:
            return
        if convert is not None:
            data = convert(data)
        setattr(target, attrname, data)

    # -- structural elements ---------------------------------------------

    def directory_start(self, dict):
        if self.context == CONTEXT_NONE:
            self.curdir = IFDir()
            self.context = CONTEXT_DIR
        elif self.context == CONTEXT_FILE:
            # Inside a <file>, <directory> is a text field, not an entry.
            self.grabdata_start(None)

    def directory_end(self):
        if self.context == CONTEXT_DIR:
            self.directories[self.curdir.name] = self.curdir
            self.curdir = None
            self.context = CONTEXT_NONE
        elif self.context == CONTEXT_FILE:
            self._store(self.curfile, 'directory')

    def file_start(self, dict):
        if self.context == CONTEXT_NONE:
            self.curfile = IFFile()
            self.context = CONTEXT_FILE

    def file_end(self):
        if self.context == CONTEXT_FILE:
            # orderindex records the position of the file in the document.
            self.curfile.orderindex = self.orderindex
            self.orderindex = self.orderindex + 1
            self.files[self.curfile.path] = self.curfile
            self.curfile = None
            self.context = CONTEXT_NONE

    def symlink_start(self, dict):
        if self.context == CONTEXT_FILE:
            if dict['type'] == 'dir':
                self.context = CONTEXT_DIRLINK
                self.curfile.symlink = 'dir'
            else:
                self.context = CONTEXT_FILELINK
                self.curfile.symlink = 'file'

    def symlink_end(self):
        if self.context in (CONTEXT_DIRLINK, CONTEXT_FILELINK):
            self.context = CONTEXT_FILE

    # -- text fields -----------------------------------------------------

    def name_end(self):
        if self.context == CONTEXT_DIR:
            self._store(self.curdir, 'name')
        elif self.context == CONTEXT_FILE:
            self._store(self.curfile, 'name')
        elif self.context == CONTEXT_DIRLINK:
            self._store(self.curfile, 'symlinkname')

    def parent_end(self):
        if self.context == CONTEXT_DIR:
            self._store(self.curdir, 'parent')

    def xdir_end(self):
        if self.context == CONTEXT_DIR:
            self._store(self.curdir, 'xdir')
        elif self.context == CONTEXT_DIRLINK:
            self._store(self.curfile, 'symlinkxdir')

    def subdircount_end(self):
        if self.context == CONTEXT_DIR:
            self._store(self.curdir, 'subdircount', int)

    def filecount_end(self):
        if self.context == CONTEXT_DIR:
            self._store(self.curdir, 'filecount', int)

    def path_end(self):
        if self.context == CONTEXT_FILE:
            self._store(self.curfile, 'path')
        elif self.context == CONTEXT_FILELINK:
            self._store(self.curfile, 'symlinkpath')

    def size_end(self):
        if self.context == CONTEXT_FILE:
            self._store(self.curfile, 'size', int)

    def date_end(self):
        if self.context == CONTEXT_FILE:
            self._store(self.curfile, 'date')

    def rawdate_end(self):
        if self.context == CONTEXT_FILE:
            self._store(self.curfile, 'rawdate', int)

    def md5_end(self):
        if self.context == CONTEXT_FILE:
            self._store(self.curfile, 'md5')

    def description_end(self):
        if self.context == CONTEXT_DIR:
            self._store(self.curdir, 'description')
        elif self.context == CONTEXT_FILE:
            self._store(self.curfile, 'description')

    # -- final linking ---------------------------------------------------

    def ifarchive_end(self):
        """Link directories and files to their parent IFDir objects.

        Runs when the closing </ifarchive> tag is seen, i.e. after every
        entry has been collected, so parents may appear in any order.
        Raises KeyError if an entry names a directory that was not listed.
        """
        for directory in self.directories.values():
            parent = directory.parent
            if parent == '':
                # An empty parent marks a directory with no parent.
                directory.parentobj = None
            else:
                directory.parentobj = self.directories[parent]
                directory.parentobj.subdirs.append(directory)
        for entry in self.files.values():
            entry.directoryobj = self.directories[entry.directory]
            entry.directoryobj.files.append(entry)


def parse(filename):
    """Parse the index file at filename.

    Returns a tuple (rootdir, directories, files): the IFDir named
    'if-archive', the dictionary of all IFDir objects keyed by name, and
    the dictionary of all IFFile objects keyed by path.

    Raises KeyError if the document has no 'if-archive' directory.
    """
    parser = IFAParser()
    # Binary mode lets the XML parser apply the encoding declared in the
    # document; 'with' closes the file even if parsing fails.
    with open(filename, 'rb') as fl:
        xml.sax.parse(fl, parser)
    rootdir = parser.directories['if-archive']
    return (rootdir, parser.directories, parser.files)