#! /usr/bin/python

"""
Parse XML artifact files into nested dictionaries using a SAX handler.

    Some code (the sax parser part) was taken and modified from:
    http://code.activestate.com/recipes/415983/ <- original code.
    Reference to website's license: http://code.activestate.com/help/terms/

"""
# Originally added as trunk/src/services/XMLtoJSON.py (revision 0).

import os
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import XMLReader
from xml.sax import make_parser
from os import listdir


class ArtifactHandler(ContentHandler):
    """SAX content handler that builds a nested dictionary in ``self.d``.

    Each element becomes a dictionary holding its attributes.  Child
    elements are stored under their tag name; when a tag name occurs more
    than once under the same parent, the entry becomes a list.  Stripped
    character data is accumulated under the ``"textvalue"`` key of the
    element that directly contains it.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.resetState()

    def resetState(self):
        """Clear all parse state so the handler can process a new document."""
        self.curr = ''      # name of the innermost open element
        self.parent = ''    # name of the element enclosing ``curr``
        self.count = 0      # number of start tags seen so far
        self.d = {}         # resulting {root_name: {...}} structure
        self.currd = {}     # {name: content} wrapper of the innermost element
        self.parentd = {}   # wrapper of the enclosing element
        self.stack = []     # names of the currently open elements
        self.stack2 = []    # wrappers of the currently open elements

    def startDocument(self):
        """Reset state so one handler instance can parse several documents."""
        self.resetState()

    def updateChild(self, chlddict, chld, name):
        """Attach ``chld[name]`` to ``chlddict`` under ``name``.

        If ``name`` is already present, the existing entry is turned into a
        list (when it is not one yet) and the new child is appended.
        """
        if name in chlddict:
            chlddict[name] = self.getList(chlddict[name])
            chlddict[name].append(chld[name])
        else:
            chlddict.update(chld)

    def getList(self, dlist):
        """Return ``dlist`` unchanged if it is a list, else wrap it in one."""
        if isinstance(dlist, list):
            return dlist
        return [dlist]

    def startElement(self, name, attrs):
        """Create the dictionary for a new element and push it on the stacks."""
        if not self.stack:
            # Root element: it lives directly in the result dictionary.
            self.parent = name
            self.d[name] = dict(attrs)
            self.currd = self.d
        else:
            chld = {name: dict(attrs)}
            self.parent = self.stack[-1]
            self.parentd = self.stack2[-1]
            chlddict = self.parentd[self.parent]
            self.updateChild(chlddict, chld, name)
            self.currd = chld

        self.stack.append(name)
        self.stack2.append(self.currd)

        self.curr = name
        self.count += 1

    def endElement(self, name):
        """Pop the element that just closed and restore the enclosing context."""
        # The closing element is always the innermost one, so pop from the
        # end; removing by name would hit an outer element when an element
        # is nested inside another element with the same tag name.
        self.stack.pop()
        self.stack2.pop()

        if not self.stack:
            return
        self.curr = self.stack[-1]
        self.currd = self.stack2[-1]
        if len(self.stack) > 1:
            self.parent = self.stack[-2]
            self.parentd = self.stack2[-2]
        else:
            # Back at the root, which has no enclosing element.
            self.parent = self.stack[-1]
            self.parentd = {}

    def addTextValue(self, my, content):
        """Append ``content`` to the ``"textvalue"`` entry of ``my``.

        ``my`` may be an element dictionary or a list of them; for a list
        the last entry is used.
        """
        myd = self.getDict(my)
        myd["textvalue"] = "".join((myd.get("textvalue", ""), content))

    def getDict(self, my):
        """Return ``my`` if it is a dict, otherwise its last item."""
        if isinstance(my, dict):
            return my
        return my[-1]

    def characters(self, content):
        """Store non-blank character data on the innermost open element."""
        # Only encode when the parser hands over a non-native string type
        # (Python 2 ``unicode``); a native ``str`` is kept as it is so the
        # later string join does not mix text and bytes.
        if not isinstance(content, str):
            content = content.encode('utf-8')
        content = content.strip()
        if content and self.stack:
            # Use the top of the stack rather than ``parentd``/``curr`` so
            # that text directly under the root and text following a closed
            # child are attributed to the element that contains them.
            element = self.stack2[-1][self.stack[-1]]
            self.addTextValue(element, content)


def readXML(filename):
    """Parse ``filename`` and return the dictionary built by the handler.

    ``filename`` is passed unchanged to the SAX parser's ``parse`` method.
    """
    parser = initParser()
    parser.parse(filename)
    return parser.getContentHandler().d


def initParser():
    """Return a SAX parser wired to a fresh ``ArtifactHandler``."""
    handler = ArtifactHandler()
    parser = make_parser()
    parser.setContentHandler(handler)
    return parser


def formatReadXML(path):
    """Parse every ``*.xml`` file in directory ``path``.

    Returns a dictionary mapping each file name to its parsed structure.
    """
    results = {}
    for filename in listdir(path):
        # Match on the extension only, so names such as "a.xml.bak" are
        # skipped, and join portably so ``path`` needs no trailing slash.
        if filename.endswith(".xml"):
            results[filename] = readXML(os.path.join(path, filename))
    return results


if __name__ == "__main__":
    for _ in range(100):
        formatReadXML("../testdata/artifacts/")