import sys
import xml.etree.ElementTree as ET

# Namespace URI that every XML document implicitly binds to the "xml" prefix.
_XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'


class XmlParser:
    """Convert an XML file into nested dicts/lists keyed by prefixed tag names.

    ElementTree reports names in Clark notation (``{uri}local``).  This class
    records which namespace prefixes are in scope at every element while the
    file is parsed and rewrites the names back to ``prefix:local``.

    Shape of the result returned by :meth:`parse`:

    * an element with children becomes a dict ``{child_tag: child_value}``;
      children that share a tag are collected into a list, in document order;
    * an element's attributes are stored under the ``'<<attr>>'`` key;
    * a leaf element becomes its stripped text, or, when it has attributes,
      a dict with ``'<<attr>>'`` and ``'<<value>>'``;
    * text of elements that also have children is not kept.

    Attributes set by :meth:`parse`:

    * ``nsmap``    -- ``{str(element_index): [{uri: prefix}, ...]}``; the list
      holds the bindings in scope at that element, innermost first;
    * ``lcounter`` -- number of elements seen while scanning the file;
    * ``ncounter`` -- number of elements converted so far by ``xmlToDict``;
    * ``dic``      -- the resulting dictionary.
    """

    def __init__(self, fileLocation):
        """Remember the path of the XML file; nothing is read until parse()."""
        self.location = fileLocation

    def fixAttrs(self, attrs, c):
        """Return a copy of *attrs* whose names use namespace prefixes.

        *attrs* maps attribute names (possibly in Clark notation) to values;
        *c* is the document-order index of the element that owns them.
        """
        fixed = {}
        for name, value in attrs.items():
            fixed[self._qualify(name, c, True)] = value
        return fixed

    def buildXmlnsKey(self, tagtxt, tcounter):
        """Translate ``{uri}local`` into ``prefix:local`` for one element.

        *tcounter* is the document-order index (0 for the root) of the element
        the name belongs to.  Names without a namespace are returned as they
        are, and so are names whose namespace has no binding recorded for
        that element.  A default (empty-prefix) binding yields the bare local
        name.
        """
        return self._qualify(tagtxt, tcounter, False)

    def _qualify(self, name, index, is_attribute):
        """Resolve *name* against the bindings in scope at element *index*."""
        if not name.startswith('{'):
            return name
        for binding in self.nsmap.get(str(index), ()):
            for uri, prefix in binding.items():
                marker = '{' + uri + '}'
                if not name.startswith(marker):
                    continue
                local = name[len(marker):]
                if prefix:
                    return prefix + ':' + local
                # The default namespace never applies to attributes, so an
                # attribute has to be matched by a real prefix further on.
                if not is_attribute:
                    return local
        return name

    @staticmethod
    def _scopeBindings(declared):
        """Build the in-scope ``[{uri: prefix}, ...]`` list, innermost first.

        *declared* is the stack of ``(prefix, uri)`` declarations currently
        open, outermost first.
        """
        bindings = [{_XML_NAMESPACE: 'xml'}]
        seen = set()
        for prefix, uri in reversed(declared):
            if prefix in seen:
                # Shadowed by a redeclaration closer to the element.
                continue
            seen.add(prefix)
            if uri:  # xmlns="" un-declares the default namespace
                bindings.append({uri: prefix})
        return bindings

    def xmlToDict(self, node, dictclass=None):
        """Convert *node* and its descendants; see the class docstring.

        Elements must be converted in document order because ``self.ncounter``
        is what ties each element to its entry in ``self.nsmap``; parse()
        resets the counter before converting the root.  *dictclass*, when
        given, is the dict that receives the entries; it is not used when the
        node turns out to be a leaf without attributes.
        """
        if dictclass is None:
            dictclass = {}
        index = self.ncounter  # document-order index of this node
        self.ncounter = index + 1

        if len(node):
            if node.attrib:
                dictclass['<<attr>>'] = self.fixAttrs(node.attrib, index)
            for child in node:
                # Every element before the child has been counted by now, so
                # the running counter is exactly the child's own index.
                key = self.buildXmlnsKey(child.tag, self.ncounter)
                item = self.xmlToDict(child)
                if key not in dictclass:
                    dictclass[key] = item
                elif isinstance(dictclass[key], list):
                    dictclass[key].append(item)
                else:
                    dictclass[key] = [dictclass[key], item]
            return dictclass

        if node.text is None:
            text = ''
        else:
            text = node.text.strip()
        if node.attrib:
            dictclass['<<attr>>'] = self.fixAttrs(node.attrib, index)
            dictclass['<<value>>'] = text
            return dictclass
        return text

    def printDic(self, dic, pos=None):
        """Print *dic* as an indented tree; *pos* is the nesting depth."""
        if pos is None:
            pos = 0
        indent = self.getLenStr(pos)
        for key in dic:
            value = dic[key]
            if value is None:
                print('%s %s ' % (indent, key))
            elif isinstance(value, dict):
                print('%s%s {' % (indent, key))
                self.printDic(value, pos + 1)
                print(indent + '}')
            elif isinstance(value, list):
                print('%s%s [' % (indent, key))
                self.printList(value, pos + 1)
                print(indent + ']')
            else:
                print('%s%s :  %s' % (indent, key, value))

    def printList(self, dic, pos=None):
        """Print the items of list *dic*, numbered from 0, at depth *pos*.

        ``None`` items are skipped but still consume a number.
        """
        if pos is None:
            pos = 0
        indent = self.getLenStr(pos)
        for lindex, obj in enumerate(dic):
            if obj is None:
                continue
            if isinstance(obj, dict):
                print('%s%d. {' % (indent, lindex))
                self.printDic(obj, pos + 1)
                print(indent + '}')
            elif isinstance(obj, list):
                print('%s%d. [' % (indent, lindex))
                self.printList(obj, pos + 1)
                print(indent + ']')
            else:
                print('%s %d. %s' % (indent, lindex, obj))

    def getLenStr(self, pos):
        """Return the indentation for depth *pos*: one space per level."""
        return ' ' * pos

    def parse(self):
        """Read the file and return ``{root_tag: converted_root}``.

        The file is scanned once.  ``start-ns`` / ``end-ns`` events maintain
        a stack of open namespace declarations, and a snapshot of that stack
        is stored for every element so that names are resolved only against
        prefixes that are really in scope there.

        Raises whatever ``open`` and ``ElementTree.iterparse`` raise for a
        missing or malformed file.
        """
        self.nsmap = {}
        self.lcounter = 0
        root = None
        declared = []  # open (prefix, uri) declarations, outermost first
        scope = None   # snapshot shared by elements with identical scope

        with open(self.location, 'rb') as source:
            events = ('start', 'start-ns', 'end-ns')
            for event, elem in ET.iterparse(source, events=events):
                if event == 'start-ns':
                    prefix, uri = elem
                    declared.append((prefix or '', uri or ''))
                    scope = None
                elif event == 'end-ns':
                    if declared:
                        declared.pop()
                    scope = None
                else:  # 'start'
                    if root is None:
                        root = elem
                    if scope is None:
                        scope = self._scopeBindings(declared)
                    self.nsmap[str(self.lcounter)] = scope
                    self.lcounter += 1

        self.ncounter = 0
        self.dic = {self.buildXmlnsKey(root.tag, 0): self.xmlToDict(root)}
        return self.dic


if __name__ == "__main__":
    # Optional first argument: path of the XML file (default: xml7.xml).
    xmlObj = XmlParser(sys.argv[1] if len(sys.argv) > 1 else 'xml7.xml')
    theXmlDictionary = xmlObj.parse()
    xmlObj.printDic(theXmlDictionary)
Suppose you have the following XML file:
<?xml version="1.0"?> <lib:library xmlns:lib="http://eric.van-der-vlist.com/ns/library" xmlns:hr="http://eric.van-der-vlist.com/ns/person"> <lib:book id="b0836217462" available="true"> <lib:isbn>0836217462</lib:isbn> <lib:title xml:lang="en">Being a Dog Is a Full-Time Job</lib:title> <hr:author id="CMS"> <hr:name>Charles M Schulz</hr:name> <hr:born>1922-11-26</hr:born> <hr:dead>2000-02-12</hr:dead> </hr:author> <lib:character id="PP"> <hr:name>Peppermint Patty</hr:name> <hr:born>1966-08-22</hr:born> <lib:qualification>bold, brash and tomboyish</lib:qualification> </lib:character> <lib:character id="Snoopy"> <hr:name>Snoopy</hr:name> <hr:born>1950-10-04</hr:born> <lib:qualification>extroverted beagle</lib:qualification> </lib:character> <lib:character id="Schroeder"> <hr:name>Schroeder</hr:name> <hr:born>1951-05-30</hr:born> <lib:qualification>brought classical music to the Peanuts strip</lib:qualification> </lib:character> <lib:character id="Lucy"> <hr:name>Lucy</hr:name> <hr:born>1952-03-03</hr:born> <lib:qualification>bossy, crabby and selfish</lib:qualification> </lib:character> </lib:book> <Purchase> <PurchaseId>AAAAA</PurchaseId> <PurchaseType>ONLINE</PurchaseType> </Purchase> <Purchase> <PurchaseId>BBBBB</PurchaseId> <PurchaseType>OFFLINE</PurchaseType> </Purchase> <Purchase paid='True'> <Purchase age='30'> <Purchase>HMM 1</Purchase> <Purchase>HMM 2</Purchase> </Purchase> </Purchase> </lib:library>
The output would look like this:
lib:library { Purchase [ 0. { PurchaseId : AAAAA PurchaseType : ONLINE } 1. { PurchaseId : BBBBB PurchaseType : OFFLINE } 2. { <<attr>> { paid : True } Purchase { <<attr>> { age : 30 } Purchase [ 0. HMM 1 1. HMM 2 ] } } ] lib:book { hr:author { <<attr>> { id : CMS } hr:name : Charles M Schulz hr:born : 1922-11-26 hr:dead : 2000-02-12 } <<attr>> { available : true id : b0836217462 } lib:title { <<attr>> { xml:lang : en } <<value>> : Being a Dog Is a Full-Time Job } lib:isbn : 0836217462 lib:character [ 0. { <<attr>> { id : PP } hr:name : Peppermint Patty hr:born : 1966-08-22 lib:qualification : bold, brash and tomboyish } 1. { <<attr>> { id : Snoopy } hr:name : Snoopy hr:born : 1950-10-04 lib:qualification : extroverted beagle } 2. { <<attr>> { id : Schroeder } hr:name : Schroeder hr:born : 1951-05-30 lib:qualification : brought classical music to the Peanuts strip } 3. { <<attr>> { id : Lucy } hr:name : Lucy hr:born : 1952-03-03 lib:qualification : bossy, crabby and selfish } ] } }