import sys
import xml.etree.ElementTree as ET
class XmlParser:
    """Convert an XML document into nested dicts/lists and pretty-print it.

    Conversion rules (see ``xmlToDict``):

    * an element with children becomes a dict keyed by child tag; repeated
      child tags are collected into a list;
    * a leaf element without attributes becomes its stripped text;
    * a leaf element with attributes becomes
      ``{'<<attr>>': {...}, '<<value>>': text}``;
    * ``{uri}local`` names produced by ElementTree are rewritten to
      ``prefix:local`` (or just ``local`` for the default namespace) using
      the namespace declarations seen while parsing.

    Attributes set by ``parse``:
        nsmap:    dict mapping ``str(element_index)`` (document order,
                  root is ``'0'``) to a list of single-entry
                  ``{uri: prefix}`` dicts in scope for that element,
                  innermost declarations first.
        lcounter: number of elements seen while parsing.
        ncounter: number of elements converted so far by ``xmlToDict``.
        dic:      the resulting ``{root_tag: converted_root}`` dict.
    """

    # Namespace URI that is always bound to the ``xml`` prefix.
    XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'

    def __init__(self, fileLocation):
        """Remember the XML source; nothing is read until ``parse``.

        fileLocation is handed to ``ET.iterparse`` unchanged, so it may be
        a file name or a binary file object.
        """
        self.location = fileLocation
        # Initialised here so the helper methods can be used before parse().
        self.nsmap = {}
        self.lcounter = 0
        self.ncounter = 0
        self.dic = None

    def fixAttrs(self, attrs, c):
        """Return a copy of ``attrs`` with namespaced keys made readable.

        attrs: mapping of attribute name to value.
        c:     element index whose namespace scope is used for the lookup.
        """
        return {self.buildXmlnsKey(name, c): attrs[name] for name in attrs}

    def buildXmlnsKey(self, tagtxt, tcounter):
        """Rewrite ``{uri}local`` as ``prefix:local`` for element ``tcounter``.

        The scope recorded for ``tcounter`` is searched first; if the URI is
        not bound there, lower element indexes are searched in turn down to
        0. Names without a leading ``{`` and names whose URI is unknown are
        returned unchanged. An empty prefix (default namespace) yields the
        bare local name.
        """
        if not tagtxt.startswith("{"):
            return tagtxt
        previous = None
        # Iterative fallback: a recursive walk could exceed the interpreter
        # recursion limit on documents with thousands of elements.
        for index in range(tcounter, -1, -1):
            scope = self.nsmap.get(str(index))
            # Elements without declarations share their parent's list, so
            # skip a list that was just searched.
            if scope is None or scope is previous:
                continue
            previous = scope
            for binding in scope:
                for uri, prefix in binding.items():
                    marker = '{' + uri + '}'
                    if tagtxt.startswith(marker):
                        local = tagtxt[len(marker):]
                        return prefix + ':' + local if prefix else local
        return tagtxt

    def xmlToDict(self, node, dictclass=None):
        """Convert ``node`` and its subtree; see the class docstring.

        node:      an ElementTree element.
        dictclass: optional dict to fill instead of a fresh one.

        Returns the filled dict, or a plain string for a leaf element that
        has no attributes. Text of elements that have children is ignored.
        Advances ``self.ncounter`` by one per converted element; the
        traversal is pre-order, matching the indexes used in ``nsmap``.
        """
        if dictclass is None:
            dictclass = {}
        # Index of this element; its attributes must be resolved in its own
        # scope, not in the scope of the element that follows it.
        index = self.ncounter
        self.ncounter = index + 1
        attrs = self.fixAttrs(node.attrib, index) if node.attrib else None

        if len(node):
            if attrs is not None:
                dictclass['<<attr>>'] = attrs
            for child in node:
                # self.ncounter now equals the index of ``child``.
                tagtxt = self.buildXmlnsKey(child.tag, self.ncounter)
                newItem = self.xmlToDict(child)
                if tagtxt not in dictclass:
                    dictclass[tagtxt] = newItem
                    continue
                existing = dictclass[tagtxt]
                if isinstance(existing, list):
                    existing.append(newItem)
                else:
                    # Second occurrence of this tag: switch to a list.
                    dictclass[tagtxt] = [existing, newItem]
            return dictclass

        text = '' if node.text is None else node.text.strip()
        if attrs is None:
            return text
        dictclass['<<attr>>'] = attrs
        dictclass['<<value>>'] = text
        return dictclass

    def printDic(self, dic, pos=None):
        """Print ``dic`` as an indented tree; ``pos`` is the nesting depth."""
        if pos is None:
            pos = 0
        indent = self.getLenStr(pos)
        for key in dic:
            value = dic[key]
            if value is None:
                print('%s %s ' % (indent, key))
            elif isinstance(value, dict):
                print('%s%s {' % (indent, key))
                self.printDic(value, pos + 1)
                print(indent + '}')
            elif isinstance(value, list):
                print('%s%s [' % (indent, key))
                self.printList(value, pos + 1)
                print(indent + ']')
            else:
                print('%s%s :  %s' % (indent, key, value))

    def printList(self, dic, pos=None):
        """Print the items of list ``dic`` with their positions.

        ``None`` items are skipped but still consume a position number.
        """
        if pos is None:
            pos = 0
        indent = self.getLenStr(pos)
        for lindex, obj in enumerate(dic):
            if obj is None:
                continue
            if isinstance(obj, dict):
                print('%s%s. {' % (indent, lindex))
                self.printDic(obj, pos + 1)
                print(indent + '}')
            elif isinstance(obj, list):
                print('%s%s. [' % (indent, lindex))
                self.printList(obj, pos + 1)
                print(indent + ']')
            else:
                print('%s %s. %s' % (indent, lindex, obj))

    def getLenStr(self, pos):
        """Return the indentation for depth ``pos`` (one space per level)."""
        return ' ' * max(pos, 0)

    def parse(self):
        """Parse ``self.location`` and return ``{root_tag: converted_root}``.

        The source is read once. Namespace declarations are tracked with a
        stack that follows element nesting, so a prefix declared inside one
        subtree never affects elements outside it. The result is also
        stored in ``self.dic``.
        """
        self.nsmap = {}
        self.lcounter = 0
        root = None
        # Scope stack; the bottom entry holds only the implicit xml binding.
        scopes = [[{self.XML_NAMESPACE: 'xml'}]]
        pending = []  # declarations seen since the last 'start' event
        for event, elem in ET.iterparse(self.location,
                                        events=('start', 'end', 'start-ns')):
            if event == 'start-ns':
                prefix, uri = elem
                if uri:
                    pending.append({uri: prefix or ''})
            elif event == 'start':
                if root is None:
                    root = elem
                if pending:
                    # 'start-ns' events precede the 'start' of the element
                    # that declares them; own declarations shadow inherited.
                    scope = pending + scopes[-1]
                    pending = []
                else:
                    scope = scopes[-1]
                scopes.append(scope)
                self.nsmap[str(self.lcounter)] = scope
                self.lcounter += 1
            else:  # 'end'
                scopes.pop()
        self.ncounter = 0
        converted = self.xmlToDict(root)
        self.dic = {self.buildXmlnsKey(root.tag, 0): converted}
        return self.dic
if __name__ == "__main__":
    # Parse the file named on the command line; without an argument fall
    # back to the sample file name this script has always used.
    xmlPath = sys.argv[1] if len(sys.argv) > 1 else 'xml7.xml'
    xmlObj = XmlParser(xmlPath)
    theXmlDictionary = xmlObj.parse()
    xmlObj.printDic(theXmlDictionary)
# No comments:
# Post a Comment