Showing posts with label python. Show all posts
Showing posts with label python. Show all posts

Sunday, September 11, 2016

Python parse xml


import sys
import xml.etree.ElementTree as ET

class XmlParser(object):
    """Convert an XML file into nested dictionaries and lists.

    Result layout produced by :meth:`parse`:

    * the top-level dictionary has a single key, the root tag;
    * an element with children becomes a dictionary keyed by child tag;
    * sibling elements sharing a tag are collected into a list;
    * attributes are stored under the ``'<<attr>>'`` key;
    * a leaf element without attributes becomes its stripped text, while a
      leaf element with attributes becomes a dictionary holding
      ``'<<attr>>'`` and ``'<<value>>'``;
    * text inside an element that also has children is not captured.

    ElementTree reports qualified names as ``{uri}local``; they are rewritten
    to ``prefix:local`` using the prefixes declared in the document.

    The code runs on both Python 2.7 and Python 3.
    """

    def __init__(self, fileLocation):
        """Remember the path of the XML file; nothing is read until parse()."""
        self.location = fileLocation
        # Namespace table filled by parse(): str(index) -> list of
        # {uri: prefix} dictionaries, where index counts the 'start' events
        # seen before the declaration was reported.
        self.nsmap = {}
        self.lcounter = 0  # number of 'start' events seen while scanning
        self.ncounter = 0  # number of elements visited by xmlToDict()
        self.dic = None    # result of the most recent parse()

    def fixAttrs(self, attrs, c):
        """Return a copy of *attrs* with ``{uri}name`` keys turned into ``prefix:name``.

        *c* is the element counter used for the namespace lookup.
        """
        return {self.buildXmlnsKey(name, c): value for name, value in attrs.items()}

    def buildXmlnsKey(self, tagtxt, tcounter):
        """Rewrite a ``{uri}local`` name as ``prefix:local``.

        The namespace table is searched at index *tcounter* first and then at
        every lower index down to 0; the first matching URI wins.  An empty
        prefix (default namespace) yields the bare local name.  Names without
        a namespace, or whose namespace is not in the table, are returned
        unchanged.
        """
        if not tagtxt.startswith("{"):
            return tagtxt

        level = tcounter
        # Iterative on purpose: one recursive call per index can exceed the
        # interpreter's recursion limit on documents with many elements.
        while True:
            for mapping in self.nsmap.get(str(level), ()):
                for uri, prefix in mapping.items():
                    marker = '{' + uri + '}'
                    if tagtxt.startswith(marker):
                        replacement = prefix + ':' if len(prefix) else ''
                        return tagtxt.replace(marker, replacement, 1)
            if level <= 0:
                return tagtxt
            level -= 1

    def xmlToDict(self, node, dictclass=None):
        """Convert *node* and its descendants (see the class docstring).

        *dictclass*, when given, is the dictionary that receives the entries.
        A leaf element without attributes is returned as a plain string and
        *dictclass* is ignored in that case.
        """
        if dictclass is None:
            dictclass = {}
        self.ncounter += 1

        if len(node):
            if node.attrib:
                dictclass['<<attr>>'] = self.fixAttrs(node.attrib, self.ncounter)
            for child in node:
                # The tag is resolved before recursing, i.e. with the counter
                # value reached after the previous sibling's subtree.
                tagtxt = self.buildXmlnsKey(child.tag, self.ncounter)
                newItem = self.xmlToDict(child)
                if tagtxt not in dictclass:
                    dictclass[tagtxt] = newItem
                elif isinstance(dictclass[tagtxt], list):
                    dictclass[tagtxt].append(newItem)
                else:
                    # Second occurrence of this tag: promote to a list.
                    dictclass[tagtxt] = [dictclass[tagtxt], newItem]
            return dictclass

        text = '' if node.text is None else node.text.strip()
        if not node.attrib:
            return text
        dictclass['<<attr>>'] = self.fixAttrs(node.attrib, self.ncounter)
        dictclass['<<value>>'] = text
        return dictclass

    def printDic(self, dic, pos=None):
        """Pretty-print dictionary *dic* to stdout, indented by *pos* levels."""
        if pos is None:
            pos = 0
        indent = self.getLenStr(pos)
        for key, value in dic.items():
            if value is None:
                print('%s %s ' % (indent, key))
            elif isinstance(value, dict):
                print('%s%s {' % (indent, key))
                self.printDic(value, pos + 1)
                print('%s}' % indent)
            elif isinstance(value, list):
                print('%s%s [' % (indent, key))
                self.printList(value, pos + 1)
                print('%s]' % indent)
            else:
                print('%s%s :  %s' % (indent, key, value))

    def printList(self, dic, pos=None):
        """Pretty-print list *dic* to stdout with ``index.`` labels.

        ``None`` items are skipped but still consume an index.  Scalar items
        are printed with one extra leading space; this matches the layout the
        parser has always produced.
        """
        if pos is None:
            pos = 0
        indent = self.getLenStr(pos)
        for lindex, obj in enumerate(dic):
            if obj is None:
                continue
            if isinstance(obj, dict):
                print('%s%d. {' % (indent, lindex))
                self.printDic(obj, pos + 1)
                print('%s}' % indent)
            elif isinstance(obj, list):
                print('%s%d. [' % (indent, lindex))
                self.printList(obj, pos + 1)
                print('%s]' % indent)
            else:
                print('%s %d. %s' % (indent, lindex, obj))

    def getLenStr(self, pos):
        """Return the indentation string for level *pos* (three spaces per level)."""
        return '   ' * pos if pos > 0 else ''

    def parse(self):
        """Parse the file at ``self.location`` and return ``{root_tag: content}``.

        The file is read once: the namespace table is collected from the
        'start-ns' events and the tree built by the same pass is converted
        afterwards.  Errors raised by ``open`` or ElementTree propagate.
        """
        self.nsmap = {}
        self.lcounter = 0
        root = None

        with open(self.location, 'rb') as source:
            events = ('start', 'end', 'start-ns', 'end-ns')
            for event, elem in ET.iterparse(source, events=events):
                if event == 'start-ns':
                    prefix, uri = elem
                    if uri is not None and len(uri):
                        slot = str(self.lcounter)
                        if slot not in self.nsmap:
                            # The 'xml' prefix is predefined and never declared.
                            self.nsmap[slot] = [
                                {'http://www.w3.org/XML/1998/namespace': 'xml'}
                            ]
                        self.nsmap[slot].append({uri: str(prefix)})
                elif event == 'start':
                    if root is None:  # "is None": childless elements are falsy
                        root = elem
                    slot = str(self.lcounter)
                    previous = str(self.lcounter - 1)
                    if slot not in self.nsmap and previous in self.nsmap:
                        # Share the previous element's declarations.
                        self.nsmap[slot] = self.nsmap[previous]
                    self.lcounter += 1

        self.ncounter = 0
        content = self.xmlToDict(root)
        self.dic = {self.buildXmlnsKey(root.tag, 0): content}
        return self.dic

if __name__ == "__main__":
    # Demo: turn xml7.xml into a dictionary and pretty-print the result.
    demo_parser = XmlParser('xml7.xml')
    demo_parser.printDic(demo_parser.parse())

Suppose you have the following XML saved as a file (xml7.xml in the example above):


<?xml version="1.0"?>
<lib:library
    xmlns:lib="http://eric.van-der-vlist.com/ns/library"
    xmlns:hr="http://eric.van-der-vlist.com/ns/person">
    <lib:book id="b0836217462" available="true">
        <lib:isbn>0836217462</lib:isbn>
        <lib:title xml:lang="en">Being a Dog Is a Full-Time Job</lib:title>
        <hr:author id="CMS">
            <hr:name>Charles M Schulz</hr:name>
            <hr:born>1922-11-26</hr:born>
            <hr:dead>2000-02-12</hr:dead>
        </hr:author>
        <lib:character id="PP">
            <hr:name>Peppermint Patty</hr:name>
            <hr:born>1966-08-22</hr:born>
            <lib:qualification>bold, brash and tomboyish</lib:qualification>
        </lib:character>
        <lib:character id="Snoopy">
            <hr:name>Snoopy</hr:name>
            <hr:born>1950-10-04</hr:born>
            <lib:qualification>extroverted beagle</lib:qualification>
        </lib:character>
        <lib:character id="Schroeder">
            <hr:name>Schroeder</hr:name>
            <hr:born>1951-05-30</hr:born>
            <lib:qualification>brought classical music to the Peanuts strip</lib:qualification>
        </lib:character>
        <lib:character id="Lucy">
            <hr:name>Lucy</hr:name>
            <hr:born>1952-03-03</hr:born>
            <lib:qualification>bossy, crabby and selfish</lib:qualification>
        </lib:character>
    </lib:book>
    <Purchase>
        <PurchaseId>AAAAA</PurchaseId>
        <PurchaseType>ONLINE</PurchaseType>
    </Purchase>
    <Purchase>
        <PurchaseId>BBBBB</PurchaseId>
        <PurchaseType>OFFLINE</PurchaseType>
    </Purchase>
    <Purchase paid='True'>
        <Purchase age='30'>
            <Purchase>HMM 1</Purchase>
            <Purchase>HMM 2</Purchase>
        </Purchase>
    </Purchase>
</lib:library>

The output will look like this:

lib:library {
   Purchase [
      0. {
         PurchaseId :  AAAAA
         PurchaseType :  ONLINE
      }
      1. {
         PurchaseId :  BBBBB
         PurchaseType :  OFFLINE
      }
      2. {
         <<attr>> {
            paid :  True
         }
         Purchase {
            <<attr>> {
               age :  30
            }
            Purchase [
                0. HMM 1
                1. HMM 2
            ]
         }
      }
   ]
   lib:book {
      hr:author {
         <<attr>> {
            id :  CMS
         }
         hr:name :  Charles M Schulz
         hr:born :  1922-11-26
         hr:dead :  2000-02-12
      }
      <<attr>> {
         available :  true
         id :  b0836217462
      }
      lib:title {
         <<attr>> {
            xml:lang :  en
         }
         <<value>> :  Being a Dog Is a Full-Time Job
      }
      lib:isbn :  0836217462
      lib:character [
         0. {
            <<attr>> {
               id :  PP
            }
            hr:name :  Peppermint Patty
            hr:born :  1966-08-22
            lib:qualification :  bold, brash and tomboyish
         }
         1. {
            <<attr>> {
               id :  Snoopy
            }
            hr:name :  Snoopy
            hr:born :  1950-10-04
            lib:qualification :  extroverted beagle
         }
         2. {
            <<attr>> {
               id :  Schroeder
            }
            hr:name :  Schroeder
            hr:born :  1951-05-30
            lib:qualification :  brought classical music to the Peanuts strip
         }
         3. {
            <<attr>> {
               id :  Lucy
            }
            hr:name :  Lucy
            hr:born :  1952-03-03
            lib:qualification :  bossy, crabby and selfish
         }
      ]
   }
}

Sunday, August 31, 2014

Python: Generate XML From Dictionary Or List


import sys
import tempfile
from XmlToDict import XmlParser

class DictToXml(object):
    """Serialize a nested dict/list structure into an XML string.

    Conventions understood in the input:

    * the ``'<<attr>>'`` key of a dictionary holds that element's attributes;
    * the ``'<<value>>'`` key holds the element's text;
    * a list produces one element per item, all named after the list's key;
    * any other key becomes a child element.

    The document root is always named ``rootTag``.  Attribute values and text
    are XML-escaped; tag and attribute *names* are written as given.

    The code runs on both Python 2.7 and Python 3.
    """

    def __init__(self, dic):
        self.dic = dic
        self.str = ""  # output buffer, rebuilt by toXml()
        self.NEW_LINE = "\n"
        self.attr = '<<attr>>'
        self.value = '<<value>>'

    def toXml(self):
        """Return the XML document for the data passed to the constructor."""
        # Start from an empty buffer so repeated calls do not accumulate.
        self.str = ""
        self.rx(self.dic)
        return self.str

    def rx(self, o):
        """Append the XML declaration and the ``rootTag`` element(s) for *o*."""
        w = 'rootTag'
        c = 1
        self.str += "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + self.NEW_LINE
        if isinstance(o, list):
            self.rxl(o, w, c)
        else:
            self.rxd(o, w, c)

    def rxd(self, o, w, c):
        """Append element *w* built from dictionary *o*.

        *c* is the nesting depth: the element itself is indented by ``c - 1``
        levels and its children by ``c`` levels.
        """
        attrs = o.get(self.attr) or {}
        children = [(k, v) for k, v in o.items()
                    if k != self.attr and k != self.value]

        self.str += self.tb(c - 1) + "<" + w
        for name, val in attrs.items():
            self.str += " " + name + "=" + self._attr(val)
        self.str += ">"

        # Text stays on the same line as the opening tag.
        if self.value in o:
            self.str += self._text(o[self.value])

        if children:
            self.str += self.NEW_LINE
            for k, o2 in children:
                if isinstance(o2, list):
                    self.rxl(o2, k, c + 1)
                elif isinstance(o2, dict):
                    self.rxd(o2, k, c + 1)
                else:
                    self.str += self.tb(c) + "<" + k + ">"
                    self.str += self._text(o2)
                    self.str += "</" + k + ">" + self.NEW_LINE
            # Only a closing tag that sits on its own line is indented.
            self.str += self.tb(c - 1)
        self.str += "</" + w + ">" + self.NEW_LINE

    def rxl(self, o, w, c):
        """Append one element named *w* for every item of list *o*."""
        for o2 in o:
            if isinstance(o2, dict):
                self.rxd(o2, w, c)
            elif isinstance(o2, list):
                self.str += self.tb(c - 1) + "<" + w + ">" + self.NEW_LINE
                self.rxl(o2, w, c + 1)
                self.str += self.tb(c - 1) + "</" + w + ">" + self.NEW_LINE
            else:
                self.str += self.tb(c - 1) + "<" + w + ">"
                self.str += self._text(o2)
                self.str += "</" + w + ">" + self.NEW_LINE

    def tb(self, c):
        """Return the indentation for *c* levels (four spaces per level)."""
        return "    " * c

    @staticmethod
    def _text(value):
        """Return *value* as escaped character data (``&``, ``<``, ``>``)."""
        from xml.sax.saxutils import escape
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
        if not isinstance(value, (str, type(u""))):
            value = str(value)
        return escape(value)

    @staticmethod
    def _attr(value):
        """Return *value* escaped and wrapped in quotes for use as an attribute."""
        from xml.sax.saxutils import quoteattr
        if not isinstance(value, (str, type(u""))):
            value = str(value)
        return quoteattr(value)
        
        
        
if __name__ == "__main__":
    import os

    # Build sample data: key1/key2 carry a child element, key4/key5 carry
    # text, all four carry two attributes; 'list' mixes scalars, a nested
    # list and a dictionary.
    dicobj = {}
    listobj = []
    list2 = []
    for num in (1, 2, 4, 5):
        entry = {}
        if num < 3:
            entry['otherkey'] = str(num)
        else:
            entry['<<value>>'] = str(num)
        entry['<<attr>>'] = {
            'att1': 'att1: ' + str(num),
            'att2': 'att2: ' + str(num),
        }
        dicobj['key' + str(num)] = entry
        listobj.append(num)
        list2.append("LIST: " + str(num))
    listobj.append(list2)
    listobj.append({'nl': 'hmm', 'tl': 'just tl', '<<attr>>': {'att1': 'Att1'}})
    dicobj['list'] = listobj

    txml = DictToXml(dicobj).toXml()
    # Print the XML string generated from the data dictionary.
    print(txml)
    print('')

    # Save the XML string to a temporary file.  Text mode is required because
    # txml is a str; delete=False keeps the file available after it is closed.
    f = tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False)
    try:
        with f:
            f.write(txml)

        # Parse the XML file with XmlParser.
        xmlObj = XmlParser(f.name)
        theXmlDictionary = xmlObj.parse()
        # Pretty-print the data dictionary generated from the XML.
        xmlObj.printDic(theXmlDictionary)

        # Generate XML again from the parsed data dictionary.
        print(DictToXml(theXmlDictionary['rootTag']).toXml())
    finally:
        # The temporary file is not deleted automatically.
        os.remove(f.name)

The output will look like this:


<?xml version="1.0" encoding="UTF-8" ?>
<rootTag>
    <key2 att2="att2: 2" att1="att1: 2">
        <otherkey>2</otherkey>
    </key2>
    <key1 att2="att2: 1" att1="att1: 1">
        <otherkey>1</otherkey>
    </key1>
    <list>1</list>
    <list>2</list>
    <list>4</list>
    <list>5</list>
    <list>
        <list>LIST: 1</list>
        <list>LIST: 2</list>
        <list>LIST: 4</list>
        <list>LIST: 5</list>
    </list>
    <list att1="Att1">
        <tl>just tl</tl>
        <nl>hmm</nl>
    </list>
    <key5 att2="att2: 5" att1="att1: 5">5</key5>
    <key4 att2="att2: 4" att1="att1: 4">4</key4>
</rootTag>


rootTag: {
   key2: {
      <<attr>>: {
         att2 :  att2: 2
         att1 :  att1: 2
      }
      otherkey :  2
   }
   key1: {
      <<attr>>: {
         att2 :  att2: 1
         att1 :  att1: 1
      }
      otherkey :  1
   }
   list: [
      0. 1
      1. 2
      2. 4
      3. 5
      4. {
         list: [
            0. LIST: 1
            1. LIST: 2
            2. LIST: 4
            3. LIST: 5
         ]
      }
      5. {
         <<attr>>: {
            att1 :  Att1
         }
         tl :  just tl
         nl :  hmm
      }
   ]
   key5: {
      <<attr>>: {
         att2 :  att2: 5
         att1 :  att1: 5
      }
      <<value>> :  5
   }
   key4: {
      <<attr>>: {
         att2 :  att2: 4
         att1 :  att1: 4
      }
      <<value>> :  4
   }
}
<?xml version="1.0" encoding="UTF-8" ?>
<rootTag>
    <key2 att2="att2: 2" att1="att1: 2">
        <otherkey>2</otherkey>
    </key2>
    <key1 att2="att2: 1" att1="att1: 1">
        <otherkey>1</otherkey>
    </key1>
    <list>1</list>
    <list>2</list>
    <list>4</list>
    <list>5</list>
    <list>
        <list>LIST: 1</list>
        <list>LIST: 2</list>
        <list>LIST: 4</list>
        <list>LIST: 5</list>
    </list>
    <list att1="Att1">
        <tl>just tl</tl>
        <nl>hmm</nl>
    </list>
    <key5 att2="att2: 5" att1="att1: 5">5</key5>
    <key4 att2="att2: 4" att1="att1: 4">4</key4>
</rootTag>

Monday, August 25, 2014

Python Read & Write Csv Files


import csv
from random import randint

class CsvObj(object):
    """In-memory CSV table: one header row plus any number of value rows.

    The code runs on both Python 2.7 and Python 3.
    """

    def __init__(self):
        self.kset = []  # header (column names)
        self.vset = []  # data rows, each a list of column values

    def setheader(self, h):
        """Use *h* as the header row."""
        self.kset = h

    def addvalue(self, v):
        """Append data row *v*."""
        self.vset.append(v)

    def prettyprint(self):
        """Print every row as ``name :  value`` lines after a dashed separator.

        Columns are paired with header names by position; columns without a
        counterpart (row shorter or longer than the header) are not printed.
        Returns ``None``.
        """
        for obj in self.vset:
            print('-----------------------------------------------------')
            for key, val in zip(self.kset, obj):
                print('%s :  %s' % (key, val))

    def tocsv(self, name):
        """Write the header and all rows to the file *name*; return ``True``.

        In the written file the first column of each row is replaced by
        ``'<col0> , " , <col1>'`` (a value containing a comma and a quote,
        presumably to exercise the writer's quoting).  The stored rows are
        left untouched, and rows with fewer than two columns are written
        unchanged.
        """
        import sys

        # The csv module needs a binary file on Python 2 and a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            fp = open(name, 'w', newline='')
        else:
            fp = open(name, 'wb')
        with fp:
            writer = csv.writer(fp, delimiter=',')
            writer.writerow(self.kset)
            for row in self.vset:
                out = list(row)  # copy, so self.vset is not modified
                if len(out) >= 2:
                    out[0] = '%s , " , %s' % (str(out[0]), str(out[1]))
                writer.writerow(out)
        return True
    

import sys

# Load countrylist.csv: the first row is the header, the rest are data rows.
csvobj = CsvObj()
# The csv module needs a binary file on Python 2 and a text file opened with
# newline='' on Python 3.
if sys.version_info[0] >= 3:
    f = open('countrylist.csv', newline='')
else:
    f = open('countrylist.csv', 'rb')
with f:
    reader = csv.reader(f)
    for pos, row in enumerate(reader):
        if pos == 0:
            csvobj.setheader(row)
        else:
            csvobj.addvalue(row)

# prettyprint() prints by itself and returns None, so it must not be wrapped
# in print (that would emit a stray "None" line).
csvobj.prettyprint()
csvname = 'csv%s.csv' % str(randint(0, 99999))
if csvobj.tocsv(csvname):
    print('Csv %s created' % (csvname))

The output will look like this:

-----------------------------------------------------
Order :  1
Name :  Albania
Name :  Republic of Albania
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Tirana
Currency Code :  ALL
Currency Name :  Lek
Telephone Code :  +355
Letter Code :  AL
Letter Code :  ALB
Number :  008
Country Code TLD :  .al
-----------------------------------------------------
Order :  2
Name :  Algeria
Name :  People's Democratic Republic of Algeria
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Algiers
Currency Code :  DZD
Currency Name :  Dinar
Telephone Code :  +213
Letter Code :  DZ
Letter Code :  DZA
Number :  012
Country Code TLD :  .dz
-----------------------------------------------------
Order :  3
Name :  Andorra
Name :  Principality of Andorra
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Andorra la Vella
Currency Code :  EUR
Currency Name :  Euro
Telephone Code :  +376
Letter Code :  AD
Letter Code :  AND
Number :  020
Country Code TLD :  .ad
-----------------------------------------------------
Order :  4
Name :  Angola
Name :  Republic of Angola
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Luanda
Currency Code :  AOA
Currency Name :  Kwanza
Telephone Code :  +244
Letter Code :  AO
Letter Code :  AGO
Number :  024
Country Code TLD :  .ao
-----------------------------------------------------
Order :  5
Name :  Antigua and Barbuda
Name :  
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Saint John's
Currency Code :  XCD
Currency Name :  Dollar
Telephone Code :  +1-268
Letter Code :  AG
Letter Code :  ATG
Number :  028
Country Code TLD :  .ag
-----------------------------------------------------
Order :  6
Name :  Argentina
Name :  Argentine Republic
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Buenos Aires
Currency Code :  ARS
Currency Name :  Peso
Telephone Code :  +54
Letter Code :  AR
Letter Code :  ARG
Number :  032
Country Code TLD :  .ar
-----------------------------------------------------
Order :  7
Name :  Armenia
Name :  Republic of Armenia
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Yerevan
Currency Code :  AMD
Currency Name :  Dram
Telephone Code :  +374
Letter Code :  AM
Letter Code :  ARM
Number :  051
Country Code TLD :  .am
-----------------------------------------------------
Order :  8
Name :  Australia
Name :  Commonwealth of Australia
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Canberra
Currency Code :  AUD
Currency Name :  Dollar
Telephone Code :  +61
Letter Code :  AU
Letter Code :  AUS
Number :  036
Country Code TLD :  .au
-----------------------------------------------------
Order :  9
Name :  Austria
Name :  Republic of Austria
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Vienna
Currency Code :  EUR
Currency Name :  Euro
Telephone Code :  +43
Letter Code :  AT
Letter Code :  AUT
Number :  040
Country Code TLD :  .at

...................................

Csv csv55227.csv created

Wednesday, August 13, 2014

Python send plain/html email with attachment using smtp


import os
import tempfile
import mimetypes

from email.Utils import COMMASPACE, formatdate
from smtplib import SMTP_SSL as SMTP       # this invokes the secure SMTP protocol (port 465, uses SSL)
# from smtplib import SMTP                  # use this for standard SMTP protocol   (port 25, no encryption)
from email.MIMEText import MIMEText
from email.MIMEBase import MIMEBase
from email.MIMEImage import MIMEImage
from email.MIMEMultipart import MIMEMultipart
from email.MIMEAudio import MIMEAudio
from email import Encoders


import shutil

# Connection settings.  The credentials below are placeholders; replace them
# with real values and keep real passwords out of source control.
SMTPserver = 'smtp.mail.yahoo.com'
sender = 'some.name@yahoo.com'
destination = ['some.name@domain.com']

USERNAME = "some.name@yahoo.com"
PASSWORD = "some.password"

contentHtml = """\
<b>Html message</b>
<div><span style='color: red'>Span with color red</span></div>
<div><span style='color: blue'>Span with color blue</span></div>
"""

contentPlain = 'Plain message'

contentAsPlainTextFile = 'Content as plain text file.'

subject = "Sent from Python"

conn = None
tempdirectory = None
try:
    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = COMMASPACE.join(destination)
    msg['Date'] = formatdate(localtime=True)

    # Typical values for the text subtype are plain, html, xml.
    msg.attach(MIMEText(contentHtml, 'html'))
    msg.attach(MIMEText(contentPlain, 'plain'))

    # Candidate attachments that live next to this script; entries that do
    # not exist are skipped in the loop below.
    directory = os.path.dirname(os.path.realpath(__file__))
    files = [
        os.path.join(directory, 'mail.simple.py'),
        os.path.join(directory, 'royal.jpg'),
        os.path.join(directory, 'audio.rm'),
        os.path.join(directory, 'xml7.xml'),
    ]

    # Attach a text generated at run time by writing it to a temporary file.
    tempdirectory = tempfile.mkdtemp()
    tempfilelocal = os.path.join(tempdirectory, 'Custom Text As Attachment.txt')
    with open(tempfilelocal, 'w') as fp:
        fp.write(contentAsPlainTextFile)
    files.append(tempfilelocal)

    for fullpath in files:
        if not os.path.isfile(fullpath):
            continue
        ctype, encoding = mimetypes.guess_type(fullpath)
        if ctype is None or encoding is not None:
            # Unknown type, or an encoded (e.g. compressed) file: send it as
            # a generic binary attachment.
            ctype = 'application/octet-stream'
        maintype, subtype = ctype.split('/', 1)
        if maintype == 'text':
            with open(fullpath) as fp:
                msgpart = MIMEText(fp.read(), _subtype=subtype)
        elif maintype == 'image':
            with open(fullpath, 'rb') as fp:
                msgpart = MIMEImage(fp.read(), _subtype=subtype)
        elif maintype == 'audio':
            with open(fullpath, 'rb') as fp:
                msgpart = MIMEAudio(fp.read(), _subtype=subtype)
        else:
            with open(fullpath, 'rb') as fp:
                msgpart = MIMEBase(maintype, subtype)
                msgpart.set_payload(fp.read())
            # Encode the payload using Base64.
            Encoders.encode_base64(msgpart)
        # Set the filename parameter.
        msgpart.add_header('Content-Disposition', 'attachment',
                           filename=os.path.basename(fullpath))
        msg.attach(msgpart)

    conn = SMTP(SMTPserver)
    conn.set_debuglevel(False)
    conn.login(USERNAME, PASSWORD)
    conn.sendmail(sender, destination, msg.as_string())
    print("Mail sent.")
except Exception as exc:
    print("Mail send failed: %s" % str(exc))
finally:
    if conn is not None:
        conn.close()
        print("Connection closed.")
    if tempdirectory is not None:
        # Every attachment was read into memory above, so the temporary
        # directory can be removed whether or not sending succeeded.
        shutil.rmtree(tempdirectory, ignore_errors=True)

If the server name and credentials are correct, you will receive an email like the one below. In my case, it arrived in my Gmail inbox.