Showing posts with label parse. Show all posts
Showing posts with label parse. Show all posts

Thursday, September 12, 2013

Parse XML using java and store data in HashMap recursively

Download XmlParser.java class

Online Debug: http://ideone.com/eV2NDC

See the following link to build XML from a HashMap/ArrayList


package xmlparser;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

/**
 * Converts an XML document (given as a string or a file) into nested
 * {@link Map}s and {@link List}s.
 *
 * <p>Mapping rules:
 * <ul>
 *   <li>An element that contains child elements becomes a {@code Map} keyed by
 *       the children's node names. Text mixed in between child elements is
 *       not kept.</li>
 *   <li>An element without child elements becomes its trimmed text
 *       ({@code null} when it has no text at all).</li>
 *   <li>Attributes are stored in a map under the key {@code <<attributes>>}.
 *       A text-only element that has attributes becomes a map holding
 *       {@code <<attributes>>} and {@code <<value>>}.</li>
 *   <li>Sibling elements sharing a name are collected into a {@code List}
 *       in document order.</li>
 * </ul>
 *
 * @author Pritom K Mondal
 * @published 12th September 2013 08:04 PM
 */
public class XmlParser {
    /** Map key under which an element's attributes are stored. */
    private static final String ATTRIBUTES_KEY = "<<attributes>>";

    /** Map key under which the text of an attributed leaf element is stored. */
    private static final String VALUE_KEY = "<<value>>";

    /** One indentation level used by the console printer. */
    private static final String INDENT = "    ";

    /** Parser features that allow fetching external entities or DTDs; all are switched off. */
    private static final String[] EXTERNAL_ACCESS_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    private final String xmlString;
    private final File xmlFile;

    /**
     * Parses an XML file and prints the resulting structure to standard output.
     *
     * @param args optional: {@code args[0]} is the path of the file to parse;
     *             defaults to {@code xml1.xml} in the working directory
     */
    public static void main(String[] args) {
        File file = new File(args.length > 0 ? args[0] : "xml1.xml");
        Map<String, Object> xmlMap = new XmlParser(file).parseXML();
        if (xmlMap == null) {
            // parseXML() reports failure with null; printing it would throw.
            System.err.println("Could not parse " + file.getPath());
            return;
        }
        print(xmlMap, 0);
    }

    /**
     * Creates a parser that reads the document from a string.
     *
     * @param xmlString the XML text to parse
     */
    public XmlParser(String xmlString) {
        this.xmlString = xmlString;
        this.xmlFile = null;
    }

    /**
     * Creates a parser that reads the document from a file.
     *
     * @param xmlFile the XML file to parse
     */
    public XmlParser(File xmlFile) {
        this.xmlString = "";
        this.xmlFile = xmlFile;
    }

    /**
     * Parses the configured source.
     *
     * @return a map with a single entry for the document element, or
     *         {@code null} when the source cannot be read or is not
     *         well-formed (the stack trace is printed to standard error)
     */
    public Map<String, Object> parseXML() {
        try {
            DocumentBuilder dBuilder = newDocumentBuilderFactory().newDocumentBuilder();

            Document doc;
            if (this.xmlFile != null) {
                doc = dBuilder.parse(this.xmlFile);
            } else {
                // A character stream avoids encoding the string with the
                // platform default charset before the parser decodes it again.
                doc = dBuilder.parse(
                        new org.xml.sax.InputSource(new java.io.StringReader(this.xmlString)));
            }

            doc.getDocumentElement().normalize();

            Map<String, Object> result = new HashMap<String, Object>();
            parseXMLNode(elementChildren(doc), result);
            return result;
        } catch (Exception ex) {
            // Boundary of the class: callers rely on null to signal failure.
            ex.printStackTrace();
            return null;
        }
    }

    /** Builds a factory that does not resolve external entities or external DTDs. */
    private static DocumentBuilderFactory newDocumentBuilderFactory() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        for (String feature : EXTERNAL_ACCESS_FEATURES) {
            try {
                factory.setFeature(feature, false);
            } catch (javax.xml.parsers.ParserConfigurationException ignored) {
                // Best effort: this parser implementation does not know the feature.
            }
        }
        return factory;
    }

    /** Collects the direct children of {@code parent} that are elements. */
    private static MyNodeList elementChildren(Node parent) {
        MyNodeList elements = new MyNodeList();
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                elements.addNode(child);
            }
        }
        return elements;
    }

    /**
     * Adds every element of {@code nList} to {@code result}, recursing into
     * elements that contain child elements.
     */
    private void parseXMLNode(NodeList nList, Map<String, Object> result) {
        for (int i = 0; i < nList.getLength(); i++) {
            Node nNode = nList.item(i);
            if (nNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            // Decide by the presence of child ELEMENTS, not by the shape of
            // the first child node: otherwise <a><b/></a> loses <b>.
            MyNodeList children = elementChildren(nNode);
            if (children.getLength() == 0) {
                putValue(result, nNode);
                continue;
            }

            Map<String, Object> dataHashMap = new HashMap<String, Object>();
            addEntry(result, nNode.getNodeName(), dataHashMap);

            Map<String, String> attributeMap = readAttributes(nNode);
            if (!attributeMap.isEmpty()) {
                dataHashMap.put(ATTRIBUTES_KEY, attributeMap);
            }
            parseXMLNode(children, dataHashMap);
        }
    }

    /** Stores a leaf element: its text, or a value/attributes map when it has attributes. */
    private void putValue(Map<String, Object> result, Node nNode) {
        String nodeValue = textOf(nNode);
        Object putNode = nodeValue;

        Map<String, String> attributeMap = readAttributes(nNode);
        if (!attributeMap.isEmpty()) {
            Map<String, Object> nodeMap = new HashMap<String, Object>();
            nodeMap.put(VALUE_KEY, nodeValue);
            nodeMap.put(ATTRIBUTES_KEY, attributeMap);
            putNode = nodeMap;
        }
        addEntry(result, nNode.getNodeName(), putNode);
    }

    /**
     * Returns the trimmed concatenation of the text and CDATA children of
     * {@code element}; comments and processing instructions are ignored.
     *
     * @return the text, or {@code null} when the element has no text children
     */
    private static String textOf(Node element) {
        StringBuilder text = null;
        for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
            short type = child.getNodeType();
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                if (text == null) {
                    text = new StringBuilder();
                }
                text.append(child.getNodeValue());
            }
        }
        return text == null ? null : text.toString().trim();
    }

    /** Copies the attributes of {@code node} into a name-to-value map (empty when there are none). */
    private static Map<String, String> readAttributes(Node node) {
        Map<String, String> attributeMap = new HashMap<String, String>();
        org.w3c.dom.NamedNodeMap attributes = node.getAttributes();
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                Node attribute = attributes.item(i);
                attributeMap.put(attribute.getNodeName(), attribute.getNodeValue());
            }
        }
        return attributeMap;
    }

    /**
     * Puts {@code value} under {@code name}; a second value for the same name
     * turns the entry into a list that keeps all values in insertion order.
     */
    @SuppressWarnings("unchecked") // a List stored in the map is always one created below
    private static void addEntry(Map<String, Object> target, String name, Object value) {
        // containsKey, not get() != null: a leaf without text is stored as null.
        if (!target.containsKey(name)) {
            target.put(name, value);
            return;
        }
        Object existing = target.get(name);
        if (existing instanceof List) {
            ((List<Object>) existing).add(value);
        } else {
            List<Object> values = new ArrayList<Object>();
            values.add(existing);
            values.add(value);
            target.put(name, values);
        }
    }

    /** Minimal growable {@link NodeList} used to hand filtered child elements to the recursion. */
    static class MyNodeList implements NodeList {
        private final List<Node> nodes = new ArrayList<Node>();

        public MyNodeList() {}

        /** Appends {@code node} to the end of the list. */
        public void addNode(Node node) {
            nodes.add(node);
        }

        /** Returns the node at {@code index}, or {@code null} when the index is out of range. */
        @Override
        public Node item(int index) {
            return (index >= 0 && index < nodes.size()) ? nodes.get(index) : null;
        }

        @Override
        public int getLength() {
            return nodes.size();
        }
    }

    /** Prints {@code map} to standard output, indenting nested structures by {@code tab} levels. */
    private static void print(Map<?, ?> map, int tab) {
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            String key = entry.getKey().toString();
            Object value = entry.getValue();
            if (value instanceof Map) {
                System.out.println(getTab(tab) + key + " ==> [");
                print((Map<?, ?>) value, tab + 1);
                System.out.println(getTab(tab) + "]");
            } else if (value instanceof List) {
                System.out.println(getTab(tab) + key + " ==> [");
                print((List<?>) value, tab + 1);
                System.out.println(getTab(tab) + "]");
            } else {
                System.out.println(getTab(tab) + key + " ==> " + value);
            }
        }
    }

    /** Prints the items of {@code list} prefixed with their index, indented by {@code tab} levels. */
    private static void print(List<?> list, int tab) {
        for (int index = 0; index < list.size(); index++) {
            Object value = list.get(index);
            if (value instanceof Map) {
                System.out.println(getTab(tab) + index + ": {");
                print((Map<?, ?>) value, tab + 1);
                System.out.println(getTab(tab) + "}");
            } else if (value instanceof List) {
                print((List<?>) value, tab + 1);
            } else {
                System.out.println(getTab(tab) + index + ": " + value);
            }
        }
    }

    /**
     * Builds the indentation prefix for a nesting depth.
     *
     * @param tab nesting depth; values of zero or less give an empty string
     * @return {@code tab} repetitions of four spaces
     */
    public static String getTab(Integer tab) {
        StringBuilder prefix = new StringBuilder();
        for (int level = 0; level < tab; level++) {
            prefix.append(INDENT);
        }
        return prefix.toString();
    }
}

The XML file is as follows:


<?xml version="1.0"?>
<lib:library xmlns:lib="http://eric.van-der-vlist.com/ns/library" xmlns:hr="http://eric.van-der-vlist.com/ns/person">
    <lib:book id="b0836217462" available="true" checkingandfind='true'>
        <lib:isbn>0836217462</lib:isbn>
        <lib:title xml:lang="en">Being a Dog Is a Full-Time Job</lib:title>
        <hr:author id="CMS">
            <hr:name>Charles M Schulz</hr:name>
            <hr:born>1922-11-26</hr:born>
            <hr:dead>2000-02-12</hr:dead>
        </hr:author>
        <lib:character id="PP">
            <hr:name>Peppermint Patty</hr:name>
            <hr:born>1966-08-22</hr:born>
            <lib:qualification>bold, brash and tomboyish</lib:qualification>
        </lib:character>
        <lib:character id="Snoopy">
            <hr:name>Snoopy</hr:name>
            <hr:born>1950-10-04</hr:born>
            <lib:qualification>extroverted beagle</lib:qualification>
        </lib:character>
        <lib:character id="Schroeder">
            <hr:name>Schroeder</hr:name>
            <hr:born>1951-05-30</hr:born>
            <lib:qualification>brought classical music to the Peanuts strip</lib:qualification>
        </lib:character>
        <lib:character id="Lucy">
            <hr:name>Lucy</hr:name>
            <hr:born>1952-03-03</hr:born>
            <lib:qualification>bossy, crabby and selfish</lib:qualification>
        </lib:character>
    </lib:book>
    <Purchase>
        <PurchaseId>AAAAA</PurchaseId>
        <PurchaseType>ONLINE</PurchaseType>
    </Purchase>
    <Purchase>
        <PurchaseId>BBBBB</PurchaseId>
        <PurchaseType>OFFLINE</PurchaseType>
    </Purchase>
    <Purchase paid='True'>
        <Purchase>
            <Purchase2 nc='true'>List 1</Purchase2>
            <Purchase2>List 2</Purchase2>
        </Purchase>
    </Purchase>
</lib:library>

The output is as follows:


lib:library ==> [
    lib:book ==> [
        lib:isbn ==> 0836217462
        <<attributes>> ==> [
            id ==> b0836217462
            checkingandfind ==> true
            available ==> true
        ]
        lib:title ==> [
            <<attributes>> ==> [
                xml:lang ==> en
            ]
            <<value>> ==> Being a Dog Is a Full-Time Job
        ]
        lib:character ==> [
            0: {
                <<attributes>> ==> [
                    id ==> PP
                ]
                lib:qualification ==> bold, brash and tomboyish
                hr:name ==> Peppermint Patty
                hr:born ==> 1966-08-22
            }
            1: {
                <<attributes>> ==> [
                    id ==> Snoopy
                ]
                lib:qualification ==> extroverted beagle
                hr:name ==> Snoopy
                hr:born ==> 1950-10-04
            }
            2: {
                <<attributes>> ==> [
                    id ==> Schroeder
                ]
                lib:qualification ==> brought classical music to the Peanuts strip
                hr:name ==> Schroeder
                hr:born ==> 1951-05-30
            }
            3: {
                <<attributes>> ==> [
                    id ==> Lucy
                ]
                lib:qualification ==> bossy, crabby and selfish
                hr:name ==> Lucy
                hr:born ==> 1952-03-03
            }
        ]
        hr:author ==> [
            <<attributes>> ==> [
                id ==> CMS
            ]
            hr:dead ==> 2000-02-12
            hr:name ==> Charles M Schulz
            hr:born ==> 1922-11-26
        ]
    ]
    <<attributes>> ==> [
        xmlns:hr ==> http://eric.van-der-vlist.com/ns/person
        xmlns:lib ==> http://eric.van-der-vlist.com/ns/library
    ]
    Purchase ==> [
        0: {
            PurchaseId ==> AAAAA
            PurchaseType ==> ONLINE
        }
        1: {
            PurchaseId ==> BBBBB
            PurchaseType ==> OFFLINE
        }
        2: {
            <<attributes>> ==> [
                paid ==> True
            ]
            Purchase ==> [
                Purchase2 ==> [
                    0: {
                        <<attributes>> ==> [
                            nc ==> true
                        ]
                        <<value>> ==> List 1
                    }
                    1: List 2
                ]
            ]
        }
    ]
]

Wednesday, August 14, 2013

How to parse XML containing namespaces, CDATA and ampersands with PHP

Download xml.class.php

PHP code to parse XML that contains namespaces, CDATA sections and ampersand symbols


<?php
/**
 * Event-driven XML parser that builds a nested PHP array.
 *
 * Result layout:
 *  - an element holding only text collapses to that text (a string);
 *  - otherwise the element is an array keyed by child tag name, with the
 *    reserved keys 'attrib' (attribute array) and 'cdata' (text content);
 *  - repeated sibling tags become a numerically indexed list under the tag name.
 *
 * While an element is open its array carries the bookkeeping keys '@parent'
 * (reference to the enclosing element) and '@idx' (position in a sibling
 * list); both are removed when the element is closed.
 */
class XmlToArrayParser {
    /** The array created by the parser can be assigned to any variable: $anyVarArr = $domObj->array.*/
    public $array = array();
    /** True when xml_parse() reported a failure. */
    private $parse_error = false;
    /** The XML parser handle created in the constructor. */
    private $parser;
    /** Reference to the array of the element that is currently open. */
    private $pointer;

    /**
     * Constructor: $domObj = new xmlToArrayParser($xml);
     *
     * Bare ampersands in $xml are escaped before parsing so that loosely
     * written documents still parse.
     *
     * @param string $xml the XML document text
     */
    public function __construct($xml) {
        $xml = $this->escape_bare_ampersands($xml);
        $this->pointer =& $this->array;
        $this->parser = xml_parser_create("UTF-8");
        // NOTE(review): recent PHP releases appear to deprecate xml_set_object()
        // with string handler names - confirm before upgrading.
        xml_set_object($this->parser, $this);
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);
        xml_set_element_handler($this->parser, "tag_open", "tag_close");
        xml_set_character_data_handler($this->parser, "cdata");
        $this->parse_error = xml_parse($this->parser, ltrim($xml)) ? false : true;
    }

    /** Free the parser. */
    public function __destruct() {
        xml_parser_free($this->parser);
    }

    /**
     * Get the XML error if an error occurred while parsing.
     *
     * @return string|false an HTML-formatted message with code, column and
     *                      line, or false when parsing succeeded
     */
    public function get_xml_error() {
        if ($this->parse_error) {
            $errCode = xml_get_error_code($this->parser);
            $thisError = "Error Code [" . $errCode . "] \"<strong style='color:red;'>" . xml_error_string($errCode) . "</strong>\",
                            at char " . xml_get_current_column_number($this->parser) . "
                            on line " . xml_get_current_line_number($this->parser) . "";
        } else {
            $thisError = $this->parse_error;
        }
        return $thisError;
    }

    /**
     * Escapes ampersands that do not start a predefined entity (&amp; &lt;
     * &gt; &quot; &apos;) or a numeric character reference. Text inside CDATA
     * sections is left untouched, because an ampersand is literal there.
     *
     * @param string $xml raw document text
     * @return string document text with bare ampersands written as &amp;
     */
    private function escape_bare_ampersands($xml) {
        // With one capture group the result alternates: text, CDATA, text, ...
        $parts = preg_split('/(<!\[CDATA\[.*?\]\]>)/s', $xml, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            // Regex engine failure: fall back to treating the input as plain markup.
            $parts = array($xml);
        }
        foreach ($parts as $i => $part) {
            if ($i % 2 === 0) {
                $escaped = preg_replace(
                    '/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)/',
                    '&amp;',
                    $part
                );
                if ($escaped !== null) {
                    $parts[$i] = $escaped;
                }
            }
        }
        return implode('', $parts);
    }

    /** Start-tag handler: creates the element's array under the current pointer and descends into it. */
    private function tag_open($parser, $tag, $attributes) {
        $idx = $this->convert_to_array($tag);
        if (isset($idx)) {
            // A sibling with this tag already exists: append to the list.
            $this->pointer[$tag][$idx] = Array(
                '@idx' => $idx,
                '@parent' => &$this->pointer
            );
            $this->pointer =& $this->pointer[$tag][$idx];
        } else {
            $this->pointer[$tag] = Array(
                '@parent' => &$this->pointer
            );
            $this->pointer =& $this->pointer[$tag];
        }
        if (!empty($attributes)) {
            $this->pointer['attrib'] = $attributes;
        }
    }

    /**
     * Adds the current element's content to the current pointer[cdata] entry.
     * Chunks consisting only of whitespace are ignored, which keeps the
     * indentation between child elements out of the result.
     */
    private function cdata($parser, $cdata) {
        if (strlen(trim($cdata)) > 0) {
            if (isset($this->pointer['cdata'])) {
                $this->pointer['cdata'] .= $cdata;
            } else {
                $this->pointer['cdata'] = $cdata;
            }
        }
    }

    /** End-tag handler: strips the bookkeeping keys, moves back to the parent and collapses text-only elements. */
    private function tag_close($parser, $tag) {
        $current =& $this->pointer;
        unset($current['@idx']);

        $this->pointer =& $this->pointer['@parent'];
        unset($current['@parent']);

        if (isset($current['cdata']) && count($current) == 1) {
            // Text only: replace the element's array with the text itself.
            $current = $current['cdata'];
        } else if (isset($current['cdata']) && $current['cdata'] === '') {
            // Strict comparison on purpose: empty() would also discard the text "0".
            unset($current['cdata']);
        }
    }

    /**
     * Prepares the current element for another child named $tag. When a child
     * with that name already exists as a single item (array or collapsed
     * text), it is wrapped as element 0 of a list.
     *
     * @param string $tag name of the child about to be opened
     * @return int|null index the new child must use, or null when it is the
     *                  first child with this name
     */
    private function convert_to_array($tag) {
        if (!isset($this->pointer[$tag])) {
            return null;
        }
        // XML names cannot be "0", so key 0 only exists once the entry is a list.
        if (!is_array($this->pointer[$tag]) || !array_key_exists(0, $this->pointer[$tag])) {
            $content = $this->pointer[$tag];
            $this->pointer[$tag] = array(
                (0) => $content
            );
        }
        return count($this->pointer[$tag]);
    }
}
?>

Parsing XML example


<?php
// Sample document: two repeated <soap:Body> elements, attributes, a raw
// ampersand in element text and a CDATA section. A nowdoc keeps the markup
// readable without backslash-escaped quotes.
$xmlString = <<<'XML'
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:xsd="http://www.w3.org/2001/XMLSchema">
    <soap:Body>
        <CreateCustomerResponse xmlns="https://www.eway.com.au/gateway/managedpayment">
            <CreateCustomerResult>9876543211000</CreateCustomerResult>
        </CreateCustomerResponse>
    </soap:Body>
    <soap:Body>
        <CreateCustomerResponse xmlns="https://www.eway.com.au/gateway/managedpayment">
            <CreateCustomerResult>9876543211000</CreateCustomerResult>
        </CreateCustomerResponse>
    </soap:Body>
    <address name="pritom">
        <actual content="Pritom Kumar"/>
        <forward forwarding="yes" content="pritom">Forward to pritom & else one</forward>
        <response responding="yes"><![CDATA[Thanks you.]]></response>
    </address>
</soap:Envelope>
XML;

// Parse inside the <pre> wrapper (as before), then dump the resulting array.
echo "<pre>";
$xmlObject = new XmlToArrayParser($xmlString);
echo print_r($xmlObject->array, true), "</pre>";
?>

Output


Array
(
    [soap:Envelope] => Array
        (
            [attrib] => Array
                (
                    [xmlns:soap] => http://www.w3.org/2003/05/soap-envelope
                    [xmlns:xsi] => http://www.w3.org/2001/XMLSchema-instance
                    [xmlns:xsd] => http://www.w3.org/2001/XMLSchema
                )

            [soap:Body] => Array
                (
                    [0] => Array
                        (
                            [CreateCustomerResponse] => Array
                                (
                                    [attrib] => Array
                                        (
                                            [xmlns] => https://www.eway.com.au/gateway/managedpayment
                                        )

                                    [CreateCustomerResult] => 9876543211000
                                )

                        )

                    [1] => Array
                        (
                            [CreateCustomerResponse] => Array
                                (
                                    [attrib] => Array
                                        (
                                            [xmlns] => https://www.eway.com.au/gateway/managedpayment
                                        )

                                    [CreateCustomerResult] => 9876543211000
                                )

                        )

                )

            [address] => Array
                (
                    [attrib] => Array
                        (
                            [name] => pritom
                        )

                    [actual] => Array
                        (
                            [attrib] => Array
                                (
                                    [content] => Pritom Kumar
                                )

                        )

                    [forward] => Array
                        (
                            [attrib] => Array
                                (
                                    [forwarding] => yes
                                    [content] => pritom
                                )

                            [cdata] => Forward to pritom & else one
                        )

                    [response] => Array
                        (
                            [attrib] => Array
                                (
                                    [responding] => yes
                                )

                            [cdata] => Thanks you.
                        )

                )

        )

)

Monday, April 29, 2013

Read and parse a CSV file using PHP

Details http://code.google.com/p/parsecsv-for-php/
Or download from here

Use:

# Load the parseCSV library; the path is relative to the including script.
require_once('../parsecsv.lib.php');
# create new parseCSV object.
$csv = new parseCSV();
# Configure the object first: '_books.csv' is only read by the auto() call
# further down, which uses automatic delimiter detection.

# Three sample filter expressions. NOTE(review): these are plain assignments
# to the same property, so each one presumably replaces the previous and only
# the last would apply when auto() runs - keep just the one you need.
$csv->conditions = 'author does not contain dan brown';
$csv->conditions = 'rating < 4 OR author is John Twelve Hawks';
$csv->conditions = 'rating > 4 AND author is Dan Brown';

# Column to sort by (presumably a header name from the CSV - confirm in the
# library documentation).
$csv->sort_by = 'title';

# offset from the beginning of the file,
# ignoring the first X number of rows.
$csv->offset = 2;

# limit the number of returned rows.
$csv->limit = 3;

# Parse the file with the settings above.
$csv->auto('_books.csv');

# Both loops end in ';' and therefore have empty bodies: they are placeholders
# showing what can be iterated. Replace the ';' with a { ... } body to use
# $value (titles) or $key/$row (data).
foreach ($csv->titles as $value);
foreach ($csv->data as $key => $row);