Showing posts with label xml. Show all posts
Showing posts with label xml. Show all posts

Monday, December 2, 2013

Removing invalid characters from XML

XML, as you probably know, essentially consists of markup and character data. The special markup characters are > (greater than), < (less than), ' (single quote), " (double quote) and & (ampersand). Character data, which appears inside text nodes or in attribute values, can be almost anything: any character in any language.
However, not all Unicode characters are allowed in XML as character data. There are two sections of the XML specification that one needs to refer to in order to understand this:
  1. http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char
  2. http://www.w3.org/TR/2000/REC-xml-20001006#syntax
The following Java code is an implementation of these rules. It essentially removes all these illegal Unicode characters.

/**
 * Returns a copy of the given string that keeps only the code points listed below
 * and drops every other one.
 *
 * <p>Kept code points: TAB (0x9), LF (0xA), CR (0xD), 0x20-0xD7FF, 0xE000-0xFFFD and
 * 0x10000-0x10FFFF. The input is walked by code point, so a surrogate pair is read
 * and copied as a single unit.
 *
 * @param xmlString the text to clean; must not be {@code null}
 * @return the text with all other code points removed
 */
private static String removeInvalidXMLCharacters(String xmlString) {
    final StringBuilder cleaned = new StringBuilder();
    final int length = xmlString.length();
    int offset = 0;
    while (offset < length) {
        // Full code point at this offset (may span two chars).
        final int cp = xmlString.codePointAt(offset);
        offset += Character.charCount(cp);

        final boolean allowedControl = cp == 0x9 || cp == 0xA || cp == 0xD;
        final boolean belowSurrogates = cp >= 0x20 && cp <= 0xD7FF;
        final boolean aboveSurrogates = cp >= 0xE000 && cp <= 0xFFFD;
        final boolean supplementary = cp >= 0x10000 && cp <= 0x10FFFF;

        if (!(allowedControl || belowSurrogates || aboveSurrogates || supplementary)) {
            continue;
        }
        cleaned.append(Character.toChars(cp));
    }
    return cleaned.toString();
}

Tuesday, September 17, 2013

Build xml using java code from hashmap

Download XmlBuilder.java class

Follow this link to parse any type of XML file into a map/list:


package xmlparser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Renders a nested {@link Map} / {@link List} structure as an indented XML string.
 *
 * <p>Rules applied while rendering:
 * <ul>
 *   <li>Every key of the top-level map becomes a top-level element.</li>
 *   <li>A {@code Map} value becomes a nested element; a {@code List} value repeats the
 *       element once per item; any other value is written as CDATA text.</li>
 *   <li>The key {@code <<attributes>>} (case-insensitive) must map to a {@code Map} whose
 *       entries are written as attributes of the enclosing element.</li>
 *   <li>Any other key starting with {@code <<} (for example {@code <<value>>}) is written
 *       as the CDATA text of the enclosing element itself.</li>
 *   <li>Element names are sanitized: names that do not start with a letter, {@code :} or
 *       {@code _} get an {@code obj_} prefix, and runs of characters outside
 *       {@code [0-9a-zA-Z:.]} are replaced by {@code _}.</li>
 * </ul>
 *
 * <p>Instances keep a working buffer and are therefore not safe for concurrent use.
 *
 * @author Pritom K Mondal
 * @created 28th August 2014
 */
public class XmlBuilder {
    private static final String NEW_LINE = "\n";
    private static final String INDENT = "    ";
    private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>";
    private static final String ATTRIBUTES_KEY = "<<attributes>>";
    private static final String SPECIAL_KEY_PREFIX = "<<";
    private static final String CDATA_OPEN = "<![CDATA[";
    private static final String CDATA_CLOSE = "]]>";
    private static final String MATCHER_REPLACE = "[^0-9a-zA-Z:\\.]+";
    private static final String MATCHER_1 = "[a-zA-Z:\\_](.*)";

    private final Map<?, ?> dataMap;
    /** Working buffer; reset at the start of every {@link #toXML()} call. */
    private final StringBuilder xml = new StringBuilder();

    /**
     * Small demo: builds a sample structure and prints the generated XML.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Map<String, Object> dataMap = new HashMap<String, Object>();
        for (int index = 1; index <= 2; index++) {
            Map<String, Object> tempMap = new HashMap<String, Object>();
            tempMap.put("id", index);
            tempMap.put("name", "Name: " + index);
            tempMap.put("roll", "Roll: " + index);

            Map<String, Object> attMap = new HashMap<String, Object>();
            attMap.put("id", index * 10);
            attMap.put("attr-name", "Name == " + (index * 20));
            tempMap.put("<<attributes>>", attMap);

            dataMap.put("user" + index, tempMap);
        }

        List<Object> dataList = new ArrayList<Object>();
        for (int index = 0; index < 2; index++) {
            Map<String, Object> tempMap = new HashMap<String, Object>();
            tempMap.put("id", index);
            tempMap.put("name", "Name: " + index);
            tempMap.put("roll", "Roll: " + index);

            dataList.add(tempMap);
        }
        dataMap.put("listMap", dataList);

        Map<String, Object> resultMap = new HashMap<String, Object>();
        resultMap.put("success", true);
        resultMap.put("message", "Thank you for generating this XML.");
        resultMap.put("dataMap", dataMap);
        resultMap.put("dataList", dataList);

        Map<String, Object> finalMap = new HashMap<String, Object>();
        finalMap.put("rootTag", resultMap);

        String xmlString = new XmlBuilder(finalMap).toXML();
        System.out.println(xmlString);
    }

    /**
     * @param dataMap the structure to render; each key becomes a top-level element
     */
    XmlBuilder(Map<?, ?> dataMap) {
        this.dataMap = dataMap;
    }

    /**
     * Renders the map given to the constructor.
     *
     * @return the XML declaration followed by the rendered elements
     * @throws ClassCastException if an {@code <<attributes>>} value is not a {@code Map}
     */
    public String toXML() {
        xml.setLength(0);
        xml.append(XML_HEADER).append(NEW_LINE);
        // The wrapper name is ignored at cycle 0: the top-level map has no element of its own.
        renderMapAsXml(dataMap, "data", 0);
        return xml.toString();
    }

    /**
     * Writes one {@code wrapper} element per list item.
     *
     * @param dataList items to render
     * @param wrapper  raw element name shared by all items
     * @param cycle    nesting depth; items are indented by {@code cycle - 1} levels
     */
    private void renderListAsXml(List<?> dataList, String wrapper, int cycle) {
        // Sanitize fully so list items use the same element name rules as map entries.
        String tag = sanitizeName(wrapper);
        for (Object item : dataList) {
            if (item instanceof List) {
                appendIndent(cycle - 1);
                xml.append('<').append(tag).append('>').append(NEW_LINE);
                renderListAsXml((List<?>) item, tag, cycle + 1);
                appendIndent(cycle - 1);
                xml.append("</").append(tag).append('>').append(NEW_LINE);
            } else if (item instanceof Map) {
                renderMapAsXml((Map<?, ?>) item, tag, cycle);
                xml.append(NEW_LINE);
            } else {
                appendIndent(cycle - 1);
                xml.append('<').append(tag).append('>');
                appendCData(item);
                xml.append("</").append(tag).append('>').append(NEW_LINE);
            }
        }
    }

    /**
     * Writes {@code map} as an element named {@code wrapper}; at {@code cycle == 0} only the
     * children are written.
     *
     * @param map     entries to render
     * @param wrapper raw element name
     * @param cycle   nesting depth; 0 for the top-level map
     */
    private void renderMapAsXml(Map<?, ?> map, String wrapper, int cycle) {
        String tag = sanitizeName(wrapper);
        boolean hasOwnElement = cycle > 0;
        if (hasOwnElement) {
            appendIndent(cycle - 1);
            xml.append('<').append(tag);
            // Attributes are only meaningful when there is an element to attach them to.
            appendAttributes(map);
            xml.append('>').append(NEW_LINE);
        }

        int otherTags = 0;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            String key = String.valueOf(entry.getKey());
            Object value = entry.getValue();
            if (ATTRIBUTES_KEY.equalsIgnoreCase(key)) {
                continue;
            }
            if (value instanceof List) {
                renderListAsXml((List<?>) value, key, cycle + 1);
                otherTags++;
            } else if (key.startsWith(SPECIAL_KEY_PREFIX)) {
                // Inline text of the enclosing element: pull it up onto the opening-tag line.
                stripTrailingNewLine();
                appendCData(value);
            } else if (value instanceof Map) {
                renderMapAsXml((Map<?, ?>) value, key, cycle + 1);
                xml.append(NEW_LINE);
                otherTags++;
            } else {
                String childTag = sanitizeName(key);
                appendIndent(cycle);
                xml.append('<').append(childTag).append('>');
                appendCData(value);
                xml.append("</").append(childTag).append('>').append(NEW_LINE);
                otherTags++;
            }
        }

        if (hasOwnElement) {
            // Without child elements the closing tag follows the opening tag or inline text
            // directly, so it is not indented.
            appendIndent(otherTags == 0 ? 0 : cycle - 1);
            xml.append("</").append(tag).append('>');
        }
    }

    /** Writes every entry of each {@code <<attributes>>} map found in {@code map} as an attribute. */
    private void appendAttributes(Map<?, ?> map) {
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            if (!ATTRIBUTES_KEY.equalsIgnoreCase(String.valueOf(entry.getKey()))) {
                continue;
            }
            Map<?, ?> attributes = (Map<?, ?>) entry.getValue();
            if (attributes == null) {
                continue;
            }
            for (Map.Entry<?, ?> attribute : attributes.entrySet()) {
                xml.append(' ').append(attribute.getKey())
                        .append("='").append(escapeAttribute(attribute.getValue())).append('\'');
            }
        }
    }

    /** Escapes the characters that would break a single-quoted attribute value. */
    private static String escapeAttribute(Object value) {
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Writes {@code value} as a CDATA section. Entities are not expanded inside CDATA, so an
     * embedded {@code ]]>} is handled by ending the section after {@code ]]} and starting a
     * new one before {@code >}.
     */
    private void appendCData(Object value) {
        String text = String.valueOf(value).replace(CDATA_CLOSE, "]]]]><![CDATA[>");
        xml.append(CDATA_OPEN).append(text).append(CDATA_CLOSE);
    }

    /** Removes a trailing line break from the buffer, if there is one. */
    private void stripTrailingNewLine() {
        int tail = xml.length() - NEW_LINE.length();
        if (tail >= 0 && xml.indexOf(NEW_LINE, tail) == tail) {
            xml.setLength(tail);
        }
    }

    /** Turns an arbitrary key into an element name following the class-level rules. */
    private static String sanitizeName(Object rawName) {
        String name = String.valueOf(rawName).trim();
        if (!name.matches(MATCHER_1)) {
            name = "obj_" + name;
        }
        return name.replaceAll(MATCHER_REPLACE, "_");
    }

    /** Appends {@code levels} indentation units; does nothing for zero or negative values. */
    private void appendIndent(int levels) {
        for (int index = 1; index <= levels; index++) {
            xml.append(INDENT);
        }
    }
}

The output would be as follows:


<?xml version="1.0" encoding="UTF-8" ?>
<rootTag>
    <dataList>
        <id><![CDATA[0]]></id>
        <roll><![CDATA[Roll: 0]]></roll>
        <name><![CDATA[Name: 0]]></name>
    </dataList>
    <dataList>
        <id><![CDATA[1]]></id>
        <roll><![CDATA[Roll: 1]]></roll>
        <name><![CDATA[Name: 1]]></name>
    </dataList>
    <message><![CDATA[Thank you for generating this XML.]]></message>
    <dataMap>
        <user2 id='20' attr-name='Name == 40'>
            <id><![CDATA[2]]></id>
            <roll><![CDATA[Roll: 2]]></roll>
            <name><![CDATA[Name: 2]]></name>
        </user2>
        <user1 id='10' attr-name='Name == 20'>
            <id><![CDATA[1]]></id>
            <roll><![CDATA[Roll: 1]]></roll>
            <name><![CDATA[Name: 1]]></name>
        </user1>
        <listMap>
            <id><![CDATA[0]]></id>
            <roll><![CDATA[Roll: 0]]></roll>
            <name><![CDATA[Name: 0]]></name>
        </listMap>
        <listMap>
            <id><![CDATA[1]]></id>
            <roll><![CDATA[Roll: 1]]></roll>
            <name><![CDATA[Name: 1]]></name>
        </listMap>
    </dataMap>
    <success><![CDATA[true]]></success>
</rootTag>

Thursday, September 12, 2013

Parse XML using java and store data in HashMap recursively

Download XmlParser.java class

Online Debug: http://ideone.com/eV2NDC

Follow this link to build XML from a hash map/array list:


package xmlparser;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Parses an XML document (from a string or a file) into nested maps and lists.
 *
 * <p>Shape of the result:
 * <ul>
 *   <li>An element with child elements becomes a {@code Map} keyed by child element name;
 *       its attributes, if any, are stored under {@code <<attributes>>}.</li>
 *   <li>An element without child elements becomes its trimmed text, or {@code null} when it
 *       has no text. If it has attributes it becomes a {@code Map} holding
 *       {@code <<value>>} and {@code <<attributes>>} instead.</li>
 *   <li>Sibling elements sharing a name are collected into a {@code List} under that
 *       name.</li>
 * </ul>
 *
 * @author Pritom K Mondal
 * @published 12th September 2013 08:04 PM
 */
public class XmlParser {
    private static final String ATTRIBUTES_KEY = "<<attributes>>";
    private static final String VALUE_KEY = "<<value>>";

    private String xmlString = "";
    private File xmlFile = null;
    private Document doc = null;

    /**
     * Demo entry point: parses {@code xml1.xml} and prints the resulting structure.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        String xmlString = "YOUR XML STRING HERE IF YOU WANT PARSE DATA FROM STRING";
        File file = new File("xml1.xml");
        XmlParser xmlParser = new XmlParser(xmlString);
        xmlParser = new XmlParser(file);

        Map xmlMap = xmlParser.parseXML();
        if (xmlMap == null) {
            // parseXML already reported the cause; avoid a NullPointerException while printing.
            System.err.println("Could not parse " + file);
            return;
        }
        print(xmlMap, 0);
    }

    /**
     * @param xmlString the XML document text to parse
     */
    public XmlParser(String xmlString) {
        this.xmlString = xmlString;
    }

    /**
     * @param xmlFile the XML file to parse
     */
    public XmlParser(File xmlFile) {
        this.xmlFile = xmlFile;
    }

    /**
     * Parses the configured source; the file is used when one was supplied, otherwise the string.
     *
     * @return the parsed structure, or {@code null} if the document could not be read or
     *         parsed (the stack trace is printed)
     */
    public Map parseXML() {
        try {
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            disableExternalEntities(dbFactory);
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();

            if (this.xmlFile != null) {
                doc = dBuilder.parse(this.xmlFile);
            }
            else {
                // A character stream avoids a round trip through the platform default
                // charset, which could garble non-ASCII text.
                doc = dBuilder.parse(new InputSource(new StringReader(xmlString)));
            }

            doc.getDocumentElement().normalize();

            NodeList topLevelNodes = doc.getChildNodes();
            Map<String, Object> result = new HashMap<String, Object>();
            MyNodeList topLevelElements = new MyNodeList();

            String emptyNodeName = null, emptyNodeValue = null;
            for (int index = 0; index < topLevelNodes.getLength(); index++) {
                Node current = topLevelNodes.item(index);
                if (current.getNodeType() == Node.ELEMENT_NODE) {
                    topLevelElements.addNode(current);
                }
                emptyNodeName = current.getNodeName();
                emptyNodeValue = current.getNodeValue();
            }

            if (topLevelElements.getLength() == 0 && emptyNodeName != null
                    && emptyNodeValue != null) {
                result.put(emptyNodeName, emptyNodeValue);
                return result;
            }

            this.parseXMLNode(topLevelElements, result);
            return result;
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Turns off resolution of external entities and external DTDs (XXE hardening). Features
     * the underlying parser does not recognize are skipped.
     */
    private static void disableExternalEntities(DocumentBuilderFactory factory) {
        String[] features = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd"
        };
        for (String feature : features) {
            try {
                factory.setFeature(feature, false);
            } catch (ParserConfigurationException ignored) {
                // Best effort: this parser implementation does not know the feature.
            }
        }
    }

    /**
     * Stores every element of {@code nList} in {@code result}, recursing into elements that
     * have child elements.
     */
    private void parseXMLNode(NodeList nList, Map<String, Object> result) {
        for (int index = 0; index < nList.getLength(); index++) {
            Node nNode = nList.item(index);
            if (nNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // Classify by actual element children, so a single empty child such as
            // <a><b/></a> is not mistaken for text.
            MyNodeList childElements = elementChildren(nNode);
            if (childElements.getLength() == 0) {
                this.putValue(result, nNode);
                continue;
            }
            Map<String, Object> children = new HashMap<String, Object>();
            addValue(result, nNode.getNodeName(), children);
            Map<String, Object> attributes = readAttributes(nNode);
            if (!attributes.isEmpty()) {
                children.put(ATTRIBUTES_KEY, attributes);
            }
            this.parseXMLNode(childElements, children);
        }
    }

    /** Stores a leaf element: its text, or a value/attributes map when it has attributes. */
    private void putValue(Map<String, Object> result, Node nNode) {
        Object putNode = textValue(nNode);
        Map<String, Object> attributes = readAttributes(nNode);
        if (!attributes.isEmpty()) {
            Map<String, Object> nodeMap = new HashMap<String, Object>();
            nodeMap.put(VALUE_KEY, putNode);
            nodeMap.put(ATTRIBUTES_KEY, attributes);
            putNode = nodeMap;
        }
        addValue(result, nNode.getNodeName(), putNode);
    }

    /**
     * Adds {@code value} under {@code name}; a second value for the same name converts the
     * entry into a list and later values are appended to it.
     */
    @SuppressWarnings("unchecked")
    private static void addValue(Map<String, Object> result, String name, Object value) {
        if (!result.containsKey(name)) {
            result.put(name, value);
            return;
        }
        Object existing = result.get(name);
        if (existing instanceof List) {
            // Only this method stores lists in the result, so the element type is Object.
            ((List<Object>) existing).add(value);
        } else {
            List<Object> merged = new ArrayList<Object>();
            merged.add(existing);
            merged.add(value);
            result.put(name, merged);
        }
    }

    /** Collects the direct children of {@code parent} that are elements. */
    private MyNodeList elementChildren(Node parent) {
        MyNodeList elements = new MyNodeList();
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                elements.addNode(child);
            }
        }
        return elements;
    }

    /** Copies the attributes of {@code node} into a name-to-value map (empty if none). */
    private static Map<String, Object> readAttributes(Node node) {
        Map<String, Object> attributes = new HashMap<String, Object>();
        int count = node.getAttributes().getLength();
        for (int index = 0; index < count; index++) {
            Node attribute = node.getAttributes().item(index);
            attributes.put(attribute.getNodeName(), attribute.getNodeValue());
        }
        return attributes;
    }

    /**
     * Concatenates the text and CDATA children of {@code node}, ignoring comments and other
     * node types.
     *
     * @return the trimmed text, or {@code null} when the node has no text or CDATA child
     */
    private static String textValue(Node node) {
        StringBuilder text = null;
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            short type = child.getNodeType();
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                if (text == null) {
                    text = new StringBuilder();
                }
                text.append(child.getNodeValue());
            }
        }
        return text == null ? null : text.toString().trim();
    }

    /** Minimal list-backed {@link NodeList} used to hand filtered children to the parser. */
    class MyNodeList implements NodeList {
        final List<Node> nodes = new ArrayList<Node>();

        public MyNodeList() {}

        public void addNode(Node node) {
            nodes.add(node);
        }

        /** Returns the node at {@code index}, or {@code null} when the index is out of range. */
        @Override
        public Node item(int index) {
            if (index < 0 || index >= nodes.size()) {
                return null;
            }
            return nodes.get(index);
        }

        @Override
        public int getLength() {
            return nodes.size();
        }
    }

    /** Prints {@code map} to standard output, one entry per line, indented by {@code tab} levels. */
    private static void print(Map<?, ?> map, int tab) {
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            String key = String.valueOf(entry.getKey());
            Object value = entry.getValue();
            if (value instanceof Map) {
                System.out.println(getTab(tab) + key + " ==> [");
                print((Map<?, ?>) value, tab + 1);
                System.out.println(getTab(tab) + "]");
            }
            else if (value instanceof List) {
                System.out.println(getTab(tab) + key + " ==> [");
                print((List<?>) value, tab + 1);
                System.out.println(getTab(tab) + "]");
            }
            else {
                System.out.println(getTab(tab) + key + " ==> " + value);
            }
        }
    }

    /** Prints the items of {@code list} with their index, indented by {@code tab} levels. */
    private static void print(List<?> list, int tab) {
        for (int index = 0; index < list.size(); index++) {
            Object value = list.get(index);
            if (value instanceof Map) {
                System.out.println(getTab(tab) + index + ": {");
                print((Map<?, ?>) value, tab + 1);
                System.out.println(getTab(tab) + "}");
            }
            else if (value instanceof List) {
                print((List<?>) value, tab + 1);
            }
            else {
                System.out.println(getTab(tab) + index + ": " + value);
            }
        }
    }

    /**
     * @param tab number of indentation levels
     * @return {@code tab} repetitions of four spaces
     */
    public static String getTab(Integer tab) {
        StringBuilder indent = new StringBuilder();
        for (int index = 0; index < tab; index++) {
            indent.append("    ");
        }
        return indent.toString();
    }
}

The XML file is as follows:


<?xml version="1.0"?>
<lib:library xmlns:lib="http://eric.van-der-vlist.com/ns/library" xmlns:hr="http://eric.van-der-vlist.com/ns/person">
    <lib:book id="b0836217462" available="true" checkingandfind='true'>
        <lib:isbn>0836217462</lib:isbn>
        <lib:title xml:lang="en">Being a Dog Is a Full-Time Job</lib:title>
        <hr:author id="CMS">
            <hr:name>Charles M Schulz</hr:name>
            <hr:born>1922-11-26</hr:born>
            <hr:dead>2000-02-12</hr:dead>
        </hr:author>
        <lib:character id="PP">
            <hr:name>Peppermint Patty</hr:name>
            <hr:born>1966-08-22</hr:born>
            <lib:qualification>bold, brash and tomboyish</lib:qualification>
        </lib:character>
        <lib:character id="Snoopy">
            <hr:name>Snoopy</hr:name>
            <hr:born>1950-10-04</hr:born>
            <lib:qualification>extroverted beagle</lib:qualification>
        </lib:character>
        <lib:character id="Schroeder">
            <hr:name>Schroeder</hr:name>
            <hr:born>1951-05-30</hr:born>
            <lib:qualification>brought classical music to the Peanuts strip</lib:qualification>
        </lib:character>
        <lib:character id="Lucy">
            <hr:name>Lucy</hr:name>
            <hr:born>1952-03-03</hr:born>
            <lib:qualification>bossy, crabby and selfish</lib:qualification>
        </lib:character>
    </lib:book>
    <Purchase>
        <PurchaseId>AAAAA</PurchaseId>
        <PurchaseType>ONLINE</PurchaseType>
    </Purchase>
    <Purchase>
        <PurchaseId>BBBBB</PurchaseId>
        <PurchaseType>OFFLINE</PurchaseType>
    </Purchase>
    <Purchase paid='True'>
        <Purchase>
            <Purchase2 nc='true'>List 1</Purchase2>
            <Purchase2>List 2</Purchase2>
        </Purchase>
    </Purchase>
</lib:library>

The output will be as follows:


lib:library ==> [
    lib:book ==> [
        lib:isbn ==> 0836217462
        <<attributes>> ==> [
            id ==> b0836217462
            checkingandfind ==> true
            available ==> true
        ]
        lib:title ==> [
            <<attributes>> ==> [
                xml:lang ==> en
            ]
            <<value>> ==> Being a Dog Is a Full-Time Job
        ]
        lib:character ==> [
            0: {
                <<attributes>> ==> [
                    id ==> PP
                ]
                lib:qualification ==> bold, brash and tomboyish
                hr:name ==> Peppermint Patty
                hr:born ==> 1966-08-22
            }
            1: {
                <<attributes>> ==> [
                    id ==> Snoopy
                ]
                lib:qualification ==> extroverted beagle
                hr:name ==> Snoopy
                hr:born ==> 1950-10-04
            }
            2: {
                <<attributes>> ==> [
                    id ==> Schroeder
                ]
                lib:qualification ==> brought classical music to the Peanuts strip
                hr:name ==> Schroeder
                hr:born ==> 1951-05-30
            }
            3: {
                <<attributes>> ==> [
                    id ==> Lucy
                ]
                lib:qualification ==> bossy, crabby and selfish
                hr:name ==> Lucy
                hr:born ==> 1952-03-03
            }
        ]
        hr:author ==> [
            <<attributes>> ==> [
                id ==> CMS
            ]
            hr:dead ==> 2000-02-12
            hr:name ==> Charles M Schulz
            hr:born ==> 1922-11-26
        ]
    ]
    <<attributes>> ==> [
        xmlns:hr ==> http://eric.van-der-vlist.com/ns/person
        xmlns:lib ==> http://eric.van-der-vlist.com/ns/library
    ]
    Purchase ==> [
        0: {
            PurchaseId ==> AAAAA
            PurchaseType ==> ONLINE
        }
        1: {
            PurchaseId ==> BBBBB
            PurchaseType ==> OFFLINE
        }
        2: {
            <<attributes>> ==> [
                paid ==> True
            ]
            Purchase ==> [
                Purchase2 ==> [
                    0: {
                        <<attributes>> ==> [
                            nc ==> true
                        ]
                        <<value>> ==> List 1
                    }
                    1: List 2
                ]
            ]
        }
    ]
]