Showing posts with label read csv. Show all posts
Showing posts with label read csv. Show all posts

Friday, October 10, 2014

Read & Construct CSV File Using Java Code




package com.pritom.kumar;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a CSV file into memory and re-emits it as console text, an HTML table,
 * or a normalized (fully quoted) CSV file.
 *
 * <p>Parsing is line based: a quoted field may contain the delimiter and doubled
 * quotes ({@code ""}), but not a line break. Only the first character of
 * {@link #DELIMITER} is used when parsing, while the whole string is emitted
 * between fields when writing.
 *
 * <p>Input is decoded and output is encoded with {@link #ENCODING} (UTF-8).
 *
 * Created by pritom on 10/10/2014.
 */
public class ReadCsvFile {
    final static Charset ENCODING = StandardCharsets.UTF_8;

    /** File read by the no-argument {@link #run()}. */
    private static final String DEFAULT_INPUT_FILE = "input.csv";

    /** Largest number of columns seen in any row so far. */
    private int maxRowSize = 0;

    /** Parsed rows, in file order; each row is the list of its field values. */
    List<List<String>> dataList = new ArrayList<>();

    /** Field separator; must not be empty. */
    public String DELIMITER = ",";

    /**
     * Demo entry point: parses {@code input.csv}, prints it, then writes
     * {@code csv-output.html} and {@code csv-output.csv} to the working directory.
     *
     * @param args ignored
     * @throws Exception if the input cannot be read or an output cannot be written
     */
    public static void main(String[] args) throws Exception {
        ReadCsvFile readCsvFile = new ReadCsvFile();
        long started = System.currentTimeMillis();
        readCsvFile.run();
        long timeTaken = System.currentTimeMillis() - started;
        readCsvFile.prettyPrint();
        readCsvFile.println("Time Taken: " + timeTaken + " Milli Seconds.");
        readCsvFile.println(readCsvFile.toHtml("csv-output.html"));
        readCsvFile.println(readCsvFile.toCsv("csv-output.csv"));
        // Return normally so the process exits with status 0; a non-zero status
        // (previously 200) tells the calling shell that the run failed.
    }

    /**
     * Writes every parsed row to {@code fileName} as CSV with all fields quoted.
     *
     * @param fileName target file, relative or absolute
     * @return the absolute path of the written file
     * @throws Exception if the file cannot be written
     */
    public String toCsv(String fileName) throws Exception {
        File file = new File(fileName);
        List<String> fileLines = new ArrayList<>(dataList.size());
        for (List<String> row : dataList) {
            fileLines.add(toCsvLine(row));
        }
        writeLines(fileLines, file.getAbsolutePath());
        return file.getAbsolutePath();
    }

    /** Joins one row into a CSV line, quoting each field and doubling embedded quotes. */
    private String toCsvLine(List<?> data) {
        StringBuilder csvLine = new StringBuilder();
        boolean firstValue = true;
        for (Object value : data) {
            if (!firstValue) {
                csvLine.append(DELIMITER);
            }
            firstValue = false;
            String text = String.valueOf(value);
            csvLine.append('"');
            for (int index = 0; index < text.length(); index++) {
                char current = text.charAt(index);
                if (current == '"') {
                    csvLine.append('"');
                }
                csvLine.append(current);
            }
            csvLine.append('"');
        }
        return csvLine.toString();
    }

    /**
     * Writes the parsed rows to {@code fileName} as an HTML table, one table row
     * per output line. The first cell of each row is its 1-based row number and
     * short rows are padded with empty cells. Cell values are HTML-escaped.
     *
     * @param fileName target file, relative or absolute
     * @return the absolute path of the written file
     * @throws Exception if the file cannot be written
     */
    public String toHtml(String fileName) throws Exception {
        File file = new File(fileName);
        for (List<?> row : dataList) {
            maxRowSize = Math.max(maxRowSize, row.size());
        }
        List<String> fileLines = new ArrayList<>(dataList.size() + 2);
        fileLines.add("<table>");
        for (int rowIndex = 0; rowIndex < dataList.size(); rowIndex++) {
            List<?> row = dataList.get(rowIndex);
            StringBuilder html = new StringBuilder();
            html.append("<tr><td>").append(rowIndex + 1).append("</td>");
            for (int column = 0; column < maxRowSize; column++) {
                html.append("<td style='border: 1px solid black;'>");
                if (column < row.size()) {
                    // Values come straight from the CSV file, so never emit them as markup.
                    html.append(escapeHtml(row.get(column)));
                }
                html.append("&nbsp;</td>");
            }
            html.append("</tr>");
            fileLines.add(html.toString());
        }
        fileLines.add("</table>");
        writeLines(fileLines, file.getAbsolutePath());
        return file.getAbsolutePath();
    }

    /** Replaces the characters that are significant in HTML text and attributes. */
    private static String escapeHtml(Object value) {
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int index = 0; index < text.length(); index++) {
            char current = text.charAt(index);
            switch (current) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(current);
            }
        }
        return escaped.toString();
    }

    /**
     * Best-effort write of {@code aLines} to {@code aFileName}, one element per line.
     *
     * @param aLines    lines to write
     * @param aFileName target file
     * @return {@code true} on success; {@code false} if writing failed, in which
     *         case the stack trace is printed to standard error
     * @throws IOException declared for backward compatibility; failures are
     *                     reported through the return value
     */
    public Boolean writeStringList(List<String> aLines, String aFileName) throws IOException {
        try {
            writeLines(aLines, aFileName);
            return true;
        }
        catch (Exception ex) {
            ex.printStackTrace();
            return false;
        }
    }

    /** Writes the lines with {@link #ENCODING}, propagating any failure to the caller. */
    private void writeLines(List<String> lines, String fileName) throws IOException {
        Files.write(Paths.get(fileName), lines, ENCODING);
    }

    /**
     * Prints every row to standard output as
     * {@code <row number>: <field>, <field>, ...}.
     */
    public void prettyPrint() {
        for (int rowIndex = 0; rowIndex < dataList.size(); rowIndex++) {
            List<?> row = dataList.get(rowIndex);
            print((rowIndex + 1) + ": ");
            for (int column = 0; column < row.size(); column++) {
                print("<" + row.get(column) + ">");
                if (column + 1 < row.size()) {
                    print(", ");
                }
            }
            println("");
        }
    }

    /**
     * Parses {@code input.csv} from the working directory.
     *
     * @throws Exception if the file is missing or cannot be read
     */
    public void run() throws Exception {
        run(DEFAULT_INPUT_FILE);
    }

    /**
     * Parses {@code csvFile} and appends its rows to {@link #dataList}. Lines are
     * trimmed before parsing and blank lines are skipped.
     *
     * @param csvFile path of the CSV file to read
     * @throws Exception if the file is missing or cannot be read
     */
    public void run(String csvFile) throws Exception {
        // Decode with the same charset used for output instead of the platform default.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(csvFile), ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                List<String> row = parseLine(trimmed);
                maxRowSize = Math.max(maxRowSize, row.size());
                dataList.add(row);
            }
        }
    }

    /**
     * Splits one CSV line into its fields.
     *
     * <p>Quotes toggle quoted mode and are dropped; a quote that re-opens quoted
     * mode after the field already had quoted content (the second quote of a
     * doubled {@code ""}) is kept as a literal quote. Carriage returns outside
     * quotes are ignored and a line feed outside quotes ends the line.
     */
    private List<String> parseLine(String string) {
        int length = string.length();
        int position = 0;
        while (position < length && string.charAt(position) == '\r') {
            position++;
        }
        List<String> fields = new ArrayList<>();
        if (position >= length) {
            return fields;
        }
        char delimiter = DELIMITER.charAt(0);
        StringBuilder field = new StringBuilder();
        boolean inQuotes = false;
        boolean fieldStarted = false;
        for (; position < length; position++) {
            char current = string.charAt(position);
            if (inQuotes) {
                fieldStarted = true;
                if (current == '"') {
                    inQuotes = false;
                }
                else {
                    field.append(current);
                }
            }
            else if (current == '"') {
                inQuotes = true;
                if (fieldStarted) {
                    field.append('"');
                }
            }
            else if (current == delimiter) {
                fields.add(field.toString());
                field.setLength(0);
                fieldStarted = false;
            }
            else if (current == '\n') {
                break;
            }
            else if (current != '\r') {
                field.append(current);
            }
        }
        fields.add(field.toString());
        return fields;
    }

    private void print(Object object) {
        System.out.print(object);
    }

    private void println(Object object) {
        System.out.println(object);
    }
}

The output will look like this:


1: <permalink>, <company>, <numEmps>, <category>, <city>, <state>, <fundedDate>, <raisedAmt>, <raisedCurrency>, <round>
2: <lifelock>, <LifeLock Limited,Company">, <>, <web>, <Tempe>, <AZ>, <1-May-07>, <6850000>, <USD>, <b>
....................
1459: <myrio>, <Myrio>, <75>, <software>, <Bothell>, <WA>, <1-Jan-01>, <20500000>, <USD>, <unattributed>
1460: <grid-networks>, <Grid Networks>, <>, <web>, <Seattle>, <WA>, <30-Oct-07>, <9500000>, <USD>, <a>
1461: <grid-networks>, <Grid Networks>, <>, <web>, <Seattle>, <WA>, <20-May-08>, <10500000>, <USD>, <b>
Time Taken: 96 Milli Seconds.
C:\codes\javap\csv-output.html
C:\codes\javap\csv-output.csv

Monday, August 25, 2014

Python: Read & Write CSV Files


import csv
from random import randint

class CsvObj:
    """In-memory CSV table: one header row plus any number of value rows."""

    def __init__(self):
        # Column names, in file order.
        self.kset = []
        # Data rows; each row is a list of column values.
        self.vset = []

    def setheader(self, h):
        """Store ``h`` (a list of column names) as the header row."""
        self.kset = h

    def addvalue(self, v):
        """Append one data row ``v`` (a list of column values)."""
        self.vset.append(v)

    def prettyprint(self):
        """Print every row to stdout as ``<column name> :  <value>`` lines.

        Each row is preceded by a dashed separator line. Columns without a
        matching header (or headers without a matching value) are skipped.
        Returns None.
        """
        # Local import: the file only imports csv and randint at the top.
        import sys
        out = sys.stdout
        for obj in self.vset:
            out.write('-----------------------------------------------------\n')
            # zip() tolerates ragged rows instead of raising IndexError.
            for key, value in zip(self.kset, obj):
                out.write('%s :  %s\n' % (key, value))

    def tocsv(self, name):
        """Write the header and all rows to the file ``name``.

        In the written output the first column is replaced by
        ``'<col0> , " , <col1>'`` so the result exercises the writer's
        quoting of commas and double quotes. The stored rows are left
        untouched, so calling this repeatedly produces the same file.
        Rows with fewer than two columns are written unchanged.

        Returns True once the file has been written.
        """
        import sys
        # The csv module wants a binary file on Python 2 and a text file
        # opened with newline='' on Python 3; plain 'w' yields blank lines
        # between rows on Windows.
        if sys.version_info[0] < 3:
            fp = open(name, 'wb')
        else:
            fp = open(name, 'w', newline='')
        with fp:
            writer = csv.writer(fp, delimiter=',')
            writer.writerow(self.kset)
            for row in self.vset:
                # Work on a copy so self.vset is not modified as a side effect.
                out_row = list(row)
                if len(out_row) >= 2:
                    out_row[0] = '%s , " , %s' % (str(row[0]), str(row[1]))
                writer.writerow(out_row)
        return True
    

# Load countrylist.csv: the first row becomes the header, every later row a value row.
csvobj = CsvObj()
# 'rb' is the mode the Python 2 csv module expects for reading.
with open('countrylist.csv', 'rb') as f:
    reader = csv.reader(f)
    for pos, row in enumerate(reader):
        if pos == 0:
            csvobj.setheader(row)
        else:
            csvobj.addvalue(row)

# prettyprint() writes to stdout itself and returns None, so it must not be
# wrapped in a print (that would emit a stray "None" line).
csvobj.prettyprint()

# Write the table back out under a random name such as csv55227.csv.
csvname = 'csv%s.csv' % str(randint(0, 99999))
if csvobj.tocsv(csvname):
    print('Csv %s created' % csvname)

The output will look like this:

-----------------------------------------------------
Order :  1
Name :  Albania
Name :  Republic of Albania
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Tirana
Currency Code :  ALL
Currency Name :  Lek
Telephone Code :  +355
Letter Code :  AL
Letter Code :  ALB
Number :  008
Country Code TLD :  .al
-----------------------------------------------------
Order :  2
Name :  Algeria
Name :  People's Democratic Republic of Algeria
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Algiers
Currency Code :  DZD
Currency Name :  Dinar
Telephone Code :  +213
Letter Code :  DZ
Letter Code :  DZA
Number :  012
Country Code TLD :  .dz
-----------------------------------------------------
Order :  3
Name :  Andorra
Name :  Principality of Andorra
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Andorra la Vella
Currency Code :  EUR
Currency Name :  Euro
Telephone Code :  +376
Letter Code :  AD
Letter Code :  AND
Number :  020
Country Code TLD :  .ad
-----------------------------------------------------
Order :  4
Name :  Angola
Name :  Republic of Angola
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Luanda
Currency Code :  AOA
Currency Name :  Kwanza
Telephone Code :  +244
Letter Code :  AO
Letter Code :  AGO
Number :  024
Country Code TLD :  .ao
-----------------------------------------------------
Order :  5
Name :  Antigua and Barbuda
Name :  
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Saint John's
Currency Code :  XCD
Currency Name :  Dollar
Telephone Code :  +1-268
Letter Code :  AG
Letter Code :  ATG
Number :  028
Country Code TLD :  .ag
-----------------------------------------------------
Order :  6
Name :  Argentina
Name :  Argentine Republic
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Buenos Aires
Currency Code :  ARS
Currency Name :  Peso
Telephone Code :  +54
Letter Code :  AR
Letter Code :  ARG
Number :  032
Country Code TLD :  .ar
-----------------------------------------------------
Order :  7
Name :  Armenia
Name :  Republic of Armenia
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Yerevan
Currency Code :  AMD
Currency Name :  Dram
Telephone Code :  +374
Letter Code :  AM
Letter Code :  ARM
Number :  051
Country Code TLD :  .am
-----------------------------------------------------
Order :  8
Name :  Australia
Name :  Commonwealth of Australia
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Canberra
Currency Code :  AUD
Currency Name :  Dollar
Telephone Code :  +61
Letter Code :  AU
Letter Code :  AUS
Number :  036
Country Code TLD :  .au
-----------------------------------------------------
Order :  9
Name :  Austria
Name :  Republic of Austria
Type :  Independent State
Sub Type :  
Sovereignty :  
Capital :  Vienna
Currency Code :  EUR
Currency Name :  Euro
Telephone Code :  +43
Letter Code :  AT
Letter Code :  AUT
Number :  040
Country Code TLD :  .at

...................................

Csv csv55227.csv created