Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
CSVParser |
|
| 3.3;3.3 |
1 | /* | |
2 | * Copyright (c) 2004 International Decision Systems, Inc. All Rights Reserved. | |
3 | * | |
4 | * By using this Software, You acknowledge that the Software is a valuable asset | |
5 | * and trade secret of either International Decision Systems, Inc. ("IDSI") or a | |
6 | * third party supplier of IDSI and constitutes confidential and proprietary | |
7 | * information. | |
8 | * | |
9 | * NEITHER IDSI NOR ANY AGENT OR PERSON ACTING FOR OR WITH IDSI HAS MADE OR DOES | |
10 | * MAKE ANY STATEMENTS, AFFIRMATIONS, REPRESENTATIONS OR WARRANTIES WHATSOEVER | |
11 | * TO YOU, WHETHER EXPRESS OR IMPLIED, AS TO THE SOFTWARE, THE QUALITY OR | |
12 | * CONDITION OF THE SOFTWARE, OR THE OPERATING CHARACTERISTICS OR RELIABILITY OF | |
13 | * THE SOFTWARE, OR ITS SUITABILITY FOR ANY GENERAL OR PARTICULAR PURPOSE, OR AS | |
14 | * TO ANY OTHER MATTER WHATSOEVER; ANY AND ALL OTHER WARRANTIES INCLUDING | |
15 | * WITHOUT LIMITATION ANY WARRANTIES IMPLIED BY LAW, SUCH AS THE IMPLIED | |
16 | * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND TITLE, | |
17 | * USE AND NON-INFRINGEMENT; ARE HEREBY EXPRESSLY DISCLAIMED AND EXCLUDED. | |
18 | */ | |
19 | package net.sourceforge.addam.impexp.csv; | |
20 | ||
21 | import java.io.BufferedReader; | |
22 | import java.io.IOException; | |
23 | import java.io.Reader; | |
24 | import java.text.CharacterIterator; | |
25 | import java.text.StringCharacterIterator; | |
26 | import java.util.ArrayList; | |
27 | import java.util.List; | |
28 | ||
29 | /** | |
30 | * Reads CSV files according to the rules outlined on | |
31 | * <a href="http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm#FileFormat">Creativyst</a> | |
32 | * | |
33 | * @author TIM3 | |
34 | * @since Jul 30, 2004 | |
35 | */ | |
36 | public class CSVParser { | |
37 | ||
38 | private final BufferedReader in; | |
39 | private StringCharacterIterator lineIterator; | |
40 | private int lineNumber; | |
41 | private int charNumber; | |
42 | private int row; | |
43 | private int col; | |
44 | ||
45 | /** | |
46 | * creates a CSV reader according to the rules in http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm | |
47 | * | |
48 | * @param in | |
49 | */ | |
50 | 0 | public CSVParser(Reader in) { |
51 | 0 | this.in = new BufferedReader(in); |
52 | 0 | lineNumber = 0; |
53 | 0 | row = 0; |
54 | 0 | col = 1; |
55 | 0 | } |
56 | ||
57 | public List readLine() throws IOException, CSVFormatException { | |
58 | 0 | ArrayList list = null; |
59 | 0 | if (nextLine()) { |
60 | 0 | list = new ArrayList(); |
61 | 0 | for (String field = readField(); field != null; field = readField()) { |
62 | 0 | list.add(field); |
63 | } | |
64 | } | |
65 | 0 | row++; |
66 | 0 | return list; |
67 | } | |
68 | ||
69 | protected boolean nextLine() throws IOException { | |
70 | 0 | String line = in.readLine(); |
71 | 0 | lineIterator = null; |
72 | 0 | lineNumber++; |
73 | 0 | charNumber = 0; |
74 | 0 | if (line != null) { |
75 | 0 | lineIterator = new StringCharacterIterator(line); |
76 | 0 | return true; |
77 | } | |
78 | 0 | return false; |
79 | } | |
80 | ||
81 | /** | |
82 | * reads a string array from a record, which may appear on one or more lines | |
83 | * | |
84 | * @return | |
85 | */ | |
86 | protected String readField() throws IOException, CSVFormatException { | |
87 | ||
88 | 0 | String returnValue = null; |
89 | // the implementation is NOT PRETTY, but it does the trick; it was built | |
90 | // iteratively with unit tests; if someone can find a more elegant | |
91 | // way to do this then I'd love to hear it | |
92 | ||
93 | 0 | boolean inQuotes = false; // handles how to read each character |
94 | 0 | boolean currentValueQuoted = false; // handles how to process values |
95 | 0 | StringBuffer buf = new StringBuffer(); |
96 | 0 | if (lineIterator != null) { |
97 | ||
98 | 0 | while (returnValue == null) { |
99 | ||
100 | 0 | char c = (charNumber == 0) ? lineIterator.current() : lineIterator.next(); |
101 | 0 | charNumber++; |
102 | ||
103 | // first handle quoted multi-line item (so we can reset c if necessary) | |
104 | 0 | if (inQuotes && c == CharacterIterator.DONE) { |
105 | // go to next line and reset c | |
106 | 0 | if (nextLine() == false) { |
107 | 0 | throw new CSVFormatException("quoted multi-line item doesn't end", lineNumber, charNumber); |
108 | } | |
109 | 0 | c = lineIterator.current(); |
110 | 0 | buf.append('\n'); |
111 | 0 | charNumber++; |
112 | } | |
113 | ||
114 | 0 | char peek = lineIterator.next(); |
115 | 0 | lineIterator.previous(); // rewind back after the peek |
116 | ||
117 | // now deal with double quotes | |
118 | 0 | if (c == '"' && peek == '"') { |
119 | // two double-quotes are always replaced by one | |
120 | 0 | charNumber++; |
121 | 0 | buf.append('"'); |
122 | 0 | lineIterator.next(); |
123 | 0 | } else if (c == '"' && inQuotes) { |
124 | 0 | inQuotes = false; |
125 | 0 | } else if (c == '"' && !inQuotes) { |
126 | 0 | inQuotes = true; |
127 | 0 | currentValueQuoted = true; |
128 | ||
129 | // begin quote; make sure no preceding text | |
130 | 0 | String str = buf.toString().trim(); |
131 | 0 | if (str.length() > 0) { |
132 | // if preceding text is all quotes, then we likely started | |
133 | // with some quotes, e.g. """foo""" == "foo", which is a | |
134 | // bit confusing to the parser; simply mark the string as | |
135 | // quoted and we'll be fine | |
136 | 0 | for (int i = 0; i < str.length(); i++) { |
137 | 0 | if (str.charAt(i) != '"') { |
138 | 0 | throw new CSVFormatException("invalid text before quotes:" + str, lineNumber, charNumber); |
139 | } | |
140 | } | |
141 | 0 | } else { |
142 | // get rid of any preceding whitespace | |
143 | 0 | buf.setLength(0); |
144 | 0 | buf.append(str); |
145 | } | |
146 | 0 | } else if (!inQuotes && (c == ',' || c == CharacterIterator.DONE)) { |
147 | 0 | String value = buf.toString(); |
148 | 0 | if (currentValueQuoted) { |
149 | 0 | returnValue = value; |
150 | 0 | } else { |
151 | 0 | returnValue = value.trim(); |
152 | } | |
153 | 0 | if (c == CharacterIterator.DONE) { |
154 | 0 | lineIterator = null; // done with it -- no longer valid |
155 | } | |
156 | 0 | col++; |
157 | 0 | } else if (!inQuotes && currentValueQuoted && c != ',') { |
158 | // this should only exist if text after quotes | |
159 | 0 | if (!Character.isWhitespace(c)) { |
160 | 0 | throw new CSVFormatException("invalid text after quotes:" + c, lineNumber, charNumber); |
161 | } | |
162 | } else { | |
163 | 0 | buf.append(c); |
164 | } | |
165 | 0 | } |
166 | } | |
167 | 0 | return returnValue; |
168 | } | |
169 | ||
170 | public int getLineNumber() { | |
171 | 0 | return lineNumber; |
172 | } | |
173 | ||
174 | public int getCharNumber() { | |
175 | 0 | return charNumber; |
176 | } | |
177 | ||
178 | public int getRow() { | |
179 | 0 | return row; |
180 | } | |
181 | ||
182 | public int getCol() { | |
183 | 0 | return col; |
184 | } | |
185 | ||
186 | /** | |
187 | * @return the column as an Excel string, i.e. 1=A, 2=B, 27=AA, 28=AB, etc. | |
188 | */ | |
189 | public String getExcelCell() { | |
190 | 0 | return getExcelCell(row, col); |
191 | } | |
192 | ||
193 | String getExcelCell(int row, int col) { | |
194 | 0 | final char[] alphabetOne = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); |
195 | 0 | final char[] alphabetTwo = " ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); |
196 | 0 | int column = col - 1; |
197 | 0 | String first = ""; |
198 | 0 | String second = "" + alphabetOne[(column % 26)]; |
199 | 0 | if (column / 26 > 0) { |
200 | 0 | first = "" + alphabetTwo[column / 26]; |
201 | } | |
202 | 0 | return (first + second).toUpperCase() + row; |
203 | } | |
204 | } |