| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| CSVParser |
|
| 3.3;3.3 |
| 1 | /* | |
| 2 | * Copyright (c) 2004 International Decision Systems, Inc. All Rights Reserved. | |
| 3 | * | |
| 4 | * By using this Software, You acknowledge that the Software is a valuable asset | |
| 5 | * and trade secret of either International Decision Systems, Inc. ("IDSI") or a | |
| 6 | * third party supplier of IDSI and constitutes confidential and proprietary | |
| 7 | * information. | |
| 8 | * | |
| 9 | * NEITHER IDSI NOR ANY AGENT OR PERSON ACTING FOR OR WITH IDSI HAS MADE OR DOES | |
| 10 | * MAKE ANY STATEMENTS, AFFIRMATIONS, REPRESENTATIONS OR WARRANTIES WHATSOEVER | |
| 11 | * TO YOU, WHETHER EXPRESS OR IMPLIED, AS TO THE SOFTWARE, THE QUALITY OR | |
| 12 | * CONDITION OF THE SOFTWARE, OR THE OPERATING CHARACTERISTICS OR RELIABILITY OF | |
| 13 | * THE SOFTWARE, OR ITS SUITABILITY FOR ANY GENERAL OR PARTICULAR PURPOSE, OR AS | |
| 14 | * TO ANY OTHER MATTER WHATSOEVER; ANY AND ALL OTHER WARRANTIES INCLUDING | |
| 15 | * WITHOUT LIMITATION ANY WARRANTIES IMPLIED BY LAW, SUCH AS THE IMPLIED | |
| 16 | * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND TITLE, | |
| 17 | * USE AND NON-INFRINGEMENT; ARE HEREBY EXPRESSLY DISCLAIMED AND EXCLUDED. | |
| 18 | */ | |
| 19 | package net.sourceforge.addam.impexp.csv; | |
| 20 | ||
| 21 | import java.io.BufferedReader; | |
| 22 | import java.io.IOException; | |
| 23 | import java.io.Reader; | |
| 24 | import java.text.CharacterIterator; | |
| 25 | import java.text.StringCharacterIterator; | |
| 26 | import java.util.ArrayList; | |
| 27 | import java.util.List; | |
| 28 | ||
| 29 | /** | |
| 30 | * Reads CSV files according to the rules outlined on | |
| 31 | * <a href="http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm#FileFormat">Creativyst</a> | |
| 32 | * | |
| 33 | * @author TIM3 | |
| 34 | * @since Jul 30, 2004 | |
| 35 | */ | |
| 36 | public class CSVParser { | |
| 37 | ||
| 38 | private final BufferedReader in; | |
| 39 | private StringCharacterIterator lineIterator; | |
| 40 | private int lineNumber; | |
| 41 | private int charNumber; | |
| 42 | private int row; | |
| 43 | private int col; | |
| 44 | ||
| 45 | /** | |
| 46 | * creates a CSV reader according to the rules in http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm | |
| 47 | * | |
| 48 | * @param in | |
| 49 | */ | |
| 50 | 0 | public CSVParser(Reader in) { |
| 51 | 0 | this.in = new BufferedReader(in); |
| 52 | 0 | lineNumber = 0; |
| 53 | 0 | row = 0; |
| 54 | 0 | col = 1; |
| 55 | 0 | } |
| 56 | ||
| 57 | public List readLine() throws IOException, CSVFormatException { | |
| 58 | 0 | ArrayList list = null; |
| 59 | 0 | if (nextLine()) { |
| 60 | 0 | list = new ArrayList(); |
| 61 | 0 | for (String field = readField(); field != null; field = readField()) { |
| 62 | 0 | list.add(field); |
| 63 | } | |
| 64 | } | |
| 65 | 0 | row++; |
| 66 | 0 | return list; |
| 67 | } | |
| 68 | ||
| 69 | protected boolean nextLine() throws IOException { | |
| 70 | 0 | String line = in.readLine(); |
| 71 | 0 | lineIterator = null; |
| 72 | 0 | lineNumber++; |
| 73 | 0 | charNumber = 0; |
| 74 | 0 | if (line != null) { |
| 75 | 0 | lineIterator = new StringCharacterIterator(line); |
| 76 | 0 | return true; |
| 77 | } | |
| 78 | 0 | return false; |
| 79 | } | |
| 80 | ||
| 81 | /** | |
| 82 | * reads a string array from a record, which may appear on one or more lines | |
| 83 | * | |
| 84 | * @return | |
| 85 | */ | |
| 86 | protected String readField() throws IOException, CSVFormatException { | |
| 87 | ||
| 88 | 0 | String returnValue = null; |
| 89 | // the implementation is NOT PRETTY, but it does the trick; it was built | |
| 90 | // iteratively with unit tests; if someone can find a more elegant | |
| 91 | // way to do this then I'd love to hear it | |
| 92 | ||
| 93 | 0 | boolean inQuotes = false; // handles how to read each character |
| 94 | 0 | boolean currentValueQuoted = false; // handles how to process values |
| 95 | 0 | StringBuffer buf = new StringBuffer(); |
| 96 | 0 | if (lineIterator != null) { |
| 97 | ||
| 98 | 0 | while (returnValue == null) { |
| 99 | ||
| 100 | 0 | char c = (charNumber == 0) ? lineIterator.current() : lineIterator.next(); |
| 101 | 0 | charNumber++; |
| 102 | ||
| 103 | // first handle quoted multi-line item (so we can reset c if necessary) | |
| 104 | 0 | if (inQuotes && c == CharacterIterator.DONE) { |
| 105 | // go to next line and reset c | |
| 106 | 0 | if (nextLine() == false) { |
| 107 | 0 | throw new CSVFormatException("quoted multi-line item doesn't end", lineNumber, charNumber); |
| 108 | } | |
| 109 | 0 | c = lineIterator.current(); |
| 110 | 0 | buf.append('\n'); |
| 111 | 0 | charNumber++; |
| 112 | } | |
| 113 | ||
| 114 | 0 | char peek = lineIterator.next(); |
| 115 | 0 | lineIterator.previous(); // rewind back after the peek |
| 116 | ||
| 117 | // now deal with double quotes | |
| 118 | 0 | if (c == '"' && peek == '"') { |
| 119 | // two double-quotes are always replaced by one | |
| 120 | 0 | charNumber++; |
| 121 | 0 | buf.append('"'); |
| 122 | 0 | lineIterator.next(); |
| 123 | 0 | } else if (c == '"' && inQuotes) { |
| 124 | 0 | inQuotes = false; |
| 125 | 0 | } else if (c == '"' && !inQuotes) { |
| 126 | 0 | inQuotes = true; |
| 127 | 0 | currentValueQuoted = true; |
| 128 | ||
| 129 | // begin quote; make sure no preceding text | |
| 130 | 0 | String str = buf.toString().trim(); |
| 131 | 0 | if (str.length() > 0) { |
| 132 | // if preceding text is all quotes, then we likely started | |
| 133 | // with some quotes, e.g. """foo""" == "foo", which is a | |
| 134 | // bit confusing to the parser; simply mark the string as | |
| 135 | // quoted and we'll be fine | |
| 136 | 0 | for (int i = 0; i < str.length(); i++) { |
| 137 | 0 | if (str.charAt(i) != '"') { |
| 138 | 0 | throw new CSVFormatException("invalid text before quotes:" + str, lineNumber, charNumber); |
| 139 | } | |
| 140 | } | |
| 141 | 0 | } else { |
| 142 | // get rid of any preceding whitespace | |
| 143 | 0 | buf.setLength(0); |
| 144 | 0 | buf.append(str); |
| 145 | } | |
| 146 | 0 | } else if (!inQuotes && (c == ',' || c == CharacterIterator.DONE)) { |
| 147 | 0 | String value = buf.toString(); |
| 148 | 0 | if (currentValueQuoted) { |
| 149 | 0 | returnValue = value; |
| 150 | 0 | } else { |
| 151 | 0 | returnValue = value.trim(); |
| 152 | } | |
| 153 | 0 | if (c == CharacterIterator.DONE) { |
| 154 | 0 | lineIterator = null; // done with it -- no longer valid |
| 155 | } | |
| 156 | 0 | col++; |
| 157 | 0 | } else if (!inQuotes && currentValueQuoted && c != ',') { |
| 158 | // this should only exist if text after quotes | |
| 159 | 0 | if (!Character.isWhitespace(c)) { |
| 160 | 0 | throw new CSVFormatException("invalid text after quotes:" + c, lineNumber, charNumber); |
| 161 | } | |
| 162 | } else { | |
| 163 | 0 | buf.append(c); |
| 164 | } | |
| 165 | 0 | } |
| 166 | } | |
| 167 | 0 | return returnValue; |
| 168 | } | |
| 169 | ||
| 170 | public int getLineNumber() { | |
| 171 | 0 | return lineNumber; |
| 172 | } | |
| 173 | ||
| 174 | public int getCharNumber() { | |
| 175 | 0 | return charNumber; |
| 176 | } | |
| 177 | ||
| 178 | public int getRow() { | |
| 179 | 0 | return row; |
| 180 | } | |
| 181 | ||
| 182 | public int getCol() { | |
| 183 | 0 | return col; |
| 184 | } | |
| 185 | ||
| 186 | /** | |
| 187 | * @return the column as an Excel string, i.e. 1=A, 2=B, 27=AA, 28=AB, etc. | |
| 188 | */ | |
| 189 | public String getExcelCell() { | |
| 190 | 0 | return getExcelCell(row, col); |
| 191 | } | |
| 192 | ||
| 193 | String getExcelCell(int row, int col) { | |
| 194 | 0 | final char[] alphabetOne = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); |
| 195 | 0 | final char[] alphabetTwo = " ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); |
| 196 | 0 | int column = col - 1; |
| 197 | 0 | String first = ""; |
| 198 | 0 | String second = "" + alphabetOne[(column % 26)]; |
| 199 | 0 | if (column / 26 > 0) { |
| 200 | 0 | first = "" + alphabetTwo[column / 26]; |
| 201 | } | |
| 202 | 0 | return (first + second).toUpperCase() + row; |
| 203 | } | |
| 204 | } |