View Javadoc

1   /*
2    * Copyright (c) 2004 International Decision Systems, Inc.  All Rights Reserved.
3    *
4    * By using this Software, You acknowledge that the Software is a valuable asset
5    * and trade secret of either International Decision Systems, Inc. ("IDSI") or a
6    * third party supplier of IDSI and constitutes confidential and proprietary
7    * information.
8    *
9    * NEITHER IDSI NOR ANY AGENT OR PERSON ACTING FOR OR WITH IDSI HAS MADE OR DOES
10   * MAKE ANY STATEMENTS, AFFIRMATIONS, REPRESENTATIONS OR WARRANTIES WHATSOEVER
11   * TO YOU, WHETHER EXPRESS OR IMPLIED, AS TO THE SOFTWARE, THE QUALITY OR
12   * CONDITION OF THE SOFTWARE, OR THE OPERATING CHARACTERISTICS OR RELIABILITY OF
13   * THE SOFTWARE, OR ITS SUITABILITY FOR ANY GENERAL OR PARTICULAR PURPOSE, OR AS
14   * TO ANY OTHER MATTER WHATSOEVER; ANY AND ALL OTHER WARRANTIES INCLUDING
15   * WITHOUT LIMITATION ANY WARRANTIES IMPLIED BY LAW, SUCH AS THE IMPLIED
16   * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND TITLE,
17   * USE AND NON-INFRINGEMENT; ARE HEREBY EXPRESSLY DISCLAIMED AND EXCLUDED.
18  */
19  package net.sourceforge.addam.impexp.csv;
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.text.CharacterIterator;
25  import java.text.StringCharacterIterator;
26  import java.util.ArrayList;
27  import java.util.List;
28  
29  /**
30   * Reads CSV files according to the rules outlined on
31   * <a href="http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm#FileFormat">Creativyst</a>
32   *
33   * @author TIM3
34   * @since Jul 30, 2004
35   */
36  public class CSVParser {
37  
38      private final BufferedReader in;
39      private StringCharacterIterator lineIterator;
40      private int lineNumber;
41      private int charNumber;
42      private int row;
43      private int col;
44  
45      /**
46       * creates a CSV reader according to the rules in http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
47       *
48       * @param in
49       */
50      public CSVParser(Reader in) {
51          this.in = new BufferedReader(in);
52          lineNumber = 0;
53          row = 0;
54          col = 1;
55      }
56  
57      public List readLine() throws IOException, CSVFormatException {
58          ArrayList list = null;
59          if (nextLine()) {
60              list = new ArrayList();
61              for (String field = readField(); field != null; field = readField()) {
62                  list.add(field);
63              }
64          }
65          row++;
66          return list;
67      }
68  
69      protected boolean nextLine() throws IOException {
70          String line = in.readLine();
71          lineIterator = null;
72          lineNumber++;
73          charNumber = 0;
74          if (line != null) {
75              lineIterator = new StringCharacterIterator(line);
76              return true;
77          }
78          return false;
79      }
80  
81      /**
82       * reads a string array from a record, which may appear on one or more lines
83       *
84       * @return
85       */
86      protected String readField() throws IOException, CSVFormatException {
87  
88          String returnValue = null;
89          // the implementation is NOT PRETTY, but it does the trick; it was built
90          // iteratively with unit tests; if someone can find a more elegant
91          // way to do this then I'd love to hear it
92  
93          boolean inQuotes = false; // handles how to read each character
94          boolean currentValueQuoted = false; // handles how to process values
95          StringBuffer buf = new StringBuffer();
96          if (lineIterator != null) {
97  
98              while (returnValue == null) {
99  
100                 char c = (charNumber == 0) ? lineIterator.current() : lineIterator.next();
101                 charNumber++;
102 
103                 // first handle quoted multi-line item (so we can reset c if necessary)
104                 if (inQuotes && c == CharacterIterator.DONE) {
105                     // go to next line and reset c
106                     if (nextLine() == false) {
107                         throw new CSVFormatException("quoted multi-line item doesn't end", lineNumber, charNumber);
108                     }
109                     c = lineIterator.current();
110                     buf.append('\n');
111                     charNumber++;
112                 }
113 
114                 char peek = lineIterator.next();
115                 lineIterator.previous(); // rewind back after the peek
116 
117                 // now deal with double quotes
118                 if (c == '"' && peek == '"') {
119                     // two double-quotes are always replaced by one
120                     charNumber++;
121                     buf.append('"');
122                     lineIterator.next();
123                 } else if (c == '"' && inQuotes) {
124                     inQuotes = false;
125                 } else if (c == '"' && !inQuotes) {
126                     inQuotes = true;
127                     currentValueQuoted = true;
128 
129                     // begin quote; make sure no preceding text
130                     String str = buf.toString().trim();
131                     if (str.length() > 0) {
132                         // if preceding text is all quotes, then we likely started
133                         // with some quotes, e.g. """foo""" == "foo", which is a
134                         // bit confusing to the parser; simply mark the string as
135                         // quoted and we'll be fine
136                         for (int i = 0; i < str.length(); i++) {
137                             if (str.charAt(i) != '"') {
138                                 throw new CSVFormatException("invalid text before quotes:" + str, lineNumber, charNumber);
139                             }
140                         }
141                     } else {
142                         // get rid of any preceding whitespace
143                         buf.setLength(0);
144                         buf.append(str);
145                     }
146                 } else if (!inQuotes && (c == ',' || c == CharacterIterator.DONE)) {
147                     String value = buf.toString();
148                     if (currentValueQuoted) {
149                         returnValue = value;
150                     } else {
151                         returnValue = value.trim();
152                     }
153                     if (c == CharacterIterator.DONE) {
154                         lineIterator = null; // done with it -- no longer valid
155                     }
156                     col++;
157                 } else if (!inQuotes && currentValueQuoted && c != ',') {
158                     // this should only exist if text after quotes
159                     if (!Character.isWhitespace(c)) {
160                         throw new CSVFormatException("invalid text after quotes:" + c, lineNumber, charNumber);
161                     }
162                 } else {
163                     buf.append(c);
164                 }
165             }
166         }
167         return returnValue;
168     }
169 
170     public int getLineNumber() {
171         return lineNumber;
172     }
173 
174     public int getCharNumber() {
175         return charNumber;
176     }
177 
178     public int getRow() {
179         return row;
180     }
181 
182     public int getCol() {
183         return col;
184     }
185 
186     /**
187      * @return the column as an Excel string, i.e. 1=A, 2=B, 27=AA, 28=AB, etc.
188      */
189     public String getExcelCell() {
190         return getExcelCell(row, col);
191     }
192 
193     String getExcelCell(int row, int col) {
194         final char[] alphabetOne = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();
195         final char[] alphabetTwo = " ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();
196         int column = col - 1;
197         String first = "";
198         String second = "" + alphabetOne[(column % 26)];
199         if (column / 26 > 0) {
200             first = "" + alphabetTwo[column / 26];
201         }
202         return (first + second).toUpperCase() + row;
203     }
204 }