// Java 读取 CSV,处理特殊字符(引号、逗号、斜杠等)。注意:WPS 和 Office 对于 /"、, 的处理不同。
// 感谢群友大小姐、黑猫的指导。
// 附大小姐的 git:https://github.com/yingyulou/Published-Articles
// 此算法的原理是“手工编码的确定性有穷自动机”,有兴趣的可以学习一下。
import java.util.ArrayList;
import java.util.List;
/**
 * Splits one line of CSV text into its fields using a hand-coded deterministic
 * finite automaton: every input character triggers exactly one state transition.
 *
 * <p>Handling implemented by this class:
 * <ul>
 *   <li>A field that starts with {@code "} is quoted; delimiters inside the quoted
 *       section are kept as literal text.</li>
 *   <li>Inside a quoted section, two consecutive quotes ({@code ""}) produce one
 *       literal {@code "}.</li>
 *   <li>A quote that appears after unquoted content has started is kept literally.</li>
 *   <li>Text that follows a closing quote is appended to the same field
 *       (for example {@code "a"b} yields {@code ab}).</li>
 *   <li>If the line ends inside a quoted section, the text collected so far is
 *       returned as the last field; no error is raised.</li>
 * </ul>
 */
public class CsvFileParseUtil {

    /** States of the lexer automaton. */
    private enum LexerStage {
        /** At the start of a field; no character of the field has been consumed yet. */
        Start,
        /** Inside unquoted content (also entered after a quoted section was closed). */
        Common,
        /** Inside a quoted section; the delimiter has no special meaning here. */
        Quote,
        /**
         * A quote was just seen inside a quoted section: it is either the closing
         * quote or the first half of an escaped {@code ""} pair.
         */
        DoubleQuote,
    }

    /** Mutable state carried through the parse of a single line. */
    private static class Parser {
        /** Characters collected for the field currently being read. */
        final StringBuilder splitStr = new StringBuilder();
        /** Current automaton state. */
        LexerStage lexerStage = LexerStage.Start;
        /** Fields completed so far, in input order. */
        final List<String> splitList = new ArrayList<>();
        /** Field delimiter as supplied by the caller. */
        final String delim;

        Parser(String delim) {
            this.delim = delim;
        }

        /**
         * Reports whether {@code c} is the delimiter. Only a delimiter string of
         * exactly one character can ever match, because input is examined one
         * character at a time.
         */
        boolean isDelimiter(char c) {
            return delim.length() == 1 && delim.charAt(0) == c;
        }
    }

    /** Handles a character of unquoted content: a delimiter ends the field, anything else is kept. */
    private static void parseCsvLineCommonHelper(Parser parser, char curChar) {
        if (parser.isDelimiter(curChar)) {
            parser.splitList.add(parser.splitStr.toString());
            parser.splitStr.setLength(0);
            parser.lexerStage = LexerStage.Start;
        } else {
            parser.splitStr.append(curChar);
        }
    }

    /** Handles the first character of a field: a quote opens a quoted section, anything else is unquoted content. */
    private static void parseCsvLineStartHelper(Parser parser, char curChar) {
        if (curChar == '"') {
            parser.lexerStage = LexerStage.Quote;
        } else {
            parser.lexerStage = LexerStage.Common;
            // Re-dispatch so that a delimiter right at the field start emits an empty field.
            parseCsvLineCommonHelper(parser, curChar);
        }
    }

    /** Handles a character inside a quoted section: a quote may close it, anything else is literal. */
    private static void parseCsvLineQuoteHelper(Parser parser, char curChar) {
        if (curChar == '"') {
            parser.lexerStage = LexerStage.DoubleQuote;
        } else {
            parser.splitStr.append(curChar);
        }
    }

    /** Handles the character that follows a quote seen inside a quoted section. */
    private static void parseCsvLineDoubleQuoteHelper(Parser parser, char curChar) {
        if (curChar == '"') {
            // "" inside quotes is an escaped literal quote; the quoted section continues.
            parser.splitStr.append('"');
            parser.lexerStage = LexerStage.Quote;
        } else {
            // The previous quote closed the section; treat this character as unquoted content.
            parser.lexerStage = LexerStage.Common;
            parseCsvLineCommonHelper(parser, curChar);
        }
    }

    /**
     * Splits a single CSV line into its fields.
     *
     * @param csvLineStr the line to split, without a trailing line terminator; must not be null
     * @param delim the field delimiter; must not be null. Only a one-character
     *     delimiter is recognised: with any other length no character matches and
     *     the whole line is returned as a single field
     * @return the fields in input order; always contains at least one element
     *     (an empty line yields a single empty string)
     * @throws NullPointerException if {@code csvLineStr} or {@code delim} is null
     */
    public static List<String> parseCsvLine(String csvLineStr, String delim) {
        if (csvLineStr == null) {
            throw new NullPointerException("csvLineStr must not be null");
        }
        if (delim == null) {
            throw new NullPointerException("delim must not be null");
        }

        Parser parser = new Parser(delim);
        for (int i = 0; i < csvLineStr.length(); i++) {
            char curChar = csvLineStr.charAt(i);
            switch (parser.lexerStage) {
                case Start:
                    parseCsvLineStartHelper(parser, curChar);
                    break;
                case Common:
                    parseCsvLineCommonHelper(parser, curChar);
                    break;
                case Quote:
                    parseCsvLineQuoteHelper(parser, curChar);
                    break;
                case DoubleQuote:
                    parseCsvLineDoubleQuoteHelper(parser, curChar);
                    break;
                default:
                    // Unreachable unless a new LexerStage is added without a handler.
                    throw new IllegalStateException("Invalid LexerStage value: " + parser.lexerStage);
            }
        }
        // The last field has no trailing delimiter, so flush it explicitly.
        parser.splitList.add(parser.splitStr.toString());
        return parser.splitList;
    }

    /** Small demo: parses the text {@code "4|",|} with a comma delimiter and prints each field. */
    public static void main(String[] args) {
        parseCsvLine("\"4|\",|", ",").forEach(System.out::println);
    }
}



