Browse Source

fix #I2BMP1

Looly 5 years ago
parent
commit
3c2f0e46b0

+ 2 - 1
CHANGELOG.md

@@ -3,7 +3,7 @@
 
 -------------------------------------------------------------------------------------------------------------
 
-# 5.5.7 (2021-01-02)
+# 5.5.7 (2021-01-03)
 
 ### 新特性
 * 【core   】     DynaBean.create增加重载方法(pr#245@Gitee)
@@ -13,6 +13,7 @@
 * 【extra  】     MailUtil增加getSession方法
 
 ### Bug修复
+* 【core   】     修复CsvReader读取双引号未转义问题(issue#I2BMP1@Gitee)
 
 -------------------------------------------------------------------------------------------------------------
 

+ 28 - 10
hutool-core/src/main/java/cn/hutool/core/text/csv/CsvConfig.java

@@ -1,26 +1,34 @@
 package cn.hutool.core.text.csv;
 
-import java.io.Serializable;
-
 import cn.hutool.core.util.CharUtil;
 
+import java.io.Serializable;
+
 /**
- * CSV基础配置项
- * 
+ * CSV基础配置项,此配置项可用于读取和写出CSV,定义了包括字段分隔符、文本包装符等符号
+ *
  * @author looly
  * @since 4.0.5
  */
-public class CsvConfig implements Serializable{
+public class CsvConfig implements Serializable {
 	private static final long serialVersionUID = -8069578249066158459L;
-	
-	/** 字段分隔符,默认逗号',' */
+
+	/**
+	 * 字段分隔符,默认逗号','
+	 */
 	protected char fieldSeparator = CharUtil.COMMA;
-	/** 文本分隔符,文本包装符,默认双引号'"' */
+	/**
+	 * 文本包装符,默认双引号'"'
+	 */
 	protected char textDelimiter = CharUtil.DOUBLE_QUOTES;
+	/**
+	 * 注释符号,用于区分注释行,默认'#'
+	 */
+	protected char commentCharacter = '#';
 
 	/**
 	 * 设置字段分隔符,默认逗号','
-	 * 
+	 *
 	 * @param fieldSeparator 字段分隔符,默认逗号','
 	 */
 	public void setFieldSeparator(final char fieldSeparator) {
@@ -29,10 +37,20 @@ public class CsvConfig implements Serializable{
 
 	/**
 	 * 设置 文本分隔符,文本包装符,默认双引号'"'
-	 * 
+	 *
 	 * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"'
 	 */
 	public void setTextDelimiter(char textDelimiter) {
 		this.textDelimiter = textDelimiter;
 	}
+
+	/**
+	 * Sets the comment character, which is used to tell comment lines apart.
+	 *
+	 * @param commentCharacter comment character used to tell comment lines apart
+	 * @since 5.5.7
+	 */
+	public void setCommentCharacter(char commentCharacter) {
+		this.commentCharacter = commentCharacter;
+	}
 }

+ 64 - 25
hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java

@@ -32,33 +32,55 @@ public final class CsvParser implements Closeable, Serializable {
 	private final CsvReadConfig config;
 
 	private final char[] buf = new char[IoUtil.DEFAULT_LARGE_BUFFER_SIZE];
-	/** 当前位置 */
+	/**
+	 * 当前位置
+	 */
 	private int bufPos;
-	/** 读取一段后数据长度 */
+	/**
+	 * 读取一段后数据长度
+	 */
 	private int bufLen;
-	/** 拷贝开始的位置,一般为上一行的结束位置 */
+	/**
+	 * 拷贝开始的位置,一般为上一行的结束位置
+	 */
 	private int copyStart;
-	/** 前一个特殊分界字符 */
+	/**
+	 * 前一个特殊分界字符
+	 */
 	private int preChar = -1;
-	/** 是否在引号包装内 */
+	/**
+	 * 是否在引号包装内
+	 */
 	private boolean inQuotes;
-	/** 当前读取字段 */
+	/**
+	 * 当前读取字段
+	 */
 	private final StrBuilder currentField = new StrBuilder(512);
-	
-	/** 标题行 */
+
+	/**
+	 * 标题行
+	 */
 	private CsvRow header;
-	/** 当前行号 */
+	/**
+	 * 当前行号
+	 */
 	private long lineNo;
-	 /** 第一行字段数,用于检查每行字段数是否一致 */
+	/**
+	 * 第一行字段数,用于检查每行字段数是否一致
+	 */
 	private int firstLineFieldCount = -1;
-	/** 最大字段数量 */
+	/**
+	 * 最大字段数量
+	 */
 	private int maxFieldCount;
-	/** 是否读取结束 */
+	/**
+	 * 是否读取结束
+	 */
 	private boolean finished;
 
 	/**
 	 * CSV解析器
-	 * 
+	 *
 	 * @param reader Reader
 	 * @param config 配置,null则为默认配置
 	 */
@@ -84,7 +106,7 @@ public final class CsvParser implements Closeable, Serializable {
 	}
 
 	/**
-	 *读取下一行数据
+	 * 读取下一行数据
 	 *
 	 * @return CsvRow
 	 * @throws IORuntimeException IO读取异常
@@ -97,7 +119,7 @@ public final class CsvParser implements Closeable, Serializable {
 			startingLineNo = ++lineNo;
 			currentFields = readLine();
 			fieldCount = currentFields.size();
-			if(fieldCount < 1){
+			if (fieldCount < 1) {
 				break;
 			}
 
@@ -135,24 +157,24 @@ public final class CsvParser implements Closeable, Serializable {
 
 	/**
 	 * 当前行做为标题行
-	 * 
+	 *
 	 * @param currentFields 当前行字段列表
 	 */
 	private void initHeader(final List<String> currentFields) {
 		final Map<String, Integer> localHeaderMap = new LinkedHashMap<>(currentFields.size());
 		for (int i = 0; i < currentFields.size(); i++) {
 			final String field = currentFields.get(i);
-			if (StrUtil.isNotEmpty(field) && false ==localHeaderMap.containsKey(field)) {
+			if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) {
 				localHeaderMap.put(field, i);
 			}
 		}
-		
-		header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap),  Collections.unmodifiableList(currentFields));
+
+		header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields));
 	}
 
 	/**
 	 * 读取一行数据
-	 * 
+	 *
 	 * @return 一行数据
 	 * @throws IORuntimeException IO异常
 	 */
@@ -185,7 +207,7 @@ public final class CsvParser implements Closeable, Serializable {
 
 					if (localPreChar == config.fieldSeparator || localCurrentField.hasContent()) {
 						//剩余部分作为一个字段
-						currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
+						addField(currentFields, localCurrentField.toStringAndReset());
 					}
 					break;
 				}
@@ -208,36 +230,40 @@ public final class CsvParser implements Closeable, Serializable {
 				}
 				copyLen++;
 			} else {
+				// 非引号内
 				if (c == config.fieldSeparator) {
 					//一个字段结束
 					if (copyLen > 0) {
 						localCurrentField.append(localBuf, localCopyStart, copyLen);
 						copyLen = 0;
 					}
-					currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
+					addField(currentFields, localCurrentField.toStringAndReset());
 					localCopyStart = localBufPos;
 				} else if (c == config.textDelimiter) {
 					// 引号开始
 					inQuotes = true;
 					copyLen++;
 				} else if (c == CharUtil.CR) {
+					// \r,直接结束
 					if (copyLen > 0) {
 						localCurrentField.append(localBuf, localCopyStart, copyLen);
 					}
-					currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
+					addField(currentFields, localCurrentField.toStringAndReset());
 					localPreChar = c;
 					localCopyStart = localBufPos;
 					break;
 				} else if (c == CharUtil.LF) {
+					// \n
 					if (localPreChar != CharUtil.CR) {
 						if (copyLen > 0) {
 							localCurrentField.append(localBuf, localCopyStart, copyLen);
 						}
-						currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
+						addField(currentFields, localCurrentField.toStringAndReset());
 						localPreChar = c;
 						localCopyStart = localBufPos;
 						break;
 					}
+					// 前一个字符是\r,已经处理过这个字段了,此处直接跳过
 					localCopyStart = localBufPos;
 				} else {
 					copyLen++;
@@ -254,9 +280,22 @@ public final class CsvParser implements Closeable, Serializable {
 
 		return currentFields;
 	}
-	
+
 	@Override
 	public void close() throws IOException {
 		reader.close();
 	}
+
+	/**
+	 * Adds a field to the row after stripping one layer of text-delimiter wrapping and un-escaping doubled delimiters.
+	 * The wrapper is removed exactly once; a second pass would eat quotes that belong to the value (e.g. {@code """a"""}).
+	 * @param currentFields fields collected so far for the current row
+	 * @param field         raw field text as read from the input
+	 */
+	private void addField(List<String> currentFields, String field) {
+		final char textDelimiter = this.config.textDelimiter;
+		field = StrUtil.unWrap(field, textDelimiter);
+		field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + "");
+		currentFields.add(field);
+	}
 }

+ 75 - 30
hutool-core/src/main/java/cn/hutool/core/text/csv/CsvWriter.java

@@ -29,17 +29,24 @@ import java.util.Collection;
 public final class CsvWriter implements Closeable, Flushable, Serializable {
 	private static final long serialVersionUID = 1L;
 
-	/** 写出器 */
+	/**
+	 * 写出器
+	 */
 	private final Writer writer;
-	/** 写出配置 */
+	/**
+	 * 写出配置
+	 */
 	private final CsvWriteConfig config;
-	/** 是否处于新行开始 */
+	/**
+	 * 是否处于新行开始
+	 */
 	private boolean newline = true;
 
 	// --------------------------------------------------------------------------------------------------- Constructor start
+
 	/**
 	 * 构造,覆盖已有文件(如果存在),默认编码UTF-8
-	 * 
+	 *
 	 * @param filePath File CSV文件路径
 	 */
 	public CsvWriter(String filePath) {
@@ -48,7 +55,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造,覆盖已有文件(如果存在),默认编码UTF-8
-	 * 
+	 *
 	 * @param file File CSV文件
 	 */
 	public CsvWriter(File file) {
@@ -57,9 +64,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造,覆盖已有文件(如果存在)
-	 * 
+	 *
 	 * @param filePath File CSV文件路径
-	 * @param charset 编码
+	 * @param charset  编码
 	 */
 	public CsvWriter(String filePath, Charset charset) {
 		this(FileUtil.file(filePath), charset);
@@ -67,8 +74,8 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造,覆盖已有文件(如果存在)
-	 * 
-	 * @param file File CSV文件
+	 *
+	 * @param file    File CSV文件
 	 * @param charset 编码
 	 */
 	public CsvWriter(File file, Charset charset) {
@@ -77,9 +84,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造
-	 * 
+	 *
 	 * @param filePath File CSV文件路径
-	 * @param charset 编码
+	 * @param charset  编码
 	 * @param isAppend 是否追加
 	 */
 	public CsvWriter(String filePath, Charset charset, boolean isAppend) {
@@ -88,9 +95,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造
-	 * 
-	 * @param file CSV文件
-	 * @param charset 编码
+	 *
+	 * @param file     CSV文件
+	 * @param charset  编码
 	 * @param isAppend 是否追加
 	 */
 	public CsvWriter(File file, Charset charset, boolean isAppend) {
@@ -99,11 +106,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造
-	 * 
+	 *
 	 * @param filePath CSV文件路径
-	 * @param charset 编码
+	 * @param charset  编码
 	 * @param isAppend 是否追加
-	 * @param config 写出配置,null则使用默认配置
+	 * @param config   写出配置,null则使用默认配置
 	 */
 	public CsvWriter(String filePath, Charset charset, boolean isAppend, CsvWriteConfig config) {
 		this(FileUtil.file(filePath), charset, isAppend, config);
@@ -111,11 +118,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造
-	 * 
-	 * @param file CSV文件
-	 * @param charset 编码
+	 *
+	 * @param file     CSV文件
+	 * @param charset  编码
 	 * @param isAppend 是否追加
-	 * @param config 写出配置,null则使用默认配置
+	 * @param config   写出配置,null则使用默认配置
 	 */
 	public CsvWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) {
 		this(FileUtil.getWriter(file, charset, isAppend), config);
@@ -123,7 +130,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造,使用默认配置
-	 * 
+	 *
 	 * @param writer {@link Writer}
 	 */
 	public CsvWriter(Writer writer) {
@@ -132,7 +139,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 构造
-	 * 
+	 *
 	 * @param writer Writer
 	 * @param config 写出配置,null则使用默认配置
 	 */
@@ -144,7 +151,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 设置是否始终使用文本分隔符,文本包装符,默认false,按需添加
-	 * 
+	 *
 	 * @param alwaysDelimitText 是否始终使用文本分隔符,文本包装符,默认false,按需添加
 	 * @return this
 	 */
@@ -155,7 +162,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 设置换行符
-	 * 
+	 *
 	 * @param lineDelimiter 换行符
 	 * @return this
 	 */
@@ -166,7 +173,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 将多行写出到Writer
-	 * 
+	 *
 	 * @param lines 多行数据
 	 * @return this
 	 * @throws IORuntimeException IO异常
@@ -183,7 +190,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 	/**
 	 * 将多行写出到Writer
-	 * 
+	 *
 	 * @param lines 多行数据,每行数据可以是集合或者数组
 	 * @return this
 	 * @throws IORuntimeException IO异常
@@ -199,17 +206,54 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 	}
 
 	/**
+	 * Writes one line made of the given fields; when no fields are given, only a line break is written.
+	 *
+	 * @param fields field values ({@code null} values are appended as empty values)
+	 * @return this
+	 * @throws IORuntimeException on I/O failure
+	 * @since 5.5.7
+	 */
+	public CsvWriter writeLine(String... fields) throws IORuntimeException {
+		if (ArrayUtil.isEmpty(fields)) {
+			return writeLine();
+		}
+		appendLine(fields);
+		return this;
+	}
+
+	/**
 	 * 追加新行(换行)
 	 *
 	 * @throws IORuntimeException IO异常
 	 */
-	public void writeLine() throws IORuntimeException {
+	public CsvWriter writeLine() throws IORuntimeException {
 		try {
 			writer.write(config.lineDelimiter);
 		} catch (IOException e) {
 			throw new IORuntimeException(e);
 		}
 		newline = true;
+		return this;
+	}
+
+	/**
+	 * Writes one comment line: the configured comment character, the comment text, then the line delimiter.
+	 *
+	 * @param comment comment text
+	 * @return this
+	 * @see CsvConfig#commentCharacter
+	 * @since 5.5.7
+	 */
+	public CsvWriter writeComment(String comment) {
+		try {
+			writer.write(this.config.commentCharacter);
+			writer.write(comment);
+			writer.write(config.lineDelimiter);
+			newline = true;
+		} catch (IOException e) {
+			throw new IORuntimeException(e);
+		}
+		return this;
 	}
 
 	@Override
@@ -227,13 +271,14 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 	}
 
 	// --------------------------------------------------------------------------------------------------- Private method start
+
 	/**
 	 * 追加一行,末尾会自动换行,但是追加前不会换行
 	 *
 	 * @param fields 字段列表 ({@code null} 值会被做为空值追加)
 	 * @throws IORuntimeException IO异常
 	 */
-	private void appendLine(final String... fields) throws IORuntimeException {
+	private void appendLine(String... fields) throws IORuntimeException {
 		try {
 			doAppendLine(fields);
 		} catch (IOException e) {
@@ -276,7 +321,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
 
 		if (null == value) {
 			if (alwaysDelimitText) {
-				writer.write(new char[] { textDelimiter, textDelimiter });
+				writer.write(new char[]{textDelimiter, textDelimiter});
 			}
 			return;
 		}

+ 1 - 1
hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java

@@ -47,7 +47,7 @@ public class CharUtil {
 	public static final char AMP = '&';
 	/** 字符常量:冒号 {@code ':'} */
 	public static final char COLON = ':';
-	/** 字符常量:艾特 <code>'@'</code> */
+	/** 字符常量:艾特 {@code '@'} */
 	public static final char AT = '@';
 
 	/**

+ 31 - 7
hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java

@@ -1,8 +1,9 @@
 package cn.hutool.core.text.csv;
 
 import cn.hutool.core.io.FileUtil;
-import cn.hutool.core.lang.Assert;
+import cn.hutool.core.lang.Console;
 import cn.hutool.core.util.CharsetUtil;
+import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -16,26 +17,49 @@ public class CsvUtilTest {
 		//从文件中读取CSV数据
 		CsvData data = reader.read(FileUtil.file("test.csv"));
 		List<CsvRow> rows = data.getRows();
-		for (CsvRow csvRow : rows) {
-			Assert.notEmpty(csvRow.getRawList());
-		}
+		final CsvRow row0 = rows.get(0);
+		Assert.assertEquals("sss,sss", row0.get(0));
+		Assert.assertEquals("姓名", row0.get(1));
+		Assert.assertEquals("性别", row0.get(2));
+		Assert.assertEquals("关注\"对象\"", row0.get(3));
+		Assert.assertEquals("年龄", row0.get(4));
+		Assert.assertEquals("", row0.get(5));
+		Assert.assertEquals("\"", row0.get(6));
 	}
 
 	@Test
 	public void readTest2() {
 		CsvReader reader = CsvUtil.getReader();
-		reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> Assert.notEmpty(csvRow.getRawList()));
+		reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> {
+			// 只有一行,所以直接判断
+			Assert.assertEquals("sss,sss", csvRow.get(0));
+			Assert.assertEquals("姓名", csvRow.get(1));
+			Assert.assertEquals("性别", csvRow.get(2));
+			Assert.assertEquals("关注\"对象\"", csvRow.get(3));
+			Assert.assertEquals("年龄", csvRow.get(4));
+			Assert.assertEquals("", csvRow.get(5));
+			Assert.assertEquals("\"", csvRow.get(6));
+		});
 	}
 	
 	@Test
 	@Ignore
 	public void writeTest() {
-		CsvWriter writer = CsvUtil.getWriter("e:/testWrite.csv", CharsetUtil.CHARSET_UTF_8);
+		CsvWriter writer = CsvUtil.getWriter("d:/test/testWrite.csv", CharsetUtil.CHARSET_UTF_8);
 		writer.write(
 				new String[] {"a1", "b1", "c1", "123345346456745756756785656"}, 
 				new String[] {"a2", "b2", "c2"}, 
 				new String[] {"a3", "b3", "c3"}
 		);
 	}
-	
+
+	@Test
+	@Ignore
+	public void readLfTest(){
+		final CsvReader reader = CsvUtil.getReader();
+		final CsvData read = reader.read(FileUtil.file("d:/test/rw_test.csv"));
+		for (CsvRow row : read) {
+			Console.log(row);
+		}
+	}
 }

+ 1 - 1
hutool-core/src/test/resources/test.csv

@@ -1 +1 @@
-"sss,sss",姓名,"性别",关注"对象",年龄
+"sss,sss",姓名,"性别",关注"对象",年龄,"","""