Browse Source

fix excel sax null bug and add PonyCode

Looly 5 years ago
parent
commit
c99b48a4c4

+ 3 - 1
CHANGELOG.md

@@ -3,7 +3,7 @@
 
 -------------------------------------------------------------------------------------------------------------
 
-# 5.5.2 (2020-11-26)
+# 5.5.2 (2020-12-01)
 
 ### 新特性
 * 【crypto 】     KeyUtil增加重载,AES构造增加重载(issue#I25NNZ@Gitee)
@@ -28,6 +28,7 @@
 * 【core   】     修改IoUtil.read(Reader)逻辑默认关闭Reader
 * 【core   】     ZipUtil增加Zip方法(pr#222@Gitee)
 * 【all    】     增加Hutool.getAllUtils和printAllUtils方法
+* 【all    】     增加PonyCode(issue#1268@Gitee)
 
 ### Bug修复
 * 【cron   】     修复CronTimer可能死循环的问题(issue#1224@Github)
@@ -36,6 +37,7 @@
 * 【core   】     修复HexUtil.format问题(issue#I268XT@Gitee)
 * 【core   】     修复ZipUtil判断压缩文件是否位于压缩目录内的逻辑有误的问题(issue#1251@Github)
 * 【json   】     修复JSONObject.accumulate问题
+* 【core   】     修复部分xlsx文件sax方式解析空指针问题(issue#1265@Github)
 
 -------------------------------------------------------------------------------------------------------------
 

+ 218 - 0
hutool-core/src/main/java/cn/hutool/core/codec/PunyCode.java

@@ -0,0 +1,218 @@
+package cn.hutool.core.codec;
+
+import cn.hutool.core.exceptions.UtilException;
+import cn.hutool.core.util.StrUtil;
+
+/**
+ * Punycode是一个根据RFC 3492标准而制定的编码系统,主要用于把域名从地方语言所采用的Unicode编码转换成为可用于DNS系统的编码
+ * <p>
+ * 参考:https://blog.csdn.net/a19881029/article/details/18262671
+ *
+ * @author looly
+ * @since 5.5.2
+ */
+public class PunyCode {
+	private static final int TMIN = 1;
+	private static final int TMAX = 26;
+	private static final int BASE = 36;
+	private static final int INITIAL_N = 128;
+	private static final int INITIAL_BIAS = 72;
+	private static final int DAMP = 700;
+	private static final int SKEW = 38;
+	private static final char DELIMITER = '-';
+	private static final String PONY_CODE_PREFIX = "xn--";
+
+	/**
+	 * Punycodes a unicode string.
+	 *
+	 * @param input Unicode string.
+	 * @return Punycoded string.
+	 * @throws UtilException 计算异常
+	 */
+	public static String encode(String input) throws UtilException {
+		int n = INITIAL_N;
+		int delta = 0;
+		int bias = INITIAL_BIAS;
+		StringBuilder output = new StringBuilder();
+		// Copy all basic code points to the output
+		int length = input.length();
+		int b = 0;
+		char c;
+		for (int i = 0; i < length; i++) {
+			c = input.charAt(i);
+			if (isBasic(c)) {
+				output.append(c);
+				b++;
+			}
+		}
+		// Append delimiter
+		if (b > 0) {
+			output.append(DELIMITER);
+		}
+		int h = b;
+		while (h < input.length()) {
+			int m = Integer.MAX_VALUE;
+			// Find the minimum code point >= n
+			for (int i = 0; i < input.length(); i++) {
+				int c = input.charAt(i);
+				if (c >= n && c < m) {
+					m = c;
+				}
+			}
+			if (m - n > (Integer.MAX_VALUE - delta) / (h + 1)) {
+				throw new UtilException("OVERFLOW");
+			}
+			delta = delta + (m - n) * (h + 1);
+			n = m;
+			for (int j = 0; j < input.length(); j++) {
+				int c = input.charAt(j);
+				if (c < n) {
+					delta++;
+					if (0 == delta) {
+						throw new UtilException("OVERFLOW");
+					}
+				}
+				if (c == n) {
+					int q = delta;
+					for (int k = BASE; ; k += BASE) {
+						int t;
+						if (k <= bias) {
+							t = TMIN;
+						} else if (k >= bias + TMAX) {
+							t = TMAX;
+						} else {
+							t = k - bias;
+						}
+						if (q < t) {
+							break;
+						}
+						output.append((char) digit2codepoint(t + (q - t)
+								% (BASE - t)));
+						q = (q - t) / (BASE - t);
+					}
+					output.append((char) digit2codepoint(q));
+					bias = adapt(delta, h + 1, h == b);
+					delta = 0;
+					h++;
+				}
+			}
+			delta++;
+			n++;
+		}
+		return output.toString();
+	}
+
+	/**
+	 * Decode a punycoded string.
+	 *
+	 * @param input Punycode string
+	 * @return Unicode string.
+	 * @throws UtilException 计算异常
+	 */
+	public static String decode(String input) throws UtilException {
+		input = StrUtil.removePrefixIgnoreCase(input, PONY_CODE_PREFIX);
+		int n = INITIAL_N;
+		int i = 0;
+		int bias = INITIAL_BIAS;
+		StringBuilder output = new StringBuilder();
+		int d = input.lastIndexOf(DELIMITER);
+		if (d > 0) {
+			for (int j = 0; j < d; j++) {
+				char c = input.charAt(j);
+				if (false == isBasic(c)) {
+					throw new UtilException("BAD_INPUT");
+				}
+				output.append(c);
+			}
+			d++;
+		} else {
+			d = 0;
+		}
+		while (d < input.length()) {
+			int oldi = i;
+			int w = 1;
+			for (int k = BASE; ; k += BASE) {
+				if (d == input.length()) {
+					throw new UtilException("BAD_INPUT");
+				}
+				int c = input.charAt(d++);
+				int digit = codepoint2digit(c);
+				if (digit > (Integer.MAX_VALUE - i) / w) {
+					throw new UtilException("OVERFLOW");
+				}
+				i = i + digit * w;
+				int t;
+				if (k <= bias) {
+					t = TMIN;
+				} else if (k >= bias + TMAX) {
+					t = TMAX;
+				} else {
+					t = k - bias;
+				}
+				if (digit < t) {
+					break;
+				}
+				w = w * (BASE - t);
+			}
+			bias = adapt(i - oldi, output.length() + 1, oldi == 0);
+			if (i / (output.length() + 1) > Integer.MAX_VALUE - n) {
+				throw new UtilException("OVERFLOW");
+			}
+			n = n + i / (output.length() + 1);
+			i = i % (output.length() + 1);
+			output.insert(i, (char) n);
+			i++;
+		}
+		return output.toString();
+	}
+
+	public static int adapt(int delta, int numpoints, boolean first) {
+		if (first) {
+			delta = delta / DAMP;
+		} else {
+			delta = delta / 2;
+		}
+		delta = delta + (delta / numpoints);
+		int k = 0;
+		while (delta > ((BASE - TMIN) * TMAX) / 2) {
+			delta = delta / (BASE - TMIN);
+			k = k + BASE;
+		}
+		return k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
+	}
+
+	public static boolean isBasic(char c) {
+		return c < 0x80;
+	}
+
+	public static int digit2codepoint(int d) throws UtilException {
+		if (d < 26) {
+			// 0..25 : 'a'..'z'
+			return d + 'a';
+		} else if (d < 36) {
+			// 26..35 : '0'..'9';
+			return d - 26 + '0';
+		} else {
+			throw new UtilException("BAD_INPUT");
+		}
+	}
+
+	public static int codepoint2digit(int c) throws UtilException {
+		if (c - '0' < 10) {
+			// '0'..'9' : 26..35
+			return c - '0' + 26;
+		} else if (c - 'a' < 26) {
+			// 'a'..'z' : 0..25
+			return c - 'a';
+		} else {
+			throw new UtilException("BAD_INPUT");
+		}
+	}
+
+	public static void main(String[] args) {
+		String strPunycode = PONY_CODE_PREFIX + encode("北京大学");
+		System.out.println(strPunycode);
+		String strChinese = decode("xn--1lq90ic7fzpc");
+		System.out.println(strChinese);
+	}
+}

+ 6 - 6
hutool-core/src/main/java/cn/hutool/core/io/watch/WatchMonitor.java

@@ -193,7 +193,7 @@ public class WatchMonitor extends WatchServer {
 	 *
 	 * @param uri     URI
 	 * @param watcher {@link Watcher}
-	 * @return {@link WatchMonitor}
+	 * @return WatchMonitor
 	 */
 	public static WatchMonitor createAll(URI uri, Watcher watcher) {
 		return createAll(Paths.get(uri), watcher);
@@ -204,7 +204,7 @@ public class WatchMonitor extends WatchServer {
 	 *
 	 * @param url     URL
 	 * @param watcher {@link Watcher}
-	 * @return {@link WatchMonitor}
+	 * @return WatchMonitor
 	 */
 	public static WatchMonitor createAll(URL url, Watcher watcher) {
 		try {
@@ -219,7 +219,7 @@ public class WatchMonitor extends WatchServer {
 	 *
 	 * @param file    被监听文件
 	 * @param watcher {@link Watcher}
-	 * @return {@link WatchMonitor}
+	 * @return WatchMonitor
 	 */
 	public static WatchMonitor createAll(File file, Watcher watcher) {
 		return createAll(file.toPath(), watcher);
@@ -230,7 +230,7 @@ public class WatchMonitor extends WatchServer {
 	 *
 	 * @param path    路径
 	 * @param watcher {@link Watcher}
-	 * @return {@link WatchMonitor}
+	 * @return WatchMonitor
 	 */
 	public static WatchMonitor createAll(String path, Watcher watcher) {
 		return createAll(Paths.get(path), watcher);
@@ -241,7 +241,7 @@ public class WatchMonitor extends WatchServer {
 	 *
 	 * @param path    路径
 	 * @param watcher {@link Watcher}
-	 * @return {@link WatchMonitor}
+	 * @return WatchMonitor
 	 */
 	public static WatchMonitor createAll(Path path, Watcher watcher) {
 		final WatchMonitor watchMonitor = create(path, EVENTS_ALL);
@@ -348,7 +348,7 @@ public class WatchMonitor extends WatchServer {
 	 * 多个监听请使用{@link WatcherChain}
 	 *
 	 * @param watcher 监听
-	 * @return {@link WatchMonitor}
+	 * @return WatchMonitor
 	 */
 	public WatchMonitor setWatcher(Watcher watcher) {
 		this.watcher = watcher;

+ 13 - 9
hutool-poi/src/main/java/cn/hutool/poi/excel/sax/Excel07SaxReader.java

@@ -229,16 +229,20 @@ public class Excel07SaxReader extends DefaultHandler implements ExcelSaxReader<E
 	 */
 	@Override
 	public void characters(char[] ch, int start, int length) {
-		switch (this.curElementName){
-			case v:
-				// 得到单元格内容的值
-				lastContent.append(ch, start, length);
-				break;
-			case f:
-				// 得到单元格内容的值
-				lastFormula.append(ch, start, length);
-				break;
+		final ElementName elementName = this.curElementName;
+		if(null != elementName){
+			switch (elementName){
+				case v:
+					// 得到单元格内容的值
+					lastContent.append(ch, start, length);
+					break;
+				case f:
+					// 得到单元格内容的值
+					lastFormula.append(ch, start, length);
+					break;
+			}
 		}
+		// 其它标签忽略
 	}
 
 	// --------------------------------------------------------------------------------------- Private method start