6 years ago · 2e2d43d764
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@
 
				 * 【crypto】     RSA算法中，BlockSize长度策略调整（issue#721@Github）

			
 
				 * 【crypto】     删除SM2Engine，使用BC库中的对象替代

			
 
				 * 【crypto】     增加PemUtil工具类

			
 
				+* 【dfa   】     WordTree增加Filter，支持自定义特殊字符过滤器

			
 
				 

			
 
				 ### Bug修复

			
 
				 

			
--- a/hutool-core/src/main/java/cn/hutool/core/lang/Filter.java
+++ b/hutool-core/src/main/java/cn/hutool/core/lang/Filter.java
@@ -2,13 +2,14 @@ package cn.hutool.core.lang;
 
				 

			
 
				 /**

			
 
				  * 过滤器接口

			
 
				- * @author Looly

			
 
				  *

			
 
				+ * @author Looly

			
 
				  */

			
 
				 @FunctionalInterface

			
 
				 public interface Filter<T> {

			
 
				 	/**

			
 
				 	 * 是否接受对象

			
 
				+	 *

			
 
				 	 * @param t 检查的对象

			
 
				 	 * @return 是否接受对象

			
 
				 	 */

			
--- a/hutool-crypto/src/test/java/cn/hutool/crypto/test/BCUtilTest.java
+++ b/hutool-crypto/src/test/java/cn/hutool/crypto/test/BCUtilTest.java
@@ -9,6 +9,9 @@ import org.junit.Test;
 
				 

			
 
				 public class BCUtilTest {

			
 
				 

			
 
				+	/**

			
 
				+	 * 密钥生成来自：https://i.goto327.top/CryptTools/SM2.aspx?tdsourcetag=s_pctim_aiomsg

			
 
				+	 */

			
 
				 	@Test

			
 
				 	public void createECPublicKeyParametersTest() {

			
 
				 		String x = "706AD9DAA3E5CEAC3DA59F583429E8043BAFC576BE10092C4EA4D8E19846CA62";

			
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
@@ -1,4 +1,5 @@
 
				 package cn.hutool.dfa;
			
 
				+
			
 
				 import java.util.ArrayList;
			
 
				 import java.util.Collection;
			
 
				 import java.util.HashMap;
			
@@ -7,6 +8,7 @@ import java.util.List;
 
				 import java.util.Set;
			
 
				 
			
 
				 import cn.hutool.core.collection.CollectionUtil;
			
 
				+import cn.hutool.core.lang.Filter;
			
 
				 import cn.hutool.core.text.StrBuilder;
			
 
				 import cn.hutool.core.util.StrUtil;
			
 
				 
			
@@ -16,72 +18,94 @@ import cn.hutool.core.util.StrUtil;
 
				  * 单词树使用group区分不同的关键字集合，不同的分组可以共享树枝，避免重复建树。<br>
			
 
				  * 单词树使用树状结构表示一组单词。<br>
			
 
				  * 例如：红领巾，红河构建树后为：<br>
			
 
				- *                 红                    <br>
			
 
				- *              /      \                 <br>
			
 
				- *           领         河             <br>
			
 
				- *          /                            <br>
			
 
				- *        巾                            <br>
			
 
				- *其中每个节点都是一个WordTree对象，查找时从上向下查找。<br>
			
 
				- * @author Looly
			
 
				+ * 红                    <br>
			
 
				+ * /      \                 <br>
			
 
				+ * 领         河             <br>
			
 
				+ * /                            <br>
			
 
				+ * 巾                            <br>
			
 
				+ * 其中每个节点都是一个WordTree对象，查找时从上向下查找。<br>
			
 
				  *
			
 
				+ * @author Looly
			
 
				  */
			
 
				-public class WordTree extends HashMap<Character, WordTree>{
			
 
				+public class WordTree extends HashMap<Character, WordTree> {
			
 
				 	private static final long serialVersionUID = -4646423269465809276L;
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 敏感词字符末尾标识，用于标识单词末尾字符
			
 
				 	 */
			
 
				 	private Set<Character> endCharacterSet = new HashSet<>();
			
 
				-	
			
 
				+	/**
			
 
				+	 * 字符过滤规则，通过定义字符串过滤规则，过滤不需要的字符，当accept为false时，此字符不参与匹配
			
 
				+	 */
			
 
				+	private Filter<Character> charFilter = StopChar::isNotStopChar;
			
 
				+
			
 
				 	//--------------------------------------------------------------------------------------- Constructor start
			
 
				+
			
 
				 	/**
			
 
				 	 * 默认构造
			
 
				 	 */
			
 
				 	public WordTree() {
			
 
				 	}
			
 
				 	//--------------------------------------------------------------------------------------- Constructor end
			
 
				-	
			
 
				+
			
 
				+	/**
			
 
				+	 * 设置字符过滤规则，通过定义字符串过滤规则，过滤不需要的字符<br>
			
 
				+	 * 当accept为false时，此字符不参与匹配
			
 
				+	 *
			
 
				+	 * @param charFilter 过滤函数
			
 
				+	 * @return this
			
 
				+	 * @since 5.2.0
			
 
				+	 */
			
 
				+	public WordTree setCharFilter(Filter<Character> charFilter) {
			
 
				+		this.charFilter = charFilter;
			
 
				+		return this;
			
 
				+	}
			
 
				+
			
 
				 	//------------------------------------------------------------------------------- add word
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 增加一组单词
			
 
				+	 *
			
 
				 	 * @param words 单词集合
			
 
				 	 */
			
 
				-	public void addWords(Collection<String> words){
			
 
				-		if(false == (words instanceof Set)){
			
 
				+	public void addWords(Collection<String> words) {
			
 
				+		if (false == (words instanceof Set)) {
			
 
				 			words = new HashSet<>(words);
			
 
				 		}
			
 
				 		for (String word : words) {
			
 
				 			addWord(word);
			
 
				 		}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 增加一组单词
			
 
				+	 *
			
 
				 	 * @param words 单词数组
			
 
				 	 */
			
 
				-	public void addWords(String... words){
			
 
				+	public void addWords(String... words) {
			
 
				 		HashSet<String> wordsSet = CollectionUtil.newHashSet(words);
			
 
				 		for (String word : wordsSet) {
			
 
				 			addWord(word);
			
 
				 		}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 添加单词，使用默认类型
			
 
				+	 *
			
 
				 	 * @param word 单词
			
 
				 	 */
			
 
				 	public void addWord(String word) {
			
 
				+		final Filter<Character> charFilter = this.charFilter;
			
 
				 		WordTree parent = null;
			
 
				 		WordTree current = this;
			
 
				 		WordTree child;
			
 
				 		char currentChar = 0;
			
 
				 		int length = word.length();
			
 
				-		for(int i = 0; i < length; i++){
			
 
				+		for (int i = 0; i < length; i++) {
			
 
				 			currentChar = word.charAt(i);
			
 
				-			if(false == StopChar.isStopChar(currentChar)){//只处理合法字符
			
 
				+			if (charFilter.accept(currentChar)) {//只处理合法字符
			
 
				 				child = current.get(currentChar);
			
 
				-				if(child == null){
			
 
				+				if (child == null) {
			
 
				 					//无子类，新建一个子节点后存放下一个字符
			
 
				 					child = new WordTree();
			
 
				 					current.put(currentChar, child);
			
@@ -90,79 +114,86 @@ public class WordTree extends HashMap<Character, WordTree>{
 
				 				current = child;
			
 
				 			}
			
 
				 		}
			
 
				-		if(null != parent){
			
 
				+		if (null != parent) {
			
 
				 			parent.setEnd(currentChar);
			
 
				 		}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	//------------------------------------------------------------------------------- match
			
 
				+
			
 
				 	/**
			
 
				 	 * 指定文本是否包含树中的词
			
 
				+	 *
			
 
				 	 * @param text 被检查的文本
			
 
				 	 * @return 是否包含
			
 
				 	 */
			
 
				-	public boolean isMatch(String text){
			
 
				-		if(null == text){
			
 
				+	public boolean isMatch(String text) {
			
 
				+		if (null == text) {
			
 
				 			return false;
			
 
				 		}
			
 
				 		return null != match(text);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 获得第一个匹配的关键字
			
 
				+	 *
			
 
				 	 * @param text 被检查的文本
			
 
				 	 * @return 匹配到的关键字
			
 
				 	 */
			
 
				-	public String match(String text){
			
 
				-		if(null == text){
			
 
				+	public String match(String text) {
			
 
				+		if (null == text) {
			
 
				 			return null;
			
 
				 		}
			
 
				 		List<String> matchAll = matchAll(text, 1);
			
 
				-		if(CollectionUtil.isNotEmpty(matchAll)){
			
 
				+		if (CollectionUtil.isNotEmpty(matchAll)) {
			
 
				 			return matchAll.get(0);
			
 
				 		}
			
 
				 		return null;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	//------------------------------------------------------------------------------- match all
			
 
				+
			
 
				 	/**
			
 
				 	 * 找出所有匹配的关键字
			
 
				+	 *
			
 
				 	 * @param text 被检查的文本
			
 
				 	 * @return 匹配的词列表
			
 
				 	 */
			
 
				 	public List<String> matchAll(String text) {
			
 
				 		return matchAll(text, -1);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 找出所有匹配的关键字
			
 
				-	 * @param text 被检查的文本
			
 
				+	 *
			
 
				+	 * @param text  被检查的文本
			
 
				 	 * @param limit 限制匹配个数
			
 
				 	 * @return 匹配的词列表
			
 
				 	 */
			
 
				 	public List<String> matchAll(String text, int limit) {
			
 
				 		return matchAll(text, limit, false, false);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 找出所有匹配的关键字<br>
			
 
				 	 * 密集匹配原则：假如关键词有 ab,b，文本是abab，将匹配 [ab,b,ab]<br>
			
 
				 	 * 贪婪匹配（最长匹配）原则：假如关键字a,ab，最长匹配将匹配[a, ab]
			
 
				-	 * 
			
 
				-	 * @param text 被检查的文本
			
 
				-	 * @param limit 限制匹配个数
			
 
				+	 *
			
 
				+	 * @param text           被检查的文本
			
 
				+	 * @param limit          限制匹配个数
			
 
				 	 * @param isDensityMatch 是否使用密集匹配原则
			
 
				-	 * @param isGreedMatch 是否使用贪婪匹配（最长匹配）原则
			
 
				+	 * @param isGreedMatch   是否使用贪婪匹配（最长匹配）原则
			
 
				 	 * @return 匹配的词列表
			
 
				 	 */
			
 
				 	public List<String> matchAll(String text, int limit, boolean isDensityMatch, boolean isGreedMatch) {
			
 
				-		if(null == text){
			
 
				+		if (null == text) {
			
 
				 			return null;
			
 
				 		}
			
 
				-		
			
 
				+
			
 
				 		List<String> foundWords = new ArrayList<>();
			
 
				 		WordTree current = this;
			
 
				 		int length = text.length();
			
 
				+		final Filter<Character> charFilter = this.charFilter;
			
 
				 		//存放查找到的字符缓存。完整出现一个词时加到foundWords中，否则清空
			
 
				 		final StrBuilder wordBuffer = StrUtil.strBuilder();
			
 
				 		char currentChar;
			
@@ -171,38 +202,38 @@ public class WordTree extends HashMap<Character, WordTree>{
 
				 			for (int j = i; j < length; j++) {
			
 
				 				currentChar = text.charAt(j);
			
 
				 //				Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
			
 
				-				if(StopChar.isStopChar(currentChar)){
			
 
				-					if(wordBuffer.length() > 0){
			
 
				+				if (false == charFilter.accept(currentChar)) {
			
 
				+					if (wordBuffer.length() > 0) {
			
 
				 						//做为关键词中间的停顿词被当作关键词的一部分被返回
			
 
				 						wordBuffer.append(currentChar);
			
 
				-					}else{
			
 
				+					} else {
			
 
				 						//停顿词做为关键词的第一个字符时需要跳过
			
 
				 						i++;
			
 
				 					}
			
 
				 					continue;
			
 
				-				}else if(false == current.containsKey(currentChar)){
			
 
				+				} else if (false == current.containsKey(currentChar)) {
			
 
				 					//非关键字符被整体略过，重新以下个字符开始检查
			
 
				 					break;
			
 
				 				}
			
 
				 				wordBuffer.append(currentChar);
			
 
				-				if(current.isEnd(currentChar)){
			
 
				+				if (current.isEnd(currentChar)) {
			
 
				 					//到达单词末尾，关键词成立，从此词的下一个位置开始查找
			
 
				 					foundWords.add(wordBuffer.toString());
			
 
				-					if(limit > 0 && foundWords.size() >= limit){
			
 
				+					if (limit > 0 && foundWords.size() >= limit) {
			
 
				 						//超过匹配限制个数，直接返回
			
 
				 						return foundWords;
			
 
				 					}
			
 
				-					if(false == isDensityMatch){
			
 
				+					if (false == isDensityMatch) {
			
 
				 						//如果非密度匹配，跳过匹配到的词
			
 
				 						i = j;
			
 
				 					}
			
 
				-					if(false == isGreedMatch){
			
 
				+					if (false == isGreedMatch) {
			
 
				 						//如果懒惰匹配（非贪婪匹配）。当遇到第一个结尾标记就结束本轮匹配
			
 
				 						break;
			
 
				 					}
			
 
				 				}
			
 
				 				current = current.get(currentChar);
			
 
				-				if(null == current){
			
 
				+				if (null == current) {
			
 
				 					break;
			
 
				 				}
			
 
				 			}
			
@@ -210,24 +241,27 @@ public class WordTree extends HashMap<Character, WordTree>{
 
				 		}
			
 
				 		return foundWords;
			
 
				 	}
			
 
				-	
			
 
				-	
			
 
				+
			
 
				+
			
 
				 	//--------------------------------------------------------------------------------------- Private method start
			
 
				+
			
 
				 	/**
			
 
				 	 * 是否末尾
			
 
				+	 *
			
 
				 	 * @param c 检查的字符
			
 
				 	 * @return 是否末尾
			
 
				 	 */
			
 
				-	private boolean isEnd(Character c){
			
 
				+	private boolean isEnd(Character c) {
			
 
				 		return this.endCharacterSet.contains(c);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/**
			
 
				 	 * 设置是否到达末尾
			
 
				+	 *
			
 
				 	 * @param c 设置结尾的字符
			
 
				 	 */
			
 
				-	private void setEnd(Character c){
			
 
				-		if(null != c){
			
 
				+	private void setEnd(Character c) {
			
 
				+		if (null != c) {
			
 
				 			this.endCharacterSet.add(c);
			
 
				 		}
			
 
				 	}
			
--- a/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
+++ b/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
@@ -16,8 +16,8 @@ import cn.hutool.dfa.WordTree;
 
				  */

			
 
				 public class DfaTest {

			
 
				 

			
 
				-	// 构建被查询的文本

			
 
				-	String text = "我有一颗大土豆，刚出锅的";

			
 
				+	// 构建被查询的文本，包含停顿词

			
 
				+	String text = "我有一颗$大土^豆，刚出锅的";

			
 
				 

			
 
				 	@Test

			
 
				 	public void matchAllTest() {

			
@@ -29,7 +29,7 @@ public class DfaTest {
 
				 		// 匹配到【大】，就不再继续匹配了，因此【大土豆】不匹配

			
 
				 		// 匹配到【刚出锅】，就跳过这三个字了，因此【出锅】不匹配（由于刚首先被匹配，因此长的被匹配，最短匹配只针对第一个字相同选最短）

			
 
				 		List<String> matchAll = tree.matchAll(text, -1, false, false);

			
 
				-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土豆", "刚出锅"));

			
 
				+		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土^豆", "刚出锅"));

			
 
				 	}

			
 
				 

			
 
				 	/**

			
@@ -45,7 +45,7 @@ public class DfaTest {
 
				 		// 【大】被匹配，最短匹配原则【大土豆】被跳过，【土豆继续被匹配】

			
 
				 		// 【刚出锅】被匹配，由于不跳过已经匹配的词，【出锅】被匹配

			
 
				 		List<String> matchAll = tree.matchAll(text, -1, true, false);

			
 
				-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土豆", "刚出锅", "出锅"));

			
 
				+		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土^豆", "刚出锅", "出锅"));

			
 
				 	}

			
 
				 

			
 
				 	/**

			
@@ -61,7 +61,7 @@ public class DfaTest {
 
				 		// 匹配到【大】，由于到最长匹配，因此【大土豆】接着被匹配

			
 
				 		// 由于【大土豆】被匹配，【土豆】被跳过，由于【刚出锅】被匹配，【出锅】被跳过

			
 
				 		List<String> matchAll = tree.matchAll(text, -1, false, true);

			
 
				-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土豆", "刚出锅"));

			
 
				+		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土^豆", "刚出锅"));

			
 
				 

			
 
				 	}

			
 
				 

			
@@ -78,7 +78,7 @@ public class DfaTest {
 
				 		// 匹配到【大】，由于到最长匹配，因此【大土豆】接着被匹配，由于不跳过已经匹配的关键词，土豆继续被匹配

			
 
				 		// 【刚出锅】被匹配，由于不跳过已经匹配的词，【出锅】被匹配

			
 
				 		List<String> matchAll = tree.matchAll(text, -1, true, true);

			
 
				-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土豆", "土豆", "刚出锅", "出锅"));

			
 
				+		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土^豆", "土^豆", "刚出锅", "出锅"));

			
 
				 

			
 
				 	}