import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Stack;
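/**
 * Descriptive summary (added for readability, not in the original listing):
 * Hive UDF that removes unpaired (asymmetric) bracket/quote symbols from a string,
 * using a stack to match each opening symbol with its closing counterpart.
 */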
@Description(name = "del_asymmetric_symbol", value = "_FUNC_(string) - deletes asymmetric (unpaired) symbols in the string.")
public class UDFDeleteAsymmetricSymbol extends UDF {

    // Maps each opening symbol to its matching closing symbol.
    private HashMap<Character, Character> pairwisePunctuationMap = new HashMap<>();
    // Supported symbol pairs, listed as open/close couples.
    private String pairwisePunctuation = "()[]{}“”‘’<>《》【】()";

    public UDFDeleteAsymmetricSymbol() {
        // Characters at even indexes are opening symbols; the following odd index is the matching closing symbol.
        for (int i = 0; i < pairwisePunctuation.length(); i += 2) {
            this.pairwisePunctuationMap.put(pairwisePunctuation.charAt(i), pairwisePunctuation.charAt(i + 1));
        }
    }
    public String evaluate(String input) {
        // Hive may pass NULL values; preserve them.
        if (input == null) {
            return null;
        }
        Stack<Integer> stack = new Stack<>();
        ArrayList<Integer> tobeDeleteIndex = new ArrayList<>();
        Character currentCharacter = null;
        StringBuilder inputCleaned = new StringBuilder();
        for (int i = 0; i < input.length(); i++) {
            currentCharacter = input.charAt(i);
            // Opening symbol: remember its position.
            if (pairwisePunctuationMap.containsKey(currentCharacter)) {
                stack.push(i);
            }
            // Closing symbol.
            else if (pairwisePunctuationMap.containsValue(currentCharacter)) {
                // The stack is empty, so this closing symbol can never be paired.
                if (stack.empty()) {
                    tobeDeleteIndex.add(i);
                }
                // The closing symbol matches the opening symbol on top of the stack: the pair is complete.
                // Use equals() instead of == because Character boxing breaks reference comparison for non-ASCII symbols.
                else if (pairwisePunctuationMap.get(input.charAt(stack.peek())).equals(currentCharacter)) {
                    stack.pop();
                }
                // The closing symbol does not match the top of the stack: mark it for deletion.
                else {
                    tobeDeleteIndex.add(i);
                }
            }
        }
        // Opening symbols still on the stack never found a match: delete them too.
        while (!stack.empty()) {
            tobeDeleteIndex.add(stack.pop());
        }
        // Rebuild the string, skipping every index marked for deletion.
        for (int i = 0; i < input.length(); i++) {
            if (tobeDeleteIndex.contains(i)) {
                continue;
            } else {
                inputCleaned.append(input.charAt(i));
            }
        }
        return inputCleaned.toString();
    }
    // public static void main(String[] args) {
    //     UDFDeleteAsymmetricSymbol test = new UDFDeleteAsymmetricSymbol();
    //     System.out.println(test.evaluate(""));
    // }
}
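
A minimal sketch of how the function behaves when called directly. The demo class name and the input strings below are only illustrative, not part of the original code; in Hive itself the class would typically be registered with ADD JAR and CREATE TEMPORARY FUNCTION del_asymmetric_symbol AS '...', with the jar path and package depending on the deployment.

public class UDFDeleteAsymmetricSymbolDemo {
    public static void main(String[] args) {
        UDFDeleteAsymmetricSymbol udf = new UDFDeleteAsymmetricSymbol();
        // Fully paired symbols are kept as-is.
        System.out.println(udf.evaluate("(a)[b]{c}"));   // (a)[b]{c}
        // An unmatched closing bracket is removed.
        System.out.println(udf.evaluate("a)b"));         // ab
        // An unmatched opening bracket is removed.
        System.out.println(udf.evaluate("(a(b)"));       // a(b)
        // Unpaired Chinese book-title mark 《 is removed, the paired ones are kept.
        System.out.println(udf.evaluate("《书名》《残缺")); // 《书名》残缺
    }
}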