美文网首页
Hive UDF 删除不成对出现的符号

Hive UDF 删除不成对出现的符号

作者: 阿涛哥 | 来源:发表于2019-10-09 15:45 被阅读0次
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;


/**
 * Hive UDF that removes bracket/quote symbols which do not occur as a matched pair.
 *
 * <p>The input is scanned left to right while the positions of still-unmatched opening
 * symbols are kept on a stack. A closing symbol is kept only when it pairs with the most
 * recent unmatched opening symbol; otherwise the closing symbol is dropped and the opening
 * symbol stays pending. Opening symbols that are still pending at the end of the scan are
 * dropped as well. For example {@code "([)]"} becomes {@code "[]"}.
 */
@Description(name = "del_asymmetric_symbol", value = "_FUNC_(string) - delete_asymmetric_symbol in the string.")
public class UDFDeleteAsymmetricSymbol extends UDF {
    /** Maps each opening symbol to the closing symbol that pairs with it. */
    private final Map<Character, Character> pairwisePunctuationMap = new HashMap<>();

    /** Supported symbols laid out as consecutive (opening, closing) character pairs. */
    private final String pairwisePunctuation = "()[]{}“”‘’<>《》【】()";

    /** Builds the opening-to-closing lookup table from {@link #pairwisePunctuation}. */
    public UDFDeleteAsymmetricSymbol() {
        for (int i = 0; i + 1 < pairwisePunctuation.length(); i += 2) {
            pairwisePunctuationMap.put(pairwisePunctuation.charAt(i), pairwisePunctuation.charAt(i + 1));
        }
    }

    /**
     * Returns {@code input} with every unpaired opening or closing symbol removed.
     *
     * @param input the text to clean; may be {@code null}
     * @return the cleaned text, or {@code null} when {@code input} is {@code null}
     */
    public String evaluate(String input) {
        if (input == null) {
            // Propagate null instead of failing with a NullPointerException.
            return null;
        }

        final int length = input.length();
        // Positions of opening symbols that have not been closed yet (most recent on top).
        final Deque<Integer> pendingOpeners = new ArrayDeque<>();
        // unpaired[i] is true when the character at position i must be dropped.
        final boolean[] unpaired = new boolean[length];

        for (int i = 0; i < length; i++) {
            final char current = input.charAt(i);
            if (pairwisePunctuationMap.containsKey(current)) {
                pendingOpeners.push(i);
            } else if (pairwisePunctuationMap.containsValue(current)) {
                final Integer openerIndex = pendingOpeners.peek();
                if (openerIndex == null) {
                    // A closing symbol with nothing open before it can never be paired.
                    unpaired[i] = true;
                } else {
                    // Unbox to char so the comparison is by value: '==' on two boxed
                    // Characters compares references, which is not reliable for symbols
                    // outside the ASCII range such as the CJK brackets and curly quotes.
                    final char expectedCloser = pairwisePunctuationMap.get(input.charAt(openerIndex));
                    if (expectedCloser == current) {
                        pendingOpeners.pop();
                    } else {
                        // Wrong closer for the innermost opener: drop the closer only,
                        // the opener stays pending and may still be closed later.
                        unpaired[i] = true;
                    }
                }
            }
        }

        // Whatever is still open at the end never found its partner.
        for (int openerIndex : pendingOpeners) {
            unpaired[openerIndex] = true;
        }

        final StringBuilder cleaned = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            if (!unpaired[i]) {
                cleaned.append(input.charAt(i));
            }
        }
        return cleaned.toString();
    }
}

相关文章

网友评论

      本文标题:Hive UDF 删除不成对出现的符号

      本文链接:https://www.haomeiwen.com/subject/xfnpectx.html