美文网首页
java-web过滤器-富文本

java-web过滤器-富文本

作者: 木果渣 | 来源:发表于2018-04-19 22:30 被阅读0次
    现在需要解决的问题是,针对某些富文本的接口,需要保留常用的html富文本标签,不能完全过滤。
    

    针对某些接口(或者说URL),这个好办:直接在过滤器里根据请求的URI地址进行判断,对富文本接口走单独的过滤逻辑。

    /**
     * Wraps the incoming request in a parameter-sanitizing wrapper before passing it down the chain.
     *
     * <p>Requests whose application-relative path starts with {@code /filter/richText} are wrapped in
     * {@code RichTextParamtersWrapper}; every other request is wrapped in {@code ModifyParametersWrapper}.
     *
     * @param httpServletRequest  the current request
     * @param httpServletResponse the current response, passed through unchanged
     * @param filterChain         the remaining filter chain
     * @throws ServletException if a downstream filter or servlet fails
     * @throws IOException      if a downstream filter or servlet fails on I/O
     */
    @Override
    protected void doFilterInternal(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse, FilterChain filterChain) throws ServletException, IOException {

        // getRequestURI() includes the context path, so strip it first; otherwise the prefix
        // check below only works when the application is deployed at the root context.
        String uri = httpServletRequest.getRequestURI();
        String contextPath = httpServletRequest.getContextPath();
        if (contextPath != null && !contextPath.isEmpty() && uri.startsWith(contextPath)) {
            uri = uri.substring(contextPath.length());
        }

        // NOTE(review): the URI is not normalized here; confirm the container rejects or normalizes
        // paths such as "/filter/richText/../x" so the more permissive wrapper cannot be selected
        // for other endpoints.
        if (uri.startsWith("/filter/richText")) {
            // Rich-text endpoints: use the rich-text wrapper.
            RichTextParamtersWrapper richTextWrapper = new RichTextParamtersWrapper(httpServletRequest);
            filterChain.doFilter(richTextWrapper, httpServletResponse);
            return;
        }

        // Every other endpoint: use the default wrapper.
        ModifyParametersWrapper defaultWrapper = new ModifyParametersWrapper(httpServletRequest);
        filterChain.doFilter(defaultWrapper, httpServletResponse);
    }
    

    难办的是针对富文本常见的标签,正则是不会写的,这辈子都写不来正则( ╯□╰ )
    搜了一下,可以用Jsoup来解决。它的主要功能是解析HTML,常被用来做爬虫;我们可以通过配置白名单的方式实现富文本的过滤(不建议用黑名单,白名单更容易把控)。
    参考文档
    https://blog.csdn.net/skyrunner06/article/details/25876693
    添加的依赖

     <dependency>
           <groupId>org.jsoup</groupId>
           <artifactId>jsoup</artifactId>
           <version>1.8.3</version>
    </dependency>
    
    通过加载白名单配置文件的方式(改起来方便),将允许的标签、属性和协议添加到Jsoup的Whitelist规则里。
    /**
     * Builds and caches the Jsoup {@link Whitelist} used to clean rich-text input.
     *
     * <p>The whitelist starts from {@code Whitelist.relaxed()} and is extended with the tags,
     * attributes and URL protocols declared in the classpath resource {@code /whitelist.conf}
     * (a JSON document with a {@code "whiteList"} and a {@code "protocols"} section).
     */
    public class JsoupUtil {

        /** Classpath location of the JSON whitelist configuration. */
        private static final String CONFIG_LOCATION = "/whitelist.conf";

        /**
         * Cached whitelist; {@code null} until {@link #initWhiteList()} has completed once.
         * Declared {@code volatile} so that a fully built instance is safely published to other threads.
         */
        public static volatile Whitelist whitelist = null;

        /**
         * Returns the configured whitelist, building it from the configuration file on first use.
         *
         * <p>The whitelist is built into a local variable and only assigned to {@link #whitelist}
         * once complete, so concurrent callers never observe a partially configured instance.
         *
         * @return the shared, fully configured whitelist
         * @throws IllegalStateException if the configuration cannot be read or is malformed
         */
        public static Whitelist initWhiteList() {
            Whitelist current = whitelist;
            if (current == null) {
                // Lock on a shared monitor; a freshly created lock object would not exclude anyone.
                synchronized (JsoupUtil.class) {
                    current = whitelist;
                    if (current == null) {
                        current = buildWhitelist();
                        whitelist = current;
                    }
                }
            }
            return current;
        }

        /** Creates a new whitelist from {@code Whitelist.relaxed()} plus the configured additions. */
        private static Whitelist buildWhitelist() {
            JSONObject config = JSONObject.parseObject(readConfig());
            if (config == null) {
                throw new IllegalStateException("Jsoup whitelist configuration " + CONFIG_LOCATION + " is empty");
            }

            Whitelist result = Whitelist.relaxed();
            addTagsAndAttributes(result, config.getJSONObject("whiteList"));
            addProtocols(result, config.getJSONObject("protocols"));
            return result;
        }

        /** Reads the whole configuration resource into a string. */
        private static String readConfig() {
            Resource resource = new ClassPathResource(CONFIG_LOCATION);
            StringBuilder text = new StringBuilder();
            // getInputStream() is used instead of getFile() so the resource can also be read when it
            // is packaged inside a jar. The file is decoded as UTF-8 rather than the platform default
            // charset (assumes the configuration is stored as UTF-8 - TODO confirm).
            try (InputStream input = resource.getInputStream();
                 java.io.Reader reader = new java.io.InputStreamReader(input, java.nio.charset.StandardCharsets.UTF_8)) {
                char[] buffer = new char[4096];
                int count;
                while ((count = reader.read(buffer)) != -1) {
                    text.append(buffer, 0, count);
                }
            } catch (IOException e) {
                throw new IllegalStateException("Unable to read Jsoup whitelist configuration " + CONFIG_LOCATION, e);
            }
            return text.toString();
        }

        /**
         * Registers every tag of the {@code "whiteList"} section together with its attribute names.
         * A missing section is treated as empty.
         */
        private static void addTagsAndAttributes(Whitelist target, JSONObject tagSection) {
            if (tagSection == null) {
                return;
            }
            for (Map.Entry<String, Object> tagEntry : tagSection.entrySet()) {
                String tag = tagEntry.getKey();
                target.addTags(tag);

                JSONObject attributes = (JSONObject) tagEntry.getValue();
                // Only the attribute NAMES are used. The values in the configuration (for example
                // the regular expressions given for "style") are not read, so attribute values are
                // not restricted by them.
                for (String attribute : attributes.keySet()) {
                    target.addAttributes(tag, attribute);
                }
            }
        }

        /**
         * Registers the allowed URL protocols of the {@code "protocols"} section. Each key has the
         * form {@code tag.attribute}, e.g. {@code "a.href": ["ftp", "http", "https"]} allows links
         * in {@code <a href>} to use ftp, http and https. A missing section is treated as empty.
         */
        private static void addProtocols(Whitelist target, JSONObject protocolSection) {
            if (protocolSection == null) {
                return;
            }
            for (Map.Entry<String, Object> protocolEntry : protocolSection.entrySet()) {
                String selector = protocolEntry.getKey();
                int dot = selector.indexOf('.');
                if (dot <= 0 || dot == selector.length() - 1) {
                    throw new IllegalStateException(
                            "Invalid protocol key '" + selector + "' in " + CONFIG_LOCATION + ", expected 'tag.attribute'");
                }
                String tag = selector.substring(0, dot);
                String attribute = selector.substring(dot + 1);

                JSONArray protocols = JSONArray.parseArray(protocolEntry.getValue().toString());
                for (int i = 0; i < protocols.size(); i++) {
                    target.addProtocols(tag, attribute, protocols.getString(i));
                }
            }
        }
    }
    
    使用时:
     /**
         * Cleans a rich-text value, keeping only the HTML tags, attributes and protocols
         * permitted by the whitelist returned from {@code JsoupUtil.initWhiteList()}.
         *
         * @param value the raw rich-text input; may be {@code null}
         * @return the cleaned HTML, or {@code null} when {@code value} is {@code null}
         */
        public static String richText(String value){
            // A missing value has nothing to clean; return it unchanged instead of handing null to Jsoup.
            if (value == null) {
                return null;
            }
            // Use the returned instance rather than re-reading the mutable static field.
            Whitelist allowed = JsoupUtil.initWhiteList();
            return Jsoup.clean(value, "", allowed);
        }
    

    最后是白名单配置文件。注意:上面的JsoupUtil只读取了每个标签下的属性名,属性对应的值(例如style后面的正则表达式)并没有被使用,所以这些正则不会对属性内容起到限制作用。

    {
        "whiteList":{
            "a":{
                "href":"",
                "title":""
            },
            "b":{
    
            },
            "blockquote":{
                "cite":""
            },
            "br":{
    
            },
            "caption":{
    
            },
            "cite":{
    
            },
            "code":{
    
            },
            "col":{
                "span":"",
                "width":""
            },
            "colgroup":{
                "span":"",
                "width":""
            },
            "dd":{
    
            },
            "div":{
                "style":"/^text-align:\\s*(left|right|center);?\\s*$/i"
            },
            "dl":{
    
            },
            "dt":{
    
            },
            "em":{
    
            },
            "h1":{
    
            },
            "h2":{
    
            },
            "h3":{
    
            },
            "h4":{
    
            },
            "h5":{
    
            },
            "h6":{
    
            },
            "i":{
    
            },
            "img":{
                "align":"",
                "alt":"",
                "height":"",
                "src":"",
                "title":"",
                "width":""
            },
            "li":{
                "class":"",
                "style":"/^text-align:\\s*(left|right|center);?\\s*$/i"
            },
            "ol":{
                "start":"",
                "type":""
            },
            "p":{
                  "style":"/^text-align:\\s*(left|right|center);?\\s*$/i"
                },
            "pre":{
    
            },
            "q":{
                "cite":""
            },
            "small":{
    
            },
            "span":{
                "style":"/^\\s*font-family\\s*:\\s*(('|\\\"|\"|')?(楷体|楷体_GB2312|宋体|微软雅黑|黑体|,|\\s|\\w|sans-serif)('|\\\"|\"|')?)+;?\\s*|\\s*(color|font-size|background-color)\\s*:\\s*(#\\w*|[\\w\\s]*|rgb\\s*\\(\\s*\\d+\\s*,\\s*\\d+\\s*,\\s*\\d+\\s*\\));?\\s*|\\s*text-decoration\\s*:\\s*(underline|overline|line-through|blink)\\s*;?\\s*$/i"
    
             },
    
            "strike":{
    
            },
            "strong":{
    
            },
            "sub":{
    
            },
            "sup":{
    
            },
            "table":{
                "summary":"",
                "width":""
            },
            "tbody":{
    
            },
            "td":{
                "abbr":"",
                "axis":"",
                "colspan":"",
                "rowspan":"",
                "width":""
            },
            "tfoot":{
    
            },
            "th":{
                "abbr":"",
                "axis":"",
                "colspan":"",
                "rowspan":"",
                "scope":"",
                "width":""
            },
            "thead":{
    
            },
            "tr":{
    
            },
            "u":{
    
            },
            "ul":{
                "type":"",
                "class":""
            }
        },
        "protocols":{
            "a.href":[
                "ftp",
                "http",
                "https",
                "mailto"
            ],
            "blockquote.cite":[
                "http",
                "https"
            ],
            "cite.cite":[
                "http",
                "https"
            ],
            "img.src":[
                "http",
                "https"
            ],
            "q.cite":[
                "http",
                "https"
            ]
        }
    }
    

    一个针对富文本的过滤器get√

    相关文章

      网友评论

          本文标题:java-web过滤器-富文本

          本文链接:https://www.haomeiwen.com/subject/uibgkftx.html