美文网首页
java提取二级域名

java提取二级域名

作者: 赵海洋 | 来源:发表于2017-08-01 12:42 被阅读0次

项目中需要提取域名,参考了 http://blog.csdn.net/kuluzs/article/details/51986759 并加以改进后,写出此工具类(test 方法仅为方便测试,使用时可删除,也可另建测试类)。

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts a domain name of a requested depth from a URL.
 *
 * <p>The host part of the URL is isolated first (scheme, user-info, port, path,
 * query and fragment are discarded), and all matching is done on that host only.
 * This prevents digits or dotted words inside the path or query string from
 * being mistaken for an IP address or a domain.
 *
 * <p>Only the public suffixes listed in {@link #RE_TOP_DOMAIN} are recognised.
 * A host whose real suffix is not in that list (for example {@code .com.hk})
 * is matched against the first listed suffix that appears in it.
 *
 * <p>Created by terry on 2017/8/1.
 */
public class UrlUtils {
    /**
     * Recognised public suffixes. Multi-part suffixes come first so that the
     * alternation prefers {@code com.cn} over {@code com}.
     */
    private static final String RE_TOP_DOMAIN = "(com\\.cn|net\\.cn|gov\\.cn|org\\.nz|org\\.cn|com|net|org|gov|cc|biz|info|cn|co|me)";

    /** One host-name label: word characters plus the hyphen, which is legal inside a label. */
    private static final String RE_LABEL = "[\\w-]+";

    /** Highest supported value of the {@code level} argument. */
    private static final int MAX_LEVEL = 3;

    /**
     * Captures the host in group 1: optional {@code scheme://} (or bare {@code //}),
     * optional {@code user-info@}, then everything up to the port, path, query or fragment.
     */
    private static final Pattern PATTERN_HOST = Pattern.compile(
            "^(?:(?:[A-Za-z][A-Za-z0-9+.-]*:)?//)?(?:[^/?#@]*@)?([^/?#:]*)");

    /** Dotted-quad host; must cover the whole host, not merely occur inside it. */
    private static final Pattern PATTERN_IP = Pattern.compile("(?:\\d+\\.){3}\\d+");

    /** Index {@code level - 1} holds the pattern that keeps at most {@code level} labels. */
    private static final Pattern[] PATTERNS_BY_LEVEL = buildLevelPatterns();

    /** Builds one case-insensitive pattern per supported level. */
    private static Pattern[] buildLevelPatterns() {
        Pattern[] patterns = new Pattern[MAX_LEVEL];
        for (int level = 1; level <= MAX_LEVEL; level++) {
            // Up to (level - 1) extra labels in front of the mandatory one.
            String optionalLabels = level > 1
                    ? "(?:" + RE_LABEL + "\\.){0," + (level - 1) + "}"
                    : "";
            // The negative look-ahead stops "com" from matching the start of "commerce".
            String regex = optionalLabels + RE_LABEL + "\\." + RE_TOP_DOMAIN + "(?![\\w-])";
            patterns[level - 1] = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        }
        return patterns;
    }

    /**
     * Returns the domain of {@code url} limited to {@code level} labels in front
     * of the public suffix.
     *
     * <p>For {@code http://see.xidian.edu.cn/x}: level 1 gives {@code edu.cn},
     * level 2 gives {@code xidian.edu.cn}, level 3 gives {@code see.xidian.edu.cn}.
     * If the host has fewer labels than requested, the labels that exist are returned.
     * The result keeps the letter case used in {@code url}.
     *
     * @param url   the URL (or bare host) to inspect; may be {@code null}
     * @param level number of labels to keep in front of the suffix, 1 to 3
     * @return the IPv4 address when the host is a dotted quad (regardless of
     *         {@code level}); otherwise the matched domain; or an empty string
     *         when {@code url} is {@code null}, {@code level} is out of range,
     *         or no recognised suffix is found
     */
    public static String getDomain(String url, int level) {
        if (url == null) {
            return "";
        }

        Matcher hostMatcher = PATTERN_HOST.matcher(url.trim());
        if (!hostMatcher.find()) {
            return "";
        }
        String host = hostMatcher.group(1);

        // An IP host has no domain levels, so it is returned as is.
        if (PATTERN_IP.matcher(host).matches()) {
            return host;
        }

        if (level < 1 || level > MAX_LEVEL) {
            return "";
        }

        Matcher domainMatcher = PATTERNS_BY_LEVEL[level - 1].matcher(host);
        return domainMatcher.find() ? domainMatcher.group() : "";
    }

    /**
     * Prints the level 1 to 3 result for a handful of sample URLs.
     * Manual smoke test only; safe to delete or move to a dedicated test class.
     */
    public static void test() {
        String[] urls = {
                "http://meiwen.me/src/index.html",
                "http://1000chi.com/game/index.html",
                "http://see.xidian.edu.cn/cpp/html/1429.html",
                "https://docs.python.org/2/howto/regex.html",
                "https://www.google.com.hk/search?client=aff-cs-360chromium&hs=TSj&q=url%E8%A7%A3%E6%9E%90%E5%9F%9F%E5%90%8Dre&oq=url%E8%A7%A3%E6%9E%90%E5%9F%9F%E5%90%8Dre&gs_l=serp.3...74418.86867.0.87673.28.25.2.0.0.0.541.2454.2-6j0j1j1.8.0....0...1c.1j4.53.serp..26.2.547.IuHTj4uoyHg",
                "file:///D:/code/echarts-2.0.3/doc/example/tooltip.html",
                "http://api.mongodb.org/python/current/faq.html#is-pymongo-thread-safe",
                "https://pypi.python.org/pypi/publicsuffix/",
                "http://127.0.0.1:8000",
        };
        for (String url : urls) {
            for (int level = 1; level <= MAX_LEVEL; level++) {
                System.out.println(getDomain(url, level));
            }
        }
    }
}

相关文章

网友评论

      本文标题:java提取二级域名

      本文链接:https://www.haomeiwen.com/subject/aorjlxtx.html