美文网首页
android利用jsoup抓取数据

android利用jsoup抓取数据

作者: Peakmain | 来源:发表于2018-10-16 14:01 被阅读0次

效果图

这是简书上面的截图.png
我做的效果.png

首先分析html:


分析html.png

添加依赖

    // HTTP client used to download the page.
    implementation('com.squareup.okhttp3:okhttp:3.11.0')
    // HTML parser used to extract the article list.
    implementation('org.jsoup:jsoup:1.10.2')
    // Image loader used by the list adapter.
    implementation('com.github.bumptech.glide:glide:3.7.0')

通过OkHttp请求url,获取html字符串

    // Look up the list view that will display the scraped articles.
    mRecyclerView = findViewById(R.id.main_list);
    // Build a plain GET request for the Jianshu home page.
    Request request = new Request.Builder()
            .url("http://www.jianshu.com")
            // If the requested url needs parameters, switch to post() and submit them instead.
            .get()
            .build();
    Call call = okHttpClient.newCall(request);
    // enqueue() performs the request asynchronously and reports back through the Callback.
    // NOTE(review): OkHttp delivers these callbacks off the main thread, so anything that
    // touches views after parseHtml() must be posted to the UI thread - confirm in the caller.
    call.enqueue(new Callback() {
        @Override
        public void onFailure(Call call, IOException e) {
            Log.e("TAG", e.toString());
        }

        @Override
        public void onResponse(Call call, Response response) throws IOException {
            try {
                if (!response.isSuccessful()) {
                    // Surface HTTP errors instead of silently dropping them.
                    Log.e("TAG", "Unexpected response code: " + response.code());
                    return;
                }
                String result = response.body().string();
                // Parse the downloaded html.
                parseHtml(result);
            } finally {
                // Always release the body/connection, including for non-2xx responses
                // where the body is never read.
                response.close();
            }
        }
    });

解析Html

/**
 * Parses the article list out of the given page html and stores the result in {@code data}.
 *
 * <p>Each {@code <li>} under {@code div#list-container ul} becomes one {@link JsoupBean}.
 * List items without a title link are skipped; a missing author, abstract or image yields
 * an empty string for that field instead of throwing.
 *
 * @param html the raw html of the page to scrape
 */
private void parseHtml(String html) {
    // Convert the html into a Document object.
    Document document = Jsoup.parse(html);
    // Collect the <li> elements of the article list.
    Elements elements = document.select("div#list-container ul li");
    data = new ArrayList<>();
    for (Element element : elements) {
        Element titleLink = element.select("a.title").first();
        if (titleLink == null) {
            // Without a title link there is neither a title nor a detail url to show.
            continue;
        }
        JsoupBean homeBean = new JsoupBean();
        homeBean.setAuthor(firstText(element, "div.meta a"));
        homeBean.setTitle(titleLink.text());
        homeBean.setImageUrl(extractImagePath(element));
        // The href is combined with articleBaseUrl to form the article detail url.
        homeBean.setArticleUrl(articleBaseUrl + titleLink.attr("href"));
        homeBean.setContent(firstText(element, "p.abstract"));
        data.add(homeBean);
    }
}

/** Returns the text of the first element matching {@code cssQuery}, or "" when nothing matches. */
private String firstText(Element scope, String cssQuery) {
    Element match = scope.select(cssQuery).first();
    return match != null ? match.text() : "";
}

/**
 * Returns the thumbnail's {@code src} with its first 26 characters removed, or "" when the
 * article has no image (articles are not guaranteed to have one) or the src is too short.
 */
private String extractImagePath(Element item) {
    // Number of leading src characters dropped before the path is stored.
    final int prefixLength = 26;
    Element wrapper = item.select("a.wrap-img").first();
    if (wrapper == null) {
        return "";
    }
    Element image = wrapper.children().first();
    if (image == null) {
        return "";
    }
    String src = image.attr("src");
    // Guard the substring so an unexpectedly short src cannot throw.
    return src.length() > prefixLength ? src.substring(prefixLength) : "";
}

实体类 JsoupBean(省略getter/setter)

    // Image path of the article thumbnail; empty when the article has no image.
    // The adapter prepends its image base url before loading it.
    private String imageUrl;
    // Article title, taken from the text of the "a.title" link.
    private String title;
    // Article detail url: articleBaseUrl followed by the href of the "a.title" link.
    private String articleUrl;
    // Author name, taken from the first link inside "div.meta".
    private String author;
    // Article summary, taken from the "p.abstract" paragraph.
    private String content;
    .....

JsoupAdpater适配器

/**
 * RecyclerView adapter that shows one scraped article ({@link JsoupBean}) per row using the
 * {@code item_jsoup} layout: title, author, summary and an optional thumbnail.
 */
public class JsoupAdpater extends RecyclerView.Adapter<JsoupAdpater.ViewHolder> {
    /** Prefix joined with each bean's image path to form the url handed to Glide. */
    private static final String BASE_IMAGE_URL = "https://upload-images.jianshu.io";

    private final List<JsoupBean> data;
    private final Context mContext;

    /**
     * @param context context used for image loading and for showing toasts
     * @param data    the articles to display; the list is used as-is, not copied
     */
    public JsoupAdpater(Context context, List<JsoupBean> data) {
        this.mContext = context;
        this.data = data;
    }

    @NonNull
    @Override
    public ViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
        // NOTE(review): inflating with a null root ignores the layout params declared on the
        // row's root view; consider LayoutInflater.inflate(R.layout.item_jsoup, parent, false)
        // after checking item_jsoup.xml.
        View view = View.inflate(parent.getContext(), R.layout.item_jsoup, null);
        return new ViewHolder(view);
    }

    @Override
    public void onBindViewHolder(@NonNull ViewHolder holder, int position) {
        final JsoupBean homeBean = data.get(position);
        holder.tv_text.setText(homeBean.getContent());
        holder.tv_name.setText(homeBean.getTitle());
        holder.tv_author.setText(homeBean.getAuthor());
        if (!TextUtils.isEmpty(homeBean.getImageUrl())) {
            Glide.with(mContext).load(BASE_IMAGE_URL + homeBean.getImageUrl()).into(holder.iv_logo);
        } else {
            // Rows are recycled: cancel any load still pending for this view and remove the
            // previous row's picture so an image-less article does not show a stale image.
            Glide.clear(holder.iv_logo);
            holder.iv_logo.setImageDrawable(null);
        }
        // Tapping a row currently just shows the article url.
        holder.itemView.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                Toast.makeText(mContext, homeBean.getArticleUrl(), Toast.LENGTH_SHORT).show();
            }
        });
    }

    @Override
    public int getItemCount() {
        return data.size();
    }

    /** Caches the child views of one {@code item_jsoup} row. */
    public static class ViewHolder extends RecyclerView.ViewHolder {
        TextView tv_text;
        TextView tv_name;
        ImageView iv_logo;
        TextView tv_author;

        public ViewHolder(View itemView) {
            super(itemView);
            tv_name = itemView.findViewById(R.id.tv_name);
            iv_logo = itemView.findViewById(R.id.iv_logo);
            tv_text = itemView.findViewById(R.id.tv_text);
            tv_author = itemView.findViewById(R.id.tv_author);
        }
    }
}

相关文章

网友评论

      本文标题:android利用jsoup抓取数据

      本文链接:https://www.haomeiwen.com/subject/oichzftx.html