美文网首页Android开发Android开发Android开发经验谈
Android 使用Jsoup爬取码云开源项目

Android 使用Jsoup爬取码云开源项目

作者: iot_xc | 来源:发表于2019-07-19 15:27 被阅读7次

    先放一下官方文档:https://jsoup.org/

    implementation 'org.jsoup:jsoup:1.12.1'
    

    Jsoup可以把网页解析成Document对象,然后我们根据对应的元素id或者class以及其他的属性,获取对应的信息

    几个重要且常用的方法:
     connect(url): 从URL获取HTML来解析
     getElementById(String id):通过id来获取
     getElementsByTag(String tagName):通过标签名字来获取
     getElementsByClass(String className):通过类名来获取
     getElementsByAttribute(String key):通过属性名字来获取
     getElementsByAttributeValue(String key, String value):通过指定的属性名字,属性值来获取
     getAllElements():获取所有元素
     select(String cssQuery):通过类似于css或jQuery的选择器来查找元素

    话不多说,直接上教程

    进入码云->移动开发

    码云.jpg
    按F12查看网页内容,按shift+ctrl+C选中列表可以在右边查看到对应的信息
    dev_html.jpg
    每个元素的属性都清楚后,开始写代码解析
    根据上面分析的信息先建个文章实体类
    /**
     * One project entry scraped from the Gitee explore list.
     *
     * Implements [Serializable] so that an instance can be carried in an Intent extra.
     *
     * @property avatar   avatar image URL
     * @property title    project title
     * @property label    category label
     * @property watchers number of watchers
     * @property collect  number of stars (collections)
     * @property forkNum  number of forks
     * @property desc     project description
     * @property date     date/time text shown for the project
     * @property urlLink  link opened when the entry is tapped
     */
    data class ArticleBean(
        val avatar: String,
        val title: String,
        val label: String,
        val watchers: Int,
        val collect: Int,
        val forkNum: Int,
        val desc: String,
        val date: String,
        val urlLink: String
    ) : Serializable
    

    然后使用上面提到的connect方法去获取document

    // Address of the Gitee explore page that is going to be scraped.
    val url = "https://gitee.com/explore/mobile-develop?order=recommend"
    // Download the page and parse it into a Jsoup Document.
    // NOTE: this performs network I/O; the full MainActivity listing runs it on a worker Thread.
    val document = Jsoup.connect(url).get()
    

    获取到网页内容后开始解析它,根据上面的截图可以看到,列表的内容被一个div包裹起来了,这个div的class是ui relaxed divided items explore-repo__list,那我们就根据这个class去获取我们需要的文章,注意:在填写class时要将中间的空格改成"."

    // First select the <div> that carries all of the classes ui, relaxed, divided, items and
    // explore-repo__list (the list container), then select the div.item rows from that result.
    val elements = document.select("div.ui.relaxed.divided.items.explore-repo__list").select("div.item")
    

    如果这个元素没有class,但是有id,这时候就应该把标签名后面的"."(不是上面说的空格)改为"#",也就是写成"标签名#id"的形式

    // Illustration of the id form only: "div#ui" selects the <div> whose id is "ui"; the
    // ".relaxed.divided.items.explore-repo__list" part that follows still filters by class.
    val elements = document.select("div#ui.relaxed.divided.items.explore-repo__list").select("div.item")
    
    article_item.jpg

    下面就是重复上面的步骤,根据class或者id,获取我们需要的数据,一层层解析就好了

    // Convert every project row into an ArticleBean and append it to `articles`.
    elements.forEach { item ->
        val content = item.select("div.content")
        val creatorLink = item.select("a.project-creator-link.ui.avatar.image.pull-left")
        val social = content.select("div.explore-project__meta-social.pull-right").select("a")
        // Scraped text is not guaranteed to be a plain integer and a row may have fewer than
        // three counters; fall back to 0 instead of throwing and losing the whole list.
        val count = { index: Int -> social.getOrNull(index)?.text()?.trim()?.toIntOrNull() ?: 0 }
        articles.add(
            ArticleBean(
                creatorLink.select("img").attr("src"),
                content.select("a.title.project-namespace-path").text(),
                content.select("span>a").text(),
                count(0), // watchers
                count(1), // collect
                count(2), // forkNum
                item.select("div.project-desc").text(),
                item.select("div.project-latest").text(),
                baseUrl + creatorLink.attr("href")
            )
        )
    }
    
    Video_20190719_031346_446.gif

    MainActivity:

    /**
     * Lists the projects found on Gitee's explore page in a RecyclerView.
     *
     * The page is downloaded and parsed with Jsoup on a worker thread; the parsed rows are then
     * handed to the adapter on the UI thread. Tapping a row opens [WebviewActivity] with the
     * selected [ArticleBean] as the "articleBean" extra.
     */
    class MainActivity : AppCompatActivity(), ArticleAdapter.OnItemClick {
        // Backing list shared with the adapter. It is only modified on the UI thread so the
        // adapter never observes a half-updated list.
        private val articles = ArrayList<ArticleBean>()
        private val mAdapter: ArticleAdapter by lazy {
            ArticleAdapter(articles, this)
        }

        // Prefix prepended to the href read from each row to build the link that is opened.
        private val baseUrl = "https://gitee.com"

        override fun onCreate(savedInstanceState: Bundle?) {
            super.onCreate(savedInstanceState)
            setContentView(R.layout.activity_main)
            recyclerView.run {
                mAdapter.setOnItenClick(this@MainActivity)
                adapter = mAdapter
                layoutManager = LinearLayoutManager(this@MainActivity)
                addItemDecoration(DividerItemDecoration(this@MainActivity, DividerItemDecoration.VERTICAL))
            }

            // The Jsoup request does network I/O, so it is started on its own thread.
            Thread(Runnable { jsoupData() }).start()
        }

        /**
         * Downloads the explore page, converts every `div.item` row into an [ArticleBean] and
         * publishes the result to the adapter on the UI thread.
         *
         * Must be called off the UI thread. Any failure while fetching or parsing is logged and
         * leaves the current list untouched.
         */
        private fun jsoupData() {
            val url = "https://gitee.com/explore/mobile-develop?order=recommend"

            try {
                val document = Jsoup.connect(url).get()
                // Build the result in a local list first; `articles` belongs to the UI thread.
                val parsed = document
                    .select("div.ui.relaxed.divided.items.explore-repo__list")
                    .select("div.item")
                    .map { item ->
                        val content = item.select("div.content")
                        val creatorLink = item.select("a.project-creator-link.ui.avatar.image.pull-left")
                        val social = content.select("div.explore-project__meta-social.pull-right").select("a")
                        // Scraped text is not guaranteed to be a plain integer and a row may have
                        // fewer than three counters; use 0 rather than aborting the whole load.
                        val count = { index: Int ->
                            social.getOrNull(index)?.text()?.trim()?.toIntOrNull() ?: 0
                        }
                        ArticleBean(
                            creatorLink.select("img").attr("src"),
                            content.select("a.title.project-namespace-path").text(),
                            content.select("span>a").text(),
                            count(0), // watchers
                            count(1), // collect
                            count(2), // forkNum
                            item.select("div.project-desc").text(),
                            item.select("div.project-latest").text(),
                            baseUrl + creatorLink.attr("href")
                        )
                    }
                runOnUiThread {
                    articles.addAll(parsed)
                    mAdapter.notifyDataSetChanged()
                }
            } catch (e: Exception) {
                // e.message may be null; also pass the throwable so the stack trace is logged.
                Log.e("error--->", e.message ?: e.toString(), e)
            }
        }

        /** Opens [WebviewActivity] for the tapped row, passing the bean as a Serializable extra. */
        override fun onClick(articleBean: ArticleBean, position: Int) {
            val intent = Intent(this, WebviewActivity::class.java)
            intent.putExtra("articleBean", articleBean)
            startActivity(intent)
        }
    }
    

    adapter:

    /**
     * RecyclerView adapter that renders one [ArticleBean] per row.
     *
     * @param items   rows to display; the adapter reads this list directly
     * @param content context handed to Glide when loading avatars
     */
    class ArticleAdapter(var items: List<ArticleBean>, val content: Context): RecyclerView.Adapter<ArticleAdapter.ViewHolder>() {

        private var onItemClick: OnItemClick? = null

        /**
         * Registers the listener notified when a row is tapped.
         * The method name (including its spelling) is kept as-is because callers depend on it.
         */
        fun setOnItenClick(onItemClick: OnItemClick) {
            this.onItemClick = onItemClick
        }

        override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): ViewHolder {
            val view = LayoutInflater.from(parent.context).inflate(R.layout.article_item_layout, parent, false)
            val holder = ViewHolder(view)
            // Attach unconditionally and look the listener up at click time, so a listener
            // registered after the holders were created still receives clicks.
            holder.itemView.setOnClickListener {
                val position = holder.adapterPosition
                // adapterPosition is NO_POSITION when the row no longer maps to an adapter item.
                if (position != RecyclerView.NO_POSITION) {
                    onItemClick?.onClick(items[position], position)
                }
            }
            return holder
        }

        override fun getItemCount(): Int = items.size

        override fun onBindViewHolder(holder: ViewHolder, position: Int) {
            val item = items[position]
            Glide.with(content).load(item.avatar).apply(RequestOptions.circleCropTransform()).into(holder.ivAvatar)

            holder.tvTitle.text = item.title
            holder.tvContent.text = item.desc
            holder.tvWatchers.text = item.watchers.toString()
            holder.tvCollect.text = item.collect.toString()
            holder.tvFork.text = item.forkNum.toString()
            holder.tvDate.text = item.date
        }

        /** Caches the views of one row of R.layout.article_item_layout. */
        class ViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
            val ivAvatar: ImageView = itemView.findViewById(R.id.iv_avatar)
            val tvTitle: TextView = itemView.findViewById(R.id.tv_title)
            val tvContent: TextView = itemView.findViewById(R.id.tv_content)
            val tvWatchers: TextView = itemView.findViewById(R.id.tv_watchers)
            val tvCollect: TextView = itemView.findViewById(R.id.tv_collect)
            val tvFork: TextView = itemView.findViewById(R.id.tv_fork)
            val tvDate: TextView = itemView.findViewById(R.id.tv_date)
        }

        /** Callback invoked with the tapped row's bean and its adapter position. */
        interface OnItemClick{
            fun onClick(articleBean: ArticleBean, position: Int)
        }
    }
    

    有什么不懂的,请留言告诉我

    相关文章

      网友评论

        本文标题:Android 使用Jsoup爬取码云开源项目

        本文链接:https://www.haomeiwen.com/subject/pdnjlctx.html