import requests
from bs4 import BeautifulSoup
# 51job search-results page to scrape. The keyword segment of the path is
# double percent-encoded and decodes to "Java 开发".
url = "https://search.51job.com/list/010000,000000,0000,00,9,99,Java%2520%25E5%25BC%2580%25E5%258F%2591,2,1.html"
# A timeout keeps the script from blocking forever if the server never answers.
res = requests.get(url, timeout=10)
# Decode the body as GBK when `res.text` is read below.
res.encoding = "gbk"
print(res)
# Stop on an HTTP error instead of parsing an error page into empty lists.
res.raise_for_status()
# Build the parse tree. Naming the parser avoids bs4's "no parser specified"
# warning and keeps the result independent of which parsers are installed.
soup = BeautifulSoup(res.text, "html.parser")
# Collect the job titles.
# `class_` has a trailing underscore because `class` is a Python keyword; it
# filters on the HTML "class" attribute. The original note adds that
# whitespace around "t1" in the page's attribute value can be ignored.
position_tag = soup.find_all("p", class_="t1")
# For each matched <p>, read the "title" attribute of its first <a>.
position = [tag.a["title"] for tag in position_tag]
print(position)
# Collect the company names.
company_tag = soup.find_all("span", {"class": "t2"})
# The first match is skipped, exactly as the original index loop did
# (presumably it is the table's header cell -- TODO confirm against the page).
company = [tag.a["title"] for tag in company_tag[1:]]
print(company)
# Collect the work locations.
addr_tag = soup.find_all("span", {"class": "t3"})
# The first match is skipped, exactly as the original index loop did
# (presumably it is the table's header cell -- TODO confirm against the page).
# `.string` is None for a span that does not hold exactly one text child.
addr = [tag.string for tag in addr_tag[1:]]
print(addr)
# Collect the salaries.
salary_tag = soup.find_all("span", {"class": "t4"})
# The first match is skipped, exactly as the original index loop did
# (presumably it is the table's header cell -- TODO confirm against the page).
# `.string` is None for a span that does not hold exactly one text child.
salary = [tag.string for tag in salary_tag[1:]]
print(salary)
import pandas as pd
from pandas import DataFrame
# Assemble the four lists into one table. Each list is loaded as a row and the
# transpose turns the rows into columns; lists shorter than the longest one
# are padded with missing values instead of raising.
jobinfo = DataFrame([position, company, addr, salary]).T
# Column labels: position, company, location, salary.
jobinfo.columns = ["职位", "公司", "地点", "工资"]
# Run as a script, a bare expression displays nothing, so print explicitly.
print(jobinfo.head())
print(jobinfo.describe())

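# NOTE: the lines below are not code; they look like leftover text from the
# page this script was copied from. Commented out so the file parses and runs.
#
# image.png
#
# image.png
#
# image.png
#
# image.png
# 网友评论 (reader comments)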