去除dataframe中列为字典中的空值
data = pd.merge(data, station)
data['data'] = data['data'].map(lambda x: json.dumps(dict(map(lambda y: (y, x[y]), list(filter(lambda z: x[z] is not None, x))))))
1.坐标转换
import math
class coorTrans:
def __init__(self):
"""
:param lng: 经度
:param lat: 纬度
"""
self.x_pi = 3.14159265358979324 * 3000.0 / 180.0
self.pi = 3.1415926535897932384626 # 圆周率
self.a = 6378245.0 # 长半轴
self.ee = 0.00669342162296594323 # 偏心率平方
@staticmethod
def str_to_float(lng, lat):
"""将字符型经纬度转数字型"""
try:
if not isinstance(lng, float):
lng = float(lng)
if not isinstance(lat, float):
lat = float(lat)
return lng, lat
except Exception as e:
print(e)
print("请输入数值型的经纬度")
def gcj02_to_bd09(self, lng, lat):
"""
火星坐标(高德)转百度坐标
:return:
"""
lng, lat = coorTrans.str_to_float(lng, lat)
z = math.sqrt(lng**2 + lat**2) + 0.00002 * math.sin(lat * self.x_pi)
theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * self.x_pi)
bd_lng = z * math.cos(theta) + 0.0065
bd_lat = z * math.sin(theta) + 0.006
return bd_lng, bd_lat
def bd09_to_gcj02(self, lng, lat):
"""百度坐标(bd-090)转火星坐标(gcj-02)
百度转高德
"""
lng, lat = coorTrans.str_to_float(lng, lat)
x = lng - 0.0065
y = lat - 0.006
z = math.sqrt(x**2 + y**2) - 0.00002 * math.sin(y*self.x_pi)
theta = math.atan2(y, x) - 0.000003 * math.cos(x*self.x_pi)
gd_lng = z*math.cos(theta)
gd_lat = z*math.sin(theta)
return gd_lng, gd_lat
@staticmethod
def out_of_china(lng, lat):
"""判断是否在国内, 只对国内的做偏移矫正"""
return not (73.66 < lng < 135.05 and 3.86 < lat < 53.55)
def _transformlat(self, lng, lat):
ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat**2 + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)) # fabs函数求绝对值的函数
ret += (20.0 * math.sin(6.0 * lng * self.pi) + 20.0 * math.sin(2.0 * lng * self.pi)) * 2.0 / 3.0
ret += (20.0 * math.sin(lat * self.pi) + 40.0 * math.sin(lat / 3.0 * self.pi)) * 2.0 / 3.0
ret += (160.0 * math.sin(lat / 12.0 * self.pi) + 320 * math.sin(lat * self.pi / 30.0)) * 2.0 / 3.0
return ret
def _transformlng(self, lng, lat):
ret = 300.0 + lng + 2.0 * lat + 0.1 * lng**2 + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
ret += (20.0 * math.sin(6.0 * lng * self.pi) + 20.0 * math.sin(2.0 * lng * self.pi)) * 2.0 / 3.0
ret += (20.0 * math.sin(lng * self.pi) + 40.0 * math.sin(lng / 3.0 * self.pi)) * 2.0 / 3.0
ret += (150.0 * math.sin(lng / 12.0 * self.pi) + 300.0 * math.sin(lng / 30.0 * self.pi)) * 2.0 / 3.0
return ret
def wgs84_to_gcj02(self, lng, lat):
"""
wgs84转gcj02(火星坐标)
:return:
"""
lng, lat = coorTrans.str_to_float(lng, lat)
if coorTrans.out_of_china(lng, lat):
# 不在国内不做矫正
return lng, lat
dlat = self._transformlat(lng - 105.0, lat - 35.0)
dlng = self._transformlng(lng - 105.0, lat - 35.0)
radlat = lat / 180.0 * self.pi
magic = math.sin(radlat)
magic = 1 - self.ee * magic**2
sqrtmagic = math.sqrt(magic)
dlat = (dlat * 180.0) / ((self.a * (1 - self.ee)) / (magic * sqrtmagic) * self.pi)
dlng = (dlng * 180.0) / (self.a / sqrtmagic * math.cos(radlat) * self.pi)
mglat = lat + dlat
mglng = lng + dlng
return mglng, mglat
def gcj02_to_wgs84(self, lng, lat):
"""火星坐标gcj02转wgs84坐标"""
lng, lat = coorTrans.str_to_float(lng, lat)
if coorTrans.out_of_china(lng, lat):
return lng, lat
dlat = self._transformlat(lng - 105.0, lat - 35.0)
dlng = self._transformlng(lng - 105.0, lat - 35.0)
radlat = lat / 180.0 * self.pi
magic = math.sin(radlat)
magic = 1 - self.ee * magic**2
sqrtmagic = math.sqrt(magic)
dlat = (dlat * 180.0) / ((self.a * (1 - self.ee)) / (magic * sqrtmagic) * self.pi)
dlng = (dlng * 180.0) / (self.a / sqrtmagic * math.cos(radlat) * self.pi)
mglat = lat + dlat
mglng = lng + dlng
return lng * 2 - mglng, lat * 2 - mglat
def bd09_to_wgs84(self, lng, lat):
"""百度坐标转wgs坐标"""
lng, lat = coorTrans.str_to_float(lng, lat)
lng, lat = self.bd09_to_gcj02(lng, lat)
return self.gcj02_to_wgs84(lng, lat)
def wgs84_to_bd09(self, lng, lat):
"""wgs坐标转百度坐标"""
lng, lat = coorTrans.str_to_float(lng, lat)
lng, lat = self.wgs84_to_gcj02(lng, lat)
return self.gcj02_to_bd09(lng, lat)
@staticmethod
def getDistance(pointa, pointb):
"""返回的距离单位为米; 点的组成(x['lat'], x['lon'])如(30.474302, 104.219467)"""
if pointa != pointb:
ra = 6378140 # radius of equator: meter
rb = 6356755 # radius of polar: meter
flatten = (ra - rb) / ra # Partial rate of the earth
# 角度转弧度
radLatA = math.radians(pointa[0])
radLonA = math.radians(pointa[1])
radLatB = math.radians(pointb[0])
radLonB = math.radians(pointb[1])
# 得到反正切值(为弧度)
pA = math.atan(rb / ra * math.tan(radLatA))
pB = math.atan(rb / ra * math.tan(radLatB))
# 得到反余弦值(为弧度)
x = math.acos(math.sin(pA) * math.sin(pB) + math.cos(pA) * math.cos(pB) * math.cos(radLonA - radLonB))
c1 = (math.sin(x) - x) * (math.sin(pA) + math.sin(pB)) ** 2 / math.cos(x / 2) ** 2
c2 = (math.sin(x) + x) * (math.sin(pA) - math.sin(pB)) ** 2 / math.sin(x / 2) ** 2
dr = flatten / 8 * (c1 - c2)
distance = ra * (x + dr)
# distance = distance/1000
distance = distance
return distance
else:
return 0
2pandas操作数据
如果要对相邻两行进行计算,则需要增加临时的一列,将所计算的列用shift函数向前或向后移动指定列,从而将其拼成一行,进而用apply进行计算,如下
import time
from datetime import datetime, timedelta
import pandas as pd
from numpy import NaN
from sqlalchemy import create_engine
engine = create_engine("postgresql+psycopg2://admin:123456@127.0.0.1:5432/test", max_overflow=5, encoding='utf-8')
coord = CoordTrans()
def get_grid():
sql = """select id,location, row, "column", bottom_left_coord->>'lat' as bottom_lat, bottom_left_coord->>'lng' as bottom_lng,
top_right_coord->>'lat' as top_lat, top_right_coord->>'lng' as top_lng, grid_code from grid order by id;"""
grids = pd.read_sql(sql, engine)
grids['bottom_lat'] = grids['bottom_lat'].map(float)
grids['bottom_lng'] = grids['bottom_lng'].map(float)
grids['top_lat'] = grids['top_lat'].map(float)
grids['top_lng'] = grids['top_lng'].map(float)
print(grids)
return grids
data = pd.read_csv('./ydcz-2020-6772.csv', header=None)
data.columns = ['co', 'so2', 'no2', 'o3', 'pm2_5', 'pm10', 'lat', 'lng', 'time', 'mn', 'h', 't']
data['time'] = data['time'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
data['DataTime'] = data['time'].map(lambda x: datetime(x.year, x.month, x.day, x.hour) + timedelta(hours=1))
data['published_at'] = data['time'].map(lambda x: datetime.fromtimestamp
((time.mktime(x.timetuple()) // 300) * 300)
+ timedelta(minutes=5))
grids = get_grid()
# 匹配满足条件的id
data['id'] = data.apply(lambda x: grids.loc[(grids['bottom_lat'] <= x['lat']) & (grids['top_lat'] >= x['lat']) & (grids['bottom_lng'] <= x['lng']) & (grids['top_lng'] >= x['lng']), 'id'].to_numpy()[0], axis=1)
print(data)
data.rename(columns={'id': 'grid_id'}, inplace=True)
station = pd.read_csv('./longquan.csv')
station['coord'] = station['coord'].map(eval)
# 计算data中每条数据与station中每条数据间的距离
data['distance'] = data.apply(lambda x: station.apply(lambda y: {y['station_code']: coord.getDistance([x['lat'], x['lng']], [y['coord']['lat'], y['coord']['lng']])}, axis=1).to_dict(), axis=1)
# 将距离进行排序
data['distance'] = data['distance'].map(lambda x: sorted(list(x.values()), key=lambda x: list(x.values())[0]))
# 取出距离最小的站点及对应的距离
data['small_distance'] = data['distance'].map(lambda x: list(x[0].values())[0])
data['small_station'] = data['distance'].map(lambda x: list(x[0].keys())[0])
# station.rename(columns={'grid_id_km': 'grid_id'}, inplace=True)
data = pd.merge(data, station, how='left', on='grid_id')
data.sort_values(by='time', inplace=True)
data.drop(['station_source', 'coord', 'grid_bottom_left_coord', 'grid_top_right_coord', 'grid_center_coord'], inplace=True, axis=1)
# 计算相邻两个时刻之间的速度,先将坐标及时间用shift向下移动一行在进行计算
# 将值向下移动一行,以便于隔行计算
data['tem_time'] = data['time'].shift(1)
data['tem_lat'] = data['lat'].shift(1)
data['tem_lng'] = data['lng'].shift(1)
# 相邻两行的时间差(用移动后的临时行计算)
data['time_delta'] = data.apply(lambda x: (x['time'] - x['tem_time']).seconds, axis=1)
# 值替换
data.replace(NaN, False, inplace=True)
# 计算时间间隔在0到60之间的速度
data['speed'] = data.apply(lambda x: (coord.getDistance([x['lat'], x['lng']], [x['tem_lat'], x['tem_lng']])/x['time_delta'])*3.6 if 0< x['time_delta'] < 60 and x['tem_lat'] else None, axis=1)
# 删除临时列
data.drop(['tem_time', 'tem_lat', 'tem_lng'], inplace=True, axis=1)
data.replace(False, NaN, inplace=True)
print(data)
print(data.iloc[0])
data.to_excel('./ydcz-2020-6772.xlsx', index=False)
# 对列中的字典对象中的值进行保留两位小数
# data['distance'] = data['distance'].map(lambda x: dict(map(lambda key: (key, round(x[key], 2)), x)))
# 用loc进行数据筛选并赋值
# data.loc[(data['station_name'] == '大面(成都行政学院)') & (data['published_at'] >= datetime(2020, 1, 2)) & (data['published_at'] <= datetime(2020, 1, 2, 23, 59, 59)), 'split'] = '大面'
网友评论