to_sql read_sql
from sqlalchemy import create_engine
from urllib.parse import quote_plus
pg_conn = f"postgresql://{from_user}:{quote_plus(from_pw)}@{from_host}:{from_port}/{from_database}"
pg_engine = create_engine(pg_conn)
select_fields = ['hn_id', 'format_address']
sql1 = f"select {','.join(select_fields)} " \
f"from poi_hn_edit_0826_new where source_id like 'AOI%' and building is null limit 10"
df1 = pd.read_sql(sql1, con=pg_engine)
df2.to_sql('hn_building2', con=pg_engine, if_exists='replace', index=False)
tosql method='multi'
dtypes = {}
for col in no_dup_df.columns:
dtypes[col] = VARCHAR(255)
no_dup_df.to_sql("poi_hn_road_0214_upd", con=pg_engine, if_exists='replace', index=False,
method='multi', dtype=dtypes, chunksize=10000)
merge
pd.merge(df2,gdf2,left_on='POI_ID',right_on='FeatID',how = 'inner')
In [44]: result = pd.merge(left, right, how='left', on=['key1', 'key2'])
分组后遍历
groups = df.groupby('key')
for group in groups:
df_tmp: pd.DataFrame = group[1]
df = pd.DataFrame({'A': 'a a b'.split(),
'B': [1,2,3],
'C': [4,6,5]})
In [3]: df
Out[3]:
A B C
0 a 1 4
1 a 2 6
2 b 3 5
g = df.groupby('A')
g.apply(lambda x:print(x))
A B C
0 a 1 4
1 a 2 6
A B C
2 b 3 5
aa bb cc
0 a 1 4
1 a 1 5
2 b 2 6
group.aggregate(lambda x:list(x))
bb cc
aa
a [1, 1] [4, 5]
b [2] [6]
添加一列
df1['grid1'] = df1.apply(lambda x: sGrid.encode(float(x['longitude']), float(x['latitude']), 0, 0), axis=1)
series1 = df.groupby(by=['grid1']).apply(h3_dump_flatmap)
filter
not_main_point_df = df[df.apply(lambda x: pd.notnull(x['house_num']), axis=1)].copy()
一行变多行
[[1,2],[3,4]]
series_notnull1 = series1.explode()
合并
series = pd.concat([series_notnull1, series_notnull2, series_notnull3])
一列变多列
[1,2,3,4]
df = pd.DataFrame(series.to_list())
isna
In [176]: s
Out[176]:
0 [[1, 2], [3, 4]]
1 [[2, 3]]
2 []
dtype: object
In [177]: s.explode()
Out[177]:
0 [1, 2]
0 [3, 4]
1 [2, 3]
2 NaN
dtype: object
In [206]: sp
Out[206]:
0 [1, 2]
0 [3, 4]
1 [2, 3]
2 NaN
dtype: object
In [203]: pd.isna(sp)
Out[203]:
0 False
0 False
1 False
2 True
dtype: bool
geopandas
df = pd.DataFrame(np.random.randn(50, 3),columns=['X', 'Y', 'Z'])
geom = [shapely.geometry.Point(xy) for xy in zip(df.X, df.Y)]
geodf = geopandas.GeoDataFrame(df, geometry=geom)
print type(gdf)
geodf.plot()
plt.show()
from shapely.geometry import Point
d = {'geometry': [Point(1, 2), Point(2, 1)]}
gdf = geopandas.GeoDataFrame(d)
gdf.has_sindex
#Generate the spatial index
index = gdf.sindex
gdf.has_sindex
gdf['distance'] = gdf['geometry'].distance(cent_point)
网友评论