pandas

作者: hehehehe | 来源:发表于2021-10-20 16:58 被阅读0次
    to_sql read_sql
    from sqlalchemy import create_engine
    from urllib.parse import quote_plus
    
    pg_conn = f"postgresql://{from_user}:{quote_plus(from_pw)}@{from_host}:{from_port}/{from_database}"
    pg_engine = create_engine(pg_conn)
    
    select_fields = ['hn_id', 'format_address']
    sql1 = f"select {','.join(select_fields)} " \
               f"from poi_hn_edit_0826_new where source_id like 'AOI%' and building is null limit 10"
    df1 = pd.read_sql(sql1, con=pg_engine)
    df2.to_sql('hn_building2', con=pg_engine, if_exists='replace', index=False)
    
    

    tosql method='multi'

    dtypes = {}
    for col in no_dup_df.columns:
        dtypes[col] = VARCHAR(255)
    no_dup_df.to_sql("poi_hn_road_0214_upd", con=pg_engine, if_exists='replace', index=False,
                                    method='multi', dtype=dtypes, chunksize=10000)
    

    merge

    pd.merge(df2,gdf2,left_on='POI_ID',right_on='FeatID',how = 'inner')
    In [44]: result = pd.merge(left, right, how='left', on=['key1', 'key2'])
    
    分组后遍历
    groups = df.groupby('key')
    for group in groups:
        df_tmp: pd.DataFrame = group[1]
    
    df = pd.DataFrame({'A': 'a a b'.split(),
                       'B': [1,2,3],
                       'C': [4,6,5]})
    In [3]: df
    Out[3]: 
       A  B  C
    0  a  1  4
    1  a  2  6
    2  b  3  5
    g = df.groupby('A')
    g.apply(lambda x:print(x))
       A  B  C
    0  a  1  4
    1  a  2  6
       A  B  C
    2  b  3  5
    
      aa  bb  cc
    0  a   1   4
    1  a   1   5
    2  b   2   6
    group.aggregate(lambda x:list(x))
         bb      cc
    aa                
    a   [1, 1]  [4, 5]
    b      [2]     [6]
    
    添加一列
    df1['grid1'] = df1.apply(lambda x: sGrid.encode(float(x['longitude']), float(x['latitude']), 0, 0), axis=1)
    series1 = df.groupby(by=['grid1']).apply(h3_dump_flatmap)
    
    filter
    not_main_point_df = df[df.apply(lambda x: pd.notnull(x['house_num']), axis=1)].copy()
    
    一行变多行
    [[1,2],[3,4]]
    series_notnull1 = series1.explode()
    
    合并
    series = pd.concat([series_notnull1, series_notnull2, series_notnull3])
    
    一列变多列
    [1,2,3,4]
    df = pd.DataFrame(series.to_list())
    
    isna
    In [176]: s
    Out[176]: 
    0    [[1, 2], [3, 4]]
    1            [[2, 3]]
    2                  []
    dtype: object
    In [177]: s.explode()
    Out[177]: 
    0    [1, 2]
    0    [3, 4]
    1    [2, 3]
    2       NaN
    dtype: object
    In [206]: sp
    Out[206]: 
    0    [1, 2]
    0    [3, 4]
    1    [2, 3]
    2       NaN
    dtype: object
    In [203]: pd.isna(sp)
    Out[203]: 
    0    False
    0    False
    1    False
    2     True
    dtype: bool
    

    geopandas

    df = pd.DataFrame(np.random.randn(50, 3),columns=['X', 'Y', 'Z'])
    geom = [shapely.geometry.Point(xy) for xy in zip(df.X, df.Y)]
    geodf = geopandas.GeoDataFrame(df, geometry=geom)
    print type(gdf)
    geodf.plot()
    plt.show()
    
    from shapely.geometry import Point
    d = {'geometry': [Point(1, 2), Point(2, 1)]}
    gdf = geopandas.GeoDataFrame(d)
    gdf.has_sindex
    #Generate the spatial index
    index = gdf.sindex
    gdf.has_sindex
    
    gdf['distance'] = gdf['geometry'].distance(cent_point)
    

    相关文章

      网友评论

          本文标题:pandas

          本文链接:https://www.haomeiwen.com/subject/wegtaltx.html