筛选第二级index然后求平均
import pandas as pd
import numpy as np
# Build a small population table indexed by (state, county). Two of the
# county labels are None, i.e. missing values in the second index level.
data = {'population': [100, 200, 300, 400, 500, 600, 700, 800]}
arrays = [
    ['NJ', 'NJ', 'NY', 'NY', 'CA', 'CA', 'NV', 'NV'],
    ['A', 'B', None, 'D', 'E', 'F', None, 'G'],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=['state', 'county'])
df = pd.DataFrame(data, index=index)

# Filter on the second index level: drop counties 'D' and 'G' as well as the
# rows whose county is missing. np.nan is spelled in lowercase because the
# np.NaN alias was removed in NumPy 2.0.
df = df.drop(['D', 'G', np.nan], level='county')
print(df)
              population
state county
NJ    A              100
      B              200
CA    E              500
      F              600
# Average population per state (index level 0). The built-in groupby mean
# replaces apply(lambda x: x.mean()): same values, no Python-level call
# per group.
dm = df['population'].groupby(level=0).mean()
# Alternatively, rows can be selected by county with a whitelist:
#   df = df.iloc[df.index.isin(['A', 'B', 'D'], level=1)]
print(dm)
state
CA 550.0
NJ 150.0
Name: population, dtype: float64
reference
网友评论