将NOAA天气数据文件“.dly”转换为Pandas DataFrame
获取数据 ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily
In [1]:
import ftplib
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
% matplotlib notebook
In [2]:
# download data from FTP
def download_file_from_ftp(FTP_SERVER, FTP_PATH, FILENAME):
    """Download one file from an FTP server using an anonymous login.

    The data is first written to ``FILENAME + '.part'`` and only moved to
    ``FILENAME`` once the transfer has completed. A failed or interrupted
    transfer therefore never leaves a truncated ``FILENAME`` behind and never
    destroys a copy that was downloaded successfully earlier.

    Parameters
    ----------
    FTP_SERVER : str
        Host name passed to ``ftplib.FTP``.
    FTP_PATH : str
        Remote directory to change into before retrieving the file.
    FILENAME : str
        Name of the remote file; the same string is used as the local
        destination path.

    Raises
    ------
    ftplib.all_errors
        Any FTP or socket error raised during the session is propagated
        after the partial download has been removed.
    """
    partial_path = FILENAME + '.part'
    try:
        with ftplib.FTP(FTP_SERVER) as ftp:
            ftp.login()
            ftp.cwd(FTP_PATH)
            with open(partial_path, 'wb') as f:
                ftp.retrbinary('RETR ' + FILENAME, f.write)
        # The transfer finished and the local file is closed: publish it.
        os.replace(partial_path, FILENAME)
    except BaseException:
        # Do not leave a half-written file around to be mistaken for data.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
查询站ID
In [3]:
def get_station_ID(station_to_find, filename):
    """Return the ID of the first station whose line contains ``station_to_find``.

    The file is scanned line by line. For the first line that contains
    ``station_to_find`` as a substring, the text before the first space of
    that line is returned as the station ID.

    Parameters
    ----------
    station_to_find : str
        Text to look for in each line of the station list.
    filename : str
        Path of the station list file to search.

    Returns
    -------
    str or None
        The leading space-delimited field of the first matching line, or
        ``None`` when no line matches.
    """
    # The context manager closes the file even when returning mid-loop.
    with open(filename) as stations_file:
        for line in stations_file:
            if station_to_find in line:
                # Only the first field is needed, so split once.
                return line.split(" ", 1)[0]
    return None
# Warning: this download is slow, so run it only once.
download_file_from_ftp("ftp.ncdc.noaa.gov", "/pub/data/ghcn/daily", "ghcnd-stations.txt")
station_to_find = "GUANGZHOU"  # use capital letters
station_ID = get_station_ID(station_to_find, "ghcnd-stations.txt")
# get_station_ID returns None when nothing matches; stop here with a clear
# message instead of a TypeError when the .dly file name is built later.
if station_ID is None:
    raise ValueError(
        "Station %r was not found in ghcnd-stations.txt" % station_to_find
    )
下载天气数据
In [4]:
# The file to fetch is named after the station ID, with a '.dly' suffix.
weather_data_filename = station_ID + '.dly'
# Warning: this download is slow, so run it only once.
download_file_from_ftp(
    FTP_SERVER="ftp.ncdc.noaa.gov",
    FTP_PATH="/pub/data/ghcn/daily/all",
    FILENAME=weather_data_filename,
)
将.dly转换为pandas DataFrame
In [7]:
# Convert the downloaded .dly file and preview the first rows of the result.
# NOTE(review): convert_dly_to_dataframe is not defined in any cell visible
# here (execution counts jump from In [4] to In [7]) — confirm that the cell
# defining it exists and is run before this one.
df = convert_dly_to_dataframe ( weather_data_filename )
df . head ()
Out[7]:
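|   | YEAR | MONTH | ELEMENT | VALUE1 | VALUE2 | VALUE3 | VALUE4 | VALUE5 | VALUE6 | VALUE7 | ... | VALUE22 | VALUE23 | VALUE24 | VALUE25 | VALUE26 | VALUE27 | VALUE28 | VALUE29 | VALUE30 | VALUE31 |
|---|------|-------|---------|--------|--------|--------|--------|--------|--------|--------|-----|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|
| 0 | 1945 | 11 | TAVG | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 107.0 | NaN |
| 1 | 1945 | 12 | TAVG | 123.0 | 136.0 | 152.0 | 144.0 | 146.0 | 189.0 | 219.0 | ... | 179.0 | 146.0 | 128.0 | 107.0 | 104.0 | 112.0 | 122.0 | 127.0 | 129.0 | 156.0 |
| 2 | 1946 | 1 | TAVG | 150.0 | 150.0 | 123.0 | 117.0 | 112.0 | 121.0 | 125.0 | ... | 146.0 | 153.0 | 173.0 | 196.0 | 211.0 | 212.0 | 218.0 | 201.0 | 156.0 | 131.0 |
| 3 | 1946 | 2 | TAVG | 114.0 | 112.0 | 147.0 | 181.0 | 195.0 | 192.0 | 149.0 | ... | 201.0 | 196.0 | 231.0 | 226.0 | 221.0 | 229.0 | 240.0 | NaN | NaN | NaN |
| 4 | 1946 | 3 | TAVG | 237.0 | 162.0 | 142.0 | 133.0 | 183.0 | 187.0 | 160.0 | ... | 183.0 | 192.0 | 205.0 | 216.0 | 223.0 | 238.0 | 207.0 | 195.0 | 233.0 | 228.0 |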
网友评论