美文网首页
工作中的学习(日常更新)

工作中的学习(日常更新)

作者: Jasmine晴天和我 | 来源:发表于2020-05-27 15:48 被阅读0次

    列表推导式

    x = [1,2,3,4,5]
    out = []
    for item in x:
        out.append(item**2)
    out
    #一行代码定义列表
    x = [1,2,3,4,5]
    out = [item ** 2 for item in x]
    out
    

    lambda表达式

    
    double = lambda x:x*2
    double(5)
    

    Map / filter(下例演示的是 filter)

    
    seq = [1,2,3,4,5]
    result = list(filter(lambda x:x>2,seq))
    result
    

    arange

    #np.arange(start,stop,step)
    np.arange(3,7,2)
    

    linspace

    #np.linspace(start,stop,num)
    np.linspace(2.0,3.0,num = 5)
    

    df.apply

    # DataFrame.apply examples
    import pandas as pd 
    df = pd.DataFrame([[4,9],]*3,columns=['A','B'],index = ["age","salary",'name'])
    df 
    
    import numpy as np
    # No axis given: np.sqrt is applied to the frame column by column (element-wise result).
    df.apply(np.sqrt)
    
    df.apply(np.sum,axis = 1) # axis=1: np.sum receives each row, giving one total per index label
    
    df.apply(np.sum,axis = 0) # axis=0: np.sum receives each column, giving one total per column (A, B)
    

    pandas中Timestamp类

    import pandas as pd
    from datetime import datetime as dt
    p1=pd.Timestamp(2017,6,19)
    p2=pd.Timestamp(dt(2017,6,19,hour=9,minute=13,second=45))
    p3=pd.Timestamp("2017-6-19 9:13:45")
    
    p4=pd.to_datetime("2017-6-19 9:13:45")
    p5=pd.to_datetime(dt(2017,6,19,hour=9,minute=13,second=45))
    

    python时间转换

    将python的datetime转换为unix时间戳

    import time
    import datetime
    # Convert a Python datetime into a Unix timestamp (seconds since the epoch, local time).
    dtime = datetime.datetime.now()
    ans_time = time.mktime(dtime.timetuple())
    # Convert a Unix timestamp back into a Python datetime.
    unix_ts = 1439111214.0
    # Stored under its own name: binding the result to `time` would shadow the
    # `time` module, so the later time.time() / time.mktime() calls would break.
    dt_from_ts = datetime.datetime.fromtimestamp(unix_ts)
    # Date-time string used by the conversion helpers below.
    st = "2017-11-23 16:10:10"
    # Current date-time.
    dt = datetime.datetime.now()
    # Current Unix timestamp (float seconds).
    sp = time.time()
    
    # 1.把datetime转成字符串
    def datetime_toString(dt):
        """Print ``dt`` rendered as ``YYYY-MM-DD HH:MM:SS`` after a fixed label."""
        rendered = dt.strftime("%Y-%m-%d %H:%M:%S")
        print("1.把datetime转成字符串: ", rendered)
    
    
    # 2.把字符串转成datetime
    def string_toDatetime(st):
        """Parse ``st`` (``YYYY-MM-DD HH:MM:SS``) into a datetime and print it after a fixed label."""
        parsed = datetime.datetime.strptime(st, "%Y-%m-%d %H:%M:%S")
        print("2.把字符串转成datetime: ", parsed)
    
    
    # 3.把字符串转成时间戳形式
    def string_toTimestamp(st):
        """Print the local-time Unix timestamp for ``st`` (``YYYY-MM-DD HH:MM:SS``) after a fixed label."""
        broken_down = time.strptime(st, "%Y-%m-%d %H:%M:%S")
        seconds = time.mktime(broken_down)
        print("3.把字符串转成时间戳形式:", seconds)
    
    
    # 4.把时间戳转成字符串形式
    def timestamp_toString(sp):
        """Print the Unix timestamp ``sp`` as local ``YYYY-MM-DD HH:MM:SS`` text after a fixed label."""
        local_parts = time.localtime(sp)
        rendered = time.strftime("%Y-%m-%d %H:%M:%S", local_parts)
        print("4.把时间戳转成字符串形式: ", rendered)
    
    
    # 5.把datetime类型转外时间戳形式
    def datetime_toTimestamp(dt):
        """Print the local-time Unix timestamp of the datetime ``dt`` after a fixed label."""
        seconds = time.mktime(dt.timetuple())
        print("5.把datetime类型转外时间戳形式:", seconds)
    
    # 1.把datetime转成字符串
    datetime_toString(dt)
    # 2.把字符串转成datetime
    string_toDatetime(st)
    # 3.把字符串转成时间戳形式
    string_toTimestamp(st)
    # 4.把时间戳转成字符串形式
    timestamp_toString(sp)
    # 5.把datetime类型转外时间戳形式
    datetime_toTimestamp(dt)
    

    between_time()

    i = pd.date_range('2018-04-09', periods=4, freq='12H')
    >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
    >>> ts
                         A
    2018-04-09 00:00:00  1
    2018-04-09 12:00:00  2
    2018-04-10 00:00:00  3
    2018-04-10 12:00:00  4
    >>> ts.at_time('12:00')
     i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
    >>> ts
                         A
    2018-04-09 00:00:00  1
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    2018-04-12 01:00:00  4
    
    >>> ts.between_time('0:15', '0:45')
                         A
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    将 start_time 设置得晚于 end_time,即可得到不在这两个时刻之间的记录:
    
    >>> ts.between_time('0:45', '0:15')
                         A
    2018-04-09 00:00:00  1
    2018-04-12 01:00:00  4
    

    2020.5.26 阴天 希望能养成随手记的习惯
    1.dataframe series取最大值或最小值的索引

    df.idxmax(axis=0, skipna=True)  # 返回最大值所在的索引标签;注意 Series.argmax() 在新版 pandas 中返回的是整数位置,DataFrame 没有 argmax()
    

    2.dataframe中的object转成需要的类型

    # DataFrame.convert_objects() was deprecated and later removed from pandas.
    # infer_objects() is the replacement for soft conversion of object columns;
    # to coerce strings, use pd.to_numeric / pd.to_datetime on the column instead.
    df = df.infer_objects()
    # Parse a string column into datetimes using an explicit month/day/year format.
    filedf['pub_date2']=pd.to_datetime(filedf['pub_date'],format='%m/%d/%Y %H:%M:%S')
    

    3.sklearn多项式回归

    from sklearn.preprocessing import PolynomialFeatures
    # degree was left blank in the original note (a syntax error); 2 is the
    # scikit-learn default - change it to the polynomial order you need.
    ploy =  PolynomialFeatures(degree=2)
    ploy.fit(x)
    x2 = ploy.transform(x)
    
    # The module name is lowercase: sklearn.linear_model.
    from sklearn.linear_model import LinearRegression
    reg   = LinearRegression()
    reg.fit(x2,y)
    y_pred = reg.predict(x2)
    plt.scatter(x,y)
    # Plot the fitted curve in ascending x order so the line does not zig-zag.
    # NOTE(review): assumes np.sort(x) / np.argsort(x) act on a 1-D view of x - confirm.
    plt.plot(np.sort(x),y_pred[np.argsort(x)])
    

    2020.5.27 多云

    1.break continue

    break结束这个循环

    continue结束本次循环

    2.scipy.spatial

    scipy.spatial.distance_matrix()
    

    3.读取文件夹中的所有文件

    import os
    files = os.listdir(path)
    for file in files:
        # os.listdir() returns bare entry names, so join each with `path`
        # before testing it; otherwise the check runs against the current
        # working directory. Only non-directories are processed.
        if not os.path.isdir(os.path.join(path, file)):
            pass  # placeholder: open / process os.path.join(path, file) here
    

    4.group

    group = df.groupby()
    group.count()
    group.size()
    

    5.series.value_counts().to_dict()

    6.Counter

    from collections import Counter
    Counter(data)
    

    2020.5.28 晴天
    1.对一簇经纬度点,进行凸包计算

    from scipy.spatial import ConvexHull
    points = np.random.rand(30, 2)
    hull = ConvexHull(points)
    plt.plot(points[:,0], points[:,1], 'o')
    # hull.vertices 得到凸轮廓坐标的索引值,逆时针画
    hull1=hull.vertices.tolist()#要闭合必须再回到起点[0]
    hull1.append(hull1[0])
    plt.plot(points[hull1,0], points[hull1,1], 'r--^',lw=2)
    for i in range(len(hull1)-1):
        plt.text(points[hull1[i],0], points[hull1[i],1],str(i),fontsize=20)
    

    参考链接:https://blog.csdn.net/qq_23298649/article/details/103869985

    2020.6.3 雨天
    Python DataFrame一列拆成多列以及一行拆成多行
    参考链接:https://www.jb51.net/article/167001.htm

    2020.6.16上海的梅雨季

    #1.一行代码合并两个字典
    {**{'a':1,'b':2},**{'c':3}}
    
    #2.一行代码求多个列表中的最大值
    max(max([ [1,2,3], [5,1], [4] ], key=lambda v: max(v)))
    
    #3.一行代码生成逆序序列
    list(range(10,-1,-1))
    
    #4.一行代码完成数据透视
    pd.pivot_table(df, index=['Manager', 'Rep'], values=['Price'], aggfunc=np.sum)
    
    #5.在函数中设定过滤条件,迭代元素,保留返回值为 True 的元素
    fil = filter(lambda x: x>10,[1,11,2,45,7,6,13])
    list(fil)
    
    #6.格式化输出字符串,format(value, format_spec)实质上是调用了value的format(format_spec)方法。
    print("i am {0},age{1}".format("tom",18))
    
    #7.反向迭代
    reversed([1,4,2,3,1])
    
    #8.返回一个表示由 range(start, stop, step) 所指定索引集的 slice对象
    a = [1,4,2,3,1]
    my_slice = slice(0,5,2)
    a[my_slice]
    
    #9.排序
    a = [{'name':'xiaoming','age':18,'gender':'male'},{'name':': xiaohong','age':20,'gender':'female'}]
    sorted(a,key=lambda x: x['age'],reverse=False)
    
    #10.zip
    x = [3,2,1]
    y = [4,5,6]
    list(zip(y,x))
    
    a = range(5)
    b = list('abcde')
    [str(y) + str(x) for x,y in zip(a,b)]
    
    #10.按条件分组
    def bif_by(lst, f):
        """Split ``lst`` into two lists according to the predicate ``f``.

        Args:
            lst: Any iterable of items (a one-shot iterator works too).
            f: Callable returning a truthy value for items that belong in the
                first bucket.

        Returns:
            ``[matching, non_matching]``, each preserving the input order.
        """
        matching, non_matching = [], []
        # Single pass: `f` runs once per item and `lst` is iterated only once,
        # so generators and predicates with side effects behave correctly.
        for x in lst:
            (matching if f(x) else non_matching).append(x)
        return [matching, non_matching]
    records = [25,89,31,34]
    bif_by(records, lambda x: x<80) # [[25, 31, 34], [89]]
    

    画图

    #Matplotlib中的日期与时间间隔图
    import datetime
    import numpy as np
    import matplotlib.dates as mdates
    import matplotlib.pyplot as plt
    
    # dates for xaxis
    event_date = [datetime.datetime(2008, 12, 3), datetime.datetime(2009, 1, 5), datetime.datetime(2009, 2, 3)]
    
    # base date for yaxis can be anything, since information is in the time
    anydate = datetime.date(2001,1,1)
    
    # event times
    event_start = [datetime.time(20, 12), datetime.time(12, 15), datetime.time(8, 1,)]
    event_finish = [datetime.time(23, 56), datetime.time(16, 5), datetime.time(18, 34)]
    
    # translate times and dates lists into matplotlib date format numpy arrays
    start = np.fromiter((mdates.date2num(datetime.datetime.combine(anydate, event)) for event in event_start), dtype = 'float', count = len(event_start))
    finish = np.fromiter((mdates.date2num(datetime.datetime.combine(anydate, event)) for event in event_finish), dtype = 'float', count = len(event_finish))
    date = mdates.date2num(event_date)
    
    # calculate events durations
    duration = finish - start
    
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    
    # use errorbar to represent event duration
    ax.errorbar(date, start, [np.zeros(len(duration)), duration], linestyle = '')
    # make matplotlib treat both axis as times
    ax.xaxis_date()
    ax.yaxis_date()
    
    plt.show()
    

    SCIPY
    求局部极值(或者求波峰波谷)

    import numpy as np 
    import pylab as pl
    import matplotlib.pyplot as plt
    import scipy.signal as signal
    x=np.array([
        0, 6, 25, 20, 15, 8, 15, 6, 0, 6, 0, -5, -15, -3, 4, 10, 8, 13, 8, 10, 3,
        1, 20, 7, 3, 0 ])
    plt.figure(figsize=(16,4))
    plt.plot(np.arange(len(x)),x)
    # argrelextrema returns a tuple of index arrays; compute each once and reuse.
    # Local maxima of -x are the local minima (valleys) of x.
    peaks = signal.argrelextrema(x, np.greater)
    valleys = signal.argrelextrema(-x, np.greater)
    # print() calls: the original Python 2 print statements are a syntax error in Python 3.
    print(x[peaks])
    print(peaks)
    
    plt.plot(peaks[0],x[peaks],'o')
    plt.plot(valleys[0],x[valleys],'+')
    # plt.plot(peakutils.index(-x),x[peakutils.index(-x)],'*')
    plt.show()
    ————————————————
    版权声明:本文为CSDN博主「weijifen000」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
    原文链接:https://blog.csdn.net/weijifen000/article/details/80070520/
    
    [https://www.jb51.net/article/180654.htm](https://www.jb51.net/article/180654.htm)
    
    

    带有AM和PM的时间转换

    import datetime#字符串时间转换加变换时区
    def timeutc(st):
        """Convert a ``M/D/YYYY H:MM:SS AM|PM`` string to ``YYYY-MM-DD HH:MM:SS``.

        The text is split on single spaces: the first token is the date
        (month/day/year), the second-to-last token is the clock time and the
        last token is the meridiem marker. A marker other than AM/PM leaves
        the hour untouched. Despite the name, no time-zone conversion is done.

        Args:
            st: Timestamp text such as ``'11/29/2009 03:17:00 PM'``.

        Returns:
            The timestamp formatted with ``'%Y-%m-%d %H:%M:%S'`` (24-hour clock).

        Raises:
            IndexError: If the date or time token has too few fields.
            ValueError: If a field is not an integer or is out of range.
        """
        # Imported locally so the function keeps working even when the
        # module-level name `datetime` is rebound to the class
        # (e.g. by `from datetime import datetime`).
        import datetime as _dt

        tokens = st.split(' ')
        meridiem = tokens[-1].upper()
        clock = tokens[-2].split(':')
        date_parts = tokens[0].split('/')

        hour = int(clock[0])
        # 12-hour clock rules: 12 PM is noon (stays 12), 12 AM is midnight (0),
        # every other PM hour gains 12. Minutes and seconds are always kept.
        if meridiem == 'PM' and hour < 12:
            hour += 12
        elif meridiem == 'AM' and hour == 12:
            hour = 0

        moment = _dt.datetime(
            int(date_parts[2]), int(date_parts[0]), int(date_parts[1]),
            hour, int(clock[1]), int(clock[2]),
        )
        return moment.strftime('%Y-%m-%d %H:%M:%S')
    
    #但是这种24点没法弄
    from datetime import datetime
    date_string = '2009-11-29 03:17:00 PM'
    format = '%Y-%m-%d %I:%M:%S %p'
    my_date = datetime.strptime(date_string, format) 
    

    https://www.cnblogs.com/fwl8888/p/9635505.html

    滑动回归

    #1.
    from pyfinance.ols import OLS, RollingOLS, PandasRollingOLS
    y = data.usd
    x = data.drop('usd', axis=1)
    window = 12 # months
    model = PandasRollingOLS(y=y, x=x, window=window) 
    print(model.beta.head())
    #2.
    df['slope'] = df.values.rolling(window=125).apply(lambda x: np.polyfit(np.array(range(0,125)), x, 1)[0], raw=True)
    #3.
    [https://blog.csdn.net/weixin_30701575/article/details/97739761](https://blog.csdn.net/weixin_30701575/article/details/97739761)
    #4.滑动滤波
    [https://cloud.tencent.com/developer/article/1451488](https://cloud.tencent.com/developer/article/1451488)
    #5.滑动窗口函数
    [https://www.cnblogs.com/nxf-rabbit75/p/10669516.html](https://www.cnblogs.com/nxf-rabbit75/p/10669516.html)
    #6.LSTM滑动预测
    [https://blog.csdn.net/zhonglongshen/article/details/94555337](https://blog.csdn.net/zhonglongshen/article/details/94555337)
    
    

    时区转换

    import time
    import datetime
    # %z parses the UTC offset ("+00:00", "+08:00", ...) instead of requiring
    # one hard-coded literal suffix, which raised ValueError for any other offset.
    format1="%Y-%m-%d %H:%M:%S%z"
    format2="%Y-%m-%d %H:%M:%S"
    t1 = '2020-03-11 17:15:07+00:00'
    t = datetime.datetime.strptime(t1, format1)
    # Shift to UTC+08:00 before dropping the offset.
    # NOTE(review): +08:00 is taken from the literal in the original format1 - confirm the target zone.
    t2 = t.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime(format2)
    print(t2)
    

    python dataframe操作
    https://www.cnblogs.com/luowei93/p/11878639.html

    pandas groupby agg

    num_agg = {'Age':['min', 'mean', 'max']}
    df.groupby('Country').agg(num_agg)
    

    Python计算时间差

    import datetime
    def time_differ(date1='12:55:05',date2='13:15:05'):
        """Return the absolute gap between two ``HH:MM:SS`` clock strings.

        Both strings are parsed with ``%H:%M:%S``; the result is a
        non-negative ``datetime.timedelta`` whatever the argument order.
        """
        clock_format = "%H:%M:%S"
        first = datetime.datetime.strptime(date1, clock_format)
        second = datetime.datetime.strptime(date2, clock_format)
        return abs(second - first)
    date1 = '13:05:05'
    date2 = '13:15:05'
    differ = time_differ(date1,date2)
    print(differ)
    

    字符串时间差转换

    """
    '1157 days, 9:46:39'
    '12:00:01.824952'
    '-1 day, 23:59:31.859767'
    """
    import re
    
    def parse(s):
        """Turn the ``str()`` form of a timedelta into ``timedelta`` keyword arguments.

        Accepted shapes are ``'H:MM:SS[.ffffff]'`` with an optional leading
        ``'<N> day, '`` / ``'<N> days, '`` part, where N may be negative.

        Args:
            s: Text such as ``'1157 days, 9:46:39'`` or ``'12:00:01.824952'``.

        Returns:
            A dict mapping each component present in ``s`` (``days``, ``hours``,
            ``minutes``, ``seconds``) to a float, usable as ``timedelta(**result)``.

        Raises:
            ValueError: If ``s`` does not start with a recognised duration.
        """
        pattern = (
            r'(?:(?P<days>-?\d+) days?, )?'
            r'(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+(?:\.\d+)?)'
        )
        m = re.match(pattern, s)
        if m is None:
            raise ValueError("unrecognised timedelta string: %r" % (s,))
        # dict.items() replaces the Python 2-only iteritems(); the optional
        # `days` group is dropped when the string has no day part.
        return {key: float(val) for key, val in m.groupdict().items() if val is not None}
    
    from datetime import timedelta
    
    s1 = '1157 days, 9:46:39'
    s2 = '12:00:01.824952'
    s3 = '-1 day, 23:59:31.859767'
    t1 = parse(s1)
    t2 = parse(s2)
    t3 = parse(s3)
    
    timedelta(**t1) # datetime.timedelta(1157, 35199)
    timedelta(**t2) # datetime.timedelta(0, 43201, 824952)
    timedelta(**t3) # datetime.timedelta(-1, 86371, 859767)
    ##或者
    import datetime
    # Named time_str rather than `str`, so the builtin str type is not shadowed
    # for the rest of the session.
    time_str="15:00:00"
    strtime=time_str.split(":")
    # Build a timedelta from the hour / minute / second fields.
    strtimedelta=datetime.timedelta(hours=int(strtime[0]),minutes=int(strtime[1]),seconds=int(strtime[2]))
    

    SQL中 OVER(PARTITION BY) 取上一条,下一条等

    https://www.cnblogs.com/zhqian/p/9140313.html

    #计算某个字段在当前记录和下一条记录之间的差
    select name, 
            hiredate, 
            next_hiredate,
            next_hiredate - hiredate as diff
    from (
        select name, 
               hiredate,
               lead(hiredate)over(order by hiredate) next_hiredate
        from dataset
    )
    

    hausdorff距离##

    # Point and LineString are the classes used below, so they must be imported
    # (the original imported only Polygon).
    from shapely.geometry import LineString, Point, Polygon
    point = Point(1, 1)
    line = LineString([(2, 0), (2, 4), (3, 4)])
    # Hausdorff distance between the point and the line.
    point.hausdorff_distance(line)
    # Plain distance to the line's last vertex, for comparison.
    point.distance(Point(3, 4))
    

    判断点是否在多边形内
    http://gnss.help/2019/06/09/check-point-in-polygon/index.html

    from shapely.geometry import Point
    from shapely.geometry import Polygon
    polygon2 = Polygon([(lon, lat) for lon, lat in points])
    point = Point(122.35, 29.70)
    polygon2.contains(point)
    

    线段相交

    from shapely.geometry import LineString
    coords = [(0, 0), (1, 1)]
    LineString(coords).crosses(LineString([(0, 1), (1, 0)]))
    

    pywt
    https://www.cnblogs.com/junge-mike/p/12761502.html

    https://mne.tools/stable/auto_examples/preprocessing/plot_xdawn_denoising.html#sphx-glr-auto-examples-preprocessing-plot-xdawn-denoising-py

    https://medium.com/impulse-neiry/simple-p300-classifier-on-open-data-27e906f68b83

    http://europepmc.org/article/PMC/5698603

    滤波
    https://www.cnblogs.com/iwuqing/p/11380131.html

    https://www.cnblogs.com/sunlightheart/p/12574848.html

    https://www.cnblogs.com/sunlightheart/p/12542842.html

    https://blog.csdn.net/shanwenkang/article/details/84345178

    字符串的时间做差

    #1.
    pd.DataFrame(pd.to_datetime(time_df['END_TIME']) - pd.to_datetime(time_df['START_TIME']))
    #2.
    def time_delta(a,b):
        """Return the signed gap ``a - b`` in hours.

        Both arguments are ``YYYY-MM-DD HH:MM:SS`` strings; the result is a
        float and is negative when ``a`` is earlier than ``b``.
        """
        stamp_format = "%Y-%m-%d %H:%M:%S"
        moment_a = datetime.strptime(a, stamp_format)
        moment_b = datetime.strptime(b, stamp_format)
        elapsed = moment_a - moment_b
        return elapsed.total_seconds()/3600
    
    df['delta'] = df.apply(lambda x: time_delta(x['end_postime'],x['start_postime']),axis=1)
    

    matplotlib画图

    import numpy as np
    import matplotlib.pyplot as plt
    y = np.arange(1,10,1)
    x = np.arange(1,10,1)
    bwith = 2 # line width used for all four spines (borders)
    ax = plt.gca()# current Axes; its spines are styled below
    plt.tick_params(axis='both',colors='gold') # colour of the ticks and tick labels on both axes
    ax.spines['top'].set_color('red')  # top spine drawn in red
    ax.spines['right'].set_color('none')  # right spine made invisible (no colour)
    ax.spines['bottom'].set_linewidth(bwith)
    ax.spines['left'].set_linewidth(bwith)
    ax.spines['top'].set_linewidth(bwith)
    ax.spines['right'].set_linewidth(bwith)
    plt.grid( color = 'black',linestyle='-.',linewidth = 1)
    plt.plot(x,y)
    

    https://blog.csdn.net/wuzlun/article/details/80059222

    https://www.cnblogs.com/zhizhan/p/5615947.html

    plotly画双y轴图(双坐标)

    import plotly.graph_objects as go
    trace0 = go.Scatter(x=weather.index, y=weather.speed, mode='lines+markers',marker=dict(opacity=0.4), name='speed')
    trace1 = go.Scatter(x=weather.index, y=weather.wind, mode='lines+markers', marker=dict(opacity=0.5),name='wind',yaxis="y2")
    data1 = [trace0, trace1]
    
    # go.Layout可以创建图层对象,实现双坐标
    layout = go.Layout(title="双坐标示例图",
                       yaxis=dict(title="wind"),
                       yaxis2=dict(title="speed", overlaying='y', side="right"),legend=dict(x=0, y=1, font=dict(size=10, color="black"),orientation='h'),activeshape=dict(opacity=1))
    fig = go.Figure(data=data1, layout=layout)
    fig.show()
    

    plotly画多个子图

    from plotly import subplots
    
    # 设定布局,以便进行绘图,这儿是两行一列
    fig = subplots.make_subplots(rows=2,cols=1)
    trace0 = go.Scatter(x=weather.index, y=weather.speed, mode='lines+markers',marker=dict(opacity=0.4), name='speed')
    trace1 = go.Scatter(x=weather.index, y=weather.wind, mode='lines+markers', marker=dict(opacity=0.5),name='wind',yaxis="y2")
    fig.append_trace(trace1,1,1)
    fig.append_trace(trace0,2,1)
    
    # 设定每个子图的占位情况
    fig.layout.yaxis1.domain = [0.35,1.0]
    fig.layout.yaxis2.domain = [0,0.3]
    
    # 设定整个fig的大小
    fig.layout.width = 800
    fig.layout.height = 600
    fig.show()
    

    python读取sql

    conn1 = psycopg2.connect(host="c",user="datareader",password="",port=,database="")
    route1 = pd.read_sql(sql,con=conn1,parse_dates={'postime':{'origin':'unix'}})
    

    python re正则表达式

    # `text` is the input string. Patterns are raw strings so that "\d" reaches
    # the regex engine intact instead of being an invalid string escape.
    re.findall(r"\d+", text) # all runs of digits in the string
    re.findall(r"\D+", text) # all runs of non-digit characters
    re.sub(r'\d+','',text) # remove every run of digits
    

    两直线的夹角
    https://blog.csdn.net/jizhidexiaoming/article/details/100009138

    判断3个点是否为直线
    https://blog.csdn.net/Changxing_J/article/details/107102182

    hough变换检测直线(python)
    https://blog.csdn.net/wss794/article/details/93023013

    几个地理packages
    https://github.com/pbrod/nvector/
    https://fiona.readthedocs.io/en/latest/
    pySAL

    Calculating coordinates given a bearing and a distance

    python绘制QQ图

    import scipy.stats as st
    import matplotlib.pyplot as plt
    import numpy as np
    
    n = 100
    samples = st.norm.rvs(loc = 5, scale = 2, size = n)
    
    samples_sort = sorted(samples)
    
    x_labels_p = np.arange(1/(2*n), 1, 1/n)
    y_labels_p = st.norm.cdf(samples_sort, loc = 5, scale = 2)
    
    plt.scatter(x_labels_p, y_labels_p)
    plt.title('PP plot for normal distribution samle')
    plt.show()
    
    
    x_labels_q = samples_sort
    y_labels_q = st.norm.ppf(x_labels_p, loc = 5, scale = 2)
    
    plt.scatter(x_labels_q, y_labels_q)
    plt.title('QQ plot for normal distribution samle')
    plt.show()
    
    import statsmodels.api as sm
    probplot = sm.ProbPlot(samples, dist = st.norm, loc = 5, scale = 2)
    probplot.qqplot(line='45')
    
    from scipy import stats
    import numpy as np
    x = np.arange(-5, 5, 0.1)
    y = stats.norm.cdf(x, 0, 1)
    plt.plot(x, y)
    
    import pandas as pd
    churn_raw_data = pd.read_csv('churn.txt')
    day_minute = churn_raw_data['Day Mins']
    sorted_ = np.sort(day_minute)
    yvals = np.arange(len(sorted_))/float(len(sorted_))
    plt.plot(sorted_, yvals)
    
    x_label = stats.norm.ppf(yvals)  #对目标累计分布函数值求标准正太分布累计分布函数的逆
    plt.scatter(x_label, sorted_)
    
    stats.probplot(day_minute, dist="norm", plot=plt)
    plt.show()
    

    ks检验

    from scipy.stats import shapiro, kstest
    k1, p1 = kstest(arr_same, 'norm')
    

    验证是否符合泊松分布

    data <- rpois(n = 100, 20)
    mean <- mean(data)
    poisson.test(sum(data), length(data), mean)
    #P值越大说明数据符合度越好
    

    嵌套列表展开

    list_1 = [[1, 2], [3, 4, 5], [6, 7], [8], [9]]
    # One linear pass; sum(list_1, []) rebuilds the accumulated list for every
    # sub-list, which is quadratic in the total number of items.
    list_2 = [item for sublist in list_1 for item in sublist]
    print(list_2)
    

    找出值大于某个数的键

    {k:v for k, v in test_dict.items() if v>=3}
    

    嵌套列表计数

    from collections import Counter
    from itertools import chain
    Counter(chain.from_iterable(test))
    

    相关文章

      网友评论

          本文标题:工作中的学习(日常更新)

          本文链接:https://www.haomeiwen.com/subject/zivzjctx.html