pyBigWig处理bigwig

作者: JeremyL | 来源:发表于2022-07-24 23:09 被阅读0次

    pyBigWig是用C编写的调用libBigWig库的python扩展,可以快速访问和处理bigBed和bigWig文件。

    # 依赖

    • libcurl
    • zlib

    # 安装

    pip install pyBigWig
    

    # 调用

    import pyBigWig
    

    # 读取文件

    #本地文件
    bw = pyBigWig.open("test/test.bw")
    #远程文件
    bb = pyBigWig.open("https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed")
    #写入文件
    bw = pyBigWig.open("test/output.bw", "w")
    

    # 查看文件类型

    >>> bw = pyBigWig.open("test/test.bw")
    >>> bw.isBigWig()
    True
    >>> bw.isBigBed()
    False
    

    # 查看染色体和长度

    >>> bw.chroms()
    dict_proxy({'1': 195471971L, '10': 130694993L})
    >>> bw.chroms("1")
    195471971L
    

    # 查看header

    >>> bw.header()
    {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L, 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}
    

    # 查看一段位置的总结

    min,coverage,std

    #输出范围的均值
    >>> bw.stats("1", 0, 3)
    [0.2000000054637591]
    >>> bw.stats("1", 0, 3, type="max")
    [0.30000001192092896]
    >>> bw.stats("1",99, 200, type="max", nBins=2)
    [1.399999976158142, 1.5]
    #未设置起始终止位置,输出整条染色体信息
    >>> bw.stats("1")
    [1.3351851569281683]
    

    # 一段位置内单碱基的值

    使用stats() nBins设置为位置范围内碱基总数也可以实现此功能。

    >>> bw.values("1", 0, 3)
    [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]
    >>> bw.values("1", 0, 4)
    [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, nan]
    

    # Retrieve all intervals in a range

    >>> bw.intervals("1", 0, 3)
    ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896))
    #返回值组成为起始位置,终止位置,值
    

    # Retrieving bigBed entries

    >>> bb = pyBigWig.open("https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed")
    >>> bb.entries('chr1', 10000000, 10020000)
    [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'), (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'), (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]
    #返回起始位置,终止位置,其他信息组成的string
    
    >>> bb.SQL()
    table RnaElements
    "BED6 + 3 scores for RNA Elements data"
        (
        string chrom;      "Reference sequence chromosome or scaffold"
        uint   chromStart; "Start position in chromosome"
        uint   chromEnd;   "End position in chromosome"
        string name;       "Name of item"
        uint   score;      "Normalized score from 0-1000"
        char[1] strand;    "+ or - or . for unknown"
        float level;       "Expression level such as RPKM or FPKM. Set to -1 for no data."
        float signif;      "Statistical significance such as IDR. Set to -1 for no data."
        uint score2;       "Additional measurement/count e.g. number of reads. Set to 0 for no data."
        )
    

    # Add a header to a bigWig file

    >>> bw.addHeader([("chr1", 1000000), ("chr2", 1500000)])
    >>> bw.addHeader([("chr1", 1000000), ("chr2", 1500000)], maxZooms=0)
    

    #Adding entries to a bigWig file

    >>> bw.addEntries(["chr1", "chr1", "chr1"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])
    >>> bw.addEntries("chr1", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)
    >>> bw.addEntries("chr1", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)
    >>> bw.addEntries(["chr1", "chr1", "chr1"], [100, 0, 125], ends=[120, 5, 126], values=[0.0, 1.0, 200.0], validate=False)
    

    # Close a bigWig or bigBed file

    bw.close()
    

    # pyBigWig支持Numpy数据类型输入

    • 检查是否当前版本pyBigWig是否支持Numpy数据类型输入
    >>> import pyBigWig
    >>> pyBigWig.numpy
    1
    # 1表示支持
    
    >>> import pyBigWig
    >>> import numpy
    >>> bw = pyBigWig.open("/tmp/delete.bw", "w")
    >>> bw.addHeader([("1", 1000)], maxZooms=0)
    >>> chroms = np.array(["1"] * 10)
    >>> starts = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=np.int64)
    >>> ends = np.array([5, 15, 25, 35, 45, 55, 65, 75, 85, 95], dtype=np.int64)
    >>> values0 = np.array(np.random.random_sample(10), dtype=np.float64)
    >>> bw.addEntries(chroms, starts, ends=ends, values=values0)
    >>> bw.close()
    
    • values()直接返回numpy 对象
    >>> bw = bw.open("/tmp/delete.bw")
    >>> bw.values('1', 0, 10, numpy=True)
    [ 0.74336642  0.74336642  0.74336642  0.74336642  0.74336642         nan
         nan         nan         nan         nan]
    >>> type(bw.values('1', 0, 10, numpy=True))
    <type 'numpy.ndarray'>
    

    #原文

    Github pyBigWig

    相关文章

      网友评论

        本文标题:pyBigWig处理bigwig

        本文链接:https://www.haomeiwen.com/subject/avgwirtx.html