Extract/subset bigwig#

Extract/subset bigwig file for a given genomic region

[1]:
import os


def extract_and_save_regions(input_file, output_file, regions):
    # 打开输入bigwig文件
    with pyBigWig.open(input_file) as bw_in:
        # 创建一个新的bigwig文件
        headers = [
            (chrom, bw_in.chroms()[chrom])
            for chrom in set([i.split(":")[0] for i in regions])
        ]
        with pyBigWig.open(output_file, "w") as bw_out:
            # 遍历每个区域
            bw_out.addHeader(headers)
            for region in regions:
                chrom, interval = region.split(":")
                start, end = map(int, interval.split("-"))

                xxx = bw_in.intervals(chrom, start, end)
                starts = [i[0] for i in xxx]
                ends = [i[1] for i in xxx]
                values = [i[2] for i in xxx]
                bw_out.addEntries([chrom] * len(xxx), starts, ends=ends, values=values)
[2]:
import pyBigWig

input_bigwig_file = "./GSE152136_RAW/GSM4604189_1360_CUT_TAG_H3K27ac.MACS2.nodup_x_ctl.pval.signal.bigwig"
output_bigwig_file = "GSM4604189_H3K27ac.bw"

# 区域列表
regions = ["chr8:127000000-129200000", "chr14:96000000-99500000"]

extract_and_save_regions(input_bigwig_file, output_bigwig_file, regions)