Extract/subset bigwig#
Extract/subset bigwig file for a given genomic region
[1]:
import os
def extract_and_save_regions(input_file, output_file, regions):
# 打开输入bigwig文件
with pyBigWig.open(input_file) as bw_in:
# 创建一个新的bigwig文件
headers = [
(chrom, bw_in.chroms()[chrom])
for chrom in set([i.split(":")[0] for i in regions])
]
with pyBigWig.open(output_file, "w") as bw_out:
# 遍历每个区域
bw_out.addHeader(headers)
for region in regions:
chrom, interval = region.split(":")
start, end = map(int, interval.split("-"))
xxx = bw_in.intervals(chrom, start, end)
starts = [i[0] for i in xxx]
ends = [i[1] for i in xxx]
values = [i[2] for i in xxx]
bw_out.addEntries([chrom] * len(xxx), starts, ends=ends, values=values)
[2]:
import pyBigWig
input_bigwig_file = "./GSE152136_RAW/GSM4604189_1360_CUT_TAG_H3K27ac.MACS2.nodup_x_ctl.pval.signal.bigwig"
output_bigwig_file = "GSM4604189_H3K27ac.bw"
# 区域列表
regions = ["chr8:127000000-129200000", "chr14:96000000-99500000"]
extract_and_save_regions(input_bigwig_file, output_bigwig_file, regions)