Source code for trackc.pl.bigwig

import sys
import warnings
from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Union

import pandas as pd
import pyBigWig
from matplotlib.axes import Axes

from trackc.tl._getRegionsCmat import GenomeRegion

warnings.filterwarnings("ignore")


def _make_multi_region_ax(ax, lineGenomeRegions):
    lineGenomeRegions["len"] = (
        lineGenomeRegions["fetch_end"] - lineGenomeRegions["fetch_start"]
    )
    lineGenomeRegions["ax_ratio"] = (
        lineGenomeRegions["len"] / lineGenomeRegions["len"].sum()
    )
    lineGenomeRegions["ax_x"] = (
        lineGenomeRegions["ax_ratio"].cumsum(axis=0) - lineGenomeRegions["ax_ratio"]
    )
    axs = [
        ax.inset_axes([row["ax_x"], 0, row["ax_ratio"], 1])
        for i, row in lineGenomeRegions.iterrows()
    ]
    for axi in axs:
        axi.axis("off")

    return axs


[docs]def bw_track( bw, ax: Optional[Axes] = None, regions: Union[Sequence[str], str, None] = None, binsize: Optional[int] = 50000, style: Optional[str] = "bar", summary_type: Union[str, None] = "mean", vmin: Optional[float] = None, vmax: Optional[float] = None, primary_col: Union[Sequence[str], None] = "#3271B2", secondary_col: Union[Sequence[str], None] = "#FBD23C", alpha: Optional[float] = 1.0, invert_y: Optional[bool] = False, label: Optional[str] = None, label_rotation: Union[int, None] = 0, label_fontsize: Optional[int] = 9, tick_fontsize: Optional[int] = 7, tick_fl: Optional[str] = "%0.2f", ax_on: bool = False, ): """ Plot bigwig signal track, support for multiple or reverse genome regions. Parameters ---------- bw: `pyBigWig.open` query object, or bigwig file path ax: :class:`matplotlib.axes.Axes` object regions: `str` | `str list` The genome regions to show the signal. e.g. ``"chr6:1000000-2000000"`` or ``["chr6:1000000-2000000", "chr3:5000000-4000000", "chr5"]`` The start can be larger than the end (eg. ``"chr6:2000000-1000000"``), which means the reverse region binsize: `int` binsize divided to computing signal summary statistics style: `str` plot type, default='bar', options=['bar', 'line'] summary_type: `str` Summary type (mean, min, max, coverage, std), default 'mean'. vmin: `float` the minimum range of values used to define the ylim vmax: `float` the maximum range of values used to define the ylim primary_col: `str` the signal bar color secondary_col: `str` the signal bar color for negative values alpha: `float` alpha of plot color invert_y: `bool` whether reverse the y-axis label: `str` the title of the track, will show on the left label_rotation: `int` the label text rotation label_fontsize: `int` the label text fontsize tick_fontsize: `int` values range ticks text fontsize tick_fl: `str` values range ticks retains a few decimal places ax_on: `bool` whether show the spines Example ------- >>> import trackc as tc >>> import pyBigWig >>> H3K27ac = pyBigWig.open('./GSM4604189.bigwig') >>> ten = tc.tenon(figsize=(8,1)) >>> ten.add(pos='bottom', height=1, hspace=0.1) >>> ten.add(pos='bottom', height=1, hspace=0.2) >>> regions = ['chr8:127000000-129200000', 'chr14:96500000-99300000'] >>> tc.pl.bw_track(H3K27ac, ten.axs(0), regions=regions, vmax=20, label='H3K27ac', binsize=10000, primary_col='tab:blue') >>> tc.pl.bw_track(H3K27ac, ten.axs(1), regions=regions, vmax=5, label='H3K27ac', binsize=10000, invert_y=True, ax_on=True) >>> tc.savefig('trackc_bigwig_track.pdf') """ if isinstance(regions, list): line_GenomeRegions = pd.concat( [GenomeRegion(i).GenomeRegion2df() for i in regions] ) else: line_GenomeRegions = GenomeRegion(regions).GenomeRegion2df() if isinstance(bw, str) == True: bw = pyBigWig.open(bw) axs = _make_multi_region_ax(ax, line_GenomeRegions) line_GenomeRegions = line_GenomeRegions.reset_index() if isinstance(primary_col, list) == False: primary_col = [primary_col] if len(primary_col) < line_GenomeRegions.shape[0]: repeat_times = (line_GenomeRegions.shape[0] + len(primary_col) - 1) // len( primary_col ) primary_col = (primary_col * repeat_times)[: line_GenomeRegions.shape[0]] if isinstance(secondary_col, list) == False: secondary_col = [secondary_col] if len(secondary_col) < line_GenomeRegions.shape[0]: repeat_times = (line_GenomeRegions.shape[0] + len(secondary_col) - 1) // len( secondary_col ) secondary_col = (secondary_col * repeat_times)[: line_GenomeRegions.shape[0]] min_y = 0 max_y = 0 plot_bottom_line = True for i, row in line_GenomeRegions.iterrows(): bins = int(row["len"] / binsize) if row["chrom"] not in bw.chroms(): raw_chr = row["chrom"] if row["chrom"].startswith("chr"): row["chrom"] = row["chrom"].lstrip("chr") else: row["chrom"] = "chr" + row["chrom"] if row["chrom"] not in bw.chroms(): print(f"{raw_chr} not in bigwig chroms!") return plot_list = bw.stats( row["chrom"], int(row["fetch_start"]), int(row["fetch_end"]), type=summary_type, nBins=bins, ) plot_list = [0 if v is None else v for v in plot_list] if style == "line": axs[i].plot(range(0, bins), plot_list, color=primary_col[i], alpha=alpha) else: plotvalues = pd.DataFrame({"v": plot_list}, index=range(0, bins)) pos_value = plotvalues.query("v>=0") neg_value = plotvalues.query("v<0") if neg_value.shape[0] > 0: plot_bottom_line = False axs[i].bar( x=pos_value.index, height=pos_value["v"], width=1, # bottom=[0] * (pos_value.shape[0]), bottom=0, color=primary_col[i], align="edge", edgecolor=None, alpha=alpha, ) axs[i].bar( x=neg_value.index, height=neg_value["v"], width=1, # bottom=[0] * (neg_value.shape[0]), bottom=0, color=secondary_col[i], align="edge", edgecolor=None, alpha=alpha, ) right, left = bins, 0 if row["isReverse"] == True: left, right = bins, 0 axs[i].set_xlim(left, right) if min_y > min(plot_list): min_y = min(plot_list) if max_y < max(plot_list): max_y = max(plot_list) if vmin == None: vmin = min_y if vmax == None: vmax = max_y for axi in axs: if invert_y: axi.set_ylim(vmax, vmin) else: axi.set_ylim(vmin, vmax) va = "top" if invert_y: va = "bottom" ax.set_ylim(vmax, vmin) else: ax.set_ylim(vmin, vmax) if plot_bottom_line: ax.text( 0, vmax, " [{0}, {1}]".format(tick_fl % vmin, tick_fl % vmax), verticalalignment=va, fontsize=tick_fontsize, ) ax.set_yticks([]) ax.set_yticklabels("") else: ax.set_yticks([vmin, 0, vmax]) ax.set_yticklabels( [f"{tick_fl % vmin}", "0", f"{tick_fl % vmax}"], fontsize=tick_fontsize ) if invert_y: ax.set_yticks([vmax, 0, vmin]) ax.set_yticklabels( [f"{tick_fl % vmax}", "0", f"{tick_fl % vmin}"], fontsize=tick_fontsize ) ax.set_ylabel( label, fontsize=label_fontsize, rotation=label_rotation, horizontalalignment="right", verticalalignment="center", ) if ax_on == False: spines = ["top", "bottom", "left", "right"] if invert_y == True: if plot_bottom_line == True: del spines[0] else: del spines[2] else: if plot_bottom_line == True: del spines[1] else: del spines[2] for i in spines: ax.spines[i].set_visible(False) ax.set_xticks([]) ax.set_xticklabels("")