Source code for trackc.pl.gene

from typing import Optional, Sequence, Union

import pandas as pd
from matplotlib.axes import Axes

from trackc.pl.bigwig import _make_multi_region_ax
from trackc.tl._getRegionsCmat import GenomeRegion


[docs]def gene_track( ax: Optional[Axes] = None, bed12: Union[pd.DataFrame, str] = None, regions: Union[Sequence[str], str, None] = None, track_type: Union[str, None] = "gene", show_label: Union[bool, Sequence[str], str] = True, pos_strand_gene_color: Union[str, None] = "#3366CC", neg_strand_gene_color: Union[str, None] = "#EECFA1", line: Union[int, None] = 1, gene_fontsize: Union[int, None] = 7, label: Optional[str] = None, label_rotation: Union[int, None] = 0, label_fontsize: Optional[int] = 12, ax_on: bool = False, ): """ Plot gene track, support for multiple or reverse genome regions. Parameters ---------- ax: :class:`matplotlib.axes.Axes` object bed12: `pd.DataFrame` | `str` gene annotation bed12 format `DataFrame` or `filepath` Bed12 files can be converted from GTF using `trackc gtf2bed`. https://trackc.readthedocs.io/en/latest/analysis_guide/genebed12/bed12.html regions: `str` | `str list` genome regions, format: `chrom:start-end`. e.g. ``['chr18:47950000-48280000', 'chr18:75280000-74850000']`` or ``"chr18:45000000-78077248"``. If the start is bigger than end, the genome region will be reversed track_type: `str` you can select one of the options: `gene` or `dendity` gene: gene track style dendity: gene density style. Under development show_label: `bool` | `str` | `str list` If the value is `False`, the gene name will not show If want show one gene, and hide others, just set the gene or gene list as the value, eg: `PIBF1` | `['PIBF1', 'KLF5']` pos_strand_gene_color: `str` positive strand gene name color neg_strand_gene_color: `str` negative strand gene name color line: `int` rows occupied by the genes in the region plotted gene_fontsize: `int` gene label fontsize label: `str` the title of the track, will show on the left label_rotation: `int` the label text rotation label_fontsize: `int` the label text fontsize ax_on: `bool` If True, top, left and right spines will show Returns ------- None Example ------- >>> import trackc as tc >>> regions = ['chr18:47950000-48280000', 'chr18:75280000-74850000'] >>> gene_bed12 = '/path/GRCh38.84.bed12' >>> fig, axs = tc.make_spec(figsize=(7,2), height_ratios=[1]) >>> tc.pl.gene_track(gene_bed12, ax=axs[0], regions=regions, line=12) >>> tc.savefig('trackc_gene_track.pdf') """ if isinstance(regions, list): line_GenomeRegions = pd.concat( [GenomeRegion(i).GenomeRegion2df() for i in regions] ) else: line_GenomeRegions = GenomeRegion(regions).GenomeRegion2df() axs = _make_multi_region_ax(ax, line_GenomeRegions) line_GenomeRegions = line_GenomeRegions.reset_index() ax.set_ylabel( label, fontsize=label_fontsize, rotation=label_rotation, horizontalalignment="right", verticalalignment="center", ) ax.set_xticks([]) ax.set_xticklabels("") ax.set_yticks([]) ax.set_yticklabels("") if ax_on == False: spines = ["top", "bottom", "left", "right"] for i in spines: ax.spines[i].set_color("none") if isinstance(bed12, str): bed12 = pd.read_table(bed12, sep="\t", header=None) bed12 = bed12.iloc[:, 0:12] bed12.columns = [ "chrom", "start", "end", "name", "score", "strand", "thickStart", "thickEnd", "itemRgb", "blockCount", "blockSizes", "blockStarts", ] bed12["blockSizes"] = bed12["blockSizes"].str.rstrip(",") bed12["blockStarts"] = bed12["blockStarts"].str.rstrip(",") bed12["chrom"] = bed12["chrom"].astype(str) chrom_names = bed12["chrom"].unique() for ix, row in line_GenomeRegions.iterrows(): if row["chrom"] not in chrom_names: raw_chr = row["chrom"] if row["chrom"].startswith("chr"): row["chrom"] = row["chrom"].lstrip("chr") else: row["chrom"] = "chr" + row["chrom"] if row["chrom"] not in chrom_names: print(f"{raw_chr} not in bigwig chroms!") return if track_type == "gene": _plot_gene( axs[ix], bed12, row["chrom"], row["fetch_start"], row["fetch_end"], needReverse=row["isReverse"], show_label=show_label, pos_strand_gene_color=pos_strand_gene_color, neg_strand_gene_color=neg_strand_gene_color, line=line, fontsize=gene_fontsize, ax_on=ax_on, ) if track_type == "density": print("This gene type is developping") else: pass
def _plot_gene( ax, gene_bed, chrom, start, end, needReverse=False, show_label=True, pos_strand_gene_color="#3366CC", neg_strand_gene_color="#EECFA1", line=1, fontsize=5, ax_on=False, ): gene_bed = gene_bed[gene_bed["chrom"] == chrom] gene_bed_plot = gene_bed[ ((gene_bed["start"] >= start) & (gene_bed["start"] <= end)) | ((gene_bed["end"] >= start) & (gene_bed["end"] <= end)) ] gene_bed_plot = gene_bed_plot.sort_values(by="end") # print(gene_bed_plot plot_gene_num = gene_bed_plot.shape[0] ii = 0 head_length = (abs(end - start) / (line + 2)) / 5 if line <= 3: head_length = (abs(end - start) / (line * 3)) / 10 for i, row in gene_bed_plot.iterrows(): # col = pos_strand_gene_color text_col = pos_strand_gene_color if row["strand"] == "-": # col = neg_strand_gene_color text_col = neg_strand_gene_color # text_col = col plot_y = ii % line ax.plot( (row["start"], row["end"]), (plot_y + 0.5, plot_y + 0.5), color="k", linewidth=1, solid_capstyle="butt", ) starts = [int(x) for x in row["blockStarts"].split(",")] widths = [int(x) for x in row["blockSizes"].split(",")] ax.bar( x=starts, height=0.4, width=widths, bottom=plot_y + 0.3, edgecolor="k", linewidth=1, align="edge", color="k", ) if row["start"] < start: row["start"] = start if row["end"] > end: row["end"] = end arrow_s = row["end"] dx = 0.3 if row["strand"] == "-": arrow_s = row["start"] dx = -0.1 ax.arrow( arrow_s, plot_y + 0.5, dx, 0, overhang=0.5, width=0, head_width=0.28, head_length=head_length, length_includes_head=False, color=text_col, linewidth=0.5, ) if isinstance(show_label, bool): if show_label == False: ii += 1 continue if isinstance(show_label, str): if row["name"] != show_label: ii += 1 continue if isinstance(show_label, list): if row["name"] not in show_label: ii += 1 continue if (row["name"] in gene_bed_plot.iloc[-4:, :]["name"]) or ( int(line / (ii + 1)) < 2 ): ha = "right" genename = row["name"] + " " xpos = row["start"] if needReverse: ha = "left" genename = " " + row["name"] # xpos = row['end'] ypos = plot_y + 0.5 if line == 1: xpos = row["start"] + abs(row["start"] - row["end"]) / 2 ypos = 0.8 ha = "center" ax.text( xpos, ypos, genename + " ", ha=ha, va="center", color=text_col, fontsize=fontsize, ) else: ha = "left" genename = " " + row["name"] xpos = row["end"] if needReverse: ha = "right" genename = row["name"] + " " # xpos = row['start'] ypos = plot_y + 0.5 if line == 1: xpos = row["start"] + abs(row["start"] - row["end"]) / 2 ypos = 0.8 ha = "center" ax.text( xpos, ypos, genename, ha=ha, va="center", color=text_col, fontsize=fontsize, ) ii += 1 xlim_s = start xlim_e = end if needReverse == True: xlim_s = end xlim_e = start ax.set_xlim(xlim_s, xlim_e) ax.set_ylim(top=0, bottom=line) if plot_gene_num < line: ax.spines["bottom"].set_position(("data", plot_gene_num)) if ax_on == False: spines = ["top", "bottom", "left", "right"] for i in spines: ax.spines[i].set_visible(False) # for i in ['left','top','right']: ax.spines[i].set_color("none") ax.spines[i].set_linewidth(0) ax.spines["bottom"].set_color("black") ax.spines["bottom"].set_linewidth(0.5) ax.tick_params(bottom=True, top=False, left=False, right=False) ax.set_xticklabels("") ax.set_yticklabels("")