import sys
from typing import Optional, Sequence, Union
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import cm
from matplotlib.axes import Axes
from matplotlib.collections import PatchCollection
from matplotlib.colors import Colormap, TwoSlopeNorm
from matplotlib.patches import Polygon
from trackc.pl.bigwig import _make_multi_region_ax
from trackc.pl.links import _plot_loop_arc
from trackc.tl._getRegionsCmat import GenomeRegion
[docs]def bed_track(
ax: Optional[Axes] = None,
bed: Union[pd.DataFrame, str, None] = None,
regions: Union[Sequence[str], str, None] = None,
style: Union[str, None] = "bar",
primary_col: Union[Sequence[str], None] = "#3271B2",
secondary_col: Union[Sequence[str], None] = "#FBD23C",
cmap: Union[Colormap, str, None] = None,
intervals: Union[int, None] = 1,
# show_names: Union[bool, None] = False,
alpha: Union[float, None] = 1,
invert_y: Optional[bool] = False,
label: Union[str, None] = None,
label_fontsize: Union[int, None] = 12,
label_rotation: Union[int, None] = 0,
vmin: Optional[float] = None,
vmax: Optional[float] = None,
tick_fontsize: Optional[int] = 8,
tick_fl: Optional[str] = "%0.2f",
score_label_size: Union[int, None] = 7,
):
"""
Plot bed track, support for multiple or reverse genome regions.
support bed3 and bed5, the fields after the column5 will be ignored,
should be sorted py chromStart if ``style`` is `line`
Parameters
----------
ax: :class:`matplotlib.axes.Axes` object
bed: `pd.DataFrame` | `str`
If ``bed`` if a filepath, the file should have no headers.
Here is bed formats:
column1: chrom
The name of the chromosome (e.g. chr3, chrY, chr2_random) or scaffold (e.g. scaffold10671).
column2: chromStart
The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0.
column3: chromEnd
The ending position of the feature in the chromosome or scaffold.
column4: name
Defines the name of the BED line. Either "." (=no name), or other string
column5: score
Defines the name of the BED line. if nessasary, can be set as ``.``,
if track_type/style is one of bar/line
column6: strand
Defines the strand. Either "." (=no strand) or "+" or "-".
if the input data have 4 columns, then default input is bedGraph format: [chrom start end score]
regions: `str` | `str list`
The genome regions to plot.
e.g. ``"chr6:1000000-2000000"`` or ``["chr6:1000000-2000000", "chr3:5000000-4000000"]``
The start can be larger than the end (e.g. ``"chr6:2000000-1000000"``),
which means you want to get the reverse region
style: `str`
bed blocks style, opions in ['line', 'bar', 'link', 'tri', 'rec']
primary_col: `str`
the color of line/tri/rec, if color is color list, the block color will set by regions
secondary_col: `str`
the color of line/tri/rec for negative values
cmap: `str` | `matplotlib.colors.Colormap`
the colormap of the plot except style:line
intervals: `int`
if style is one of [tri, rec], the row number distribution for triangle or rectangle blocks
alpha: `float`
alpha of plot color
invert_y: `bool`
whether reverse the y-axis
label: `str`
the title of the track, will show on the left
label_fontsize: `int`
the label text fontsize
label_rotation: `int`
the label text rotation
vmin: `float`
the minimum range of values used to define the ylim
vmax: `float`
the maximum range of values used to define the ylim
tick_fontsize: `int`
values range ticks text fontsize
tick_fl: `str`
values range ticks retains a few decimal places
score_label_size: `int`
score colorbar label fontsize
"""
if isinstance(regions, list):
line_GenomeRegions = pd.concat(
[GenomeRegion(i).GenomeRegion2df() for i in regions]
)
else:
line_GenomeRegions = GenomeRegion(regions).GenomeRegion2df()
axs = _make_multi_region_ax(ax, line_GenomeRegions)
line_GenomeRegions = line_GenomeRegions.reset_index()
if isinstance(primary_col, list) == False:
primary_col = [primary_col]
if len(primary_col) < line_GenomeRegions.shape[0]:
repeat_times = (line_GenomeRegions.shape[0] + len(primary_col) - 1) // len(
primary_col
)
primary_col = (primary_col * repeat_times)[: line_GenomeRegions.shape[0]]
if isinstance(secondary_col, list) == False:
secondary_col = [secondary_col]
if len(secondary_col) < line_GenomeRegions.shape[0]:
repeat_times = (line_GenomeRegions.shape[0] + len(secondary_col) - 1) // len(
secondary_col
)
secondary_col = (secondary_col * repeat_times)[: line_GenomeRegions.shape[0]]
if isinstance(cmap, list) == False:
cmap = [cmap]
if len(cmap) < line_GenomeRegions.shape[0]:
repeat_times = (line_GenomeRegions.shape[0] + len(cmap) - 1) // len(cmap)
cmap = (cmap * repeat_times)[: line_GenomeRegions.shape[0]]
ax.set_ylabel(
label, fontsize=label_fontsize, rotation=label_rotation, ha="right", va="center"
)
spines = ["top", "right", "left", "bottom"]
for i in spines:
ax.spines[i].set_visible(False)
ax.set_xticks([])
ax.set_xticklabels("")
ax.set_yticks([])
ax.set_yticklabels("")
if isinstance(bed, str) == True:
bed = pd.read_table(bed, sep="\t", header=None)
else:
bed = bed.copy()
bed = bed.fillna(0)
score_label = ""
if bed.shape[1] == 3:
bed.columns = ["chrom", "start", "end"]
if bed.shape[1] == 4:
bed.columns = ["chrom", "start", "end", "score"]
if bed["score"].dtypes == "int64":
print("Input data is bedGraph format")
if bed.shape[1] == 5:
score_label = bed.columns[4]
bed.columns = ["chrom", "start", "end", "name", "score"]
if bed.shape[1] == 6:
score_label = bed.columns[4]
bed.columns = ["chrom", "start", "end", "name", "score", "strand"]
bed["chrom"] = bed["chrom"].astype(str)
min_y = None
max_y = None
max_len = 0
chromos = bed["chrom"].unique()
for ix, row in line_GenomeRegions.iterrows():
raw_chr = row["chrom"]
if row["chrom"] not in chromos:
if row["chrom"].startswith("chr"):
row["chrom"] = row["chrom"].lstrip("chr")
else:
row["chrom"] = "chr" + row["chrom"]
if row["chrom"] not in chromos:
print(f"{raw_chr} not in beg chroms!")
sys.exit(0)
bed2plot = bed[
(bed["chrom"] == row["chrom"])
& (bed["end"] >= row["fetch_start"])
& (bed["start"] <= row["fetch_end"])
]
if style in ["line", "bar"] or bed.shape[1] >= 4:
if min_y == None:
min_y = bed2plot["score"].min(skipna=True, numeric_only=True)
else:
if min_y < bed2plot["score"].min(skipna=True, numeric_only=True):
min_y = bed2plot["score"].min(skipna=True, numeric_only=True)
if max_y == None:
max_y = bed2plot["score"].max(skipna=True, numeric_only=True)
else:
if max_y > bed2plot["score"].max(skipna=True, numeric_only=True):
max_y = bed2plot["score"].max(skipna=True, numeric_only=True)
maxlength = (bed2plot["end"] - bed2plot["start"]).max(
skipna=True, numeric_only=True
)
if max_len < maxlength:
max_len = maxlength
if vmin == None:
vmin = min_y
if vmax == None:
vmax = max_y
for ix, row in line_GenomeRegions.iterrows():
raw_chr = row["chrom"]
if row["chrom"] not in chromos:
if row["chrom"].startswith("chr"):
row["chrom"] = row["chrom"].lstrip("chr")
else:
row["chrom"] = "chr" + row["chrom"]
if row["chrom"] not in chromos:
print(f"{raw_chr} not in beg chroms!")
sys.exit(0)
bed2plot = bed[
(bed["chrom"] == row["chrom"])
& (bed["end"] >= row["fetch_start"])
& (bed["start"] <= row["fetch_end"])
].copy()
if bed2plot.shape[0] == 0:
continue
if style == "line":
_plot_bed_bar_l(
axs[ix],
bed2plot,
row["fetch_start"],
row["fetch_end"],
needReverse=row["isReverse"],
style="line",
pri_col=primary_col[ix],
# 5sec_col=secondary_col[ix],
alpha=alpha,
)
if style == "bar":
_plot_bed_bar_l(
axs[ix],
bed2plot,
row["fetch_start"],
row["fetch_end"],
needReverse=row["isReverse"],
style="bar",
pri_col=primary_col[ix],
sec_col=secondary_col[ix],
alpha=alpha,
)
if style == "rec":
_plot_bed_rec(
ax,
axs[ix],
bed2plot,
row["fetch_start"],
row["fetch_end"],
needReverse=row["isReverse"],
color=primary_col[ix],
cname=cmap[ix],
alpha=alpha,
vmin=vmin,
vmax=vmax,
score_label=score_label,
intervals=intervals,
score_label_size=score_label_size,
)
if style == "tri":
_plot_bed_tri(
ax,
axs[ix],
bed2plot,
row["fetch_start"],
row["fetch_end"],
needReverse=row["isReverse"],
color=primary_col[ix],
cname=cmap[ix],
alpha=alpha,
vmin=vmin,
vmax=vmax,
score_label=score_label,
score_label_size=score_label_size,
)
if invert_y:
axs[ix].set_ylim(max_len / 2, 0)
else:
axs[ix].set_ylim(0, max_len / 2)
if style == "link":
_plot_bed_link(
ax=ax,
bed=bed2plot,
start=row["fetch_start"],
end=row["fetch_end"],
needReverse=row["isReverse"],
invert_y=invert_y,
color=primary_col[ix],
cmap=cmap[ix],
alpha=alpha,
)
if style in ["line", "bar"]:
for axi in axs:
if invert_y:
axi.set_ylim(vmax, vmin)
else:
axi.set_ylim(vmin, vmax)
if invert_y:
ax.set_ylim(vmax, vmin)
else:
ax.set_ylim(vmin, vmax)
text_label_y_pos = vmax
if invert_y:
text_label_y_pos = vmin
ax.text(
0,
vmax,
" [{1}, {0}]".format(tick_fl % vmin, tick_fl % vmax),
va="bottom",
fontsize=tick_fontsize,
)
else:
ax.text(
0,
text_label_y_pos,
" [{0}, {1}]".format(tick_fl % vmin, tick_fl % vmax),
va="top",
fontsize=tick_fontsize,
)
def _make_tri_data(start, end):
data = np.array(
[[start, 0], [end, 0], [start + (end - start) / 2, (end - start) / 2]]
)
return data
def _plot_bed_tri(
mainAX,
ax,
bed,
start,
end,
needReverse,
color,
cname,
alpha,
vmin,
vmax,
score_label=None,
score_label_size=8,
):
colors = color
norm = None
if cname != None:
if isinstance(cname, str):
map_vir = cm.get_cmap(cname)
if isinstance(cname, Colormap):
map_vir = cname
# 因为 y 大到一定程度超过临界数值后颜色就会饱和不变(不使用循环colormap)。
norm = plt.Normalize(vmin, vmax)
if vmin < 0 and vmax > 0:
norm = TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax)
# matplotlib.colors.Normalize 对象,可以作为参数传入到绘图方法里
# 也可给其传入数值直接计算归一化的结果
norm_y = norm(bed["score"])
colors = map_vir(norm_y)
patches = []
bed = bed.reset_index()
# print(colors[0])
# print(colors[1])
for i, row in bed.iterrows():
polygon = Polygon(
_make_tri_data(row["start"], row["end"]), closed=True, color=colors
)
patches.append(polygon)
p = PatchCollection(patches, alpha=alpha, match_original=True)
ax.add_collection(p)
xlim_s = start
xlim_e = end
if needReverse == True:
xlim_s = end
xlim_e = start
ax.set_xlim(xlim_s, xlim_e)
if cname != None:
sm = cm.ScalarMappable(norm=norm, cmap=map_vir)
cax = mainAX.inset_axes([1.01, 0, 0.01, 1])
cb = plt.colorbar(sm, ax=mainAX, cax=cax, label=score_label)
cb.set_label(score_label, fontsize=score_label_size)
def _plot_bed_rec(
mainAX,
ax,
bed,
start,
end,
needReverse,
color,
cname,
alpha,
vmin,
vmax,
score_label=None,
intervals=1,
score_label_size=8,
):
colors = color
if cname != None:
if isinstance(cname, str):
map_vir = cm.get_cmap(cname)
if isinstance(cname, Colormap):
map_vir = cname
# 因为 y 大到一定程度超过临界数值后颜色就会饱和不变(不使用循环colormap)。
norm = plt.Normalize(vmin, vmax)
if vmin < 0 and vmax > 0:
norm = TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax)
# matplotlib.colors.Normalize 对象,可以作为参数传入到绘图方法里
# 也可给其传入数值直接计算归一化的结果
norm_y = norm(bed["score"])
colors = map_vir(norm_y)
bed.loc[:, "length"] = abs(bed["end"] - bed["start"])
bottom = list(range(intervals))
# broadcast
if intervals < bed.shape[0]:
repeat_times = (bed.shape[0] + intervals - 1) // intervals
bottom = (bottom * repeat_times)[: bed.shape[0]]
else:
bottom = bottom[: bed.shape[0]]
bed.loc[:, "bottom"] = bottom
ax.bar(
x="start",
width="length",
height=1,
bottom="bottom",
align="edge",
color=colors,
edgecolor=None,
data=bed,
alpha=alpha,
)
xlim_s = start
xlim_e = end
if needReverse == True:
xlim_s = end
xlim_e = start
ax.set_xlim(xlim_s, xlim_e)
if cname != None:
# sm = cm.ScalarMappable(norm=norm, cmap=map_vir)
sm = cm.ScalarMappable(norm=norm, cmap=map_vir)
cax = mainAX.inset_axes([1.01, 0, 0.01, 0.9])
cb = plt.colorbar(sm, ax=mainAX, cax=cax, label=score_label)
cb.set_ticks([vmin, vmax])
cb.set_label(score_label, fontsize=score_label_size)
def _plot_bed_bar_l(
ax,
bed,
start,
end,
needReverse,
style="bar",
pri_col="#3271B2",
sec_col="#FBD23C",
alpha=1,
):
plot_bottom_line = True
if style == "bar":
bed_pos = bed.query("score>=0")
bed_neg = bed.query("score<0")
if bed_neg.shape[0] > 0:
plot_bottom_line = False
ax.bar(
x=bed_pos["start"],
width=bed_pos["end"] - bed_pos["start"],
height=bed_pos["score"],
color=pri_col,
alpha=alpha,
align="edge",
)
ax.bar(
x=bed_neg["start"],
width=bed_neg["end"] - bed_neg["start"],
height=bed_neg["score"],
color=sec_col,
alpha=alpha,
align="edge",
)
if style == "line":
ax.plot(
bed["start"],
bed["score"],
color=pri_col,
alpha=alpha,
solid_capstyle="butt",
)
xlim_s = start
xlim_e = end
if needReverse == True:
xlim_s = end
xlim_e = start
ax.set_xlim(xlim_s, xlim_e)
spines = ["top", "bottom", "left", "right"]
if needReverse == True:
if plot_bottom_line == True:
del spines[0]
else:
if plot_bottom_line == True:
del spines[1]
"""
for i in ["top", "right", "left"]:
ax.spines[i].set_color("none")
ax.spines[i].set_linewidth(0)
if plot_bottom_line:
ax.spines["bottom"].set_color("black")
ax.spines["bottom"].set_linewidth(1)
else:
ax.spines['bottom'].set_color("none")
ax.spines['bottom'].set_linewidth(0)
"""
# ax.tick_params(bottom =True,top=False,left=False,right=False)
# ax.set_xticklabels("")
# ax.set_yticklabels("")
def _plot_bed_link(
ax,
bed,
start,
end,
needReverse=False,
invert_y=False,
color="tab:blue",
cmap="RdBu",
alpha=1,
):
bed = bed[bed.columns[[0, 1, 2]]]
bed.columns = ["chrom", "start", "end"]
bed["length"] = bed["end"] - bed["start"]
max_extend = 0
if bed.shape[0] > 0:
if max_extend < max(bed["length"]):
max_extend = max(bed["length"])
_plot_loop_arc(ax, bed, color, max_extend, invert_y, start, end, "start", "end")
if needReverse == True:
ax.set_xlim(end, start)
else:
ax.set_xlim(start, end)
spines = ["top", "bottom", "left", "right"]
if invert_y == True:
del spines[0]
else:
del spines[1]
for i in spines:
ax.spines[i].set_visible(False)
ax.set_xticks([])
ax.set_xticklabels("")
ax.set_yticks([])
ax.set_yticklabels("")