Source code for sc_toolbox.plot

from enum import Enum
from typing import Dict, List, Sequence, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sb
from adjustText import adjust_text
from matplotlib import colors
from rich import print


def _grey_ramp(*stops):
    """Build a 128-step colormap running from light gray through the given color stops."""
    return colors.LinearSegmentedColormap.from_list("grouping", ["lightgray", *stops], N=128)


class Colormaps(Enum):
    """Useful Colormaps for e.g. UMAPs."""

    # Every member is a LinearSegmentedColormap that starts at light gray
    # and passes through two progressively darker shades of the named hue.
    grey_red = _grey_ramp("red", "darkred")
    grey_green = _grey_ramp("limegreen", "forestgreen")
    grey_yellow = _grey_ramp("yellow", "gold")
    grey_violet = _grey_ramp("mediumvioletred", "indigo")
    grey_blue = _grey_ramp("cornflowerblue", "darkblue")
def custom_plot_size(width: int, height: int, dpi: int):
    """Create a custom axis object of desired sizes.

    Args:
        width: Desired plot width
        height: Desired plot height
        dpi: Desired plot DPI.

    Returns:
        Axis of desired sizes
    """
    figure, _ = plt.subplots(figsize=(width, height), dpi=dpi)
    # The freshly created figure holds a single axes, which is its current one.
    return figure.gca()
def standard_lineplot(
    data,
    order: List,
    xlabel: str,
    ylabel: str,
    hue=None,
    gene=None,
    smooth: bool = None,
    palette=None,
    title=None,
    rotation: int = None,
    figsize: Tuple[int, int] = (15, 5),
    tick_size=None,
    label_size=None,
    order_smooth: int = 3,
    confidence_interval=None,
    scatter=None,
    save: str = None,
):
    """Draws a standard line plot based on Seaborn's lmplot (smoothed) or catplot (raw).

    Args:
        data: Data frame containing averaged expression values
        order: Order of x-axis labels from left to right
        xlabel: x-axis label; also the name of the column of `data` plotted on the x-axis
        ylabel: y-axis label
        hue: Column of `data` used to split the values into separately coloured lines.
             Example: "condition"
        gene: Gene of interest; name of the column of `data` plotted on the y-axis
        smooth: Whether to smoothen (interpolate) the curve
        palette: Color palette. For example a list of colors.
        title: Title of the plot
        rotation: If truthy, the x-axis tick labels are drawn vertically
        figsize: Size of the figure as specified in matplotlib
        tick_size: Size of the ticks as specified in matplotlib
        label_size: Size of the labels as specified in matplotlib
        order_smooth: If greater than 1, numpy.polyfit is used to estimate a polynomial regression
        confidence_interval: Confidence interval
        scatter: Set to true in order to add mean expression per sample in form of scatter point
        save: Path to save the plot to; the file name is prefixed with `gene` and an underscore
    """
    base_kwargs = {"data": data, "x": xlabel, "y": gene, "palette": palette}

    if smooth:
        # Possible to set alpha of scatter with scatter_kws={'alpha': 0.1}
        regression_kwargs = dict(base_kwargs, ci=confidence_interval, order=order_smooth, scatter=scatter)
        if hue:
            regression_kwargs.update(hue=hue, truncate=True)
        grid = sb.lmplot(**regression_kwargs)
    else:
        # Removed Parameter order = order, as order should be given numerically anyways.
        point_kwargs = dict(base_kwargs, linestyles="-", kind="point")
        if hue:
            point_kwargs["hue"] = hue
        grid = sb.catplot(**point_kwargs)

        if scatter:
            strip_axis = sb.stripplot(data=data, x=xlabel, y=gene, palette=palette, hue=hue, size=7)
            if hue:
                strip_axis.legend_.remove()

    grid.set(xticks=np.unique(data.loc[:, xlabel]))
    grid.set_xticklabels(order)
    grid.fig.set_size_inches(figsize)

    if rotation:
        grid.ax.set_xticklabels(order, rotation="vertical")

    grid.ax.set_title(title, size=label_size)
    grid.ax.set_xlabel(xlabel, size=label_size)
    grid.ax.set_ylabel(ylabel, size=label_size)
    grid.ax.tick_params(labelsize=tick_size)

    if save:
        full_save_name = f"{gene}_{save}"
        grid.fig.savefig(f"{full_save_name}", bbox_inches="tight")
        print(f"[bold blue]Saving figure to {full_save_name}")

    plt.show()
    plt.close()
def average_expression(
    gene_expression,
    genes,
    order: List[str],
    id_label: str = "identifier",
    xlabel: str = "days",
    cluster: str = "all",
    hue=None,
    palette: str = "tab:blue",
    figsize: Tuple[int, int] = (15, 6),
    smooth=None,
    rotation: int = None,
    order_smooth=None,
    conf_int=None,
    scatter=None,
    save: str = None,
):
    """Draw a line plot showing the gene expression over time. Expression values are averaged by individual sample.

    Args:
        gene_expression: Data frame containing gene expression values
        genes: List of genes for which individual line plots will be generated
        order: Order of x-axis labels from left to right
        id_label: Column of `gene_expression` in which sample id information is stored
        xlabel: x-axis label; also the column of `gene_expression` plotted on the x-axis
        cluster: Which clusters to plot. Select 'all' if all clusters should be drawn.
                 Only used to build the y-axis label.
        hue: Which value to color by. When set, the column is kept while averaging so it
             is available to the plot.
        palette: Color palette forwarded to the line plot
        figsize: Size of the figure as specified in matplotlib
        smooth: Set to true for smoothened line plot using polynomial regression
        rotation: set to True to rotate x-axis labels 90 degrees
        order_smooth: If greater than 1, use numpy.polyfit to estimate a polynomial regression.
                      When None, the default of `standard_lineplot` is used.
        conf_int: Size of the confidence interval for the regression estimate
        scatter: Set to True to add average expression values per sample ID as dots
        save: Path to save the plot to

    Example smooth:

    .. image:: /_images/average_expression_smooth.png

    Example raw:

    .. image:: /_images/average_expression_raw.png
    """
    cluster_label = ", ".join(cluster) if isinstance(cluster, list) else cluster

    # The hue column must survive the averaging, otherwise the plot cannot find it.
    group_keys = [id_label, xlabel]
    if hue and hue not in group_keys:
        group_keys.insert(1, hue)

    # Forwarding order_smooth=None would reach seaborn as order=None and break the
    # polynomial fit, so only override the default when a value was actually given.
    smoothing_kwargs = {} if order_smooth is None else {"order_smooth": order_smooth}

    for gene in genes:
        meanpid = gene_expression.groupby(group_keys)[gene].mean().reset_index()
        standard_lineplot(
            meanpid,
            order=order,
            xlabel=xlabel,
            ylabel=f"Average expression in cluster {cluster_label}",
            hue=hue,
            gene=gene,
            smooth=smooth,
            palette=palette,
            title=gene,
            rotation=rotation,
            figsize=figsize,
            save=save,
            confidence_interval=conf_int,
            scatter=scatter,
            **smoothing_kwargs,
        )
def average_expression_per_cluster(
    gene_expression,
    genes,
    order,
    obs=None,
    id_label: str = "identifier",
    xlabel: str = "days",
    cluster: str = "all",
    hue=None,
    figsize: Tuple[int, int] = (15, 6),
    smooth=None,
    rotation=None,
    tick_size: int = 12,
    label_size: int = 15,
    order_smooth=None,
    conf_int=None,
    palette=None,
    scatter=None,
    save: str = None,
):
    """Plots gene expression over time split by cluster identity.

    One line per cluster.

    Args:
        gene_expression: Data frame containing gene expression values
        genes: List of genes for which individual line plots will be generated
        order: Order of x-axis labels from left to right
        obs: Data frame containing meta data information. Required when `hue` is set,
             because the hue value of every sample is looked up in it.
        id_label: Meta data column in which sample id information is stored
        xlabel: x-axis label; also the column of `gene_expression` plotted on the x-axis
        cluster: Which clusters to plot. Select 'all' if all clusters should be drawn.
                 Only used to build the y-axis label.
        hue: Split expression values by this grouping, one line per category will be drawn
        figsize: Size of the figure as specified in matplotlib
        smooth: Set to True for smoothened line plot using polynomial regression
        rotation: Set to True to rotate x-axis labels 90 degrees
        tick_size: Size of the ticks as specified in matplotlib
        label_size: Size of the labels as specified in matplotlib
        order_smooth: If greater than 1, use numpy.polyfit to estimate a polynomial regression.
                      When None, the default of `standard_lineplot` is used.
        conf_int: Size of the confidence interval for the regression estimate
        palette: Color palette that gets passed to Seaborn's lineplot. For example a list of colors.
        scatter: Set to True to add average expression values per sample ID as dots
        save: Path to save the plot to

    Raises:
        ValueError: If `hue` is set but `obs` was not passed.
    """
    if hue and obs is None:
        raise ValueError("`obs` must be passed when `hue` is set.")

    cluster_label = ", ".join(cluster) if isinstance(cluster, list) else cluster

    # Map every sample id to its hue value; for ids with several hue values the last
    # (id, hue) combination wins, as in a plain dict assignment loop.
    hue_by_sample = {}
    if hue:
        hue_by_sample = {sample: level for sample, level in obs.groupby([id_label, hue]).groups.keys()}

    # Forwarding order_smooth=None would reach seaborn as order=None and break the
    # polynomial fit, so only override the default when a value was actually given.
    smoothing_kwargs = {} if order_smooth is None else {"order_smooth": order_smooth}

    for gene in genes:
        meanpid = gene_expression.groupby([id_label, xlabel])[gene].mean().reset_index()
        if hue:
            # Look the sample id up via id_label rather than a hard-coded column name.
            meanpid[hue] = [hue_by_sample[sample] for sample in meanpid[id_label]]

        standard_lineplot(
            meanpid,
            order=order,
            xlabel=xlabel,
            ylabel=f"Average expression in cluster {cluster_label}",
            hue=hue,
            gene=gene,
            smooth=smooth,
            palette=palette,
            title=gene,
            tick_size=tick_size,
            label_size=label_size,
            rotation=rotation,
            figsize=figsize,
            save=save,
            confidence_interval=conf_int,
            scatter=scatter,
            **smoothing_kwargs,
        )
def average_expression_split_cluster(
    gene_expression,
    genes,
    order,
    id_label="identifier",
    xlabel="days",
    hue="genotype",
    cluster=None,
    figsize=(15, 6),
    smooth=None,
    rotation=None,
    cols=None,
    tick_size=12,
    label_size=15,
    order_smooth=None,
    conf_int=None,
    scatter=None,
    save=None,
):
    """Plot average gene expression as line plots for multiple clusters at once.

    Args:
        gene_expression: Data frame containing gene expression values
        genes: List of genes for which individual line plots will be generated
        order: Order of x-axis labels from left to right
        id_label: Meta data column in which sample id information is stored
        xlabel: x-axis label; also the column of `gene_expression` plotted on the x-axis
        hue: Split expression values by this grouping, one line per category, will be drawn
        cluster: Cluster name or list of cluster names; only used to build the y-axis label
        figsize: Size of the figure as specified in matplotlib
        smooth: Set to True for smoothened line plot using polynomial regression
        rotation: x-axis label rotation
        cols: List of colors to use for line plot
        tick_size: Size of the ticks as specified in matplotlib
        label_size: Size of the labels as specified in matplotlib
        order_smooth: If greater than 1, numpy.polyfit is used to estimate a polynomial regression.
                      When None, the default of `standard_lineplot` is used.
        conf_int: Size of the confidence interval for the regression estimate
        scatter: Set to True to add average expression values per sample ID as dots
        save: Path to save the plot to

    Example smooth:

    .. image:: /_images/average_expression_per_cluster_smooth.png

    Example raw:

    .. image:: /_images/average_expression_per_cluster_raw.png
    """
    if not cluster:
        ylab = "Average expression"
    elif isinstance(cluster, list):
        ylab = f"Average expression in {', '.join(cluster)}"
    else:
        ylab = f"Average expression in {cluster}"

    # Forwarding order_smooth=None would reach seaborn as order=None and break the
    # polynomial fit, so only override the default when a value was actually given.
    smoothing_kwargs = {} if order_smooth is None else {"order_smooth": order_smooth}

    for gene in genes:
        meanpid = gene_expression.groupby([id_label, hue, xlabel])[gene].mean().reset_index()
        standard_lineplot(
            meanpid,
            order=order,
            xlabel=xlabel,
            ylabel=ylab,
            hue=hue,
            gene=gene,
            smooth=smooth,
            palette=cols,
            title=gene,
            tick_size=tick_size,
            label_size=label_size,
            rotation=rotation,
            figsize=figsize,
            save=save,
            confidence_interval=conf_int,
            scatter=scatter,
            **smoothing_kwargs,
        )
def average_expression_per_cell(
    gene_expression,
    genes,
    order,
    xlabel: str = "days",
    cluster: str = "all",
    hue=None,
    figsize: Tuple[int, int] = (15, 6),
    smooth=None,
    rotation=None,
    tick_size=12,
    label_size=15,
    order_smooth=None,
    conf_int=None,
    scatter=None,
    cols=None,
    save: str = None,
):
    """Plots the average gene expression as a line plot per cell.

    Ideally used when the scatter point should not be sample wise, but cell wise.
    The data frame is passed to the plot as is, without averaging per sample.

    Args:
        gene_expression: Data frame containing gene expression values
        genes: List of genes for which individual line plots will be generated
        order: Order of x-axis labels from left to right
        xlabel: x-axis label; also the column of `gene_expression` plotted on the x-axis
        cluster: Which clusters to plot. Select 'all' if all clusters should be drawn.
                 Only used to build the y-axis label.
        hue: Split expression values by this grouping, one line per category, will be drawn
        figsize: Size of the figure as specified in matplotlib
        smooth: Set to true for smoothened line plot using polynomial regression
        rotation: Set to True to rotate x-axis labels 90 degrees
        tick_size: Size of the ticks as specified in matplotlib
        label_size: Size of the labels as specified in matplotlib
        order_smooth: If greater than 1, use numpy.polyfit to estimate a polynomial regression.
                      When None, the default of `standard_lineplot` is used.
        conf_int: Size of the confidence interval for the regression estimate
        scatter: Set to True to add expression values as dots
        cols: List of colors to use for line plot
        save: Path to save the plot to
    """
    # The label does not depend on the gene, so build it once.
    cluster_label = ", ".join(cluster) if isinstance(cluster, list) else cluster

    # Forwarding order_smooth=None would reach seaborn as order=None and break the
    # polynomial fit, so only override the default when a value was actually given.
    smoothing_kwargs = {} if order_smooth is None else {"order_smooth": order_smooth}

    for gene in genes:
        standard_lineplot(
            gene_expression,
            order=order,
            xlabel=xlabel,
            ylabel=f"Average expression in cluster {cluster_label}",
            hue=hue,
            gene=gene,
            smooth=smooth,
            palette=cols,
            title=gene,
            tick_size=tick_size,
            label_size=label_size,
            rotation=rotation,
            figsize=figsize,
            save=save,
            confidence_interval=conf_int,
            scatter=scatter,
            **smoothing_kwargs,
        )
def gene_expression_dpt_ordered(
    data,
    genes,
    xlabel,
    order=3,
    conf_int=95,
    figsize: Tuple[int, int] = (12, 6),
    condition=None,
    label_size: int = 15,
    cols=None,
    scale=None,
    ylim=None,
    save: str = None,
):
    """Plot smoothed expression of all cells ordered by pseudo time.

    Args:
        data: Data frame with one row per cell, holding the gene columns, the `xlabel`
              column and (optionally) the `condition` column. The input is copied and
              not modified.
        genes: List of genes for which individual regression lines will be drawn.
               When `condition` is set, only the first gene is plotted.
        xlabel: x-axis label; also the column of `data` plotted on the x-axis
        order: Order of the polynomial regression fitted by Seaborn's regplot
        conf_int: Size of the confidence interval for the regression estimate
        figsize: Size of the figure as specified in matplotlib
        condition: Split expression values by this grouping, one line per category will be drawn
        label_size: Size of the labels as specified in matplotlib
        cols: List of colors to use for line plot. Defaults to a rainbow palette with one
              color per line.
        scale: Set to True to scale expression value to a range between 0 and 1
        ylim: Limits of the y-axis if desired, forwarded to `Axes.set(ylim=...)`
        save: Path to save the plot to

    Example:

    .. image:: /_images/gene_expression_dpt_ordered.png

    Example with columns:

    .. image:: /_images/gene_expression_dpt_ordered_col.png
    """
    import matplotlib.patches as mpatches

    patches = []
    data = data.copy()
    fig, ax = plt.subplots(figsize=figsize)

    # use rainbow colour palette if no colours are specified
    if cols is None:
        from matplotlib import colors

        bins = len(np.unique(data.loc[:, condition])) if condition else len(genes)
        rainbow = plt.cm.rainbow
        cmap = colors.LinearSegmentedColormap.from_list(
            "colours", [rainbow(i) for i in range(rainbow.N)], N=bins
        )
        cols = [cmap(i) for i in range(bins)]

    if condition:
        # only working for one gene at a time for now
        conditions = np.unique(data.loc[:, condition])
        gene = genes[0]
        # One column per (value column, condition level); flatten the names to "<column>_<level>".
        data = pd.pivot(data, columns=[condition])
        data.columns = [
            f"{value_column}_{level}"
            for value_column, level in zip(data.columns.get_level_values(0), data.columns.get_level_values(1))
        ]
        # Every row is filled for exactly one condition, so summing the split
        # x columns restores the original x value.
        data[xlabel] = data.filter(like=xlabel).sum(axis=1).values
        targets = [f"{gene}_{con}" for con in conditions]
    else:
        targets = list(genes)

    for i, target in enumerate(targets):
        if scale:
            data[target] = np.interp(data[target], (data[target].min(), data[target].max()), (0, 1))
        sb.regplot(
            data=data,
            x=xlabel,
            y=target,
            scatter=False,
            order=order,
            truncate=True,
            ax=ax,
            color=cols[i],
            ci=conf_int,
        )
        patches.append(mpatches.Patch(color=cols[i], label=target))

    # Style the axes created above directly so this also works when nothing was drawn.
    ax.set_ylabel("expression", size=label_size)
    ax.set_xlabel(xlabel, size=label_size)
    ax.tick_params(labelsize=label_size)
    sb.despine()

    ax.legend(
        handles=patches, loc="center left", bbox_to_anchor=(1.02, 0.5), prop={"size": label_size}, frameon=False
    )

    if ylim:
        ax.set(ylim=ylim)

    if save:
        # `bbox_inches` is the savefig option that crops to the drawn content.
        fig.savefig(f"{save}", bbox_inches="tight")
        print(f"[bold blue]Saving figure to {save}")

    plt.show()
    plt.close()
def relative_frequencies_boxplots(
    relative_frequencies: pd.DataFrame,
    cluster,
    cols,
    order,
    xlabel: str = "days",
    hue: str = "batch",
    figsize: Tuple[int, int] = (15, 6),
    width: float = 0.5,
    jitter=None,
    save=None,
) -> None:
    """Plots the relative frequencies as split boxplots.

    Use calc_relative_frequencies to get the required input format.

    Args:
        relative_frequencies: Calculated by calc_relative_frequencies as Pandas DataFrame
        cluster: Cluster to be plotted; used as the y column and as the plot title
        cols: List of colors to use for boxes
        order: Order of x-axis labels from left to right; rows with other x values are dropped
        xlabel: x-axis label
        hue: Value to color by
        figsize: Size of the figure as specified in matplotlib
        width: Width of the boxes, forwarded to the boxplot
        jitter: Set to True for individual dots per sample
        save: Path to save the plot to

    Example:

    .. image:: /_images/relative_frequencies_boxplots.png
    """
    # Keep only the rows whose x value is part of the requested order.
    in_order = relative_frequencies[xlabel].isin(order)
    selected = relative_frequencies.loc[in_order]

    split_boxplot(
        selected,
        order=order,
        xlabel=xlabel,
        ylabel="relative frequency",
        hue=hue,
        column=cluster,
        cols=cols,
        width=width,
        title=cluster,
        figsize=figsize,
        jitter=jitter,
        save=save,
    )
def split_boxplot(
    table,
    order,
    xlabel: str,
    ylabel: str,
    column=None,
    hue=None,
    cols=None,
    width: float = 1,
    title=None,
    figsize: Tuple[int, int] = (15, 6),
    jitter=None,
    save: str = None,
) -> None:
    """Draws a boxplot split by hue.

    Args:
        table: Table containing the data to draw the boxplots for
        order: Order of the boxplot labels
        xlabel: x-axis label; also the column of `table` plotted on the x-axis
        ylabel: y-axis label
        column: Column of `table` plotted on the y-axis
        hue: Value to split relative frequencies by
        cols: List of colors to use for boxes
        width: Width of the boxes, forwarded to the boxplot
        title: Title of the plot
        figsize: Size of the figure as specified in matplotlib
        jitter: Set to any value other than None to overlay individual dots per sample
        save: Path to save the plot to
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(figsize)

    # palette=None is Seaborn's default, so a single call covers both cases.
    sb.boxplot(data=table, hue=hue, x=xlabel, y=column, order=order, width=width, palette=cols, ax=ax)

    if jitter is not None:
        sb.swarmplot(data=table, color="black", x=xlabel, y=column, order=order, ax=ax)

    if hue is not None:
        ax.legend(loc="upper right")

    if title:
        ax.set_title(title, size=15)

    ax.set_xlabel(xlabel, size=15)
    ax.set_ylabel(ylabel, size=15)
    ax.tick_params(labelsize=12)

    if save:
        # Save to the requested path, not to a file literally named "{save}".
        fig.savefig(f"{save}")

    plt.show()
    plt.close()
def marker_dendrogram(
    marker_table: pd.DataFrame,
    threshold: float = 0.7,
    column: str = "cluster",
    log_fc_key: str = "log_FC",
    label_size: int = 10,
    orientation: str = "top",
    figsize: Tuple[int, int] = (10, 4),
    save: str = None,
):
    """Plots a dendrogram of used marker genes.

    Clusters are compared by the correlation of their marker log fold changes and
    joined with complete linkage.

    Args:
        marker_table: A marker table as generated by sct.calc.extended_marker_table.
                      Must contain a "gene" column next to `column` and `log_fc_key`.
        threshold: Threshold for the log fold change; only rows above it are used
        column: Column to create pivot by; usually just the clusters
        log_fc_key: Key for the stored log fold changes in the marker table
        label_size: Font size of the labels
        orientation: Orientation of the figure; leaf labels are rotated by 90 degrees for 'top'
        figsize: Size of the figure as specified in matplotlib
        save: Path to save the plot to. When None the plot is shown instead.

    Example:

    .. image:: /_images/marker_dendrogram.png
    """
    import scipy.cluster.hierarchy as hc

    strong_markers = marker_table[marker_table[log_fc_key] > threshold]
    # Genes x clusters matrix of log fold changes; genes that are no marker for a cluster count as 0.
    fold_changes = strong_markers.pivot(index="gene", columns=column, values=log_fc_key).fillna(value=0)

    # Correlation distance between clusters, converted to the condensed form linkage expects.
    distances = hc.distance.squareform(1 - fold_changes.corr())
    z = hc.linkage(distances, method="complete")

    plt.figure(figsize=figsize)
    rot = 90 if orientation == "top" else 0
    hc.dendrogram(
        z,
        labels=fold_changes.columns,
        leaf_rotation=rot,
        color_threshold=0,
        orientation=orientation,
        leaf_font_size=label_size,
        above_threshold_color="black",
    )
    plt.yticks(size=label_size)

    if save is None:
        plt.show()
    else:
        # Save to the requested path, not to a file literally named "{save}".
        plt.savefig(f"{save}")
        print(f"[bold blue]Saving figure to {save}")

    plt.close()
def volcano_plot(
    table,
    fdr_thresh: float = None,
    log_fc_thresh: float = 0,
    adj_p_val: str = "adj_p_val",
    log_fc: str = "avg_logFC",
    gene: str = "gene",
    sig_col: str = "tab:orange",
    col: str = "tab:blue",
    figsize: Tuple[int, int] = (8, 6),
    save=None,
):
    """Scatter plot of differential gene expression results generated by diffxpy.

    Note that a "-log_FDR" column is added to `table` in place.

    Args:
        table: diffxpy generated table of results
        fdr_thresh: -log(FDR) threshold for labeling genes. If set to None, we will consider the
                    99th percentile of -log(FDR) values the threshold.
        log_fc_thresh: absolute(log_fc) threshold for labeling genes.
        adj_p_val: Label of the adjusted p value, these are considered FDRs
        log_fc: Label of the log fold change
        gene: Label of column with gene names
        sig_col: Colour of dots surpassing the defined thresholds
        col: Color of all other dots
        figsize: Size of the figure as specified in matplotlib
        save: Path to save the plot to. When not set the plot is shown instead.

    Example:

    .. image:: /_images/diffxpy_volcano.png
    """
    table["-log_FDR"] = -np.log(table[adj_p_val])

    # take the 99% quantile by default for highlighting
    if fdr_thresh is None:
        fdr_thresh = np.percentile(table.loc[:, "-log_FDR"], 99)
    if not log_fc_thresh:
        log_fc_thresh = 0

    is_significant = (table["-log_FDR"] > fdr_thresh) & (abs(table[log_fc]) >= log_fc_thresh)
    lowqval_highfc_de = table.loc[is_significant]
    other_de = table.loc[~table.index.isin(lowqval_highfc_de.index)]

    fig, ax = plt.subplots()
    fig.set_size_inches(figsize)
    # Draw the two groups in the colors requested by the caller.
    sb.regplot(x=other_de[log_fc], y=other_de["-log_FDR"], fit_reg=False, scatter_kws={"s": 6}, color=col, ax=ax)
    sb.regplot(
        x=lowqval_highfc_de[log_fc],
        y=lowqval_highfc_de["-log_FDR"],
        fit_reg=False,
        scatter_kws={"s": 6},
        color=sig_col,
        ax=ax,
    )
    ax.set_xlabel("log2 FC", fontsize=20)
    ax.set_ylabel("-log Q-value", fontsize=20)
    ax.tick_params(labelsize=15)
    ax.grid(False)

    # Label names and positions, nudged slightly up and to the left of each dot
    label_x = [value - 0.1 for value in lowqval_highfc_de[log_fc]]
    label_y = [value + 0.1 for value in lowqval_highfc_de["-log_FDR"]]
    label_names = lowqval_highfc_de[gene]

    max_n_labels = 50
    if len(label_names) > max_n_labels:
        print(f"[bold yellow]Warning: given your thresholds, more than {max_n_labels} genes would have to be labeled.")
        print(
            "[bold yellow]To prevent overcrowding of your plot, make your thresholds stricter.\n"
            "We will leave out the labels for now."
        )
    else:
        # plot labels, and use adjust_text to make sure that labels don't overlap:
        texts = [
            ax.text(x, y, name, ha="center", va="center")
            for x, y, name in zip(label_x, label_y, label_names)
            if x != np.inf
        ]
        adjust_text(texts)

    if save:
        fig.savefig(f"{save}")
    else:
        plt.show()

    plt.close()
def cluster_composition_stacked_barplot(
    relative_frequencies: pd.DataFrame,
    xlabel: str = "name",
    figsize: Tuple[int, int] = (6, 10),
    width: float = 0.8,
    order=None,
    error_bar=None,
    label_size: int = 15,
    tick_size: int = 13,
    capsize: int = None,
    margins: Tuple[float, float] = (0.02, 0.04),
    colors=None,
    save: str = None,
):
    """Plot relative frequencies as a stacked barplot.

    Args:
        relative_frequencies: Data frame containing relative Frequencies as calculated by calc_relFreq().
                              Every column except "identifier" and `xlabel` is treated as a cell type.
        xlabel: x-axis label; also the column of `relative_frequencies` to group the bars by
        figsize: Size of the figure as specified in matplotlib
        width: Width of the bars
        order: Order of x-axis labels from left to right. Defaults to the sorted unique x values.
        error_bar: Set to True to add error bars (only possible when grouping the frequencies)
        label_size: Size of the labels as specified in matplotlib
        tick_size: Size of the ticks as specified in matplotlib
        capsize: Size of the horizontal lines of the error bar
        margins: Change margins of the plot if desired
        colors: Sequence of colors to use for the bands (list or array), one per cell type
        save: Path to save the plot to

    Raises:
        ValueError: If `colors` is None or empty.

    Example:

    .. image:: /_images/cluster_composition_stacked_barplot.png
    """
    import matplotlib.patches as mpatches

    # `not colors` is ambiguous for numpy arrays (as stored in adata.uns), so test explicitly.
    if colors is None or len(colors) == 0:
        raise ValueError("Colors was not passed. Obtain them from e.g. adata.uns['cluster_key_colors']")

    patches = []
    fig, ax = plt.subplots()
    fig.set_size_inches(figsize)

    order = np.unique(relative_frequencies.loc[:, xlabel]) if order is None else order
    ci = 95 if error_bar else None
    ax.margins(margins[0], margins[1])
    cell_types = np.flip([col for col in relative_frequencies.columns if col not in ["identifier", xlabel]])

    # `bars` tracks, per x group, the mean height stacked so far; every sample value is
    # lifted by that offset so the bars drawn on top of each other appear stacked.
    bars = pd.DataFrame(index=order, data=np.zeros(len(order)))
    plot_data = pd.DataFrame(relative_frequencies.loc[:, xlabel])
    groups = relative_frequencies.loc[:, xlabel]
    for typ in cell_types:
        values = relative_frequencies.loc[:, typ].values
        plot_data[typ] = [value + bars.loc[group].values[0] for value, group in zip(values, groups)]
        bars.iloc[:, 0] = (
            bars.iloc[:, 0] + relative_frequencies.loc[:, [typ, xlabel]].groupby(xlabel).mean().loc[order, typ]
        )

    # Draw the tallest (last stacked) band first so shorter bands are painted over it.
    for i, typ in enumerate(reversed(cell_types)):
        sb.barplot(
            data=plot_data,
            x=xlabel,
            y=typ,
            order=order,
            ci=ci,
            errcolor="black",
            color=colors[i],
            capsize=capsize,
            ax=ax,
        )
        patches.append(mpatches.Patch(color=colors[i], label=typ))

    ax.set_xlabel(xlabel, size=label_size)
    ax.set_ylabel("relative frequency", size=label_size)
    ax.tick_params(labelsize=tick_size)
    ax.set_xticklabels(labels=order, rotation="vertical")

    # Change the bar width while keeping every bar centred on its tick
    for bar in ax.patches:
        centre = bar.get_x() + bar.get_width() / 2.0
        bar.set_x(centre - width / 2.0)
        bar.set_width(width)

    ax.legend(
        handles=patches, loc="center left", bbox_to_anchor=(1.02, 0.5), prop={"size": tick_size}, frameon=False
    )

    if save:
        fig.savefig(f"{save}")
        print(f"[bold blue]Saving Figure to {save}")

    plt.show()
    plt.close()
def gene_boxplot(
    table,
    palette: List[str],
    xlabel: str = "cell_types",
    hue: str = None,
    figsize: Tuple[int, int] = (10, 5),
    legend=True,
    score="Axin2",
    scatter=None,
    rotate=False,
    width=0.7,
    save=None,
):
    """Plot gene values as split boxplots.

    Args:
        table: Pandas DataFrame
        palette: Colors used for the boxes
        xlabel: Column of `table` plotted on the x-axis
        hue: Column of `table` used to split the boxes of every x category
        figsize: Size of the figure as specified in matplotlib
        legend: Whether to draw a legend or not
        score: Column of `table` plotted on the y-axis
        scatter: Set to True to overlay the individual values as black dots; outlier markers
                 of the boxes are hidden in that case
        rotate: Set to True to rotate the x-axis tick labels by 90 degrees
        width: Width of the boxes, forwarded to the boxplot
        save: Path to save the plot to

    Example:

    .. image:: /_images/gene_boxplot.png
    """
    sb.set_style("ticks")
    fig, ax = plt.subplots()
    fig.set_size_inches(figsize)

    # The strip plot already shows every value, so fliers would be drawn twice.
    show_fliers = not scatter

    # hue=None is Seaborn's default, so one call covers the split and the unsplit case.
    sb.boxplot(
        data=table, x=xlabel, y=score, width=width, hue=hue, showfliers=show_fliers, palette=palette, ax=ax
    )
    if scatter:
        sb.stripplot(data=table, x=xlabel, y=score, palette=["black"], size=4, hue=hue, dodge=True, ax=ax)

    if rotate:
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    else:
        ax.set_xticklabels(ax.get_xticklabels())

    if legend:
        ax.legend(bbox_to_anchor=(1.05, 1.06))
    elif ax.get_legend() is not None:
        # Without hue there is no legend to remove.
        ax.get_legend().remove()

    plt.setp(ax.artists, edgecolor="black")
    plt.setp(ax.lines, color="black")
    sb.despine()  # to not show outline box

    if save:
        print(f"Saving to {save}")
        # `bbox_inches` is the savefig option that crops to the drawn content.
        fig.savefig(save, bbox_inches="tight")

    plt.show()
def colors_overview(colors: Dict, ncols: int = 2, figsize: Tuple[int, int] = (8, 5), save: str = None):
    """Draw an overview plot of all used colors.

    Colors are sorted by hue, saturation, value and name and laid out row by row
    in a grid of `ncols` columns, each as a swatch followed by its name and value.

    Args:
        colors: Dictionary of color name and color
        ncols: How many columns for the plot
        figsize: Size of the figure as specified in matplotlib
        save: Path to save the plot to

    Example:

    .. image:: /_images/colors.png
    """
    from matplotlib import colors as mcolors

    # Sort colors by hue, saturation, value and name.
    by_hsv = sorted((tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name) for name, color in colors.items())
    sorted_names = [name for _, name in by_hsv]

    nrows = len(sorted_names) // ncols + 1

    fig, ax = plt.subplots(figsize=figsize)

    # Canvas size in pixels; kept separate from the per-entry position so that
    # every row is placed relative to the top of the canvas.
    total_width, total_height = fig.get_dpi() * fig.get_size_inches()
    cell_height = total_height / (nrows + 1)
    cell_width = total_width / ncols

    for i, name in enumerate(sorted_names):
        row, col = divmod(i, ncols)
        y = total_height - (row * cell_height) - cell_height

        xi_line = cell_width * (col + 0.05)
        xf_line = cell_width * (col + 0.25)
        xi_text = cell_width * (col + 0.3)

        ax.text(
            xi_text,
            y,
            f"{name} {colors[name]}",
            fontsize=(cell_height * 0.4),
            horizontalalignment="left",
            verticalalignment="center",
        )
        ax.hlines(y + cell_height * 0.1, xi_line, xf_line, color=colors[name], linewidth=(cell_height * 0.6))

    ax.set_xlim(0, total_width)
    ax.set_ylim(0, total_height)
    ax.set_axis_off()

    fig.subplots_adjust(left=0, right=1, top=1, bottom=0, hspace=0, wspace=0)

    if save:
        print(f"Saving to {save}")
        # `bbox_inches` is the savefig option that crops to the drawn content.
        fig.savefig(save, bbox_inches="tight")

    plt.show()
def relative_frequencies_lineplot(
    relative_frequencies: pd.DataFrame,
    order,
    cluster,
    xlabel: str = "days",
    ylabel: str = "relative frequency",
    hue: str = None,
    smooth: bool = None,
    cols=None,
    title: str = None,
    rotation: int = None,
    figsize: Tuple[int, int] = (15, 5),
    tick_size: int = None,
    label_size: int = None,
    order_smooth: int = 3,
    conf_int=None,
    scatter=None,
    save: str = None,
):
    """Plot relative frequencies as a line plot.

    Args:
        relative_frequencies: Data frame containing relative Frequencies as calculated by calc_relFreq()
        order: Order of x-axis labels from left to right
        cluster: Which cluster to plot; name of the frequency column to draw
        xlabel: x-axis label; also the column plotted on the x-axis
        ylabel: y-axis label
        hue: Value to color by
        smooth: Whether to smoothen the plot
        cols: List of colors to use for line plot
        title: Title of the plot
        rotation: Rotation of the x-axis labels
        figsize: Size of the figure as specified in matplotlib
        tick_size: Size of the ticks as specified in matplotlib
        label_size: Size of the labels as specified in matplotlib
        order_smooth: If greater than 1, numpy.polyfit is used to estimate a polynomial regression
        conf_int: Size of the confidence interval for the regression estimate
        scatter: Set to True to add the individual values as dots
        save: Path to save the plot to

    Example:

    .. image:: /_images/relative_frequencies_lineplots.png
    """
    # Columns that identify a row; the cluster column is melted into long format.
    id_columns = [xlabel, hue] if hue else [xlabel]
    selected = relative_frequencies.loc[:, [cluster] + id_columns]
    long_freqs = pd.melt(selected, id_vars=id_columns)

    standard_lineplot(
        long_freqs,
        order=order,
        xlabel=xlabel,
        ylabel=ylabel,
        hue=hue,
        gene="value",
        smooth=smooth,
        palette=cols,
        title=title,
        rotation=rotation,
        figsize=figsize,
        tick_size=tick_size,
        label_size=label_size,
        order_smooth=order_smooth,
        confidence_interval=conf_int,
        scatter=scatter,
        save=save,
    )
def annotated_cell_type_umap(
    adata,
    primary_color: Union[str, Sequence[str]],
    cell_type_color: str,
    legend_loc: str = "on data",
    legend_fontsize: int = 8,
    title: str = "Plot title",
    palette=None,
    cmap=None,
    figsize=(8, 6),
    save=None,
):
    """Plots a UMAP which is colored by the primary_color, but also draws all labels on top of all clusters.

    Args:
        adata: AnnData object
        primary_color: Primary color to color all cells by, e.g. 'genotype'
        cell_type_color: Key containing all cell types, e.g. 'cell_type'
        legend_loc: Location of the legend (default: 'on data')
        legend_fontsize: Font size of the legend (default: 8)
        title: Title of the plot
        palette: Color palette of the primary coloring
        cmap: Color map of the UMAP
        figsize: Size of the figure
        save: Path to save the plot to; the figure is written as PDF with 1200 dpi

    Returns:
        fig and axs Matplotlib objects

    Example:

    .. image:: /_images/annotated_cell_type_umap.png
    """
    figure, axis = plt.subplots(figsize=figsize)

    # First layer: the visible cells, colored by the primary key.
    sc.pl.umap(adata, color=primary_color, show=False, palette=palette, cmap=cmap, ax=axis)

    # Second layer: fully transparent cells, drawn only for the cell type labels.
    label_layer = {
        "color": cell_type_color,
        "alpha": 0,
        "legend_loc": legend_loc,
        "legend_fontsize": legend_fontsize,
        "title": title,
        "show": False,
        "ax": axis,
    }
    sc.pl.umap(adata, **label_layer)

    if save:
        figure.savefig(save, dpi=1200, format="pdf", bbox_inches="tight")

    return figure, axis
def genotype_vs_genotype_umaps(
    adata,
    genotype_key: str,
    genotype_label_1: str,
    genotype_label_2: str,
    color: str,
    hide_one_legend: bool = True,
    figsize: Tuple[int, int] = (12, 6),
):
    """Plots two UMAPs of genotypes next to each other displaying only the legend of the second UMAP.

    Args:
        adata: AnnData object
        genotype_key: Key of the genotypes
        genotype_label_1: Name of the first genotype; Must be contained in the genotypes
        genotype_label_2: Name of the second genotype; Must be contained in the genotypes
        color: Key to color by
        hide_one_legend: Whether to hide the legend of the genotype_label_1
        figsize: Size of the figure

    Example:

    .. image:: /_images/genotype_vs_genotype_umaps.png
    """
    fig, axes = plt.subplots(1, 2, figsize=figsize)

    for panel, (label, axis) in enumerate(zip((genotype_label_1, genotype_label_2), axes)):
        # Copy the subset so plotting does not operate on a view of `adata`.
        subset = adata[adata.obs[genotype_key].isin([label])].copy()
        sc.pl.umap(
            subset,
            color=color,
            ax=axis,
            palette=sc.pl.palettes.default_20,
            legend_fontsize="xx-small",
            size=40,
            show=False,
        )

        if panel == 0 and hide_one_legend:
            # There may be no legend to remove, so check before calling remove().
            first_legend = axis.get_legend()
            if first_legend is not None:
                first_legend.remove()

        axis.set_title(label)