Slignshot

[1]:
# load modules
import os
import numpy as np
import pandas as pd
import pickle

# Plotting imports
import matplotlib
import matplotlib.pyplot as plt

import palantir
import scanpy as sc
[2]:
%matplotlib inline

Download data

Anndata objects with all the data and metadata are publically avaiable at: https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep[1-3].h5ad. This notebook use replicate 1 (https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad) for illustration.

Description of the anndata object is available at https://s3.amazonaws.com/dp-lab-data-public/palantir/readme

Load data

[3]:
# Load the AnnData object
download_url = "https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad"
ad = sc.read('annadata/human_cd34_bm_rep1.h5ad', backup_url=download_url)
colors = pd.Series(ad.uns['cluster_colors'])
ct_colors = pd.Series(ad.uns['ct_colors'])

Slingshot

The following genes were used to determine trends

[4]:
genes = ['CD79B',
 'SPI1',
 'RAG1',
 'CD34',
 'CEBPG',
 'GATA1',
 'ITGA2B',
 'CD79A',
 'IRF8',
 'CSF1R',
 'MPO',
 'CEBPD']

Multi-scale was computed as described in the sample notebook. Let this object be represented as ms_data

We export the multi-scale data, clusters and gene expression for the above genes and use the Slingshot R Script to compute the results. The exporting code is shown below

[ ]:

# Export data
slignshot_dir = 'slingshot/'
os.makedirs(slignshot_dir, exist_ok=True)

# Genes
# Expression matrices
pd.DataFrame(ms_data).to_csv(f'{slignshot_dir}/data.csv', index=None, header=None)
pd.DataFrame(ad.obs['clusters']).to_csv(f'{slignshot_dir}/clusters.csv', index=None, header=None)
pd.DataFrame(ad[:, genes].X.T, index=genes, columns=ad.obs_names).to_csv(f'{slignshot_dir}/exprs.csv')

R snippet used is shown below was saved as slingshot_wrapper.R

library(slingshot)

args <- commandArgs(trailingOnly = TRUE)
out_dir <- args[1]

# Load data
data = read.csv(sprintf("%s/data.csv", out_dir), header=FALSE)
clusters = read.csv(sprintf("%s/clusters.csv", out_dir), header=FALSE)[, 1]
exprs = read.csv(sprintf("%s/exprs.csv", out_dir), row.names=1)


# Slingshot
print('Lineages...')
lin1 <- getLineages(data, clusters, start.clus = 0)
print('Curves...')
crv1 <- getCurves(lin1)
print('Pseudotime...')
pst <- slingPseudotime(crv1)
w <- slingCurveWeights(crv1)
L <- length(slingLineages(crv1))

write.csv(w, sprintf("%s/weights.csv", out_dir))

# Trends for each lineage
print('Gene trends...')
for(l in seq_len(L)){
    print(sprintf("Lineage: %d", l))

    for(gene in rownames(exprs)){
        print(sprintf("Gene: %s", gene))
        y <- exprs[gene, ]
        p <- loess(unlist(y) ~ pst[,l], weights = w[,l])
        pl = predict(p, se=TRUE)
        df = cbind(p$x, p$fitted, qt(0.975,pl$df)*pl$se)
        colnames(df) <- c('Trajectory', 'trends', 'ci')
        write.csv(df, sprintf("%s/Lineage%d_%s.csv", out_dir, l, gene))
    }
}

The above script was invoked as shown below

import subprocess

ss_path = 'slingshot/slingshot_wrapper.R'
args = f'Rscript {ss_path} {out_dir}'
print('Running Slingshot using: {}\n'.format(args))
with subprocess.Popen(args,
                      stdout=None, stderr=subprocess.PIPE,
                      shell=True) as p:
    out, err = p.communicate()

The results from the above run is available at results/slingshot

Load results

[5]:
import glob
res_dir = 'results/slingshot/'
files = glob.glob(res_dir + 'Lin*.csv')
lineages = pd.Series(files).str.split('/').str.get(-1).str.split('_').str.get(0).unique()
[6]:
# Update trajectory
max_traj = -np.inf
min_traj = np.inf
trends = pd.Series()
for l in lineages:
    trends[l] = pd.Series()
    for gene in genes:
        trends[l][gene] = pd.read_csv(f'{res_dir}{l}_{gene}.csv', index_col=0 )
        if max_traj < trends[l][gene]['Trajectory'].max():
            max_traj = trends[l][gene]['Trajectory'].max()
        if min_traj > trends[l][gene]['Trajectory'].min():
            min_traj = trends[l][gene]['Trajectory'].min()
/var/folders/03/f2kf8n8500jd23r44g8fg2cr0000gq/T/ipykernel_20930/2939155888.py:4: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  trends = pd.Series()
/var/folders/03/f2kf8n8500jd23r44g8fg2cr0000gq/T/ipykernel_20930/2939155888.py:6: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  trends[l] = pd.Series()
/var/folders/03/f2kf8n8500jd23r44g8fg2cr0000gq/T/ipykernel_20930/2939155888.py:6: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  trends[l] = pd.Series()
/var/folders/03/f2kf8n8500jd23r44g8fg2cr0000gq/T/ipykernel_20930/2939155888.py:6: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  trends[l] = pd.Series()
/var/folders/03/f2kf8n8500jd23r44g8fg2cr0000gq/T/ipykernel_20930/2939155888.py:6: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  trends[l] = pd.Series()
[7]:
vmin = np.inf; vmax = -np.inf;
for l in lineages:
    pt = trends[l][gene].Trajectory
    vmin = np.min([vmin, pt.min()])
    vmax = np.max([vmax, pt.max()])

tSNE map generated using PCA was used in the manuscript. The results are shown below with tSNE generated using scaled diffusion components

[8]:
tsne = pd.DataFrame(ad.obsm['tsne'], index=ad.obs_names, columns=['x', 'y'])
[9]:
plt.figure(figsize=[5, 5])
plt.scatter(tsne.loc[:, 'x'], tsne.loc[:, 'y'], s=5,
           color=colors[ad.obs['clusters']])
ax = plt.gca()
ax.set_axis_off()


../../_build/doctrees/nbsphinx/notebooks_comparisons_slignshot_25_0.png

Lineage cells

[10]:
fig = palantir.plot.FigureGrid(len(lineages), len(lineages))
for ax, l in zip(fig, lineages):
    pt = trends[l][gene].Trajectory
    ax.scatter(tsne.loc[:, 'x'], tsne.loc[:, 'y'], s=5, color='lightgrey')
    ax.scatter(tsne.loc[pt.index, 'x'],
              tsne.loc[pt.index, 'y'], s=5, c=pt,
    cmap=matplotlib.cm.plasma, vmin=vmin, vmax=vmax)
    ax.set_axis_off()
    ax.set_title(l)
../../_build/doctrees/nbsphinx/notebooks_comparisons_slignshot_27_0.png

Weights

[11]:
weights =  pd.read_csv(f'{res_dir}/weights.csv', index_col=0)
weights.index = ad.obs_names
weights.columns = lineages
[12]:
fig = palantir.plot.FigureGrid(len(lineages), len(lineages))
for ax, l in zip(fig, lineages):
    ax.scatter(tsne.loc[weights.index, 'x'],
              tsne.loc[weights.index, 'y'], s=5, c=weights.loc[:, l],
    cmap=matplotlib.cm.plasma)
    ax.set_axis_off()
    ax.set_title(l)
../../_build/doctrees/nbsphinx/notebooks_comparisons_slignshot_30_0.png