Quick Demo: Reference-based simulation for CODEX spatial proteomics data

[ ]:
import simspace as ss
import pandas as pd
[2]:
# Step 1: Load the reference dataset. We here provide a sample dataset from Xenium human breast tumor dataset.
ref_meta = pd.read_csv('../data/CODEX_metadata.csv', index_col=0)
ref_omics = pd.read_csv('../data/CODEX_count.csv', index_col=0)
[3]:
ref_meta.head(3)
[3]:
Unnamed: 0.2 Unnamed: 0.1 Unnamed: 0 CellID centroid_x centroid_y Area perimeter axis_ratio tile_h ... IFNG Ki67 CD44 PD1 TIM3 necrosis_2clusters necrosis_3clusters peri_intra_tumoral X_centroid Y_centroid
Unnamed: 0.3
876849 876849 876849 3790987 1 39.842857 2.228571 70.0 32.863961 2.111366 37888 ... N N N N N 0 0 peri 37927.842857 26882.228571
876850 876850 876850 3790988 2 81.000000 1.500000 28.0 18.000000 1.788854 37888 ... N N N N N 0 0 peri 37969.000000 26881.500000
876851 876851 876851 3790989 3 94.694118 3.058824 85.0 34.485281 1.467060 37888 ... N N N N N 0 0 peri 37982.694118 26883.058824

3 rows × 26 columns

[4]:
ref_omics.head(3)
[4]:
DAPI MPO Ecadherin PDL1 CD163 PD1 CD47 GAL3 PARP1 LAG3 ... MLH1 Ki67 CD20 ARID1A IFNG CD31 PMS CD44 PanCytokeratin CD3e
CellID
1 21.585714 0.028571 0.1 0.928571 0.014286 1.214286 1.128571 32.971428 5.657143 1.900000 ... 24.314285 0.900000 5.000000 7.657143 0.900000 0.400000 1.814286 2.828571 1.685714 20.142857
2 20.071428 0.000000 0.0 0.178571 0.428571 0.392857 0.607143 14.000000 0.071429 1.428571 ... 5.678571 0.857143 1.928571 0.678571 0.678571 0.142857 0.142857 2.821429 1.928571 1.500000
3 17.858824 0.000000 0.0 0.047059 0.000000 0.235294 0.058824 7.482353 0.564706 1.364706 ... 6.764706 0.329412 1.835294 1.141176 0.458824 0.082353 0.200000 0.741176 1.635294 0.717647

3 rows × 31 columns

[ ]:
# Step 2: Fit SimSpace spatial parameters from the reference.
# This step uses the reference metadata and omics data to fit the spatial parameters for the simulation.
# One can do this by using the `ss.fit()` method and some preprocessing, which is shown in details in spaital_fitting.ipynb.
# Here we provide a pre-fitted parameters for the CODEX human endometrial cancer dataset.
# The full dataset will be available upon request.
params = ss.util.load_params('../data/CODEX_fitted_params.json')
[ ]:
# Step 3: Simulate the spatial omics data using the fitted parameters.
# This step uses the fitted parameters to simulate the spatial omics data, just as the reference-free simulation.
sim = ss.util.sim_from_params(
    parameters=params,
    shape=(50, 50),
    custom_neighbor=ss.spatial.generate_offsets(3, method='manhattan'),
    seed=0,
)
# This results is the simulated spatial omics data shown in Fig. 6, with x and y axis switched.
sim.plot(figsize=(5, 5), dpi=100, size=14)
../_images/tutorials_reference_based_CODEX_6_0.png
[ ]:
# Step 4 (Optional): Simulate omics data based on the spatial simulation.
# The omics profile can also be fitted from the reference dataset if the reference is available.
# Here we use the `fit_scdesign` method to fit the spatial simulation with the reference dataset,
# which requires the R package 'scDesign3' to be installed. One can find more details in README about the installation.
sim.fit_scdesign(
    '../data/CODEX_count.csv',
    '../data/CODEX_metadata.csv',
    'phenotype',      # Column in metadata that contains the cell type information
    'X_centroid',   # Column in metadata that contains the x coordinate of the cell centroid
    'Y_centroid',   # Column in metadata that contains the y coordinate of the cell centroid
    seed=0,
)
Temporary directory created.
scDesgin fit complete.
[12]:
# Example of plotting a specific gene, e.g., 'PD1'.
sim.plot('PD1', figsize=(5,5), dpi=100, title='PD1 Expression')
../_images/tutorials_reference_based_CODEX_8_0.png
[13]:
sim.plot('fitted_celltype', figsize=(6,6), dpi=100)
../_images/tutorials_reference_based_CODEX_9_0.png
[ ]: