Quick Demo: Spatial fitting for reference-based simulation

[1]:
import simspace as ss
import numpy as np
import pandas as pd
[ ]:
# Step 0: Load the reference metadata.
# Step 1 derives the spatial statistics from this table and reads three kinds of
# columns from it: 'Cluster' (one label per row) and 'x_centroid' / 'y_centroid'
# (the coordinates of each row).
# The path is relative to the root of the SimSpace repository so that the notebook
# does not depend on one user's home directory; adjust it if the notebook is run
# from a different working directory.
reference_meta_path = 'data/reference_metadata.csv'
meta = pd.read_csv(reference_meta_path, index_col=0)  # first CSV column is used as the row index

# Step 1: Calculate the spatial statistics of the reference metadata.
# Two statistics are computed and concatenated into the fitting target:
#   * the local-entropy distribution (as a normalised histogram), and
#   * one Moran's I value per cluster, ordered from the most to the least abundant cluster.

cluster_labels = meta['Cluster']
cell_coords = meta[['x_centroid', 'y_centroid']]

# Number of cells per cluster, ordered by cluster label.
cell_counts = cluster_labels.value_counts().sort_index()

# Pass the clusters in the SAME label order as `cell_counts`, so that position i of the
# Moran's I result and position i of `cell_counts` describe the same cluster.
# (`Series.unique()` would return the labels in order of first appearance instead, which
# does not match the label-sorted counts in general.)
# NOTE(review): assumes integrate_morans_I returns one value per entry of the cluster
# list, in the order of that list -- confirm against the SimSpace implementation.
mi_by_cluster = ss.spatial.integrate_morans_I(
    cluster_labels,
    cell_coords,
    cell_counts.index.to_numpy(),
)

# Positions of the clusters sorted by decreasing cell count.
abundance_order = np.argsort(-cell_counts.to_numpy())
mi_sim = np.array([mi_by_cluster[i] for i in abundance_order])

# Local entropy, summarised as a density histogram over 11 bins of width 0.25
# (bin edges 0, 0.25, ..., 2.75). np.histogram ignores values outside that range.
le_sim = ss.spatial.calculate_local_entropy(cluster_labels, cell_coords)
le_pdf, _ = np.histogram(le_sim, bins=[x / 4 for x in range(0, 12)], density=True)

# Combine the local entropy and Moran's I into a single vector to use as the target for the simulation.
sim_res = np.hstack((le_pdf, mi_sim))
[ ]:
# Step 2: Fit the simulation parameters to the target vector from Step 1.
# A genetic algorithm searches for parameters whose simulated spatial statistics
# match `sim_res`. The settings are collected in one dictionary so they can be
# inspected or tweaked in a single place before the fit is launched.

fit_settings = {
    # Genetic-algorithm budget
    'population_size': 20,  # Population size for the genetic algorithm
    'generations': 10,      # Number of generations for the genetic algorithm
    # Simulation layout
    'shape': (80, 80),      # Shape of the simulated spatial data
    'n_group': 2,           # Number of groups in the simulation
    'n_state': 9,           # Number of states in the simulation
    # One replicate is sufficient for this example; increase it for more robust results
    'replicate': 1,
    # Random seed for reproducibility
    'seed': 0,
}

fitted_params = ss.spatial_fit(target=sim_res, **fit_settings)
Generation 0: Best Fitness = 0.8865341823928504
Generation 1: Best Fitness = 0.7345467491828108
Generation 2: Best Fitness = 0.7345467491828108
Generation 3: Best Fitness = 0.7345467491828108
Generation 4: Best Fitness = 0.7345467491828108
Generation 5: Best Fitness = 0.7345467491828108
Generation 6: Best Fitness = 0.7345467491828108
Generation 7: Best Fitness = 0.7345467491828108
Generation 8: Best Fitness = 0.7345467491828108
Generation 9: Best Fitness = 0.7345467491828108
Optimization complete!
Best solution: {'n_group': 2, 'n_state': 9, 'niche_theta': array([0.44650192]), 'theta_list': [array([-0.23788805,  0.14410818, -0.65546195, -0.53543666,  0.46746016,
        0.16297951, -0.42029195,  0.63835194,  0.33794159,  0.6070672 ,
        0.42238894, -0.42500922, -0.61284523, -0.58728033, -0.33537799,
        0.45484526,  0.61651146,  0.50322044,  0.17606711, -0.34819478,
        0.24709473,  0.41157442, -0.13967493, -0.56440546,  0.00303249,
       -0.41817377, -0.01151323,  0.22120886, -0.78949714, -0.21934801,
        0.40140046, -0.12264095,  0.23446904, -0.09443843,  0.24419911,
       -0.47536871]), array([ 0.22030581, -0.67874357,  0.05354833, -0.48604492, -0.19933063,
        0.54230041, -0.14458549,  0.64517522, -0.14049037,  0.59986419,
        0.14690163,  0.74240016, -0.61149856,  0.02747397,  0.24378214,
       -0.33702031,  0.63185828,  0.55551364, -0.15115636,  0.29365377,
       -0.68781948,  0.73080375,  0.60152348,  0.69201143, -0.51194426,
       -0.41362416, -0.07710352,  0.65742366, -0.62071043, -0.78308975,
       -0.01639067,  0.4687816 ,  0.63788746,  0.78072464,  0.56216736,
        0.68947799])], 'density_replicates': array([0.31653189, 0.2517224 , 0.21976696, 0.29889523, 0.31183007,
       0.08628342, 0.14120465, 0.2978457 , 0.3517975 ]), 'phi_replicates': 4.8784433349811405}
Best fitness: 0.7345467491828108
[ ]:
# Step 3: Simulate the spatial omics data from the fitted parameters, in the same
# way as in the reference-free simulation.
# The grid shape is the same (80, 80) that was used for fitting in Step 2.
neighbor_offsets = ss.spatial.generate_offsets(3)

sim = ss.util.sim_from_params(
    fitted_params,
    shape=(80, 80),
    num_iteration=4,
    n_iter=6,
    custom_neighbor=neighbor_offsets,
    seed=42,
)
[13]:
# Render the simulated result as a 5x5 figure at 100 dpi.
sim.plot(
    dpi=100,
    figsize=(5, 5),
    title='Simulation Result',
)
../_images/tutorials_spatial_fitting_5_0.png
[ ]: