diff --git a/.github/workflows/render-rmarkdown.yaml b/.github/workflows/render-rmarkdown.yaml index 2d48dde..e19efae 100644 --- a/.github/workflows/render-rmarkdown.yaml +++ b/.github/workflows/render-rmarkdown.yaml @@ -49,10 +49,6 @@ jobs: # run: | # pkgdown::build_site() # shell: Rscript {0} - - name: Render Rmarkdown files - run : | - pkgdown::build_article(name='install.Rmd', quiet=FALSE) - shell: Rscript {0} - name: Commit files run: | git config --local user.name "$GITHUB_ACTOR" diff --git a/docs/articles/myofibroblast-GRN.html b/docs/articles/myofibroblast-GRN.html index 5c6d19f..79ce26b 100644 --- a/docs/articles/myofibroblast-GRN.html +++ b/docs/articles/myofibroblast-GRN.html @@ -90,7 +90,7 @@

Gene-regulatory network of myofibroblast differentiation in myocardial infarction

-

Compiled: June 29, 2024

+

Compiled: July 01, 2024

@@ -113,21 +113,21 @@

Compiled: June 29, 2024

script of cleaning the data and preparing these objects is found here.

Run the following commands to download the data:

-# if(!dir.exists('./Fibroblast')){
-#     dir.create('./Fibroblast')
-# }
+if(!dir.exists('./Fibroblast')){
+    dir.create('./Fibroblast')
+}
 
-# download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/Fibroblast/snRNA.rds', 
-#               destfile = './Fibroblast/snRNA.rds', 
-#               method = 'wget', extra = '--no-check-certificate')
+download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/Fibroblast/snRNA.rds', 
+              destfile = './Fibroblast/snRNA.rds', 
+              method = 'wget', extra = '--no-check-certificate')
 
-# download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/Fibroblast/snATAC.rds', 
-#               destfile = './Fibroblast/snATAC.rds', 
-#               method = 'wget', extra = '--no-check-certificate')
+download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/Fibroblast/snATAC.rds', 
+              destfile = './Fibroblast/snATAC.rds', 
+              method = 'wget', extra = '--no-check-certificate')
               
-# download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/Fibroblast/gene.activity.rds', 
-#               destfile = './Fibroblast/gene.activity.rds', 
-#               method = 'wget', extra = '--no-check-certificate')
+download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/Fibroblast/gene.activity.rds', + destfile = './Fibroblast/gene.activity.rds', + method = 'wget', extra = '--no-check-certificate')

Next, we load all necessary packages:

 suppressMessages(library(ArchR))
@@ -206,15 +206,9 @@ 

Co-embedding## Find 19026 common genes between ATAC and RNA

## Subset ATAC and RNA data
## Normalize gene activity score for ATAC data
-
## Centering and scaling data matrix
## Performing data integration using Seurat...
## Warning: Different features in new layer data than already exists for
 ## scale.data
-
## Running CCA
-
## Merging objects
-
## Finding neighborhoods
-
## Finding anchors
-
##  Found 28478 anchors
## Finding integration vectors
## Finding integration vector weights
## Transfering 19026 features onto reference data
@@ -224,68 +218,12 @@

Co-embedding## Warning: Different features in new layer data than already exists for ## scale.data
## Warning: Different cells in new layer data than already exists for scale.data
-
## Warning: The following 47 features requested have zero variance; running
-## reduction without them: BMPR1A, LINC02345, PLAG1, ZNF600, R3HDM4, RHOT2,
-## TOP1MT, NDUFV2-AS1, VBP1, CCL24, RNF138, CELF5, TMEM184A, SNHG12, ISYNA1,
-## GRHL3, CLK3, LRRC40, LINC02273, SH3GL1, BBC3, PGK1, KLC2, TBC1D3, MARK4,
-## ANKRD33B, CTCF, FBXL19-AS1, STEAP1B, TRAF5, BAG3, CDHR4, CS, HCRT, MCM3AP-AS1,
-## DTX3, DNASE2B, ASTN1, MYLK3, PLEKHG3, MMP24, GDPD2, SMAD2, GLB1L, EIF6, RBP7,
-## PNMA5
-
## PC_ 1 
-## Positive:  SCN7A, ABCA10, DCN, ZBTB20, MIR99AHG, NFIA, CELF2, ZEB1, IL1RAPL1, COL15A1 
-##     C7, IMMP2L, SGCD, C1orf21, MLIP, COLEC12, EPHA3, CAMK2D, LINC01088, ZNF521 
-##     DDR2, CRADD, CRYBG3, AFF1, CMSS1, STARD13, DDX5, LRBA, NEK7, AGAP1 
-## Negative:  VMP1, THBS1, SLC20A1, ADAMTSL1, COL27A1, UACA, ARFGAP1, ILF3, EML4, MERTK 
-##     KDM4B, NOTCH3, UBE2G2, CERCAM, F3, BDP1, NFATC2, PABPC1, FAM50A, MYL9 
-##     TMEM44, PTPRC, FBLIM1, TANC1, NR4A1, PLXNB2, FOSB, KSR1, RFX2, CNKSR3 
-## PC_ 2 
-## Positive:  DCN, MLIP, SCN7A, TCAP, ESRRG, LINC01088, SGCD, CST3, IL1RAPL1, ZBTB20 
-##     MIR99AHG, NFIA, COL15A1, ABCA10, KLHL24, COX7C, TTN-AS1, ATP5MG, LUM, CORIN 
-##     EYS, COLEC12, PCDH9-AS2, NDUFB10, TENM3, CPNE5, SERF2, SMPX, CELF2, TACC2 
-## Negative:  BNC2, VMP1, THBS1, EPB41L2, AGAP1, UACA, CYTH3, PPP3CA, SLC20A1, DDX5 
-##     GALNT2, SMG6, HNRNPA2B1, STARD13, FGF7, SLIT3, NR4A1, AKAP9, STAG1, DIAPH2 
-##     CSNK1A1, ZEB1, FERMT2, EML4, BDP1, PIBF1, KDM4B, NF1, RFX2, NFATC2 
-## PC_ 3 
-## Positive:  COL15A1, BNC2, HSPG2, AGAP1, RNF24, DDX5, ZEB1, STAG1, CMSS1, DEC1 
-##     CST3, IL1RAPL1, STARD13, FRMD4B, TRERF1, ABI2, ARSJ, SMG6, CALM2, ZNF521 
-##     GALNT2, PKP4, EPN2, FARP1, RNF111, SLC20A1, EYS, PDSS2, MPZL1, SERF2 
-## Negative:  UACA, NR4A1, LINC01088, C7, FOSB, PNISR, NFATC2, SLIT3, AKAP9, ABCA10 
-##     AFF1, CELF2, RFX2, HNRNPA2B1, ANKRD12, CNTNAP2, C1orf21, KDM4B, TANC1, CAMK2D 
-##     VMP1, CLK1, MLIP, BDP1, CSAD, SCN7A, DOCK11, KLHL24, ABCA5, CNKSR3 
-## PC_ 4 
-## Positive:  SCN7A, LINC01088, DCN, UACA, HSPG2, CELF2, AGAP1, CYTH3, PNISR, ZNF83 
-##     ZEB1, RUFY3, ANKRD12, DNM1, SHROOM3, EPHA3, CCNI, LUM, FRMD4B, CST3 
-##     TCAP, DDR2, ZNF521, DOCK11, MIR99AHG, NFATC2, MLXIP, ZBTB20, HNRNPA2B1, CSAD 
-## Negative:  MLIP, CAMK2D, FGF7, IL1RAPL1, C7, ESRRG, SGCD, THBS1, ABCA10, NUDT4 
-##     VMP1, C1orf21, DIAPH2, NEK7, PALMD, EPB41L2, NR4A1, CORIN, TBC1D8, DYRK1A 
-##     COL15A1, RNF24, IQCJ-SCHIP1, COL27A1, MICAL3, TANC1, RBPJ, COLEC12, TACC2, CRADD 
-## PC_ 5 
-## Positive:  C7, COL15A1, SCN7A, EPB41L2, IL1RAPL1, PDE7B, AKAP9, UACA, FRMD4B, ABCA10 
-##     SMG6, EPHA3, COL27A1, HNRNPA2B1, VMP1, COLEC12, EML4, PNISR, SLC20A1, RNF24 
-##     HSPG2, LUM, MIR99AHG, BDP1, FUS, KCNS3, ADAMTSL1, DIO2, NF1, NREP 
-## Negative:  CELF2, MLIP, LINC01088, IMMP2L, STARD13, DIAPH2, AFF1, ZBTB20, CRADD, TCAP 
-##     CMSS1, DDR2, UGP2, DCN, THBS1, CAMK2D, PPP3CA, USP15, SOS1, BABAM2 
-##     NEK7, SLC4A4, ZBTB7C, RFTN1, DSTN, C1orf21, STAG1, CLIP1, PDSS2, EPN2
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
 ## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
 ## This message will be shown once per session
-
## 17:03:53 UMAP embedding parameters a = 0.9922 b = 1.112
-
## 17:03:53 Read 51996 rows and found 30 numeric columns
-
## 17:03:53 Using Annoy for neighbor search, n_neighbors = 30
-
## 17:03:53 Building Annoy index with metric = cosine, n_trees = 50
-
## 0%   10   20   30   40   50   60   70   80   90   100%
-
## [----|----|----|----|----|----|----|----|----|----|
-
## **************************************************|
-## 17:03:58 Writing NN index file to temp file /data/pinello/tmp/zl_tmp/Rtmpfd24Vp/file1c4557bddc40
-## 17:03:58 Searching Annoy index using 1 thread, search_k = 3000
-## 17:04:18 Annoy recall = 100%
-## 17:04:22 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
-## 17:04:26 Initializing from normalized Laplacian + noise (using RSpectra)
-## 17:04:27 Commencing optimization for 200 epochs, with 2446902 positive edges
-## 17:05:29 Optimization finished

We next visualize the snRNA-seq and snATAC-seq in this shared UMAP space. The cells are colored by patients or modalities.

-
+
 p1 <- DimPlot(obj.coembed, group.by = "patient", shuffle = TRUE, label = TRUE)
 p2 <- DimPlot(obj.coembed, group.by = "tech", shuffle = TRUE, label = TRUE)
 
@@ -294,7 +232,7 @@ 

Co-embeddingHarmony to perform batch correction and generate a new UMAP embedding.

-
+
 obj.coembed <- RunHarmony(obj.coembed, 
                           group.by.vars = "patient",
                           reduction.use = "pca", 
@@ -312,7 +250,7 @@ 

Co-embedding## Harmony 1/10

## Harmony 2/10
## Harmony converged after 2 iterations
-
+
 obj.coembed <- RunUMAP(
   obj.coembed,
   dims = 1:30,
@@ -322,7 +260,7 @@ 

Co-embedding= FALSE )

We can plot the data again

-
+
 p1 <-
   DimPlot(obj.coembed, group.by = "patient", reduction = "umap_harmony")
 p2 <-
@@ -342,7 +280,7 @@ 

Co-embeddingNebulosa to plot the data.

-
+
 p1 <-
   plot_density(obj.coembed,
                features = "SCARA5",
@@ -379,11 +317,11 @@ 

Sub-clustering -
+
 obj.coembed <- FindNeighbors(obj.coembed, reduction = "harmony", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
-
+
 obj.coembed <- FindClusters(obj.coembed, resolution = 1, verbose = FALSE)
 
 cols <- ArchR::paletteDiscrete(obj.coembed@meta.data[, "RNA_snn_res.1"])
@@ -397,7 +335,7 @@ 

Sub-clustering

We can use the function CellPropPlot to visualize the cell propotion across all patients.

-
+
 p <- CellPropPlot(obj.coembed,
                   group.by = "RNA_snn_res.1",
                   prop.in = "patient_region_id",
@@ -411,7 +349,7 @@ 

Sub-clusteringCompareCellProp.

The idea is to identify sub-populations that show significant difference.

-
+
 obj.coembed$patient_group <- factor(obj.coembed$patient_group, 
                                     levels = c("myogenic", "ischemic", "fibrotic"))
 
@@ -435,7 +373,7 @@ 

Sub-clustering

We subset the object to only keep the clusters with significant difference and re-do the clustering

-
+
 Idents(obj.coembed) <- "RNA_snn_res.1"
 coembed.sub <- subset(obj.coembed, idents = c(0, 1, 11, 12, 3, 4, 7, 8, 9))
 coembed.sub
@@ -445,7 +383,7 @@

Sub-clustering## 5 layers present: data, counts.2, scale.data.2, counts, scale.data ## 2 other assays present: ATAC, GeneActivity ## 4 dimensional reductions calculated: pca, umap, harmony, umap_harmony

-
+
 # re-do the UMAP
 coembed.sub <- RunUMAP(coembed.sub, 
                        dims = 1:30, 
@@ -455,12 +393,12 @@ 

Sub-clustering verbose = FALSE)

## Warning: Keys should be one or more alphanumeric characters followed by an
 ## underscore, setting key from umap_harmony_ to umapharmony_
-
+
 ## re-clustering with a lower resolution
 coembed.sub <- FindNeighbors(coembed.sub, reduction = "harmony", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
-
+
 coembed.sub <- FindClusters(coembed.sub, resolution = 0.2, verbose = FALSE)
 
 cols <- ArchR::paletteDiscrete(coembed.sub@meta.data[, "RNA_snn_res.0.2"])
@@ -473,7 +411,7 @@ 

Sub-clusteringp

Visualize the patient and RNA/ATAC distribution

-
+
 p1 <- DimPlot(coembed.sub, group.by = "patient", reduction = "umap_harmony", shuffle = TRUE, label = TRUE)
 p2 <- DimPlot(coembed.sub, group.by = "tech", reduction = "umap_harmony", shuffle = TRUE, label = TRUE)
 
@@ -481,7 +419,7 @@ 

Sub-clustering

Next, we identify the markers for each cluster and visualize the top 10.

-
+
 all.markers <- FindAllMarkers(coembed.sub, 
                               only.pos = TRUE, 
                               min.pct = 0.5, 
@@ -489,7 +427,7 @@ 

Sub-clustering## Calculating cluster 0

## Calculating cluster 1
## Calculating cluster 2
-
+
 df <- all.markers %>%
     group_by(cluster) %>%
     slice_max(n = 10, order_by = avg_log2FC)
@@ -497,13 +435,13 @@ 

Sub-clusteringp <- DotPlot(coembed.sub, features = unique(df$gene)) + RotatedAxis()

## Warning: Scaling data with a low number of groups may produce misleading
 ## results
-
+
 print(p)

The above dot plot demonstrates the top 10 markers per cluster and we can easily classify cluster 1 as myofibroblasts.

Let’s compare the cell proportion again:

-
+
 coembed.sub$patient_group <- factor(coembed.sub$patient_group, 
                                     levels = c("myogenic", "ischemic", "fibrotic"))
 
@@ -518,7 +456,7 @@ 

Sub-clusteringp

Save data

-
+
 saveRDS(coembed.sub, "./Fibroblast/coembed.sub.rds")
@@ -533,17 +471,17 @@

Dimensionality reductionTo infer trajectory, we will perform dimension reduction using diffusion map via the function RunDiffusionMap. This is based on the R package destiny.

-
+
 coembed.sub <- RunDiffusionMap(coembed.sub, reduction = "harmony")
-
## finding knns......done. Time: 405.89s
+
## finding knns......done. Time: 395.14s
 ## Calculating transition probabilities...
## Warning: 'as(<dsCMatrix>, "dsTMatrix")' is deprecated.
 ## Use 'as(., "TsparseMatrix")' instead.
 ## See help("Deprecated") and help("Matrix-deprecated").
-
## ...done. Time: 0.36s
+
## ...done. Time: 0.37s
 ## 
-## performing eigen decomposition......done. Time: 3.63s
-
+## performing eigen decomposition......done. Time: 3.51s
+
 cols <- ArchR::paletteDiscrete(coembed.sub@meta.data[, "RNA_snn_res.0.2"])
 
 p <- DimPlot(coembed.sub, group.by = "RNA_snn_res.0.2", label = TRUE,
@@ -553,7 +491,7 @@ 

Dimensionality reductionp

We can also plot snATAC-seq and snRNA-seq individually

-
+
 DimPlot(coembed.sub, reduction = "dm", 
         group.by = "RNA_snn_res.0.2", split.by = "tech", cols = cols)

@@ -569,18 +507,19 @@

Cell pairingKartha, Vinay K., et al. to match the cells.

-
+
 df.pair <- PairCells(object = coembed.sub, reduction = "harmony",
                      pair.mode = "greedy",
                      pair.by = "tech", ident1 = "ATAC", ident2 = "RNA")
## Getting dimensional reduction data for pairing cells...
## ATAC assay doesn't leave any cells, so it is removed
## GeneActivity assay doesn't leave any cells, so it is removed
+
## Number of ATAC cells: 4107, number of RNA cells: 31598
## Pairing cells using greedy mode...
## [1] "Pairing all ATAC cells to nearest RNA cells"
## Finished!

We can visualize the paired cells

-
+
 sel_cells <- c(df.pair$ATAC, df.pair$RNA)
 coembed.sub2 <- coembed.sub[, sel_cells]
 
@@ -590,7 +529,7 @@ 

Cell pairing

We next create a new Seurat object for there paired cells as if they are generated by single-cell multimodal protocol.

-
+
 obj.pair <- CreatePairedObject(df.pair = df.pair, 
                                obj.coembed = coembed.sub2,
                                obj.rna = obj.rna,
@@ -598,13 +537,13 @@ 

Cell pairing= "RNA", atac.assay = "ATAC")

## Merging objects...
-
+
 
## Normalizing layer: counts

Finally, we infer a pseudo-time trajectory from SCARA5+ fibroblasts to myofibroblast using the approach from ArchR. Here we modified the function to allow to take a Seurat object as input

-
+
 obj.pair <- AddTrajectory(object = obj.pair, 
                           trajectory = c(0, 2, 1),
                           group.by = "RNA_snn_res.0.2", 
@@ -636,7 +575,7 @@ 

Select TFschromVAR.

-
+
 # Get a list of motif position frequency matrices from the JASPAR database
 pfm <- getMatrixSet(
   x = JASPAR2020,
@@ -653,7 +592,7 @@ 

Select TFs## Building motif matrix

## Finding motif positions
## Creating Motif object
-
+
 obj <- RunChromVAR(
   object = obj,
   genome = BSgenome.Hsapiens.UCSC.hg38,
@@ -664,7 +603,7 @@ 

Select TFs## Computing deviations from background

## Constructing chromVAR assay
## Warning: Layer counts isn't present in the assay object; returning NULL
-
+
 res <- SelectTFs(object = obj, return.heatmap = TRUE)
## Creating Trajectory Group Matrix..
## Some values are below 0, this could be the Motif activity matrix in which scaleTo should be set = NULL.
@@ -673,11 +612,11 @@ 

Select TFs## Creating Trajectory Group Matrix..

## Smoothing...
## Find 456 shared features!
-
+
 df.cor <- res$tfs
 ht <- res$heatmap

We can visualize the TF activity dynamic along the trajectory

-
+
 draw(ht)

@@ -689,7 +628,7 @@

Select genes
+
 res <- SelectGenes(object = obj,
                   labelTop1 = 0,
                   labelTop2 = 0)
@@ -698,7 +637,7 @@

Select genes## Creating Trajectory Group Matrix..

## Smoothing...
## Linking cis-regulatory elements to genes...
-
+
 df.p2g <- res$p2g
 ht <- res$heatmap
 
@@ -712,61 +651,72 @@ 

Gene regulatory network inferenceWe here will try to predict a gene regulatory network based on the correlation of the TF binding activity as estimated from snATAC-seq and gene expression as measured by snRNA-seq along the trajectory.

-
-# tf.gene.cor <- GetTFGeneCorrelation(object = obj, 
-#                                     tf.use = df.cor$tfs, 
-#                                     gene.use = unique(df.p2g$gene),
-#                                     tf.assay = "chromvar", 
-#                                     gene.assay = "RNA",
-#                                     trajectory.name = "Trajectory")
+
+tf.gene.cor <- GetTFGeneCorrelation(object = obj, 
+                                    tf.use = df.cor$tfs, 
+                                    gene.use = unique(df.p2g$gene),
+                                    tf.assay = "chromvar", 
+                                    gene.assay = "RNA",
+                                    trajectory.name = "Trajectory")
+
## Creating Trajectory Group Matrix..
+
## Some values are below 0, this could be the Motif activity matrix in which scaleTo should be set = NULL.
+## Continuing without depth normalization!
+
## Smoothing...
+
## Creating Trajectory Group Matrix..
+
## Smoothing...

We can then visualize this correlation matrix by heatmap. Also, we can cluster the genes and TFs to identify different regulatory modules for the predefined sub-populations.

-
-# ht <- GRNHeatmap(tf.gene.cor, 
-#                  tf.timepoint = df.cor$time_point)
-# ht
+
+ht <- GRNHeatmap(tf.gene.cor, 
+                 tf.timepoint = df.cor$time_point)
+ht
+

To associate genes to TFs, we will use the peak-to-gene links and TF binding sites information. Specifically, if a gene is regulated by a peak and this peak is bound by a TF, then we consider this gene to be a target of this TF.

-
-# motif.matching <- obj@assays$ATAC@motifs@data
-# colnames(motif.matching) <- obj@assays$ATAC@motifs@motif.names
-# motif.matching <-
-#     motif.matching[unique(df.p2g$peak), unique(tf.gene.cor$tf)]
+
+motif.matching <- obj@assays$ATAC@motifs@data
+colnames(motif.matching) <- obj@assays$ATAC@motifs@motif.names
+motif.matching <-
+    motif.matching[unique(df.p2g$peak), unique(tf.gene.cor$tf)]
 
 
-# df.grn <- GetGRN(motif.matching = motif.matching, 
-#                  df.cor = tf.gene.cor, 
-#                  df.p2g = df.p2g)
+df.grn <- GetGRN(motif.matching = motif.matching, + df.cor = tf.gene.cor, + df.p2g = df.p2g)
+
## Filtering network by peak-to-gene links...
+
## Filtering network by TF binding site prediction...

Next, we can visualize our network as the last step of this analysis

-
+
 # define colors for nodes representing TFs (i.e., regulators)
-# df.cor <- df.cor[order(df.cor$time_point), ]
-# tfs.timepoint <- df.cor$time_point
-# names(tfs.timepoint) <- df.cor$tfs
+df.cor <- df.cor[order(df.cor$time_point), ]
+tfs.timepoint <- df.cor$time_point
+names(tfs.timepoint) <- df.cor$tfs
 
-# # plot the graph, here we can highlight some genes
-# df.grn2 <- df.grn %>%
-#     subset(correlation > 0.4) %>%
-#     select(c(tf, gene, correlation)) %>%
-#     rename(weights = correlation)
+# plot the graph, here we can highlight some genes
+df.grn2 <- df.grn %>%
+    subset(correlation > 0.4) %>%
+    select(c(tf, gene, correlation)) %>%
+    rename(weights = correlation)
 
-# saveRDS(df.grn2,"./Fibroblast/final_grn.Rds")    
+saveRDS(df.grn2,"./Fibroblast/final_grn.Rds")    
 
-# p <- GRNPlot(df.grn2, 
-#              tfs.timepoint = tfs.timepoint,
-#              show.tf.labels = TRUE,
-#              seed = 42, 
-#              plot.importance = FALSE,
-#             min.importance = 2,
-#             remove.isolated = FALSE)
+p <- GRNPlot(df.grn2, 
+             tfs.timepoint = tfs.timepoint,
+             show.tf.labels = TRUE,
+             seed = 42, 
+             plot.importance = FALSE,
+            min.importance = 2,
+            remove.isolated = FALSE)
 
-# options(repr.plot.height = 20, repr.plot.width = 20)
+options(repr.plot.height = 20, repr.plot.width = 20)
 
-# print(p)
+print(p)
+
## Warning: Using alpha for a discrete variable is not advised.
+

GRN visualization @@ -778,143 +728,45 @@

GRN along the trajectory

Here we select two TFs for visualization.

-
-# obj <- AddTargetAssay(object = obj, df.grn = df.grn2)
-
-# p1 <- PseudotimePlot(object = obj, tf.use = "NR3C2")
-# p2 <- PseudotimePlot(object = obj, tf.use = "RUNX1")
-
-# p1 + p2
+
+obj <- AddTargetAssay(object = obj, df.grn = df.grn2)
+
## Warning: Layer counts isn't present in the assay object; returning NULL
+
+p1 <- PseudotimePlot(object = obj, tf.use = "NR3C2")
+
## Creating Trajectory Group Matrix..
+
## Some values are below 0, this could be the Motif activity matrix in which scaleTo should be set = NULL.
+## Continuing without depth normalization!
+
## Smoothing...
+
## Creating Trajectory Group Matrix..
+
## Smoothing...
+
## Creating Trajectory Group Matrix..
+
## Some values are below 0, this could be the Motif activity matrix in which scaleTo should be set = NULL.
+## Continuing without depth normalization!
+
## Smoothing...
+
+p2 <- PseudotimePlot(object = obj, tf.use = "RUNX1")
+
## Creating Trajectory Group Matrix..
+
## Some values are below 0, this could be the Motif activity matrix in which scaleTo should be set = NULL.
+## Continuing without depth normalization!
+
## Smoothing...
+
## Creating Trajectory Group Matrix..
+
## Smoothing...
+
## Creating Trajectory Group Matrix..
+
## Some values are below 0, this could be the Motif activity matrix in which scaleTo should be set = NULL.
+## Continuing without depth normalization!
+
## Smoothing...
+
+p1 + p2
+
## `geom_smooth()` using formula = 'y ~ x'
+
## `geom_smooth()` using formula = 'y ~ x'
+

The x-axis in above plots present pseudotime point along the trajectory, and the y-axis represent TF binding acitivty, TF expression, and TF target expression after z-score transformation.

-

-
-

GRN in space -

-

As an additional step, if the spatial transcriptome (ST) data (e.g., -generated by using 10X Visium assay) are available, we can then -visualize the target gene expression for some interesting TFs to -understand the regulon activity in spatial space. First let’s download -the ST data

-
-dir.create('./VisiumSpatial')
-
-
-download.file(url = 'https://costalab.ukaachen.de/open_data/scMEGA/VisiumSpatial/AKK004_157772.rds', 
-              destfile = './VisiumSpatial/AKK004_157772.rds', 
-              method = 'wget', extra = '--no-check-certificate')
-

Of note, each spot in the 10X Visium assay contains multiple cells, -meaning that we usually cannot identify cell types by clustering the -data. To address this issue, people developed various computation tools -to estimate cell-type-specific abundance in each spot. Here we have -perform such analysis using cell2location -based on our snRNA-seq data. Let’s first visualize the fibroblast -proportion.

-
-obj.spatial <- readRDS("./VisiumSpatial/AKK004_157772.rds")
-
-DefaultAssay(obj.spatial) <- "c2l_props"
-
-p <- SpatialFeaturePlot(obj.spatial, features = "Fib", min.cutoff = "q5",
-                       max.cutoff = "q95") +
-    scale_fill_viridis(option = "D") +
-    ggtitle("") +
-    labs(fill = "") +
-    theme(legend.position = "bottom")
-
-print(p)
-

We next can check the gene expression in space. Here, we use the -above TFs.

-
-DefaultAssay(obj.spatial) <- "SCT"
-
-p1 <- SpatialFeaturePlot(object = obj.spatial, features = "NR3C2") +
-    ggtitle("") +
-    theme(legend.position = "bottom") +
-scale_fill_viridis(option = "C")
-
-p2 <- SpatialFeaturePlot(object = obj.spatial, features = "RUNX1") +
-    ggtitle("") +
-    theme(legend.position = "bottom") +
-scale_fill_viridis(option = "C")
-
-p1 + p2
-

Then, we visualize the regulon activity of NR3C2 and RUNX1 by using -the GRN that we predicted. The major idea is that if the target genes of -a TF are up-regulated, then this TF should have high activity. This -principle has been used to develop computation methods to estimate TF -activity from gene expression data, such as DoRothEA.

-
-DefaultAssay(obj.spatial) <- "SCT"
-
-p1 <- GRNSpatialPlot(object = obj.spatial, assay = "SCT", df.grn = df.grn2,
-                   tf.use = "NR3C2", vis.option = "B") +
-    ggtitle("") +
-    theme(legend.position = "bottom") 
-
-p2 <- GRNSpatialPlot(object = obj.spatial, assay = "SCT", df.grn = df.grn2,
-                   tf.use = "RUNX1", vis.option = "B") +
-    ggtitle("") +
-    theme(legend.position = "bottom")
-
-p1 + p2
-

The above plots demonstrate a clear spatial-distributed pattern for -the regulon activity of NR3C2 and RUNX1.

Save data

-
+
 saveRDS(obj, "./Fibroblast/coembed.trajectory.rds")
-
-
-
-

Network analysis -

-

As an addition step, we next will explore the gene regulatory -networks generated by scMEGA in fibroblasts during myocardial infarction -by using network analysis methods.

-

This will allow us to

-
    -
  • explore regulators regarding their network topological -properties
  • -
  • to vizualise regulatory networks centred in selected transcription -factors.
  • -
-

We first load the network produced by scMEGA:

-
-dfgrn <- readRDS("./Fibroblast/final_grn.Rds")
-netobj <- graph_from_data_frame(dfgrn,directed = TRUE)
-V(netobj)$type <- ifelse(V(netobj)$name %in% dfgrn$tf,"TF/Gene","Gene")
-

Next, we will estimate distint network topological measures as node -degree, node centrality and node page rank score and plot these in a -principal component based embedding with a focus on transcription -factors. We plot bellow the 3 first PCs.

-
-

Network topological measures embedding: PC1 and PC2 -

-
-p <- TopEmbGRN(df.grn=netobj)
-
-
-

Network topological measures embedding: PC2 and PC3 -

-
-p <- TopEmbGRN(df.grn=netobj,axis=c(2,3))
-

This reveals that RUNX1 is a central TF in this network, as it has -high in/out-degree values and high mediator score.

-
-
-

RUNX1 centric network. -

-

To further inspect genes and TFs associated with RUNX1, we create a -centric layout embedding.

-
-NetCentPlot(netobj,"RUNX1")
-

This network reveals direct targets of RUNX1 such as TEAD4 and HIF1A -and indirect targets as TEAD1 and TEAD2. If you want, you can explore a -highlight parameter to personalize the plot by only showing the desired -genes/TFs. By default, the TFs with a betweenness score higher than an -average is depicted in the plot.

-
+
 # Check session information
 sessionInfo()
## R version 4.3.3 (2024-02-29)
@@ -1011,62 +863,63 @@ 

RUNX1 centric network.## [91] bit_4.0.5 pcaMethods_1.94.0 ## [93] fastmatch_1.1-4 codetools_0.2-20 ## [95] textshaping_0.3.7 TTR_0.24.4 -## [97] bslib_0.7.0 e1071_1.7-14 -## [99] destiny_3.16.0 GetoptLong_1.0.5 -## [101] ggplot.multistats_1.0.0 plotly_4.10.4 -## [103] mime_0.12 splines_4.3.3 -## [105] fastDummies_1.7.3 knitr_1.47 -## [107] blob_1.2.4 utf8_1.2.4 -## [109] clue_0.3-65 seqLogo_1.68.0 -## [111] fs_1.6.4 listenv_0.9.1 -## [113] ggsignif_0.6.4 tibble_3.2.1 -## [115] tzdb_0.4.0 tweenr_2.0.3 -## [117] pkgconfig_2.0.3 tools_4.3.3 -## [119] cachem_1.1.0 RhpcBLASctl_0.23-42 -## [121] RSQLite_2.3.7 viridisLite_0.4.2 -## [123] smoother_1.3 DBI_1.2.3 -## [125] fastmap_1.2.0 rmarkdown_2.27 -## [127] scales_1.3.0 ica_1.0-3 -## [129] Rsamtools_2.18.0 broom_1.0.6 -## [131] sass_0.4.9 FNN_1.1.4 -## [133] dotCall64_1.1-1 carData_3.0-5 -## [135] RANN_2.6.1 farver_2.1.2 -## [137] tidygraph_1.3.1 yaml_2.3.8 -## [139] ggthemes_5.1.0 cli_3.6.3 -## [141] purrr_1.0.2 motifmatchr_1.24.0 -## [143] leiden_0.4.3.1 lifecycle_1.0.4 -## [145] uwot_0.2.2 mvtnorm_1.2-5 -## [147] presto_1.0.0 backports_1.5.0 -## [149] BiocParallel_1.36.0 annotate_1.80.0 -## [151] rjson_0.2.21 ggridges_0.5.6 -## [153] progressr_0.14.0 parallel_4.3.3 -## [155] jsonlite_1.8.8 RcppHNSW_0.6.0 -## [157] bitops_1.0-7 bit64_4.0.5 -## [159] Rtsne_0.17 spatstat.utils_3.0-5 -## [161] nabor_0.5.0 ranger_0.16.0 -## [163] CNEr_1.38.0 jquerylib_0.1.4 -## [165] highr_0.11 knn.covertree_1.0 -## [167] R.utils_2.12.3 lazyeval_0.2.2 -## [169] shiny_1.8.1.1 htmltools_0.5.8.1 -## [171] GO.db_3.18.0 sctransform_0.4.1 -## [173] glue_1.7.0 factoextra_1.0.7 -## [175] TFMPvalue_0.0.9 spam_2.10-0 -## [177] VIM_6.2.2 RCurl_1.98-1.14 -## [179] mclust_6.1.1 ks_1.14.2 -## [181] boot_1.3-30 R6_2.5.1 -## [183] tidyr_1.3.1 SingleCellExperiment_1.24.0 -## [185] RcppRoll_0.3.0 vcd_1.4-12 -## [187] labeling_0.4.3 cluster_2.1.6 -## [189] Rhdf5lib_1.24.2 DirichletMultinomial_1.44.0 -## [191] DelayedArray_0.28.0 tidyselect_1.2.1 -## [193] vipor_0.4.7 ggforce_0.4.2 -## [195] car_3.1-2 AnnotationDbi_1.64.1 -## [197] future_1.33.2 munsell_0.5.1 -## [199] KernSmooth_2.23-24 laeken_0.5.3 -## [201] htmlwidgets_1.6.4 RColorBrewer_1.1-3 -## [203] rlang_1.1.4 spatstat.sparse_3.1-0 -## [205] spatstat.explore_3.2-7 fansi_1.0.6 -## [207] Cairo_1.6-2 beeswarm_0.4.0

+## [97] bslib_0.7.0 textshape_1.7.5 +## [99] e1071_1.7-14 destiny_3.16.0 +## [101] GetoptLong_1.0.5 ggplot.multistats_1.0.0 +## [103] plotly_4.10.4 mime_0.12 +## [105] splines_4.3.3 fastDummies_1.7.3 +## [107] knitr_1.47 blob_1.2.4 +## [109] utf8_1.2.4 clue_0.3-65 +## [111] seqLogo_1.68.0 fs_1.6.4 +## [113] listenv_0.9.1 ggsignif_0.6.4 +## [115] tibble_3.2.1 tzdb_0.4.0 +## [117] tweenr_2.0.3 pkgconfig_2.0.3 +## [119] tools_4.3.3 cachem_1.1.0 +## [121] RhpcBLASctl_0.23-42 RSQLite_2.3.7 +## [123] viridisLite_0.4.2 smoother_1.3 +## [125] DBI_1.2.3 fastmap_1.2.0 +## [127] rmarkdown_2.27 scales_1.3.0 +## [129] ica_1.0-3 Rsamtools_2.18.0 +## [131] broom_1.0.6 sass_0.4.9 +## [133] FNN_1.1.4 dotCall64_1.1-1 +## [135] carData_3.0-5 RANN_2.6.1 +## [137] farver_2.1.2 mgcv_1.9-1 +## [139] tidygraph_1.3.1 yaml_2.3.8 +## [141] ggthemes_5.1.0 cli_3.6.3 +## [143] purrr_1.0.2 motifmatchr_1.24.0 +## [145] leiden_0.4.3.1 lifecycle_1.0.4 +## [147] uwot_0.2.2 mvtnorm_1.2-5 +## [149] presto_1.0.0 backports_1.5.0 +## [151] BiocParallel_1.36.0 annotate_1.80.0 +## [153] rjson_0.2.21 ggridges_0.5.6 +## [155] progressr_0.14.0 parallel_4.3.3 +## [157] jsonlite_1.8.8 RcppHNSW_0.6.0 +## [159] bitops_1.0-7 bit64_4.0.5 +## [161] Rtsne_0.17 spatstat.utils_3.0-5 +## [163] nabor_0.5.0 ranger_0.16.0 +## [165] CNEr_1.38.0 jquerylib_0.1.4 +## [167] highr_0.11 knn.covertree_1.0 +## [169] R.utils_2.12.3 lazyeval_0.2.2 +## [171] shiny_1.8.1.1 htmltools_0.5.8.1 +## [173] GO.db_3.18.0 sctransform_0.4.1 +## [175] glue_1.7.0 factoextra_1.0.7 +## [177] TFMPvalue_0.0.9 spam_2.10-0 +## [179] VIM_6.2.2 RCurl_1.98-1.14 +## [181] mclust_6.1.1 ks_1.14.2 +## [183] boot_1.3-30 R6_2.5.1 +## [185] tidyr_1.3.1 SingleCellExperiment_1.24.0 +## [187] RcppRoll_0.3.0 vcd_1.4-12 +## [189] labeling_0.4.3 cluster_2.1.6 +## [191] Rhdf5lib_1.24.2 DirichletMultinomial_1.44.0 +## [193] DelayedArray_0.28.0 tidyselect_1.2.1 +## [195] vipor_0.4.7 ggforce_0.4.2 +## [197] car_3.1-2 AnnotationDbi_1.64.1 +## [199] future_1.33.2 munsell_0.5.1 +## [201] KernSmooth_2.23-24 laeken_0.5.3 +## [203] htmlwidgets_1.6.4 RColorBrewer_1.1-3 +## [205] rlang_1.1.4 spatstat.sparse_3.1-0 +## [207] spatstat.explore_3.2-7 fansi_1.0.6 +## [209] Cairo_1.6-2 beeswarm_0.4.0
diff --git a/docs/articles/myofibroblast-GRN_files/figure-html/viz_network-1.png b/docs/articles/myofibroblast-GRN_files/figure-html/viz_network-1.png index 5893b1f..5d77220 100644 Binary files a/docs/articles/myofibroblast-GRN_files/figure-html/viz_network-1.png and b/docs/articles/myofibroblast-GRN_files/figure-html/viz_network-1.png differ