scGPS introduction (original) (raw)

Here we look at an example use case: finding relationships between clusters within one sample or between two samples.

# Run bootstrap prediction for each of the 4 clusters (c_selectID = 1..4).
# Cluster labels are read from the first colData column of each object.
cluster_mixedpop1 <- colData(mixedpop1)[, 1]
cluster_mixedpop2 <- colData(mixedpop2)[, 1]
#cluster_mixedpop2 <- as.numeric(as.vector(colData(mixedpop2)[,1]))

# top 200 gene markers distinguishing cluster 1
# (the same gene list is reused for every selected cluster below)
genes <- DEgenes$id[1:200]

# mixedpop2 is passed as both mixedpop1 and mixedpop2, so each selected
# cluster is compared against the clusters of the same object.
# The clusters are processed in ascending order, one call per cluster, and
# c_selectID is left at 4 after the loop, as in the unrolled version.
LSOLDA_results <- vector("list", 4)
for (c_selectID in c(1, 2, 3, 4)) {
  LSOLDA_results[[c_selectID]] <- bootstrap_prediction(
    nboots = 2, mixedpop1 = mixedpop2, mixedpop2 = mixedpop2,
    genes = genes, c_selectID = c_selectID,
    listData = list(),
    cluster_mixedpop1 = cluster_mixedpop2,
    cluster_mixedpop2 = cluster_mixedpop2
  )
}

# Keep the per-cluster objects under the names used by the rest of the script
LSOLDA_dat1 <- LSOLDA_results[[1]]
LSOLDA_dat2 <- LSOLDA_results[[2]]
LSOLDA_dat3 <- LSOLDA_results[[3]]
LSOLDA_dat4 <- LSOLDA_results[[4]]


# Build the input table for the sankey plot: one reformat_LASSO() result per
# selected cluster, each tagged with its own node colour.

# Number of distinct labels in the first colData column of mixedpop2
n_pred_subpop <- length(unique(colData(mixedpop2)[, 1]))

# Thin wrapper holding the arguments shared by all four reformat_LASSO() calls
lasso_to_links <- function(cluster_id, lsolda, node_colour) {
  reformat_LASSO(
    c_selectID = cluster_id, mp_selectID = 2,
    LSOLDA_dat = lsolda,
    nPredSubpop = n_pred_subpop,
    Nodes_group = node_colour
  )
}

LASSO_C1S2 <- lasso_to_links(1, LSOLDA_dat1, "#7570b3")
LASSO_C2S2 <- lasso_to_links(2, LSOLDA_dat2, "#1b9e77")
LASSO_C3S2 <- lasso_to_links(3, LSOLDA_dat3, "#e7298a")
LASSO_C4S2 <- lasso_to_links(4, LSOLDA_dat4, "#00FFFF")

# Stack the four tables, then drop every row whose Value is NA
combined <- rbind(LASSO_C1S2, LASSO_C2S2, LASSO_C3S2, LASSO_C4S2)
has_value <- !is.na(combined$Value)
combined <- combined[has_value, ]

# Same value as the nboots passed to bootstrap_prediction(); the column
# positions picked below are offsets from it.
nboots <- 2

# Nodes: node, nodegroup
node_columns <- c(nboots + 3, nboots + 4)
# Links: source, target, value (value is the last column of `combined`)
link_columns <- c(nboots + 2, nboots + 1, ncol(combined))

combined_D3obj <- list(
  Nodes = combined[, node_columns],
  Links = combined[, link_columns]
)

library(networkD3)

# Collect every node label that appears as a link source or target
Node_source <- as.vector(sort(unique(combined_D3obj$Links$Source)))
Node_target <- as.vector(sort(unique(combined_D3obj$Links$Target)))
Node_all <- unique(c(Node_source, Node_target))

# Assign zero-based ids: each label is replaced by its position in Node_all
# minus one. match() does the lookup in a single vectorized step, so a label
# that happens to look like an already-assigned id (e.g. "0" or "1") cannot be
# relabelled twice, and an empty Node_all needs no special handling.
Source <- match(combined_D3obj$Links$Source, Node_all) - 1
Target <- match(combined_D3obj$Links$Target, Node_all) - 1

combined_D3obj$Links$Source <- as.numeric(Source)
combined_D3obj$Links$Target <- as.numeric(Target)
# Each link takes the NodeGroup value of its row in `combined`
combined_D3obj$Links$LinkColor <- combined$NodeGroup

# Prepare node info: one row per node label with its zero-based id
node_df <- data.frame(Node = Node_all)
# seq_along() stays correct for a single node, where 1:(n - 1) would become
# 1:0 and count down, producing too many ids
node_df$id <- as.numeric(seq_along(Node_all) - 1)

suppressMessages(library(dplyr))
# One row per (Node, NodeGroup) combination in `combined`; only the colour
# column (second column of the count table) is kept.
# NOTE(review): the colours are assigned to node_df by position, which assumes
# the count table has one row per node in the same order as Node_all - confirm.
Color <- combined %>% count(Node, color = NodeGroup) %>% select(2)
node_df$color <- Color$color

suppressMessages(library(networkD3))
# Draw the sankey diagram from the link table and node table built above;
# the string arguments name the columns that sankeyNetwork() should read.
sankey_links <- combined_D3obj$Links
p1 <- sankeyNetwork(
  Links = sankey_links,
  Nodes = node_df,
  Source = "Source",
  Target = "Target",
  Value = "Value",
  NodeID = "Node",
  NodeGroup = "color",
  LinkGroup = "LinkColor",
  fontSize = 22
)
# Display the widget
p1

#saveNetwork(p1, file = paste0(path,'Subpopulation_Net.html'))