tissue_of_interest = "Liver"
library(here)
source("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/boilerplate.R")
#tiss = load_tissue_droplet(tissue_of_interest)
#library(scater)
library(dplyr)
library(Seurat)
library(cowplot)
#library(MAST)
require(stringr)
require(reshape2)
require(ggplot2)
require(MASS)
library(tools)
require(data.table)
library(ggfortify)
library(tidyverse)
require(dplyr)
library(miscTools)
library(caret)
library(Rtsne)
library(ggrepel)
G172
G172.umis <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/filtered_feature_bc_matrix")
#G172.umis.raw <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/raw_feature_bc_matrix/")
G172.htos <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/HASH_Citeseq/HASH_Results_filtered-10X-BC/umi_count", gene.column=1)
# Select cell barcodes detected by both RNA and HTO In the example datasets we have already
# filtered the cells for you, but perform this step for clarity.
joint.bcs <- intersect(colnames(G172.umis), colnames(G172.htos))
# Subset RNA and HTO counts by joint cell barcodes
joint_cite1_cite2 <- intersect(colnames(G172.htos), colnames(df2))
G172.umis <- G172.umis[, joint.bcs]
G172.htos <- as.matrix(G172.htos[, joint.bcs])
G172.htos <- G172.htos[-5,]
# Confirm that the HTO have the correct names
rownames(G172.htos)
# Setup Seurat object
G172.hashtag <- CreateSeuratObject(counts = G172.umis)
# Normalize RNA data with log normalization
G172.hashtag <- NormalizeData(G172.hashtag)
# Find and scale variable features
G172.hashtag <- FindVariableFeatures(G172.hashtag, selection.method = "mean.var.plot")
G172.hashtag <- ScaleData(G172.hashtag, features = VariableFeatures(G172.hashtag))
# Add HTO data as a new assay independent from RNA
G172.hashtag[["HTO"]] <- CreateAssayObject(counts = G172.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
G172.hashtag <- NormalizeData(G172.hashtag, assay = "HTO", normalization.method = "CLR")
# If you have a very large dataset we suggest using k_function = 'clara'. This is a k-medoid
# clustering function for large applications You can also play with additional parameters (see
# documentation for HTODemux()) to adjust the threshold for classification Here we are using the
# default settings
G172.hashtag <- HTODemux(G172.hashtag, assay = "HTO", positive.quantile = 0.99, kfunc = 'clara')
# Global classification results
table(G172.hashtag$HTO_classification.global)
table(G172.hashtag$hash.ID)
# Group cells based on the max HTO signal
Idents(G172.hashtag) <- "hash.ID"
RidgePlot(G172.hashtag, assay = "HTO", features = rownames(G172.hashtag[["HTO"]])[1:4], ncol = 2, nrow=2)
#Idents(G172.hashtag) <- "HTO_classification.global"
# First, we will remove negative cells from the object
G172.hashtag.subset <- subset(G172.hashtag, idents =c("Negative","Doublet"), invert = TRUE)
## Feature scatterplot for hastags IDs ########
FeatureScatter(G172.hashtag.subset, feature1 = "M1-ATGATGAACAGCCAG", feature2 = "M2-TGACGCCGTTGTTGT")
FeatureScatter(G172.hashtag.subset, feature1 = "M1-ATGATGAACAGCCAG", feature2 = "M3-GCCTAGTATGATCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M1-ATGATGAACAGCCAG", feature2 = "M4-AGTCACAGTATTCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M2-TGACGCCGTTGTTGT", feature2 = "M3-GCCTAGTATGATCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M2-TGACGCCGTTGTTGT", feature2 = "M4-AGTCACAGTATTCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M3-GCCTAGTATGATCCA", feature2 = "M4-AGTCACAGTATTCCA")
# Calculate a distance matrix using HTO
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = G172.hashtag.subset, assay = "HTO"))))
# Calculate tSNE embeddings with a distance matrix
G172.hashtag.subset <- RunTSNE(G172.hashtag.subset, distance.matrix = hto.dist.mtx, perplexity = 100)
DimPlot(G172.hashtag.subset)
HTOHeatmap(G172.hashtag, assay = "HTO", ncells = 26740)
########################## Rescue doublets ########################
G172.doublet <- subset(G172.hashtag, idents = "Doublet")
G172.doublet.rescue <- HTODemux(G172.doublet, assay = "HTO", positive.quantile = 0.99, kfunc = 'clara')
Idents(G172.doublet.rescue) <- "hash.ID"
RidgePlot(G172.doublet.rescue, assay = "HTO", features = rownames(G172.doublet.rescue[["HTO"]])[1:4], ncol = 2, nrow=2)
###############################################################################
# Extract the singlets M1 #############3
G172.M1 <- subset(G172.hashtag, idents = "M1-ATGATGAACAGCCAG", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M1$stim <- "G172M1"
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- SCTransform(G172.M1,verbose =TRUE)
# Select the top 1000 most variable features
G172.M1 <- NormalizeData(G172.M1, verbose = FALSE)
G172.M1 <- FindVariableFeatures(G172.M1, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M1 <- ScaleData(G172.M1, features = VariableFeatures(G172.M1))
# Run PCA
G172.M1 <- RunPCA(G172.M1,npcs = 30, features = VariableFeatures(G172.M1))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M1 <- RunUMAP(G172.M1, reduction = "pca", dims = 1:25)
G172.M1 <- FindNeighbors(G172.M1, reduction = "pca", dims = 1:25)
G172.M1 <- FindClusters(G172.M1, resolution = 0.6 )
G172.M1.p1<- UMAPPlot(G172.M1, reduction = "umap", label=TRUE, label.size=5)
G172.M1.p1
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- NormalizeData(G172.M1, verbose = TRUE)
d1 <- DotPlot(G172.M1, features = all_genes)+RotatedAxis()
plot_grid(G172.M1.p1,d1)
# Extract the singlet for M2 #############3
G172.M2 <- subset(G172.hashtag, idents = "M2-TGACGCCGTTGTTGT", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M2$stim <- "G172M2"
DefaultAssay(G172.M2) <- "RNA"
G172.M2 <- SCTransform(G172.M2,verbose =TRUE)
# Select the top 1000 most variable features
G172.M2 <- NormalizeData(G172.M2, verbose = FALSE)
G172.M2 <- FindVariableFeatures(G172.M2, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M2 <- ScaleData(G172.M2, features = VariableFeatures(G172.M2))
# Run PCA
G172.M2 <- RunPCA(G172.M2,npcs = 30, features = VariableFeatures(G172.M2))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M2 <- RunUMAP(G172.M2, reduction = "pca", dims = 1:25)
G172.M2 <- FindNeighbors(G172.M2, reduction = "pca", dims = 1:25)
G172.M2 <- FindClusters(G172.M2, resolution = 0.5 )
G172.M2.p1<- UMAPPlot(G172.M2, reduction = "umap", label=TRUE, label.size=5)
G172.M2.p1
DefaultAssay(G172.M2) <- "RNA"
G172.M2 <- NormalizeData(G172.M2, verbose = TRUE)
d2 <- DotPlot(G172.M2, features = all_genes)+RotatedAxis()
plot_grid(G172.M2.p1,d2)
#### Extract the singlet for M3 #####
G172.M3 <- subset(G172.hashtag, idents = "M3-GCCTAGTATGATCCA", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M3) <- "RNA"
G172.M3 <- SCTransform(G172.M3,verbose =TRUE)
# Select the top 1000 most variable features
G172.M3 <- NormalizeData(G172.M3, verbose = FALSE)
G172.M3 <- FindVariableFeatures(G172.M3, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M3 <- ScaleData(G172.M3, features = VariableFeatures(G172.M3))
# Run PCA
G172.M3 <- RunPCA(G172.M3,npcs = 30, features = VariableFeatures(G172.M3))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M3 <- RunUMAP(G172.M3, reduction = "pca", dims = 1:25)
G172.M3 <- FindNeighbors(G172.M3, reduction = "pca", dims = 1:25)
G172.M3 <- FindClusters(G172.M3, resolution = 0.5 )
G172.M3.p1<- UMAPPlot(G172.M3, reduction = "umap", label=TRUE, label.size=5)
G172.M3.p1
DefaultAssay(G172.M3) <- "RNA"
G172.M3 <- NormalizeData(G172.M3, verbose = TRUE)
d3 <- DotPlot(G172.M3, features = all_genes)+RotatedAxis()
plot_grid(G172.M3.p1,d3)
##### Extract the singlet for M4 #####
G172.M4 <- subset(G172.hashtag, idents = "M4-AGTCACAGTATTCCA", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M4) <- "RNA"
G172.M4 <- SCTransform(G172.M4,verbose =TRUE)
# Select the top 1000 most variable features
G172.M4 <- NormalizeData(G172.M4, verbose = FALSE)
G172.M4 <- FindVariableFeatures(G172.M4, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M4 <- ScaleData(G172.M4, features = VariableFeatures(G172.M4))
# Run PCA
G172.M4 <- RunPCA(G172.M4,npcs = 30, features = VariableFeatures(G172.M4))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M4 <- RunUMAP(G172.M4, reduction = "pca", dims = 1:25)
G172.M4 <- FindNeighbors(G172.M4, reduction = "pca", dims = 1:25)
G172.M4 <- FindClusters(G172.M4, resolution = 0.5 )
G172.M4.p1<- UMAPPlot(G172.M4, reduction = "umap", label=TRUE, label.size=5)
G172.M4.p1
DefaultAssay(G172.M4) <- "RNA"
G172.M4 <- NormalizeData(G172.M4, verbose = TRUE)
d4 <- DotPlot(G172.M4, features = all_genes)+RotatedAxis()
plot_grid(G172.M4.p1,d4)
############## Combined ####################33
G172.M5 <- subset(G172.hashtag, idents = c("M1-ATGATGAACAGCCAG","M2-TGACGCCGTTGTTGT","M3-GCCTAGTATGATCCA","M4-AGTCACAGTATTCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M5) <- "RNA"
G172.M5 <- SCTransform(G172.M5,verbose =TRUE)
# Select the top 1000 most variable features
G172.M5 <- NormalizeData(G172.M5, verbose = FALSE)
G172.M5 <- FindVariableFeatures(G172.M5, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M5 <- ScaleData(G172.M5, features = VariableFeatures(G172.M5))
# Run PCA
G172.M5 <- RunPCA(G172.M5,npcs = 30, features = VariableFeatures(G172.M5))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M5 <- RunUMAP(G172.M5, reduction = "pca", dims = 1:25)
G172.M5 <- FindNeighbors(G172.M5, reduction = "pca", dims = 1:25)
G172.M5 <- FindClusters(G172.M5, resolution = 0.5 )
G172.M5.p1<- UMAPPlot(G172.M5, reduction = "umap", label=TRUE, label.size=5)
G172.M5.p1
DefaultAssay(G172.M5) <- "RNA"
G172.M5 <- NormalizeData(G172.M5, verbose = TRUE)
d5 <- DotPlot(G172.M5, features = c(all_genes,'Cyp2b10','Cyp2d9'))+RotatedAxis()
plot_grid(G172.M5.p1,d5)
### control
G172.M6 <- subset(G172.hashtag.subset, idents = c("M1-ATGATGAACAGCCAG","M3-GCCTAGTATGATCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M6) <- "RNA"
#G172.M6 <- SCTransform(G172.M6,verbose =TRUE)
# Select the top 1000 most variable features
G172.M6 <- NormalizeData(G172.M6, verbose = FALSE)
G172.M6 <- FindVariableFeatures(G172.M6, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M6 <- ScaleData(G172.M6, features = VariableFeatures(G172.M6))
# Run PCA
G172.M6 <- RunPCA(G172.M6,npcs = 30, features = VariableFeatures(G172.M6))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M6 <- RunUMAP(G172.M6, reduction = "pca", dims = 1:25)
G172.M6 <- FindNeighbors(G172.M6, reduction = "pca", dims = 1:25)
G172.M6 <- FindClusters(G172.M6, resolution = 0.5 )
G172.M6.p1<- UMAPPlot(G172.M6, reduction = "umap", label=TRUE, label.size=5)
G172.M6.p1
DefaultAssay(G172.M6) <- "RNA"
G172.M6 <- NormalizeData(G172.M6, verbose = TRUE)
d6 <- DotPlot(G172.M6, features = c(all_genes,'Cyp2b10','Cyp2d9'))+RotatedAxis()
plot_grid(G172.M6.p1,d6)
#### TCPO
G172.M7 <- subset(G172.hashtag, idents = c("M2-TGACGCCGTTGTTGT","M4-AGTCACAGTATTCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M7) <- "RNA"
G172.M7 <- SCTransform(G172.M7,verbose =TRUE)
# Select the top 1000 most variable features
G172.M7 <- NormalizeData(G172.M7, verbose = FALSE)
G172.M7 <- FindVariableFeatures(G172.M7, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M7 <- ScaleData(G172.M7, features = VariableFeatures(G172.M7))
# Run PCA
G172.M7 <- RunPCA(G172.M7,npcs = 30, features = VariableFeatures(G172.M7))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M7 <- RunUMAP(G172.M7, reduction = "pca", dims = 1:25)
G172.M7 <- FindNeighbors(G172.M7, reduction = "pca", dims = 1:25)
G172.M7 <- FindClusters(G172.M7, resolution = 0.5 )
G172.M7.p1<- UMAPPlot(G172.M7, reduction = "umap", label=TRUE, label.size=5)
G172.M7.p1
DefaultAssay(G172.M7) <- "RNA"
G172.M7 <- NormalizeData(G172.M7, verbose = TRUE)
d7 <- DotPlot(G172.M7, features = c(all_genes,'Cyp2b10','Cyp2d9'))+RotatedAxis()
plot_grid(G172.M7.p1,d7, nrow=2)
########## function load_tissue_droplet############
droplet_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/metadata_droplet_liver.csv", sep=",", header = TRUE)
colnames(droplet_metadata)[1] <- "channel"
tissue_metadata = filter(droplet_metadata, tissue == tissue_of_interest)[,c('channel','tissue','subtissue','mouse.sex', 'mouse.id')]
raw.data <- Read10X("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/Liver-10X_P4_2/")
colnames(raw.data) <- lapply(colnames(raw.data), function(x) paste0(tissue_metadata$channel[1],'_',x))
meta.data1 = data.frame(row.names = colnames(raw.data))
meta.data1['channel'] = tissue_metadata$channel[1]
if (length(tissue_metadata$channel) > 1){
# Some tissues, like Thymus and Heart had only one channel
for(i in 2:nrow(tissue_metadata)){
subfolder = paste0("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/",tissue_of_interest, '-', tissue_metadata$channel[i])
new.data1 <- Read10X(data.dir = subfolder)
colnames(new.data1) <- lapply(colnames(new.data1), function(x) paste0(tissue_metadata$channel[i],'_', x))
new.metadata1 = data.frame(row.names = colnames(new.data1))
new.metadata1['channel'] = tissue_metadata$channel[i]
raw.data = cbind(raw.data, new.data1)
meta.data1 = rbind(meta.data1, new.metadata1)
}
}
rnames = row.names(meta.data1)
meta.data1 <- merge(meta.data1, tissue_metadata, sort = F)
row.names(meta.data1) <- rnames
# Order the cells alphabetically to ensure consistency.
ordered_cell_names = order(colnames(raw.data))
raw.data = raw.data[,ordered_cell_names]
meta.data1 = meta.data1[ordered_cell_names,]
# Find ERCC's, compute the percent ERCC, and drop them from the raw data.
erccs <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = TRUE)
percent.ercc <- Matrix::colSums(raw.data[erccs, ])/Matrix::colSums(raw.data)
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
raw.data <- raw.data[-ercc.index,]
# Create the Seurat object with all the data
droplet <- CreateSeuratObject(raw.data) # dropseq
droplet <- AddMetaData(object = droplet, meta.data1)
droplet@meta.data$tech <- "droplet"
#n.pcs = 10
#droplet <- SubsetData(droplet,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000)) # old version of seurat
droplet <- subset(droplet, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
droplet <- NormalizeData(droplet, verbose = FALSE)
droplet <- FindVariableFeatures(droplet, selection.method = "vst", nfeatures = 2000)
droplet <- ScaleData(droplet, verbose = FALSE)
#droplet <- RunPCA(droplet, npcs = 10, verbose = FALSE)
droplet$stim <- "droplet"
droplet$cond <- "ctrl"
# droplet <- ScaleData(droplet, verbose = FALSE)
droplet <- RunPCA(droplet, npcs = 30, verbose = FALSE)
droplet <- RunUMAP(droplet, reduction = "pca", dims = 1:25)
droplet <- FindNeighbors(droplet, reduction = "pca", dims = 1:10)
droplet <- FindClusters(droplet, resolution = 0.5 )
p1<- UMAPPlot(droplet, reduction = "umap", group.by = "channel", label=TRUE, label.size=5)
p2 <- UMAPPlot(droplet, label=TRUE, label.size=6)
p3<- UMAPPlot(droplet, reduction = "umap", group.by = "mouse.sex", label=TRUE, label.size=5)
res.used <- 1
#droplet <- FindClusters(object = droplet, reduction.type = "pca", dims.use = 1:n.pcs, resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
droplet <- RunTSNE(object = droplet, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
TSNEPlot(object = droplet, do.label = T, pt.size = 1.2, label.size = 4)
droplet <- RenameIdents(droplet, `0` = "Hep-Mid-M", `1` = "Hep-PC-F", `2` = "Hep-PP-F",`3` = "Hep-PP-M", `4` = "Hep-Mid-M", `5` = "Hep-PC-M", `6` = "Hep-Mid-F", `7` = "Hep-Mid-F", `8` = "Endo-F", `9` = "Bileduct-F")
### this is tranformation option for develpment SCtransform program #######
droplet <- SCTransform(droplet,verbose =TRUE)
######3 Smartseq #########################
plate_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Liver_facs_annotation.csv", sep=",", header = TRUE)
colnames(plate_metadata)[1] <- "plate.barcode"
raw.data = read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/liver_facs_scrna_data.csv", sep=",", row.names=1)
colnames(plate_metadata)[1] <- "plate.barcode"
plate.barcodes = lapply(colnames(raw.data), function(x) strsplit(strsplit(x, "_")[[1]][1], '.', fixed=TRUE)[[1]][2])
barcode.df = t.data.frame(as.data.frame(plate.barcodes))
rownames(barcode.df) = colnames(raw.data)
barcode.df= cbind(barcode.df, colnames(raw.data))
colnames(barcode.df) = c('plate.barcode1', 'plate.barcode')
rnames = row.names(barcode.df)
meta.data <- merge(barcode.df, plate_metadata, by='plate.barcode', sort = F)
row.names(meta.data) <- rnames
# Sort cells by cell name
meta.data = meta.data[order(rownames(meta.data)), ]
raw.data = raw.data[,rownames(meta.data)]
# Create the Seurat object with all the data
smartseq <- CreateSeuratObject(raw.data)
smartseq <- AddMetaData(object = smartseq, meta.data)
#smartseq@meta.data$tech <- "smartseq"
smartseq$stim <- "smartseq"
smartseq$cond <- "ctrl"
#smartseq <- SubsetData(smartseq,subset.names = c("nGene", "nUMI"),low.thresholds = c(500, 1000)) #old version of seurat
smartseq<- subset(smartseq, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
#smartseq <- SCTransform(smartseq)
smartseq <- NormalizeData(smartseq, verbose = FALSE)
smartseq <- FindVariableFeatures(smartseq, selection.method = "vst", nfeatures = 2000)
smartseq <- ScaleData(smartseq, verbose = FALSE)
smartseq <- RunPCA(smartseq, npcs = 30, verbose = FALSE)
smartseq <- RunUMAP(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindNeighbors(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindClusters(smartseq, resolution = 0.5 )
#p4<- UMAPPlot(smartseq, reduction = "umap", group.by = "channel", label=TRUE, label.size=5)
p5 <- UMAPPlot(smartseq, label=TRUE, label.size=6)
p6<- UMAPPlot(smartseq, reduction = "umap", group.by = "mouse.sex", label=TRUE, label.size=5)
########## sctransform ###################3
smartseq <- SCTransform(smartseq)
############################### Integration Regular workflow ############################
anchors <- FindIntegrationAnchors(object.list = c(droplet.list, smartseq.list,G171B.list, G172.M1.list, G172.M2.list), dims = 1:50, anchor.features = 3000)
combined <- IntegrateData(anchorset = anchors, dims = 1:50)
DefaultAssay(combined) <- "integrated"
# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)
# t-SNE and Clustering
combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20)
combined <- FindClusters(combined, resolution = 0.5 )
#combined <- RunTSNE(combined, reduction = "pca", dims = 1:20)
# Visualization
p1 <- DimPlot(combined, reduction = "umap", group.by = "stim")
p2 <- DimPlot(combined, reduction = "umap", group.by = "mouse.sex")
p3 <- DimPlot(combined, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(combined, label=TRUE)
plot_grid(p2, p3,p4)
DimPlot(combined, reduction = "umap", split.by = "stim")
p5 <- DimPlot(combined, reduction = "tsne", group.by = "stim")
p6 <- DimPlot(combined, reduction = "tsne", group.by = "mouse.sex")
p7 <- DimPlot(combined, reduction = "tsne", label = TRUE)
p8 <- TSNEPlot(combined, label =T)
plot_grid(p6, p7,p8)
DimPlot(combined, reduction = "tsne", split.by = "stim")
#################################### Refernce based Integration ################################
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by="stim")
ctrl.list <- c(droplet.list, smartseq.list)
for (i in names(ctrl.list)) {
ctrl.list[[i]] <- SCTransform(ctrl.list[[i]], verbose =TRUE)
}
ctrl.list1 <- c(ctrl.list, G171B.list)
#dims = 1:50
ref.features <- SelectIntegrationFeatures(object.list = ctrl.list1, dims=1:50, anchor.features = 3000)
ref.list <- PrepSCTIntegration(object.list = ctrl.list1, anchor.features = ref.features)
ref.anchors <- FindIntegrationAnchors(object.list = ref.list, normalization.method = "SCT", anchor.features = ref.features, reference = c(1,2,3))
ref.integrated <- IntegrateData(anchorset = ref.anchors, normalization.method = "SCT")
DefaultAssay(ref.integrated) <- "integrated"
ref.integrated <- ScaleData(ref.integrated, verbose = FALSE)
ref.integrated <- RunPCA(object = ref.integrated, npcs=30,verbose = FALSE)
ref.integrated <- RunUMAP(object = ref.integrated ,dims = 1:30, seed.use = 10, perplexity=30)
ref.integrated <- FindNeighbors(ref.integrated, reduction = "pca", dims = 1:30)
ref.integrated <- FindClusters(ref.integrated, resolution = 0.2 )
r1 <- DimPlot(ref.integrated, split.by="stim", pt.size = 0.5)
r2 <- DimPlot(ref.integrated, label=TRUE, pt.size = 0.5)
r3 <- DimPlot(ref.integrated, split.by = c("cond"), pt.size = 0.5)
plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4,
byrow = TRUE, override.aes = list(size = 2.5))))
CombinePlots(plots)
ref.integrated$celltype.stim <- paste(Idents(ref.integrated), ref.integrated$stim, sep = "_")
ref.integrated$seurat_clusters <- Idents(ref.integrated)
Idents(ref.integrated) <- "celltype.stim"
fp <- FeaturePlot(ref.integrated, features = "ncRNA-as-chr10-8460", cols = c("lightgrey","darkred"), order = TRUE, shape.by = "stim", pt.size = 2)
DefaultAssay(ref.integrated) <-"RNA"
ref.integrated <- NormalizeData(ref.integrated)
ref.integrated <- ScaleData(ref.integrated)
dot1 <- DotPlot(ref.integrated, features= all_genes)+RotatedAxis()
d1 <- DoHeatmap(ref.integrated, features = c(all_genes,"Mup20","Xist"), group.by = "seurat_clusters", assay= 'RNA')
#d1 <- DoHeatmap(ref.integrated, features = highTPM, group.by = "celltype.stim", assay= 'RNA',raster = F, disp.max = 0.5)+scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd")) ) + guides(color=FALSE)
Drop.combined.markers <- FindAllMarkers(object = ref.integrated, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
############ merge G172 M1 #############33
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by="stim")
ctrl.list.2 <- c( G172.M1.list, G172.M2.list)
for (i in names(ctrl.list.2)) {
ctrl.list.2[[i]] <- SCTransform(ctrl.list.2[[i]], verbose =TRUE)
}
ref.features.1 <- SelectIntegrationFeatures(object.list = ctrl.list.2, dims = 1:50, anchor.features = 3000)
ref.list.1 <- PrepSCTIntegration(object.list = ctrl.list.2, anchor.features = ref.features.1)
ref.anchors.1 <- FindIntegrationAnchors(object.list = ref.list.1, normalization.method = "SCT",
anchor.features = ref.features.1, reference = c(1,2))
ref.integrated.1 <- IntegrateData(anchorset = ref.anchors.1, normalization.method = "SCT")
DefaultAssay(ref.integrated.1) <- "integrated"
ref.integrated.1 <- ScaleData(ref.integrated.1, verbose = FALSE)
ref.integrated.1 <- RunPCA(object = ref.integrated.1, npcs=30,verbose = FALSE)
ref.integrated.1 <- RunUMAP(object = ref.integrated.1, dims = 1:30, seed.use = 10 , perplexity=30)
#ref.integrated.1 <- RunTSNE(object = ref.integrated.1, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
ref.integrated.1 <- FindNeighbors(ref.integrated.1, reduction = "pca", dims = 1:30)
ref.integrated.1 <- FindClusters(ref.integrated.1, resolution = 0.2)
rG172M1.1 <- UMAPPlot(ref.integrated.1, split.by="stim", label=T)
rG172M1.2 <- UMAPPlot(ref.integrated.1, label=TRUE, combine = FALSE)
rG172M1.3 <- UMAPPlot(ref.integrated.1, split.by = c("mouse.sex"))
rG172M1.4 <- TSNEPlot(ref.integrated.1, split.by="stim")
DefaultAssay(ref.integrated.1) <-"RNA"
ref.integrated.1 <- NormalizeData(ref.integrated.1)
ref.integrated.1 <- ScaleData(ref.integrated.1)
DoHeatmap(ref.integrated.1, features = c(all_genes,'Sox9','Mup20'), group.by = "seurat_clusters", assay= 'RNA' ,raster = F)+scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd")) ) + guides(color=FALSE)
ref.integrated.1$celltype <- RenameIdents(ref.integrated.1, c(`0` = "PP", `1` = "PC", `2` = "Endo",`3` = "HSC", `4` = "Kupffer", `5` = "Dividing", `6` = "Immune", `7` = "B-NK", `8` = "NA"))
ref.integrated.1.markers <- FindAllMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
write.csv(ref.integrated.1.markers, "ref_integrated_G172M1-M2_Allmarker")
ref.integrated.1$celltype.stim <- paste(Idents(ref.integrated.1), ref.integrated.1$stim, sep = "_")
ref.integrated.1$celltype <- Idents(ref.integrated.1)
Idents(ref.integrated.1) <- "celltype.stim"
DoHeatmap(ref.integrated.1, features = TPM4, group.by = "seurat_clusters", assay= 'RNA' )
ref.integrated.1.markers <- FindMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
G172M1 and G172 M2 DE analysis
DE1.1 <- FindMarkers(ref.integrated.1, ident.1 = c("0_G172M1" ,"1_G172M1", "2_G172M1" ,"3_G172M1" ,"4_G172M1" ,"5_G172M1" ,"6_G172M1","7_G172M1", "8_G172M1"), ident.2 = c("0_G172M2" ,"1_G172M2", "2_G172M2" ,"3_G172M2" ,"4_G172M2" ,"5_G172M2" ,"6_G172M2","7_G172M2", "8_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE_All_M1_M2 <- DE1.1
DE_hep_NPC <- FindMarkers(ref.integrated.1, ident.1 = c("0_G172M2" ,"1_G172M2"), ident.2 = c("2_G172M2" ,"3_G172M2" ,"4_G172M2" ,"5_G172M2" ,"6_G172M2","7_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE_NPC_M1_M2 <- FindMarkers(ref.integrated.1, ident.1 = c( "2_G172M1" ,"3_G172M1" ,"4_G172M1" ,"5_G172M1" ,"6_G172M1","7_G172M1"), ident.2 = c("2_G172M2" ,"3_G172M2" ,"4_G172M2" ,"5_G172M2" ,"6_G172M2","7_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE_Drop <- FindAllMarkers(ref.integrated, features = )
DE0.1 <- FindMarkers(ref.integrated.1, ident.1 = c("0_G172M1"), ident.2 = c("0_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
write.csv(DE0.1, "marker/DE0.1_G172_0_M1_M2")
DE1.1 <- FindMarkers(ref.integrated.1, ident.1 = c("1_G172M1"), ident.2 = c("1_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE2.1 <- FindMarkers(ref.integrated.1, ident.1 = c("2_G172M1"), ident.2 = c("2_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE3.1 <- FindMarkers(ref.integrated.1, ident.1 = c("3_G172M1"), ident.2 = c("3_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE4.1 <- FindMarkers(ref.integrated.1, ident.1 = c("4_G172M1"), ident.2 = c("4_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE5.1 <- FindMarkers(ref.integrated.1, ident.1 = c("5_G172M1"), ident.2 = c("5_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE6.1 <- FindMarkers(ref.integrated.1, ident.1 = c("6_G172M1"), ident.2 = c("6_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE7.1 <- FindMarkers(ref.integrated.1, ident.1 = c("7_G172M1"), ident.2 = c("7_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
################# G171 TCPO expsosed ########
G171B_metadata_G171B <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/G171_metadata_droplet_liver.csv", sep=",", header = TRUE)
colnames(G171B_metadata_G171B)[1] <- "channel"
tissue_metadata_G171B = filter(G171B_metadata_G171B, tissue == tissue_of_interest)[,c('channel','tissue','subtissue','mouse.sex', 'mouse.id')]
raw.data <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/Transcript_Refined/Liver-10X_G171B/")
colnames(raw.data) <- lapply(colnames(raw.data), function(x) paste0(tissue_metadata_G171B$channel[1],'_',x))
meta.data1 = data.frame(row.names = colnames(raw.data))
meta.data1['channel'] = tissue_metadata_G171B$channel[1]
rnames = row.names(meta.data1)
meta.data1 <- merge(meta.data1, tissue_metadata_G171B, sort = F)
row.names(meta.data1) <- rnames
# Order the cells alphabetically to ensure consistency.
ordered_cell_names = order(colnames(raw.data))
raw.data = raw.data[,ordered_cell_names]
meta.data1 = meta.data1[ordered_cell_names,]
# Find ERCC's, compute the percent ERCC, and drop them from the raw data.
erccs <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = TRUE)
percent.ercc <- Matrix::colSums(raw.data[erccs, ])/Matrix::colSums(raw.data)
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
raw.data <- raw.data[-ercc.index,]
ncRNA.genes <- grep(pattern = "^ncRNA", x = rownames(x = raw.data), value = TRUE)
percent.ncRNA <- Matrix::colSums(raw.data[ncRNA.genes, ])/Matrix::colSums(raw.data)
KRAB.genes <- grep(pattern = "^KRAB", x = rownames(x = raw.data), value = TRUE)
cherry.genes <- grep(pattern = "^mcherry", x = rownames(x = raw.data), value = TRUE)
# Create the Seurat object with all the data
G171B <- CreateSeuratObject(raw.data) # dropseq
G171B <- AddMetaData(object = G171B, meta.data1)
G171B@meta.data$tech <- "G171B"
#G171B <- SubsetData(G171B,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000)) # old version of seurat
#G171B <- subset(G171B, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G171B <- NormalizeData(G171B, verbose = FALSE)
G171B <- FindVariableFeatures(G171B, selection.method = "vst", nfeatures = 2000)
G171B$stim <- "G171B"
G171B$cond <- "TCPO"
### this is tranformation option for develpment SCtransform program #######
G171B <- SCTransform(G171B,verbose =TRUE)
diffusion plt code
# Before running MDS, we first calculate a distance matrix between all pairs of cells. Here we
# use a simple euclidean distance metric on all genes, using scale.data as input
d <- dist(t(GetAssayData(combined, slot = "scale.data")))
# Run the MDS procedure, k determines the number of dimensions
mds <- cmdscale(d = d, k = 2)
# cmdscale returns the cell embeddings, we first label the columns to ensure downstream
# consistency
colnames(mds) <- paste0("MDS_", 1:2)
# We will now store this as a custom dimensional reduction called 'mds'
combined[["mds"]] <- CreateDimReducObject(embeddings = mds, key = "MDS_", assay = DefaultAssay(combined))
# We can now use this as you would any other dimensional reduction in all downstream functions
DimPlot(combined, reduction = "mds", pt.size = 0.5)
genes_hep_main =c('Alb', 'Ttr', 'Apoa1', 'Serpina1c')
genes_hep = c('Alb', 'Ttr', 'Apoa1', 'Serpina1c',
'Cyp2e1', 'Glul', 'Oat', 'Gulo',
'Ass1', 'Hamp', 'Gstp1', 'Ubb',
'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')
genes_endo = c('Pecam1', 'Nrp1', 'Kdr','Oit3')
genes_kuppfer = c( 'Clec4f', 'Cd68')
genes_nk = c('Il2rb', 'Nkg7', 'Cxcr6', 'Gzma')
genes_b = c('Cd79a', 'Cd79b')
genes_bec = c('Epcam', 'Krt19', 'Krt7')
genes_immune = 'Ptprc'
HSC = c("Dcn","Lama1","Nes")
Dividing = "Top2a"
Bplasma= "Jchain"
Mac= "Csf1r"
Chol="Sox9"
sex <- c("Cyp2d9", "ncRNA-inter_chrX-15394")
all_genes = c(genes_hep, genes_endo, genes_kuppfer, Mac,genes_nk, genes_b, genes_bec, genes_immune, HSC, Dividing)
genes_bec_b_immune = c(genes_bec,genes_b,genes_immune)
genes_zones = c('Cyp2e1', 'Glul', 'Oat', 'Gulo',
'Ass1', 'Hamp', 'Gstp1', 'Ubb',
'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')
receptor_KO <- c("ncRNA-inter-chr7-5998","Cyp2b10","Nr1i2","Nr1i3","Ppara","Pparg","Ppargc1b","Ppard")
cell <- c("Stab2","Csf1r","Cd3g","Ebf1","Irf8","Sox9","Apoc3","Top2a","Dcn")
TPM4<- c('ncRNA-inter-chr7-6524',
'ncRNA-inter-chr19-14853',
'ncRNA-inter-chr6-5675',
'ncRNA-inter-chr4-3468',
'ncRNA-inter-chr9-8122',
'ncRNA-inter-chr12-10476',
'ncRNA-inter-chr17-14026',
'ncRNA-as-chr2-1457',
'ncRNA-as-chr19-14883',
'ncRNA-as-chr10-8460',
'ncRNA-as-chr5-4325',
'ncRNA-inter-chr7-5998',
'ncRNA-as-chr9-7843',
'ncRNA-as-chr9-8142',
'ncRNA-inter-chr11-9925',
'ncRNA-inter-chr19-14873',
'ncRNA-as-chr9-8172',
'ncRNA-inter-chr4-3779',
'ncRNA-inter-chr3-2504',
'ncRNA-as-chr19-15054',
'ncRNA-inter-chr8-7423',
'ncRNA-as-chr7-6302',
'ncRNA-inter-chr10-9418',
'ncRNA-inter-chr12-10454',
'ncRNA-as-chr7-5999',
'ncRNA-as-chr6-5335',
'ncRNA-inter-chr19-14987',
'ncRNA-inter-chr16-13170',
'ncRNA-inter-chr3-2988',
'ncRNA-inter-chr8-7430',
'ncRNA-inter-chr3-2168',
'ncRNA-inter-chr9-7874',
'ncRNA-inter-chr4-3778',
'ncRNA-inter-chr2-2011',
'ncRNA-inter-chr5-4335',
'ncRNA-inter-chr9-8301',
'ncRNA-inter-chr16-13510',
'ncRNA-as-chr9-8401',
'ncRNA-as-chr16-13512',
'ncRNA-as-chr12-10896',
'ncRNA-as-chr8-7359',
'ncRNA-as-chr5-4744',
'ncRNA-inter-chr5-4499',
'ncRNA-inter-chr16-13509',
'ncRNA-inter-chr17-13692',
'ncRNA-as-chr17-13834',
'ncRNA-inter-chr9-8147',
'ncRNA-inter-chr10-8697',
'ncRNA-inter-chr6-5551',
'ncRNA-as-chr9-8317',
'ncRNA-inter-chr7-6509',
'ncRNA-as-chr19-14977',
'ncRNA-inter-chr6-5318',
'ncRNA-inter-chr5-4578',
'ncRNA-inter-chr12-10509',
'ncRNA-as-chr10-9015',
'ncRNA-inter-chr3-2411',
'ncRNA-inter-chr9-7875',
'ncRNA-inter-chr5-4336',
'ncRNA-inter-chr12-10910',
'ncRNA-as-chr1-782',
'ncRNA-inter-chr17-13924',
'ncRNA-intra-chr7-5920',
'ncRNA-inter-chr16-13225',
'ncRNA-inter-chr3-2269',
'ncRNA-inter-chr14-12016',
'ncRNA-as-chr4-3800',
'ncRNA-as-chr5-4655',
'ncRNA-inter-chr9-7989',
'ncRNA-intra-chr5-4728',
'ncRNA-inter-chrX-15248',
'ncRNA-inter-chr10-8767',
'ncRNA-inter-chr19-14717',
'ncRNA-inter-chr8-6766',
'ncRNA-inter-chr13-11122',
'ncRNA-inter-chr7-6074',
'ncRNA-inter-chr15-12439',
'ncRNA-as-chr11-9787',
'ncRNA-inter-chr2-1827',
'ncRNA-as-chr19-14976',
'ncRNA-inter-chr19-14947',
'ncRNA-inter-chr6-5248',
'ncRNA-inter-chr2-1098',
'ncRNA-inter-chr8-7180',
'ncRNA-inter-chr9-8118',
'ncRNA-inter-chr14-12199',
'ncRNA-as-chr6-5336',
'ncRNA-inter-chr4-3867',
'ncRNA-inter-chr10-9000',
'ncRNA-inter-chr14-12290',
'ncRNA-inter-chr2-1491',
'ncRNA-inter-chr15-12606',
'ncRNA-inter-chr10-9222',
'ncRNA-inter-chr5-4322',
'ncRNA-inter-chr12-10942',
'ncRNA-inter-chr18-14690',
'ncRNA-inter-chr2-1471',
'ncRNA-inter-chr3-2410',
'ncRNA-inter-chr19-14880',
'ncRNA-inter-chr13-11074',
'ncRNA-inter-chr9-8056',
'ncRNA-inter-chr5-4654',
'ncRNA-inter-chr3-2166',
'ncRNA-inter-chr8-6944',
'ncRNA-as-chr7-6065',
'ncRNA-inter-chr8-6896',
'ncRNA-inter-chr2-1963',
'ncRNA-inter-chr4-3425',
'ncRNA-inter-chr13-11385',
'ncRNA-inter-chr9-7885',
'ncRNA-as-chr10-9411',
'ncRNA-as-chr9-8419',
'ncRNA-as-chr12-10618',
'ncRNA-inter-chr7-6390',
'ncRNA-inter-chr17-14151',
'ncRNA-as-chr13-11787',
'ncRNA-as-chr2-1965',
'ncRNA-inter-chr6-5721',
'ncRNA-inter-chr6-5822',
'ncRNA-inter-chr11-9635',
'ncRNA-inter-chr11-9965',
'ncRNA-inter-chr4-3052',
'ncRNA-inter-chr17-13857',
'ncRNA-inter-chr13-11201',
'ncRNA-inter-chr6-5249',
'ncRNA-inter-chr19-14851',
'ncRNA-inter-chr6-5638',
'ncRNA-inter-chr17-14130',
'ncRNA-inter-chr9-7993',
'ncRNA-inter-chr18-14656',
'ncRNA-inter-chr9-7992',
'ncRNA-inter-chr1-566',
'ncRNA-inter-chr12-10715',
'ncRNA-inter-chr17-14102',
'ncRNA-inter-chr4-3010',
'ncRNA-inter-chr3-2764',
'ncRNA-inter-chr4-3306',
'ncRNA-inter-chr19-14979',
'ncRNA-inter-chr1-570',
'ncRNA-inter-chr19-14790',
'ncRNA-inter-chr4-3282',
'ncRNA-inter-chr5-4777',
'ncRNA-inter-chr8-7605',
'ncRNA-inter-chr9-8000',
'ncRNA-as-chr15-12920',
'ncRNA-as-chr1-369',
'ncRNA-inter-chr19-14952',
'ncRNA-as-chr9-8393',
'ncRNA-as-chr14-12074',
'ncRNA-inter-chr12-10415',
'ncRNA-inter-chr7-6559',
'ncRNA-inter-chr1-630',
'ncRNA-inter-chr4-3142',
'ncRNA-inter-chr6-5131',
'ncRNA-inter-chr16-13050',
'ncRNA-inter-chr7-6411',
'ncRNA-inter-chr5-4746',
'ncRNA-inter-chr1-633',
'ncRNA-inter-chr10-8461',
'ncRNA-inter-chr2-2016',
'ncRNA-inter-chr6-5137',
'ncRNA-as-chr4-3300',
'ncRNA-inter-chr4-3009',
'ncRNA-inter-chr2-1923',
'ncRNA-inter-chr1-670',
'ncRNA-inter-chr15-12835',
'ncRNA-inter-chr18-14655',
'ncRNA-inter-chr16-13211',
'ncRNA-as-chr7-6050',
'ncRNA-inter-chr7-6508',
'ncRNA-inter-chr11-10206',
'ncRNA-inter-chr13-11399',
'ncRNA-inter-chr8-6757',
'ncRNA-inter-chr1-129',
'ncRNA-inter-chr12-10459',
'ncRNA-inter-chr7-6709',
'ncRNA-inter-chr12-10713',
'ncRNA-inter-chr7-6523',
'ncRNA-inter-chr2-2017',
'ncRNA-inter-chr7-6343',
'ncRNA-inter-chr1-63',
'ncRNA-as-chr3-2936',
'ncRNA-inter-chr18-14691',
'ncRNA-inter-chr7-6097',
'ncRNA-inter-chr6-5723')
DefaultAssay(droplet) <- "RNA"
#droplet <- NormalizeData(combined, verbose = TRUE, normalization.method = "RC", scale.factor = 1e6)
combined <- NormalizeData(combined, verbose = TRUE)
DotPlot(combined, features = all_genes)
FeaturePlot(combined, features = genes_hep_main, min.cutoff = "q9")
#hepatocytes
subtissplot <- DotPlot(combined, features = c(genes_hep_main, genes_endo, genes_bec_b_immune, genes_kuppfer, genes_nk))
PC <- DotPlot(combined, features = c(genes_hep_main,genes_zones))
NPC <- DotPlot(combined, features = c(genes_endo,genes_kuppfer, genes_nk))
all <- DotPlot(combined, features=c(all_genes))
### coexpression plots####
f1 <- FeaturePlot(KO.cells, features = c('Cyp2b10','ncRNA-inter-chr7-5998'), reduction = "mds", order = TRUE,split.by = "stim", blend = TRUE,sort.cell = TRUE, max.cutoff = 0.5)
########## this is exact averaging formula ###############33
x <- (AverageExpression(KO.cells, verbose = TRUE, assays = "RNA" ,slot="counts")$RNA)
x["ncRNA-inter-chr7-5998",]
# G171B G171C
#ncRNA-inter-chr7-5998 1.871795 1.091463
########
#Idents(combined) <- factor(Idents(combined), levels = c(0,1,12))
markers.to.plot <- c("Alb","ncRNA-inter-chr7-5998")
DotPlot(combined, features = rev(markers.to.plot), cols = c("blue", "red"), dot.scale = 8,
split.by = "stim") + RotatedAxis()
FeaturePlot(combined, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","dSaCas9","KRAB","AAV8-mCherry"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red"))
######################### vlnplot ##########################
plots <- VlnPlot(combined, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","dSaCas9"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)
plots <- VlnPlot(combined, features = c("Lhx4","Dtna","Fam189a1","Galnt16","Kalrn"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)
#endothelial
DotPlot(combined, features = genes_endo)
#zones
zones <- DotPlot(combined, features = genes_zones)
f1 <- FeaturePlot(combined, features = c('Cyp2e1','Cyp2f2','Ass1'), min.cutoff = "q9", reduction = "tsne")
DimPlot(combined, label = TRUE)
save(combined, file="Seurat_smart-drop_integrated.Robj")
################# save raw counts from cluster #####################
Idents(combined) <- "stim"
### to avergae out the matrix from KO cells
combined.raw.data.0.1 <- as.matrix(GetAssayData(combined, slot = "data")[, WhichCells(combined, ident = 0,idents = "stim")])
combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])
combined.raw.data.2 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])
#combined.raw.data.[i] <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 1)])
#combined.raw.data.12 <-as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 12)])
#combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts"))
x <- AverageExpression(test.combined,assays = "RNA",add.ident = "stim", slot = "data",use.scale = FALSE, use.counts = FALSE)$RNA
#}######## CAR data
avg.combined.cells <- (AverageExpression(combined, verbose = FALSE)$RNA)
avg.combined.cells$gene <- rownames(avg.combined.cells)
CAR_FP <- FeaturePlot(combined, features = c('Cyp2b10','Nr1i3'), reduction = "umap", order = TRUE,split.by = "stim", blend = TRUE,sort.cell = TRUE, max.cutoff = 1, min.cutoff = 0, pt.size = 0.5, repel = TRUE)
CAR_DOT_NR <- DotPlot(combined, features = 'Nr1i3', col.min = 0)
Cyp2b10_FP <- FeaturePlot(combined, features = 'Cyp2b10', reduction = "umap", min.cutoff = 0)
########### tSNE #################################
combined <- NormalizeData(object = combined)
combined <- FindVariableFeatures(combined, selection.method = "vst", nfeatures = 2000)
G172_TPM_count <- ref.integrated.1
G172_TPM_count <- NormalizeData(G172_TPM_count,assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
TPMcount0M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '0_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '0_G172M1')]))
TPMcount1M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '1_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '1_G172M1')]))
TPMcount2M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '2_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '2_G172M1')]))
TPMcount3M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '3_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '3_G172M1')]))
TPMcount4M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '4_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '4_G172M1')]))
TPMcount5M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '5_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '5_G172M1')]))
TPMcount6M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '6_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '6_G172M1')]))
TPMcount7M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '7_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '7_G172M1')]))
TPMcount8M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '8_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '8_G172M1')]))
TPMcount0M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '0_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '0_G172M2')]))
TPMcount1M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '1_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '1_G172M2')]))
TPMcount2M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '2_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '2_G172M2')]))
TPMcount3M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '3_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '3_G172M2')]))
TPMcount4M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '4_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '4_G172M2')]))
TPMcount5M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '5_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '5_G172M2')]))
TPMcount6M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '6_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '6_G172M2')]))
TPMcount7M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '7_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '7_G172M2')]))
TPMcount8M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '8_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '8_G172M2')]))
write.csv(TPMcount0M1, "CountResult/counts_TPMcount0M1")
write.csv(TPMcount1M1, "CountResult/counts_TPMcount1M1")
write.csv(TPMcount2M1, "CountResult/counts_TPMcount2M1")
write.csv(TPMcount3M1, "CountResult/counts_TPMcount3M1")
write.csv(TPMcount4M1, "CountResult/counts_TPMcount4M1")
write.csv(TPMcount5M1, "CountResult/counts_TPMcount5M1")
write.csv(TPMcount6M1, "CountResult/counts_TPMcount6M1")
write.csv(TPMcount7M1, "CountResult/counts_TPMcount7M1")
write.csv(TPMcount8M1, "CountResult/counts_TPMcount8M1")
write.csv(TPMcount0M2, "CountResult/counts_TPMcount0M2")
write.csv(TPMcount1M2, "CountResult/counts_TPMcount1M2")
write.csv(TPMcount2M2, "CountResult/counts_TPMcount2M2")
write.csv(TPMcount3M2, "CountResult/counts_TPMcount3M2")
write.csv(TPMcount4M2, "CountResult/counts_TPMcount4M2")
write.csv(TPMcount5M2, "CountResult/counts_TPMcount5M2")
write.csv(TPMcount6M2, "CountResult/counts_TPMcount6M2")
write.csv(TPMcount7M2, "CountResult/counts_TPMcount7M2")
write.csv(TPMcount8M2, "CountResult/counts_TPMcount8M2")
#avg.KO.cells <- log1p(AverageExpression(KO.cells, verbose = FALSE)$RNA) #original code log transformed
avg.KO.cells <- (AverageExpression(KO.cells, verbose = FALSE)$RNA)
avg.KO.cells$gene <- rownames(avg.KO.cells)
genes.to.label= ("ncRNA-inter-chr7-5998")
#genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
p1 <- ggplot(avg.KO.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE)
plot_grid(p1, p2)
KO.cells <- RunUMAP(KO.cells, reduction = "pca", dims = 1:20 )
KO.cells <- FindNeighbors(KO.cells, reduction = "pca", dims = 1:20)
KO.cells <- FindClusters(KO.cells, resolution = 0.5 )
KO.cells <- RunTSNE(KO.cells, reduction = "pca", dims = 1:20)
Hep.cells <- RunUMAP(Hep.cells, reduction = "pca", dims = 1:20 )
Hep.cells <- FindNeighbors(Hep.cells, reduction = "pca", dims = 1:20)
Hep.cells <- FindClusters(Hep.cells, resolution = 0.5 )
Hep.cells <- RunTSNE(Hep.cells, reduction = "pca", dims = 1:20)
# Visualization
p1 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(KO.cells, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(KO.cells, label=TRUE)
plot_grid(p1,p4)
DimPlot(KO.cells, reduction = "umap", split.by = "stim")
#hepatocyte cells
p5 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "stim")
p6 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "mouse.sex")
p7 <- UMAPPlot(Hep.cells, reduction = "umap", label = TRUE)
p8 <- UMAPPlot(Hep.cells, label=TRUE)
plot_grid(p5,p8)
DimPlot(KO.cells, reduction = "umap", split.by = "stim")
raw.data.KO.0 <- as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 0)])
raw.data.KO.1 <- as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 1)])
raw.data.KO.2 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 2)])
raw.data.KO.3 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 3)])
raw.data.KO.4 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 4)])
raw.data.KO.5 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 5)])
write.csv(raw.data.KO.0, "CountResult/Markers/raw.data.KO.0")
write.csv(raw.data.KO.1, "CountResult/Markers/raw.data.KO.1")
write.csv(raw.data.KO.2, "CountResult/Markers/raw.data.KO.2")
write.csv(raw.data.KO.3, "CountResult/Markers/raw.data.KO.3")
write.csv(raw.data.KO.4, "CountResult/Markers/raw.data.KO.4")
write.csv(raw.data.KO.5, "CountResult/Markers/raw.data.KO.5")
f1 <- FeaturePlot(KO.cells, features = c('ncRNA-inter-chr7-5998'), reduction = "umap", split.by = "stim")
plot_grid(f1,p1,p4)
DefaultAssay(KO.cells) <- "RNA"
KO.cells <- NormalizeData(KO.cells, verbose = FALSE)
plots <- VlnPlot(KO.cells, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","Cyp2e1","Cyp2f2"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)
Three_five_six <- subset(KO.cells, idents = c("5","6"))
Three_five_six <- RunUMAP(Three_five_six, reduction = "pca", dims = 1:20 )
Three_five_six <- FindNeighbors(Three_five_six, reduction = "pca", dims = 1:20)
Three_five_six <- FindClusters(Three_five_six, resolution = 1 )
Three_five_six <- RunTSNE(Three_five_six, reduction = "pca", dims = 1:20)
p1 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(Three_five_six, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(Three_five_six, label=TRUE)
plot_grid(p1,p4)
DimPlot(Three_five_six, reduction = "umap", split.by = "stim")
f1 <- FeaturePlot(Three_five_six, features = c('ncRNA-inter-chr7-5998'), reduction = "umap", split.by = "stim")
lnc5998 <- subset(combined, cells = lnc5998.cells, idents = "1")
lnc5998 <- RunUMAP(lnc5998, reduction = "pca", dims = 1:20 )
lnc5998 <- FindNeighbors(lnc5998, reduction = "pca", dims = 1:20)
lnc5998 <- FindClusters(lnc5998, resolution = 1 )
lnc5998 <- RunTSNE(lnc5998, reduction = "pca", dims = 1:20)
# Visualization
p1 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(lnc5998, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(lnc5998, label=TRUE)
plot_grid(p1,p4)
DimPlot(lnc5998, reduction = "umap", split.by = "stim")
lnc5998.cells <- WhichCells(object = combined, expression = "ncRNA-inter-chr7-5998" > 1)
FeaturePlot(lnc5998, features = c("ncRNA-inter-chr7-5998"), split.by = "stim",
+ cols = c("grey", "red"), cells = lnc5998.cells,min.cutoff = 0.5)
DefaultAssay(lnc5998) <- "RNA"
lnc5998 <- NormalizeData(lnc5998, verbose = FALSE)
plots <- VlnPlot(lnc5998, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","Cyp2e1","Cyp2f2"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)
d <- dist(t(GetAssayData(KO.cells, slot = "scale.data")))
# Run the MDS procedure, k determines the number of dimensions
mds <- cmdscale(d = d, k = 2)
# cmdscale returns the cell embeddings, we first label the columns to ensure downstream
# consistency
colnames(mds) <- paste0("MDS_", 1:2)
# We will now store this as a custom dimensional reduction called 'mds'
KO.cells[["mds"]] <- CreateDimReducObject(embeddings = mds, key = "MDS_", assay = DefaultAssay(KO.cells))
# We can now use this as you would any other dimensional reduction in all downstream functions
DimPlot(KO.cells, reduction = "mds", pt.size = 0.5)
Find differential markers
KO.cells$celltype.stim <- paste(Idents(KO.cells), KO.cells$stim, sep = "_")
KO.cells$celltype <- Idents(KO.cells)
Idents(KO.cells) <- "celltype.stim"
response3 <- FindMarkers(KO.cells, ident.1 = c("1_G171B","0_G171B","2_G171B"), ident.2 = c("1_G171C", "0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(response3, n = 15)
#DE1: Compare cluster 0+1+12 (that expressed lnc5998) with Other hepatocyte clusters (2+5+8)
#DE2: compare cluster 1 (showed major effects in the KD) vs Cluster 0 (that showed little KD)
#DE3: For KO.cells that formed five subcluster, compare clusters 3+4+5+1 vs 2+0
#DE4: for KO.cells that formed five clusters. Comapre cluster 4 vs. 2
DE0112.258 <- FindMarkers(combined, ident.1 = c("0","1","12" ), ident.2 = c("2","5","8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE0112.All <- FindMarkers(combined, ident.1 = c("0","1","12" ), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.2 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.5 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("5"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.8 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE2.5 <- FindMarkers(combined, ident.1 = c("2" ), ident.2 = c("5"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE2.8 <- FindMarkers(combined, ident.1 = c("2" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE5.8 <- FindMarkers(combined, ident.1 = c("5" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4351.20 <- FindMarkers(KO.cells, ident.1 = c("3","4","5","1" ), ident.2 = c("2","0"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.3 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "3",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.2 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "2",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.0 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "0",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.1 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "1",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.5 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "5",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
##### comaprison between G171B vs G171C for KO.cell clusters of 0+1+12 ######33
DEK4_C.B <- FindMarkers(KO.cells, ident.1 = "4_G171C", ident.2 = "4_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK3_C.B <- FindMarkers(KO.cells, ident.1 = "3_G171C", ident.2 = "3_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK1_C.B <- FindMarkers(KO.cells, ident.1 = "1_G171C", ident.2 = "1_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK2_C.B <- FindMarkers(KO.cells, ident.1 = "2_G171C", ident.2 = "2_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK0_C.B <- FindMarkers(KO.cells, ident.1 = "0_G171C", ident.2 = "0_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK5_C.B <- FindMarkers(KO.cells, ident.1 = "5_G171C", ident.2 = "5_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
################################### Write the results #########################
write.csv(DE0112.258, "CountResult/Markers/DE0112.258")
write.csv(DE0112.All, "CountResult/Markers/DE0112.All")
write.csv(DE1.2, "CountResult/Markers/DE1.2")
write.csv(DE1.5, "CountResult/Markers/DE1.5")
write.csv(DE1.8, "CountResult/Markers/DE1.8")
write.csv(DE2.5, "CountResult/Markers/DE2.5")
write.csv(DE2.8, "CountResult/Markers/DE2.8")
write.csv(DE5.8, "CountResult/Markers/DE5.8")
write.csv(DEK4351.20, "CountResult/Markers/DEK4351.20")
write.csv(DEK4.3, "CountResult/Markers/DEK4.3")
write.csv(DEK4.2, "CountResult/Markers/DEK4.2")
write.csv(DEK4.0, "CountResult/Markers/DEK4.0")
write.csv(DEK4.1, "CountResult/Markers/DEK4.1")
write.csv(DEK4.5, "CountResult/Markers/DEK4.5")
write.csv(DEK4_C.B, "CountResult/Markers/DEK4_C.B")
write.csv(DEK3_C.B, "CountResult/Markers/DEK3_C.B")
write.csv(DEK1_C.B, "CountResult/Markers/DEK1_C.B")
write.csv(DEK2_C.B, "CountResult/Markers/DEK2_C.B")
write.csv(DEK0_C.B, "CountResult/Markers/DEK0_C.B")
write.csv(DEK5_C.B, "CountResult/Markers/DEK5_C.B")
combined$celltype.stim <- paste(Idents(combined), combined$stim, sep = "_")
combined$celltype <- Idents(combined)
Idents(combined) <- "celltype.stim"
test.combined$celltype.stim <- paste(Idents(test.combined), test.combined$stim, sep = "_")
test.combined$celltype <- Idents(test.combined)
Idents(test.combined) <- "celltype.stim"
Combined_G171B_vs_G171C <- FindMarkers(combined, ident.1 = c("0_G171B","1_G171B","2_G171B","3_G171B","4_G171B","5_G171B","6_G171B","7_G171B","8_G171B","9_G171B","10_G171B","11_G171B","12_G171B" ), ident.2 = c("0_G171C","1_G171C","2_G171C","3_G171C","4_G171C","5_G171C","6_G171C","7_G171C","8_G171C","9_G171C","10_G171C","11_G171C","12_G171C" ), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust1 <- FindMarkers(combined, ident.1 = c("1_G171C"), ident.2 = c("1_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust0 <- FindMarkers(combined, ident.1 = c("0_G171C"), ident.2 = c("0_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust12 <- FindMarkers(combined, ident.1 = c("12_G171C"), ident.2 = c("12_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust2 <- FindMarkers(combined, ident.1 = c("2_G171C"), ident.2 = c("2_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust5 <- FindMarkers(combined, ident.1 = c("5_G171C"), ident.2 = c("5_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust8 <- FindMarkers(combined, ident.1 = c("8_G171C"), ident.2 = c("8_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust3 <- FindMarkers(combined, ident.1 = c("3_G171C"), ident.2 = c("3_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust4 <- FindMarkers(combined, ident.1 = c("4_G171C"), ident.2 = c("4_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust6 <- FindMarkers(combined, ident.1 = c("6_G171C"), ident.2 = c("6_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust7 <- FindMarkers(combined, ident.1 = c("7_G171C"), ident.2 = c("7_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust9 <- FindMarkers(combined, ident.1 = c("9_G171C"), ident.2 = c("9_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust10 <- FindMarkers(combined, ident.1 = c("10_G171C"), ident.2 = c("10_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust11 <- FindMarkers(combined, ident.1 = c("11_G171C"), ident.2 = c("11_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
write.csv(Combined_G171C_vs_G171B_Clust1, "CountResult/Markers/Combined_G171C_vs_G171B_Clust1")
write.csv(Combined_G171C_vs_G171B_Clust0, "CountResult/Markers/Combined_G171C_vs_G171B_Clust0")
write.csv(Combined_G171C_vs_G171B_Clust12, "CountResult/Markers/Combined_G171C_vs_G171B_Clust12")
write.csv(Combined_G171C_vs_G171B_Clust2, "CountResult/Markers/Combined_G171C_vs_G171B_Clust2")
write.csv(Combined_G171C_vs_G171B_Clust5, "CountResult/Markers/Combined_G171C_vs_G171B_Clust5")
write.csv(Combined_G171C_vs_G171B_Clust8, "CountResult/Markers/Combined_G171C_vs_G171B_Clust8")
write.csv(Combined_G171C_vs_G171B_Clust3, "CountResult/Markers/Combined_G171C_vs_G171B_Clust3")
write.csv(Combined_G171C_vs_G171B_Clust4, "CountResult/Markers/Combined_G171C_vs_G171B_Clust4")
write.csv(Combined_G171C_vs_G171B_Clust6, "CountResult/Markers/Combined_G171C_vs_G171B_Clust6")
write.csv(Combined_G171C_vs_G171B_Clust7, "CountResult/Markers/Combined_G171C_vs_G171B_Clust7")
write.csv(Combined_G171C_vs_G171B_Clust9, "CountResult/Markers/Combined_G171C_vs_G171B_Clust9")
write.csv(Combined_G171C_vs_G171B_Clust10, "CountResult/Markers/Combined_G171C_vs_G171B_Clust10")
write.csv(Combined_G171C_vs_G171B_Clust11, "CountResult/Markers/Combined_G171C_vs_G171B_Clust11")
PC_vs_PP_G171B <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B"), ident.2 = c("0_G171B","2_G171B"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171B, n = 15)
PC_vs_PP_G171C <- FindMarkers(KO.cells, ident.1 = c("4_G171C","3_G171C"), ident.2 = c("0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171C, n = 15)
PC_vs_PP_G171BC <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B","4_G171C","3_G171C"), ident.2 = c("0_G171B","2_G171B","0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171C, n = 15)
lnc5998_KO_DE_1 <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B","5_G171B"), ident.2 = c("4_G171C","3_G171C","5_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(lnc5998_KO_DE, n = 15)
lnc5998_KO_DE_2 <- FindMarkers(KO.cells, ident.1 = c("4_G171C","3_G171C","5_G171C"), ident.2 =c("4_G171B","3_G171B","5_G171B") , verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(lnc5998_KO_DE, n = 15)
cell.type.genes <- (PC_vs_PP_G171BC[1]) # Takes all the unique cell type specific genes
GOterms = topGOterms(fg.genes = cell.type.genes, bg.genes = rownames(KO.cells@assays$RNA@dataKO.cells@assays$RNA@data), organism = "Mouse")
cell.type.genes <- (PC_vs_PP_G171BC[1]) # Takes all the unique cell type specific genes
GOterms = topGOterms(fg.genes = rownames(cell.type.genes), bg.genes = rownames(KO.cells@assays$RNA@data), organism = "Mouse")
AvergeExpression2 <- function (object, assays = NULL, features = NULL, return.seurat = FALSE,
add.ident = NULL, slot = "data", use.scale = FALSE, use.counts = FALSE,
verbose = TRUE, ...)
{
fxn.average <- switch(EXPR = slot, data = function(x) {
return(mean(x = x))
}, mean)
object.assays <- FilterObjects(object = object, classes.keep = "Assay")
assays <- assays %||% object.assays
ident.orig <- Idents(object = object)
orig.levels <- levels(x = Idents(object = object))
ident.new <- c()
if (!all(assays %in% object.assays)) {
assays <- assays[assays %in% object.assays]
if (length(assays) == 0) {
stop("None of the requested assays are present in the object")
}
else {
warning("Requested assays that do not exist in object. Proceeding with existing assays only.")
}
}
if (!is.null(x = add.ident)) {
new.data <- FetchData(object = object, vars = add.ident)
new.ident <- paste(Idents(object)[rownames(x = new.data)],
new.data[, 1], sep = "_")
Idents(object, cells = rownames(new.data)) <- new.ident
}
data.return <- list()
for (i in 1:length(x = assays)) {
data.use <- GetAssayData(object = object, assay = assays[i],
slot = slot)
features.assay <- features
if (length(x = intersect(x = features, y = rownames(x = data.use))) <
1) {
features.assay <- rownames(x = data.use)
}
data.all <- data.frame(row.names = features.assay)
for (j in levels(x = Idents(object))) {
temp.cells <- WhichCells(object = object, idents = j)
features.assay <- unique(x = intersect(x = features.assay,
y = rownames(x = data.use)))
if (length(x = temp.cells) == 1) {
data.temp <- (data.use[features.assay, temp.cells])
if (slot == "data") {
data.temp <- data.temp
}
}
if (length(x = temp.cells) > 1) {
data.temp <- apply(X = data.use[features.assay,
temp.cells, drop = FALSE], MARGIN = 1, FUN = fxn.average)
}
data.all <- cbind(data.all, data.temp)
colnames(x = data.all)[ncol(x = data.all)] <- j
if (verbose) {
message(paste("Finished averaging", assays[i],
"for cluster", j))
}
if (i == 1) {
ident.new <- c(ident.new, as.character(x = ident.orig[temp.cells[1]]))
}
}
names(x = ident.new) <- levels(x = Idents(object))
data.return[[i]] <- data.all
names(x = data.return)[i] <- assays[[i]]
}
if (return.seurat) {
toRet <- CreateSeuratObject(counts = data.return[[1]],
project = "Average", assay = names(x = data.return)[1],
...)
if (length(x = data.return) > 1) {
for (i in 2:length(x = data.return)) {
toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]])
}
}
if (DefaultAssay(object = object) %in% names(x = data.return)) {
DefaultAssay(object = toRet) <- DefaultAssay(object = object)
}
Idents(toRet, cells = colnames(x = toRet)) <- ident.new[colnames(x = toRet)]
Idents(object = toRet) <- factor(x = Idents(object = toRet),
levels = as.character(x = orig.levels), ordered = TRUE)
toRet <- NormalizeData(object = toRet, verbose = verbose)
toRet <- ScaleData(object = toRet, verbose = verbose)
return(toRet)
}
else {
return(data.return)
}
}
Find differential expression markers
combined.markers <- FindAllMarkers(object = combined, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
combined.markers %>% group_by(cluster) %>% top_n(2, avg_logFC)
KO.markers <- FindAllMarkers(object = KO.cells, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
response3 <- FindMarkers(combined, ident.1 = c("1_G171B","0_G171B","2_G171B"), ident.2 = c("1_G171C", "0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(response3, n = 15)
Visualize top genes in principal components
Later on (in FindClusters and TSNE) you will pick a number of principal components to use. This has the effect of keeping the major directions of variation in the data and, ideally, supressing noise. There is no correct answer to the number to use, but a decent rule of thumb is to go until the plot plateaus.
PCElbowPlot(object = tiss1)
Choose the number of principal components to use.
# Set number of principal components.
n.pcs = 10
The clustering is performed based on a nearest neighbors graph. Cells that have similar expression will be joined together. The Louvain algorithm looks for groups of cells with high modularity–more connections within the group than between groups. The resolution parameter determines the scale. Higher resolution will give more clusters, lower resolution will give fewer.
For the top-level clustering, aim to under-cluster instead of over-cluster. It will be easy to subset groups and further analyze them below.
# Set resolution
res.used <- 4
tiss1 <- FindClusters(object = tiss1, reduction.type = "pca", dims.use = 1:n.pcs,
resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
We use TSNE solely to visualize the data.
# If cells are too spread out, you can raise the perplexity. If you have few cells, try a lower perplexity (but never less than 10).
tiss1 <- RunTSNE(object = tiss1, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
TSNEPlot(object = tiss1, do.label = T, pt.size = 1.2, label.size = 4)
Compare to previous annotations
previous_annotation = read.csv("/Users/kkarri/Documents/Lab/Single_cell_project/dropseq/Liver_droplet_annotation.csv", stringsAsFactors = FALSE)
cols = c('free_annotation', 'cell_ontology_class')
for (col in cols){
previous_col = paste0('previous_', col)
tiss1@meta.data[, previous_col] <- "NA"
tiss1@meta.data[as.character(previous_annotation$X), previous_col] <- previous_annotation[, col]
print(table(tiss1@meta.data[, previous_col]))
print(table(tiss1@meta.data[, previous_col], tiss@ident))
}
tiss1 = compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "previous_cell_ontology_class")
table(tiss1@meta.data[, "previous_cell_ontology_class"], tiss@ident)
tiss1 = compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "previous_cell_ontology_class")
table(tiss1@meta.data[, "previous_cell_ontology_class"], tiss1@ident)
TSNEPlot(tiss1, group.by="mouse.sex")
TSNEPlot(tiss1, group.by="mouse.id")
Significant genes:
hepatocyte: Alb, Ttr, Apoa1, and Serpina1c pericentral: Cyp2e1, Glul, Oat, Gulo midlobular: Ass1, Hamp, Gstp1, Ubb periportal: Cyp2f2, Pck1, Hal, Cdh1
endothelial cells: Pecam1, Nrp1, Kdr+ and Oit3+ Kuppfer cells: Emr1, Clec4f, Cd68, Irf7 NK/NKT cells: Zap70, Il2rb, Nkg7, Cxcr6, Klr1c, Gzma B cells: Cd79a, Cd79b, Cd74 and Cd19 Immune cells: Ptprc
Dotplots let you see the intensity of exppression and the fraction of cells expressing for each of your genes of interest. The radius shows you the percent of cells in that cluster with at least one read sequenced from that gene. The color level indicates the average Z-score of gene expression for cells in that cluster, where the scaling is done over taken over all cells in the sample.
We have various immune cell types in the last cluster
Using the markers above, we can confidentaly label many of the clusters:
19: endothelial cells 20: bile duct epithelial cells 21: immune cells rest are hepatocytes
We will add those cell_ontology_classes to the dataset.
tiss1 <- StashIdent(object = tiss1, save.name = "cluster.ids")
cluster.ids <- c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
free_annotation <- c(
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
"bile duct epithelial cells",
"endothelial cell of hepatic sinusoid",
NA
)
cell_ontology_class <- c(
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"hepatocyte",
"duct epithelial cell",
"endothelial cell of hepatic sinusoid",
"hepatocyte")
tiss1 = stash_annotations(tiss1, cluster.ids, free_annotation, cell_ontology_class)
Checking for batch effects
Color by metadata, like plate barcode, to check for batch effects.
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "channel")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "free_annotation")
Subcluster
Let’s drill down on the hepatocytes.
subtiss1 = SubsetData(tiss1, ident.use = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20))
subtiss1 <- subtiss1 %>% ScaleData() %>%
FindVariableGenes(do.plot = FALSE, x.high.cutoff = Inf, y.cutoff = 0.5) %>%
RunPCA(do.print = FALSE)
PCHeatmap(object = subtiss1, pc.use = 1:3, cells.use = 20, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)
PCElbowPlot(subtiss1)
sub.n.pcs = 8
sub.res.use = 0.5
subtiss1 <- subtiss1 %>% FindClusters(reduction.type = "pca", dims.use = 1:sub.n.pcs,
resolution = sub.res.use, print.output = 0, save.SNN = TRUE, force.recalc = TRUE) %>%
RunTSNE(dims.use = 1:sub.n.pcs, seed.use = 10, perplexity=8)
TSNEPlot(object = subtiss1, do.label = T, pt.size = 1, label.size = 4)
BuildClusterTree(subtiss1)
From these genes, it appears that the clusters represent:
0: midlobular male 1: pericentral female 2: periportal female 3: periportal male 4: midlobular male 5: pericentral male 6: midlobular female 7: midlobular female
The multitude of clusters of each type correspond mostly to individual animals/sexes.
table(FetchData(subtiss1, c('mouse.sex','ident')) %>% droplevels())
sub.cluster.ids <- c(0, 1, 2, 3, 4, 5, 6, 7)
sub.free_annotation <- c("periportal female", "midlobular male", "pericentral female", "periportal male", "midlobular male", "pericentral male", "midlobular female", "midlobular female")
sub.cell_ontology_class <- c("hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte")
subtiss1 = stash_annotations(subtiss1, sub.cluster.ids, sub.free_annotation, sub.cell_ontology_class)
tiss1 = stash_subtiss_in_tiss(tiss1, subtiss1)
Liver zonation markers
genes_zones = c('Cyp2e1', 'Glul', 'Oat', 'Gulo',
'Ass1', 'Hamp', 'Gstp1', 'Ubb',
'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')
FeaturePlot(subtiss1,c(genes_zones),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(subtiss1,c(genes_zones), plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()
TSNEPlot(object = subtiss1, do.label = T, pt.size = 1, label.size = 4, group.by="free_annotation")
TSNEPlot(object = tiss1, do.label = T, pt.size = 1, label.size = 4, group.by="free_annotation")
Find cluster markers for lncRNAs
MIN_LOGFOLD_CHANGE = 1 # set to minimum required average log fold change in gene expression.
MIN_PCT_CELLS_EXPR_GENE = 0.1
all.markers = FindAllMarkers(tiss1,
min.pct = MIN_PCT_CELLS_EXPR_GENE,
logfc.threshold = MIN_LOGFOLD_CHANGE,
only.pos = TRUE,
test.use="bimod") # likelihood ratio test
lnc_all_markers <- grep(pattern = "^ncRNA", x= rownames(all.markers), value = TRUE)
lnc_all_markers
#[1] "ncRNA_inter_chr10_92081" "ncRNA_intra_chr16_13383" "ncRNA_inter_chr17_13605" "ncRNA_inter_chr14_11815"
#[5] "ncRNA_inter_chr18_14344"
FeaturePlot(subtiss1,c(lnc_all_markers),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
######################### lncRNA markers- CELL TYPE MARKER ############
markers.hep <- FindMarkers(object = tiss1, ident.1 = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20), ident.2 = c(18,19),only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
lnc_markers_hep <- grep(pattern = "^ncRNA", x= rownames(markers.hep), value = TRUE)
lnc_markers_hep
FeaturePlot(tiss1,c(lnc_markers_hep),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(tiss1,lnc_markers_hep, plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()
#[1] "ncRNA_as_chr11_9423" "ncRNA_as_chr7_6166" "ncRNA_inter_chr4_3295" "ncRNA_inter_chr17_14026"
#[5] "ncRNA_inter_chr3_2915" "ncRNA_inter_chr5_4547" "ncRNA_inter_chr15_12684"
markers.hep.MAST <- FindMarkers(object = tiss1, ident.1 = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20), ident.2 = c(18,19),only.pos = TRUE, test.use = "MAST")
lnc_markers_hep_MAST_TABLE <- subset(markers.hep.MAST, grepl("^ncRNA", rownames(markers.hep.MAST)))
lnc_markers_hep_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.hep.MAST), value = TRUE)
lnc_markers_hep_MAST
markers.endo <- FindMarkers(object = tiss1, ident.1 = c(18,19), only.pos = TRUE, min.pct = 0.25, thresh.use = 0.5)
lnc_markers_endo <- grep(pattern = "^ncRNA", x= rownames(markers.endo), value = TRUE)
lnc_markers_endo
FeaturePlot(tiss1,c(lnc_markers_endo),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(tiss1,lnc_markers_endo, plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()
#"ncRNA_inter_chr15_12770", "ncRNA_inter_chr12_10817", "ncRNA_as_chr13_11451",
markers.endo.MAST <- FindMarkers(object = tiss1, ident.1 = 19, test.use = "MAST" ,only.pos = TRUE)
lnc_markers_endo_MAST_TABLE <- subset(markers.endo.MAST, grepl("^ncRNA", rownames(markers.endo.MAST)))
lnc_markers_endo_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.endo.MAST), value = TRUE)
lnc_markers_endo_MAST
################## lncRNA expression ########################3
# "ncRNA_inter_chr17_13605" , "ncRNA_intra_chr16_13383"
########## Periporal markers- zonation markers ############
markers.pc <- FindMarkers(object = subtiss1, ident.1 = c(2,5),
only.pos = FALSE, min.pct = 0.001, thresh.use = 0.001, test.use = "bimod" )
markers.pc.MAST <- FindMarkers(object = subtiss1, ident.1 = c(2,5), ident.2 = c(0,3), test.use = "MAST" ,only.pos = TRUE)
lnc_markers_pc <- subset(markers.pc, grepl("^ncRNA", rownames(markers.pc)))
lnc_markers_pc <- grep(pattern = "^ncRNA", x= rownames(markers.pc), value = TRUE)
lnc_markers_pc
markers.pc.MAST <- FindMarkers(object = subtiss1, ident.1 = c(2,5), ident.2 = c(0,3), test.use = "MAST" ,only.pos = TRUE)
lnc_markers_pc_MAST <- subset(markers.pc.MAST, grepl("^ncRNA", rownames(markers.pc.MAST)))
lnc_markers_pc_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.pc.MAST), value = TRUE)
lnc_markers_pc_MAST
DotPlot(tiss1, lnc_markers_pc, plot.legend = T, col.max = 2.5, do.return = T, group.by="free_annotation") + coord_flip()
FeaturePlot(subtiss1,c(lnc_markers_pc),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
############################### midlobular genes #############
markers.mid <- FindMarkers(object = subtiss1, ident.1 = c(1,4,6,7),
only.pos = FALSE, min.pct = 0.001, thresh.use = 0.05)
lnc_markers_mid <- subset(markers.mid, grepl("^ncRNA", rownames(markers.mid)))
lnc_markers_mid <- grep(pattern = "^ncRNA", x= rownames(markers.mid), value = TRUE)
lnc_markers_mid
DotPlot(tiss1, lnc_markers_mid, plot.legend = T, col.max = 2.5, do.return = T, group.by="free_annotation") + coord_flip()
FeaturePlot(subtiss1,c(lnc_markers_mid),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
markers.mid.MAST <- FindMarkers(object = subtiss1, ident.1 = c(1,4,6,7),test.use = "MAST",only.pos = TRUE )
lnc_markers_mid_MAST_TABLE <- subset(markers.mid.MAST, grepl("^ncRNA", rownames(markers.mid.MAST)))
lnc_markers_mid_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.mid.MAST), value = TRUE)
lnc_markers_mid_MAST
#####3 periportalmarker genes############3
markers.pp <- FindMarkers(object = subtiss1, ident.1 = c(0,3),
only.pos = FALSE, min.pct = 0.001, thresh.use = 0.05)
lnc_markers_pp <- subset(markers.pp, grepl("^ncRNA", rownames(markers.pp)))
lnc_markers_pp <- grep(pattern = "^ncRNA", x= rownames(markers.pp), value = TRUE)
lnc_markers_pp
markers.pp.MAST <- FindMarkers(object = subtiss1, ident.1 = c(0,3), ident.2 = c(2,5),test.use = "MAST",only.pos = TRUE )
lnc_markers_pp_MAST_TABLE <- subset(markers.pp.MAST, grepl("^ncRNA", rownames(markers.pp.MAST)))
lnc_markers_pp_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.pp.MAST), value = TRUE)
lnc_markers_pp_MAST
FeaturePlot(subtiss1,c(lnc_markers_pp),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_pp,"Cyp2e1","Cyp2f2"), plot.legend = T, col.max = 2.5, do.return = T, group.by= "free_annotation") + coord_flip()
################## amle and female specific ############################
markers.female <- FindMarkers(object = subtiss1, ident.1 = c(0,2,6,7),
only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1)
lnc_markers_female <- subset(markers.female, grepl("^ncRNA", rownames(markers.female)))
lnc_markers_female <- grep(pattern = "^ncRNA", x= rownames(markers.female), value = TRUE)
lnc_markers_female
FeaturePlot(subtiss1,c(lnc_markers_female),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_female,"Cyp2e1","Cyp2f2"), plot.legend = T, col.max = 2.5, do.return = T, group.by= "free_annotation") + coord_flip()
markers.male <- FindMarkers(object = subtiss1, ident.1 = c(1,3,4,5),
only.pos = TRUE, min.pct = 0.001, thresh.use = 0.05)
lnc_markers_male <- subset(markers.male, grepl("^ncRNA", rownames(markers.male)))
lnc_markers_male <- grep(pattern = "^ncRNA", x= rownames(markers.male), value = TRUE)
lnc_markers_male
FeaturePlot(subtiss1,c(lnc_markers_male),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_male,"Cyp2e1","Cyp2f2"), plot.legend = T, col.max = 2.5, do.return = T, group.by= "free_annotation") + coord_flip()
############################ Female zonate specific genes ###################################
markers.pericentral.female <- FindMarkers(object = tiss1, ident.1 = c(6,11,14,20), test.use = "MAST",
only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,12,13,8,5,16), logfc.threshold = 1)
markers.periportal.female <- FindMarkers(object = tiss1, ident.1 = c(2,3,15),
only.pos = TRUE, min.pct = 0.1, ident.2 = c(6,11,14,20,12,13,8,5,16), logfc.threshold = 1)
markers.pericentral.male <- FindMarkers(object = tiss1, ident.1 = c(13,12), test.use = "MAST",
only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,8,5,16,6,11,14,20), logfc.threshold = 1)
markers.periportal.male <- FindMarkers(object = tiss1, ident.1 = c(8,5,16), test.use = "MAST",
only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,13,12,6,11,14,20), logfc.threshold = 1)
############### xeno-lncs CAR?RXR ##################
FeaturePlot(tiss1,c("ncRNA_inter_chr15_12684","ncRNA_inter_chr8_7430","ncRNA_inter_chr7_6222"),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
#######################################################################
markers.endo.2 <- FindMarkers(object = seurat_drop, logfc.threshold = 2,ident.1 = "Endothelial",
only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
lnc.endo.2 <- grep(pattern = "^ncRNA", x= rownames(markers.endo.2), value = TRUE)
lnc.endo.2
Zonated lncRNAs
pp_zontaed <- c('ncRNA_inter_chr14_12016','ncRNA_as_chr19_15090','ncRNA_inter_chr10_9351','ncRNA_inter_chr16_13170',
'ncRNA_inter_chr3_2697','ncRNA_inter_chr1_274','ncRNA_as_chr6_5518','ncRNA_inter_chr14_12066','ncRNA_intra_chr12_10871',
'ncRNA_inter_chr16_13510','ncRNA_inter_chr3_2314','ncRNA_inter_chr10_9264,'ncRNA_inter_chr9_8122')
Checking for batch effects
Color by metadata, like plate barcode, to check for batch effects.
TSNEPlot(object = subtiss1, do.return = TRUE, group.by = "mouse.sex")
Final coloring
Color by cell ontology class on the original TSNE.
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "cell_ontology_class")
Save the Robject for later
filename = here('00_data_ingest', '04_tiss1ue_robj_generated',
paste0("droplet_", tiss1ue_of_interest, "refinedcells_seurat_tiss1.Robj"))
print(filename)
save(tiss1, file=filename)
# To reload a saved object
filename = here('00_data_ingest', '04_tiss1ue_robj_generated',
paste0("droplet_", tissue_of_interest, "seurat_smartdrop-integrated-8272019.Robj"))
load(file=filename)
---
 title: "Liver Droplet- Raw data Notebook"
 output: html_notebook
---




```{r}
tissue_of_interest = "Liver"
library(here)
source("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/boilerplate.R")
#tiss = load_tissue_droplet(tissue_of_interest)
#library(scater)
library(dplyr)
library(Seurat)
library(cowplot)
#library(MAST)
require(stringr)
require(reshape2)
require(ggplot2)
require(MASS)
library(tools)
require(data.table)
library(ggfortify)
library(tidyverse)
require(dplyr)
library(miscTools)
library(caret)
library(Rtsne)
library(ggrepel)

```

G172

```{r}

G172.umis <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/filtered_feature_bc_matrix")

#G172.umis.raw <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/raw_feature_bc_matrix/")

G172.htos <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/HASH_Citeseq/HASH_Results_filtered-10X-BC/umi_count", gene.column=1)

# Select cell barcodes detected by both RNA and HTO In the example datasets we have already
# filtered the cells for you, but perform this step for clarity.
joint.bcs <- intersect(colnames(G172.umis), colnames(G172.htos))
# Subset RNA and HTO counts by joint cell barcodes
joint_cite1_cite2 <- intersect(colnames(G172.htos), colnames(df2))
G172.umis <- G172.umis[, joint.bcs]
G172.htos <- as.matrix(G172.htos[, joint.bcs])
G172.htos <- G172.htos[-5,]

# Confirm that the HTO have the correct names
rownames(G172.htos)

# Setup Seurat object
G172.hashtag <- CreateSeuratObject(counts = G172.umis)
# Normalize RNA data with log normalization
G172.hashtag <- NormalizeData(G172.hashtag)
# Find and scale variable features
G172.hashtag <- FindVariableFeatures(G172.hashtag, selection.method = "mean.var.plot")
G172.hashtag <- ScaleData(G172.hashtag, features = VariableFeatures(G172.hashtag))
# Add HTO data as a new assay independent from RNA
G172.hashtag[["HTO"]] <- CreateAssayObject(counts = G172.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
G172.hashtag <- NormalizeData(G172.hashtag, assay = "HTO", normalization.method = "CLR")

# If you have a very large dataset we suggest using k_function = 'clara'. This is a k-medoid
# clustering function for large applications You can also play with additional parameters (see
# documentation for HTODemux()) to adjust the threshold for classification Here we are using the
# default settings
G172.hashtag <- HTODemux(G172.hashtag, assay = "HTO", positive.quantile = 0.99, kfunc = 'clara')
# Global classification results
table(G172.hashtag$HTO_classification.global)
table(G172.hashtag$hash.ID)

# Group cells based on the max HTO signal
Idents(G172.hashtag) <- "hash.ID"
RidgePlot(G172.hashtag, assay = "HTO", features = rownames(G172.hashtag[["HTO"]])[1:4], ncol = 2, nrow=2)

#Idents(G172.hashtag) <- "HTO_classification.global"

# First, we will remove negative cells from the object
G172.hashtag.subset <- subset(G172.hashtag, idents =c("Negative","Doublet"), invert = TRUE)

## Feature scatterplot for hastags IDs ########
FeatureScatter(G172.hashtag.subset, feature1 = "M1-ATGATGAACAGCCAG", feature2 = "M2-TGACGCCGTTGTTGT")
FeatureScatter(G172.hashtag.subset, feature1 = "M1-ATGATGAACAGCCAG", feature2 = "M3-GCCTAGTATGATCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M1-ATGATGAACAGCCAG", feature2 = "M4-AGTCACAGTATTCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M2-TGACGCCGTTGTTGT", feature2 = "M3-GCCTAGTATGATCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M2-TGACGCCGTTGTTGT", feature2 = "M4-AGTCACAGTATTCCA")
FeatureScatter(G172.hashtag.subset, feature1 = "M3-GCCTAGTATGATCCA", feature2 = "M4-AGTCACAGTATTCCA")


# Calculate a distance matrix using HTO
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = G172.hashtag.subset, assay = "HTO"))))

# Calculate tSNE embeddings with a distance matrix
G172.hashtag.subset <- RunTSNE(G172.hashtag.subset, distance.matrix = hto.dist.mtx, perplexity = 100)
DimPlot(G172.hashtag.subset)
HTOHeatmap(G172.hashtag, assay = "HTO", ncells = 26740)


########################## Rescue doublets ########################
G172.doublet <- subset(G172.hashtag, idents = "Doublet")
G172.doublet.rescue <- HTODemux(G172.doublet, assay = "HTO", positive.quantile = 0.99, kfunc = 'clara')
Idents(G172.doublet.rescue) <- "hash.ID"
RidgePlot(G172.doublet.rescue, assay = "HTO", features = rownames(G172.doublet.rescue[["HTO"]])[1:4], ncol = 2, nrow=2)
###############################################################################


# Extract the singlets M1 #############3
G172.M1 <- subset(G172.hashtag, idents = "M1-ATGATGAACAGCCAG", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M1$stim  <- "G172M1"
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- SCTransform(G172.M1,verbose =TRUE)
# Select the top 1000 most variable features
G172.M1 <- NormalizeData(G172.M1, verbose = FALSE)
G172.M1 <- FindVariableFeatures(G172.M1, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M1 <- ScaleData(G172.M1, features = VariableFeatures(G172.M1))
# Run PCA
G172.M1 <- RunPCA(G172.M1,npcs = 30, features = VariableFeatures(G172.M1))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M1 <- RunUMAP(G172.M1, reduction = "pca", dims = 1:25)
G172.M1 <- FindNeighbors(G172.M1, reduction = "pca", dims = 1:25)
G172.M1 <- FindClusters(G172.M1, resolution = 0.6 )   
G172.M1.p1<- UMAPPlot(G172.M1, reduction = "umap", label=TRUE, label.size=5)
G172.M1.p1
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- NormalizeData(G172.M1, verbose = TRUE)
d1 <- DotPlot(G172.M1, features = all_genes)+RotatedAxis()
plot_grid(G172.M1.p1,d1)


# Extract the singlet for M2 #############3
G172.M2 <- subset(G172.hashtag, idents = "M2-TGACGCCGTTGTTGT", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M2$stim  <- "G172M2"
DefaultAssay(G172.M2) <- "RNA"
G172.M2 <- SCTransform(G172.M2,verbose =TRUE)
# Select the top 1000 most variable features
G172.M2 <- NormalizeData(G172.M2, verbose = FALSE)
G172.M2 <- FindVariableFeatures(G172.M2, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M2 <- ScaleData(G172.M2, features = VariableFeatures(G172.M2))
# Run PCA
G172.M2 <- RunPCA(G172.M2,npcs = 30, features = VariableFeatures(G172.M2))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M2 <- RunUMAP(G172.M2, reduction = "pca", dims = 1:25)
G172.M2 <- FindNeighbors(G172.M2, reduction = "pca", dims = 1:25)
G172.M2 <- FindClusters(G172.M2, resolution = 0.5 )   
G172.M2.p1<- UMAPPlot(G172.M2, reduction = "umap", label=TRUE, label.size=5)
G172.M2.p1
DefaultAssay(G172.M2) <- "RNA"
G172.M2 <- NormalizeData(G172.M2, verbose = TRUE)
d2 <- DotPlot(G172.M2, features = all_genes)+RotatedAxis()
plot_grid(G172.M2.p1,d2)


#### Extract the singlet for M3 #####
G172.M3 <- subset(G172.hashtag, idents = "M3-GCCTAGTATGATCCA", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M3) <- "RNA"
G172.M3 <- SCTransform(G172.M3,verbose =TRUE)
# Select the top 1000 most variable features
G172.M3 <- NormalizeData(G172.M3, verbose = FALSE)
G172.M3 <- FindVariableFeatures(G172.M3, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M3 <- ScaleData(G172.M3, features = VariableFeatures(G172.M3))
# Run PCA
G172.M3 <- RunPCA(G172.M3,npcs = 30, features = VariableFeatures(G172.M3))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M3 <- RunUMAP(G172.M3, reduction = "pca", dims = 1:25)
G172.M3 <- FindNeighbors(G172.M3, reduction = "pca", dims = 1:25)
G172.M3 <- FindClusters(G172.M3, resolution = 0.5 )   
G172.M3.p1<- UMAPPlot(G172.M3, reduction = "umap", label=TRUE, label.size=5)
G172.M3.p1
DefaultAssay(G172.M3) <- "RNA"
G172.M3 <- NormalizeData(G172.M3, verbose = TRUE)
d3 <- DotPlot(G172.M3, features = all_genes)+RotatedAxis()
plot_grid(G172.M3.p1,d3)

##### Extract the singlet for M4 #####
G172.M4 <- subset(G172.hashtag, idents = "M4-AGTCACAGTATTCCA", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M4) <- "RNA"
G172.M4 <- SCTransform(G172.M4,verbose =TRUE)
# Select the top 1000 most variable features
G172.M4 <- NormalizeData(G172.M4, verbose = FALSE)
G172.M4 <- FindVariableFeatures(G172.M4, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M4 <- ScaleData(G172.M4, features = VariableFeatures(G172.M4))
# Run PCA
G172.M4 <- RunPCA(G172.M4,npcs = 30, features = VariableFeatures(G172.M4))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M4 <- RunUMAP(G172.M4, reduction = "pca", dims = 1:25)
G172.M4 <- FindNeighbors(G172.M4, reduction = "pca", dims = 1:25)
G172.M4 <- FindClusters(G172.M4, resolution = 0.5 )   
G172.M4.p1<- UMAPPlot(G172.M4, reduction = "umap", label=TRUE, label.size=5)
G172.M4.p1
DefaultAssay(G172.M4) <- "RNA"
G172.M4 <- NormalizeData(G172.M4, verbose = TRUE)
d4 <- DotPlot(G172.M4, features = all_genes)+RotatedAxis()
plot_grid(G172.M4.p1,d4)


############## Combined ####################33
G172.M5 <- subset(G172.hashtag, idents = c("M1-ATGATGAACAGCCAG","M2-TGACGCCGTTGTTGT","M3-GCCTAGTATGATCCA","M4-AGTCACAGTATTCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M5) <- "RNA"
G172.M5 <- SCTransform(G172.M5,verbose =TRUE)
# Select the top 1000 most variable features
G172.M5 <- NormalizeData(G172.M5, verbose = FALSE)
G172.M5 <- FindVariableFeatures(G172.M5, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M5 <- ScaleData(G172.M5, features = VariableFeatures(G172.M5))
# Run PCA
G172.M5 <- RunPCA(G172.M5,npcs = 30, features = VariableFeatures(G172.M5))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M5 <- RunUMAP(G172.M5, reduction = "pca", dims = 1:25)
G172.M5 <- FindNeighbors(G172.M5, reduction = "pca", dims = 1:25)
G172.M5 <- FindClusters(G172.M5, resolution = 0.5 )   
G172.M5.p1<- UMAPPlot(G172.M5, reduction = "umap", label=TRUE, label.size=5)
G172.M5.p1
DefaultAssay(G172.M5) <- "RNA"
G172.M5 <- NormalizeData(G172.M5, verbose = TRUE)
d5 <- DotPlot(G172.M5, features = c(all_genes,'Cyp2b10','Cyp2d9'))+RotatedAxis()
plot_grid(G172.M5.p1,d5)


### control 
G172.M6 <- subset(G172.hashtag.subset, idents = c("M1-ATGATGAACAGCCAG","M3-GCCTAGTATGATCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M6) <- "RNA"
#G172.M6 <- SCTransform(G172.M6,verbose =TRUE)
# Select the top 1000 most variable features
G172.M6 <- NormalizeData(G172.M6, verbose = FALSE)
G172.M6 <- FindVariableFeatures(G172.M6, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M6 <- ScaleData(G172.M6, features = VariableFeatures(G172.M6))
# Run PCA
G172.M6 <- RunPCA(G172.M6,npcs = 30, features = VariableFeatures(G172.M6))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M6 <- RunUMAP(G172.M6, reduction = "pca", dims = 1:25)
G172.M6 <- FindNeighbors(G172.M6, reduction = "pca", dims = 1:25)
G172.M6 <- FindClusters(G172.M6, resolution = 0.5 )   
G172.M6.p1<- UMAPPlot(G172.M6, reduction = "umap", label=TRUE, label.size=5)
G172.M6.p1
DefaultAssay(G172.M6) <- "RNA"
G172.M6 <- NormalizeData(G172.M6, verbose = TRUE)
d6 <- DotPlot(G172.M6, features = c(all_genes,'Cyp2b10','Cyp2d9'))+RotatedAxis()
plot_grid(G172.M6.p1,d6)


#### TCPO 
G172.M7 <- subset(G172.hashtag, idents = c("M2-TGACGCCGTTGTTGT","M4-AGTCACAGTATTCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
DefaultAssay(G172.M7) <- "RNA"
G172.M7 <- SCTransform(G172.M7,verbose =TRUE)
# Select the top 1000 most variable features
G172.M7 <- NormalizeData(G172.M7, verbose = FALSE)
G172.M7 <- FindVariableFeatures(G172.M7, selection.method = "vst", nfeatures = 2000)
# Scaling RNA data, we only scale the variable features here for efficiency
G172.M7 <- ScaleData(G172.M7, features = VariableFeatures(G172.M7))
# Run PCA
G172.M7 <- RunPCA(G172.M7,npcs = 30, features = VariableFeatures(G172.M7))
# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot
G172.M7 <- RunUMAP(G172.M7, reduction = "pca", dims = 1:25)
G172.M7 <- FindNeighbors(G172.M7, reduction = "pca", dims = 1:25)
G172.M7 <- FindClusters(G172.M7, resolution = 0.5 )   
G172.M7.p1<- UMAPPlot(G172.M7, reduction = "umap", label=TRUE, label.size=5)
G172.M7.p1
DefaultAssay(G172.M7) <- "RNA"
G172.M7 <- NormalizeData(G172.M7, verbose = TRUE)
d7 <- DotPlot(G172.M7, features = c(all_genes,'Cyp2b10','Cyp2d9'))+RotatedAxis()
plot_grid(G172.M7.p1,d7, nrow=2)


```


```{r}
########## function load_tissue_droplet############
droplet_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/metadata_droplet_liver.csv", sep=",", header = TRUE)
colnames(droplet_metadata)[1] <- "channel"
tissue_metadata = filter(droplet_metadata, tissue == tissue_of_interest)[,c('channel','tissue','subtissue','mouse.sex', 'mouse.id')]

raw.data <- Read10X("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/Liver-10X_P4_2/")
colnames(raw.data) <- lapply(colnames(raw.data), function(x) paste0(tissue_metadata$channel[1],'_',x))
  meta.data1 = data.frame(row.names = colnames(raw.data))
  meta.data1['channel'] = tissue_metadata$channel[1]

  if (length(tissue_metadata$channel) > 1){
    # Some tissues, like Thymus and Heart had only one channel
    for(i in 2:nrow(tissue_metadata)){
subfolder = paste0("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/",tissue_of_interest, '-', tissue_metadata$channel[i])
      new.data1 <- Read10X(data.dir = subfolder)
      colnames(new.data1) <- lapply(colnames(new.data1), function(x) paste0(tissue_metadata$channel[i],'_', x))
      
      new.metadata1 = data.frame(row.names = colnames(new.data1))
      new.metadata1['channel'] = tissue_metadata$channel[i]
      
      raw.data = cbind(raw.data, new.data1)
      meta.data1 = rbind(meta.data1, new.metadata1)
    }
  }
  
  rnames = row.names(meta.data1)
  meta.data1 <- merge(meta.data1, tissue_metadata, sort = F)
  row.names(meta.data1) <- rnames
  # Order the cells alphabetically to ensure consistency.
    ordered_cell_names = order(colnames(raw.data))
  raw.data = raw.data[,ordered_cell_names]
  meta.data1 = meta.data1[ordered_cell_names,]
    # Find ERCC's, compute the percent ERCC, and drop them from the raw data.
  erccs <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = TRUE)
  percent.ercc <- Matrix::colSums(raw.data[erccs, ])/Matrix::colSums(raw.data)
  ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
  raw.data <- raw.data[-ercc.index,]
  
  # Create the Seurat object with all the data
  droplet <- CreateSeuratObject(raw.data)   # dropseq
  droplet <- AddMetaData(object = droplet, meta.data1) 
  droplet@meta.data$tech <- "droplet"

#n.pcs = 10
  #droplet <- SubsetData(droplet,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000))  # old version of seurat
droplet <-  subset(droplet, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
droplet <- NormalizeData(droplet, verbose = FALSE)
droplet <- FindVariableFeatures(droplet, selection.method = "vst", nfeatures = 2000)
droplet <- ScaleData(droplet, verbose = FALSE)
#droplet <- RunPCA(droplet, npcs = 10, verbose = FALSE)
droplet$stim <- "droplet"
droplet$cond <- "ctrl"

# droplet <- ScaleData(droplet, verbose = FALSE)
 droplet <- RunPCA(droplet, npcs = 30, verbose = FALSE)
 droplet <- RunUMAP(droplet, reduction = "pca", dims = 1:25)
 droplet <- FindNeighbors(droplet, reduction = "pca", dims = 1:10)
 droplet <- FindClusters(droplet, resolution = 0.5 )   
 p1<- UMAPPlot(droplet, reduction = "umap", group.by = "channel", label=TRUE, label.size=5)
 p2 <- UMAPPlot(droplet, label=TRUE, label.size=6)
 p3<- UMAPPlot(droplet, reduction = "umap", group.by = "mouse.sex", label=TRUE, label.size=5)

res.used <- 1
#droplet <- FindClusters(object = droplet, reduction.type = "pca", dims.use = 1:n.pcs, resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)

droplet <- RunTSNE(object = droplet, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
TSNEPlot(object = droplet, do.label = T, pt.size = 1.2, label.size = 4)


droplet <- RenameIdents(droplet, `0` = "Hep-Mid-M", `1` = "Hep-PC-F", `2` = "Hep-PP-F",`3` = "Hep-PP-M", `4` = "Hep-Mid-M", `5` = "Hep-PC-M", `6` = "Hep-Mid-F", `7` = "Hep-Mid-F", `8` = "Endo-F", `9` = "Bileduct-F")



### this is tranformation option for develpment SCtransform program #######
droplet <- SCTransform(droplet,verbose =TRUE)


######3 Smartseq #########################

plate_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Liver_facs_annotation.csv", sep=",",  header = TRUE)
colnames(plate_metadata)[1] <- "plate.barcode"
  
raw.data = read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/liver_facs_scrna_data.csv", sep=",", row.names=1)
colnames(plate_metadata)[1] <- "plate.barcode"
  
plate.barcodes = lapply(colnames(raw.data), function(x) strsplit(strsplit(x, "_")[[1]][1], '.', fixed=TRUE)[[1]][2])

  barcode.df = t.data.frame(as.data.frame(plate.barcodes))
  
  rownames(barcode.df) = colnames(raw.data)
  barcode.df= cbind(barcode.df, colnames(raw.data))
  colnames(barcode.df) = c('plate.barcode1', 'plate.barcode')
  
  rnames = row.names(barcode.df)
  meta.data <- merge(barcode.df, plate_metadata, by='plate.barcode', sort = F)
  row.names(meta.data) <- rnames
    
  # Sort cells by cell name
  meta.data = meta.data[order(rownames(meta.data)), ]
  raw.data = raw.data[,rownames(meta.data)]
  
  # Create the Seurat object with all the data
  smartseq <- CreateSeuratObject(raw.data)
  smartseq <- AddMetaData(object = smartseq, meta.data)
  #smartseq@meta.data$tech <- "smartseq"
  smartseq$stim <- "smartseq"
  smartseq$cond <- "ctrl"

#smartseq <- SubsetData(smartseq,subset.names = c("nGene", "nUMI"),low.thresholds = c(500, 1000))  #old version of seurat
smartseq<-  subset(smartseq, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
#smartseq <- SCTransform(smartseq)
smartseq <- NormalizeData(smartseq, verbose = FALSE)
smartseq <- FindVariableFeatures(smartseq, selection.method = "vst", nfeatures = 2000)
smartseq <- ScaleData(smartseq, verbose = FALSE)
smartseq <- RunPCA(smartseq, npcs = 30, verbose = FALSE)
smartseq <- RunUMAP(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindNeighbors(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindClusters(smartseq, resolution = 0.5 )   
#p4<- UMAPPlot(smartseq, reduction = "umap", group.by = "channel", label=TRUE, label.size=5)
p5 <- UMAPPlot(smartseq, label=TRUE, label.size=6)
p6<- UMAPPlot(smartseq, reduction = "umap", group.by = "mouse.sex", label=TRUE, label.size=5)

########## sctransform ###################3
smartseq <- SCTransform(smartseq)

############################### Integration Regular workflow ############################
anchors <- FindIntegrationAnchors(object.list = c(droplet.list, smartseq.list,G171B.list, G172.M1.list, G172.M2.list), dims = 1:50, anchor.features = 3000)
combined <- IntegrateData(anchorset = anchors, dims = 1:50)    

DefaultAssay(combined) <- "integrated"
# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)
                                                    
# t-SNE and Clustering
combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20)
combined <- FindClusters(combined, resolution = 0.5 )   
#combined <- RunTSNE(combined, reduction = "pca", dims = 1:20)
    
 # Visualization
p1 <- DimPlot(combined, reduction = "umap", group.by = "stim")
p2 <- DimPlot(combined, reduction = "umap", group.by = "mouse.sex")
p3 <- DimPlot(combined, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(combined, label=TRUE)
plot_grid(p2, p3,p4) 
DimPlot(combined, reduction = "umap", split.by = "stim")   

p5 <- DimPlot(combined, reduction = "tsne", group.by = "stim")
p6 <- DimPlot(combined, reduction = "tsne", group.by = "mouse.sex")
p7 <- DimPlot(combined, reduction = "tsne", label = TRUE)
p8 <- TSNEPlot(combined, label =T)
plot_grid(p6, p7,p8) 
DimPlot(combined, reduction = "tsne", split.by = "stim") 

#################################### Refernce based Integration ################################
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by="stim")
ctrl.list <- c(droplet.list, smartseq.list)

for (i in names(ctrl.list)) {
    ctrl.list[[i]] <- SCTransform(ctrl.list[[i]], verbose =TRUE)
}

ctrl.list1 <- c(ctrl.list, G171B.list)
#dims = 1:50
ref.features <- SelectIntegrationFeatures(object.list = ctrl.list1, dims=1:50, anchor.features = 3000)
ref.list <- PrepSCTIntegration(object.list = ctrl.list1, anchor.features = ref.features)
ref.anchors <- FindIntegrationAnchors(object.list = ref.list, normalization.method = "SCT", anchor.features = ref.features, reference = c(1,2,3))
ref.integrated <- IntegrateData(anchorset = ref.anchors, normalization.method = "SCT")

DefaultAssay(ref.integrated) <- "integrated"
ref.integrated <- ScaleData(ref.integrated, verbose = FALSE)
ref.integrated <- RunPCA(object = ref.integrated, npcs=30,verbose = FALSE)
ref.integrated <- RunUMAP(object = ref.integrated ,dims = 1:30, seed.use = 10, perplexity=30)
ref.integrated <- FindNeighbors(ref.integrated, reduction = "pca", dims = 1:30)
ref.integrated <- FindClusters(ref.integrated, resolution = 0.2 )   

r1 <- DimPlot(ref.integrated, split.by="stim", pt.size = 0.5)
r2 <- DimPlot(ref.integrated, label=TRUE, pt.size = 0.5)
r3 <- DimPlot(ref.integrated, split.by = c("cond"), pt.size = 0.5)


plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4, 
    byrow = TRUE, override.aes = list(size = 2.5))))
CombinePlots(plots)


ref.integrated$celltype.stim <- paste(Idents(ref.integrated), ref.integrated$stim, sep = "_")
ref.integrated$seurat_clusters <- Idents(ref.integrated)
Idents(ref.integrated) <- "celltype.stim"

fp <- FeaturePlot(ref.integrated, features = "ncRNA-as-chr10-8460", cols = c("lightgrey","darkred"), order = TRUE,  shape.by = "stim", pt.size = 2)

DefaultAssay(ref.integrated) <-"RNA"
ref.integrated <- NormalizeData(ref.integrated)
ref.integrated <- ScaleData(ref.integrated)

dot1 <- DotPlot(ref.integrated, features= all_genes)+RotatedAxis()
d1 <- DoHeatmap(ref.integrated, features = c(all_genes,"Mup20","Xist"), group.by = "seurat_clusters", assay= 'RNA')
#d1 <- DoHeatmap(ref.integrated, features = highTPM, group.by = "celltype.stim", assay= 'RNA',raster = F, disp.max = 0.5)+scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd")) ) + guides(color=FALSE)


Drop.combined.markers <- FindAllMarkers(object = ref.integrated, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)

############ merge G172 M1 #############33
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by="stim")
ctrl.list.2 <- c( G172.M1.list, G172.M2.list)


for (i in names(ctrl.list.2)) {
    ctrl.list.2[[i]] <- SCTransform(ctrl.list.2[[i]], verbose =TRUE)
}

ref.features.1 <- SelectIntegrationFeatures(object.list = ctrl.list.2, dims = 1:50, anchor.features = 3000)
ref.list.1 <- PrepSCTIntegration(object.list = ctrl.list.2, anchor.features = ref.features.1)


ref.anchors.1 <- FindIntegrationAnchors(object.list = ref.list.1, normalization.method = "SCT", 
    anchor.features = ref.features.1, reference = c(1,2))
ref.integrated.1 <- IntegrateData(anchorset = ref.anchors.1, normalization.method = "SCT")

DefaultAssay(ref.integrated.1) <- "integrated"
ref.integrated.1 <- ScaleData(ref.integrated.1, verbose = FALSE)
ref.integrated.1 <- RunPCA(object = ref.integrated.1, npcs=30,verbose = FALSE)
ref.integrated.1 <- RunUMAP(object = ref.integrated.1, dims = 1:30, seed.use = 10 , perplexity=30)
#ref.integrated.1 <- RunTSNE(object = ref.integrated.1, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
ref.integrated.1 <- FindNeighbors(ref.integrated.1, reduction = "pca", dims = 1:30)
ref.integrated.1 <- FindClusters(ref.integrated.1, resolution = 0.2)   

rG172M1.1 <- UMAPPlot(ref.integrated.1, split.by="stim", label=T)
rG172M1.2 <- UMAPPlot(ref.integrated.1, label=TRUE, combine = FALSE)
rG172M1.3 <- UMAPPlot(ref.integrated.1, split.by = c("mouse.sex"))

rG172M1.4 <- TSNEPlot(ref.integrated.1, split.by="stim")


DefaultAssay(ref.integrated.1) <-"RNA"
ref.integrated.1 <- NormalizeData(ref.integrated.1)
ref.integrated.1 <- ScaleData(ref.integrated.1)

DoHeatmap(ref.integrated.1, features = c(all_genes,'Sox9','Mup20'), group.by = "seurat_clusters", assay= 'RNA' ,raster = F)+scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd")) ) + guides(color=FALSE)


ref.integrated.1$celltype <- RenameIdents(ref.integrated.1, c(`0` = "PP", `1` = "PC", `2` = "Endo",`3` = "HSC", `4` = "Kupffer", `5` = "Dividing", `6` = "Immune", `7` = "B-NK", `8` = "NA"))


ref.integrated.1.markers <- FindAllMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers  %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)

write.csv(ref.integrated.1.markers, "ref_integrated_G172M1-M2_Allmarker")

ref.integrated.1$celltype.stim <- paste(Idents(ref.integrated.1), ref.integrated.1$stim, sep = "_")
ref.integrated.1$celltype <- Idents(ref.integrated.1)
Idents(ref.integrated.1) <- "celltype.stim"

DoHeatmap(ref.integrated.1, features = TPM4, group.by = "seurat_clusters", assay= 'RNA' )

ref.integrated.1.markers <- FindMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers  %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)


```


G172M1 and G172 M2 DE analysis
```{r}

DE1.1 <- FindMarkers(ref.integrated.1, ident.1 = c("0_G172M1" ,"1_G172M1", "2_G172M1" ,"3_G172M1" ,"4_G172M1" ,"5_G172M1" ,"6_G172M1","7_G172M1", "8_G172M1"), ident.2 = c("0_G172M2" ,"1_G172M2", "2_G172M2" ,"3_G172M2" ,"4_G172M2" ,"5_G172M2" ,"6_G172M2","7_G172M2", "8_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE_All_M1_M2 <- DE1.1 
DE_hep_NPC <- FindMarkers(ref.integrated.1, ident.1 = c("0_G172M2" ,"1_G172M2"), ident.2 = c("2_G172M2" ,"3_G172M2" ,"4_G172M2" ,"5_G172M2" ,"6_G172M2","7_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE_NPC_M1_M2 <- FindMarkers(ref.integrated.1, ident.1 = c( "2_G172M1" ,"3_G172M1" ,"4_G172M1" ,"5_G172M1" ,"6_G172M1","7_G172M1"), ident.2 = c("2_G172M2" ,"3_G172M2" ,"4_G172M2" ,"5_G172M2" ,"6_G172M2","7_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE_Drop <- FindAllMarkers(ref.integrated, features = )



DE0.1 <- FindMarkers(ref.integrated.1, ident.1 = c("0_G172M1"), ident.2 = c("0_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

write.csv(DE0.1, "marker/DE0.1_G172_0_M1_M2")

DE1.1 <- FindMarkers(ref.integrated.1, ident.1 = c("1_G172M1"), ident.2 = c("1_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE2.1 <- FindMarkers(ref.integrated.1, ident.1 = c("2_G172M1"), ident.2 = c("2_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE3.1 <- FindMarkers(ref.integrated.1, ident.1 = c("3_G172M1"), ident.2 = c("3_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


DE4.1 <- FindMarkers(ref.integrated.1, ident.1 = c("4_G172M1"), ident.2 = c("4_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE5.1 <- FindMarkers(ref.integrated.1, ident.1 = c("5_G172M1"), ident.2 = c("5_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE6.1 <- FindMarkers(ref.integrated.1, ident.1 = c("6_G172M1"), ident.2 = c("6_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DE7.1 <- FindMarkers(ref.integrated.1, ident.1 = c("7_G172M1"), ident.2 = c("7_G172M2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


```



```{r}
################# G171 TCPO expsosed ########
G171B_metadata_G171B <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/G171_metadata_droplet_liver.csv", sep=",", header = TRUE)
colnames(G171B_metadata_G171B)[1] <- "channel"
tissue_metadata_G171B = filter(G171B_metadata_G171B, tissue == tissue_of_interest)[,c('channel','tissue','subtissue','mouse.sex', 'mouse.id')]

raw.data <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/Transcript_Refined/Liver-10X_G171B/")
colnames(raw.data) <- lapply(colnames(raw.data), function(x) paste0(tissue_metadata_G171B$channel[1],'_',x))
  meta.data1 = data.frame(row.names = colnames(raw.data))
  meta.data1['channel'] = tissue_metadata_G171B$channel[1]
  
  rnames = row.names(meta.data1)
  meta.data1 <- merge(meta.data1, tissue_metadata_G171B, sort = F)
  row.names(meta.data1) <- rnames
  # Order the cells alphabetically to ensure consistency.
  
  ordered_cell_names = order(colnames(raw.data))
  raw.data = raw.data[,ordered_cell_names]
  meta.data1 = meta.data1[ordered_cell_names,]
  
  # Find ERCC's, compute the percent ERCC, and drop them from the raw data.
  erccs <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = TRUE)
  percent.ercc <- Matrix::colSums(raw.data[erccs, ])/Matrix::colSums(raw.data)
  ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
  raw.data <- raw.data[-ercc.index,]
  
  ncRNA.genes <- grep(pattern = "^ncRNA", x = rownames(x = raw.data), value = TRUE)
  percent.ncRNA <- Matrix::colSums(raw.data[ncRNA.genes, ])/Matrix::colSums(raw.data)
  KRAB.genes <- grep(pattern = "^KRAB", x = rownames(x = raw.data), value = TRUE)
  cherry.genes <- grep(pattern = "^mcherry", x = rownames(x = raw.data), value = TRUE)
  
  # Create the Seurat object with all the data
  G171B <- CreateSeuratObject(raw.data)   # dropseq
  G171B <- AddMetaData(object = G171B, meta.data1) 
  G171B@meta.data$tech <- "G171B"

#G171B <- SubsetData(G171B,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000))  # old version of seurat
#G171B <-  subset(G171B, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G171B <- NormalizeData(G171B, verbose = FALSE)
G171B <- FindVariableFeatures(G171B, selection.method = "vst", nfeatures = 2000)
G171B$stim <- "G171B"
G171B$cond <- "TCPO"

### this is tranformation option for develpment SCtransform program #######
G171B <- SCTransform(G171B,verbose =TRUE)


```


diffusion plt code 

```{r}
# Before running MDS, we first calculate a distance matrix between all pairs of cells.  Here we
# use a simple euclidean distance metric on all genes, using scale.data as input
d <- dist(t(GetAssayData(combined, slot = "scale.data")))
# Run the MDS procedure, k determines the number of dimensions
mds <- cmdscale(d = d, k = 2)
# cmdscale returns the cell embeddings, we first label the columns to ensure downstream
# consistency
colnames(mds) <- paste0("MDS_", 1:2)
# We will now store this as a custom dimensional reduction called 'mds'
combined[["mds"]] <- CreateDimReducObject(embeddings = mds, key = "MDS_", assay = DefaultAssay(combined))

# We can now use this as you would any other dimensional reduction in all downstream functions
DimPlot(combined, reduction = "mds", pt.size = 0.5)
```



```{r}
genes_hep_main =c('Alb', 'Ttr', 'Apoa1', 'Serpina1c')
genes_hep = c('Alb', 'Ttr', 'Apoa1', 'Serpina1c',
                   'Cyp2e1', 'Glul', 'Oat', 'Gulo',
                   'Ass1', 'Hamp', 'Gstp1', 'Ubb',
                   'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')
genes_endo = c('Pecam1', 'Nrp1', 'Kdr','Oit3')
genes_kuppfer = c( 'Clec4f', 'Cd68')
genes_nk = c('Il2rb', 'Nkg7', 'Cxcr6', 'Gzma')
genes_b = c('Cd79a', 'Cd79b')
genes_bec = c('Epcam', 'Krt19', 'Krt7')
genes_immune = 'Ptprc'
HSC = c("Dcn","Lama1","Nes")
Dividing = "Top2a"
Bplasma= "Jchain"
Mac= "Csf1r"
Chol="Sox9"

sex <- c("Cyp2d9", "ncRNA-inter_chrX-15394")
all_genes = c(genes_hep, genes_endo, genes_kuppfer, Mac,genes_nk, genes_b, genes_bec, genes_immune, HSC, Dividing)
genes_bec_b_immune  = c(genes_bec,genes_b,genes_immune)
genes_zones = c('Cyp2e1', 'Glul', 'Oat', 'Gulo',
              'Ass1', 'Hamp', 'Gstp1', 'Ubb',
              'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')

receptor_KO <- c("ncRNA-inter-chr7-5998","Cyp2b10","Nr1i2","Nr1i3","Ppara","Pparg","Ppargc1b","Ppard")
cell <- c("Stab2","Csf1r","Cd3g","Ebf1","Irf8","Sox9","Apoc3","Top2a","Dcn")
```

```{r}
TPM4<- c('ncRNA-inter-chr7-6524',
'ncRNA-inter-chr19-14853',
'ncRNA-inter-chr6-5675',
'ncRNA-inter-chr4-3468',
'ncRNA-inter-chr9-8122',
'ncRNA-inter-chr12-10476',
'ncRNA-inter-chr17-14026',
'ncRNA-as-chr2-1457',
'ncRNA-as-chr19-14883',
'ncRNA-as-chr10-8460',
'ncRNA-as-chr5-4325',
'ncRNA-inter-chr7-5998',
'ncRNA-as-chr9-7843',
'ncRNA-as-chr9-8142',
'ncRNA-inter-chr11-9925',
'ncRNA-inter-chr19-14873',
'ncRNA-as-chr9-8172',
'ncRNA-inter-chr4-3779',
'ncRNA-inter-chr3-2504',
'ncRNA-as-chr19-15054',
'ncRNA-inter-chr8-7423',
'ncRNA-as-chr7-6302',
'ncRNA-inter-chr10-9418',
'ncRNA-inter-chr12-10454',
'ncRNA-as-chr7-5999',
'ncRNA-as-chr6-5335',
'ncRNA-inter-chr19-14987',
'ncRNA-inter-chr16-13170',
'ncRNA-inter-chr3-2988',
'ncRNA-inter-chr8-7430',
'ncRNA-inter-chr3-2168',
'ncRNA-inter-chr9-7874',
'ncRNA-inter-chr4-3778',
'ncRNA-inter-chr2-2011',
'ncRNA-inter-chr5-4335',
'ncRNA-inter-chr9-8301',
'ncRNA-inter-chr16-13510',
'ncRNA-as-chr9-8401',
'ncRNA-as-chr16-13512',
'ncRNA-as-chr12-10896',
'ncRNA-as-chr8-7359',
'ncRNA-as-chr5-4744',
'ncRNA-inter-chr5-4499',
'ncRNA-inter-chr16-13509',
'ncRNA-inter-chr17-13692',
'ncRNA-as-chr17-13834',
'ncRNA-inter-chr9-8147',
'ncRNA-inter-chr10-8697',
'ncRNA-inter-chr6-5551',
'ncRNA-as-chr9-8317',
'ncRNA-inter-chr7-6509',
'ncRNA-as-chr19-14977',
'ncRNA-inter-chr6-5318',
'ncRNA-inter-chr5-4578',
'ncRNA-inter-chr12-10509',
'ncRNA-as-chr10-9015',
'ncRNA-inter-chr3-2411',
'ncRNA-inter-chr9-7875',
'ncRNA-inter-chr5-4336',
'ncRNA-inter-chr12-10910',
'ncRNA-as-chr1-782',
'ncRNA-inter-chr17-13924',
'ncRNA-intra-chr7-5920',
'ncRNA-inter-chr16-13225',
'ncRNA-inter-chr3-2269',
'ncRNA-inter-chr14-12016',
'ncRNA-as-chr4-3800',
'ncRNA-as-chr5-4655',
'ncRNA-inter-chr9-7989',
'ncRNA-intra-chr5-4728',
'ncRNA-inter-chrX-15248',
'ncRNA-inter-chr10-8767',
'ncRNA-inter-chr19-14717',
'ncRNA-inter-chr8-6766',
'ncRNA-inter-chr13-11122',
'ncRNA-inter-chr7-6074',
'ncRNA-inter-chr15-12439',
'ncRNA-as-chr11-9787',
'ncRNA-inter-chr2-1827',
'ncRNA-as-chr19-14976',
'ncRNA-inter-chr19-14947',
'ncRNA-inter-chr6-5248',
'ncRNA-inter-chr2-1098',
'ncRNA-inter-chr8-7180',
'ncRNA-inter-chr9-8118',
'ncRNA-inter-chr14-12199',
'ncRNA-as-chr6-5336',
'ncRNA-inter-chr4-3867',
'ncRNA-inter-chr10-9000',
'ncRNA-inter-chr14-12290',
'ncRNA-inter-chr2-1491',
'ncRNA-inter-chr15-12606',
'ncRNA-inter-chr10-9222',
'ncRNA-inter-chr5-4322',
'ncRNA-inter-chr12-10942',
'ncRNA-inter-chr18-14690',
'ncRNA-inter-chr2-1471',
'ncRNA-inter-chr3-2410',
'ncRNA-inter-chr19-14880',
'ncRNA-inter-chr13-11074',
'ncRNA-inter-chr9-8056',
'ncRNA-inter-chr5-4654',
'ncRNA-inter-chr3-2166',
'ncRNA-inter-chr8-6944',
'ncRNA-as-chr7-6065',
'ncRNA-inter-chr8-6896',
'ncRNA-inter-chr2-1963',
'ncRNA-inter-chr4-3425',
'ncRNA-inter-chr13-11385',
'ncRNA-inter-chr9-7885',
'ncRNA-as-chr10-9411',
'ncRNA-as-chr9-8419',
'ncRNA-as-chr12-10618',
'ncRNA-inter-chr7-6390',
'ncRNA-inter-chr17-14151',
'ncRNA-as-chr13-11787',
'ncRNA-as-chr2-1965',
'ncRNA-inter-chr6-5721',
'ncRNA-inter-chr6-5822',
'ncRNA-inter-chr11-9635',
'ncRNA-inter-chr11-9965',
'ncRNA-inter-chr4-3052',
'ncRNA-inter-chr17-13857',
'ncRNA-inter-chr13-11201',
'ncRNA-inter-chr6-5249',
'ncRNA-inter-chr19-14851',
'ncRNA-inter-chr6-5638',
'ncRNA-inter-chr17-14130',
'ncRNA-inter-chr9-7993',
'ncRNA-inter-chr18-14656',
'ncRNA-inter-chr9-7992',
'ncRNA-inter-chr1-566',
'ncRNA-inter-chr12-10715',
'ncRNA-inter-chr17-14102',
'ncRNA-inter-chr4-3010',
'ncRNA-inter-chr3-2764',
'ncRNA-inter-chr4-3306',
'ncRNA-inter-chr19-14979',
'ncRNA-inter-chr1-570',
'ncRNA-inter-chr19-14790',
'ncRNA-inter-chr4-3282',
'ncRNA-inter-chr5-4777',
'ncRNA-inter-chr8-7605',
'ncRNA-inter-chr9-8000',
'ncRNA-as-chr15-12920',
'ncRNA-as-chr1-369',
'ncRNA-inter-chr19-14952',
'ncRNA-as-chr9-8393',
'ncRNA-as-chr14-12074',
'ncRNA-inter-chr12-10415',
'ncRNA-inter-chr7-6559',
'ncRNA-inter-chr1-630',
'ncRNA-inter-chr4-3142',
'ncRNA-inter-chr6-5131',
'ncRNA-inter-chr16-13050',
'ncRNA-inter-chr7-6411',
'ncRNA-inter-chr5-4746',
'ncRNA-inter-chr1-633',
'ncRNA-inter-chr10-8461',
'ncRNA-inter-chr2-2016',
'ncRNA-inter-chr6-5137',
'ncRNA-as-chr4-3300',
'ncRNA-inter-chr4-3009',
'ncRNA-inter-chr2-1923',
'ncRNA-inter-chr1-670',
'ncRNA-inter-chr15-12835',
'ncRNA-inter-chr18-14655',
'ncRNA-inter-chr16-13211',
'ncRNA-as-chr7-6050',
'ncRNA-inter-chr7-6508',
'ncRNA-inter-chr11-10206',
'ncRNA-inter-chr13-11399',
'ncRNA-inter-chr8-6757',
'ncRNA-inter-chr1-129',
'ncRNA-inter-chr12-10459',
'ncRNA-inter-chr7-6709',
'ncRNA-inter-chr12-10713',
'ncRNA-inter-chr7-6523',
'ncRNA-inter-chr2-2017',
'ncRNA-inter-chr7-6343',
'ncRNA-inter-chr1-63',
'ncRNA-as-chr3-2936',
'ncRNA-inter-chr18-14691',
'ncRNA-inter-chr7-6097',
'ncRNA-inter-chr6-5723')
```


```{r}
DefaultAssay(droplet) <- "RNA"
#droplet <- NormalizeData(combined, verbose = TRUE, normalization.method = "RC", scale.factor = 1e6)
combined <- NormalizeData(combined, verbose = TRUE)

DotPlot(combined, features = all_genes)
FeaturePlot(combined, features = genes_hep_main, min.cutoff = "q9")
#hepatocytes 
subtissplot <- DotPlot(combined, features = c(genes_hep_main, genes_endo, genes_bec_b_immune, genes_kuppfer, genes_nk))
PC <- DotPlot(combined, features = c(genes_hep_main,genes_zones))
NPC <- DotPlot(combined, features = c(genes_endo,genes_kuppfer, genes_nk))
all <- DotPlot(combined, features=c(all_genes))

### coexpression plots####

f1 <- FeaturePlot(KO.cells, features = c('Cyp2b10','ncRNA-inter-chr7-5998'), reduction = "mds", order = TRUE,split.by = "stim", blend = TRUE,sort.cell = TRUE, max.cutoff = 0.5)


########## this is exact averaging formula ###############33
  x <- (AverageExpression(KO.cells, verbose = TRUE, assays = "RNA" ,slot="counts")$RNA)
   x["ncRNA-inter-chr7-5998",]
#                         G171B    G171C
#ncRNA-inter-chr7-5998 1.871795 1.091463
########
#Idents(combined) <- factor(Idents(combined), levels = c(0,1,12))
markers.to.plot <- c("Alb","ncRNA-inter-chr7-5998")
DotPlot(combined, features = rev(markers.to.plot), cols = c("blue", "red"), dot.scale = 8, 
    split.by = "stim") + RotatedAxis()

FeaturePlot(combined, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","dSaCas9","KRAB","AAV8-mCherry"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red"))


######################### vlnplot ##########################

plots <- VlnPlot(combined, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","dSaCas9"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)

plots <- VlnPlot(combined, features = c("Lhx4","Dtna","Fam189a1","Galnt16","Kalrn"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)


#endothelial
DotPlot(combined, features = genes_endo)

#zones
zones <- DotPlot(combined, features = genes_zones)

f1 <- FeaturePlot(combined, features = c('Cyp2e1','Cyp2f2','Ass1'), min.cutoff = "q9", reduction = "tsne")

DimPlot(combined, label = TRUE)

save(combined, file="Seurat_smart-drop_integrated.Robj")


################# save raw counts from cluster #####################

Idents(combined) <- "stim"

### to avergae out the matrix from KO cells 

combined.raw.data.0.1 <- as.matrix(GetAssayData(combined, slot = "data")[, WhichCells(combined, ident = 0,idents = "stim")])
combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])
combined.raw.data.2 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])

#combined.raw.data.[i] <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 1)])
#combined.raw.data.12 <-as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 12)])
#combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts"))
x <- AverageExpression(test.combined,assays = "RNA",add.ident = "stim", slot = "data",use.scale = FALSE, use.counts = FALSE)$RNA

#}######## CAR data 
avg.combined.cells <- (AverageExpression(combined, verbose = FALSE)$RNA) 
avg.combined.cells$gene <- rownames(avg.combined.cells)

CAR_FP <- FeaturePlot(combined, features = c('Cyp2b10','Nr1i3'), reduction = "umap", order = TRUE,split.by = "stim", blend = TRUE,sort.cell = TRUE, max.cutoff = 1, min.cutoff = 0, pt.size = 0.5, repel = TRUE)
CAR_DOT_NR <- DotPlot(combined, features = 'Nr1i3', col.min = 0)

Cyp2b10_FP <- FeaturePlot(combined, features = 'Cyp2b10', reduction = "umap", min.cutoff = 0)
########### tSNE #################################
combined <- NormalizeData(object = combined)
combined <- FindVariableFeatures(combined, selection.method = "vst", nfeatures = 2000)

```


```{r}

G172_TPM_count  <- ref.integrated.1
G172_TPM_count  <- NormalizeData(G172_TPM_count,assay = "RNA", normalization.method = "RC", scale.factor = 1e6)



TPMcount0M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '0_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '0_G172M1')]))

TPMcount1M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '1_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '1_G172M1')]))

TPMcount2M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '2_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '2_G172M1')]))

TPMcount3M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '3_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '3_G172M1')]))

TPMcount4M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '4_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '4_G172M1')]))

TPMcount5M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '5_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '5_G172M1')]))

TPMcount6M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '6_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '6_G172M1')]))

TPMcount7M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '7_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '7_G172M1')]))

TPMcount8M1<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '8_G172M1')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '8_G172M1')]))




TPMcount0M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '0_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '0_G172M2')]))

TPMcount1M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '1_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '1_G172M2')]))

TPMcount2M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '2_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '2_G172M2')]))

TPMcount3M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '3_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '3_G172M2')]))

TPMcount4M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '4_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '4_G172M2')]))

TPMcount5M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '5_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '5_G172M2')]))

TPMcount6M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '6_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '6_G172M2')]))

TPMcount7M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '7_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '7_G172M2')]))

TPMcount8M2<- cbind(as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = '8_G172M2')]), as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = '8_G172M2')]))




write.csv(TPMcount0M1, "CountResult/counts_TPMcount0M1")
write.csv(TPMcount1M1, "CountResult/counts_TPMcount1M1")
write.csv(TPMcount2M1, "CountResult/counts_TPMcount2M1")
write.csv(TPMcount3M1, "CountResult/counts_TPMcount3M1")
write.csv(TPMcount4M1, "CountResult/counts_TPMcount4M1")
write.csv(TPMcount5M1, "CountResult/counts_TPMcount5M1")
write.csv(TPMcount6M1, "CountResult/counts_TPMcount6M1")
write.csv(TPMcount7M1, "CountResult/counts_TPMcount7M1")
write.csv(TPMcount8M1, "CountResult/counts_TPMcount8M1")


write.csv(TPMcount0M2, "CountResult/counts_TPMcount0M2")
write.csv(TPMcount1M2, "CountResult/counts_TPMcount1M2")
write.csv(TPMcount2M2, "CountResult/counts_TPMcount2M2")
write.csv(TPMcount3M2, "CountResult/counts_TPMcount3M2")
write.csv(TPMcount4M2, "CountResult/counts_TPMcount4M2")
write.csv(TPMcount5M2, "CountResult/counts_TPMcount5M2")
write.csv(TPMcount6M2, "CountResult/counts_TPMcount6M2")
write.csv(TPMcount7M2, "CountResult/counts_TPMcount7M2")
write.csv(TPMcount8M2, "CountResult/counts_TPMcount8M2")


#avg.KO.cells <- log1p(AverageExpression(KO.cells, verbose = FALSE)$RNA)  #original code log transformed
avg.KO.cells <- (AverageExpression(KO.cells, verbose = FALSE)$RNA) 
avg.KO.cells$gene <- rownames(avg.KO.cells)

genes.to.label= ("ncRNA-inter-chr7-5998")
#genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
p1 <- ggplot(avg.KO.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE)
plot_grid(p1, p2)
```


```{r}
KO.cells <- RunUMAP(KO.cells, reduction = "pca", dims = 1:20 )
KO.cells <- FindNeighbors(KO.cells, reduction = "pca", dims = 1:20)
KO.cells <- FindClusters(KO.cells, resolution = 0.5 )   
KO.cells <- RunTSNE(KO.cells, reduction = "pca", dims = 1:20)
 

Hep.cells <- RunUMAP(Hep.cells, reduction = "pca", dims = 1:20 )
Hep.cells <- FindNeighbors(Hep.cells, reduction = "pca", dims = 1:20)
Hep.cells <- FindClusters(Hep.cells, resolution = 0.5 )   
Hep.cells <- RunTSNE(Hep.cells, reduction = "pca", dims = 1:20)
 

   
 # Visualization
p1 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(KO.cells, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(KO.cells, label=TRUE)
plot_grid(p1,p4) 
DimPlot(KO.cells, reduction = "umap", split.by = "stim")   


#hepatocyte cells

p5 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "stim")
p6 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "mouse.sex")
p7 <- UMAPPlot(Hep.cells, reduction = "umap", label = TRUE)
p8 <- UMAPPlot(Hep.cells, label=TRUE)
plot_grid(p5,p8) 
DimPlot(KO.cells, reduction = "umap", split.by = "stim")   


raw.data.KO.0 <- as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 0)])
raw.data.KO.1 <- as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 1)])
raw.data.KO.2 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 2)])
raw.data.KO.3 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 3)])
raw.data.KO.4 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 4)])
raw.data.KO.5 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 5)])


write.csv(raw.data.KO.0, "CountResult/Markers/raw.data.KO.0")
write.csv(raw.data.KO.1, "CountResult/Markers/raw.data.KO.1")
write.csv(raw.data.KO.2, "CountResult/Markers/raw.data.KO.2")
write.csv(raw.data.KO.3, "CountResult/Markers/raw.data.KO.3")
write.csv(raw.data.KO.4, "CountResult/Markers/raw.data.KO.4")
write.csv(raw.data.KO.5, "CountResult/Markers/raw.data.KO.5")



f1 <- FeaturePlot(KO.cells, features = c('ncRNA-inter-chr7-5998'),  reduction = "umap", split.by = "stim")
plot_grid(f1,p1,p4) 

DefaultAssay(KO.cells) <- "RNA"
KO.cells <- NormalizeData(KO.cells, verbose = FALSE)

plots <- VlnPlot(KO.cells, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","Cyp2e1","Cyp2f2"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)

Three_five_six <- subset(KO.cells, idents = c("5","6"))

Three_five_six <- RunUMAP(Three_five_six, reduction = "pca", dims = 1:20 )
Three_five_six <- FindNeighbors(Three_five_six, reduction = "pca", dims = 1:20)
Three_five_six <- FindClusters(Three_five_six, resolution = 1 )   
Three_five_six <- RunTSNE(Three_five_six, reduction = "pca", dims = 1:20)

p1 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(Three_five_six, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(Three_five_six, label=TRUE)
plot_grid(p1,p4) 
DimPlot(Three_five_six, reduction = "umap", split.by = "stim")   

f1 <- FeaturePlot(Three_five_six, features = c('ncRNA-inter-chr7-5998'),  reduction = "umap", split.by = "stim")


lnc5998 <- subset(combined, cells = lnc5998.cells, idents = "1")

lnc5998 <- RunUMAP(lnc5998, reduction = "pca", dims = 1:20 )
lnc5998 <- FindNeighbors(lnc5998, reduction = "pca", dims = 1:20)
lnc5998 <- FindClusters(lnc5998, resolution = 1 )   
lnc5998 <- RunTSNE(lnc5998, reduction = "pca", dims = 1:20)
    
 # Visualization
p1 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(lnc5998, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(lnc5998, label=TRUE)
plot_grid(p1,p4) 
DimPlot(lnc5998, reduction = "umap", split.by = "stim")   

lnc5998.cells <- WhichCells(object = combined, expression = "ncRNA-inter-chr7-5998" > 1)
FeaturePlot(lnc5998, features = c("ncRNA-inter-chr7-5998"), split.by = "stim",  
+             cols = c("grey", "red"), cells = lnc5998.cells,min.cutoff = 0.5)


DefaultAssay(lnc5998) <- "RNA"
lnc5998 <- NormalizeData(lnc5998, verbose = FALSE)

plots <- VlnPlot(lnc5998, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","Cyp2e1","Cyp2f2"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)

 
```

```{r}
d <- dist(t(GetAssayData(KO.cells, slot = "scale.data")))
# Run the MDS procedure, k determines the number of dimensions
mds <- cmdscale(d = d, k = 2)
# cmdscale returns the cell embeddings, we first label the columns to ensure downstream
# consistency
colnames(mds) <- paste0("MDS_", 1:2)
# We will now store this as a custom dimensional reduction called 'mds'
KO.cells[["mds"]] <- CreateDimReducObject(embeddings = mds, key = "MDS_", assay = DefaultAssay(KO.cells))

# We can now use this as you would any other dimensional reduction in all downstream functions
DimPlot(KO.cells, reduction = "mds", pt.size = 0.5)
```

Find differential markers

```{r}
KO.cells$celltype.stim <- paste(Idents(KO.cells), KO.cells$stim, sep = "_")
KO.cells$celltype <- Idents(KO.cells)
Idents(KO.cells) <- "celltype.stim"
response3 <- FindMarkers(KO.cells, ident.1 = c("1_G171B","0_G171B","2_G171B"), ident.2 = c("1_G171C", "0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(response3, n = 15)


#DE1: Compare cluster 0+1+12 (that expressed lnc5998) with Other hepatocyte clusters (2+5+8)
#DE2: compare cluster 1 (showed major effects in the KD) vs Cluster 0 (that showed little KD)
#DE3: For KO.cells that formed five subcluster, compare clusters 3+4+5+1 vs 2+0
#DE4: for KO.cells that formed five clusters. Comapre cluster 4 vs. 2

DE0112.258 <- FindMarkers(combined, ident.1 = c("0","1","12" ), ident.2 = c("2","5","8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE0112.All <- FindMarkers(combined, ident.1 = c("0","1","12" ), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.2 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.5 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("5"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.8 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


DE2.5 <- FindMarkers(combined, ident.1 = c("2" ), ident.2 = c("5"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE2.8 <- FindMarkers(combined, ident.1 = c("2" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE5.8 <- FindMarkers(combined, ident.1 = c("5" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


DEK4351.20 <- FindMarkers(KO.cells, ident.1 = c("3","4","5","1" ), ident.2 = c("2","0"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

DEK4.3 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "3",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.2 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "2",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.0 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "0",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.1 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "1",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.5 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "5",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)



##### comaprison between G171B vs G171C for KO.cell clusters of 0+1+12 ######33

DEK4_C.B <- FindMarkers(KO.cells, ident.1 = "4_G171C", ident.2 = "4_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK3_C.B <- FindMarkers(KO.cells, ident.1 = "3_G171C", ident.2 = "3_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK1_C.B <- FindMarkers(KO.cells, ident.1 = "1_G171C", ident.2 = "1_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK2_C.B <- FindMarkers(KO.cells, ident.1 = "2_G171C", ident.2 = "2_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK0_C.B <- FindMarkers(KO.cells, ident.1 = "0_G171C", ident.2 = "0_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK5_C.B <- FindMarkers(KO.cells, ident.1 = "5_G171C", ident.2 = "5_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


################################### Write the results #########################
write.csv(DE0112.258, "CountResult/Markers/DE0112.258")
write.csv(DE0112.All, "CountResult/Markers/DE0112.All")
write.csv(DE1.2, "CountResult/Markers/DE1.2")
write.csv(DE1.5, "CountResult/Markers/DE1.5")
write.csv(DE1.8, "CountResult/Markers/DE1.8")
write.csv(DE2.5, "CountResult/Markers/DE2.5")
write.csv(DE2.8, "CountResult/Markers/DE2.8")
write.csv(DE5.8, "CountResult/Markers/DE5.8")

write.csv(DEK4351.20, "CountResult/Markers/DEK4351.20")
write.csv(DEK4.3, "CountResult/Markers/DEK4.3")
write.csv(DEK4.2, "CountResult/Markers/DEK4.2")
write.csv(DEK4.0, "CountResult/Markers/DEK4.0")
write.csv(DEK4.1, "CountResult/Markers/DEK4.1")
write.csv(DEK4.5, "CountResult/Markers/DEK4.5")


write.csv(DEK4_C.B, "CountResult/Markers/DEK4_C.B")
write.csv(DEK3_C.B, "CountResult/Markers/DEK3_C.B")
write.csv(DEK1_C.B, "CountResult/Markers/DEK1_C.B")
write.csv(DEK2_C.B, "CountResult/Markers/DEK2_C.B")
write.csv(DEK0_C.B, "CountResult/Markers/DEK0_C.B")
write.csv(DEK5_C.B, "CountResult/Markers/DEK5_C.B")




combined$celltype.stim <- paste(Idents(combined), combined$stim, sep = "_")
combined$celltype <- Idents(combined)
Idents(combined) <- "celltype.stim"


test.combined$celltype.stim <- paste(Idents(test.combined), test.combined$stim, sep = "_")
test.combined$celltype <- Idents(test.combined)
Idents(test.combined) <- "celltype.stim"




Combined_G171B_vs_G171C <- FindMarkers(combined, ident.1 = c("0_G171B","1_G171B","2_G171B","3_G171B","4_G171B","5_G171B","6_G171B","7_G171B","8_G171B","9_G171B","10_G171B","11_G171B","12_G171B" ), ident.2 = c("0_G171C","1_G171C","2_G171C","3_G171C","4_G171C","5_G171C","6_G171C","7_G171C","8_G171C","9_G171C","10_G171C","11_G171C","12_G171C" ), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


Combined_G171C_vs_G171B_Clust1 <- FindMarkers(combined, ident.1 = c("1_G171C"), ident.2 = c("1_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust0 <- FindMarkers(combined, ident.1 = c("0_G171C"), ident.2 = c("0_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust12 <- FindMarkers(combined, ident.1 = c("12_G171C"), ident.2 = c("12_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust2 <- FindMarkers(combined, ident.1 = c("2_G171C"), ident.2 = c("2_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust5 <- FindMarkers(combined, ident.1 = c("5_G171C"), ident.2 = c("5_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust8 <- FindMarkers(combined, ident.1 = c("8_G171C"), ident.2 = c("8_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


Combined_G171C_vs_G171B_Clust3 <- FindMarkers(combined, ident.1 = c("3_G171C"), ident.2 = c("3_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust4 <- FindMarkers(combined, ident.1 = c("4_G171C"), ident.2 = c("4_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust6 <- FindMarkers(combined, ident.1 = c("6_G171C"), ident.2 = c("6_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust7 <- FindMarkers(combined, ident.1 = c("7_G171C"), ident.2 = c("7_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust9 <- FindMarkers(combined, ident.1 = c("9_G171C"), ident.2 = c("9_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust10 <- FindMarkers(combined, ident.1 = c("10_G171C"), ident.2 = c("10_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust11 <- FindMarkers(combined, ident.1 = c("11_G171C"), ident.2 = c("11_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)





write.csv(Combined_G171C_vs_G171B_Clust1, "CountResult/Markers/Combined_G171C_vs_G171B_Clust1")
write.csv(Combined_G171C_vs_G171B_Clust0, "CountResult/Markers/Combined_G171C_vs_G171B_Clust0")
write.csv(Combined_G171C_vs_G171B_Clust12, "CountResult/Markers/Combined_G171C_vs_G171B_Clust12")
write.csv(Combined_G171C_vs_G171B_Clust2, "CountResult/Markers/Combined_G171C_vs_G171B_Clust2")
write.csv(Combined_G171C_vs_G171B_Clust5, "CountResult/Markers/Combined_G171C_vs_G171B_Clust5")
write.csv(Combined_G171C_vs_G171B_Clust8, "CountResult/Markers/Combined_G171C_vs_G171B_Clust8")

write.csv(Combined_G171C_vs_G171B_Clust3, "CountResult/Markers/Combined_G171C_vs_G171B_Clust3")
write.csv(Combined_G171C_vs_G171B_Clust4, "CountResult/Markers/Combined_G171C_vs_G171B_Clust4")
write.csv(Combined_G171C_vs_G171B_Clust6, "CountResult/Markers/Combined_G171C_vs_G171B_Clust6")
write.csv(Combined_G171C_vs_G171B_Clust7, "CountResult/Markers/Combined_G171C_vs_G171B_Clust7")
write.csv(Combined_G171C_vs_G171B_Clust9, "CountResult/Markers/Combined_G171C_vs_G171B_Clust9")
write.csv(Combined_G171C_vs_G171B_Clust10, "CountResult/Markers/Combined_G171C_vs_G171B_Clust10")
write.csv(Combined_G171C_vs_G171B_Clust11, "CountResult/Markers/Combined_G171C_vs_G171B_Clust11")





PC_vs_PP_G171B <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B"), ident.2 = c("0_G171B","2_G171B"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171B, n = 15)


PC_vs_PP_G171C <- FindMarkers(KO.cells, ident.1 = c("4_G171C","3_G171C"), ident.2 = c("0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171C, n = 15)


PC_vs_PP_G171BC <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B","4_G171C","3_G171C"), ident.2 = c("0_G171B","2_G171B","0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171C, n = 15)


lnc5998_KO_DE_1 <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B","5_G171B"), ident.2 = c("4_G171C","3_G171C","5_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(lnc5998_KO_DE, n = 15)


lnc5998_KO_DE_2 <- FindMarkers(KO.cells, ident.1 = c("4_G171C","3_G171C","5_G171C"), ident.2 =c("4_G171B","3_G171B","5_G171B") , verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(lnc5998_KO_DE, n = 15)

cell.type.genes <- (PC_vs_PP_G171BC[1]) # Takes all the unique cell type specific genes
GOterms = topGOterms(fg.genes = cell.type.genes, bg.genes = rownames(KO.cells@assays$RNA@dataKO.cells@assays$RNA@data), organism = "Mouse")

cell.type.genes <- (PC_vs_PP_G171BC[1]) # Takes all the unique cell type specific genes
GOterms = topGOterms(fg.genes = rownames(cell.type.genes), bg.genes = rownames(KO.cells@assays$RNA@data), organism = "Mouse")
 
```

```{r}

AvergeExpression2 <- function (object, assays = NULL, features = NULL, return.seurat = FALSE, 
          add.ident = NULL, slot = "data", use.scale = FALSE, use.counts = FALSE, 
          verbose = TRUE, ...) 
{
    
    fxn.average <- switch(EXPR = slot, data = function(x) {
        return(mean(x = x))
    }, mean)
    object.assays <- FilterObjects(object = object, classes.keep = "Assay")
    assays <- assays %||% object.assays
    ident.orig <- Idents(object = object)
    orig.levels <- levels(x = Idents(object = object))
    ident.new <- c()
    if (!all(assays %in% object.assays)) {
        assays <- assays[assays %in% object.assays]
        if (length(assays) == 0) {
            stop("None of the requested assays are present in the object")
        }
        else {
            warning("Requested assays that do not exist in object. Proceeding with existing assays only.")
        }
    }
    if (!is.null(x = add.ident)) {
        new.data <- FetchData(object = object, vars = add.ident)
        new.ident <- paste(Idents(object)[rownames(x = new.data)], 
                           new.data[, 1], sep = "_")
        Idents(object, cells = rownames(new.data)) <- new.ident
    }
    data.return <- list()
    for (i in 1:length(x = assays)) {
        data.use <- GetAssayData(object = object, assay = assays[i], 
                                 slot = slot)
        features.assay <- features
        if (length(x = intersect(x = features, y = rownames(x = data.use))) < 
            1) {
            features.assay <- rownames(x = data.use)
        }
        data.all <- data.frame(row.names = features.assay)
        for (j in levels(x = Idents(object))) {
            temp.cells <- WhichCells(object = object, idents = j)
            features.assay <- unique(x = intersect(x = features.assay, 
                                                   y = rownames(x = data.use)))
            if (length(x = temp.cells) == 1) {
                data.temp <- (data.use[features.assay, temp.cells])
                if (slot == "data") {
                    data.temp <-  data.temp
                }
            }
            if (length(x = temp.cells) > 1) {
                data.temp <- apply(X = data.use[features.assay, 
                                                temp.cells, drop = FALSE], MARGIN = 1, FUN = fxn.average)
            }
            data.all <- cbind(data.all, data.temp)
            colnames(x = data.all)[ncol(x = data.all)] <- j
            if (verbose) {
                message(paste("Finished averaging", assays[i], 
                              "for cluster", j))
            }
            if (i == 1) {
                ident.new <- c(ident.new, as.character(x = ident.orig[temp.cells[1]]))
            }
        }
        names(x = ident.new) <- levels(x = Idents(object))
        data.return[[i]] <- data.all
        names(x = data.return)[i] <- assays[[i]]
    }
    if (return.seurat) {
        toRet <- CreateSeuratObject(counts = data.return[[1]], 
                                    project = "Average", assay = names(x = data.return)[1], 
                                    ...)
        if (length(x = data.return) > 1) {
            for (i in 2:length(x = data.return)) {
                toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]])
            }
        }
        if (DefaultAssay(object = object) %in% names(x = data.return)) {
            DefaultAssay(object = toRet) <- DefaultAssay(object = object)
        }
        Idents(toRet, cells = colnames(x = toRet)) <- ident.new[colnames(x = toRet)]
        Idents(object = toRet) <- factor(x = Idents(object = toRet), 
                                         levels = as.character(x = orig.levels), ordered = TRUE)
        toRet <- NormalizeData(object = toRet, verbose = verbose)
        toRet <- ScaleData(object = toRet, verbose = verbose)
        return(toRet)
    }
    else {
        return(data.return)
    }
}
```



Find differential expression markers

```{r}
combined.markers <- FindAllMarkers(object = combined, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
combined.markers %>% group_by(cluster) %>% top_n(2, avg_logFC)

KO.markers <- FindAllMarkers(object = KO.cells, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)

response3 <- FindMarkers(combined, ident.1 = c("1_G171B","0_G171B","2_G171B"), ident.2 = c("1_G171C", "0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(response3, n = 15)

```




Visualize top genes in principal components

```{r, echo=FALSE, fig.height=4, fig.width=8}
PCHeatmap(object = tiss1, pc.use = 1:3, cells.use = 500, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)
```

Later on (in FindClusters and TSNE) you will pick a number of principal components to use. This has the effect of keeping the major directions of variation in the data and, ideally, supressing noise. There is no correct answer to the number to use, but a decent rule of thumb is to go until the plot plateaus.

```{r}
PCElbowPlot(object = tiss1)
```

Choose the number of principal components to use.
```{r}
# Set number of principal components. 
n.pcs = 10
```

The clustering is performed based on a nearest neighbors graph. Cells that have similar expression will be joined together. The Louvain algorithm looks for groups of cells with high modularity--more connections within the group than between groups. The resolution parameter determines the scale. Higher resolution will give more clusters, lower resolution will give fewer.

For the top-level clustering, aim to under-cluster instead of over-cluster. It will be easy to subset groups and further analyze them below.

```{r}
# Set resolution 
res.used <- 4
tiss1 <- FindClusters(object = tiss1, reduction.type = "pca", dims.use = 1:n.pcs, 
    resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
```

We use TSNE solely to visualize the data.
```{r}
# If cells are too spread out, you can raise the perplexity. If you have few cells, try a lower perplexity (but never less than 10).
tiss1 <- RunTSNE(object = tiss1, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
```

```{r}
TSNEPlot(object = tiss1, do.label = T, pt.size = 1.2, label.size = 4)
```
## Compare to previous annotations
```{r}
previous_annotation = read.csv("/Users/kkarri/Documents/Lab/Single_cell_project/dropseq/Liver_droplet_annotation.csv", stringsAsFactors = FALSE)
cols = c('free_annotation', 'cell_ontology_class')
    for (col in cols){
      previous_col = paste0('previous_', col)
      tiss1@meta.data[, previous_col] <- "NA"
      tiss1@meta.data[as.character(previous_annotation$X), previous_col] <- previous_annotation[, col]
      print(table(tiss1@meta.data[, previous_col]))
      print(table(tiss1@meta.data[, previous_col], tiss@ident))
      
    }
    
tiss1 = compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "previous_cell_ontology_class")
table(tiss1@meta.data[, "previous_cell_ontology_class"], tiss@ident)
```


```{r}
tiss1 = compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "previous_cell_ontology_class")
table(tiss1@meta.data[, "previous_cell_ontology_class"], tiss1@ident)
```


```{r}
TSNEPlot(tiss1, group.by="mouse.sex")
TSNEPlot(tiss1, group.by="mouse.id")
```


Significant genes:

hepatocyte: Alb, Ttr, Apoa1, and Serpina1c
pericentral: Cyp2e1, Glul, Oat, Gulo
midlobular: Ass1, Hamp, Gstp1, Ubb
periportal: Cyp2f2, Pck1, Hal, Cdh1

endothelial cells: Pecam1, Nrp1, Kdr+ and Oit3+
Kuppfer cells: Emr1, Clec4f, Cd68, Irf7
NK/NKT cells: Zap70, Il2rb, Nkg7, Cxcr6, Klr1c, Gzma
B cells: Cd79a, Cd79b, Cd74 and Cd19
Immune cells: Ptprc




```{r, echo=FALSE, fig.height=16, fig.width=12}
# Hepatic marker
FeaturePlot(tiss1, c(genes_hep), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))

# Endothelial markers
FeaturePlot(tiss1, c(genes_endo), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))

# Kupffer cells
FeaturePlot(tiss1, c(genes_kuppfer), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))

# genes_nk
FeaturePlot(tiss1, c(genes_nk), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))

# genes_b
FeaturePlot(tiss1, c(genes_b), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))

# genes bile duct endo cells
FeaturePlot(tiss1, c(genes_bec), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))

# genes immune
FeaturePlot(tiss1, c(genes_immune), pt.size = 1, nCol = 4, cols.use = c("grey", "red"))


```

Dotplots let you see the intensity of exppression and the fraction of cells expressing for each of your genes of interest.
The radius shows you the percent of cells in that cluster with at least one read sequenced from that gene. The color level indicates the average
Z-score of gene expression for cells in that cluster, where the scaling is done over taken over all cells in the sample.

#We have various immune cell types in the last cluster
```{r, echo=FALSE, fig.height=4, fig.width=10}
DotPlot(tiss1, c(genes_kuppfer, genes_nk, genes_b, "Ptprc"), plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()
```

```{r, echo=FALSE, fig.height=8, fig.width=10}
DotPlot(tiss1, c(genes_hep_main, genes_endo, genes_nk, genes_kuppfer, genes_bec_b_immune), plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()
```

Using the markers above, we can confidentaly label many of the clusters:

19: endothelial cells
20: bile duct epithelial cells
21: immune cells
rest are hepatocytes

We will add those cell_ontology_classes to the dataset.

```{r}
tiss1 <- StashIdent(object = tiss1, save.name = "cluster.ids")
cluster.ids <- c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
free_annotation <- c(
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  NA,
  "bile duct epithelial cells",
  "endothelial cell of hepatic sinusoid",
  NA
  )
cell_ontology_class <- c(
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "hepatocyte",
  "duct epithelial cell",
  "endothelial cell of hepatic sinusoid",
  "hepatocyte")
tiss1 = stash_annotations(tiss1, cluster.ids, free_annotation, cell_ontology_class)
```

## Checking for batch effects

Color by metadata, like plate barcode, to check for batch effects.
```{r}
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "channel")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "free_annotation")

```

## Subcluster

Let's drill down on the hepatocytes.

```{r}
subtiss1 = SubsetData(tiss1, ident.use = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20))


```

```{r}
subtiss1 <- subtiss1 %>% ScaleData() %>%
  FindVariableGenes(do.plot = FALSE, x.high.cutoff = Inf, y.cutoff = 0.5) %>%
  RunPCA(do.print = FALSE)
```

```{r}
PCHeatmap(object = subtiss1, pc.use = 1:3, cells.use = 20, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)
PCElbowPlot(subtiss1)
```


```{r}
sub.n.pcs = 8
sub.res.use = 0.5
subtiss1 <- subtiss1 %>% FindClusters(reduction.type = "pca", dims.use = 1:sub.n.pcs,
    resolution = sub.res.use, print.output = 0, save.SNN = TRUE, force.recalc = TRUE) %>%
    RunTSNE(dims.use = 1:sub.n.pcs, seed.use = 10, perplexity=8)
TSNEPlot(object = subtiss1, do.label = T, pt.size = 1, label.size = 4)
```

```{r, echo=FALSE, fig.height=25, fig.width=25}
FeaturePlot(subtiss1, genes_hep,cols.use = c("grey", "red"), pt.size = 4, nCol = 4)
```

```{r, echo=FALSE, fig.height=8, fig.width=10}
DotPlot(subtiss1, all_genes, col.max = 2.5, plot.legend = T, do.return = T) + coord_flip()
```

```{r}
BuildClusterTree(subtiss1)
```

```{r, echo=FALSE, fig.height=10, fig.width=8}
#female genes have lower expression in cluster 6 relative to other female clusters, especally Xist
FeaturePlot(subtiss1,c('Mup20', 'Mup1','Mup12', 'Mup21', 'Cyp2d9', 'Xist', 'A1bg', 'Cyp2c69'),cols.use = c("grey", "red"), pt.size = 3, nCol = 2)

DotPlot(tiss1,c('Mup20', 'Mup1','Mup12', 'Mup21', 'Cyp2d9', 'Xist', 'A1bg', 'Cyp2c69'), plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()

```


From these genes, it appears that the clusters represent:

0: midlobular male
1: pericentral female
2: periportal female
3: periportal male
4: midlobular male
5: pericentral male
6: midlobular female
7: midlobular female

The multitude of clusters of each type correspond mostly to individual animals/sexes.

```{r}
table(FetchData(subtiss1, c('mouse.sex','ident')) %>% droplevels())
```

```{r}
sub.cluster.ids <- c(0, 1, 2, 3, 4, 5, 6, 7)
sub.free_annotation <- c("periportal female", "midlobular male", "pericentral female", "periportal male", "midlobular male", "pericentral male", "midlobular female", "midlobular female")
sub.cell_ontology_class <- c("hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte", "hepatocyte")
subtiss1 = stash_annotations(subtiss1, sub.cluster.ids, sub.free_annotation, sub.cell_ontology_class)
tiss1 = stash_subtiss_in_tiss(tiss1, subtiss1)
```

Liver zonation markers

```{r}
genes_zones = c('Cyp2e1', 'Glul', 'Oat', 'Gulo',
              'Ass1', 'Hamp', 'Gstp1', 'Ubb',
              'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')

FeaturePlot(subtiss1,c(genes_zones),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)

DotPlot(subtiss1,c(genes_zones), plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()


TSNEPlot(object = subtiss1, do.label = T, pt.size = 1, label.size = 4, group.by="free_annotation")

TSNEPlot(object = tiss1, do.label = T, pt.size = 1, label.size = 4, group.by="free_annotation")

```



##########
Find cluster markers for lncRNAs
```{r}

MIN_LOGFOLD_CHANGE = 1 # set to minimum required average log fold change in gene expression.
MIN_PCT_CELLS_EXPR_GENE = 0.1

all.markers = FindAllMarkers(tiss1,
                             min.pct = MIN_PCT_CELLS_EXPR_GENE,
                             logfc.threshold = MIN_LOGFOLD_CHANGE,
                             only.pos = TRUE,
                             test.use="bimod") # likelihood ratio test
lnc_all_markers <- grep(pattern = "^ncRNA", x= rownames(all.markers), value = TRUE)
lnc_all_markers

#[1] "ncRNA_inter_chr10_92081" "ncRNA_intra_chr16_13383" "ncRNA_inter_chr17_13605" "ncRNA_inter_chr14_11815"
#[5] "ncRNA_inter_chr18_14344"

FeaturePlot(subtiss1,c(lnc_all_markers),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)

######################### lncRNA markers- CELL TYPE MARKER ############
markers.hep <- FindMarkers(object = tiss1, ident.1 = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20), ident.2 = c(18,19),only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
lnc_markers_hep <- grep(pattern = "^ncRNA", x= rownames(markers.hep), value = TRUE)
lnc_markers_hep
FeaturePlot(tiss1,c(lnc_markers_hep),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(tiss1,lnc_markers_hep, plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()
#[1] "ncRNA_as_chr11_9423"     "ncRNA_as_chr7_6166"      "ncRNA_inter_chr4_3295"   "ncRNA_inter_chr17_14026"
#[5] "ncRNA_inter_chr3_2915"   "ncRNA_inter_chr5_4547"   "ncRNA_inter_chr15_12684"


markers.hep.MAST <- FindMarkers(object = tiss1, ident.1 = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20), ident.2 = c(18,19),only.pos = TRUE, test.use = "MAST")
lnc_markers_hep_MAST_TABLE <- subset(markers.hep.MAST, grepl("^ncRNA", rownames(markers.hep.MAST)))
lnc_markers_hep_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.hep.MAST), value = TRUE)
lnc_markers_hep_MAST



markers.endo <- FindMarkers(object = tiss1, ident.1 = c(18,19),  only.pos = TRUE, min.pct = 0.25, thresh.use = 0.5)
lnc_markers_endo <- grep(pattern = "^ncRNA", x= rownames(markers.endo), value = TRUE)
lnc_markers_endo
FeaturePlot(tiss1,c(lnc_markers_endo),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(tiss1,lnc_markers_endo, plot.legend = T, col.max = 2.5, do.return = T) + coord_flip()


#"ncRNA_inter_chr15_12770", "ncRNA_inter_chr12_10817", "ncRNA_as_chr13_11451",


markers.endo.MAST <- FindMarkers(object = tiss1, ident.1 = 19, test.use = "MAST" ,only.pos = TRUE)
lnc_markers_endo_MAST_TABLE <- subset(markers.endo.MAST, grepl("^ncRNA", rownames(markers.endo.MAST)))
lnc_markers_endo_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.endo.MAST), value = TRUE)
lnc_markers_endo_MAST


################## lncRNA expression ########################3

# "ncRNA_inter_chr17_13605" , "ncRNA_intra_chr16_13383"

########## Periporal markers- zonation markers ############
markers.pc <- FindMarkers(object = subtiss1, ident.1 = c(2,5), 
                              only.pos = FALSE, min.pct = 0.001, thresh.use = 0.001, test.use = "bimod" )

markers.pc.MAST <- FindMarkers(object = subtiss1, ident.1 = c(2,5), ident.2 = c(0,3), test.use = "MAST" ,only.pos = TRUE)
lnc_markers_pc <- subset(markers.pc, grepl("^ncRNA", rownames(markers.pc)))
lnc_markers_pc <- grep(pattern = "^ncRNA", x= rownames(markers.pc), value = TRUE)
lnc_markers_pc 

markers.pc.MAST <- FindMarkers(object = subtiss1, ident.1 = c(2,5), ident.2 = c(0,3), test.use = "MAST" ,only.pos = TRUE)
lnc_markers_pc_MAST <- subset(markers.pc.MAST, grepl("^ncRNA", rownames(markers.pc.MAST)))
lnc_markers_pc_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.pc.MAST), value = TRUE)
lnc_markers_pc_MAST

DotPlot(tiss1, lnc_markers_pc, plot.legend = T, col.max = 2.5, do.return = T, group.by="free_annotation") + coord_flip()
FeaturePlot(subtiss1,c(lnc_markers_pc),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)


############################### midlobular genes #############

markers.mid <- FindMarkers(object = subtiss1, ident.1 = c(1,4,6,7), 
                              only.pos = FALSE, min.pct = 0.001, thresh.use = 0.05)

lnc_markers_mid <- subset(markers.mid, grepl("^ncRNA", rownames(markers.mid)))
lnc_markers_mid <- grep(pattern = "^ncRNA", x= rownames(markers.mid), value = TRUE)
lnc_markers_mid
DotPlot(tiss1, lnc_markers_mid, plot.legend = T, col.max = 2.5, do.return = T, group.by="free_annotation") + coord_flip()


FeaturePlot(subtiss1,c(lnc_markers_mid),cols.use = c("grey", "red"), pt.size = 1, nCol = 4)



markers.mid.MAST <- FindMarkers(object = subtiss1, ident.1 = c(1,4,6,7),test.use = "MAST",only.pos = TRUE )

lnc_markers_mid_MAST_TABLE <- subset(markers.mid.MAST, grepl("^ncRNA", rownames(markers.mid.MAST)))
lnc_markers_mid_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.mid.MAST), value = TRUE)
lnc_markers_mid_MAST


#####3 periportalmarker genes############3

markers.pp <- FindMarkers(object = subtiss1, ident.1 = c(0,3),
                              only.pos = FALSE, min.pct = 0.001, thresh.use = 0.05)


lnc_markers_pp <- subset(markers.pp, grepl("^ncRNA", rownames(markers.pp)))
lnc_markers_pp <- grep(pattern = "^ncRNA", x= rownames(markers.pp), value = TRUE)
lnc_markers_pp

markers.pp.MAST <- FindMarkers(object = subtiss1, ident.1 = c(0,3), ident.2 = c(2,5),test.use = "MAST",only.pos = TRUE )

lnc_markers_pp_MAST_TABLE <- subset(markers.pp.MAST, grepl("^ncRNA", rownames(markers.pp.MAST)))
lnc_markers_pp_MAST <- grep(pattern = "^ncRNA", x= rownames(markers.pp.MAST), value = TRUE)
lnc_markers_pp_MAST

FeaturePlot(subtiss1,c(lnc_markers_pp),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_pp,"Cyp2e1","Cyp2f2"), plot.legend = T, col.max = 2.5, do.return = T, group.by= "free_annotation") + coord_flip()


################## amle and female specific ############################

markers.female <- FindMarkers(object = subtiss1, ident.1 = c(0,2,6,7),
                              only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1)

lnc_markers_female <- subset(markers.female, grepl("^ncRNA", rownames(markers.female)))
lnc_markers_female <- grep(pattern = "^ncRNA", x= rownames(markers.female), value = TRUE)
lnc_markers_female

FeaturePlot(subtiss1,c(lnc_markers_female),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_female,"Cyp2e1","Cyp2f2"), plot.legend = T, col.max = 2.5, do.return = T, group.by= "free_annotation") + coord_flip()



markers.male <- FindMarkers(object = subtiss1, ident.1 = c(1,3,4,5),
                              only.pos = TRUE, min.pct = 0.001, thresh.use = 0.05)

lnc_markers_male <- subset(markers.male, grepl("^ncRNA", rownames(markers.male)))
lnc_markers_male <- grep(pattern = "^ncRNA", x= rownames(markers.male), value = TRUE)
lnc_markers_male

FeaturePlot(subtiss1,c(lnc_markers_male),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_male,"Cyp2e1","Cyp2f2"), plot.legend = T, col.max = 2.5, do.return = T, group.by= "free_annotation") + coord_flip()


############################ Female zonate specific genes ###################################

markers.pericentral.female <- FindMarkers(object = tiss1, ident.1 = c(6,11,14,20), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,12,13,8,5,16), logfc.threshold = 1)

markers.periportal.female <- FindMarkers(object = tiss1, ident.1 = c(2,3,15),
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(6,11,14,20,12,13,8,5,16), logfc.threshold = 1)


markers.pericentral.male <- FindMarkers(object = tiss1, ident.1 = c(13,12), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,8,5,16,6,11,14,20), logfc.threshold = 1)


markers.periportal.male <- FindMarkers(object = tiss1, ident.1 = c(8,5,16), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,13,12,6,11,14,20), logfc.threshold = 1)




############### xeno-lncs CAR?RXR ##################

FeaturePlot(tiss1,c("ncRNA_inter_chr15_12684","ncRNA_inter_chr8_7430","ncRNA_inter_chr7_6222"),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)






#######################################################################

markers.endo.2 <- FindMarkers(object = seurat_drop, logfc.threshold = 2,ident.1 = "Endothelial", 
                              only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)

lnc.endo.2 <- grep(pattern = "^ncRNA", x= rownames(markers.endo.2), value = TRUE)
lnc.endo.2





```



Zonated lncRNAs 

```{r}
pp_zontaed <- c('ncRNA_inter_chr14_12016','ncRNA_as_chr19_15090','ncRNA_inter_chr10_9351','ncRNA_inter_chr16_13170',
'ncRNA_inter_chr3_2697','ncRNA_inter_chr1_274','ncRNA_as_chr6_5518','ncRNA_inter_chr14_12066','ncRNA_intra_chr12_10871',
'ncRNA_inter_chr16_13510','ncRNA_inter_chr3_2314','ncRNA_inter_chr10_9264,'ncRNA_inter_chr9_8122')





```

## Checking for batch effects

Color by metadata, like plate barcode, to check for batch effects.
```{r}
TSNEPlot(object = subtiss1, do.return = TRUE, group.by = "mouse.sex")

```

# Final coloring

Color by cell ontology class on the original TSNE.

```{r}
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "cell_ontology_class")
```

# Save the Robject for later

```{r}
filename = here('00_data_ingest', '04_tiss1ue_robj_generated', 
                     paste0("droplet_", tiss1ue_of_interest, "refinedcells_seurat_tiss1.Robj"))
print(filename)
save(tiss1, file=filename)
```

```{r}
# To reload a saved object
filename = here('00_data_ingest', '04_tiss1ue_robj_generated',
                      paste0("droplet_", tissue_of_interest, "seurat_smartdrop-integrated-8272019.Robj"))
load(file=filename)
```


# Export the final metadata


```{r}
save_annotation_csv(tiss1, tiss1ue_of_interest, "droplet")
```
