# Tissue label for this analysis run.
tissue_of_interest <- "Liver"
library(here)
source("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/boilerplate.R")
#tiss = load_tissue_droplet(tissue_of_interest)
#library(scater)
library(dplyr)
library(Seurat)
library(cowplot)
#library(MAST)
require(stringr)
require(reshape2)
require(ggplot2)
require(MASS)
library(tools)
require(data.table)
library(ggfortify)
library(tidyverse)
require(dplyr)
library(miscTools)
library(caret)
library(Rtsne)
library(ggrepel)
library(reticulate) # to import anndata use low version of gcc/5.50 in environment sccloud.
# Load the demuxEM demultiplexing results (AnnData files) through reticulate.
# source : https://cumulus-doc.readthedocs.io/en/latest/hashing_cite_seq.html#load-demultiplexing-results-into-python-and-r
#
# convert = FALSE keeps the returned Python objects unconverted; they are
# turned into R objects explicitly with py_to_r() where needed.

# The module name previously carried a stray apostrophe ("pytz'"), which is
# not an importable Python module name.
ad1 <- import("pytz", convert = FALSE)

ad <- import("anndata", convert = FALSE)
#data <- ad$read_h5ad("output_name_demux.h5ad")
data <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_demux.h5ad")
adtdata <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_ADTs.h5ad")
# NOTE(review): this file has an .h5sc extension but is read with read_h5ad();
# confirm that anndata can actually open it.
scdata <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_demux.h5sc")

# `data` is reassigned here: when the script runs top to bottom, the
# Xist_ChrY demux result replaces the G172_demux one loaded above.
data <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/software/Xist_ChrY/G172_M2-M4_Output_demux.h5ad")


# Pull the per-cell `obs` table out of the AnnData object into R, then dump it
# to two classification files. The first write relies on write.table's default
# field separator; the second is explicitly tab-separated.
rdata_obs <- py_to_r(data$obs)

write.table(
  x = as.data.frame(rdata_obs),
  file = "/restricted/projectnb/waxmanlab/kkarri/G172_demux/results/G172_demux_classification"
)

write.table(
  x = as.data.frame(rdata_obs),
  file = "/restricted/projectnb/waxmanlab/kkarri/software/Xist_ChrY/G172_M2-M4_Xist-chrY_demux_classification",
  sep = "\t"
)

#G172.umis <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/filtered_feature_bc_matrix")

# RNA UMI counts from the filtered feature-barcode matrix of the "Gene" run.
G172.umis <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Gene/outs/filtered_feature_bc_matrix")


# Names of all features whose row name starts with "ncRNA" ...
ncRNA <- grep(pattern = "^ncRNA", x = rownames(x = G172.umis), value = TRUE)
# ... and, per cell barcode, how many of those features have a non-zero count.
# drop = FALSE keeps the subset two-dimensional even when exactly one feature
# matches, so colSums() always receives a matrix.
lncRNA <- Matrix::colSums(G172.umis[ncRNA, , drop = FALSE] > 0)

#G172.umis.raw <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/raw_feature_bc_matrix/")

# HTO (hashtag) UMI counts restricted to the filtered 10x barcodes.
# gene.column = 1 takes feature names from the first column of the features file.
G172.htos <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/HASH_Citeseq/HASH_Results_filtered-10X-BC/umi_count", gene.column = 1)

# Xist / chrY table; the "X" column supplies the row names.
# NOTE(review): read.csv() defaults to check.names = TRUE, which rewrites
# column names that are not syntactic R names (e.g. a "-1" barcode suffix
# becomes ".1"). Confirm the column names still match colnames(G172.umis).
G172.Xist.ChrY <- read.csv("/restricted/projectnb/waxmanlab/kkarri/software/Xist_ChrY/G172_M2_M4_nonzero.csv", header = TRUE, row.names = "X")


# G172.htos is reassigned here: when the script runs top to bottom, the
# raw-barcode HTO counts replace the filtered-barcode counts loaded above.
G172.htos <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/HASH_Citeseq/HASH_Results_raw-10X-BC/umi_count", gene.column = 1)


# demuxEM ADT/HTO counts loaded straight from the h5ad file.
# NOTE(review): ReadH5AD() is not exported by every Seurat release — confirm
# the installed Seurat version still provides it.
G172.demux.htos <- ReadH5AD("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_ADTs.h5ad")
#G172top <- read.table("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/G172_Top_M1.txt", header = T, row.names = "cellbarcode") ### this is only for M1 top

# Per-cell hashing metadata; the "cellbarcode" column becomes the row names.
G172.hash.metadata <- read.table(
  "/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/Top_Bottom_Metadata.txt",
  header = TRUE,
  sep = "\t",
  row.names = "cellbarcode"
)

# Using the column for row names removes it from the table, so restore it as
# an ordinary column as well.
G172.hash.metadata$cellbarcode <- rownames(G172.hash.metadata)

# Split the metadata by the Top.Bottom label, then by hash ID (M1 / M2).
# The patterns are regular-expression substring matches, not exact matches.
G172top <- G172.hash.metadata[grepl("Top", G172.hash.metadata$Top.Bottom), , drop = FALSE]
G172.top.M1 <- G172top[grepl("M1", G172top$hash.ID), , drop = FALSE]
G172.top.M2 <- G172top[grepl("M2", G172top$hash.ID), , drop = FALSE]
#G172bottom <- read.table("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/G172_Bottom_M1.txt", header = T, row.names = "cellbarcode") ## this is only for M1 bottom
G172bottom <- G172.hash.metadata[grepl("Bottom", G172.hash.metadata$Top.Bottom), , drop = FALSE]
G172.bottom.M1 <- G172bottom[grepl("M1", G172bottom$hash.ID), , drop = FALSE]
G172.bottom.M2 <- G172bottom[grepl("M2", G172bottom$hash.ID), , drop = FALSE]

############################ All pre-labelled samples from demuxEM and HTodemux (Seurat) ###################

# Directory that holds every pre-labelled barcode list read below.
demux_analysis_dir <- "/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis"

#' Read one pre-labelled barcode list.
#'
#' @param file_name File name inside `dir`.
#' @param dir Directory containing the file.
#' @return A data.frame whose row names are taken from the file's "ID" column.
read_barcode_list <- function(file_name, dir = demux_analysis_dir) {
  read.csv(file.path(dir, file_name), row.names = "ID")
}

# "both" lists, one per hash ID (M1-M4).
M1_both <- read_barcode_list("M1_both_2656.txt")
M2_both <- read_barcode_list("M2_both_2868.txt")
M3_both <- read_barcode_list("M3_both_2376.txt")
M4_both <- read_barcode_list("M4_both_2679.txt")

# "demuxEM" lists.
M1_demuxEM <- read_barcode_list("M1_demuxEM_1149.txt")
M2_demuxEM <- read_barcode_list("M2_demuxEM_3880.txt")
M3_demuxEM <- read_barcode_list("M3_demuxEM_1276.txt")
M4_demuxEM <- read_barcode_list("M4_demuxEM_1076.txt")

# "HTOdemux" lists.
M1_HTOdemux <- read_barcode_list("M1_HTOdemux_827.txt")
M2_HTOdemux <- read_barcode_list("M2_HTOdemux_125.txt")
M3_HTOdemux <- read_barcode_list("M3_HTOdemux_827.txt")
M4_HTOdemux <- read_barcode_list("M4_HTOdemux_889.txt")

# "clean_both" lists.
M1_clean_both <- read_barcode_list("M1_clean_both_782.txt")
M2_clean_both <- read_barcode_list("M2_clean_both_950.txt")
M3_clean_both <- read_barcode_list("M3_clean_both_852.txt")
M4_clean_both <- read_barcode_list("M4_clean_both_965.txt")

# "both_top80" lists.
M1_both_top80 <- read_barcode_list("M1_both_top80.txt")
M2_both_top80 <- read_barcode_list("M2_both_top80.txt")
M3_both_top80 <- read_barcode_list("M3_both_top80.txt")
M4_both_top80 <- read_barcode_list("M4_both_top80.txt")

#############################################################################

  
# Cell barcodes present in the RNA count matrix AND in each companion table.
joint.bcs <- intersect(colnames(G172.umis), colnames(G172.htos))
joint.bcs.top <- intersect(colnames(G172.umis), rownames(G172top))
joint.bcs.bottom <- intersect(colnames(G172.umis), rownames(G172bottom))
# The second operand used to be the undefined name `G172.`; G172.demux.htos is
# the demuxEM object this script loads with ReadH5AD().
# NOTE(review): assumes colnames() of that object are cell barcodes — confirm.
joint.demux <- intersect(colnames(G172.umis), colnames(G172.demux.htos))
joint.xist.chrY <- intersect(colnames(G172.umis), colnames(G172.Xist.ChrY))

# Subset RNA and HTO counts by joint cell barcodes
# NOTE(review): `df2` is not defined in the visible part of this script;
# confirm it is created elsewhere (e.g. by the sourced boilerplate) before
# this line runs.
joint_cite1_cite2 <- intersect(colnames(G172.htos), colnames(df2))
# NOTE(review): G172.umis is subset twice in a row. joint.xist.chrY was
# computed from the un-subset matrix, so the second subset only succeeds when
# every one of those barcodes is also in joint.bcs — confirm both steps are
# meant to run together.
# drop = FALSE keeps each result two-dimensional even for a single barcode.
G172.umis <- G172.umis[, joint.bcs, drop = FALSE]
G172.umis <- G172.umis[, joint.xist.chrY, drop = FALSE]

G172.htos <- as.matrix(G172.htos[, joint.bcs, drop = FALSE])
# Remove the fifth HTO row.
# NOTE(review): presumably a non-sample row (e.g. unmapped reads) — confirm.
G172.htos <- G172.htos[-5, ]
G172.Xist.ChrY <- as.matrix(G172.Xist.ChrY[, joint.xist.chrY, drop = FALSE])


################################ joint barcodes for preloaded barcodes ###################
# Every vector below is the set of RNA-matrix barcodes that also appear as row
# names of one pre-labelled table. G172.umis is not modified in this section,
# so its column names are captured once and reused.
g172.umi.barcodes <- colnames(G172.umis)

# Barcodes in the "both" tables, per hash ID.
joint.bcs.M1.both <- intersect(g172.umi.barcodes, rownames(M1_both))
joint.bcs.M2.both <- intersect(g172.umi.barcodes, rownames(M2_both))
joint.bcs.M3.both <- intersect(g172.umi.barcodes, rownames(M3_both))
joint.bcs.M4.both <- intersect(g172.umi.barcodes, rownames(M4_both))
# joint.bcs is replaced by the concatenation of the four "both" sets.
joint.bcs <- c(
  joint.bcs.M1.both,
  joint.bcs.M2.both,
  joint.bcs.M3.both,
  joint.bcs.M4.both
)

# Barcodes in the "demuxEM" tables.
joint.bcs.M1.demuxEM <- intersect(g172.umi.barcodes, rownames(M1_demuxEM))
joint.bcs.M2.demuxEM <- intersect(g172.umi.barcodes, rownames(M2_demuxEM))
joint.bcs.M3.demuxEM <- intersect(g172.umi.barcodes, rownames(M3_demuxEM))
joint.bcs.M4.demuxEM <- intersect(g172.umi.barcodes, rownames(M4_demuxEM))

# Barcodes in the "HTOdemux" tables.
joint.bcs.M1.HTOdemux <- intersect(g172.umi.barcodes, rownames(M1_HTOdemux))
joint.bcs.M2.HTOdemux <- intersect(g172.umi.barcodes, rownames(M2_HTOdemux))
joint.bcs.M3.HTOdemux <- intersect(g172.umi.barcodes, rownames(M3_HTOdemux))
joint.bcs.M4.HTOdemux <- intersect(g172.umi.barcodes, rownames(M4_HTOdemux))

# Barcodes in the Top / Bottom metadata subsets for M1 and M2.
joint.bcs.M1.top <- intersect(g172.umi.barcodes, rownames(G172.top.M1))
joint.bcs.M2.top <- intersect(g172.umi.barcodes, rownames(G172.top.M2))
joint.bcs.M1.bottom <- intersect(g172.umi.barcodes, rownames(G172.bottom.M1))
joint.bcs.M2.bottom <- intersect(g172.umi.barcodes, rownames(G172.bottom.M2))

# Barcodes in the "clean_both" tables.
joint.bcs.M1.clean.both <- intersect(g172.umi.barcodes, rownames(M1_clean_both))
joint.bcs.M2.clean.both <- intersect(g172.umi.barcodes, rownames(M2_clean_both))
joint.bcs.M3.clean.both <- intersect(g172.umi.barcodes, rownames(M3_clean_both))
joint.bcs.M4.clean.both <- intersect(g172.umi.barcodes, rownames(M4_clean_both))

# Barcodes in the "both_top80" tables.
joint.bcs.M1.both.top80 <- intersect(g172.umi.barcodes, rownames(M1_both_top80))
joint.bcs.M2.both.top80 <- intersect(g172.umi.barcodes, rownames(M2_both_top80))
joint.bcs.M3.both.top80 <- intersect(g172.umi.barcodes, rownames(M3_both_top80))
joint.bcs.M4.both.top80 <- intersect(g172.umi.barcodes, rownames(M4_both_top80))

  #colnames(raw.data) <- lapply(colnames(raw.data), function(x) paste0(tissue_metadata$channel[1],'_',x))
  
# Show the HTO row names so they can be checked by eye.
rownames(G172.htos)
#################### Before making a Seurat object remove sex-biased genes or TCPO-induced genes####################
# Locate the rows of G172.umis that belong to the sex-biased feature set.
# x1 and x2 list gene symbols, x3-x5 list ncRNA identifiers. Each x* is an
# integer vector of row indices (value = FALSE) -- presumably used further down
# to drop these rows from the matrix (TODO confirm where x1..x5 are consumed).
#
# Each alternation is wrapped in "^(" ... ")$" so that an entry has to equal
# the whole row name. Left unanchored, short symbols such as "Ar", "Hp", "Hc"
# or "C9" also hit every feature that merely contains them, and an id such as
# "ncRNA_inter_chr1_290" also hits longer ids that start with it.
# The x1 list is assembled from two literals with paste0(): the hard line
# wrap used to sit inside "Plscr2", which put a newline into the pattern and
# made that symbol unmatchable.
# NOTE(review): x3 contains "ncRNA_as_chrx_15546" with a lowercase "x" while
# the other X-linked ids use "chrX" -- confirm against the annotation.
#All.sex.index <- grep(c("Alb|Cyp2b10"), x = rownames(count.data), value = FALSE) 
x1 <- grep(
  paste0(
    "^(",
    "1810046K07Rik|2700097O09Rik|A1bg|Abcb1a|Abcc1|Abcc4|Abcd2|Abhd2|Acnat2|Acot2|Acot3|Acot4|Acot6|Acss2|Acss3|Actg1|Adcy1|Adrb2|Aif1|Akna|Akr1b7|Akr1c18|Akr1c20|Akr1d1|Aldh1b1|Aldh3a2|Amigo2|Ankrd55|Anxa6|Ar|Arhgap30|Arhgef19|Arhgef37|Arl13b|Arrdc4|Asb2|Atf3|Atp2b2|Atp6v0d2|Atp8a1|B3galnt1|B430306N03Rik|Batf2|Bche|Bin2|Bmper|C5ar1|Cables1|Camk1d|Camkk2|Cbr1|Ccbe1|Ccdc141|Ccdc68|Cchcr1|Ccl2|Ccl5|Ccnd1|Cd14|Cd163|Cd274|Cd300e|Cd36|Cd3e|Cd44|Cd52|Cd53|Cd72|Cd74|Cd83|Cdcp1|Cdkn1c|Cdx4|Cdyl2|Cenpm|Cep152|Cers6|Ces1b|Ces1c|Cfp|Chic1|Chrna4|Clec12a|Clic3|Cmklr1|Cmpk2|Cnst|Corin|Crls1|Csf1|Ctbp2|Cth|Ctsc|Ctss|Cux2|Cxcl10|Cxcl13|Cxcl9|Cyba|Cybb|Cyp17a1|Cyp2a22|Cyp2a4|Cyp2a5|Cyp2b10|Cyp2b13|Cyp2b9|Cyp2c37|Cyp2c38|Cyp2c39|Cyp2c40|Cyp2c50|Cyp2c54|Cyp2c69|Cyp2g1|Cyp39a1|Cyp3a16|Cyp3a41a|Cyp3a41b|Cyp3a44|Cyp46a1|Cyp4a10|Cyp4a14|Cyp4a31|Cyp4f16|Cyp7a1|D130043K22Rik|Dapk1|Dct|Ddah1|Dhrs7|Dlg4|Dll1|Dntt|Dock10|Dock7|Dqx1|Dram1|Dusp6|Echdc3|Eci3|Eif2s3x|Eif4e3|Elf4|Emp2|Enpp1|Esr1|Esrrg|Evc|Evc2|Evi2a|Fabp7|Fam126a|Fam49a|Fam84b|Fam89a|Fancc|Far1|Fbxl21|Fcer1g|Fgd2|Fibin|Fign|Fmn1|Fmo1|Fmo2|Fmo3|Fmo4|Folr2|Fpr1|Fpr2|Fut8|Fyb|G6pdx|Gadd45b|Gas1|Gas6|Gbp10|Gbp2|Gbp3|Gbp4|Gbp5|Gbp6|Gbp7|Gbp8|Gimap8|Gm11437|Gm4841|Golt1a|Got1|Gpr65|Gpsm1|Gpx7|Grid1|Gsto2|Gstt1|Gstt3|Gypc|Gzmb|H2-Aa|H2-Ab1|H2-DMb1|H2-Eb1|H2-Q1|H2-Q4|H2-T24|Hacl1|Hamp|Hamp2|Hao2|Hbegf|Hck|Hcls1|Hexb|Hist1h4i|Hk3|Hpd|Hsd3b1|Htra4|Id1|Ifi202b|Ifi204|Ifi205|Ifi27l2b|Ifit1|Ifit2|Ifit3|Igfbp1|Igfbp3|Igsf6|Igtp|Il10ra|Il15|Il18bp|Il1b|Il2rg|Il7|Ildr2|Irgm1|Isg15|Itga4|Itga6|Itgal|Itgb2|Jazf1|Kcnj10|Kitl|Klf6|Klhl13|Klra2|Lacc1|Lck|Lect2|Lgals1|Lgr5|Lhpp|Lig1|Lrrc24|Lrrc25|Lrtm1|Lyz2|Maob|Marcksl1|Marco|Mat1a|Mbl2|Mdm1|Me2|Mest|Mfsd2a|Mgst3|Mkx|Mllt3|Mmd2|Mme|Mmrn2|Mpc1|Mroh6|Ms4a4d|Ms4a6c|Myo1f|N4bp2l1|Ncald|Ncf1|Ncf2|Ncf4|Ncmap|Ndrg1|Nfkb2|Ngfr|Nipal1|Nlrp3|Nnmt|Npas2|Nqo1|Nr4a1|Nrp2|Nt5e|Ntrk2|Oasl2|Oat|Orm3|P2ry4|Papss2|Parp11|Pbx1|Pced1b|Pcp4l1|Pcsk5|Pde1a|Pde4d|Pdk4|Pgm5|Pik3r5|Pira2|Pla1a|Pla2g4b|Plek|Plgrkt|Pls",
    "cr2|Pnpla3|Pola1|Ppl|Prex1|Prlr|Prom1|Prom2|Psd|Psmb8|Psmb9|Ptgds|Ptpdc1|Ptprc|Pygo1|Rab27a|Rab30|Rac2|Raet1e|Rbl1|Rcan2|Rcan3|Rdh16|Ren1|Rfx4|Rgn|Rgs12|Rnd1|Rsph4a|Rtn4|S100a13|Sall1|Samd9l|Sbk1|Sdc1|Sept1|Serpina3g|Serpina6|Serpinb1a|Serpinb8|Serpine3|Setd4|Sgsm1|Sgsm2|Sh2d4a|Sh3bgrl3|Sh3yl1|Shroom1|Ski|Slc13a3|Slc15a3|Slc16a5|Slc16a7|Slc17a4|Slc22a26|Slc22a27|Slc22a29|Slc25a21|Slc25a24|Slc25a27|Slc25a34|Slc26a10|Slc34a2|Slc39a5|Slc47a1|Slc4a4|Slc6a12|Slco1a4|Slfn8|Smim24|Smtnl2|Snai2|Snx12|Sorl1|Sox7|Sparcl1|Spic|Spice1|Spred1|Src|Srgap3|Srl|Srrm4|St6galnac4|Stambpl1|Stat1|Stk39|Sult1a1|Sult1c2|Sult1d1|Sult1e1|Sult2a1|Sult2a2|Sult2a3|Sult2a5|Sult2a6|Sult3a1|Sybu|Syk|Sytl5|Tbc1d8|Tcf7|Tcn2|Tead1|Tenm3|Tgm1|Tgtp1|Tgtp2|Thbs2|Thy1|Timd4|Tlr13|Tm4sf4|Tm6sf2|Tmem173|Tmem200b|Tmem26|Tmem98|Tmsb4x|Tnfaip8|Tnfaip8l3|Tnfsf10|Tnik|Tox|Treml4|Trim13|Trim24|Trim30a|Trim80|Tspyl4|Ttc21b|Txndc16|Tyrobp|Uba7|Ugt1a1|Ugt1a5|Ugt2b37|Utp14b|Vcam1|Vldlr|Wfdc15b|Wfdc17|Wfdc2|Zap70|Zbp1|Zc2hc1a|Zc3h12d|Zfp292|Zfp36l1|Zfp429|Zfp820|Zfp947|Zfp960|1810055G02Rik|2010003K11Rik|2200002D01Rik|3110082I17Rik|9130409I23Rik|A2m|Aadat|Abcb10|Abcg2|Abhd17b|Abhd5|Acsf2|Acsm2|Adamtsl2|Adarb2|Adora1|Adrb3|Ak4|Alas2|Aldoc|Alpl|Aox1|Aox3|Apoa1|Apoc2|Apol10a|Aqp4|Arhgap44|Arl6ip1|Arsa|Arsg|Asns|Atg16l2|Atp8b4|B3galt1|B4galnt3|Bcl6|Bik|Bmp4|Bmyc|Bok|C6|C7|C8a|C8b|C9|Cad|Caln1|Camk2b|Capn8|Cblc|Ccdc171|Ccne1|Ccnf|Ces1e|Ces2b|Ces2c|Ces3a|Ces3b|Ces4a|Chac1|Chpt1|Chrna2|Ciart|Cib3|Cidec|Clec2h|Clstn3|Col27a1|Col5a3|Comt|Cpne2|Cpne8|Cpsf4l|Crlf2|Crybb3|Csad|Cspg5|Ctps|Cxcl14|Cyp21a1|Cyp2c67|Cyp2d40|Cyp2d9|Cyp2f2|Cyp2j9|Cyp2u1|Cyp3a11|Cyp4a12a|Cyp4a12b|Cyp4a32|Cyp4v3|Cyp7b1|Cyp8b1|Cys1|Dbp|Ddx3y|Derl3|Dirc2|Dnaic1|Dnajc12|Dpy19l1|Dpy19l3|Dsg1c|Efna3|Egfr|Eif2s3y|Eif4ebp3|Elovl3|Enpp2|Enpp3|Ephx1|Ephx2|Eps8l2|Erc2|Ero1lb|F2r|F830016B08Rik",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
x2 <- grep(
  paste0(
    "^(",
    "Fabp5|Fam171b|Fam222a|Fam47e|Fbxo21|Fgf21|Fitm1|Fkbp11|Fmn2|Fst|Gcnt4|Gde1|Gdpd1|Gjc3|Glo1|Gm7694|Gna12|Gna14|Gpc1|Gpr12|Grem2|Grhpr|Grm8|Gse1|Gsta2|Gstm3|Gstp1|Gstp2|Gtf2b|Gys2|H1fx|H6pd|Hao1|Haus4|Hba-a1|Hba-a2|Hbb-bs|Hc|Hes6|Hp|Hpx|Hsbp1l1|Hsd3b2|Hsd3b5|Hspa1b|Hspb1|Hspb6|Hunk|Igsf23|Ihh|Ikbkg|Il12rb1|Il1r1|Impa2|Insc|Isyna1|Kctd15|Kdm5d|Keg1|Klhdc7a|Lama3|Lcn2|Ldhd|Lhx6|Lpl|Lrg1|Lrrc20|Magee1|Maml3|Mas1|Mcm10|Mmp15|Mn1|Mrgpre|Mri1|Mrvi1|Mthfd1l|Mthfd2|Mtmr7|Mtnr1a|Mug1|Mup1|Mup10|Mup11|Mup12|Mup13|Mup14|Mup15|Mup16|Mup17|Mup19|Mup20|Mup21|Mup3|Mup5|Mup6|Mup7|Mup9|Mycn|Myef2|Nat8|Nek2|Nhej1|Nlrp12|Nox4|Nudt7|Nuggc|Obp2a|Ociad2|Olfm3|Olfr613|Omd|Orm1|Orm2|Osgin1|Paqr7|Pard3b|Pdilt|Per2|Pfkfb3|Pgp|Pitx3|Pkdrej|Plekha1|Pnlip|Podn|Ppp1r1b|Proca1|Prss8|Prtn3|Rab34|Rarres1|Rassf3|Rbbp4|Retsat|Rnase13|Rnaseh2a|Rnaseh2b|Saa1|Saa2|Saa3|Scara5|Scp2|Sdr16c5|Sebox|Sel1l3|Selenbp2|Serinc2|Serpina11|Serpina12|Serpina1a|Serpina1c|Serpina1d|Serpina3a|Serpina3k|Serpina5|Serpina7|Serpina9|Serpine2|Slc12a4|Slc13a5|Slc15a5|Slc17a8|Slc22a28|Slc22a7|Slc25a33|Slc35e3|Slc41a2|Slc6a8|Slco1a1|Smpd3|Snhg11|Snx29|Sort1|Sox12|Spatc1l|Spon2|Spp1|Srd5a1|Stbd1|Stk19|Susd4|Sycp3|Tcaim|Tdo2|Tff3|Tmc7|Tmem41a|Tnfrsf12a|Tpmt|Tram2|Treh|Trip4|Tsku|Tspan33|Tspan4|Ttc39a|Ttc39c|Uba5|Ugdh|Ugt1a6b|Ugt2b1|Ugt2b35|Ugt2b38|Ugt2b5|Unc119|Unc13b|Unc5b|Usp2|Uty|Vmo1|Wee1|Wipi1|Wnk4|Xlr3a|Zbtb7c|Zdhhc2|Zfp445|Zfp687|Zfp809|Zfp872|Gm47283",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
x3 <- grep(
  paste0(
    "^(",
    "ncRNA_as_chr10_9385|ncRNA_as_chr11_10058|ncRNA_as_chr11_10170|ncRNA_as_chr14_11846|ncRNA_as_chr15_12679|ncRNA_as_chr15_12957|ncRNA_as_chr16_13512|ncRNA_as_chr17_13628|ncRNA_as_chr1_757|ncRNA_as_chr2_1125|ncRNA_as_chr2_1343|ncRNA_as_chr4_3230|ncRNA_as_chr4_3843|ncRNA_as_chr5_4731|ncRNA_as_chr5_4745|ncRNA_as_chr7_6007|ncRNA_as_chr8_7290|ncRNA_as_chr8_7332|ncRNA_as_chr9_7824|ncRNA_as_chrx_15546|ncRNA_inter_chr10_9183|ncRNA_inter_chr10_9222|ncRNA_inter_chr10_9223|ncRNA_inter_chr10_9224|ncRNA_inter_chr12_10681|ncRNA_inter_chr12_10922|ncRNA_inter_chr1_290|ncRNA_inter_chr13_11189|ncRNA_inter_chr13_11437|ncRNA_inter_chr13_11438|ncRNA_inter_chr1_341|ncRNA_inter_chr14_11978|ncRNA_inter_chr14_12237|ncRNA_inter_chr15_12585|ncRNA_inter_chr15_12683|ncRNA_inter_chr15_12684|ncRNA_inter_chr16_13062|ncRNA_inter_chr16_13225|ncRNA_inter_chr16_13428|ncRNA_inter_chr16_13510|ncRNA_inter_chr18_14392|ncRNA_inter_chr18_14432|ncRNA_inter_chr18_14590|ncRNA_inter_chr19_14770|ncRNA_inter_chr19_14802|ncRNA_inter_chr1_977|ncRNA_inter_chr2_1422|ncRNA_inter_chr2_1424|ncRNA_inter_chr2_1430|ncRNA_inter_chr2_1432|ncRNA_inter_chr2_2012|ncRNA_inter_chr2_2016|ncRNA_inter_chr2_2017|ncRNA_inter_chr2_2085|ncRNA_inter_chr3_2743|ncRNA_inter_chr3_2915|ncRNA_inter_chr3_2937|ncRNA_inter_chr3_3002|ncRNA_inter_chr4_3141|ncRNA_inter_chr4_3177|ncRNA_inter_chr4_3344|ncRNA_inter_chr5_4115|ncRNA_inter_chr5_4116|ncRNA_inter_chr5_4365|ncRNA_inter_chr5_4527|ncRNA_inter_chr5_4763|ncRNA_inter_chr5_4764|ncRNA_inter_chr6_5138|ncRNA_inter_chr6_5253|ncRNA_inter_chr6_5691|ncRNA_inter_chr7_6113|ncRNA_inter_chr8_6766|ncRNA_inter_chr8_6946|ncRNA_inter_chr8_7105|ncRNA_inter_chr8_7334|ncRNA_inter_chr8_7684|ncRNA_inter_chr9_7878|ncRNA_inter_chr9_8015|ncRNA_inter_chr9_8249",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
x4 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chrX_15376|ncRNA_inter_chrX_15394|ncRNA_inter_chrX_15549|ncRNA_intra_chr15_12674|ncRNA_intra_chr15_12675|ncRNA_intra_chr19_15008|ncRNA_intra_chr19_15009|ncRNA_intra_chr19_15014|ncRNA_intra_chr5_4726|ncRNA_intra_chr5_4728|ncRNA_intra_chr5_4730|ncRNA_intra_chr7_5920|ncRNA_intra_chr7_6001|ncRNA_intra_chr8_7308|ncRNA_intra_chr8_7310|ncRNA_as_chr10_9240|ncRNA_as_chr1_231|ncRNA_as_chr14_12074|ncRNA_as_chr15_12874|ncRNA_as_chr19_14711|ncRNA_as_chr19_15054|ncRNA_as_chr3_2192|ncRNA_as_chr3_2800|ncRNA_as_chr4_3297|ncRNA_as_chr6_5601|ncRNA_as_chr6_5851|ncRNA_as_chr7_6302|ncRNA_as_chr8_7087|ncRNA_as_chr9_8142|ncRNA_as_chr9_8420|ncRNA_as_chrX_15318|ncRNA_inter_chr10_8466|ncRNA_inter_chr10_8767|ncRNA_inter_chr10_8829|ncRNA_inter_chr10_8999|ncRNA_inter_chr10_9123|ncRNA_inter_chr10_9138|ncRNA_inter_chr10_9245|ncRNA_inter_chr10_9313|ncRNA_inter_chr10_9349|ncRNA_inter_chr10_9351|ncRNA_inter_chr11_10078|ncRNA_inter_chr11_10159|ncRNA_inter_chr11_10162|ncRNA_inter_chr1_115|ncRNA_inter_chr11_9925|ncRNA_inter_chr12_10423|ncRNA_inter_chr12_10454|ncRNA_inter_chr12_10458|ncRNA_inter_chr12_10562|ncRNA_inter_chr12_10910|ncRNA_inter_chr1_234|ncRNA_inter_chr13_11222|ncRNA_inter_chr13_11553|ncRNA_inter_chr13_11670|ncRNA_inter_chr13_11782|ncRNA_inter_chr1_406|ncRNA_inter_chr14_11858|ncRNA_inter_chr14_11989|ncRNA_inter_chr14_12201|ncRNA_inter_chr1_420|ncRNA_inter_chr1_477|ncRNA_inter_chr15_12341|ncRNA_inter_chr15_12609|ncRNA_inter_chr15_12937|ncRNA_inter_chr16_13170|ncRNA_inter_chr16_13173|ncRNA_inter_chr16_13176|ncRNA_inter_chr16_13177|ncRNA_inter_chr16_13349|ncRNA_inter_chr16_13451|ncRNA_inter_chr16_13470|ncRNA_inter_chr16_13471|ncRNA_inter_chr1_630|ncRNA_inter_chr17_13817|ncRNA_inter_chr17_13827|ncRNA_inter_chr17_13938|ncRNA_inter_chr17_13940|ncRNA_inter_chr17_13983",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
x5 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chr17_13986|ncRNA_inter_chr17_14028|ncRNA_inter_chr17_14151|ncRNA_inter_chr1_734|ncRNA_inter_chr18_14223|ncRNA_inter_chr19_14853|ncRNA_inter_chr19_14873|ncRNA_inter_chr1_931|ncRNA_inter_chr2_1156|ncRNA_inter_chr2_1157|ncRNA_inter_chr2_1232|ncRNA_inter_chr2_1233|ncRNA_inter_chr2_1462|ncRNA_inter_chr2_1471|ncRNA_inter_chr2_1479|ncRNA_inter_chr2_1502|ncRNA_inter_chr2_1689|ncRNA_inter_chr2_2002|ncRNA_inter_chr2_2037|ncRNA_inter_chr3_2314|ncRNA_inter_chr3_2504|ncRNA_inter_chr3_2786|ncRNA_inter_chr3_2935|ncRNA_inter_chr4_3079|ncRNA_inter_chr4_3468|ncRNA_inter_chr4_3723|ncRNA_inter_chr5_4681|ncRNA_inter_chr5_4787|ncRNA_inter_chr5_4819|ncRNA_inter_chr6_5023|ncRNA_inter_chr6_5248|ncRNA_inter_chr6_5309|ncRNA_inter_chr6_5316|ncRNA_inter_chr6_5551|ncRNA_inter_chr6_5595|ncRNA_inter_chr7_5922|ncRNA_inter_chr7_6087|ncRNA_inter_chr7_6220|ncRNA_inter_chr7_6222|ncRNA_inter_chr7_6509|ncRNA_inter_chr8_6739|ncRNA_inter_chr8_6741|ncRNA_inter_chr8_6876|ncRNA_inter_chr8_6971|ncRNA_inter_chr8_7280|ncRNA_inter_chr8_7420|ncRNA_inter_chr8_7423|ncRNA_inter_chr8_7430|ncRNA_inter_chr9_7753|ncRNA_inter_chr9_8099|ncRNA_inter_chr9_8122|ncRNA_inter_chr9_8383|ncRNA_intra_chr12_10856|ncRNA_intra_chr12_10872|ncRNA_intra_chr15_12911|ncRNA_intra_chr18_14533|ncRNA_intra_chr19_14773|ncRNA_inter_chrX_15394",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)


# Row indices (into G172.umis) of the gene symbols in the TCPO1..TCPO5 lists --
# presumably the TCPO-responsive genes that are to be excluded (TODO confirm
# where TCPO1..TCPO5 are consumed). grep(value = FALSE) returns integer
# positions within rownames(G172.umis).
#
# Each alternation is wrapped in "^(" ... ")$" so that a symbol has to equal
# the whole row name. Left unanchored, short symbols such as "Fos", "Myc",
# "Jun", "Cbl", "Bad", "Mt1" or "Xk" also hit every feature that merely
# contains them.
# TCPO3 and TCPO4 are assembled from two literals with paste0(): the hard line
# wraps used to put a newline into the pattern (after "Chst3" and inside
# "Lingo4"), which made those two symbols unmatchable.
TCPO1 <- grep(
  paste0(
    "^(",
    "Corin|Ppp1r3c|Xlr3a|Xlr3b|Cyp2a5|Ikbke|Gdf15|Anln|Rarb|Cdca2|Gna14|Kif20b|Tpx2|Cenpe|Arhgef39|Kif2c|Cxcl1|Mmd2|Ephb6|Wipf3|Gprc5d|Ttyh1|Atp4a|Plekha4|Mrgprb3|Bcl7c|Prr36|Fgl1|Slc18a1|Fcho1|Gm10638|St3gal4|Qsox1|Armc2|Dnajc12|Avpr1a|Kif5a|Camk2b|Olfr1393|Sgsm2|Capn8|Gpx6|Dcdc2a|Cxcl14|Lrit2|Sftpa1|Slc39a2|Tspyl5|Cpne8|Npff|Gp5|Prr18|Caskin1|Fam129b|Zfp811|Abcg5|Abcg8|Zbtb7c|Rom1|Cnnm1|Timp1|Xlr3c|Slc1a2|Spag4|Gm826|Rbpjl|Satb2|Gm6525|Ankrd6|Tmem169|Ttll10|Crot|Zfyve28|Ncapg|Ccdc18|Trpm8|Ugt1a5|Rph3a|Fzd10|Akr1d1|Zfp398|Gprin3|Mad2l1|Bmp10|Kbtbd12|Fancd2|Cdca3|Rad51ap1|Foxm1|Ptpn4|Bhlhe41|Lig1|Cyp2g1|Nphs1|Ccne1|Ticrr|Gsta3|Fzd4|Cdk18|Plk1|Kif22|Chid1|Shcbp1|Kif14|Cenpu|Neil3|Aspm|Ankle1|Pdp2|Gins2|Sipa1l2|Mmp12|Sesn3|Fam129a|Ccdc15|Cbl|H2afx|Apoa4|Cyp1a1|Kif23|Zwilch|Pif1|Traip|Fam198a|Kif15|Adgb|Sult3a1|Cdk1|Parpbp|Gas2l3|Kitl|Syt1|Nuf2|Cobl|Uhmk1|Fbxo48|Spdl1|Hmmr|Olfr16|Aurkb|Spag5|Exo1|Ska2|Lrrc46|Brca1|BC030867|Kif18b|Birc5|Rrm2|Rdh11|Plekhh1|Serpina6|Ncapg2|Adarb2|Zfp184|Hist1h2bk|Hist1h2be|Ppp1r3g|Atf3|Zfp369|Cenph|Meig1|Wnt5a|Cdkn3|Dlgap5|Tnfrsf19|Pbk|Klf12|Lmbrd2|Nebl|Recql4|Troap|Sapcd2|Cdc45|Polq|C330027C09Rik|Epha3|Ncam2|Cyyr1|Zfp97|Pkmyt1|Chtf18|Neurl1B|Kifc5b|Kifc1|Emilin2|Ndc80|Strn|Fzd8|Mib1|Stard4|Cdc25c|Kif20a|Ldlrad4|Mc5r|Ska1|Cfap53|Cyb5a|Gstp2|Incenp|Fen1|Ptar1|Kif11|Cep55|Cyp2c66|Lcor|4933411K16Rik|Slc9a7|Kif4|Fancb|Kif18a|Arhgap11a|Bub1b|Ncaph|Bub1|Wfdc16|Ube2c|Aurka|Gm14440|Ccna2|Plk4|Trim59|Tlr2|Iqgap3|Them5|Hist2h2bb|Nbeal1|Psrc1|Pard3b|Depdc1a|Mms22l|Melk|Smc2|Ptgr1|Mup19|Mup5|Mup21|Mup11|Mup15|Acot11|Orc1|Cdkn2c|Stil|Rad54l|Ptch2|Cdca8|Clspn|Aunip",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
TCPO2 <- grep(
  paste0(
    "^(",
    "Megf6|Vwa1|Cgref1|Sult5a1|Dio1|Ren1|Slc15a2|A1bg|Mup17|Trpv1|Klhl25|Cbr1|Gpcpd1|A2m|Cadm4|Ppp1r14a|Saa3|Rad51b|Fos|Dsg1c|Tchh|Crym|Gmnn|Bik|Trib3|Nnmt|E2f2|Sult1d1|Por|Mad1l1|Cyp3a11|Gadd45a|Cyp2b10|Tsku|Ces2a|4931406C07Rik|Alas1|Gadd45b|9130409I23Rik|Pnpla3|Dtx4|Cyp2c55|Cyp2c29|Rarres1|Gstm3|Pla2g12a|Rdh16|Gadd45g|Ttll13|Rnf186|Tmub1|Fabp2|Sult1e1|Katna1|Cyp21a1|Cyp3a44|Cyp2c40|Cyp2c69|Rdh9|Dbp|Atp2b2|Cdc20|Slc34a2|Akr1b7|Taf1d|Trmt61a|Nedd9|Pfkfb3|Gm14295|Gm14403|Exosc8|D630039A03Rik|Mug2|Pkp3|Derl3|Ndufa4l2|Slc16a11|Sema5b|Bmf|Ell3|Dnaic1|Cyp4a14|Cyp4a10|Azin2|Mt2|Mt1|Fbxo31|Igfbp1|Slc13a5|Pole|Ckap2|Apol10a|Sult1c2|Tcp11l1|Hao2|Mup12|Slc10a2|Colgalt2|Psmc3ip|Ttll8|Ptges|Gstm2|Gstm1|Ctps|Chka|Gjb4|Cdkn1a|Mki67|Cenpf|Ccnb1|Espl1|Knstrn|Thrsp|Shroom1|Hspa2|Pusl1|Hmgn3|Btnl9|Barhl1|Ttbk1|Stk36|Ihh|Saa2|Sebox|Cyb561|Slc7a15|Adamtsl2|Rbm20|Map3k6|Sc5d|Mis18bp1|Mastl|Chaf1a|Slc22a26|Slco1a4|Myc|Mbd1|Orm3|Fgf21|Adrb3|Serpina3n|Arid5a|Apol10b|Mup16|Vps37b|Rhbdd2|Papss2|Cyp26a1|Prss22|E030018B13Rik|Socs2|Upp2|Lnx1|Cux2|Zbed5|Slc12a9|Nxpe5|Sun1|3110082I17Rik|Tmem184a|Cyp3a16|Gpr12|Pon1|Hes6|Akr1b8|Trim24|Atoh8|Pparg|Gucy2c|Gys2|Ldhb|Rep15|Zfp773|Grik5|Phldb3|Lgi4|Nupr1|Adora1|Oat|B4galnt4|Lrrc56|Slc25a22|Cdkn1c|Ano1|Eif4ebp1|Rfxank|Pbx4|Slc27a1|Klf1|Hook2|Spata2L|Agt|Fam89a|Ccdc151|Oaf|1810046K07Rik|Pcbp4|Gnat1|Slc38a3|Mst1r|Col7a1|Plxnb1|Tmie|Acaa1a|Acvr2b|Cyp8b1|Tpd52l1|Fmo3|Col18a1|Tle2|Mterf2|Mgst3|Lrp1|B4galnt1|Itga7|Gls2|Tbc1d10a|Srebf1|Trp53i13|Rara|Arl4d|Rundc3a|Slc16a5|Llgl2|Tmc6|Plekhg3|Acot3|Rps6kl1|Eml1|D130043K22Rik|Foxq1|Fbp1|Ube2ql1|Slc12a7|Ccno|Mcm10|Il17rb|Rpgrip1|Rabggta|Tgm1|Irf9|Gulo|Sorbs3|Dct|Kifc2|Pdxp|Pmm1|Cenpm|Arhgap8|Celsr1|Chkb|Rnd1|Lmbr1l|Csad|Smim22|Dexi|Prodh|Clcn2|Agpat2|Sardh|Crat|Gm9992|Slc22a3|Unc93a|Ass1|Pim1|Slc25a25|St6galnac6|Cyp4f16|Gnmt|Plin5|Egr1|Gnpda1|Ppargc1b|Arhgef37|Pcyox1l|Bad|Macrod1|Slc22a29|Rab3il1|Trpm6|Got1|Col17a1|Gsto1|Plekhs1|Slc7a3|Mum1l1|Pdk1|Pla2g4f|Prom2|Acss2|Neurl2|Slc2a2|Thbs3",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
TCPO3 <- grep(
  paste0(
    "^(",
    "Il6ra|Selenbp1|Anxa9|Ctsk|Alpk1|Cth|Col27a1|Ccdc30|Hpca|Ptpru|Tmem200b|Cnksr1|Paqr7|Ifnlr1|Alpl|Rap1gap|Klhdc7a|Ugt2b35|Mob1b|Dr1|Ugt1a9|Hsph1|C1galt1|Grm8|Zfp212|Kbtbd8|B4galnt3|Slc15a5|Pla2g4c|Zfp619|Gas2|Cd55|Yod1|Prss23|Acer3|Ipo7|Mrvi1|Dkk3|Mical2|Psma1|Pde3b|Pdilt|Zfp768|Tnks|Ppp1r3b|Zfp617|Calr|Edem3|Swsap1|Zfp872|Hspa8|Tagln|Tex12|Arpp19|Pgm3|Spsb4|Poc1a|Manf|Prkar2a|Epm2aip1|Higd1a|Abhd5|Ginm1|Cacybp|Ddx21|Timp3|Creg1|Arl1|Dusp6|Wif1|Prim1|Irgm1|Hnrnpab|Bcl6b|Psmd11|Pigw|Rad51c|Nme1|Gm11541|Lsm12|Efcab3|Kpna2|Desi2|Zfp750|Rsad2|Dnajb9|Pygl|Pigh|Adam4|Tmed8|Alkbh1|Tc2n|Hsp90aa1|Tnfaip2|Nid1|Inhba|Rnf144b|Lysmd3|F2r|Itga1|Spryd7|Slc25a30|Lifr|Atad2|Ndrg1|St3gal1|Cyp2d9|Cyp2d12|Klhdc7b|Ankrd39|Fgd4|Sdf2l1|Tubb4b|Alg3|Map3k13|Lrrc58|Naa50|Dzip3|Rab44|Glo1|Ppp1r10|Enpp4|Catsperd|Nudt12|Srd5a2|Yipf4|Zbtb26|Psma8|Scai|Rnf125|Tmed7|Slc22a28|Ms4a4d|Mamdc2|Pik3ap1|Cox15|Xk|Pgrmc1|Zbtb33|Klhl15|Yipf6|Cdx4|Chic1|Npas2|Reps2|Mospd2|Cwc22|Sppl2a|Atrn|Pcna|4921509C19Rik|Rbl1|Zgpat|Il7|Supt20|Tcf24|Serp1|P2ry12|Ssr3|Aox1|S100a10|Nop58|Gstm4|5330417C22Rik|Prdm14|Xkr9|Rnpc3|Tmem56|Alg14|Fubp1|Fzd5|Ndufaf4|Mup3|Mup1|Mup10|Dph2|Akirin1|AW011738|Keg1|Per3|Insc|Nrg4|Ptp4a1|Abcc3|1810032O08Rik|Slc46a3|Rnf152|Steap3|Bbc3|Cyp2a4|Cyp2a22|Sh2d4a|Gamt|Tbc1d30|1810010H24Rik|Ccdc57|Syne3|Zfp395|Apol7a|Tef|Igfals|Slc22a7|Hsd3b3|Slc44a3|Uox|Cyp7a1|Enho|Tcea3|Arhgef19|Fam131c|Slc2a5|Lrg1|Cxcl10|Abhd2|Mycn|Nrep|P2ry4|Padi4|Insig2|Prg4|Leap2|Crp|Mal2|Rapgef4|Fam110a|Jun|8430408G22Rik|Lad1|Ces3a|Arrdc3|G0s2|Hsd3b5|Hsd3b2|Cyp2u1|Mreg|Il22ra1|Cyp3a59|Rgs1|Ddit4|Lcn2|1810055G02Rik|Rgs12|Cc2d2a|Ppargc1a|Nipal1|Ugt2b5|Afp|Gbp10|Gbp6|Fgfrl1|Sgsm1|2610524H06Rik|Oasl1|Fbxo21|Tbx3|Oas1c|Stx2|Cyp3a41a|Per2|Irf5|Pask|Zfp467|Avl9|Nat8|Serpinb8|Rassf4|Usp18|Clstn3|Clec2h|Ceacam2|Cd79a|Cyp2b9|Sertad3|Cd22|Tulp2|Svip|Wee1|Eef2k|Aqp8|Cd19|Fcer2a|Cd209c|Htra4|Mtnr1a|Klkb1|Lrp2bp|Nr3c2|Ces1f|Mmp15|Ces2h|Ces3b|Slc9a5|Mvd|Tmem218|Nxpe2|Cd276|Gclc|Nt5e|6430571L13Rik|Acaa1b|Dlec1|Wisp3|Tube1|Chst3",
    "|Fmo4|Fmo1|Rab36|Gstt2|Lss|Izumo4|Tjp3|Lin7a|Inhbe|Nab2|Mettl7b|Slc39a5|Osbp2|Ddc|Snrnp25|Pcp4l1|Pttg1|D930048N14Rik|Pctp|Tob1|Nr1d1|Krt23|Abca8a|Cbx8|Cbx4|Cbx2|Fn3k|Psen2|Cys1|Cmpk2|Atxn7l1|Acot4|Zc2hc1c|Serpina9|Akr1c19|Dsp|Rd3|Tppp|Mblac2|Adamts6|Pde4d|Fst|4930452B06Rik|Lrtm1|Lrit1|Fdft1|Chrna2|C1ql3|Prlr|Apol9a|Apol9b|Fam227a|Arsa|Entpd2|Nit2|Hunk|Ccnf|Pde9a|Cyp4f14|Tcf19|Zbtb12",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
TCPO4 <- grep(
  paste0(
    "^(",
    "Dnph1|Aqp4|Dsg2|Lipg|Cacnb4|Hsbp1l1|Acy3|Snx32|Chrm1|Fads3|Kank1|Vldlr|Hhex|Pde6c|Cyp2c38|Mid1ip1|Fign|Sytl4|Bhlhb9|Kantr|Klhl41|Madd|Abtb2|Elf5|Pak6|Itpka|Wdr76|Sord|Nabp1|Rbm38|Slc17a9|Pgap1|Pfn2|Sucnr1|Bche|Aox3|Mab21l2|Hcn3|Fdps|Car14|Hist2h2be|Trim45|Gnat2|Adh4|Ddah1|Coro2a|Col15a1|Mup8|Mpdz|Igfbp5|2610528J11Rik|Ncmap|Gale|Arhgef16|Insig1|Sh3bp2|Stx18|Adamts3|Slc10a6|Mfsd7a|Crybb3|Ung|Glt1d1|Asl|Crcp|Serpine1|Cyp3a57|Cyp3a25|Aass|Lmod2|Atp6v0a4|Snx10|St3gal5|Podxl2|Nop2|Inhbb|Clec9a|Gprc5a|Pik3c2g|Amn1|Nlrp12|Hif3a|Saa1|Klf13|Slc45a3|Adm|Rassf10|Sult1a1|Tacc2|Dmbt1|Ifitm5|Lrrc8e|Irs2|Enpp6|Hp|Tat|Wwox|Map1lc3b|Cdh15|Mmp8|Ppan|Cnn1|Bmper|Adamts15|Ubash3b|Rgs16|Zbtb16|Zkscan7|Map3k5|Ctgf|Adamts14|Slc19a1|Slc35e3|Adcy1|Lgalsl|Rasd1|Apcs|Ccl6|Ypel2|Etv4|Grn|Gfap|Itgb3|H3f3b|Tha1|Lpin1|Prps1l1|Susd6|Ppp4r4|Serpina3i|Serpina3m|Serpina3f|Serpina3g|Slc25a47|Simc1|Klhl3|Ctsl|Rpp38|Hspa14|Itga2|Gnl3|Itih4|Fam35a|Apex1|Zc3h13|Farp1|Nrbp2|Eppk1|Maff|Pim3|Slc38a2|Dnaja3|Socs1|Cebpd|Obp2a|Nr1i2|Cldn14|Dopey2|Rpl10a|Fkbp5|Rsph1|Angptl4|Rpl12|Supt3|Cyp39a1|Fsd1|Mpnd|Dpp9|Myom1|Rbbp8|Nr6a1|Fbn2|Ablim3|2010003K11Rik|Ms4a8a|Sgms1|Acta2|Cyp26c1|Cyp2c70|Rrp12|Ablim1|Gfra1|6030498E09Rik|B3galt1|Il1r1|Mtch2|Thbs1|Tmem87b|Nop56|Thbd|Bcl2l1|Kcnb1|Cebpb|Arhgef26|Tiparp|Gpatch4|Adamtsl4|Trp53inp1|Slc25a51|Tomm5|Ambp|Orm1|Lurap1l|Cyp2j8|Prkag3|Cyp27a1|Mfsd2a|Gja4|Tmem51|Steap4|Abcb1a|Serpina7|Ugt2b34|Stbd1|Fam47e|Prss8|Smpd3|Gstt3|Gstt1|Fam110c|Chaf1b|Cyp2c54|Cyp2c37|Cyp2c50|Podn|Lgr5|Ang|Chac1|Ugt1a1|Tfcp2l1|Tomt|Cyp1a2|Gas1|Ppp1r42|Mup9|Inmt|Ces1d|Setd4|Susd4|Tifa|Fam222a|Cyp26b1|Prtn3|Rtn4rl1|Bhmt|Dntt|Fosl2|Prom1|Nfe2l3|Prc1|Ccnd1|Ces1c|Slc13a2|Ccl9|Top2a|Fam84a|Ahr|Klhl33|Slc25a37|Aldh1a7|Ermn|Elovl3|Timm8a1|Ckap2l|Slc23a2|Fam83d|Ect2|Hgfac|Lgals4|Syt3|Fut1|Rab30|Gas6|Rnf122|6430573F11Rik|Isyna1|Junb|Rfx4|Hsd17b6|Sun3|Per1|Socs3|Ltbp2|Cyp46a1|Nfil3|Lamb3|Itih3|Mat1a|Rnase4|Nuggc|Scara5|Sybu|Aifm3|Mas1|Cables1|Awat2|Ripply1|Slc43a1|Myh7b|D630003M21Rik|Pck1|Fabp5|Lingo",
    "4|Etnppl|Sytl1|Extl1|Ugt2b1|Ugt2b36|Ugt2a1|Prkg2|Hrk|Kntc1|Nup205|Jazf1|Reep1|Mob1a|Mcm2|Atg7|Mcm3|Nudt19|Acmsd|Mcm6|Mctp2|Fanci|Acsm2|Zbed6|Nrg1|Lpl|Calr3|Mcm5|Asf1b|Ces2c|Fanca|Lamc2|Aph1b|Bmp5|Ttk|Xirp1|Echdc1|Sgpl1|H2afy2|BC055324|Fignl1|Tgtp2|Sar1b|9530068E07Rik|Slfn9|Cdc6|Ern1|Foxa1|Klhl28|Ifi27l2b|Serpina1d|Serpina1e|Serpina1a|Idi1|Dtl|Nek2|Lect2|Depdc1b|Wdhd1|Socs4|Esco2|Abcc4|Tmtc4|Pdzd2|Arl5b|Ncald|Wisp1|Alg10b|Scn8a|Krt4|Krt79|Mcm4|Sidt1|Hspa13|Nrip1",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
TCPO5 <- grep(
  paste0(
    "^(",
    "Adamts1|Uhrf1|Ston1|Pcdhgc3|Prelid2|Clcf1|Fads1|Aldh1a1|Dock8|A1cf|Hells|Cyp2c65|Abcc2|Sorcs3|Pnliprp1|Sytl5|Pola1|Ercc6l|Cenpi|Rad51|Nusap1|Mybl1|Zfp931|Chrna4|Slc10a5|Zfp687|Chd1l|Prok1|Lactb2|Agl|Impad1|Ccne2|Alg2|Mup13|Mup2|Mup7|Mup14|Ttc39b|Adamtsl1|Fndc5|Irs1|Sowahb|Klhl8|Nudt7|Slc16a13|Rab11fip4|Sntg2|Acot6|Sgcg|C9|Fam171b|Gpat2|Srms|Itgb3bp|Ube2u|A630001G21Rik|Shisa3|Ugt2b37|Cxcl13|Gbp8|Slc15a4|Ybx3|Sult2a5|Sult2a2|Sult2a1|Sult2a4|Etnk2|Dpep1|9530077C05Rik|Uck2|Rims2|Cyp4f15|H2-Eb1|Notch4|H2-Q2|Dock11|Rnf24|Tgm2|Coq10b|Kcna2|Gbp2b|Igfbp2|Myom3|Col4a3",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)

# Row indices (into G172.umis) of the ncRNA identifiers in the TCPO6, TCPO6.2
# and TCPO6.1 lists -- presumably TCPO-responsive ncRNAs that are to be
# excluded (TODO confirm where these vectors are consumed).
#
# Each alternation is wrapped in "^(" ... ")$" so that an identifier has to
# equal the whole row name. Left unanchored, "ncRNA_inter_chr1_66" would also
# hit any longer id that merely starts with it.
# TCPO6 and TCPO6.2 are assembled from two literals with paste0(): the hard
# line wraps used to sit inside "ncRNA_intra_chr5_4728" and
# "ncRNA_inter_chr19_14953", which put a newline into the pattern and made
# those ids unmatchable.
TCPO6 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chr16_13412|ncRNA_inter_chr2_1923|ncRNA_inter_chr5_4197|ncRNA_inter_chr9_8103|ncRNA_inter_chr10_9254|ncRNA_inter_chr18_14655|ncRNA_inter_chr6_5137|ncRNA_as_chr1_369|ncRNA_inter_chr5_4424|ncRNA_inter_chr5_4619|ncRNA_as_chr5_4731|ncRNA_as_chr5_4745|ncRNA_inter_chr5_4781|ncRNA_as_chr6_5038|ncRNA_inter_chr6_5610|ncRNA_inter_chr7_5913|ncRNA_inter_chr7_5914|ncRNA_as_chr7_6050|ncRNA_inter_chr7_6074|ncRNA_inter_chr7_6411|ncRNA_inter_chr1_552|ncRNA_inter_chr1_547|ncRNA_inter_chr8_6741|ncRNA_inter_chr8_6944|ncRNA_inter_chr8_6955|ncRNA_inter_chr9_7766|ncRNA_as_chr9_7843|ncRNA_as_chr9_7880|ncRNA_inter_chr9_7881|ncRNA_inter_chr9_7938|ncRNA_inter_chr9_7994|ncRNA_inter_chr9_7992|ncRNA_as_chr9_8419|ncRNA_inter_chr10_8796|ncRNA_inter_chr10_9195|ncRNA_as_chr10_9385|ncRNA_as_chr10_9411|ncRNA_inter_chr11_9543|ncRNA_inter_chr11_10206|ncRNA_as_chr12_10281|ncRNA_inter_chr12_10630|ncRNA_inter_chr12_10754|ncRNA_as_chr12_10762|ncRNA_inter_chr12_10814|ncRNA_inter_chr12_10916|ncRNA_inter_chr12_10938|ncRNA_inter_chr13_11125|ncRNA_inter_chr13_11153|ncRNA_inter_chr13_11203|ncRNA_inter_chr13_11204|ncRNA_inter_chr13_11201|ncRNA_inter_chr13_11412|ncRNA_inter_chr13_11602|ncRNA_as_chr14_11908|ncRNA_as_chr14_11919|ncRNA_as_chr14_11991|ncRNA_inter_chr14_12061|ncRNA_inter_chr15_12776|ncRNA_inter_chr15_12819|ncRNA_inter_chr16_13477|ncRNA_inter_chr17_14011|ncRNA_as_chr18_14302|ncRNA_as_chr18_14461|ncRNA_inter_chr18_14589|ncRNA_as_chr18_14689|ncRNA_as_chr19_14883|ncRNA_inter_chr19_14949|ncRNA_as_chr19_15129|ncRNA_inter_chr2_1477|ncRNA_as_chr2_1652|ncRNA_inter_chr2_1990|ncRNA_inter_chr3_2232|ncRNA_inter_chr3_2551|ncRNA_inter_chr3_2550|ncRNA_inter_chr3_2629|ncRNA_inter_chr1_66|ncRNA_inter_chr1_63|ncRNA_inter_chr1_62|ncRNA_as_chr3_2878|ncRNA_inter_chr3_2901|ncRNA_inter_chr3_3003|ncRNA_inter_chr4_3157|ncRNA_as_chr4_3800|ncRNA_inter_chr4_3862|ncRNA_inter_chr4_3863|ncRNA_inter_chr5_3988|ncRNA_inter_chr5_4315|ncRNA_inter_chr5_4322|ncRNA_inter_chr5_4335|ncRNA_as_chr1_400|ncRNA_intra_chr",
    "5_4728|ncRNA_inter_chr6_4886|ncRNA_as_chr6_5132|ncRNA_inter_chr6_5310|ncRNA_inter_chr6_5421|ncRNA_inter_chr6_5502|ncRNA_inter_chr6_5721",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)

TCPO6.2 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chr6_5817|ncRNA_as_chr7_5955|ncRNA_inter_chr7_6085|ncRNA_as_chr7_6192|ncRNA_inter_chr7_6367|ncRNA_inter_chr7_6392|ncRNA_inter_chr7_6508|ncRNA_inter_chr7_6510|ncRNA_inter_chr1_570|ncRNA_inter_chr1_571|ncRNA_inter_chr1_566|ncRNA_inter_chr1_568|ncRNA_inter_chr1_567|ncRNA_inter_chr8_6775|ncRNA_inter_chr1_590|ncRNA_as_chr8_7071|ncRNA_inter_chr8_7141|ncRNA_inter_chr1_610|ncRNA_intra_chr1_611|ncRNA_inter_chr8_7169|ncRNA_intra_chr8_7312|ncRNA_intra_chr8_7310|ncRNA_inter_chr1_633|ncRNA_as_chr8_7521|ncRNA_as_chr8_7528|ncRNA_inter_chr8_7612|ncRNA_inter_chr8_7683|ncRNA_inter_chr9_8105|ncRNA_inter_chr9_8099|ncRNA_inter_chr9_8104|ncRNA_as_chr9_8172|ncRNA_inter_chr9_8279|ncRNA_as_chr9_8334|ncRNA_inter_chr9_8350|ncRNA_inter_chr10_8999|ncRNA_inter_chr10_9000|ncRNA_inter_chr10_9222|ncRNA_intra_chr11_9593|ncRNA_inter_chr11_9636|ncRNA_as_chr11_9787|ncRNA_as_chr11_9790|ncRNA_inter_chr11_9995|ncRNA_inter_chr12_10421|ncRNA_inter_chr12_10476|ncRNA_inter_chr12_10549|ncRNA_inter_chr12_10672|ncRNA_intra_chr12_10866|ncRNA_intra_chr12_10859|ncRNA_intra_chr12_10851|ncRNA_inter_chr12_10949|ncRNA_inter_chr13_11074|ncRNA_inter_chr13_11070|ncRNA_inter_chr13_11399|ncRNA_inter_chr13_11438|ncRNA_inter_chr13_11440|ncRNA_inter_chr14_11853|ncRNA_inter_chr14_11911|ncRNA_inter_chr14_11987|ncRNA_inter_chr14_12058|ncRNA_as_chr14_12074|ncRNA_inter_chr14_12191|ncRNA_inter_chr14_12193|ncRNA_inter_chr14_12205|ncRNA_inter_chr14_12291|ncRNA_inter_chr14_12297|ncRNA_inter_chr15_12606|ncRNA_as_chr15_12697|ncRNA_intra_chr15_12771|ncRNA_inter_chr15_12777|ncRNA_inter_chr16_12998|ncRNA_inter_chr16_13190|ncRNA_inter_chr16_13349|ncRNA_inter_chr17_13743|ncRNA_inter_chr17_13808|ncRNA_as_chr17_13828|ncRNA_inter_chr17_13841|ncRNA_inter_chr17_13842|ncRNA_inter_chr17_13876|ncRNA_inter_chr17_13919|ncRNA_inter_chr17_14015|ncRNA_inter_chr17_14085|ncRNA_inter_chr17_14129|ncRNA_inter_chr18_14339|ncRNA_inter_chr18_14656|ncRNA_inter_chr19_14717|ncRNA_inter_chr19_14856|ncRNA_inter_chr19_14854|ncRNA_inter_ch",
    "r19_14953|ncRNA_inter_chr19_14952|ncRNA_inter_chr2_1436|ncRNA_inter_chr2_1492|ncRNA_inter_chr2_1577|ncRNA_inter_chr2_1821|ncRNA_inter_chr2_1826|ncRNA_inter_chr2_1827|ncRNA_inter_chr3_2764|ncRNA_as_chr3_2936|ncRNA_inter_chr3_2983|ncRNA_inter_chr4_3142|ncRNA_inter_chr4_3306|ncRNA_inter_chr4_3512|ncRNA_inter_chr4_3521|ncRNA_inter_chr4_3673|ncRNA_inter_chr4_3732|ncRNA_inter_chr4_3778|ncRNA_as_chr4_3843|ncRNA_as_chr10_9015|ncRNA_inter_chr19_14873|ncRNA_inter_chr3_2165|ncRNA_inter_chr10_8471|ncRNA_inter_chr19_15097|ncRNA_inter_chr11_10188|ncRNA_inter_chr1_420|ncRNA_inter_chr8_7363|ncRNA_inter_chr8_7423|ncRNA_inter_chr8_7430|ncRNA_inter_chr11_9864",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)


TCPO6.1 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chr11_9925|ncRNA_inter_chr11_9968|ncRNA_inter_chr1_914|ncRNA_inter_chr16_13176|ncRNA_as_chr18_14602|ncRNA_inter_chr3_2162|ncRNA_inter_chr3_2504|ncRNA_as_chr4_3300|ncRNA_inter_chr6_5793|ncRNA_inter_chr8_7610|ncRNA_as_chr9_7959|ncRNA_inter_chr15_12869|ncRNA_inter_chr18_14442|ncRNA_inter_chr19_15169|ncRNA_inter_chr19_15179|ncRNA_inter_chr19_15177|ncRNA_inter_chr19_15171|ncRNA_inter_chr2_1725|ncRNA_inter_chr3_2543|ncRNA_inter_chr4_3120|ncRNA_as_chr5_4655|ncRNA_inter_chr5_4654|ncRNA_as_chr6_5335|ncRNA_inter_chr7_5998|ncRNA_inter_chr9_8301|ncRNA_as_chr10_8460|ncRNA_as_chr11_9709|ncRNA_inter_chr1_931|ncRNA_inter_chr15_12834|ncRNA_inter_chr16_13509|ncRNA_inter_chr16_13510|ncRNA_as_chr16_13512|ncRNA_inter_chr18_14690|ncRNA_inter_chr19_15002|ncRNA_inter_chr19_15004|ncRNA_inter_chr16_13225|ncRNA_inter_chr2_2017|ncRNA_inter_chr6_5253|ncRNA_inter_chr9_7763|ncRNA_inter_chr5_4777|ncRNA_inter_chr6_5551|ncRNA_inter_chr14_11945|ncRNA_as_chr9_8142|ncRNA_inter_chr2_1471|ncRNA_inter_chr11_10185|ncRNA_inter_chr9_8000|ncRNA_inter_chr4_3282|ncRNA_inter_chr5_4499|ncRNA_inter_chr1_496|ncRNA_inter_chr8_6894|ncRNA_inter_chr8_6896|ncRNA_inter_chr15_12836|ncRNA_inter_chr19_14947|ncRNA_as_chr7_6065|ncRNA_inter_chr13_11031|ncRNA_inter_chr4_3156|ncRNA_inter_chr9_7809|ncRNA_inter_chr13_11385|ncRNA_inter_chr6_5248|ncRNA_inter_chr9_7875|ncRNA_inter_chr1_775|ncRNA_intra_chr11_9594|ncRNA_as_chr15_12959|ncRNA_inter_chr5_4316|ncRNA_inter_chr11_9635|ncRNA_inter_chr13_11100|ncRNA_inter_chr16_13428|ncRNA_inter_chr17_14130|ncRNA_inter_chr3_2721|ncRNA_inter_chr7_6113|ncRNA_as_chr5_4372|ncRNA_inter_chr5_4773|ncRNA_as_chr7_5921|ncRNA_inter_chr7_5935|ncRNA_as_chr7_6007|ncRNA_inter_chr7_6070",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)

# Row indices (into G172.umis) of the ncRNA identifiers in the TCPO7, TCPO7.1
# and TCPO7.2 lists -- presumably TCPO-responsive ncRNAs that are to be
# excluded (TODO confirm where these vectors are consumed).
#
# Each alternation is wrapped in "^(" ... ")$" so that an identifier has to
# equal the whole row name. Left unanchored, "ncRNA_inter_chr1_31" would also
# hit any longer id that merely starts with it.
# TCPO7 and TCPO7.2 are assembled from two literals with paste0(): the hard
# line wraps used to sit inside "ncRNA_inter_chr1_113" and
# "ncRNA_inter_chr18_14223", which put a newline into the pattern and made
# those ids unmatchable.
TCPO7 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chr9_7989|ncRNA_inter_chr9_8417|ncRNA_as_chr10_9016|ncRNA_inter_chr10_9418|ncRNA_inter_chr13_11163|ncRNA_inter_chr15_12319|ncRNA_inter_chr16_13050|ncRNA_inter_chr16_13211|ncRNA_inter_chr17_14188|ncRNA_inter_chr19_14987|ncRNA_as_chr2_1343|ncRNA_inter_chr2_1594|ncRNA_as_chr3_2968|ncRNA_as_chr8_7332|ncRNA_inter_chr11_9965|ncRNA_inter_chr13_11622|ncRNA_inter_chr2_1996|ncRNA_as_chr5_4325|ncRNA_inter_chr7_6087|ncRNA_inter_chr7_6094|ncRNA_inter_chr7_6692|ncRNA_inter_chr1_591|ncRNA_inter_chr9_7878|ncRNA_inter_chr9_8118|ncRNA_inter_chr10_9123|ncRNA_inter_chr10_9313|ncRNA_inter_chr12_10454|ncRNA_inter_chr13_11437|ncRNA_inter_chr17_13889|ncRNA_inter_chr2_2037|ncRNA_inter_chr3_2168|ncRNA_inter_chr4_3549|ncRNA_inter_chr9_8302|ncRNA_inter_chr10_9256|ncRNA_inter_chr11_9442|ncRNA_as_chr19_14777|ncRNA_as_chr19_14779|ncRNA_inter_chr19_14967|ncRNA_inter_chr2_1430|ncRNA_inter_chr4_3618|ncRNA_as_chr19_14976|ncRNA_inter_chr5_4338|ncRNA_as_chr7_5999|ncRNA_as_chr8_7359|ncRNA_inter_chr3_2988|ncRNA_inter_chr2_2016|ncRNA_inter_chr6_5316|ncRNA_inter_chr1_290|ncRNA_inter_chr5_4775|ncRNA_inter_chr7_6220|ncRNA_inter_chr7_6222|ncRNA_inter_chr9_8122|ncRNA_inter_chr11_9599|ncRNA_inter_chr12_10910|ncRNA_as_chr15_12920|ncRNA_inter_chr16_13170|ncRNA_inter_chr16_13177|ncRNA_inter_chr17_14151|ncRNA_inter_chr17_14162|ncRNA_inter_chr3_2166|ncRNA_inter_chr4_3651|ncRNA_inter_chr5_3974|ncRNA_inter_chr9_8067|ncRNA_inter_chr10_9181|ncRNA_inter_chr1_129|ncRNA_inter_chr15_12796|ncRNA_as_chr2_1207|ncRNA_inter_chr19_15175|ncRNA_inter_chr2_2012|ncRNA_inter_chr2_2011|ncRNA_inter_chr1_365|ncRNA_inter_chr5_4491|ncRNA_as_chr6_5635|ncRNA_inter_chr6_5710|ncRNA_inter_chr7_6110|ncRNA_inter_chr7_6489|ncRNA_inter_chr7_6587|ncRNA_inter_chr7_6639|ncRNA_inter_chr8_6738|ncRNA_inter_chr8_6759|ncRNA_inter_chr8_6766|ncRNA_inter_chr8_6942|ncRNA_inter_chr8_6946|ncRNA_inter_chr8_7074|ncRNA_inter_chr8_7180|ncRNA_inter_chr8_7432|ncRNA_inter_chr9_7691|ncRNA_inter_chr9_7819|ncRNA_inter_chr9_7813|ncRNA_inter_chr",
    "1_113|ncRNA_inter_chr9_7885|ncRNA_as_chr9_8043|ncRNA_inter_chr9_8049|ncRNA_as_chr9_8317|ncRNA_inter_chr10_8697",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)

TCPO7.1 <- grep(
  paste0(
    "^(",
    "ncRNA_inter_chr10_8776|ncRNA_inter_chr10_9125|ncRNA_inter_chr10_9366|ncRNA_inter_chr10_9363|ncRNA_intra_chr11_9623|ncRNA_as_chr1_814|ncRNA_inter_chr11_9743|ncRNA_inter_chr1_913|ncRNA_inter_chr12_10509|ncRNA_inter_chr12_10539|ncRNA_inter_chr12_10713|ncRNA_inter_chr12_10943|ncRNA_as_chr13_11127|ncRNA_as_chr1_978|ncRNA_inter_chr13_11222|ncRNA_inter_chr13_11361|ncRNA_inter_chr13_11572|ncRNA_as_chr13_11787|ncRNA_inter_chr2_1050|ncRNA_inter_chr14_11938|ncRNA_as_chr14_11957|ncRNA_inter_chr14_12034|ncRNA_inter_chr14_12032|ncRNA_inter_chr14_12031|ncRNA_inter_chr15_12446|ncRNA_as_chr15_12818|ncRNA_inter_chr15_12890|ncRNA_as_chr15_12942|ncRNA_inter_chr16_13173|ncRNA_as_chr1_153|ncRNA_inter_chr17_13857|ncRNA_inter_chr17_13924|ncRNA_inter_chr17_13983|ncRNA_inter_chr17_14026|ncRNA_as_chr17_14041|ncRNA_inter_chr17_14102|ncRNA_as_chr18_14369|ncRNA_as_chr19_14782|ncRNA_inter_chr2_1310|ncRNA_inter_chr19_14874|ncRNA_inter_chr19_14880|ncRNA_inter_chrX_15449|ncRNA_inter_chr2_1423|ncRNA_inter_chr2_1497|ncRNA_inter_chr2_1501|ncRNA_inter_chr1_31|ncRNA_inter_chr2_1829|ncRNA_as_chr2_1870|ncRNA_inter_chr2_1958|ncRNA_inter_chr2_2124|ncRNA_inter_chr3_2140|ncRNA_inter_chr3_2311|ncRNA_inter_chr3_2505|ncRNA_inter_chr3_2743|ncRNA_inter_chr3_2940|ncRNA_inter_chr4_3009|ncRNA_inter_chr4_3010|ncRNA_inter_chr4_3163|ncRNA_as_chr4_3654|ncRNA_inter_chr4_3689|ncRNA_inter_chr4_3867|ncRNA_inter_chr5_4010|ncRNA_inter_chr5_4123|ncRNA_inter_chr5_4138|ncRNA_inter_chr5_4336|ncRNA_inter_chr5_4337|ncRNA_inter_chr5_4656|ncRNA_inter_chr5_4685|ncRNA_as_chr5_4687|ncRNA_inter_chr6_4828|ncRNA_inter_chr6_5154|ncRNA_inter_chr6_5675|ncRNA_inter_chr6_5684|ncRNA_inter_chr6_5723|ncRNA_as_chr6_5861|ncRNA_inter_chr7_6097|ncRNA_as_chr7_6384|ncRNA_inter_chr7_6559|ncRNA_inter_chr7_6709|ncRNA_inter_chr8_6887|ncRNA_inter_chr8_7072|ncRNA_inter_chr8_7105",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)
TCPO7.2 <- grep(
  paste0(
    "^(",
    "ncRNA_as_chr8_7190|ncRNA_intra_chr8_7324|ncRNA_inter_chr1_635|ncRNA_inter_chr1_630|ncRNA_inter_chr8_7512|ncRNA_inter_chr1_648|ncRNA_inter_chr1_678|ncRNA_inter_chr9_7996|ncRNA_inter_chr9_7995|ncRNA_inter_chr9_8022|ncRNA_inter_chr9_8100|ncRNA_intra_chr9_8171|ncRNA_inter_chr10_9128|ncRNA_inter_chr10_9199|ncRNA_intra_chr11_9595|ncRNA_inter_chr11_9600|ncRNA_inter_chr11_9651|ncRNA_as_chr11_9663|ncRNA_as_chr11_9684|ncRNA_inter_chr11_9780|ncRNA_inter_chr11_9784|ncRNA_inter_chr11_9991|ncRNA_inter_chr11_10051|ncRNA_inter_chr12_10415|ncRNA_intra_chr12_10508|ncRNA_inter_chr12_10543|ncRNA_inter_chr12_10628|ncRNA_intra_chr12_10870|ncRNA_intra_chr12_10872|ncRNA_intra_chr12_10863|ncRNA_intra_chr12_10854|ncRNA_intra_chr12_10856|ncRNA_intra_chr12_10857|ncRNA_inter_chr12_10895|ncRNA_inter_chr12_10978|ncRNA_inter_chr13_11199|ncRNA_inter_chr14_11851|ncRNA_inter_chr14_12016|ncRNA_inter_chr14_12145|ncRNA_inter_chr14_12154|ncRNA_inter_chr14_12239|ncRNA_inter_chr14_12290|ncRNA_as_chr14_12314|ncRNA_inter_chr15_12368|ncRNA_inter_chr2_1097|ncRNA_as_chr2_1101|ncRNA_as_chr15_12682|ncRNA_as_chr16_13145|ncRNA_inter_chr16_13270|ncRNA_inter_chr16_13316|ncRNA_inter_chr16_13317|ncRNA_inter_chr17_13539|ncRNA_as_chr17_13731|ncRNA_inter_chr17_14132|ncRNA_inter_chr18_14338|ncRNA_inter_chr18_14336|ncRNA_inter_chr18_14337|ncRNA_inter_chr18_14674|ncRNA_inter_chr19_14790|ncRNA_inter_chr19_14892|ncRNA_inter_chr19_14999|ncRNA_inter_chr19_15136|ncRNA_inter_chrX_15248|ncRNA_inter_chrX_15240|ncRNA_inter_chrX_15389|ncRNA_inter_chr2_1686|ncRNA_inter_chr2_1736|ncRNA_intra_chr2_1796|ncRNA_inter_chr2_2112|ncRNA_inter_chr3_2574|ncRNA_inter_chr4_3161|ncRNA_inter_chr4_3184|ncRNA_as_chr4_3206|ncRNA_as_chr4_3310|ncRNA_inter_chr4_3424|ncRNA_inter_chr4_3422|ncRNA_inter_chr4_3723|ncRNA_as_chr8_7333|ncRNA_as_chr5_4744|ncRNA_inter_chr5_4746|ncRNA_inter_chr6_5638|ncRNA_inter_chr7_6108|ncRNA_inter_chr7_6109|ncRNA_as_chr13_11025|ncRNA_inter_chr14_12201|ncRNA_inter_chr15_12439|ncRNA_inter_chr15_12815|ncRNA_inter_",
    "chr18_14223|ncRNA_as_chr1_222|ncRNA_inter_chr8_6744|ncRNA_inter_chr8_7334|ncRNA_inter_chr2_1425|ncRNA_inter_chr2_1426|ncRNA_inter_chr3_2413|ncRNA_inter_chr3_2410|ncRNA_inter_chr3_2411|ncRNA_inter_chr5_4407|ncRNA_inter_chr6_5249|ncRNA_inter_chr8_7511|ncRNA_inter_chr9_7874|ncRNA_inter_chr9_8056|ncRNA_as_chr9_8393|ncRNA_inter_chr13_11254|ncRNA_intra_chr2_1057|ncRNA_as_chr14_12315|ncRNA_inter_chr15_12684|ncRNA_inter_chr15_12937|ncRNA_inter_chr17_13692|ncRNA_inter_chr17_14175|ncRNA_inter_chr18_14688|ncRNA_intra_chr19_14773|ncRNA_inter_chr3_2169|ncRNA_as_chr4_3537",
    ")$"
  ),
  x = rownames(G172.umis), value = FALSE
)

# TCPO8: integer row indices of G172.umis whose rownames are in the ID list
# below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside an ID.
# Matching is exact (%in%) rather than an unanchored regular expression:
# as a regex, "ncRNA_inter_chr1_14" would also select "ncRNA_inter_chr1_140",
# "ncRNA_inter_chr1_141", ... if such rownames are present.
TCPO8 <- local({
  ids <- strsplit(paste0(
    "ncRNA_inter_chr5_4750|ncRNA_inter_chr7_6390|ncRNA_inter_chr9_8300|ncRNA_inter_chr13_11116|ncRNA_inter_chr15_12835|ncRNA_inter_chr6_5318|ncRNA_inter_chr11_9914|ncRNA_as_chr12_10974|ncRNA_inter_chr14_12199|ncRNA_inter_chr15_12824|ncRNA_intra_chr19_15008|ncRNA_as_chr17_13834|ncRNA_inter_chr19_14946|ncRNA_inter_chr5_4150|ncRNA_inter_chr5_4149|ncRNA_as_chr5_4157|ncRNA_as_chr5_4156|ncRNA_inter_chr5_4180|ncRNA_inter_chr5_4242|ncRNA_inter_chr1_366|ncRNA_inter_chr5_4273|ncRNA_as_chr5_4276|ncRNA_as_chr5_4370|ncRNA_intra_chr5_4388|ncRNA_inter_chr5_4402|ncRNA_inter_chr5_4507|ncRNA_inter_chr5_4532|ncRNA_inter_chr5_4564|ncRNA_inter_chr5_4560|ncRNA_as_chr1_404|ncRNA_as_chr5_4712|ncRNA_inter_chr5_4774|ncRNA_inter_chr5_4776|ncRNA_inter_chr5_4785|ncRNA_inter_chr5_4784|ncRNA_inter_chr5_4783|ncRNA_as_chr6_5114|ncRNA_inter_chr6_5117|ncRNA_inter_chr6_5125|ncRNA_inter_chr6_5127|ncRNA_inter_chr6_5159|ncRNA_inter_chr1_444|ncRNA_as_chr6_5250|ncRNA_inter_chr1_463|ncRNA_inter_chr6_5304|ncRNA_inter_chr6_5313|ncRNA_inter_chr6_5323|ncRNA_inter_chr6_5370|ncRNA_inter_chr6_5429|ncRNA_inter_chr6_5552|ncRNA_inter_chr6_5557|ncRNA_inter_chr6_5563|ncRNA_inter_chr6_5582|ncRNA_as_chr6_5587|ncRNA_inter_chr6_5630|ncRNA_inter_chr6_5673|ncRNA_as_chr7_5939|ncRNA_inter_chr7_5990|ncRNA_as_chr7_6012|ncRNA_inter_chr7_6022|ncRNA_inter_chr7_6101|ncRNA_inter_chr7_6118|ncRNA_inter_chr7_6115|ncRNA_inter_chr7_6119|ncRNA_inter_chr7_6121|ncRNA_inter_chr7_6114|ncRNA_inter_chr7_6223|ncRNA_inter_chr7_6221|ncRNA_as_chr7_6288|ncRNA_inter_chr7_6338|ncRNA_inter_chr8_6861|ncRNA_as_chr8_6963|ncRNA_inter_chr8_7012|ncRNA_inter_chr8_7256|ncRNA_inter_chr8_7280|ncRNA_as_chr8_7331|ncRNA_inter_chr8_7330|ncRNA_inter_chr8_7362|ncRNA_as_chr8_7369|ncRNA_inter_chr8_7394|ncRNA_inter_chr8_7424|ncRNA_inter_chr8_7603|ncRNA_inter_chr8_7649|ncRNA_inter_chr8_7650|ncRNA_inter_chr9_7754|ncRNA_inter_chr9_7873|ncRNA_as_chr9_7931|ncRNA_as_chr9_7947|ncRNA_as_chr9_7949|ncRNA_inter_chr9_8016|ncRNA_inter_chr9_8015|ncRNA_inter_chr9_8117|ncRNA_",
    "inter_chr9_8123|ncRNA_inter_chr9_8121|ncRNA_inter_chr9_8132|ncRNA_as_chr9_8271|ncRNA_inter_chr9_8396|ncRNA_inter_chr1_115|ncRNA_inter_chr1_14|ncRNA_inter_chr10_8829|ncRNA_inter_chr10_8950|ncRNA_inter_chr10_8951|ncRNA_inter_chr10_9011|ncRNA_as_chr10_9007|ncRNA_as_chr10_9023|ncRNA_inter_chr10_9138|ncRNA_as_chr10_9240|ncRNA_inter_chr10_9245|ncRNA_as_chr10_9389|ncRNA_inter_chr10_9409|ncRNA_inter_chr11_9670|ncRNA_inter_chr11_9674|ncRNA_as_chr11_9688|ncRNA_as_chr11_9692|ncRNA_inter_chr11_9833|ncRNA_inter_chr11_9862|ncRNA_inter_chr11_9873|ncRNA_inter_chr11_9926|ncRNA_as_chr11_9928|ncRNA_inter_chr11_9938|ncRNA_as_chr11_9940|ncRNA_inter_chr11_9939|ncRNA_inter_chr1_889"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})

# TCPO8.1: integer row indices of G172.umis whose rownames are in the ID list
# below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string and split on the literal "|".
# Matching is exact (%in%) rather than an unanchored regular expression, so an
# ID never selects other rownames that merely contain it as a substring.
TCPO8.1 <- local({
  ids <- strsplit(
    "ncRNA_inter_chr11_10087|ncRNA_as_chr11_10132|ncRNA_inter_chr11_10148|ncRNA_as_chr11_10149|ncRNA_inter_chr11_10202|ncRNA_inter_chr11_10201|ncRNA_inter_chr11_10211|ncRNA_inter_chr1_915|ncRNA_inter_chr12_10412|ncRNA_inter_chr12_10411|ncRNA_inter_chr1_928|ncRNA_inter_chr12_10573|ncRNA_inter_chr12_10629|ncRNA_as_chr12_10655|ncRNA_inter_chr12_10657|ncRNA_inter_chr12_10665|ncRNA_inter_chr12_10681|ncRNA_as_chr12_10764|ncRNA_inter_chr1_948|ncRNA_inter_chr1_949|ncRNA_inter_chr12_10819|ncRNA_inter_chr12_10881|ncRNA_inter_chr12_10893|ncRNA_as_chr13_11068|ncRNA_inter_chr13_11069|ncRNA_inter_chr13_11237|ncRNA_inter_chr13_11236|ncRNA_inter_chr13_11216|ncRNA_inter_chr13_11242|ncRNA_inter_chr13_11255|ncRNA_inter_chr1_1000|ncRNA_inter_chr13_11439|ncRNA_as_chr13_11486|ncRNA_inter_chr13_11651|ncRNA_inter_chr13_11752|ncRNA_inter_chr13_11771|ncRNA_inter_chr2_1049|ncRNA_inter_chr14_11950|ncRNA_inter_chr14_11944|ncRNA_inter_chr14_11946|ncRNA_inter_chr14_11949|ncRNA_inter_chr14_11989|ncRNA_inter_chr14_11998|ncRNA_inter_chr14_11999|ncRNA_as_chr14_12111|ncRNA_as_chr14_12166|ncRNA_inter_chr14_12198|ncRNA_inter_chr14_12200|ncRNA_inter_chr2_1077|ncRNA_inter_chr2_1078|ncRNA_inter_chr15_12338|ncRNA_inter_chr15_12489|ncRNA_inter_chr15_12514|ncRNA_as_chr15_12542|ncRNA_inter_chr15_12633|ncRNA_as_chr15_12677|ncRNA_inter_chr15_12685|ncRNA_as_chr15_12706|ncRNA_intra_chr15_12715|ncRNA_inter_chr15_12719|ncRNA_inter_chr15_12718",
    "|",
    fixed = TRUE
  )[[1]]
  which(rownames(G172.umis) %in% ids)
})

# TCPO8.2: integer row indices of G172.umis whose rownames are in the ID list
# below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside an ID.
# Matching is exact (%in%) rather than an unanchored regular expression:
# as a regex, "ncRNA_inter_chr1_44" would also select "ncRNA_inter_chr1_440",
# "ncRNA_inter_chr1_441", ... if such rownames are present.
TCPO8.2 <- local({
  ids <- strsplit(paste0(
    "ncRNA_inter_chr15_12720|ncRNA_as_chr15_12721|ncRNA_inter_chr15_12745|ncRNA_inter_chr15_12823|ncRNA_inter_chr15_12875|ncRNA_inter_chr15_12916|ncRNA_inter_chr15_12936|ncRNA_inter_chr15_12953|ncRNA_inter_chr15_12954|ncRNA_inter_chr16_13049|ncRNA_inter_chr16_13171|ncRNA_as_chr16_13227|ncRNA_inter_chr16_13340|ncRNA_inter_chr16_13434|ncRNA_inter_chr16_13470|ncRNA_inter_chr17_13681|ncRNA_intra_chr17_13894|ncRNA_as_chr17_13905|ncRNA_inter_chr17_13939|ncRNA_inter_chr17_13938|ncRNA_inter_chr17_14163|ncRNA_as_chr17_14173|ncRNA_inter_chr17_14176|ncRNA_inter_chr18_14317|ncRNA_inter_chr18_14321|ncRNA_inter_chr18_14323|ncRNA_inter_chr18_14331|ncRNA_inter_chr18_14330|ncRNA_inter_chr18_14328|ncRNA_inter_chr18_14324|ncRNA_as_chr18_14408|ncRNA_inter_chr18_14534|ncRNA_inter_chr18_14590|ncRNA_inter_chr18_14650|ncRNA_inter_chr2_1303|ncRNA_as_chr2_1305|ncRNA_inter_chr2_1302|ncRNA_inter_chr19_14802|ncRNA_as_chr19_14823|ncRNA_inter_chr19_14822|ncRNA_inter_chr19_14991|ncRNA_inter_chr19_14990|ncRNA_intra_chr19_15006|ncRNA_as_chr19_15010|ncRNA_as_chr19_15007|ncRNA_as_chrX_15387|ncRNA_inter_chrX_15422|ncRNA_inter_chrX_15550|ncRNA_inter_chr2_1481|ncRNA_inter_chr2_1502|ncRNA_inter_chr2_1528|ncRNA_inter_chr2_1600|ncRNA_inter_chr2_1782|ncRNA_inter_chr2_1792|ncRNA_inter_chr2_1790|ncRNA_inter_chr2_1793|ncRNA_inter_chr1_196|ncRNA_inter_chr1_195|ncRNA_inter_chr2_1989|ncRNA_inter_chr2_2038|ncRNA_inter_chr3_2167|ncRNA_inter_chr1_44|ncRNA_inter_chr3_2407|ncRNA_inter_chr3_2437|ncRNA_as_chr1_221|ncRNA_inter_chr3_2507|ncRNA_inter_chr3_2546|ncRNA_as_chr3_2544|ncRNA_inter_chr1_238|ncRNA_as_chr3_2603|ncRNA_inter_chr3_2636|ncRNA_inter_chr3_2768|ncRNA_as_chr3_2781|ncRNA_as_chr3_2778|ncRNA_inter_chr3_2790|ncRNA_inter_chr3_2791|ncRNA_inter_chr3_2798|ncRNA_inter_chr3_2889|ncRNA_inter_chr3_2896|ncRNA_as_chr3_2894|ncRNA_inter_chr3_2904|ncRNA_inter_chr4_3027|ncRNA_as_chr4_3224|ncRNA_inter_chr4_3308|ncRNA_inter_chr4_3350|ncRNA_inter_chr4_3381|ncRNA_inter_chr4_3380|ncRNA_inter_chr4_3389|ncRNA_inter_chr",
    "4_3391|ncRNA_inter_chr4_3390|ncRNA_as_chr1_86|ncRNA_inter_chr4_3575|ncRNA_inter_chr4_3652|ncRNA_inter_chr4_3690|ncRNA_inter_chr4_3764|ncRNA_inter_chr4_3775|ncRNA_inter_chr4_3905|ncRNA_as_chr4_3910|ncRNA_inter_chr4_3923|ncRNA_inter_chr5_4066|ncRNA_inter_chr5_4067|ncRNA_inter_chr5_4065|ncRNA_inter_chr5_4116|ncRNA_inter_chr5_4472|ncRNA_inter_chr5_4500|ncRNA_as_chr5_4627|ncRNA_inter_chr5_4749|ncRNA_inter_chr6_4993|ncRNA_inter_chr6_5281|ncRNA_inter_chr6_5282|ncRNA_as_chr6_5722|ncRNA_inter_chr1_494|ncRNA_inter_chr1_495|ncRNA_as_chr7_5880|ncRNA_inter_chr7_6091|ncRNA_inter_chr7_6090|ncRNA_inter_chr7_6080|ncRNA_as_chr7_6174|ncRNA_inter_chr1_520|ncRNA_as_chr7_6377|ncRNA_as_chr7_6389|ncRNA_inter_chr7_6415|ncRNA_inter_chr7_6433|ncRNA_as_chr7_6467"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})

# TCPO9: integer row indices of G172.umis whose rownames are in the ID list
# below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside an ID.
# Matching is exact (%in%) rather than an unanchored regular expression, so an
# ID never selects other rownames that merely contain it as a substring.
TCPO9 <- local({
  ids <- strsplit(paste0(
    "ncRNA_as_chr7_6656|ncRNA_inter_chr8_6742|ncRNA_as_chr8_6747|ncRNA_inter_chr8_6740|ncRNA_inter_chr8_6743|ncRNA_inter_chr8_6895|ncRNA_inter_chr8_6953|ncRNA_inter_chr8_6989|ncRNA_inter_chr8_6990|ncRNA_inter_chr8_7336|ncRNA_inter_chr8_7399|ncRNA_inter_chr8_7398|ncRNA_as_chr8_7632|ncRNA_as_chr9_7778|ncRNA_inter_chr9_7810|ncRNA_inter_chr9_7923|ncRNA_as_chr9_7924|ncRNA_inter_chr9_8169|ncRNA_inter_chr9_8299|ncRNA_inter_chr9_8297|ncRNA_inter_chr9_8387|ncRNA_inter_chr10_8469|ncRNA_inter_chr10_8777|ncRNA_as_chr10_9182|ncRNA_inter_chr10_9193|ncRNA_inter_chr10_9209|ncRNA_inter_chr10_9210|ncRNA_inter_chr1_792|ncRNA_inter_chr10_9351|ncRNA_as_chr11_9447|ncRNA_as_chr11_9449|ncRNA_inter_chr1_125|ncRNA_inter_chr1_126|ncRNA_inter_chr11_9693|ncRNA_inter_chr1_842|ncRNA_inter_chr11_9894|ncRNA_inter_chr11_9960|ncRNA_inter_chr11_9967|ncRNA_inter_chr11_9966|ncRNA_inter_chr11_10019|ncRNA_inter_chr11_10091|ncRNA_as_chr11_10193|ncRNA_inter_chr12_10324|ncRNA_inter_chr12_10322|ncRNA_inter_chr12_10477|ncRNA_inter_chr12_10562|ncRNA_as_chr12_10847|ncRNA_as_chr12_10889|ncRNA_as_chr12_10887|ncRNA_as_chr12_10885|ncRNA_intra_chr12_10888|ncRNA_as_chr12_10852|ncRNA_intra_chr12_10886|ncRNA_inter_chr12_10988|ncRNA_inter_chr13_11071|ncRNA_inter_chr13_11128|ncRNA_inter_chr13_11381|ncRNA_inter_chr13_11386|ncRNA_inter_chr13_11425|ncRNA_as_chr13_11491|ncRNA_inter_chr13_11490|ncRNA_inter_chr13_11603|ncRNA_as_chr13_11785|ncRNA_as_chr13_11784|ncRNA_inter_chr14_12018|ncRNA_inter_chr2_1072|ncRNA_as_chr15_12330|ncRNA_inter_chr15_12644|ncRNA_inter_chr15_12695|ncRNA_inter_chr15_12830|ncRNA_as_chr15_12837|ncRNA_inter_chr15_12832|ncRNA_inter_chr15_12831|ncRNA_inter_chr15_12833|ncRNA_inter_chr15_12849|ncRNA_as_chr16_13514|ncRNA_inter_chr17_13787|ncRNA_inter_chr17_13875|ncRNA_inter_chr17_14006|ncRNA_inter_chr17_14023|ncRNA_inter_chr17_14125|ncRNA_as_chr19_14728|ncRNA_intra_chr19_15028|ncRNA_as_chr19_15027|ncRNA_inter_chr19_15058|ncRNA_inter_chr19_15187|ncRNA_as_chr19_15188|ncRNA_as_chrX_15318|ncRNA_inter_ch",
    "r2_1427|ncRNA_inter_chr2_1419|ncRNA_inter_chr2_1421|ncRNA_inter_chr2_1420|ncRNA_inter_chr2_1424|ncRNA_inter_chr2_1422|ncRNA_inter_chr2_1432|ncRNA_inter_chr2_1431|ncRNA_as_chr1_173|ncRNA_inter_chr1_171|ncRNA_inter_chr1_169|ncRNA_inter_chr2_1851|ncRNA_intra_chr2_1852|ncRNA_inter_chr2_1877|ncRNA_inter_chr2_1883|ncRNA_inter_chr2_1887|ncRNA_inter_chr2_1888|ncRNA_inter_chr2_1926|ncRNA_inter_chr2_2009|ncRNA_inter_chr2_2078|ncRNA_inter_chr2_2079|ncRNA_inter_chr2_2084|ncRNA_inter_chr2_2085|ncRNA_inter_chr3_2207|ncRNA_as_chr3_2409|ncRNA_as_chr3_2418|ncRNA_inter_chr3_2481|ncRNA_inter_chr3_2549|ncRNA_inter_chr3_2579|ncRNA_inter_chr1_256|ncRNA_inter_chr1_257|ncRNA_inter_chr3_2705|ncRNA_inter_chr3_2887|ncRNA_inter_chr4_3056|ncRNA_inter_chr4_3079|ncRNA_as_chr4_3210|ncRNA_as_chr4_3275|ncRNA_as_chr4_3313"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})

# TCPO10: integer row indices of G172.umis whose rownames are in the ID list
# below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up in the list.
# Matching is exact (%in%) rather than an unanchored regular expression:
# as a regex, "ncRNA_inter_chr1_47" would also select "ncRNA_inter_chr1_470",
# "ncRNA_inter_chr1_471", ... if such rownames are present.
TCPO10 <- local({
  ids <- strsplit(paste0(
    "ncRNA_as_chr1_312|ncRNA_inter_chr4_3534|ncRNA_inter_chr4_3584|ncRNA_inter_chr4_3685|ncRNA_inter_chr4_3884|ncRNA_as_chr7_6302|ncRNA_intra_chr8_7322|ncRNA_intra_chr8_7321|ncRNA_intra_chr8_7319|ncRNA_intra_chr8_7317|ncRNA_intra_chr8_7314|ncRNA_intra_chr8_7313|ncRNA_intra_chr8_7323|ncRNA_inter_chr10_8461|ncRNA_as_chr1_782|ncRNA_inter_chr12_10621|ncRNA_inter_chr12_10715|ncRNA_as_chr16_13511|ncRNA_inter_chr18_14691|ncRNA_inter_chr19_15003|ncRNA_intra_chr19_15014|ncRNA_intra_chr19_15011|ncRNA_as_chr9_7767|ncRNA_as_chr19_14977|ncRNA_as_chr2_1965|ncRNA_as_chr1_483|ncRNA_inter_chr8_6961|ncRNA_inter_chr13_11436|ncRNA_inter_chr2_1098|ncRNA_inter_chr3_2269|ncRNA_inter_chr4_3183|ncRNA_as_chr5_4694|ncRNA_as_chr15_12817|ncRNA_as_chr19_15130|ncRNA_inter_chr2_1963|ncRNA_inter_chr4_3468|ncRNA_intra_chr7_5920|ncRNA_inter_chr3_2137|ncRNA_inter_chr14_12024|ncRNA_inter_chr1_401|ncRNA_inter_chr6_5138|ncRNA_inter_chr6_5824|ncRNA_inter_chr7_6511|ncRNA_inter_chr9_8249|ncRNA_inter_chr10_8767|ncRNA_as_chr10_8927|ncRNA_inter_chr19_14853|ncRNA_inter_chr4_3052|ncRNA_inter_chr5_4577|ncRNA_inter_chr5_4578|ncRNA_inter_chr8_6816|ncRNA_inter_chr9_7993|ncRNA_as_chr9_8050|ncRNA_inter_chr9_8147|ncRNA_as_chr11_10151|ncRNA_inter_chr11_10195|ncRNA_inter_chr12_10942|ncRNA_inter_chr1_987|ncRNA_inter_chr14_12026|ncRNA_inter_chr18_14293|ncRNA_inter_chr18_14604|ncRNA_inter_chr19_14979|ncRNA_inter_chrX_15549|ncRNA_inter_chr1_73|ncRNA_inter_chr1_51|ncRNA_inter_chr1_53|ncRNA_inter_chr1_47|ncRNA_inter_chr1_56|ncRNA_as_chr4_3235|ncRNA_inter_chr11_9996|ncRNA_inter_chr2_1498|ncRNA_inter_chr2_1491|ncRNA_inter_chr4_3178|ncRNA_inter_chr5_4137|ncRNA_inter_chr5_4192|ncRNA_inter_chr5_4330|ncRNA_inter_chr5_4329|ncRNA_as_chr5_4662|ncRNA_inter_chr5_4674|ncRNA_inter_chr6_5118|ncRNA_inter_chr6_5131|ncRNA_as_chr6_5336|ncRNA_inter_chr6_5595|ncRNA_inter_chr6_5822|ncRNA_inter_chr7_6343|ncRNA_inter_chr7_6391|ncRNA_inter_chr7_6509|ncRNA_inter_chr7_6522|ncRNA_inter_chr7_6523|ncRNA_inter_chr7_6524|ncRNA_inter_chr7_6534|",
    "ncRNA_inter_chr8_6757|ncRNA_intra_chr1_604|ncRNA_intra_chr8_7327|ncRNA_inter_chr1_631|ncRNA_inter_chr8_7605|ncRNA_inter_chr8_7606|ncRNA_as_chr9_8401|ncRNA_inter_chr11_9433|ncRNA_inter_chr11_9655|ncRNA_inter_chr12_10459|ncRNA_as_chr12_10896|ncRNA_inter_chr12_10979|ncRNA_inter_chr13_11120|ncRNA_inter_chr13_11294|ncRNA_as_chr13_11596|ncRNA_inter_chr14_12214|ncRNA_inter_chr14_12250|ncRNA_inter_chr2_1099|ncRNA_inter_chr15_12609|ncRNA_inter_chr15_12752|ncRNA_inter_chr15_12774|ncRNA_inter_chr15_12793|ncRNA_inter_chr15_12792|ncRNA_inter_chr16_13508|ncRNA_inter_chr19_14851|ncRNA_inter_chr19_14855|ncRNA_inter_chr19_15030|ncRNA_as_chr19_15054|ncRNA_inter_chr19_15190|ncRNA_as_chr2_1457|ncRNA_inter_chr3_2303|ncRNA_as_chr3_2567|ncRNA_inter_chr3_2722|ncRNA_inter_chr3_2937|ncRNA_inter_chr4_3307|ncRNA_inter_chr4_3425|ncRNA_inter_chr4_3779|ncRNA_inter_chr11_9631|ncRNA_inter_chr13_11122|ncRNA_inter_chr13_11383|ncRNA_inter_chr15_12855|ncRNA_as_chr18_14310|ncRNA_inter_chr3_2488|ncRNA_inter_chr3_2744|ncRNA_inter_chr4_3859|ncRNA_inter_chr5_4115|ncRNA_as_chr6_5601|ncRNA_inter_chr1_670|ncRNA_inter_chr10_9183|ncRNA_inter_chr11_10217|ncRNA_as_chr12_10618|ncRNA_inter_chr16_13137|ncRNA_inter_chr4_3285|ncRNA_inter_chr4_3286"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})

# G172M1M3 is the correct batch; DE genes

# DE1.M1M3.0.1: integer row indices of G172.umis whose rownames are in the ID
# list below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside an ID.
# Matching is exact (%in%) rather than an unanchored regular expression, so an
# ID never selects other rownames that merely contain it as a substring.
DE1.M1M3.0.1 <- local({
  ids <- strsplit(paste0(
    "ncRNA_as_chr7_5999|ncRNA_as_chr15_12340|ncRNA_inter_chrX_15446|ncRNA_inter_chr2_1419|ncRNA_inter_chr19_14746|ncRNA_inter_chr17_13909|ncRNA_as_chr8_7528|ncRNA_inter_chr9_7809|ncRNA_as_chr4_3532|ncRNA_as_chr19_14823|ncRNA_as_chr19_14883|ncRNA_as_chr8_7521|ncRNA_inter_chr10_8697|ncRNA_intra_chr6_5593|ncRNA_as_chr6_5589|ncRNA_as_chr8_6954|ncRNA_inter_chr5_3974|ncRNA_as_chr5_4325|ncRNA_inter_chr8_6743|ncRNA_as_chr12_10281|ncRNA_as_chr7_6166|ncRNA_as_chr12_10697|ncRNA_inter_chr11_9512|ncRNA_as_chr14_12038|ncRNA_inter_chr5_4221|ncRNA_as_chr5_4744|ncRNA_inter_chr3_2988|ncRNA_inter_chr17_13928|ncRNA_inter_chr13_11227|ncRNA_inter_chr15_12608|ncRNA_inter_chr12_10270|ncRNA_inter_chr4_3436|ncRNA_inter_chr2_1302|ncRNA_inter_chr2_1077|ncRNA_inter_chr1_292|ncRNA_as_chr1_373|ncRNA_as_chr19_14977|ncRNA_as_chr1_625|ncRNA_intra_chr14_11961|ncRNA_as_chr5_4334|ncRNA_as_chr10_8962|ncRNA_inter_chr2_1491|ncRNA_inter_chr13_11070|ncRNA_as_chr6_5634|ncRNA_as_chrX_15320|ncRNA_inter_chr10_9000|ncRNA_inter_chr5_4220|ncRNA_inter_chr11_9434|ncRNA_inter_chr10_9264|ncRNA_as_chr15_12817|ncRNA_as_chr9_7702|ncRNA_as_chr11_10075|ncRNA_as_chr19_14882|ncRNA_inter_chr3_2156|ncRNA_as_chr7_6192|ncRNA_inter_chr8_7363|ncRNA_inter_chr10_9193|ncRNA_inter_chr13_11752|ncRNA_inter_chr5_4395|ncRNA_as_chr17_13708|ncRNA_as_chr3_2894|ncRNA_inter_chr15_12365|ncRNA_inter_chr14_11945|ncRNA_inter_chrX_15437|ncRNA_inter_chr10_9086|ncRNA_as_chr18_14602|ncRNA_as_chr1_252|ncRNA_inter_chr8_6726|ncRNA_inter_chr13_11602|ncRNA_as_chr4_3083|ncRNA_inter_chr5_4593|ncRNA_as_chr17_14018|ncRNA_inter_chr12_10880|ncRNA_inter_chr3_2768|ncRNA_as_chr12_10521|ncRNA_inter_chr4_3294|ncRNA_as_chr8_7632|ncRNA_as_chr2_1440|ncRNA_inter_chr8_6759|ncRNA_as_chr2_1543|ncRNA_inter_chr11_9922|ncRNA_as_chr2_1884|ncRNA_as_chr17_13683|ncRNA_as_chr1_978|ncRNA_as_chr7_6467|ncRNA_inter_chr17_14026|ncRNA_inter_chr17_13801|ncRNA_inter_chr12_10476|ncRNA_as_chr4_3235|ncRNA_as_chr7_5921|ncRNA_inter_chr1_291|ncRNA_inter_chr1_161|ncRNA_inter_ch",
    "r5_4052|ncRNA_inter_chr11_9911|ncRNA_inter_chr7_6501|ncRNA_as_chr4_3559|ncRNA_as_chr6_5860|ncRNA_inter_chr7_6318|ncRNA_as_chr1_222|ncRNA_inter_chr10_8469|ncRNA_as_chr5_4281|ncRNA_as_chr11_9956|ncRNA_as_chr2_1695|ncRNA_as_chr4_3224|ncRNA_as_chr15_12467|ncRNA_as_chr17_13834|ncRNA_as_chr10_9389|ncRNA_inter_chr7_6369|ncRNA_inter_chr11_9926|ncRNA_as_chr15_12818|ncRNA_inter_chr1_496|ncRNA_inter_chr6_5862|ncRNA_as_chr1_400|ncRNA_inter_chr10_9208|ncRNA_inter_chr17_14128|ncRNA_as_chr8_7331|ncRNA_as_chr6_5722|ncRNA_as_chr19_14782|ncRNA_inter_chr3_2697|ncRNA_inter_chr12_10462|ncRNA_inter_chr11_9923|ncRNA_inter_chr1_846|ncRNA_inter_chr2_2079|ncRNA_inter_chr9_7999|ncRNA_inter_chr4_3458|ncRNA_inter_chr8_7654|ncRNA_as_chr18_14332|ncRNA_inter_chr3_2166|ncRNA_as_chr2_1439|ncRNA_as_chr19_14976|ncRNA_as_chr12_10245|ncRNA_inter_chr5_3975|ncRNA_inter_chr1_840|ncRNA_as_chr4_3298|ncRNA_inter_chr6_4980|ncRNA_as_chr1_762|ncRNA_inter_chr7_6189|ncRNA_inter_chr19_14990|ncRNA_as_chr13_11068|ncRNA_inter_chr19_14880|ncRNA_inter_chr11_9948|ncRNA_inter_chr6_4887|ncRNA_as_chr13_11149|ncRNA_as_chr2_1550|ncRNA_inter_chr4_3698|ncRNA_inter_chr10_9366|ncRNA_inter_chr5_4065|ncRNA_as_chr8_7375|ncRNA_as_chr19_14713|ncRNA_inter_chr9_8121|ncRNA_as_chr2_1677|ncRNA_inter_chr5_4022|ncRNA_as_chr2_1966|ncRNA_as_chr19_15130|ncRNA_inter_chr12_10561|ncRNA_inter_chr4_3819|ncRNA_as_chr19_14732|ncRNA_inter_chrX_15297|ncRNA_inter_chr11_9635|ncRNA_as_chr1_221|ncRNA_inter_chr8_6878|ncRNA_inter_chr9_7885|ncRNA_as_chr11_9990|ncRNA_inter_chr14_11845|ncRNA_as_chr11_10222|ncRNA_inter_chr3_2161|ncRNA_inter_chr2_1958|ncRNA_inter_chr11_10189|ncRNA_intra_chr8_6800|ncRNA_inter_chr1_852|ncRNA_as_chr7_6161|ncRNA_inter_chr1_109|ncRNA_inter_chr16_13456|ncRNA_inter_chr19_15132"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})
# DE2.M1M3.0.1: integer row indices of G172.umis whose rownames are in the ID
# list below (ascending order, integer(0) when nothing matches).
#
# The IDs are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside an ID.
# Matching is exact (%in%) rather than an unanchored regular expression, so an
# ID never selects other rownames that merely contain it as a substring.
DE2.M1M3.0.1 <- local({
  ids <- strsplit(paste0(
    "ncRNA_inter_chr19_14954|ncRNA_inter_chr3_2411|ncRNA_inter_chr16_13190|ncRNA_inter_chr15_12770|ncRNA_as_chr5_4384|ncRNA_inter_chr6_5510|ncRNA_as_chr11_10149|ncRNA_inter_chr9_8350|ncRNA_as_chr2_1145|ncRNA_inter_chr6_5542|ncRNA_inter_chrX_15278|ncRNA_inter_chr13_11385|ncRNA_as_chr2_1579|ncRNA_inter_chr6_4875|ncRNA_as_chr10_9365|ncRNA_inter_chr7_6338|ncRNA_as_chr8_7096|ncRNA_inter_chr14_12199|ncRNA_inter_chr1_812|ncRNA_inter_chr14_11979|ncRNA_as_chr17_14069|ncRNA_inter_chr1_120|ncRNA_as_chr17_13658|ncRNA_as_chr14_12055|ncRNA_as_chr16_13150|ncRNA_inter_chr6_5310|ncRNA_as_chr9_7957|ncRNA_inter_chr1_119|ncRNA_inter_chr2_2066|ncRNA_intra_chr7_5919|ncRNA_inter_chr14_12030|ncRNA_inter_chr17_13792|ncRNA_as_chr14_11948|ncRNA_as_chr2_1073|ncRNA_inter_chr8_6742|ncRNA_inter_chr6_5610|ncRNA_inter_chr7_6334|ncRNA_inter_chr10_9320|ncRNA_as_chr7_6032|ncRNA_as_chr6_5508|ncRNA_as_chr5_3947|ncRNA_inter_chr6_4981|ncRNA_inter_chrX_15240|ncRNA_as_chr13_11779|ncRNA_inter_chr4_3793|ncRNA_inter_chr10_8794|ncRNA_as_chr11_10121|ncRNA_as_chr9_8224|ncRNA_as_chr4_3820|ncRNA_as_chr19_15151|ncRNA_inter_chr15_12845|ncRNA_as_chr11_9812|ncRNA_inter_chr6_5318|ncRNA_as_chr2_2108|ncRNA_inter_chr13_11254|ncRNA_inter_chr12_10973|ncRNA_inter_chr19_14964|ncRNA_inter_chr6_5582|ncRNA_as_chr10_9147|ncRNA_as_chr15_12321|ncRNA_inter_chr2_1171|ncRNA_inter_chr3_2165|ncRNA_as_chr11_9897|ncRNA_intra_chr8_6802|ncRNA_inter_chr8_6788|ncRNA_inter_chr16_13443|ncRNA_as_chr1_253|ncRNA_as_chr8_7410|ncRNA_inter_chr12_10269|ncRNA_as_chr7_6655|ncRNA_inter_chr8_7682|ncRNA_as_chr9_8316|ncRNA_inter_chr8_7267|ncRNA_inter_chr2_1237|ncRNA_inter_chr17_13939|ncRNA_inter_chr11_10233|ncRNA_as_chr11_9838|ncRNA_inter_chr9_8000|ncRNA_inter_chr13_11669|ncRNA_inter_chr19_14774|ncRNA_inter_chr10_9210|ncRNA_inter_chr4_3549|ncRNA_as_chr8_6874|ncRNA_inter_chr16_13171|ncRNA_as_chr3_2545|ncRNA_inter_chr6_5045|ncRNA_inter_chr10_9263|ncRNA_as_chr10_9048|ncRNA_inter_chr7_6182|ncRNA_inter_chrX_15244|ncRNA_inter_chr12_10649|ncRNA_a",
    "s_chr18_14628|ncRNA_as_chr9_8428|ncRNA_as_chr19_15065|ncRNA_inter_chr3_2572|ncRNA_as_chr5_4771|ncRNA_inter_chr17_14187|ncRNA_inter_chr12_10411|ncRNA_inter_chr19_14874|ncRNA_as_chr11_9767|ncRNA_as_chr16_13458|ncRNA_as_chr8_7367|ncRNA_as_chr7_6293|ncRNA_inter_chr15_12653|ncRNA_inter_chr11_10211|ncRNA_as_chr17_13856|ncRNA_as_chr2_1542|ncRNA_inter_chr4_3285|ncRNA_inter_chr3_2653|ncRNA_inter_chr7_6397|ncRNA_inter_chr9_8104|ncRNA_as_chr3_2554|ncRNA_as_chr14_12134|ncRNA_inter_chr17_13551|ncRNA_as_chr8_7523|ncRNA_as_chr15_12460|ncRNA_as_chr2_1101|ncRNA_inter_chr8_7519|ncRNA_as_chr17_13822|ncRNA_inter_chr4_3738|ncRNA_inter_chr2_1830|ncRNA_inter_chr5_4321|ncRNA_as_chr2_1694|ncRNA_inter_chr17_14182|ncRNA_inter_chr16_13352|ncRNA_inter_chr3_2742|ncRNA_inter_chr11_9503|ncRNA_inter_chr6_5724|ncRNA_inter_chr1_591|ncRNA_inter_chr13_11523|ncRNA_as_chr4_3316|ncRNA_inter_chr3_2345|ncRNA_as_chr3_2544"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})

# DE3.M1M3.0.1: integer row indices of G172.umis whose rownames are in the
# symbol list below (ascending order, integer(0) when nothing matches).
#
# The symbols are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up in the list.
# Matching is exact (%in%) rather than an unanchored regular expression:
# as a regex, short symbols such as "Cp", "Ttr" or "Fn1" would select every
# rowname that contains them anywhere.
DE3.M1M3.0.1 <- local({
  ids <- strsplit(paste0(
    "Gm26992|Dpyd|Cwc22|Cps1|Upp2|Rad51b|Carmil1|Clpx|Fgb|Ang4|Fga|Gclc|Fgg|Fam13a|Sult3a2|Abcc2|Gsta3|Kcnt2|Irf9|Meiob|Crot|Gadd45g|G6pc|Pzp|Fgl1|Chrm3|Cfh|Etfbkmt|Sntg2|Cxcl1|Setbp1|Tymp|Crtc3|Pah|Kmo|Vmp1|Nfia|Rbpms|Slc7a2|Fam210b|Slc16a10|Plin2|St3gal1|Sox5|Ly6e|Sc5d|Wwox|mt-Atp6|Tacc2|Chka|Qsox1|Ass1|Pxmp4|Fam214a|Rnf125|Adgrv1|Por|Mbl1|Zbtb16|Adk|mt-Co3|Rrbp1|Map2k6|Pdgfc|Nfib|2810459M11Rik|Slc25a30|Adck5|Mindy3|Ypel2|Nrp1|Cryl1|mt-Cytb|Fbf1|Gas2|Otc|C4bp|Ldha|Aldh1l1|Garem1|Adh4|mt-Nd4|Inmt|mt-Nd2|Gm42906|Slc38a3|St3gal4|Ugt2b36|Ptprd|Insig1|Immp2l|Gon4l|Desi2|Tacc1|Cyb5a|Vwa8|Klf12|Gnmt|Prpf6|Cfi|Pter|Rapgef4|Fggy|Pitpnc1|Lbp|Peak1|Cdo1|Diaph2|Adh1|Mapk15|Slc30a10|Sdr9c7|Mamdc2|Rims2|Litaf|Ahcyl2|Abcb11|mt-Co2|Cyp2c29|Crp|Slc25a47|Slc37a1|Pnrc1|Hgd|Rai14|Prg4|Gcnt7|mt-Nd3|Rint1|Fgfr2|Gclm|Glt1d1|Ddc|Vtn|Eva1a|Trpm3|Zc3h13|Trp53inp2|Saa4|Nr3c2|Zbtb20|Cebpb|Cpb2|Nr2c2ap|Gsap|Itih3|Suds3|Insig2|Igf1|Hhex|Lnx2|Vcl|Sugct|Tmem243|mt-Nd4l|Chn2|Clock|Mthfr|Agmo|Pid1|Fech|Gm4952|Skap2|Pdia5|Onecut1|Acat1|Kynu|Tmed5|mt-Nd1|Nos1ap|Acaca|Serpina10|Ptpn3|Tifa|Bhmt|Clec2d|Nfil3|Abca6|Ern1|Ces1d|Pcyt2|Ftcd|Slc38a4|Gspt1|Inhba|Larp4b|Simc1|Traf5|Gtpbp4|Pik3c2g|Phf20l1|Timm23|Rpl36|Senp5|Slc17a5|Fabp1|Gldc|Ripor2|Lrpprc|Tab2|Zpr1|Etfdh|Thrsp|Ube2f|Peg3|Slc24a5|Gm3839|Tgoln1|Agmat|Kpna4|Fetub|St5|Ypel3|Sf3b3|Rlf|Pkhd1|Slco1b2|Gfod2|Ccdc12|Sco2|Exoc5|Ftl1|Zfp697|Eif2s2|Fnbp1|Nek6|Txndc15|Mastl|Bach2|Wsb1|Map3k14|Abcg8|Zbtb7a|Pawr|Tmem219|Gpd1|Erlin1|Gpc4|Fpgs|Eif1|Ap3s1|Akr1a1|Macrod2|mt-Co1|Slc25a17|Pck1|A230050P20Rik|Zkscan1|Cxadr|Gm6614|Ldah|Ppp1r3c|B3gat3|Fermt2|Cpn1|Mpp6|D430042O09Rik|Rtf2|Itih4|Dbi|Prodh|Lgals9|Bend7|Tstd1|Gm4756|Ptges3|Hsd17b13|March2|Fam210a|Cox8a|Lars2|Hsd17b12|Sqstm1|Col6a6|Etnppl|Ppfia1|Tbc1d5|Zc3h15|Calr|Sigmar1|Ptpn1|Iscu|Tmem248|Tecpr2|Ccdc152|Gabarapl1|Mrpl38|Cep44|2810474O19Rik|Ftl1-ps1|Grina|Lars|Serinc1|Slc25a51|Slc25a25|Ppp4r3b|Nsd3|Lcp1|Socs3|Usp6nl|Fth1|Pms1|Hmgcl|Sik1|Clpp|Ech1|Kifc3|Ssu72|Clic4|Dnajc3|Ei24|Stx16|Prr16|",
    "Rffl|Rasal2|Cfap54|Scai|Serping1|Mettl26|C4b|Hspa4|4931406P16Rik|Dnmt3b|Chd4|Cul1|Mal2|Ncoa5|Scarb1|Dcaf5|Akap8|Plk3|Tor1aip1|Strbp|Inhbc|Zfp410|Nedd4l|Gm11639|Catspere2|Sh3bgrl2|1600014C10Rik|Slc11a2|Creb3l3|Atp7a|Taok3|Ank3|Dab1|Zfp141|Nr1i3|Dmgdh|Ube2k|Clcn3|Amn1|Retreg3|Etnk1|Ergic1|Gata4|Coq3|Coq10b|Eif3c|Rpl24|Pfn1|Hint1|Trmt112|Brip1|Ifi35|Serpina3n|Helz2|Saysd1|Msmo1|Atox1|Cox19|Lactb|Errfi1|Rabac1|Fam168b|Ndufa4l2|Cyp2e1|Slc15a4|Sntb1|Glyat|Ranbp2|Tpd52l2|Hectd2|Dcaf7|Cox7a2l|Pcsk7|Kdm5b|Rb1|Mcl1|mt-Nd5|Fam135a|Pnkd|Zfhx2|Dcaf6|Nasp|Timm9|Kras|Cox6a1|Ifrd1|Adap2|Plpp3|Znrf2|Rexo1|Eif4e|Nckap5|Pxmp2|Ddx39b|Gm28305|Krt18|5830473C10Rik|Lrig1|Amacr|Agpat2|Sod2|Dhrs4|Slc25a20|Cggbp1|Irak4|Tm9sf2|Traf6|Ghr|Myo10|Serp1|Wdr37|Atp1a1|Nr2f2|Nck1|Prpf4b|Frrs1|Gnas|Tmem30a|Ndrg3|Ttc7b|Abhd17c|Cpeb4|Drg1|Fam160a2|Zmynd8|Smim13|Gfer|Otulin|Ahsa2|Mphosph8|Nedd8|Rc3h1|Slc25a42|Creld1|Lap3|Gsdme|Fn1|Exd1|Nrbp2|Serf2|Abhd14b|Nars2|Cdh1|Caprin1|Fam162a|Akirin1|Selenot|Eps15|Dtnbp1|Syvn1|Luzp1|Cebpz|Slk|Plbd2|Lsm6|Prr3|Eif4e2|Ghitm|Ggps1|Rpl6|Pigl|Neb|Cabyr|Park7|Cab39|Creg1|Chd7|Elavl1|Hspa5|Fem1c|Plcxd2|Gtf2ird1|Nup98|Hdlbp|C1ra|AI182371|Psmd11|Srsf7|Cyp2c23|Cyp3a25|Pycrl|Nostrin|Bag3|Apobec1|Nipa2|Uqcr10|Alkbh5|Susd1|Tra2b|C8g|Sdf2|Baiap2l1|Gbe1|Plcb1|Tmed2|Ttr|2210408I21Rik|Dnm1l|Sema6b|Aldh1a1|Eif4h|Cyp2j6|Gm37240|Palld|Sat1|Wdr20|Pnpla7|Parva|Urgcp|Top1|Rab6a|Cyp2j5|B4galt5|Cp|Zfp970|Elob|Dnaja2|Ndufa4|B4galt1|Gltpd2|Lasp1|Nufip2|Sftpd|Pak2|Nup54|Mtfr1|Slc10a2|Cep170|Midn|Rnf11|Micall1|1110008F13Rik|Sardh|Peli1|Tmem238|Slc30a9|Polr2a|Sgpl1|Ormdl3|Slc3a1|Aldh1a7|Etv6|Hmgcs2|Xbp1|Nectin2|Rap1b|Gatad2a|Acsl1|Golga5|Bcl7c|Fam111a|Mapkapk2|Polr2j|Rtp3|Inpp5a|Frmd4b|Slc35b3|Naca|Mical2|Ppm1g|Mapre3|Id2|Srpr|Cpox|Eif4b|Ntan1|Zfp810"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})
# DE4.M1M3.0.1: integer row indices of G172.umis whose rownames are in the
# symbol list below (ascending order, integer(0) when nothing matches).
#
# The symbols are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside a symbol.
# Matching is exact (%in%) rather than an unanchored regular expression:
# as a regex, short symbols such as "F9", "Atr" or "Clu" would select every
# rowname that contains them anywhere.
DE4.M1M3.0.1 <- local({
  ids <- strsplit(paste0(
    "Cox7a2|Astn2|Smad1|Serbp1|Rusc2|Foxa3|Ergic2|Spin1|Cela1|Nudt12|Rnf24|Tex2|Chdh|Zkscan3|Shtn1|Slc35d1|Cd81|Zfp799|Vcp|Akap1|Chuk|Gphn|Lonp2|Etf1|Bace1|Slc39a11|Dnah8|Ccni|Alkbh1|Mpp7|Hook2|Yy1|Mob4|Scyl2|Gatad1|Rere|Parg|Zfp182|Il6st|Ap1ar|Acadvl|Atxn7l3b|Stra6l|Btbd1|Yaf2|Brd1|Vtcn1|Rpl11|Orc3|Ndufv3|Hibadh|Hsd17b6|Tmx4|Ppp2ca|Ubr4|Nek1|Morn1|Herc4|Ubqln1|Gm10563|Dzip3|Tank|Farp1|Tcta|Papola|Bmpr1a|Hspa9|Eif5|Tmem131|Bri3|Pank1|Mttp|1110059E24Rik|Lrfn3|Hivep1|Neu1|Tbce|Efna5|Zcchc6|Actr2|Slc16a1|Sec22b|Ythdf2|Tef|Ybx1|Preb|Zbtb1|Pld1|F9|Rarb|Gmeb2|Rad23b|Rcc2|Gorasp2|Lztfl1|Fam104a|Sfpq|Smagp|Ppil4|Edf1|Nhsl1|Ints7|Zfp385b|Psmb1|Timm13|Zfp652|Itsn1|Mrps34|Eef1b2|Lurap1l|Gsr|Ddx50|Wdr53|Sec62|Gls2|Cnot7|Xdh|Cluh|Atxn7l1|Tmem150a|Net1|Senp6|Fam120c|Gpx4|Ethe1|Commd7|Fbxo42|Mrps26|Cltb|Car1|Kansl3|Pspc1|Tbc1d22a|Pex13|Rnf2|B230219D22Rik|Dclre1c|1700019A02Rik|Acox1|Wipi2|Vezt|Dpf3|Gm14403|Gmcl1|Hsdl2|Eif2b1|Emc3|Rap1gap|Trit1|Pqlc1|Nf1|Scaf8|Shq1|Larp4|Ngrn|Ubb|Vcpip1|Ube2j2|Lrrc29|Snx3|Mast2|Atp5l|Rnf19a|Atp6v1f|Eif4g2|Klhl2|Slc7a7|Cox7b|Adam11|Mbd1|Yap1|Grb10|Mprip|Lin7c|Zbtb11|Pura|Gabarap|Srsf6|8430408G22Rik|Oma1|Akr1c19|H2-T23|Szrd1|Mrnip|Nsd2|Usmg5|Rab18|Sppl2a|Mcm7|Kif5b|Thrap3|Tpst2|Blvrb|Samm50|Fbxw8|Lztr1|Ppp2r2d|Actl6a|Btaf1|Bcl3|Atr|Clu|Erp29|Pla2g12b|Csnk2a2|Sdf4|Sdr42e1|Ttpal|Rsbn1l|Gm379|Ubxn1|Ythdc1|H2afj|Prickle1|Abat|Nolc1|Nrip1|Acbd5|Spata24|Ror1|Mtmr14|Igfals|Map7d1|Plin5|Casp8ap2|Rnf10|Gin1|Tc2n|Nr2f6|Acp5|Tgfa|Zfp24|Gpx1|Dnaja3|Ifitm3|Usp16|1700037C18Rik|Idnk|Rp9|Cdip1|Rpn1|Cdv3|Eif4a1|Hmgcs1|Smad9|9030624G23Rik|Sult5a1|Oplah|Zfp106|Mtmr1|Colgalt2|Rai1|Dgkh|Ppp4r3a|Ddx23|Dstn|Stat5a|Hmces|Ythdf1|Adipor2|Dynll2|Gcgr|Dhx9|Asb13|Eci1|Zhx2|Eif2b4|Upb1|Psmb2|Foxn2|Zbtb5|Rad54l2|Cpeb2|Tmed4|Kdm3a|Metap1|Ankrd46|Akirin2|Lpin2|Mrps7|Nprl3|Ctsz|Soat2|Atp6v1h|Caml|Upf3b|Map4k5|Pdia3|4932438A13Rik|Dnajb11|Smurf2|Mtif2|Cyp4f17|Paip2|Slc29a1|Tars|Vamp8|G3bp1|Adh7|Vwce|Anapc16|Scyl3|Cops6|Snw1|Ranbp9|Gosr2|Chordc1|Mrps10|Ncam2|E130311K13",
    "Rik|Wars|Eif5b|Znhit1|Actb|Gmfb|Crk|Hipk2|Crem|Nr0b2|Dusp1|Sdf2l1|Hadha|Atp11a|Ddb1|Prkd3|Htatip2|Ints8|Qprt|Rcbtb2|Greb1l|Klf9|Pithd1|Ugt3a1|Mrpl52|Psmd4|Ppib|Clns1a|Rad54b|Smarcad1|Vps72|Ttc14|Hadhb|Xiap|Ceacam1|Sec63|Cox11|Clptm1|Atp11b|Cox7c|Shmt1|Abl2|Ngef|Mgat2|Dus1l|Serpina3c|Tmem259|Rhoa|Rpl27a|Ndufb11|Brwd3|Fam53a|Hmgcr|2310030G06Rik|Med13|Fer|Reln|Uxt|Junb|Pdia6|P4ha1|Ndufa10|Atp5c1|C1s2|Wdr4|Cebpg|Stk38|Nadsyn1|Dexi|Fam120b|Trabd|Elmo1|Ahnak|Tmem234|Stk24|Ywhaz|Ppdpf|Tpp2|Mreg|Nphp3|Gpkow|Prelid2|Slc30a5|Derl1|Stx5a|Stat3|Pyurf|Ppp2r1b|Tbca|Phrf1|Utp14a|Tmed7|Patl1|Pebp1|Vps50|Caap1|Ssb|Pdrg1|Gnl3|Farsa|Ufc1|Tubb4b|Foxj3|Ubn1|Cox6c|Rspry1|Kpna1|Btd|Dcaf10|Mbd2|4921524J17Rik|Uqcrq|Pdia4|Paip1|Shpk|Stard13|Plxnb2|Utp4|Bsg|Rnf114|Cfap97|Tmem127|Rnps1|Map3k10|Riok1|Nop58|Gcsh|Rac1|Fyco1|Trappc4|Bclaf1|Ptpn2|Ddx19b|Hs2st1|Zswim8|Gm5617|Dcun1d3|Actn1|4930556J24Rik|Lrp5|Cdk7|Asgr1|Supt5|Pabpc1|Hnrnpl|Lamp1|Emc6|R3hdm1|Bckdhb|Srsf11|Wdr7|Tmem143|Aatf|Pef1|Acad8|Togaram1|Sympk|Mrpl34|Myl12b|C1galt1|Sync|Zfp867|Snap47|Tma7|Mtss1|Rest|Tssc4|Creb1|Fkbp8|Large1|Imp3|Sdsl|Eif3e|Rpl10a|Atp6v0b|Prpsap1|Cdc42bpb|Actr1a|Mocos|Rab2b|Lcat|Scaf1|Sf3b6|Atp13a1|Ndufb5|Mat2a|Igf2bp3|Prdx5|Hnmt|Cox5a|Ighmbp2|Setdb2|Tmem19|Actn4|Oxld1|Maged1|Sox6|Gjb2|Lsm12|Sec61a1|Baz1b|Psmd5|1190002N15Rik|Ndufb9|Abhd3|Dicer1|Cldn1|Mindy1|Acads|Dhps|Rpl19|Baz1a|Iws1|Supt16|Ccnt1|Atg2a|Fbxw7|Rps3a1|Wdr45b|Dock4|Fam25c|Sdhaf3|Swt1|Serpinc1|Gan|Sbds|Med6|Pcf11|Coq10a|Smap1|Gpr68|Psmd8|Txn2|Ascc2|Fbxo31|Vti1b|Taf11|Fam222b|Ldlr|Tle3|Acbd3|Lrrc58|Sirt1|Lmna|Slc40a1|Gpt|Ccdc122|Tex30|Cpsf1|Mrpl2|Esco1|Cdc42bpa|Xylt2|Nlrp6|Mrpl57|Zfp451|Gpatch2l|4833439L19Rik|Prpf31|Csnk1d|Rabep2|Gria3|Ndufs3|Rapgef5|Shoc2|Rnf217|Chd9|Rbm4b|Ap2a2|Myl6|Cars|Mlxip|Tbc1d20|Calm1|Col4a1|Fam207a"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})
# DE5.M1M3.0.1: integer row indices of G172.umis whose rownames are in the
# symbol list below (ascending order, integer(0) when nothing matches).
#
# The symbols are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up in the list.
# Matching is exact (%in%) rather than an unanchored regular expression:
# as a regex, short symbols such as "Tst", "Mif" or "Tat" would select every
# rowname that contains them anywhere.
DE5.M1M3.0.1 <- local({
  ids <- strsplit(paste0(
    "Ppard|Rps11|Wdtc1|Vapa|Neto2|Kpna3|Mrpl4|Nf2|Mtor|Tm9sf4|Wdr82|Cox18|Nrn1|Zfp444|Mrpl17|Tm2d1|Angptl4|Prok1|Mcph1|Ulk1|Tmem214|Taf1a|Sem1|Fndc3b|Fchsd2|Olfm2|Gm11808|Tpk1|Itgb1|Mkl1|Fbxo38|Rras2|Rnf111|Tsc22d2|Tmem216|Nr2c2|Aaed1|Apoo|Hyal2|Atp2c1|Pi4kb|Slc39a14|Tst|Kdelr1|Ube2a|Clpb|Coa3|Ap5z1|Ddx21|Nop10|Gimap9|Wsb2|Abce1|Ccar1|Inca1|Tgm2|Ogfod1|Mthfs|Bdp1|Il13ra1|Rdh14|Cgrrf1|Hnrnpk|Zfp830|Stk16|Vav2|Gm7298|Trp53inp1|Dpagt1|Rnf138|Tcim|Sec11a|Cdkn1a|Ttc19|Faf2|Atg14|H3f3a|Thumpd3|Mrpl49|Cdc123|Srp9|Lrp1|Rabgef1|Spcs2|Lamp2|Sfr1|Rexo2|Anxa7|Pcgf6|Bbox1|Ube2d2a|Nploc4|Lonrf3|Car14|Psmc3|Malsu1|Ints6|Dnajc15|Sec13|Lpar6|Fbxw4|Zfp512|Heatr1|Cdkn2aip|Ganab|Bbs9|Pla2g6|Tmem141|Manba|D17Wsu92e|Cpt2|Cldn2|Thoc7|Clta|Gtf2a2|Grpel1|Atg2b|Hdac4|Sec24c|Snrnp48|Zfp773|Atp5a1|Zfp664|Cmah|Mogs|Ddost|Cyp26a1|Trrap|Ext2|Eif3a|Canx|Rplp1|Mul1|Nudt5|Nono|Slc35a2|Irak2|Fkrp|Lrig2|Brms1l|Dek|Heatr6|Morf4l1|Ddhd1|Zcchc11|Tbl1x|Mcfd2|Map3k20|Ston1|Rabggtb|Steap4|Minos1|U2af1|Bmt2|Crip2|March5|Gtf2e1|Snrnp70|Edem3|Ehd1|Cyc1|Rbm10|Ing4|Xpo4|Sidt2|Ankrd11|Rbm18|Vps37a|Mob1b|Abcf2|Ginm1|Tmem68|Zcchc14|C1rl|Dad1|Usp15|Ube2v2|Ssfa2|Kxd1|Ttll4|Zdhhc18|Tmem163|Hfe|Pex6|Ezh2|Mapk1|Scarb2|Ostc|Brix1|Atp6ap2|Ddhd2|Apcs|Spint2|Tbck|Tsen34|Rpl5|Nat10|Mtfr1l|Ninj1|Ano6|Fnta|Fgfr4|Pgk1|Ccdc28a|Nipsnap2|Nudt13|Xpr1|Cct2|Ppm1d|Atp9a|Upf2|Gak|Kdm3b|Hnrnph1|Trim44|Agrn|Csde1|Olfr16|Ppip5k2|Dennd5b|Pdcd6ip|Pop1|Tmem176b|Efl1|Eif5a|Otud5|Hist1h2bc|Zfp740|Dip2b|Alkbh8|Pfdn5|Npc1|Gabarapl2|Ppil2|Tmem167|Calm2|Ppp1r13b|Abca8b|Mcm9|Proc|Mrps21|Cnot8|Pxylp1|Alkbh2|Ptpn11|Zfand5|Tom1l2|Tspo|Sra1|Rps10|Fam76b|Zfp131|Cldn3|Mrps12|Ifnar2|Ces1f|Nfe2l1|Ube2n|0610030E20Rik|Cog4|Mbnl2|Camk2n1|Gatad2b|H2afy|Vezf1|Pkd2|Ddx1|Trappc8|Wwc2|Rnf38|Aak1|Atp5o|Hmgxb3|Slx4ip|Capza1|mt-Atp8|Mon1a|Smim10l1|Gspt2|Baz2a|Snx16|Cenpa|Riok3|Rrn3|Cwc15|Susd6|Oxsm|Apol9b|Ccm2|Cpq|Ccdc47|Fdft1|Cope|Mtf2|Zc3h7a|Psmg2|Tedc2|Tert|Rab11a|Rps13|Slc25a15|Fbxw2|Ercc4|Atg12|Ciapin1|Efr3a|Atp6v0a2|1110032A03Rik|Osbp|Fam118b",
    "|Pphln1|Pex19|Erbin|Aff4|Mlec|mt-Nd6|Trpc4ap|Zfp568|Zcchc24|Fnbp1l|Fam213b|Aimp1|Vac14|Ncor1|Akr7a5|Atp8b1|Cldn14|Ccdc25|Phf3|Supt6|Heatr5a|Jade2|Mif|Usf2|Tmem64|Chid1|Psmd7|Scaf11|Tex12|Mettl2|Gm11273|Rpl18|Alg11|Spsb2|Ube2z|Xrn2|Icam1|Ddx46|Sephs1|Slc35a3|Tram1|Bcl7b|Med27|Nrf1|Cntrl|Sec24b|Nol8|Xrcc4|Sumf1|Ier2|Hdac1|Heatr3|Mfn1|Ccdc91|Ssna1|Eif2s1|Nars|Cyp2j8|Cacybp|Txndc9|Spaca6|Zdhhc14|Samd8|Elmod3|Erh|Naa50|Oaz1|Me1|Glmp|Apon|Acbd4|Ngdn|Ufd1|Pou6f1|Prkar1a|Coq5|Eif3j1|Hspe1|Grpel2|Fxr2|Rab5a|Spata5|Senp2|Atp6v1a|Eri3|Xylb|Ago3|Agt|Tmem131l|Pllp|Atp6v1e1|Scand1|Tmem56|Gstm1|Psmd3|Tbc1d14|Faim|2210016F16Rik|Tmem208|Zfp768|Atat1|Timm8b|Crebzf|Pdhx|Copb2|Prkag2|Slc30a7|Timm17a|Dhx33|Nlk|Ppp1cb|Acbd6|Zfp326|Pqlc2|Leng9|Stk38l|Tmem126a|Hdgfl2|Slc25a26|Flii|Higd2a|C1s1|Lsg1|Puf60|Zfp655|Ribc1|Dhx40|Ppp1r37|Hoga1|Paqr9|Ccs|Nxf1|Ssr1|Fhit|Nme7|Pnpo|Kctd20|Eapp|Nras|Hectd1|Zfr2|Krt8|Ttf1|Ecd|Vps54|Mical3|Rprd1b|Zfx|Pcbp2|Ankrd13c|Cry1|Ikbkap|Agpat3|Mrpl40|Tat|Rtraf|Cdc26|Mrfap1|Ppp1r2|Vnn1|Ivns1abp|Alg14|Mrpl50|Gchfr|Cnpy3|Ost4|Ppp1r42|Dnase2a|Mrpl43|Vkorc1l1|Clp1|Slc25a5|Gm49356|Snip1|Slc35f5|Acad11|Polr3h|Tmem29|Zfc3h1|Usp9x|Utp11|Eny2|Irgm2|Ilf2|Zfp654|Anxa5|Klf3|Prpf19|Bcas3|Ccnl1|Pcbd1|Mospd3|Ctbs|Gm17660|Tprkb|Camta2|Mrpl20|Dusp11|Zfp513|Rlim|Mif4gd|Foxq1|Inppl1|Prr14|Cyb561|Chd8|Rab40c|Fbxw11|Dna2|Usp3|Klhdc8b|Cmas|Proser1|Nvl|Rrp1|Synj1|Sin3a|Hnrnpll|Taf2|Slc39a1|Cpsf2|Slain2|Ndufs7|Uqcrc1|Cmtm6|Hnrnpf|Mat2b|Ndst2|Gtf3c1|2410015M20Rik|Tomm22|AA986860|Immp1l|Top1mt|Qrich1|Ubc|Pex11a|Slc6a9|Mink1|Ppp2cb|Eif1a|Mrps24|BC005561|Slc26a1|Ctnnbl1|Rrp8|Pon1|Mrpl14|Tirap|Echdc2|Ppp4c|Hnf1b|Uso1|Carhsp1|Pcm1|Hspa13|Ehmt1|Dnajb4|Ep300|Apopt1|Atp2a2|Polk|Usp12|Pde3b|Tspan31|Qtrt1|Golim4|Tfr2|Zfp511|Hmbox1|Snx13|Atraid|Lyrm2|Paxx|Rps15"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})
# DE6.M1M3.0.1: integer row indices of G172.umis whose rownames are in the
# symbol list below (ascending order, integer(0) when nothing matches).
#
# The symbols are kept as one "|"-separated string, written as two adjacent
# literals joined with paste0() so that no line break ends up inside a symbol.
# Matching is exact (%in%) rather than an unanchored regular expression.
# This matters most here: the list contains the one-letter symbol "a", which
# as a regex alternative matches every rowname containing a lowercase "a".
DE6.M1M3.0.1 <- local({
  ids <- strsplit(paste0(
    "Smim4|Prdm2|Pdss2|Vps53|Usp50|Acin1|Zbtb44|Atp6v1g1|Ppp2r5c|Ilvbl|Kdm5a|Wdr91|Oaz2|Pskh1|Sharpin|Fabp2|Suclg1|Slc39a10|Rab22a|3110043O21Rik|Med28|Lyrm1|Xpo7|Leprotl1|Zfr|Atf1|Tkfc|Tmed9|Mettl7a1|Dhx32|Tcp11l2|Tfeb|Acyp1|Ppp6c|Golga3|Pdpk1|Ptov1|Pfdn6|Snd1|Rps6ka1|Cul5|Rad17|Dnajb12|Emc7|Phf11c|Stt3b|Zfp592|Hist1h1c|Tdrd7|Ttyh2|Timp3|Rsf1|Lrrc28|Cdk17|Immt|Dnajc11|Cfap20|S100pbp|Btg1|Ube2e1|March7|Asap3|Fam98a|Naa30|Taf13|Tipin|Pip5k1c|Pole4|Gucd1|Smim14|Pcgf2|Maf|Mrps33|Ssr2|Fgfr1|Rnh1|Rpusd3|Igsf5|Pom121|Utp20|Lyn|E2f4|Cbfb|Lpgat1|Ncaph2|Spop|Srp19|Uox|3110021N24Rik|Atp5f1|Myo6|Farsb|Lsm1|Rpl18a|Ndufaf7|Selenof|Thoc1|Lypla2|Tgfbr1|Gm43064|Mtpn|Mrps18a|Aars|Cirbp|Tcp1|Phkg1|Slc25a39|Epn1|Ranbp17|Casd1|Mapk1ip1l|Pkdcc|Sept7|Cand1|a|Pitpnb|Lrpap1|Tmem186|Yme1l1|Gpbp1l1|Sirpa|Rpl23|Commd3|Vps28|Adam10|Gmps|Sec23ip|Naa25|Tgfbrap1|Pecr|Mars|Sgf29|Slc25a46|Rfc2|Hnrnpa1|Psma7|Foxp4|Parn|Txnrd1|4930453N24Rik|Prorsd1|Atg4a|Aqp8|Znrf1|Cdk11b|Man2a2|Sgta|Btbd10|Ddx31|Eci2|Spr|Iars2|Cast|Med15|Zranb2|Trim11|Psmc6|Rex1bd|Tspan12|Ankrd36|Pdcd7|Tut1|Rnf126|Dnajc5|Mroh1|Plcg1|Nucks1|Cct7|Bid|Cdadc1|Cmtm4|Exosc7|Abcf1|Tbc1d12|Zbtb43|Kif1c|Sbno1|Srpk1|Myh9|Plaa|Rack1|Akr1c6|Rps16|Phc3|Nmnat1|Ogdh|Uggt1|Epb41l4b|Herpud2|Mfap3|Cactin|Ddx42|Cxxc1|Mctp2|2310039H08Rik|Tm9sf3|Rps5|Trim25|Mrps11|Uckl1|Aes|Wdr12|Zfp367|Nudcd1|Pigyl|Hnrnpa0|Ep400|Flcn|Serpina1b|1110065P20Rik|Phf8|Eif2ak1|Cops3|Pdzrn3|Nr5a2|2010107E04Rik|Tmem175|Rin2|Usp39|Eef1d|Psmf1|1700123O20Rik|Trub2|Atp5h|Mad2l2|Lrrc3|Smdt1|Zbtb24|Wdr89|Ublcp1|Epas1|Fermt1|Tuba4a|Rps19|Abi2|Ccdc82|Zkscan7|Srrm2|Wdr61|Txlng|Ntmt1|Nfs1|Uros|Ccdc77|1700017B05Rik|Ubap2|Ppp1r15b|Cct4|Mt1|BC031181|Rnf216|Dhx29|Ablim3|Synrg|Zfp263|Nol9|Nup153|Habp2|Mphosph10|Rad50|Rpsa|C87436|Mocs1|Fastkd1|Dcaf17|Lmbrd1|Foxk2|Ccdc107|Epg5|Hars|Eif2ak2|Hras|Stt3a|Epo|Efna1|Uri1|Epc2|Stx8|Hyou1|Zfp646|1700109H08Rik|Dnajc21|Klhdc10|Ap4m1|Tmem70|Dync2h1|Mtmr10|Mob1a|Bop1|Alas1|P2rx4|Thap2|Mylip|H13|Ndufb10|Nelfa|Prelid1|Iqcb1|Snx33|Zhx3|Nup15",
    "5|Mrap|Gm14325|Clk2|Eml3|Suz12|Necap1|Tmem123|Map3k7|Taz|Rab3gap1|Lrrc51|Fam32a|Wars2|Skp1a|Vipas39|Pcif1|Slc16a4|Ndufa8|Cyp2d26|Jmy|Tmem222|Tigd2|Eef2|Nabp1|Ssrp1|Thnsl2|Dhx16|Pgam5|Tsfm|Syn3|Pnkp|Uxs1|B3galnt2|Ggnbp2|Apmap|Cdc42se2|Lss|Mrpl27|Kctd2|Lsm4|Polr3f|Atp5d|Rps17|Trim33|Wdcp|Rpl32|Rab9|Slc18a1|Rab11b|Nceh1|Clec16a|Pepd|Trappc13|Rabgap1l|Psmc1|Nectin1|Ascc3|Aftph|Mtch2|Grip1|Map1lc3a|Fancl|Gemin5|Shprh|Adam19|Lage3|Nom1|Dnajc24|Pgs1|Prrg2|Ctdspl2|Grk5|Pik3c3|Slc35e2|Stap2|Uvssa|Mvb12a|Slc8b1|Rhbdd1|Ptpn9|Nck2|Csnk1g2|Laptm4a|Pus10|Ube2g2|Pir|Wrap53|Idh3g|Med8|Pomp|BC005624|Pcbp1|Polr2c|Klc4|F8|Ywhae|Brd7|Ppp4r1|Adnp2|Dennd5a|Prrc1|Ino80d|Zswim5|Rnf6|Otud4|Prr14l|Tmem106a|Cdc37|Col4a3bp|Tbc1d15|Sprtn|Phtf1|Bbs7|Mapk8ip3|Sugp1|Depdc7|Yipf5|Gpd2|Calu|Wbp2|Pnldc1|Slc35d2|Sf3b1|Tmem258|Gpr89|Pank2|Rps2|Ip6k1|Dolk|Ube2m|Smc5|Mrpl48|Cmc4|Maml1|Ccdc138|Rps4x|Pik3c2a|Emsy|Med4|Gng5|Rps26|Mettl27|Pak7|Psmb5|Poc1b|Cideb|Ppp1r10|Cbwd1|Ddx10|Lman2l|Dld|Mecr|Ptgr2|Cep57|Phb2|Cnpy2|Scamp2|Mdn1|Fip1l1|Parp4|Pxk|Bola3|Ube2j1|Stxbp3|Ahsg|Dgkq|Zcchc9|Mfsd11|Sh3gl1|Tpi1|Sdccag3|Tdrd3|Hnf4g|Polr1d|Hopx|Gid4|Tmod3|Rpl12|Safb|Btbd9|Vdac3|Akap9|Cep97|Rfwd3|March8|Smim12|Ccdc117|Abraxas1|Hbp1|Pdzd8|Fbxw9|Gapvd1|Thap3|Kat6a|Setdb1|Fam241a|Dph5|Slc25a13|Trim6|Abcf3|Tnfrsf1b|Desi1|Paxbp1|Slc38a10|Nedd1|AU040320|H2-Ke6|Tmem161b|Surf4|Cul2|Fat1|Adcy6|Trim41|Naa15|Rtf1|Dnajb6|Csnk2b|Cep350|Grn|Zfp282|Zfp346|Amdhd1|Acox3|Ubxn7|Tardbp|E130308A19Rik|Selenok|Eif3b|Vegfd|Pcnp|Irf3|Api5|Ndufv1|Zbtb41|Dcaf8|2510039O18Rik|Nfic|Dalrd3|Smarcc1|Ino80b|Abcc6|Sft2d3|Sipa1l3|Cpn2|Ola1|Gnb1|Galc|Ube2q2|Ncbp2|Vps26a|Prr13|Hace1|Zfp260|Coq7|Hagh|Mief1|Lias|Elf2"
  ), "|", fixed = TRUE)[[1]]
  which(rownames(G172.umis) %in% ids)
})
# Row indices (into G172.umis) of the first M2M4 gene list.
# Symbols are matched EXACTLY against the row names. The previous grep() call
# used the list as an unanchored regular expression, so e.g. "C3", "Alb" or
# "ncRNA-inter-chr1-161" also hit every longer name containing them.
# The list is written as two string pieces joined by paste0() so that no line
# break ends up inside a gene symbol (the old literal was split inside Stard5).
DE1.M2M4.0.1 <- which(rownames(G172.umis) %in% strsplit(paste0(
  "Gm26992|Etfbkmt|ncRNA-inter-chrX-15446|ncRNA-inter-chr8-6726|Sult3a2|ncRNA-as-chr6-5589|ncRNA-intra-chr6-5593|Gm3839|Acsl1|ncRNA-inter-chr3-2156|Tymp|Errfi1|Ghr|Carmil1|ncRNA-as-chr15-12340|Cyp2c23|Pkhd1|Car1|G6pc|Adgrv1|ncRNA-as-chr4-3532|Dpyd|ncRNA-inter-chr11-9512|Alb|ncRNA-as-chr15-12321|Gm4756|C3|Apoe|Sugct|Dlgap1|Pbld1|Fga|Slc7a2|Vwa8|Nckap5|Klf9|Gm6614|Nfia|Rbpms|Hmgcs1|Mtif2|Peak1|ncRNA-inter-chr17-13909|Sult2a8|Clpx|Syne1|Baiap2l1|ncRNA-as-chr10-8791|ncRNA-inter-chr18-14605|Zfp697|ncRNA-as-chr2-1904|Cxadr|Sox5|Mapk15|Gm5934|Gm28305|ncRNA-inter-chr15-12608|Gm42906|Lpin2|Abcb11|Echdc2|ncRNA-as-chrX-15320|Abhd14b|Hmgcs2|ncRNA-as-chr15-12323|Tmem219|Gm37240|ncRNA-as-chr6-5860|ncRNA-as-chr19-14772|Eva1a|Glud1|Adra1b|Bri3|Acat1|ncRNA-as-chr4-3298|Fgfr4|Mrnip|Adh1|Fmo5|Iigp1|Sdr9c7|Slc39a14|Ttc7b|Suds3|ncRNA-inter-chr5-4221|Igf2bp3|ncRNA-inter-chr11-9948|Adck5|Sirt3|Col6a6|Chrm3|ncRNA-inter-chr4-3819|Fgfr2|Iah1|Immp2l|ncRNA-as-chr8-6954|Rint1|Saa4|Nhsl1|Gcnt7|Abcb4|Sh3d19|ncRNA-inter-chr3-2161|ncRNA-as-chr10-8962|Slc10a1|Timm9|ncRNA-as-chr2-1543|Tmem126a|ncRNA-inter-chr6-4887|Gbe1|Fam214a|Trp53inp2|Fech|Thnsl2|ncRNA-as-chr16-13178|Tmem150a|Eif1|Neb|Vmp1|ncRNA-as-chr17-13683|Nfib|Gm33543|ncRNA-as-chr7-6547|Herpud1|ncRNA-intra-chr6-5591|Cps1|Zbtb20|Zfp707|Ap3m1|Mettl23|Atp5o|Blm|Sdr42e1|Vcl|Ppara|Mab21l3|Prpf4b|Igf1|Preb|Tshz2|Shtn1|Nedd4l|Cabyr|ncRNA-as-chr4-3500|Zdhhc14|ncRNA-as-chr9-7702|Slc25a42|ncRNA-inter-chr13-11227|Stim2|Gm4952|ncRNA-intra-chr7-5919|Mecr|ncRNA-inter-chr12-10880|ncRNA-inter-chr4-3698|St3gal3|Hsd17b13|Plin2|Strbp|4833420G17Rik|Thap2|Fbf1|Zfand6|Ugt2a3|Olfr56|ncRNA-as-chr5-4334|Sco2|Numb|Bmp1|Dab1|ncRNA-as-chr2-1884|Adipor2|Irgm2|ncRNA-as-chr2-1145|ncRNA-as-chr12-10521|ncRNA-as-chr11-10075|Rnf25|Atr|Etfdh|F11|ncRNA-inter-chr10-9264|Bcas3|Acox2|Kyat1|ncRNA-inter-chr4-3294|Sqstm1|Proser1|Tmem243|Pid1|Btbd9|Me1|ncRNA-as-chr16-13380|Smad9|Smim13|Gucd1|Drg1|Phyh|Pde4c|ncRNA-inter-chr10-8794|Nr1h4|Mtss1|Elovl5|Slc37a4|Lztfl1|S",
  "tard5|Dnase2b|C1rl|Xiap|Msmo1|Ypel3|ncRNA-as-chr17-14018|Aaed1|Ggact|ncRNA-inter-chr1-846|ncRNA-inter-chr1-161|Ces1g|Sdc4|Ppp1r9a|Ap3s1|Mettl26|Trim7|Dhps|Rbfox1|Trim28|ncRNA-inter-chr11-10189|Eci2|Stx16|Gm10563|Dhrs4|Slc27a2|Gin1|Atp6v0b|Cpt1a|Nrp1|ncRNA-as-chr18-14628|Pxmp4|Rora|ncRNA-as-chr7-6161|ncRNA-inter-chr11-9922|Ttc14|Tfr2|Ifnk|Oxld1|Hadha|Nos1ap|Sf3b3|ncRNA-inter-chr12-10595|ncRNA-inter-chr15-12365|Fyco1|ncRNA-inter-chr12-10270"
), "|", fixed = TRUE)[[1]])
# Row indices (into G172.umis) of the second M2M4 gene list.
# Symbols are matched EXACTLY against the row names. The previous grep() call
# used the list as an unanchored regular expression, so e.g. "Son", "Lpp" or
# "ncRNA-inter-chr1-109" also hit every longer name containing them.
# The list is written as two string pieces joined by paste0() so that no line
# break ends up inside a gene symbol (the old literal was split inside Gch1).
DE2.M2M4.0.1  <- which(rownames(G172.umis) %in% strsplit(paste0(
  "Fam210b|ncRNA-as-chr1-979|Aldh1l1|ncRNA-inter-chr8-6975|ncRNA-inter-chr17-13928|Dnaja2|Plcb1|Net1|ncRNA-inter-chr6-5862|Lonp2|ncRNA-as-chr9-8316|Peli1|Them4|2810474O19Rik|Gclm|Zpr1|ncRNA-as-chr9-7825|ncRNA-inter-chr12-10561|Gria3|ncRNA-inter-chr13-11669|Ackr4|Rnf169|Ttpa|Gsdme|ncRNA-inter-chr12-10555|Slc7a8|Ggps1|1810022K09Rik|Slc24a5|ncRNA-as-chr4-3083|Rtp3|Son|Ddhd2|Dclre1c|ncRNA-as-chr19-15060|ncRNA-inter-chr5-4395|ncRNA-as-chr2-1439|Astn2|Traf6|Ssbp2|Slc35e2|ncRNA-as-chr6-5261|Ror1|Osbpl9|Pdrg1|ncRNA-as-chr11-10136|Nme7|1700037C18Rik|Cdip1|ncRNA-inter-chr1-109|Lims2|Hopx|Eef1akmt1|Pld1|Slc38a4|Esco1|Cox19|ncRNA-inter-chrX-15437|Ppdpf|Mrps10|Inca1|Bet1|ncRNA-inter-chr15-12713|ncRNA-as-chr1-373|Apobec1|Plcxd2|Sash1|Zfp385b|ncRNA-inter-chr1-292|Ocel1|ncRNA-inter-chr17-14182|Aldh9a1|Oaz1|Stat3|Tpk1|Zkscan1|Fbxl20|Mettl7a1|ncRNA-inter-chr11-9911|P2ry14|Pycrl|Prkd3|ncRNA-as-chr6-5634|Sigmar1|ncRNA-as-chr7-6100|ncRNA-as-chr11-9956|Myo6|Lpp|ncRNA-inter-chr4-3458|Slc16a4|Acsm3|ncRNA-as-chr2-1073|Dmac2|Zfr2|Rbp4|Gpt2|Agmo|ncRNA-inter-chr19-14876|ncRNA-as-chr17-14065|Rsph3b|Nek1|Slc25a20|Aacs|Tap1|Gm11639|Akr1c6|2810459M11Rik|ncRNA-inter-chr19-14746|ncRNA-inter-chr6-5561|Stk16|Dlat|Psmd7|Pibf1|Phlpp1|ncRNA-inter-chr19-15180|Nfx1|Ip6k2|Abi2|Zfp951|Gon4l|Gm12185|Mettl27|Lactb|Chdh|9030624G23Rik|ncRNA-inter-chr4-3352|Gpat3|Ppp3cc|Nr2c2ap|Tlcd1|Morn1|ncRNA-inter-chr5-4052|Catspere2|Zfp646|Adam11|ncRNA-inter-chr17-14128|Spg20|Cenpv|B4galt5|ncRNA-inter-chr3-2128|Ncoa5|BC049762|Zscan26|Apob|Fuca1|Sirpa|Rnps1|Pawr|Vps9d1|Ddx39b|Crebzf|Vwce|ncRNA-as-chr1-252|Rnf4|Cfap54|Snw1|Evi5|Mib2|Pex19|Ak2|Pura|ncRNA-inter-chr4-3707|Yipf5|Ergic2|Plpp3|Ifi35|Rab11b|ncRNA-as-chr5-3947|Ptprg|Tmem106a|Polr1a|Amacr|Prpsap1|ncRNA-inter-chr6-4853|Crtc3|ncRNA-inter-chr3-2134|Dip2c|Gm7298|Saysd1|Deptor|Dpf3|H2-D1|ncRNA-inter-chr19-15138|ncRNA-as-chr13-11149|Apoh|Pdcd4|Mpp1|Apbb2|Slc16a10|Timm23|Wsb1|ncRNA-as-chr15-12821|2310039H08Rik|Phf7|Scamp1|B3gat3|ncRNA-as-chr2-1550|Pan3|G",
  "ch1|Hmgcl|Tom1l2|Srsf7|ncRNA-inter-chr1-120|Ttc19|ncRNA-as-chr4-3023|Fam111a|Slc35b3|ncRNA-inter-chr1-840|Pls3|ncRNA-as-chr5-4079|Asic5|ncRNA-inter-chr8-6878|ncRNA-inter-chr12-10462|Serf2|Hibadh|Ndfip1|Trpm3|A230050P20Rik|Zfp799|Akap8l|ncRNA-inter-chr12-10403|Leng8|Mat2a|Lars2|ncRNA-as-chr6-5726|Zfp141|Slc22a23|AI182371|Pqlc1|ncRNA-inter-chr6-5510|Ulk2|ncRNA-as-chr10-9073|ncRNA-inter-chr2-1959|Ppil2|ncRNA-inter-chr12-10973|Herc6|Ocln|Uvrag|ncRNA-inter-chr5-4278"
), "|", fixed = TRUE)[[1]])
# Row indices (into G172.umis) of the third M2M4 gene list.
# Symbols are matched EXACTLY against the row names. The previous grep() call
# used the list as an unanchored regular expression, so e.g. "B2m", "Lars" or
# "Pts" also hit every longer name containing them.
# The list is written as two string pieces joined by paste0() so that no line
# break ends up inside the list (the old literal had a newline right after
# "Amot", which kept that symbol from ever matching).
DE3.M2M4.0.1  <- which(rownames(G172.umis) %in% strsplit(paste0(
  "Mfsd11|Clock|Dcakd|ncRNA-as-chr18-14332|ncRNA-inter-chr6-5384|Gm765|Atp11b|ncRNA-inter-chr12-10930|Senp5|ncRNA-as-chr6-5266|Palmd|D230025D16Rik|Zc3h15|ncRNA-as-chr5-4281|Tuft1|Cyp4f13|Nmnat3|Ccdc174|Dact2|Gne|mt-Co3|ncRNA-intra-chr19-15016|Dido1|Itpr2|ncRNA-as-chr5-4600|Tkfc|ncRNA-as-chr16-13310|Myo1e|ncRNA-inter-chr1-119|Hnrnpl|Brd1|Aven|Dnah7a|AC149090.1|Fpgs|Ddx50|Hmces|Sec24c|Eci1|Paip2|Thoc1|Mylk|Klhl2|Zc3h6|Dnase2a|Fahd1|Acbd5|Nudcd2|Rmdn1|Wdr82|Myo10|Fbxw9|Slc17a5|Sharpin|Cabin1|Txndc15|AU022252|Srek1|Coq3|ncRNA-inter-chr14-11988|Irf3|Zcchc6|Slc25a13|Mrpl18|Pdia5|Ldlr|Tmed9|Klhdc8b|Rcbtb1|Adap2|B2m|Klhl24|Lbp|ncRNA-as-chr8-7337|ncRNA-as-chr10-8953|Sf3b6|Slc33a1|Lrpprc|Srsf4|Mindy3|Gstp3|Vps37a|Dnajb4|ncRNA-as-chr19-15065|Mast3|Skp1a|Zbtb1|Ndufa10|Psip1|ncRNA-as-chr8-7367|ncRNA-inter-chr11-9923|Btbd1|Slc38a11|Sema6b|ncRNA-inter-chr5-4321|Hsf2|Zmynd8|Luzp1|Pdgfa|ncRNA-inter-chr4-3038|Ube2b|Lamp2|Atl3|Prdx6|Ei24|Cecr2|Zc3h14|Cdc40|Ccdc125|Gspt2|Gpd1|ncRNA-as-chr5-4384|mt-Nd2|Casc3|Ell2|Rpl36|Sh3rf1|Lrrc29|Edf1|ncRNA-as-chr10-8939|A430033K04Rik|Lrig1|Ctsh|ncRNA-as-chr11-10222|Smap1|Ntan1|Clint1|Atxn1|Thap3|Irak4|Ugt3a1|Hnrnph3|Pja1|Btg1|Tgfbr2|Cox11|Tmem29|Brap|Gdap2|Rnf214|F13b|ncRNA-as-chr16-13146|Tpst2|Pik3c2a|ncRNA-inter-chr19-14964|Bcdin3d|Ahnak|Smim4|Commd7|Top2b|ncRNA-inter-chr11-10215|ncRNA-inter-chr13-11374|ncRNA-as-chr16-13153|ncRNA-as-chr8-7096|ncRNA-as-chr4-3820|Kxd1|Mzt1|Tmem143|Eif4ebp2|Mbtd1|Rbm47|Nipsnap3b|ncRNA-as-chr10-9130|ncRNA-inter-chr17-14177|Dhx40|Akap8|Pls1|Srsf11|Pex13|Cldn2|Osbpl1a|Cfap20|Mrpl52|ncRNA-as-chr7-6300|Zfp933|1110059E24Rik|Tab2|Gle1|Yaf2|Kmo|Lcp1|Gnl2|ncRNA-as-chr15-12467|Lsm6|Eps15|ncRNA-inter-chr17-14187|ncRNA-as-chr19-14904|Ighmbp2|ncRNA-as-chr2-1542|Atat1|Mtmr1|ncRNA-as-chr8-7410|Depdc7|Ccs|Scyl2|mt-Co2|Lars|Pts|Rexo1|Tssc4|Usp53|ncRNA-as-chr4-3559|Gfod1|Nars2|ncRNA-inter-chr4-3579|Eif3j1|ncRNA-as-chr1-795|ncRNA-inter-chr8-6981|Pde4a|ncRNA-inter-chr3-2881|Slc39a10|E030030I06Rik|Camk1|Ppp2r1b|Amot",
  "|ncRNA-inter-chr17-14010|Cntn5|Ube2h|Fbxo36|Wbp1l|ncRNA-as-chr2-2108|Zbtb44|Hectd2|Mgrn1"
), "|", fixed = TRUE)[[1]])
# Combine the individual row-index vectors into the exclusion sets used below,
# then build filtered copies of the UMI matrix with those rows removed.

# Remove the rows listed in `idx` from `mat`.
# Plain `mat[-idx, ]` returns ZERO rows when `idx` is empty (because
# -integer(0) is integer(0)); going through setdiff() keeps every row in that
# case and is unaffected by duplicated indices.
drop_rows <- function(mat, idx) {
  mat[setdiff(seq_len(nrow(mat)), idx), , drop = FALSE]
}

All.sex.index <- c(x1, x2, x3, x4, x5)
All.sex.TCPO.index <- c(
  x1, x2, x3, x4, x5,
  TCPO1, TCPO2, TCPO3, TCPO4, TCPO5, TCPO6, TCPO6.1, TCPO6.2,
  TCPO7, TCPO7.1, TCPO7.2, TCPO8, TCPO8.1, TCPO8.2, TCPO9, TCPO10
)

DE.G172M1M3.0.1 <- c(
  DE1.M1M3.0.1, DE2.M1M3.0.1, DE3.M1M3.0.1, DE4.M1M3.0.1, DE5.M1M3.0.1,
  DE6.M1M3.0.1, DE7.M1M3.0.1, DE8.M1M3.0.1, DE9.M1M3.0.1, DE10.M1M3.0.1
)
# NOTE(review): only DE1-DE6 are excluded here although DE.G172M1M3.0.1 above
# collects DE1-DE10 -- confirm that leaving out DE7-DE10 is intended.
All.sex.index.batch <- c(
  x1, x2, x3, x4, x5,
  DE1.M1M3.0.1, DE2.M1M3.0.1, DE3.M1M3.0.1, DE4.M1M3.0.1, DE5.M1M3.0.1,
  DE6.M1M3.0.1
)
G172.umis.sex <- drop_rows(G172.umis, All.sex.index)
G172.umis.sex.batch <- drop_rows(G172.umis, All.sex.index.batch)


DE.G172M2M4.0.1 <- c(DE1.M2M4.0.1, DE2.M2M4.0.1, DE3.M2M4.0.1)
All.sex.TCPO.index.batch <- c(All.sex.TCPO.index, DE.G172M2M4.0.1)
G172.umis.sex.TCPO.batch <- drop_rows(G172.umis, All.sex.TCPO.index.batch)

G172.umis.sex.TCPO <- drop_rows(G172.umis, All.sex.TCPO.index)

# Build the Seurat objects: one from the unfiltered UMI matrix and one from
# each of the row-filtered matrices.
#G172.hashtag <- CreateSeuratObject(counts = G172.umis.sex)

# Unfiltered object, with the `lncRNA` values attached as the "nlncRNA"
# metadata column.
G172.hashtag <- AddMetaData(
  object = CreateSeuratObject(counts = G172.umis),
  metadata = lncRNA,
  col.name = "nlncRNA"
)
#G172.hashtag <- AddMetaData(object = G172.hashtag, meta.data0.5) 

# Objects built from the filtered matrices.
G172.hashtag.sex.batch <- CreateSeuratObject(
  counts = G172.umis.sex.batch
)
G172.hashtag.TCPO <- CreateSeuratObject(
  counts = G172.umis.sex.TCPO
)
G172.hashtag.sex.TCPO.batch <- CreateSeuratObject(
  counts = G172.umis.sex.TCPO.batch
)

# Cluster the full (not yet demultiplexed) object: log-normalise, pick 2000
# variable features (vst), scale them, run PCA (30 PCs), then UMAP, neighbour
# graph and clustering on PCs 1-30.

#G172.hashtag <- AddMetaData(object = G172.hashtag) 
G172.hashtag <- NormalizeData(G172.hashtag)
# Find and scale variable features
G172.hashtag <- FindVariableFeatures(G172.hashtag, selection.method = "vst", nfeatures = 2000)
#G172.hashtag <- FindVariableFeatures(G172.hashtag, selection.method = "mean.var.plot")
G172.hashtag <- ScaleData(G172.hashtag, features = VariableFeatures(G172.hashtag))
G172.hashtag <- RunPCA(G172.hashtag, npcs = 30, features = VariableFeatures(G172.hashtag))
G172.hashtag <- RunUMAP(G172.hashtag, reduction = "pca", dims = 1:30)
G172.hashtag <- FindNeighbors(G172.hashtag, reduction = "pca", dims = 1:30)
G172.hashtag <- FindClusters(G172.hashtag, resolution = 0.25)
G172.hashtag.p1 <- UMAPPlot(G172.hashtag, reduction = "umap", label = TRUE, label.size = 5)
G172.hashtag.p1

# Marker dot plot next to the UMAP of the same object.
DefaultAssay(G172.hashtag) <- "RNA"
G172.hashtag <- NormalizeData(G172.hashtag, verbose = TRUE)
# Fixed: the feature list was the truncated name `All_`; every other DotPlot
# call in this script uses `all_genes` (defined elsewhere in the file).
d1 <- DotPlot(G172.hashtag, features = all_genes) + RotatedAxis()
# Fixed: this used G172.M1.p1, which is only created later in the script and
# belongs to a different object; pair the dot plot with this object's UMAP.
plot_grid(G172.hashtag.p1, d1)
FeaturePlot(G172.hashtag, features = "Cyp2c55")
FeaturePlot(G172.hashtag, features = "ncRNA-inter-chrX-15394")

# Store the tag counts in a separate "HTO" assay next to RNA.
# The assay is currently built from G172.Xist.ChrY; the G172.htos variant is
# kept below for reference.
#G172.hashtag[["HTO"]] <- CreateAssayObject(counts = G172.htos)
G172.hashtag[["HTO"]] <- CreateAssayObject(counts = G172.Xist.ChrY)

# CLR-normalise the HTO assay, then demultiplex it. HTODemux is called with
# positive.quantile = 0.99 and the 'clara' k-medoids function (suited to large
# data sets); see the HTODemux() documentation for further tuning parameters.
G172.hashtag <- G172.hashtag %>%
  NormalizeData(assay = "HTO", normalization.method = "CLR") %>%
  HTODemux(assay = "HTO", positive.quantile = 0.99, kfunc = "clara")

# Classification summaries: global class and per-cell hash ID.
table(G172.hashtag$HTO_classification.global)
table(G172.hashtag$hash.ID)

# Use the hash ID as the active identity and show the first four HTO features.
Idents(G172.hashtag) <- "hash.ID"
RidgePlot(
  G172.hashtag,
  assay = "HTO",
  features = rownames(G172.hashtag[["HTO"]])[1:4],
  ncol = 2,
  nrow = 2
)
# Overwrite HTO_classification with the hash ID.
G172.hashtag$HTO_classification <- G172.hashtag$hash.ID
#Idents(G172.hashtag) <- "HTO_classification.global"

# Keep every cell whose identity is neither "Negative" nor "Doublet".
G172.hashtag.subset <- subset(
  G172.hashtag,
  idents = c("Negative", "Doublet"),
  invert = TRUE
)

## Pairwise feature scatter plots for the four hashtag IDs ########
# combn() walks the pairs in the order 1-2, 1-3, 1-4, 2-3, 2-4, 3-4.
invisible(combn(
  c("M1-ATGATGAACAGCCAG", "M2-TGACGCCGTTGTTGT", "M3-GCCTAGTATGATCCA", "M4-AGTCACAGTATTCCA"),
  2,
  FUN = function(tag.pair) {
    print(FeatureScatter(G172.hashtag.subset, feature1 = tag.pair[1], feature2 = tag.pair[2]))
  },
  simplify = FALSE
))


# Cell-by-cell distance matrix computed from the HTO assay data
hto.dist.mtx <- G172.hashtag.subset %>%
  GetAssayData(assay = "HTO") %>%
  t() %>%
  dist() %>%
  as.matrix()


# Normalise, select variable features (mean.var.plot), scale them, run PCA
G172.hashtag.subset <- G172.hashtag.subset %>%
  NormalizeData() %>%
  FindVariableFeatures(selection.method = "mean.var.plot") %>%
  { ScaleData(., features = VariableFeatures(.)) } %>%
  { RunPCA(., npcs = 30, features = VariableFeatures(.)) }

# tSNE embedding computed from the HTO distance matrix
G172.hashtag.subset <- RunTSNE(
  G172.hashtag.subset,
  distance.matrix = hto.dist.mtx,
  perplexity = 100
)
DimPlot(G172.hashtag.subset)
# Heatmap of the HTO assay on the full (unsubsetted) object
HTOHeatmap(G172.hashtag, assay = "HTO", ncells = 3030)

########################## Rescue doublets ########################
# Re-run HTODemux on a subset of the full object and plot the first four HTO
# features by the new hash ID.
# NOTE(review): the header and variable names say "doublet", but the subset
# below selects the "Negative" identity -- confirm which class is meant.
G172.doublet <- subset(G172.hashtag, idents = "Negative")
G172.doublet.rescue <- HTODemux(
  G172.doublet,
  assay = "HTO",
  positive.quantile = 0.99,
  kfunc = "clara"
)
Idents(G172.doublet.rescue) <- "hash.ID"
RidgePlot(
  G172.doublet.rescue,
  assay = "HTO",
  features = rownames(G172.doublet.rescue[["HTO"]])[1:4],
  ncol = 2,
  nrow = 2
)
###############################################################################


# Extract the M1 singlets and cluster them #############
# The script previously built a QC-filtered subset and then immediately
# overwrote it with the unfiltered one; only the effective (unfiltered) call is
# kept. The filtered variant is left here for reference.
# NOTE(review): the M2/M3/M4 sections do apply the nFeature/nCount filter --
# confirm whether M1 should as well.
#G172.M1 <- subset(G172.hashtag.subset, idents = "M1-ATGATGAACAGCCAG", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M1 <- subset(G172.hashtag.subset, idents = "M1-ATGATGAACAGCCAG")

#G172.M1 <- G172.hashtag
G172.M1$stim <- "G172M1"
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- SCTransform(G172.M1, verbose = TRUE)
# NOTE(review): SCTransform presumably leaves "SCT" as the default assay, so
# the steps below would operate on that assay -- confirm this is intended.
G172.M1 <- NormalizeData(G172.M1, verbose = FALSE)
# Select the 2000 most variable features
G172.M1 <- FindVariableFeatures(G172.M1, selection.method = "vst", nfeatures = 2000)
# Scale only the variable features, for efficiency
G172.M1 <- ScaleData(G172.M1, features = VariableFeatures(G172.M1))
# Run PCA (30 components)
G172.M1 <- RunPCA(G172.M1, npcs = 30, features = VariableFeatures(G172.M1))
# UMAP, neighbour graph and clustering use the first 25 PCs
G172.M1 <- RunUMAP(G172.M1, reduction = "pca", dims = 1:25)
G172.M1 <- FindNeighbors(G172.M1, reduction = "pca", dims = 1:25)
G172.M1 <- FindClusters(G172.M1, resolution = 0.5)
G172.M1.p1 <- UMAPPlot(G172.M1, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.p1


# Marker dot plot on the re-normalised RNA assay, shown next to the UMAP
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- NormalizeData(G172.M1, verbose = TRUE)
d1 <- DotPlot(G172.M1, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.p1, d1)


 # Pool the raw RNA counts of G172.M1 into groups of five cells per cluster.
# The previous version hard-coded clusters 0-9: it failed when fewer than ten
# clusters existed and silently ignored any cluster beyond the tenth. This
# version walks over every level of `seurat_clusters`.

# Sum the RNA counts of `obj` over consecutive groups of five cells (the last
# group may be smaller). Returns a genes x groups matrix whose columns are
# named "<prefix>_<group index>", with group indices starting at 0.
pool_cells_by_five <- function(obj, prefix) {
  counts <- as.matrix(obj@assays$RNA@counts)
  group.id <- (seq_len(ncol(counts)) - 1L) %/% 5L
  pooled <- vapply(
    split(seq_len(ncol(counts)), group.id),
    function(cols) rowSums(counts[, cols, drop = FALSE]),
    numeric(nrow(counts))
  )
  rownames(pooled) <- rownames(counts)
  colnames(pooled) <- paste(prefix, colnames(pooled), sep = "_")
  pooled
}

G172.M1.cluster.ids <- levels(G172.M1@meta.data$seurat_clusters)
G172.M1.pooled <- vector("list", length(G172.M1.cluster.ids))
for (k in seq_along(G172.M1.cluster.ids)) {
  cluster.id <- G172.M1.cluster.ids[k]
  cluster.cells <- subset(G172.M1, idents = cluster.id)
  G172.M1.pooled[[k]] <- pool_cells_by_five(cluster.cells, paste0("C", cluster.id))
  # Keep the per-cluster globals (p0, p1, ... and mydf0, mydf1, ...) that the
  # original code created, in case later parts of the script use them.
  assign(paste0("p", cluster.id), cluster.cells)
  assign(paste0("mydf", cluster.id), G172.M1.pooled[[k]])
}

# All pooled columns side by side, in cluster order.
merge_df <- do.call(cbind, G172.M1.pooled)

# Cluster the pooled (five-cell) M1 profiles with the same workflow that is
# used for the single-cell objects.
G172.M1.merge <- CreateSeuratObject(counts = merge_df)
# Fixed: the default assay was being set on G172.M1 instead of on the object
# that is processed below.
DefaultAssay(G172.M1.merge) <- "RNA"
G172.M1.merge <- SCTransform(G172.M1.merge, verbose = TRUE)
# NOTE(review): SCTransform presumably leaves "SCT" as the default assay, so
# the steps below would operate on that assay -- confirm this is intended.
G172.M1.merge <- NormalizeData(G172.M1.merge, verbose = FALSE)
# Select the 2000 most variable features
G172.M1.merge <- FindVariableFeatures(G172.M1.merge, selection.method = "vst", nfeatures = 2000)
# Scale only the variable features, for efficiency
G172.M1.merge <- ScaleData(G172.M1.merge, features = VariableFeatures(G172.M1.merge))
# Run PCA (30 components)
G172.M1.merge <- RunPCA(G172.M1.merge, npcs = 30, features = VariableFeatures(G172.M1.merge))
# UMAP, neighbour graph and clustering use the first 25 PCs
G172.M1.merge <- RunUMAP(G172.M1.merge, reduction = "pca", dims = 1:25)
G172.M1.merge <- FindNeighbors(G172.M1.merge, reduction = "pca", dims = 1:25)
G172.M1.merge <- FindClusters(G172.M1.merge, resolution = 0.5)
G172.M1.p1.merge <- UMAPPlot(G172.M1.merge, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.p1.merge


# Marker dot plot (white-to-blue scale) next to the UMAP.
# Note: this reuses the global name `d1`.
DefaultAssay(G172.M1.merge) <- "RNA"
G172.M1.merge <- NormalizeData(G172.M1.merge, verbose = TRUE)
d1 <- DotPlot(G172.M1.merge, features = all_genes, cols = c('white', 'blue')) + RotatedAxis()
plot_grid(G172.M1.p1.merge, d1)

################################## Extract the singlet for M2 ################################################
# Cells with the M2 hashtag identity that pass the QC thresholds
# (more than 500 detected features and more than 1000 counts).
G172.M2 <- subset(
  G172.hashtag,
  idents = "M2-TGACGCCGTTGTTGT",
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M2$stim <- "G172M2"
DefaultAssay(G172.M2) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features (vst), scale those
# features, PCA with 30 components, then UMAP / neighbours / clustering on the
# first 25 PCs at resolution 0.5.
G172.M2 <- G172.M2 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  { ScaleData(., features = VariableFeatures(.)) } %>%
  { RunPCA(., npcs = 30, features = VariableFeatures(.)) } %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M2.p1 <- UMAPPlot(G172.M2, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.p1

# Marker dot plot on the re-normalised RNA assay, shown next to the UMAP
DefaultAssay(G172.M2) <- "RNA"
G172.M2 <- G172.M2 %>% NormalizeData(verbose = TRUE)
d2 <- DotPlot(G172.M2, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.p1, d2)

# Pool the raw RNA counts of G172.M2 into groups of five cells per cluster.
# Fixes relative to the previous version:
#   * cluster 8 columns were prefixed "C8" instead of "Q8";
#   * clusters 0-8 were hard-coded, so the code failed with fewer than nine
#     clusters and silently ignored any cluster beyond the ninth.

# Sum the RNA counts of `obj` over consecutive groups of five cells (the last
# group may be smaller). Returns a genes x groups matrix whose columns are
# named "<prefix>_<group index>", with group indices starting at 0.
pool_cells_by_five <- function(obj, prefix) {
  counts <- as.matrix(obj@assays$RNA@counts)
  group.id <- (seq_len(ncol(counts)) - 1L) %/% 5L
  pooled <- vapply(
    split(seq_len(ncol(counts)), group.id),
    function(cols) rowSums(counts[, cols, drop = FALSE]),
    numeric(nrow(counts))
  )
  rownames(pooled) <- rownames(counts)
  colnames(pooled) <- paste(prefix, colnames(pooled), sep = "_")
  pooled
}

G172.M2.cluster.ids <- levels(G172.M2@meta.data$seurat_clusters)
G172.M2.pooled <- vector("list", length(G172.M2.cluster.ids))
for (k in seq_along(G172.M2.cluster.ids)) {
  cluster.id <- G172.M2.cluster.ids[k]
  cluster.cells <- subset(G172.M2, idents = cluster.id)
  G172.M2.pooled[[k]] <- pool_cells_by_five(cluster.cells, paste0("Q", cluster.id))
  # Keep the per-cluster globals (q0, q1, ... and dfq0, dfq1, ...) that the
  # original code created, in case later parts of the script use them.
  assign(paste0("q", cluster.id), cluster.cells)
  assign(paste0("dfq", cluster.id), G172.M2.pooled[[k]])
}

# All pooled columns side by side, in cluster order.
merge_df2 <- do.call(cbind, G172.M2.pooled)
# Cluster the pooled (five-cell) M2 profiles with the same workflow that is
# used for the single-cell objects.
G172.M2.merge <- CreateSeuratObject(counts = merge_df2)
# Fixed: the default assay was being set on G172.M2 instead of on the object
# that is processed below.
DefaultAssay(G172.M2.merge) <- "RNA"
G172.M2.merge <- SCTransform(G172.M2.merge, verbose = TRUE)
# NOTE(review): SCTransform presumably leaves "SCT" as the default assay, so
# the steps below would operate on that assay -- confirm this is intended.
G172.M2.merge <- NormalizeData(G172.M2.merge, verbose = FALSE)
# Select the 2000 most variable features
G172.M2.merge <- FindVariableFeatures(G172.M2.merge, selection.method = "vst", nfeatures = 2000)
# Scale only the variable features, for efficiency
G172.M2.merge <- ScaleData(G172.M2.merge, features = VariableFeatures(G172.M2.merge))
# Run PCA (30 components)
G172.M2.merge <- RunPCA(G172.M2.merge, npcs = 30, features = VariableFeatures(G172.M2.merge))
# UMAP, neighbour graph and clustering use the first 25 PCs
G172.M2.merge <- RunUMAP(G172.M2.merge, reduction = "pca", dims = 1:25)
G172.M2.merge <- FindNeighbors(G172.M2.merge, reduction = "pca", dims = 1:25)
G172.M2.merge <- FindClusters(G172.M2.merge, resolution = 0.5)
G172.M2.p1.merge <- UMAPPlot(G172.M2.merge, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.p1.merge

# Marker dot plot (white-to-blue scale) next to the UMAP.
# Note: this reuses the global name `d1`.
DefaultAssay(G172.M2.merge) <- "RNA"
G172.M2.merge <- NormalizeData(G172.M2.merge, verbose = TRUE)
d1 <- DotPlot(G172.M2.merge, features = all_genes, cols = c('white', 'blue')) + RotatedAxis()
plot_grid(G172.M2.p1.merge, d1)

#######################################################################################


############################# Clustering of M3, M4 and the pooled groups M5-M7 ##############################
# The five sections below used to repeat the same statements verbatim; the
# shared steps now live in two small helpers. Every global created by the
# original code (G172.M3 ... G172.M7, the *.p1 plots and d3 ... d7) is still
# created, with the same parameters.

# Shared clustering workflow: set RNA as default assay, optionally run
# SCTransform, log-normalise, pick 2000 variable features (vst), scale those
# features, PCA with 30 components, then UMAP / neighbour graph / clustering
# on the first 25 PCs at resolution 0.5. Returns the processed object.
# NOTE(review): SCTransform presumably leaves "SCT" as the default assay, so
# with run.sct = TRUE the later steps would operate on that assay -- confirm
# this is intended.
cluster_hashtag_sample <- function(object, run.sct = TRUE) {
  DefaultAssay(object) <- "RNA"
  if (run.sct) {
    object <- SCTransform(object, verbose = TRUE)
  }
  object <- NormalizeData(object, verbose = FALSE)
  object <- FindVariableFeatures(object, selection.method = "vst", nfeatures = 2000)
  object <- ScaleData(object, features = VariableFeatures(object))
  object <- RunPCA(object, npcs = 30, features = VariableFeatures(object))
  object <- RunUMAP(object, reduction = "pca", dims = 1:25)
  object <- FindNeighbors(object, reduction = "pca", dims = 1:25)
  FindClusters(object, resolution = 0.5)
}

# Switch back to the RNA assay and log-normalise it before drawing dot plots.
renormalize_rna <- function(object) {
  DefaultAssay(object) <- "RNA"
  NormalizeData(object, verbose = TRUE)
}

##### M3 singlets (QC: > 500 features and > 1000 counts) #####
G172.M3 <- subset(G172.hashtag, idents = "M3-GCCTAGTATGATCCA", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M3 <- cluster_hashtag_sample(G172.M3)
G172.M3.p1 <- UMAPPlot(G172.M3, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.p1
G172.M3 <- renormalize_rna(G172.M3)
d3 <- DotPlot(G172.M3, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.p1, d3)


##### M4 singlets #####
G172.M4 <- subset(G172.hashtag, idents = "M4-AGTCACAGTATTCCA", subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M4 <- cluster_hashtag_sample(G172.M4)
G172.M4.p1 <- UMAPPlot(G172.M4, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.p1
G172.M4 <- renormalize_rna(G172.M4)
d4 <- DotPlot(G172.M4, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.p1, d4)


############## Combined: all four hashtags ####################
G172.M5 <- subset(G172.hashtag, idents = c("M1-ATGATGAACAGCCAG","M2-TGACGCCGTTGTTGT","M3-GCCTAGTATGATCCA","M4-AGTCACAGTATTCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M5 <- cluster_hashtag_sample(G172.M5)
G172.M5.p1 <- UMAPPlot(G172.M5, reduction = "umap", label = TRUE, label.size = 5)
G172.M5.p1
G172.M5 <- renormalize_rna(G172.M5)
d5 <- DotPlot(G172.M5, features = c(all_genes,'Cyp2b10','Cyp2d9')) + RotatedAxis()
plot_grid(G172.M5.p1, d5)


### control: M1 + M3, taken from G172.hashtag.subset, SCTransform skipped
G172.M6 <- subset(G172.hashtag.subset, idents = c("M1-ATGATGAACAGCCAG","M3-GCCTAGTATGATCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M6 <- cluster_hashtag_sample(G172.M6, run.sct = FALSE)
G172.M6.p1 <- UMAPPlot(G172.M6, reduction = "umap", label = TRUE, label.size = 5)
G172.M6.p1
G172.M6 <- renormalize_rna(G172.M6)
d6 <- DotPlot(G172.M6, features = c(all_genes,'Cyp2b10','Cyp2d9')) + RotatedAxis()
plot_grid(G172.M6.p1, d6)


#### TCPO: M2 + M4
G172.M7 <- subset(G172.hashtag, idents = c("M2-TGACGCCGTTGTTGT","M4-AGTCACAGTATTCCA"), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M7 <- cluster_hashtag_sample(G172.M7)
G172.M7.p1 <- UMAPPlot(G172.M7, reduction = "umap", label = TRUE, label.size = 5)
G172.M7.p1
G172.M7 <- renormalize_rna(G172.M7)
d7 <- DotPlot(G172.M7, features = c(all_genes,'Cyp2b10','Cyp2d9')) + RotatedAxis()
plot_grid(G172.M7.p1, d7, nrow = 2)

# G172: cells already classified by demuxEM and/or HTODemux (Seurat), used to calculate TPM


# Subset the full object by the pre-computed barcode lists (joint.bcs.*), one
# object per sample (M1-M4) and per barcode-list family ("both", "demuxEM",
# "HTOdemux").
G172.M1.both <- subset(G172.hashtag, cells = joint.bcs.M1.both)
G172.M2.both <- subset(G172.hashtag, cells = joint.bcs.M2.both)
G172.M3.both <- subset(G172.hashtag, cells = joint.bcs.M3.both)
G172.M4.both <- subset(G172.hashtag, cells = joint.bcs.M4.both)

G172.M1.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M1.demuxEM)
G172.M2.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M2.demuxEM)
# Fixed: M3 was built from the M4 barcode list (joint.bcs.M4.demuxEM), which
# made G172.M3.demuxEM a copy of G172.M4.demuxEM.
G172.M3.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M3.demuxEM)
G172.M4.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M4.demuxEM)


G172.M1.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M1.HTOdemux)
G172.M2.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M2.HTOdemux)
G172.M3.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M3.HTOdemux)
G172.M4.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M4.HTOdemux)


# For every pre-labelled subset: relative-count ("RC") normalisation of the RNA
# assay with a scale factor of 1e6, followed by the per-gene mean of the
# normalised values across the subset's cells.

# --- cells in the "both" barcode lists ---
G172.M1.both <- G172.M1.both %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.both <- G172.M2.both %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M3.both <- G172.M3.both %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M4.both <- G172.M4.both %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)

G172.M1.both.avg <- G172.M1.both@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M2.both.avg <- G172.M2.both@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M3.both.avg <- G172.M3.both@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M4.both.avg <- G172.M4.both@assays$RNA@data %>% as.matrix() %>% rowMeans()


# --- cells in the "demuxEM" barcode lists ---
G172.M1.demuxEM <- G172.M1.demuxEM %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.demuxEM <- G172.M2.demuxEM %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M3.demuxEM <- G172.M3.demuxEM %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M4.demuxEM <- G172.M4.demuxEM %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)

G172.M1.demuxEM.avg <- G172.M1.demuxEM@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M2.demuxEM.avg <- G172.M2.demuxEM@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M3.demuxEM.avg <- G172.M3.demuxEM@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M4.demuxEM.avg <- G172.M4.demuxEM@assays$RNA@data %>% as.matrix() %>% rowMeans()


# --- cells in the "HTOdemux" barcode lists ---
G172.M1.HTOdemux <- G172.M1.HTOdemux %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.HTOdemux <- G172.M2.HTOdemux %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M3.HTOdemux <- G172.M3.HTOdemux %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M4.HTOdemux <- G172.M4.HTOdemux %>% NormalizeData(assay = "RNA", normalization.method = "RC", scale.factor = 1e6)

G172.M1.HTOdemux.avg <- G172.M1.HTOdemux@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M2.HTOdemux.avg <- G172.M2.HTOdemux@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M3.HTOdemux.avg <- G172.M3.HTOdemux@assays$RNA@data %>% as.matrix() %>% rowMeans()
G172.M4.HTOdemux.avg <- G172.M4.HTOdemux@assays$RNA@data %>% as.matrix() %>% rowMeans()

# "Top" and "bottom" cell sets for M1 and M2 (barcode lists joint.bcs.*.top /
# joint.bcs.*.bottom): subset, RC-normalise to a scale factor of 1e6, and
# export both the normalised values and the raw counts.
G172.M1.Top <- subset(G172.hashtag, cells = joint.bcs.M1.top)
G172.M2.Top <- subset(G172.hashtag, cells = joint.bcs.M2.top)

G172.M1.Top <- NormalizeData(G172.M1.Top, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.Top <- NormalizeData(G172.M2.Top, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)

G172.M1.bottom <- subset(G172.hashtag, cells = joint.bcs.M1.bottom)
G172.M2.bottom <- subset(G172.hashtag, cells = joint.bcs.M2.bottom)

G172.M1.bottom <- NormalizeData(G172.M1.bottom, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.bottom <- NormalizeData(G172.M2.bottom, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)

# Fixed: the "TPM" matrices were read from the `counts` slot, which made them
# identical to the count matrices below. The RC-normalised values written by
# NormalizeData() are in the `data` slot.
G172_TPM_M1_Top <- as.matrix(G172.M1.Top@assays$RNA@data)
G172_TPM_M2_Top <- as.matrix(G172.M2.Top@assays$RNA@data)
G172_TPM_M1_bottom <- as.matrix(G172.M1.bottom@assays$RNA@data)
G172_TPM_M2_bottom <- as.matrix(G172.M2.bottom@assays$RNA@data)


# Raw counts of the same cell sets
G172_counts_M1_Top <- as.matrix(G172.M1.Top@assays$RNA@counts)
G172_counts_M2_Top <- as.matrix(G172.M2.Top@assays$RNA@counts)
G172_counts_M1_bottom <- as.matrix(G172.M1.bottom@assays$RNA@counts)
G172_counts_M2_bottom <- as.matrix(G172.M2.bottom@assays$RNA@counts)

# Output paths are relative to the current working directory.
write.table(G172_TPM_M1_Top, "demuxEM_HTodemux_analysis/G172_TPM_M1_Top")
write.table(G172_TPM_M2_Top, "demuxEM_HTodemux_analysis/G172_TPM_M2_Top")
write.table(G172_TPM_M1_bottom, "demuxEM_HTodemux_analysis/G172_TPM_M1_bottom")
write.table(G172_TPM_M2_bottom, "demuxEM_HTodemux_analysis/G172_TPM_M2_bottom")


write.table(G172_counts_M1_Top, "demuxEM_HTodemux_analysis/G172_counts_M1_Top")
write.table(G172_counts_M2_Top, "demuxEM_HTodemux_analysis/G172_counts_M2_Top")
write.table(G172_counts_M1_bottom, "demuxEM_HTodemux_analysis/G172_counts_M1_bottom")
write.table(G172_counts_M2_bottom, "demuxEM_HTodemux_analysis/G172_counts_M2_bottom")


# One column of per-gene means for each pre-labelled subset (column names are
# taken from the variable names), written to disk.
combined_avg_prelabelled <- cbind(G172.M1.both.avg, G172.M2.both.avg,G172.M3.both.avg,G172.M4.both.avg, G172.M1.demuxEM.avg,G172.M2.demuxEM.avg, G172.M3.demuxEM.avg, G172.M4.demuxEM.avg, G172.M1.HTOdemux.avg, G172.M2.HTOdemux.avg, G172.M3.HTOdemux.avg, G172.M4.HTOdemux.avg) 

write.table(combined_avg_prelabelled, "demuxEM_HTodemux_analysis/combined_avgTPM_prelabelled")

# Per-cell normalised values of the M1 and M2 "both" subsets, side by side.
# Fixed: the second matrix was G172.M1.both again, so the M1 cells appeared
# twice and M2 was missing.
combined_M1_M2 <- cbind(as.matrix(G172.M1.both@assays$RNA@data), as.matrix(G172.M2.both@assays$RNA@data)) 

# Rebuild G172.M1.both from the "both" barcode list, this time keeping only
# cells with more than 500 detected features and more than 1000 counts.
G172.M1.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M1.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
#G172.M1.both <- subset(G172.hashtag.sex.batch, cells= joint.bcs.M1.both)
#G172.M1.both <- subset(G172.hashtag, cells= joint.bcs.M1.both)
G172.M1.both$stim <- "G172M1_both"
DefaultAssay(G172.M1.both) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features (vst), scale those
# features, PCA with 30 components, then UMAP / neighbours / clustering on
# PCs 1-30 at resolution 0.25.
G172.M1.both <- G172.M1.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  { ScaleData(., features = VariableFeatures(.)) } %>%
  { RunPCA(., npcs = 30, features = VariableFeatures(.)) } %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)

G172.M1.both.p1 <- UMAPPlot(G172.M1.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.both.p1

# Marker dot plot on the re-normalised RNA assay, shown next to the UMAP.
# Note: this reuses the global name `d2`.
DefaultAssay(G172.M1.both) <- "RNA"
G172.M1.both <- G172.M1.both %>% NormalizeData(verbose = TRUE)
d2 <- DotPlot(G172.M1.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.both.p1, d2)

# ---- G172 M1: "clean both" barcodes (clean hepatocytes) ----
# Keep the M1 "clean both" barcodes with > 500 genes and > 1000 UMIs.
G172.M1.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M1.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M1.clean.both$stim <- "G172M1_clean_both"
DefaultAssay(G172.M1.clean.both) <- "RNA"
G172.M1.clean.both <- SCTransform(G172.M1.clean.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M1.clean.both <- NormalizeData(G172.M1.clean.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M1.clean.both <- FindVariableFeatures(
  G172.M1.clean.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M1.clean.both <- ScaleData(
  G172.M1.clean.both,
  features = VariableFeatures(G172.M1.clean.both)
)
G172.M1.clean.both <- RunPCA(
  G172.M1.clean.both,
  npcs = 30,
  features = VariableFeatures(G172.M1.clean.both)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M1.clean.both <- G172.M1.clean.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.5)
G172.M1.clean.both.p1 <- UMAPPlot(G172.M1.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.clean.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M1.clean.both) <- "RNA"
G172.M1.clean.both <- NormalizeData(G172.M1.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M1.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.clean.both.p1, d2)

# ---- G172 M1: "both top80" barcodes ----
# Keep the M1 "both top80" barcodes with > 500 genes and > 1000 UMIs.
G172.M1.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M1.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M1.both.top80$stim <- "G172M1_both_top80"
DefaultAssay(G172.M1.both.top80) <- "RNA"
G172.M1.both.top80 <- SCTransform(G172.M1.both.top80, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M1.both.top80 <- NormalizeData(G172.M1.both.top80, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M1.both.top80 <- FindVariableFeatures(
  G172.M1.both.top80,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M1.both.top80 <- ScaleData(
  G172.M1.both.top80,
  features = VariableFeatures(G172.M1.both.top80)
)
G172.M1.both.top80 <- RunPCA(
  G172.M1.both.top80,
  npcs = 30,
  features = VariableFeatures(G172.M1.both.top80)
)
# UMAP, neighbour graph and clustering use PCs 1-25.
G172.M1.both.top80 <- G172.M1.both.top80 %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.25)
G172.M1.both.top80.p1 <- UMAPPlot(G172.M1.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.both.top80.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M1.both.top80) <- "RNA"
G172.M1.both.top80 <- NormalizeData(G172.M1.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M1.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.both.top80.p1, d2)



# ---- G172 M2: "both" barcodes ----
# The active subset applies no QC thresholds; alternatives kept for reference:
# G172.M2.both <- subset(G172.hashtag, cells = joint.bcs.M2.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
# G172.M2.both <- subset(G172.hashtag.TCPO, cells = joint.bcs.M2.both)
# G172.M2.both <- subset(G172.hashtag.sex.TCPO.batch, cells = joint.bcs.M2.both)
G172.M2.both <- subset(G172.hashtag, cells = joint.bcs.M2.both)
G172.M2.both$stim <- "G172M2_both"
DefaultAssay(G172.M2.both) <- "RNA"
G172.M2.both <- SCTransform(G172.M2.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M2.both <- NormalizeData(G172.M2.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M2.both <- FindVariableFeatures(
  G172.M2.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M2.both <- ScaleData(G172.M2.both, features = VariableFeatures(G172.M2.both))
G172.M2.both <- RunPCA(G172.M2.both, npcs = 30, features = VariableFeatures(G172.M2.both))
# UMAP, neighbour graph and clustering use PCs 1-20.
G172.M2.both <- G172.M2.both %>%
  RunUMAP(reduction = "pca", dims = 1:20) %>%
  FindNeighbors(reduction = "pca", dims = 1:20) %>%
  FindClusters(resolution = 0.35)
G172.M2.both.p1 <- UMAPPlot(G172.M2.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M2.both) <- "RNA"
G172.M2.both <- NormalizeData(G172.M2.both, verbose = TRUE)
d2 <- DotPlot(G172.M2.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.both.p1, d2)

# ---- G172 M2: "clean both" barcodes ----
# Keep the M2 "clean both" barcodes with > 500 genes and > 1000 UMIs.
G172.M2.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M2.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): same stim label as the plain M2 "both" subset, whereas the M1
# sections use a distinct label per subset - confirm intended.
G172.M2.clean.both$stim <- "G172M2_both"
DefaultAssay(G172.M2.clean.both) <- "RNA"
G172.M2.clean.both <- SCTransform(G172.M2.clean.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M2.clean.both <- NormalizeData(G172.M2.clean.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M2.clean.both <- FindVariableFeatures(
  G172.M2.clean.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M2.clean.both <- ScaleData(
  G172.M2.clean.both,
  features = VariableFeatures(G172.M2.clean.both)
)
G172.M2.clean.both <- RunPCA(
  G172.M2.clean.both,
  npcs = 30,
  features = VariableFeatures(G172.M2.clean.both)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M2.clean.both <- G172.M2.clean.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.35)
G172.M2.clean.both.p1 <- UMAPPlot(G172.M2.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.clean.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M2.clean.both) <- "RNA"
G172.M2.clean.both <- NormalizeData(G172.M2.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M2.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.clean.both.p1, d2)

# ---- G172 M2: "both top80" barcodes ----
# Keep the M2 "both top80" barcodes with > 500 genes and > 1000 UMIs.
G172.M2.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M2.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): same stim label as the plain M2 "both" subset, whereas the M1
# sections use a distinct label per subset - confirm intended.
G172.M2.both.top80$stim <- "G172M2_both"
DefaultAssay(G172.M2.both.top80) <- "RNA"
G172.M2.both.top80 <- SCTransform(G172.M2.both.top80, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M2.both.top80 <- NormalizeData(G172.M2.both.top80, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M2.both.top80 <- FindVariableFeatures(
  G172.M2.both.top80,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M2.both.top80 <- ScaleData(
  G172.M2.both.top80,
  features = VariableFeatures(G172.M2.both.top80)
)
G172.M2.both.top80 <- RunPCA(
  G172.M2.both.top80,
  npcs = 30,
  features = VariableFeatures(G172.M2.both.top80)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M2.both.top80 <- G172.M2.both.top80 %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.35)
G172.M2.both.top80.p1 <- UMAPPlot(G172.M2.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.both.top80.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M2.both.top80) <- "RNA"
G172.M2.both.top80 <- NormalizeData(G172.M2.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M2.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.both.top80.p1, d2)


# ---- G172 M3: "both" barcodes ----
# The active subset applies no QC thresholds. A QC-filtered subset used to be
# computed first and was immediately overwritten, so it is now only listed as
# an alternative alongside the other candidate inputs:
# G172.M3.both <- subset(G172.hashtag, cells = joint.bcs.M3.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
# G172.M3.both <- subset(G172.hashtag.sex.batch, cells = joint.bcs.M3.both)
G172.M3.both <- subset(G172.hashtag, cells = joint.bcs.M3.both)
G172.M3.both$stim <- "G172M3_both"
DefaultAssay(G172.M3.both) <- "RNA"
G172.M3.both <- SCTransform(G172.M3.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M3.both <- NormalizeData(G172.M3.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M3.both <- FindVariableFeatures(
  G172.M3.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M3.both <- ScaleData(G172.M3.both, features = VariableFeatures(G172.M3.both))
G172.M3.both <- RunPCA(G172.M3.both, npcs = 30, features = VariableFeatures(G172.M3.both))
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M3.both <- G172.M3.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.35)
G172.M3.both.p1 <- UMAPPlot(G172.M3.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M3.both) <- "RNA"
G172.M3.both <- NormalizeData(G172.M3.both, verbose = TRUE)
d2 <- DotPlot(G172.M3.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.both.p1, d2)

# ---- G172 M3: "clean both" barcodes ----
# Keep the M3 "clean both" barcodes with > 500 genes and > 1000 UMIs.
G172.M3.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M3.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): same stim label as the plain M3 "both" subset, whereas the M1
# sections use a distinct label per subset - confirm intended.
G172.M3.clean.both$stim <- "G172M3_both"
DefaultAssay(G172.M3.clean.both) <- "RNA"
G172.M3.clean.both <- SCTransform(G172.M3.clean.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M3.clean.both <- NormalizeData(G172.M3.clean.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M3.clean.both <- FindVariableFeatures(
  G172.M3.clean.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M3.clean.both <- ScaleData(
  G172.M3.clean.both,
  features = VariableFeatures(G172.M3.clean.both)
)
G172.M3.clean.both <- RunPCA(
  G172.M3.clean.both,
  npcs = 30,
  features = VariableFeatures(G172.M3.clean.both)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M3.clean.both <- G172.M3.clean.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.4)
G172.M3.clean.both.p1 <- UMAPPlot(G172.M3.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.clean.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M3.clean.both) <- "RNA"
G172.M3.clean.both <- NormalizeData(G172.M3.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M3.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.clean.both.p1, d2)

# ---- G172 M3: "both top80" barcodes ----
# Keep the M3 "both top80" barcodes with > 500 genes and > 1000 UMIs.
G172.M3.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M3.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): same stim label as the plain M3 "both" subset, whereas the M1
# sections use a distinct label per subset - confirm intended.
G172.M3.both.top80$stim <- "G172M3_both"
DefaultAssay(G172.M3.both.top80) <- "RNA"
G172.M3.both.top80 <- SCTransform(G172.M3.both.top80, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M3.both.top80 <- NormalizeData(G172.M3.both.top80, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M3.both.top80 <- FindVariableFeatures(
  G172.M3.both.top80,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M3.both.top80 <- ScaleData(
  G172.M3.both.top80,
  features = VariableFeatures(G172.M3.both.top80)
)
G172.M3.both.top80 <- RunPCA(
  G172.M3.both.top80,
  npcs = 30,
  features = VariableFeatures(G172.M3.both.top80)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M3.both.top80 <- G172.M3.both.top80 %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.4)
G172.M3.both.top80.p1 <- UMAPPlot(G172.M3.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.both.top80.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M3.both.top80) <- "RNA"
G172.M3.both.top80 <- NormalizeData(G172.M3.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M3.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.both.top80.p1, d2)


# ---- G172 M4: "both" barcodes ----
# The active subset is taken from G172.hashtag.sex.TCPO.batch without QC
# thresholds; alternative inputs kept for reference:
# G172.M4.both <- subset(G172.hashtag, cells = joint.bcs.M4.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
# G172.M4.both <- subset(G172.hashtag.TCPO, cells = joint.bcs.M4.both)
G172.M4.both <- subset(G172.hashtag.sex.TCPO.batch, cells = joint.bcs.M4.both)
G172.M4.both$stim <- "G172M4_both"
DefaultAssay(G172.M4.both) <- "RNA"
G172.M4.both <- SCTransform(G172.M4.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M4.both <- NormalizeData(G172.M4.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M4.both <- FindVariableFeatures(
  G172.M4.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M4.both <- ScaleData(G172.M4.both, features = VariableFeatures(G172.M4.both))
G172.M4.both <- RunPCA(G172.M4.both, npcs = 30, features = VariableFeatures(G172.M4.both))
# UMAP, neighbour graph and clustering use PCs 1-25.
G172.M4.both <- G172.M4.both %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.35)
G172.M4.both.p1 <- UMAPPlot(G172.M4.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M4.both) <- "RNA"
G172.M4.both <- NormalizeData(G172.M4.both, verbose = TRUE)
d2 <- DotPlot(G172.M4.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.both.p1, d2)


# ---- G172 M4: "clean both" barcodes ----
# Keep the M4 "clean both" barcodes with > 500 genes and > 1000 UMIs.
G172.M4.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M4.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): same stim label as the plain M4 "both" subset, whereas the M1
# sections use a distinct label per subset - confirm intended.
G172.M4.clean.both$stim <- "G172M4_both"
DefaultAssay(G172.M4.clean.both) <- "RNA"
G172.M4.clean.both <- SCTransform(G172.M4.clean.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M4.clean.both <- NormalizeData(G172.M4.clean.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M4.clean.both <- FindVariableFeatures(
  G172.M4.clean.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M4.clean.both <- ScaleData(
  G172.M4.clean.both,
  features = VariableFeatures(G172.M4.clean.both)
)
G172.M4.clean.both <- RunPCA(
  G172.M4.clean.both,
  npcs = 30,
  features = VariableFeatures(G172.M4.clean.both)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M4.clean.both <- G172.M4.clean.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M4.clean.both.p1 <- UMAPPlot(G172.M4.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.clean.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M4.clean.both) <- "RNA"
G172.M4.clean.both <- NormalizeData(G172.M4.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M4.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.clean.both.p1, d2)

# ---- G172 M4: "both top80" barcodes ----
# Keep the M4 "both top80" barcodes with > 500 genes and > 1000 UMIs.
G172.M4.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M4.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): same stim label as the plain M4 "both" subset, whereas the M1
# sections use a distinct label per subset - confirm intended.
G172.M4.both.top80$stim <- "G172M4_both"
DefaultAssay(G172.M4.both.top80) <- "RNA"
G172.M4.both.top80 <- SCTransform(G172.M4.both.top80, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M4.both.top80 <- NormalizeData(G172.M4.both.top80, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M4.both.top80 <- FindVariableFeatures(
  G172.M4.both.top80,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M4.both.top80 <- ScaleData(
  G172.M4.both.top80,
  features = VariableFeatures(G172.M4.both.top80)
)
G172.M4.both.top80 <- RunPCA(
  G172.M4.both.top80,
  npcs = 30,
  features = VariableFeatures(G172.M4.both.top80)
)
# UMAP, neighbour graph and clustering use PCs 1-25.
G172.M4.both.top80 <- G172.M4.both.top80 %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.25)
G172.M4.both.top80.p1 <- UMAPPlot(G172.M4.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.both.top80.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M4.both.top80) <- "RNA"
G172.M4.both.top80 <- NormalizeData(G172.M4.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M4.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.both.top80.p1, d2)



# ---- G172 M6: M1 + M3 "both" cells, merged ----
# The merged object is built from the per-sample objects. A direct subset of
# G172.hashtag used to be computed first and was immediately overwritten by
# the merge, so it is now only listed as an alternative:
# G172.M6.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both, joint.bcs.M3.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M6.both <- merge(G172.M1.both, G172.M3.both)
DefaultAssay(G172.M6.both) <- "RNA"
# G172.M6.both <- SCTransform(G172.M6.both, verbose = TRUE)
G172.M6.both <- NormalizeData(G172.M6.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M6.both <- FindVariableFeatures(
  G172.M6.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M6.both <- ScaleData(G172.M6.both, features = VariableFeatures(G172.M6.both))
G172.M6.both <- RunPCA(G172.M6.both, npcs = 30, features = VariableFeatures(G172.M6.both))
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M6.both <- G172.M6.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M6.both.p1 <- UMAPPlot(G172.M6.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M6.both.p1
# Same embedding coloured by the `stim` label. The argument is spelled out as
# `group.by`; the previous `group =` only worked via partial matching.
G172.M6.both.p2 <- UMAPPlot(G172.M6.both, reduction = "umap", group.by = "stim", label = TRUE, label.size = 5)
G172.M6.both.p2
DefaultAssay(G172.M6.both) <- "RNA"
G172.M6.both <- NormalizeData(G172.M6.both, verbose = TRUE)
d6 <- DotPlot(G172.M6.both, features = c(all_genes, "Cyp2b10", "Cyp2d9")) + RotatedAxis()
plot_grid(G172.M6.both.p1, d6)

# Differential expression of cluster 0 vs cluster 1 (described by the original
# author as the male and female hepatocyte clusters). Both filters are set to
# numeric 0 so that no gene is dropped; they were previously given as FALSE.
DE.sex.hep <- FindMarkers(
  G172.M6.both,
  ident.1 = 0,
  ident.2 = 1,
  verbose = TRUE,
  logfc.threshold = 0,
  min.pct = 0
)
# NOTE(review): the file name says "G173" although the data are G172 - confirm
# before renaming, other scripts may read this path.
write.table(DE.sex.hep, "DE_Analysis/DE_sex_hep_G173_M1-M3_for_sex_batch_depleted", sep = "\t")

# ---- G172 M7: M2 + M4 "both" cells, merged ----
# The merged object is built from the per-sample objects. A direct subset of
# G172.hashtag used to be computed first and was immediately overwritten by
# the merge, so it is now only listed as an alternative:
# G172.M7.both <- subset(G172.hashtag, cells = c(joint.bcs.M2.both, joint.bcs.M4.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M7.both <- merge(G172.M2.both, G172.M4.both)
DefaultAssay(G172.M7.both) <- "RNA"
# G172.M7.both <- SCTransform(G172.M7.both, verbose = TRUE)
G172.M7.both <- NormalizeData(G172.M7.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M7.both <- FindVariableFeatures(
  G172.M7.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M7.both <- ScaleData(G172.M7.both, features = VariableFeatures(G172.M7.both))
G172.M7.both <- RunPCA(G172.M7.both, npcs = 30, features = VariableFeatures(G172.M7.both))
# UMAP, neighbour graph and clustering use PCs 1-25.
G172.M7.both <- G172.M7.both %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.3)
G172.M7.both.p1 <- UMAPPlot(G172.M7.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M7.both.p1
# Same embedding coloured by the `stim` label. The argument is spelled out as
# `group.by`; the previous `group =` only worked via partial matching.
G172.M7.both.p2 <- UMAPPlot(G172.M7.both, reduction = "umap", group.by = "stim", label = TRUE, label.size = 5)
G172.M7.both.p2
DefaultAssay(G172.M7.both) <- "RNA"
G172.M7.both <- NormalizeData(G172.M7.both, verbose = TRUE)
d7 <- DotPlot(G172.M7.both, features = c(all_genes, "Cyp2b10", "Cyp2d9")) + RotatedAxis()
plot_grid(G172.M7.both.p1, d7)
# Differential expression of cluster 0 vs cluster 1. Both filters are set to
# numeric 0 so that no gene is dropped; they were previously given as FALSE.
DE.sex.hep.M7 <- FindMarkers(
  G172.M7.both,
  ident.1 = 0,
  ident.2 = 1,
  verbose = TRUE,
  logfc.threshold = 0,
  min.pct = 0
)
write.table(DE.sex.hep.M7, "DE_Analysis/DE_sex_hep_G172_M2-M4_for_sexdepleated", sep = "\t")


# ---- G172 M8: "clean both" barcodes of M1 + M3 ----
# Pool the M1 and M3 "clean both" barcodes; keep > 500 genes and > 1000 UMIs.
G172.M8.clean.both <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M1.clean.both, joint.bcs.M3.clean.both),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M8.clean.both$stim <- "G172M8_both"
DefaultAssay(G172.M8.clean.both) <- "RNA"
G172.M8.clean.both <- SCTransform(G172.M8.clean.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M8.clean.both <- NormalizeData(G172.M8.clean.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M8.clean.both <- FindVariableFeatures(
  G172.M8.clean.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M8.clean.both <- ScaleData(
  G172.M8.clean.both,
  features = VariableFeatures(G172.M8.clean.both)
)
G172.M8.clean.both <- RunPCA(
  G172.M8.clean.both,
  npcs = 30,
  features = VariableFeatures(G172.M8.clean.both)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M8.clean.both <- G172.M8.clean.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M8.clean.both.p1 <- UMAPPlot(G172.M8.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M8.clean.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M8.clean.both) <- "RNA"
G172.M8.clean.both <- NormalizeData(G172.M8.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M8.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M8.clean.both.p1, d2)

# ---- G172 M9: "clean both" barcodes of M2 + M4 ----
# Pool the M2 and M4 "clean both" barcodes; keep > 500 genes and > 1000 UMIs.
G172.M9.clean.both <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M2.clean.both, joint.bcs.M4.clean.both),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M9.clean.both$stim <- "G172M9_both"
DefaultAssay(G172.M9.clean.both) <- "RNA"
G172.M9.clean.both <- SCTransform(G172.M9.clean.both, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M9.clean.both <- NormalizeData(G172.M9.clean.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M9.clean.both <- FindVariableFeatures(
  G172.M9.clean.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M9.clean.both <- ScaleData(
  G172.M9.clean.both,
  features = VariableFeatures(G172.M9.clean.both)
)
G172.M9.clean.both <- RunPCA(
  G172.M9.clean.both,
  npcs = 30,
  features = VariableFeatures(G172.M9.clean.both)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M9.clean.both <- G172.M9.clean.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M9.clean.both.p1 <- UMAPPlot(G172.M9.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M9.clean.both.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M9.clean.both) <- "RNA"
G172.M9.clean.both <- NormalizeData(G172.M9.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M9.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M9.clean.both.p1, d2)

# ---- G172 M10: "both top80" barcodes of M1 + M3 ----

# Pool the M1 and M3 "both top80" barcodes; keep > 500 genes and > 1000 UMIs.
G172.M10.both.top80 <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M1.both.top80, joint.bcs.M3.both.top80),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M10.both.top80$stim <- "G172M10_both"
DefaultAssay(G172.M10.both.top80) <- "RNA"
G172.M10.both.top80 <- SCTransform(G172.M10.both.top80, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M10.both.top80 <- NormalizeData(G172.M10.both.top80, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M10.both.top80 <- FindVariableFeatures(
  G172.M10.both.top80,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M10.both.top80 <- ScaleData(
  G172.M10.both.top80,
  features = VariableFeatures(G172.M10.both.top80)
)
G172.M10.both.top80 <- RunPCA(
  G172.M10.both.top80,
  npcs = 30,
  features = VariableFeatures(G172.M10.both.top80)
)
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M10.both.top80 <- G172.M10.both.top80 %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M10.both.top80.p1 <- UMAPPlot(G172.M10.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M10.both.top80.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M10.both.top80) <- "RNA"
G172.M10.both.top80 <- NormalizeData(G172.M10.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M10.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M10.both.top80.p1, d2)


# ---- G172 M11: "both top80" barcodes of M2 + M4 ----
# Pool the M2 and M4 "both top80" barcodes; keep > 500 genes and > 1000 UMIs.
G172.M11.both.top80 <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M2.both.top80, joint.bcs.M4.both.top80),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M11.both.top80$stim <- "G172M11_both"
DefaultAssay(G172.M11.both.top80) <- "RNA"
G172.M11.both.top80 <- SCTransform(G172.M11.both.top80, verbose = TRUE)
# NOTE(review): SCTransform normally makes its own assay the default, so the
# steps up to clustering presumably run on that assay - confirm intended.
G172.M11.both.top80 <- NormalizeData(G172.M11.both.top80, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M11.both.top80 <- FindVariableFeatures(
  G172.M11.both.top80,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M11.both.top80 <- ScaleData(
  G172.M11.both.top80,
  features = VariableFeatures(G172.M11.both.top80)
)
G172.M11.both.top80 <- RunPCA(
  G172.M11.both.top80,
  npcs = 30,
  features = VariableFeatures(G172.M11.both.top80)
)
# UMAP, neighbour graph and clustering use PCs 1-25.
G172.M11.both.top80 <- G172.M11.both.top80 %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.25)
G172.M11.both.top80.p1 <- UMAPPlot(G172.M11.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M11.both.top80.p1
# Marker dot plot on normalised RNA data, shown beside the UMAP.
DefaultAssay(G172.M11.both.top80) <- "RNA"
G172.M11.both.top80 <- NormalizeData(G172.M11.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M11.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M11.both.top80.p1, d2)


# ---- G172 M12: M1 + M2 + M3 + M4 (sex-depleted genes), merged from M6 and M7 ----
# Alternative input kept for reference:
# G172.M12.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both, joint.bcs.M3.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M12.both <- merge(G172.M6.both, G172.M7.both)
# G172.M12.both <- subset(G172.M12.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)

DefaultAssay(G172.M12.both) <- "RNA"
# G172.M12.both <- SCTransform(G172.M12.both, verbose = TRUE)
G172.M12.both <- NormalizeData(G172.M12.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M12.both <- FindVariableFeatures(
  G172.M12.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M12.both <- ScaleData(G172.M12.both, features = VariableFeatures(G172.M12.both))
G172.M12.both <- RunPCA(G172.M12.both, npcs = 30, features = VariableFeatures(G172.M12.both))
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M12.both <- G172.M12.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M12.both.p1 <- UMAPPlot(G172.M12.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M12.both.p1
# Same embedding coloured by the `stim` label. The argument is spelled out as
# `group.by`; the previous `group =` only worked via partial matching.
G172.M12.both.p2 <- UMAPPlot(G172.M12.both, reduction = "umap", group.by = "stim", label = TRUE, label.size = 5)
G172.M12.both.p2
plot_grid(G172.M12.both.p1, G172.M12.both.p2)
FeaturePlot(G172.M12.both, features = "Cyp2c55")
FeaturePlot(G172.M12.both, features = "ncRNA-inter-chrX-15394")
# Dot plot object is stored but not displayed here.
d12 <- DotPlot(G172.M12.both, features = c(all_genes_new)) + RotatedAxis()

# ---- G172 M13: M1 + M2 + M3 + M4 "both" cells, taken directly from G172.hashtag ----
# The active subset applies no QC thresholds. A QC-filtered subset used to be
# computed first and was immediately overwritten, so it is now only listed as
# an alternative:
# G172.M13.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both, joint.bcs.M3.both, joint.bcs.M2.both, joint.bcs.M4.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M13.both <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M1.both, joint.bcs.M3.both, joint.bcs.M2.both, joint.bcs.M4.both)
)

DefaultAssay(G172.M13.both) <- "RNA"
# G172.M13.both <- SCTransform(G172.M13.both, verbose = TRUE)
G172.M13.both <- NormalizeData(G172.M13.both, verbose = FALSE)
# Pick 2000 variable features with the "vst" method.
G172.M13.both <- FindVariableFeatures(
  G172.M13.both,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only the variable features, then compute 30 PCs on them.
G172.M13.both <- ScaleData(G172.M13.both, features = VariableFeatures(G172.M13.both))
G172.M13.both <- RunPCA(G172.M13.both, npcs = 30, features = VariableFeatures(G172.M13.both))
# UMAP, neighbour graph and clustering all use PCs 1-30.
G172.M13.both <- G172.M13.both %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M13.both.p1 <- UMAPPlot(G172.M13.both, reduction = "umap", label = TRUE, label.size = 5)
# G172.M13.both.p2 is never created in this section, so the two-panel grid is
# only drawn when that object already exists; otherwise the cluster UMAP is
# shown on its own.
# NOTE(review): decide which grouping p2 should show and create it here.
if (exists("G172.M13.both.p2")) {
  plot_grid(G172.M13.both.p1, G172.M13.both.p2)
} else {
  G172.M13.both.p1
}
d13 <- DotPlot(G172.M13.both, features = c(all_genes_new)) + RotatedAxis()
plot_grid(G172.M13.both.p1, d13)
# These two feature plots used to be drawn twice; one pair is kept.
FeaturePlot(G172.M13.both, features = "Cyp2c55")
FeaturePlot(G172.M13.both, features = "ncRNA-inter-chrX-15394")
########## function load_tissue_droplet ############
# Inlined body of `load_tissue_droplet`: reads the 10X droplet matrices for
# `tissue_of_interest`, prefixes every barcode with its channel, attaches the
# per-channel annotation and builds the `droplet` Seurat object.
droplet_metadata <- read.csv(
  "/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/metadata_droplet_liver.csv",
  sep = ",",
  header = TRUE
)
colnames(droplet_metadata)[1] <- "channel"
tissue_metadata <- filter(droplet_metadata, tissue == tissue_of_interest)
tissue_metadata <- tissue_metadata[, c("channel", "tissue", "subtissue", "mouse.sex", "mouse.id")]

# First channel. The directory is hard-coded.
# NOTE(review): presumably it corresponds to tissue_metadata$channel[1] -
# confirm.
raw.data <- Read10X("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/Liver-10X_P4_2/")
# paste0 is vectorised, so the barcodes stay a character vector (the previous
# lapply produced a list).
colnames(raw.data) <- paste0(tissue_metadata$channel[1], "_", colnames(raw.data))
meta.data1 <- data.frame(row.names = colnames(raw.data))
meta.data1["channel"] <- tissue_metadata$channel[1]

# Remaining channels. Some tissues, like Thymus and Heart, had only one
# channel; seq_len(n)[-1] is empty in that case, so the loop is skipped.
for (i in seq_len(nrow(tissue_metadata))[-1]) {
  subfolder <- paste0(
    "/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/",
    tissue_of_interest, "-", tissue_metadata$channel[i]
  )
  new.data1 <- Read10X(data.dir = subfolder)
  colnames(new.data1) <- paste0(tissue_metadata$channel[i], "_", colnames(new.data1))

  new.metadata1 <- data.frame(row.names = colnames(new.data1))
  new.metadata1["channel"] <- tissue_metadata$channel[i]

  raw.data <- cbind(raw.data, new.data1)
  meta.data1 <- rbind(meta.data1, new.metadata1)
}

# Attach the per-channel annotation by lookup. The previous
# merge(..., sort = F) leaves the row order unspecified, so re-assigning the
# saved row names afterwards could pair cells with another channel's metadata.
rnames <- row.names(meta.data1)
annotation_cols <- setdiff(colnames(tissue_metadata), "channel")
channel_row <- match(meta.data1$channel, tissue_metadata$channel)
meta.data1[annotation_cols] <- tissue_metadata[channel_row, annotation_cols, drop = FALSE]
stopifnot(identical(row.names(meta.data1), rnames))

# Order the cells alphabetically to ensure consistency.
ordered_cell_names <- order(colnames(raw.data))
raw.data <- raw.data[, ordered_cell_names]
meta.data1 <- meta.data1[ordered_cell_names, ]

# Find ERCCs, compute the ERCC fraction per cell, and drop them from the raw
# data. `drop = FALSE` keeps a matrix when zero or one ERCC row matches.
erccs <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = TRUE)
percent.ercc <- Matrix::colSums(raw.data[erccs, , drop = FALSE]) / Matrix::colSums(raw.data)
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
# Negative indexing with an empty index would select zero rows, so only drop
# rows when at least one ERCC was found.
if (length(ercc.index) > 0) {
  raw.data <- raw.data[-ercc.index, ]
}

# Create the Seurat object with all the data and tag its technology.
droplet <- CreateSeuratObject(raw.data)   # dropseq
droplet <- AddMetaData(object = droplet, meta.data1)
droplet@meta.data$tech <- "droplet"

# ---- Droplet data: QC filter, normalisation, PCA, UMAP and clustering ----
# (legacy) n.pcs = 10
# (legacy, Seurat v2) droplet <- SubsetData(droplet, subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000))

# Keep cells with more than 500 detected genes and more than 1000 UMIs.
droplet <- subset(droplet, subset = nFeature_RNA > 500 & nCount_RNA > 1000)

# Labels used later to tell the data sets apart after integration.
droplet$stim <- "droplet"
droplet$cond <- "ctrl"

# Log-normalise, pick 2000 variable genes, scale, then reduce and cluster.
# UMAP uses PCs 1:25 while the neighbour graph uses PCs 1:10.
droplet <- droplet %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(npcs = 30, verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:10) %>%
  FindClusters(resolution = 0.5)

# UMAPs coloured by channel, by cluster and by mouse sex.
p1 <- UMAPPlot(
  droplet,
  reduction = "umap", group.by = "channel",
  label = TRUE, label.size = 5
)
p2 <- UMAPPlot(droplet, label = TRUE, label.size = 6)
p3 <- UMAPPlot(
  droplet,
  reduction = "umap", group.by = "mouse.sex",
  label = TRUE, label.size = 5
)

# ---- Droplet data: t-SNE, cluster annotation and SCTransform ----
res.used <- 1
#droplet <- FindClusters(object = droplet, reduction.type = "pca", dims.use = 1:n.pcs, resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)

# The only visible definition of n.pcs is the commented-out `n.pcs = 10`;
# fall back to that value unless an earlier part of the script already set it.
if (!exists("n.pcs")) {
  n.pcs <- 10
}

# Seurat v3 names: `dims` (not `dims.use`) for RunTSNE and `label` (not
# `do.label`) for TSNEPlot; the v2 names are not honoured by the v3 API.
droplet <- RunTSNE(object = droplet, dims = 1:n.pcs, seed.use = 10, perplexity = 30)
TSNEPlot(object = droplet, label = TRUE, pt.size = 1.2, label.size = 4)

# Manual cluster annotation; clusters mapped to the same label are merged.
droplet <- RenameIdents(droplet, `0` = "Hep-Mid-M", `1` = "Hep-PC-F", `2` = "Hep-PP-F", `3` = "Hep-PP-M", `4` = "Hep-Mid-M", `5` = "Hep-PC-M", `6` = "Hep-Mid-F", `7` = "Hep-Mid-F", `8` = "Endo-F", `9` = "Bileduct-F")

### Transformation option: SCTransform (adds an "SCT" assay) ###
droplet <- SCTransform(droplet, verbose = TRUE)


######3 Smartseq #########################

# ---- Smart-seq (FACS) liver data: load, annotate and cluster ----
# The first column of the annotation file is used as the join key below.
plate_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Liver_facs_annotation.csv", sep = ",", header = TRUE)
colnames(plate_metadata)[1] <- "plate.barcode"

raw.data <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/liver_facs_scrna_data.csv", sep = ",", row.names = 1)

# From each cell name take the text before the first "_" and keep its second
# "."-separated field.
plate.barcodes <- vapply(
  colnames(raw.data),
  function(x) strsplit(strsplit(x, "_")[[1]][1], '.', fixed = TRUE)[[1]][2],
  character(1)
)

# One row per cell: 'plate.barcode1' holds the extracted field and
# 'plate.barcode' holds the full cell name, which is the merge key.
barcode.df <- data.frame(
  plate.barcode1 = unname(plate.barcodes),
  plate.barcode = colnames(raw.data),
  row.names = colnames(raw.data),
  stringsAsFactors = FALSE
)

# Join the annotation onto the cells. Row names are taken from the merge key
# rather than re-attached by position: merge() may reorder rows and drops
# cells that have no annotation, so a positional assignment can mislabel
# cells or fail on a length mismatch.
meta.data <- merge(barcode.df, plate_metadata, by = 'plate.barcode', sort = FALSE)
row.names(meta.data) <- as.character(meta.data$plate.barcode)

# Sort cells by cell name and keep only the annotated cells in the matrix.
meta.data <- meta.data[order(rownames(meta.data)), ]
raw.data <- raw.data[, rownames(meta.data)]

# Create the Seurat object with all the data
smartseq <- CreateSeuratObject(raw.data)
smartseq <- AddMetaData(object = smartseq, meta.data)
#smartseq@meta.data$tech <- "smartseq"
smartseq$stim <- "smartseq"
smartseq$cond <- "ctrl"
#smartseq <- SubsetData(smartseq,subset.names = c("nGene", "nUMI"),low.thresholds = c(500, 1000))  #old version of seurat
smartseq <- subset(smartseq, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
#smartseq <- SCTransform(smartseq)
smartseq <- NormalizeData(smartseq, verbose = FALSE)
smartseq <- FindVariableFeatures(smartseq, selection.method = "vst", nfeatures = 2000)
smartseq <- ScaleData(smartseq, verbose = FALSE)
smartseq <- RunPCA(smartseq, npcs = 30, verbose = FALSE)
smartseq <- RunUMAP(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindNeighbors(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindClusters(smartseq, resolution = 0.5)
#p4<- UMAPPlot(smartseq, reduction = "umap", group.by = "channel", label=TRUE, label.size=5)
p5 <- UMAPPlot(smartseq, label = TRUE, label.size = 6)
p6 <- UMAPPlot(smartseq, reduction = "umap", group.by = "mouse.sex", label = TRUE, label.size = 5)

########## sctransform ###################
smartseq <- SCTransform(smartseq)

############################### Integration Regular workflow ############################
# Standard (log-normalised) integration of all data sets.
# NOTE(review): droplet.list, smartseq.list, G171B.list, G172.M1.list and
# G172.M2.list are only created further down in this file; they must exist
# before this section is run.
anchors <- FindIntegrationAnchors(object.list = c(droplet.list, smartseq.list, G171B.list, G172.M1.list, G172.M2.list), dims = 1:50, anchor.features = 3000)
combined <- IntegrateData(anchorset = anchors, dims = 1:50)

DefaultAssay(combined) <- "integrated"
# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)

# UMAP, t-SNE and clustering
combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20)
combined <- FindClusters(combined, resolution = 0.5)
# The t-SNE plots below request reduction = "tsne", so the embedding has to be
# computed here; with RunTSNE commented out those plots had no reduction to
# draw.
combined <- RunTSNE(combined, reduction = "pca", dims = 1:20)

# Visualization (UMAP)
p1 <- DimPlot(combined, reduction = "umap", group.by = "stim")
p2 <- DimPlot(combined, reduction = "umap", group.by = "mouse.sex")
p3 <- DimPlot(combined, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(combined, label = TRUE)
plot_grid(p2, p3, p4)
DimPlot(combined, reduction = "umap", split.by = "stim")

# Visualization (t-SNE)
p5 <- DimPlot(combined, reduction = "tsne", group.by = "stim")
p6 <- DimPlot(combined, reduction = "tsne", group.by = "mouse.sex")
p7 <- DimPlot(combined, reduction = "tsne", label = TRUE)
p8 <- TSNEPlot(combined, label = TRUE)
plot_grid(p6, p7, p8)
DimPlot(combined, reduction = "tsne", split.by = "stim")

#################################### Refernce based Integration ################################
# Split every data set into a list of Seurat objects for integration.
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by = "stim")
ctrl.list <- c(droplet.list, smartseq.list)

# SCTransform each control object separately.
for (i in names(ctrl.list)) {
  ctrl.list[[i]] <- SCTransform(ctrl.list[[i]], verbose = TRUE)
}

# Controls plus G171B; G171B.list is appended without a per-object SCTransform
# here.
ctrl.list1 <- c(ctrl.list, G171B.list)
# SelectIntegrationFeatures takes `nfeatures`; it has no `dims` or
# `anchor.features` parameter, so the intended 3000 features were not applied.
ref.features <- SelectIntegrationFeatures(object.list = ctrl.list1, nfeatures = 3000)
ref.list <- PrepSCTIntegration(object.list = ctrl.list1, anchor.features = ref.features)
# Objects 1-3 of ref.list are used as the integration reference.
ref.anchors <- FindIntegrationAnchors(object.list = ref.list, normalization.method = "SCT", anchor.features = ref.features, reference = c(1, 2, 3))
ref.integrated <- IntegrateData(anchorset = ref.anchors, normalization.method = "SCT")

DefaultAssay(ref.integrated) <- "integrated"
ref.integrated <- ScaleData(ref.integrated, verbose = FALSE)
ref.integrated <- RunPCA(object = ref.integrated, npcs = 30, verbose = FALSE)
# `perplexity` is a t-SNE parameter, not a UMAP one, so it is not passed here.
ref.integrated <- RunUMAP(object = ref.integrated, dims = 1:30, seed.use = 10)
ref.integrated <- FindNeighbors(ref.integrated, reduction = "pca", dims = 1:30)
ref.integrated <- FindClusters(ref.integrated, resolution = 0.2)

r1 <- UMAPPlot(ref.integrated, split.by = "stim")
r2 <- UMAPPlot(ref.integrated, label = TRUE)
r3 <- UMAPPlot(ref.integrated, split.by = c("mouse.sex"))

# `plots` was used here before being defined anywhere in the visible script.
# NOTE(review): assumed to be the three UMAPs created just above -- confirm.
plots <- list(r1, r2, r3)
plots <- lapply(X = plots, FUN = function(x) {
  x + theme(legend.position = "top") +
    guides(color = guide_legend(nrow = 4, byrow = TRUE, override.aes = list(size = 2.5)))
})
CombinePlots(plots)

# Per-cell "<cluster>_<stim>" label; keep the cluster ids in seurat_clusters
# and switch the active identity to the combined label.
ref.integrated$celltype.stim <- paste(Idents(ref.integrated), ref.integrated$stim, sep = "_")
ref.integrated$seurat_clusters <- Idents(ref.integrated)
Idents(ref.integrated) <- "celltype.stim"

fp <- FeaturePlot(ref.integrated, features = "ncRNA-as-chr10-8460", cols = c("lightgrey", "darkred"), order = TRUE, shape.by = "stim", pt.size = 2)

# Switch back to the RNA assay for expression plots.
DefaultAssay(ref.integrated) <- "RNA"
ref.integrated <- NormalizeData(ref.integrated)
ref.integrated <- ScaleData(ref.integrated)

# NOTE(review): all_genes is defined further down in this file.
dot1 <- DotPlot(ref.integrated, features = all_genes) + RotatedAxis()
d1 <- DoHeatmap(ref.integrated, features = c(all_genes, "Mup20", "Xist"), group.by = "seurat_clusters", assay = 'RNA')
#d1 <- DoHeatmap(ref.integrated, features = highTPM, group.by = "celltype.stim", assay= 'RNA',raster = F, disp.max = 0.5)+scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd")) ) + guides(color=FALSE)


############ merge G172 M1 #############33
# Split every data set into a list of Seurat objects for integration.
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by = "stim")
ctrl.list.2 <- c(G172.M1.list, G172.M2.list)
G172.M1.merge.list <- SplitObject(G172.M1.merge, split.by = "stim")
G172.M2.merge.list <- SplitObject(G172.M2.merge, split.by = "stim")

merged.list <- c(G172.M1.merge.list, G172.M2.merge.list)

# SCTransform each G172 object separately.
for (i in names(ctrl.list.2)) {
  ctrl.list.2[[i]] <- SCTransform(ctrl.list.2[[i]], verbose = TRUE)
}

# SelectIntegrationFeatures takes `nfeatures`; it has no `dims` or
# `anchor.features` parameter, so the intended 3000 features were not applied.
ref.features.1 <- SelectIntegrationFeatures(object.list = ctrl.list.2, nfeatures = 3000)
ref.list.1 <- PrepSCTIntegration(object.list = ctrl.list.2, anchor.features = ref.features.1)

ref.anchors.1 <- FindIntegrationAnchors(
  object.list = ref.list.1, normalization.method = "SCT",
  anchor.features = ref.features.1, reference = c(1, 2)
)
ref.integrated.1 <- IntegrateData(anchorset = ref.anchors.1, normalization.method = "SCT")

DefaultAssay(ref.integrated.1) <- "integrated"
ref.integrated.1 <- ScaleData(ref.integrated.1, verbose = FALSE)
ref.integrated.1 <- RunPCA(object = ref.integrated.1, npcs = 30, verbose = FALSE)
# `perplexity` is a t-SNE parameter, not a UMAP one, so it is not passed here.
ref.integrated.1 <- RunUMAP(object = ref.integrated.1, dims = 1:30, seed.use = 10)
# TSNEPlot below needs a "tsne" reduction, so compute it (Seurat v3 argument
# name is `dims`, not `dims.use`).
ref.integrated.1 <- RunTSNE(object = ref.integrated.1, dims = 1:30, seed.use = 10, perplexity = 30)
ref.integrated.1 <- FindNeighbors(ref.integrated.1, reduction = "pca", dims = 1:30)
ref.integrated.1 <- FindClusters(ref.integrated.1, resolution = 0.2)

rG172M1.1 <- UMAPPlot(ref.integrated.1, split.by = "stim", label = TRUE)
rG172M1.2 <- UMAPPlot(ref.integrated.1, label = TRUE, combine = FALSE)
rG172M1.3 <- UMAPPlot(ref.integrated.1, split.by = c("mouse.sex"))

rG172M1.4 <- TSNEPlot(ref.integrated.1, split.by = "stim")

# Switch back to the RNA assay for expression plots and marker detection.
DefaultAssay(ref.integrated.1) <- "RNA"
ref.integrated.1 <- NormalizeData(ref.integrated.1)
ref.integrated.1 <- ScaleData(ref.integrated.1)

# NOTE(review): all_genes is defined further down in this file.
DoHeatmap(ref.integrated.1, features = c(all_genes, 'Sox9', 'Mup20'), group.by = "seurat_clusters", assay = 'RNA', raster = FALSE) +
  scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd"))) +
  guides(color = FALSE)

# Store a cell-type label per cell. RenameIdents() returns a Seurat object,
# which cannot be stored in a metadata column, so map cluster id -> label
# directly. The active identities (cluster ids) are left untouched.
# unname() drops the cluster-id names so the values are assigned by position.
celltype.labels <- c(`0` = "PP", `1` = "PC", `2` = "Endo", `3` = "HSC", `4` = "Kupffer", `5` = "Dividing", `6` = "Immune", `7` = "B-NK", `8` = "NA")
ref.integrated.1$celltype <- unname(celltype.labels[as.character(Idents(ref.integrated.1))])

# Positive markers for every cluster.
ref.integrated.1.markers <- FindAllMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)

write.csv(ref.integrated.1.markers, "ref_integrated_G172M1-M2_Allmarker")

# Per-cell "<cluster>_<stim>" label, then make it the active identity.
# NOTE(review): the next assignment overwrites the cell-type labels stored in
# `celltype` above with the cluster ids -- confirm which one is wanted.
ref.integrated.1$celltype.stim <- paste(Idents(ref.integrated.1), ref.integrated.1$stim, sep = "_")
ref.integrated.1$celltype <- Idents(ref.integrated.1)
Idents(ref.integrated.1) <- "celltype.stim"

# NOTE(review): TPM4 is defined further down in this file.
DoHeatmap(ref.integrated.1, features = TPM4, group.by = "seurat_clusters", assay = 'RNA')

# Markers per "<cluster>_<stim>" identity. FindMarkers() needs ident.1 and
# returns no `cluster` column; FindAllMarkers() provides the column that the
# group_by() below relies on.
ref.integrated.1.markers <- FindAllMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)


##################################################################################################3

# ---- SCT integration of the merged G172 objects (merged.list) ----
for (i in names(merged.list)) {
  merged.list[[i]] <- SCTransform(merged.list[[i]], verbose = TRUE)
}
# SelectIntegrationFeatures takes `nfeatures`; it has no `dims` or
# `anchor.features` parameter, so the intended 3000 features were not applied.
ref.features.2 <- SelectIntegrationFeatures(object.list = merged.list, nfeatures = 3000)
ref.list.2 <- PrepSCTIntegration(object.list = merged.list, anchor.features = ref.features.2)

# The first object of ref.list.2 is the integration reference.
ref.anchors.2 <- FindIntegrationAnchors(
  object.list = ref.list.2, normalization.method = "SCT",
  anchor.features = ref.features.2, reference = 1
)
ref.integrated.2 <- IntegrateData(anchorset = ref.anchors.2, normalization.method = "SCT")

DefaultAssay(ref.integrated.2) <- "integrated"
ref.integrated.2 <- ScaleData(ref.integrated.2, verbose = FALSE)
ref.integrated.2 <- RunPCA(object = ref.integrated.2, npcs = 30, verbose = FALSE)
# `perplexity` is a t-SNE parameter, not a UMAP one, so it is not passed here.
ref.integrated.2 <- RunUMAP(object = ref.integrated.2, dims = 1:30, seed.use = 10)
# Seurat v3 argument name is `dims`; the v2 name `dims.use` is not honoured,
# which leaves the function default in effect instead of PCs 1:30.
ref.integrated.2 <- RunTSNE(object = ref.integrated.2, dims = 1:30, seed.use = 10, perplexity = 30)
ref.integrated.2 <- FindNeighbors(ref.integrated.2, reduction = "pca", dims = 1:30)
ref.integrated.2 <- FindClusters(ref.integrated.2, resolution = 0.5)

rG172M1.1 <- UMAPPlot(ref.integrated.2, split.by = "stim", label = TRUE)
rG172M1.2 <- UMAPPlot(ref.integrated.2, label = TRUE)
rG172M1.3 <- TSNEPlot(ref.integrated.2, split.by = "stim")
rG172M1.4 <- TSNEPlot(ref.integrated.2, label = TRUE)

# Switch back to the RNA assay for the marker dot plot.
DefaultAssay(ref.integrated.2) <- "RNA"
ref.integrated.2 <- NormalizeData(ref.integrated.2)
ref.integrated.2 <- ScaleData(ref.integrated.2)
# NOTE(review): all_genes is defined further down in this file.
d3 <- DotPlot(ref.integrated.2, features = all_genes) + RotatedAxis()
plot_grid(rG172M1.2, rG172M1.1, d3)
# ---- SCT integration of G172.M1.both and G172.M3.both ----
# This overwrites merged.list, ref.features.1, ref.list.1, ref.anchors.1 and
# ref.integrated.1 from the earlier sections.
G172.M1.list <- SplitObject(G172.M1.both, split.by = "stim")
G172.M3.list <- SplitObject(G172.M3.both, split.by = "stim")
merged.list <- c(G172.M1.list, G172.M3.list)

for (i in names(merged.list)) {
  merged.list[[i]] <- SCTransform(merged.list[[i]], verbose = TRUE)
}

# SelectIntegrationFeatures takes `nfeatures`; it has no `dims` or
# `anchor.features` parameter.
ref.features.1 <- SelectIntegrationFeatures(object.list = merged.list, nfeatures = 2000)
ref.list.1 <- PrepSCTIntegration(object.list = merged.list, anchor.features = ref.features.1)

ref.anchors.1 <- FindIntegrationAnchors(
  object.list = ref.list.1, normalization.method = "SCT",
  anchor.features = ref.features.1
)
ref.integrated.1 <- IntegrateData(anchorset = ref.anchors.1, normalization.method = "SCT")

DefaultAssay(ref.integrated.1) <- "integrated"
ref.integrated.1 <- ScaleData(ref.integrated.1, verbose = FALSE)
ref.integrated.1 <- RunPCA(object = ref.integrated.1, npcs = 30, verbose = FALSE)
# `perplexity` is a t-SNE parameter, not a UMAP one, so it is not passed here.
ref.integrated.1 <- RunUMAP(object = ref.integrated.1, dims = 1:30, seed.use = 10)
#ref.integrated.1 <- RunTSNE(object = ref.integrated.1, dims = 1:n.pcs, seed.use = 10, perplexity = 30)
ref.integrated.1 <- FindNeighbors(ref.integrated.1, reduction = "pca", dims = 1:30)
ref.integrated.1 <- FindClusters(ref.integrated.1, resolution = 0.2)

rG172M1.1 <- UMAPPlot(ref.integrated.1, group.by = "stim")
rG172M1.2 <- UMAPPlot(ref.integrated.1, label = TRUE)
#rG172M1.4 <- TSNEPlot(ref.integrated.1, split.by="stim")

# Switch back to the RNA assay for the marker dot plot.
DefaultAssay(ref.integrated.1) <- "RNA"
ref.integrated.1 <- NormalizeData(ref.integrated.1)
ref.integrated.1 <- ScaleData(ref.integrated.1)
# NOTE(review): all_genes is defined further down in this file.
d3 <- DotPlot(ref.integrated.1, features = all_genes) + RotatedAxis()
plot_grid(rG172M1.2, rG172M1.1, d3)

##########3 use the standar integration workflow ##################
# Standard (non-SCT) integration of merged.list; overwrites `anchors` and
# `combined` from the earlier integration section.
anchors <- FindIntegrationAnchors(
  object.list = merged.list,
  dims = 1:50,
  anchor.features = 2000
)
# (alternative) anchors <- FindIntegrationAnchors(object.list = list(lnc5998, droplet1), dims = 1:50, anchor.features = 3000)
combined <- IntegrateData(anchorset = anchors, dims = 1:50)
DefaultAssay(combined) <- "integrated"

# Scale, reduce, embed and cluster the integrated assay.
combined <- combined %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(npcs = 30, verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.2)

# UMAPs coloured by data set and by cluster.
p1 <- UMAPPlot(combined, reduction = "umap", group.by = "stim")
p4 <- UMAPPlot(combined, label = TRUE, label.size = 6)

# Marker dot plot on the re-normalised RNA assay, shown next to the UMAPs.
DefaultAssay(combined) <- "RNA"
combined <- combined %>%
  NormalizeData() %>%
  ScaleData()
d3 <- DotPlot(combined, features = all_genes) + RotatedAxis()
plot_grid(p1, p4, d3)

############ G172 M1 vs. G172 M2 DE analysis ############


# Differential expression between G172 M1 and G172 M2 on ref.integrated.1.
# The identities used below have the form "<cluster>_<stim>", e.g. "0_G172M1".

# Run FindMarkers with no log-fold-change or detection-rate filter. The
# thresholds are numeric, so 0 is passed instead of the logical FALSE.
de_between <- function(ident.1, ident.2) {
  FindMarkers(
    ref.integrated.1,
    ident.1 = ident.1, ident.2 = ident.2,
    verbose = TRUE, logfc.threshold = 0, min.pct = 0
  )
}

# M1 vs. M2 within one cluster.
de_within_cluster <- function(cluster) {
  de_between(paste0(cluster, "_G172M1"), paste0(cluster, "_G172M2"))
}

# All clusters (0-8) pooled: M1 vs. M2.
DE_All_M1_M2 <- de_between(paste0(0:8, "_G172M1"), paste0(0:8, "_G172M2"))
write.csv(DE_All_M1_M2, "/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/marker/DE_G172_All_M1_M2")

# Within M2: clusters 0-1 vs. clusters 2-7.
DE_hep_NPC <- de_between(paste0(0:1, "_G172M2"), paste0(2:7, "_G172M2"))

# Clusters 2-7 pooled: M1 vs. M2.
DE_NPC_M1_M2 <- de_between(paste0(2:7, "_G172M1"), paste0(2:7, "_G172M2"))

# Same comparison as DE_hep_NPC, so reuse the result instead of recomputing.
DE_Hep0.1_NPC_M2 <- DE_hep_NPC

write.csv(DE_Hep0.1_NPC_M2, "G172_markers/DE_Hep01_NPC_M2")

# Per-cluster M1 vs. M2 (each comparison is run once).
DE0.1 <- de_within_cluster(0)
DE1.1 <- de_within_cluster(1)
DE2.1 <- de_within_cluster(2)
DE3.1 <- de_within_cluster(3)
DE4.1 <- de_within_cluster(4)
DE5.1 <- de_within_cluster(5)
DE6.1 <- de_within_cluster(6)
DE7.1 <- de_within_cluster(7)
################# G171 TCPO expsosed ########
# ---- G171B (TCPO-exposed) 10X liver data: load and build Seurat object ----
G171B_metadata_G171B <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/G171_metadata_droplet_liver.csv", sep = ",", header = TRUE)
colnames(G171B_metadata_G171B)[1] <- "channel"
tissue_metadata_G171B <- filter(G171B_metadata_G171B, tissue == tissue_of_interest)[, c('channel', 'tissue', 'subtissue', 'mouse.sex', 'mouse.id')]

# Load the single channel and prefix every barcode with the channel name.
raw.data <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/Transcript_Refined/Liver-10X_G171B/")
colnames(raw.data) <- paste0(tissue_metadata_G171B$channel[1], '_', colnames(raw.data))
meta.data1 <- data.frame(row.names = colnames(raw.data))
meta.data1['channel'] <- tissue_metadata_G171B$channel[1]

# Attach the per-channel annotation to every cell. A positional lookup with
# match() keeps the rows in cell order, which merge() does not guarantee.
rnames <- row.names(meta.data1)
channel.rows <- match(meta.data1$channel, tissue_metadata_G171B$channel)
annotation.cols <- setdiff(colnames(tissue_metadata_G171B), 'channel')
meta.data1 <- cbind(meta.data1, tissue_metadata_G171B[channel.rows, annotation.cols, drop = FALSE])
row.names(meta.data1) <- rnames

# Order the cells alphabetically to ensure consistency.
ordered_cell_names <- order(colnames(raw.data))
raw.data <- raw.data[, ordered_cell_names]
meta.data1 <- meta.data1[ordered_cell_names, ]

# Find ERCCs, compute the fraction of ERCC counts per cell, and drop them.
# Guard the removal: with no ERCC rows, `-integer(0)` would select zero rows
# and silently discard every gene.
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
erccs <- rownames(raw.data)[ercc.index]
percent.ercc <- Matrix::colSums(raw.data[ercc.index, , drop = FALSE]) / Matrix::colSums(raw.data)
if (length(ercc.index) > 0) {
  raw.data <- raw.data[-ercc.index, ]
}

# Fraction of counts per cell in features whose name starts with "ncRNA", and
# the names of features starting with "KRAB" / "mcherry".
# drop = FALSE keeps a matrix even when exactly one feature matches.
ncRNA.genes <- grep(pattern = "^ncRNA", x = rownames(x = raw.data), value = TRUE)
percent.ncRNA <- Matrix::colSums(raw.data[ncRNA.genes, , drop = FALSE]) / Matrix::colSums(raw.data)
KRAB.genes <- grep(pattern = "^KRAB", x = rownames(x = raw.data), value = TRUE)
cherry.genes <- grep(pattern = "^mcherry", x = rownames(x = raw.data), value = TRUE)

# Create the Seurat object with all the data
G171B <- CreateSeuratObject(raw.data)   # dropseq
G171B <- AddMetaData(object = G171B, meta.data1)
G171B@meta.data$tech <- "G171B"

#G171B <- SubsetData(G171B,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000))  # old version of seurat
#G171B <-  subset(G171B, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G171B <- NormalizeData(G171B, verbose = FALSE)
G171B <- FindVariableFeatures(G171B, selection.method = "vst", nfeatures = 2000)
G171B$stim <- "G171B"

### Transformation option: SCTransform (adds an "SCT" assay) ###
G171B <- SCTransform(G171B, verbose = TRUE)

############ diffusion plot code ############

# Classical multidimensional scaling (MDS) of the cells in `combined`.
# Distances are Euclidean, computed between cells (columns of scale.data are
# transposed into rows) over every gene present in the scale.data slot.
d <- dist(
  t(GetAssayData(combined, slot = "scale.data"))
)

# Two-dimensional embedding; name the columns so they match the key below.
mds <- cmdscale(d = d, k = 2)
colnames(mds) <- c("MDS_1", "MDS_2")

# Register the embedding on the object as a custom reduction named "mds".
combined[["mds"]] <- CreateDimReducObject(
  embeddings = mds,
  key = "MDS_",
  assay = DefaultAssay(combined)
)

# From here on "mds" can be used like any other dimensional reduction.
DimPlot(combined, reduction = "mds", pt.size = 0.5)
# ---- Marker gene lists used by the dot plots, heatmaps and feature plots ----

# Hepatocyte markers: four core genes, a second set named "zones", and the
# two combined.
genes_hep_main <- c("Alb", "Ttr", "Apoa1", "Serpina1c")
genes_zones <- c(
  "Cyp2e1", "Glul", "Oat", "Gulo",
  "Ass1", "Hamp", "Gstp1", "Ubb",
  "Cyp2f2", "Pck1", "Hal", "Cdh1"
)
genes_hep <- c(genes_hep_main, genes_zones)

# Other marker sets; the variable names carry the intended cell type.
genes_endo <- c("Pecam1", "Nrp1", "Kdr", "Oit3", "Igfbp7", "Aqp1")
genes_kuppfer <- c("Clec4f", "Cd68")
genes_nk <- c("Il2rb", "Nkg7", "Cxcr6", "Gzma")
genes_b <- c("Cd79a", "Cd79b")
genes_bec <- c("Epcam", "Krt19", "Krt7")
genes_immune <- "Ptprc"
HSC <- c("Dcn", "Lama1", "Nes")
Dividing <- "Top2a"
Bplasma <- "Jchain"
Mac <- "Csf1r"
Chol <- "Sox9"

# Sex-related gene sets ("Cyp2d9" is listed twice in `sex`).
Y_genes <- c("Uty", "Ddx3y", "Kdm5d", "Eif2s3y", "Gm47283")
sex <- c(
  "Cyp2d9", "ncRNA-inter-chrX-15394", "Cyp2c69", "Mup20",
  "Mup1", "Mup12", "Mup21", "Cyp2d9"
)
F_sex <- c("Sult3a1", "A1bg", "Fmo3", "Cyp2b9", "Sult2a1", "Cyp2b13")

# Combined panels.
all_genes <- c(
  genes_hep, genes_endo, genes_kuppfer, Mac, genes_nk,
  genes_b, genes_bec, genes_immune, HSC, Dividing
)
genes_bec_b_immune <- c(genes_bec, genes_b, genes_immune)

All_sex_genes <- c(
  "Mup11", "Mup7", "Elovl3", "Cyp4a12a", "Tff3", "Scara5", "Mup14",
  "Gstp2", "Cyp4a12b", "Mup20", "Mup12", "Mup21", "Cyp2d9", "Mup1",
  "Sult2a5", "Sult2a2", "Sult2a3", "Cyp3a16", "Sult2a6", "Cyp3a41b",
  "Cyp3a44", "Cyp3a41a", "Cyp2a4", "Slc22a26", "Sult3a1",
  "Sult2a1", "Cyp2b13", "A1bg", "Fmo3", "Cyp2c69", "Cyp2b9"
)

# (alternative, unused) the same list as a single "|"-separated pattern:
#All_sex_genes <- c('Mup11|Mup7|Elovl3|Cyp4a12a|Tff3|Scara5|Mup14|Gstp2|Cyp4a12b|Mup20|Mup12|Mup21|Cyp2d9|Mup1|
#                   Sult2a5|Sult2a2|Sult2a3|Cyp3a16|Sult2a6|Cyp3a41b|Cyp3a44|Cyp3a41a|Cyp2a4|Slc22a26|Sult3a1|
#Sult2a1|Cyp2b13|A1bg|Fmo3|Cyp2c69|Cyp2b9')

All_sex_genes_all <- c(Y_genes, "ncRNA-inter-chrX-15394", All_sex_genes)

receptor_KO <- c(
  "ncRNA-inter-chr7-5998", "Cyp2b10", "Nr1i2", "Nr1i3",
  "Ppara", "Pparg", "Ppargc1b", "Ppard"
)
cell <- c(
  "Stab2", "Csf1r", "Cd3g", "Ebf1", "Irf8",
  "Sox9", "Apoc3", "Top2a", "Dcn"
)
# TPM4: character vector of ncRNA feature names (one per line), passed as the
# `features` argument of a DoHeatmap() call elsewhere in this script.
# NOTE(review): the name suggests an expression cut-off (TPM >= 4?) was used to
# pick these features -- confirm the selection criterion.
TPM4<- c('ncRNA-inter-chr7-6524',
'ncRNA-inter-chr19-14853',
'ncRNA-inter-chr6-5675',
'ncRNA-inter-chr4-3468',
'ncRNA-inter-chr9-8122',
'ncRNA-inter-chr12-10476',
'ncRNA-inter-chr17-14026',
'ncRNA-as-chr2-1457',
'ncRNA-as-chr19-14883',
'ncRNA-as-chr10-8460',
'ncRNA-as-chr5-4325',
'ncRNA-inter-chr7-5998',
'ncRNA-as-chr9-7843',
'ncRNA-as-chr9-8142',
'ncRNA-inter-chr11-9925',
'ncRNA-inter-chr19-14873',
'ncRNA-as-chr9-8172',
'ncRNA-inter-chr4-3779',
'ncRNA-inter-chr3-2504',
'ncRNA-as-chr19-15054',
'ncRNA-inter-chr8-7423',
'ncRNA-as-chr7-6302',
'ncRNA-inter-chr10-9418',
'ncRNA-inter-chr12-10454',
'ncRNA-as-chr7-5999',
'ncRNA-as-chr6-5335',
'ncRNA-inter-chr19-14987',
'ncRNA-inter-chr16-13170',
'ncRNA-inter-chr3-2988',
'ncRNA-inter-chr8-7430',
'ncRNA-inter-chr3-2168',
'ncRNA-inter-chr9-7874',
'ncRNA-inter-chr4-3778',
'ncRNA-inter-chr2-2011',
'ncRNA-inter-chr5-4335',
'ncRNA-inter-chr9-8301',
'ncRNA-inter-chr16-13510',
'ncRNA-as-chr9-8401',
'ncRNA-as-chr16-13512',
'ncRNA-as-chr12-10896',
'ncRNA-as-chr8-7359',
'ncRNA-as-chr5-4744',
'ncRNA-inter-chr5-4499',
'ncRNA-inter-chr16-13509',
'ncRNA-inter-chr17-13692',
'ncRNA-as-chr17-13834',
'ncRNA-inter-chr9-8147',
'ncRNA-inter-chr10-8697',
'ncRNA-inter-chr6-5551',
'ncRNA-as-chr9-8317',
'ncRNA-inter-chr7-6509',
'ncRNA-as-chr19-14977',
'ncRNA-inter-chr6-5318',
'ncRNA-inter-chr5-4578',
'ncRNA-inter-chr12-10509',
'ncRNA-as-chr10-9015',
'ncRNA-inter-chr3-2411',
'ncRNA-inter-chr9-7875',
'ncRNA-inter-chr5-4336',
'ncRNA-inter-chr12-10910',
'ncRNA-as-chr1-782',
'ncRNA-inter-chr17-13924',
'ncRNA-intra-chr7-5920',
'ncRNA-inter-chr16-13225',
'ncRNA-inter-chr3-2269',
'ncRNA-inter-chr14-12016',
'ncRNA-as-chr4-3800',
'ncRNA-as-chr5-4655',
'ncRNA-inter-chr9-7989',
'ncRNA-intra-chr5-4728',
'ncRNA-inter-chrX-15248',
'ncRNA-inter-chr10-8767',
'ncRNA-inter-chr19-14717',
'ncRNA-inter-chr8-6766',
'ncRNA-inter-chr13-11122',
'ncRNA-inter-chr7-6074',
'ncRNA-inter-chr15-12439',
'ncRNA-as-chr11-9787',
'ncRNA-inter-chr2-1827',
'ncRNA-as-chr19-14976',
'ncRNA-inter-chr19-14947',
'ncRNA-inter-chr6-5248',
'ncRNA-inter-chr2-1098',
'ncRNA-inter-chr8-7180',
'ncRNA-inter-chr9-8118',
'ncRNA-inter-chr14-12199',
'ncRNA-as-chr6-5336',
'ncRNA-inter-chr4-3867',
'ncRNA-inter-chr10-9000',
'ncRNA-inter-chr14-12290',
'ncRNA-inter-chr2-1491',
'ncRNA-inter-chr15-12606',
'ncRNA-inter-chr10-9222',
'ncRNA-inter-chr5-4322',
'ncRNA-inter-chr12-10942',
'ncRNA-inter-chr18-14690',
'ncRNA-inter-chr2-1471',
'ncRNA-inter-chr3-2410',
'ncRNA-inter-chr19-14880',
'ncRNA-inter-chr13-11074',
'ncRNA-inter-chr9-8056',
'ncRNA-inter-chr5-4654',
'ncRNA-inter-chr3-2166',
'ncRNA-inter-chr8-6944',
'ncRNA-as-chr7-6065',
'ncRNA-inter-chr8-6896',
'ncRNA-inter-chr2-1963',
'ncRNA-inter-chr4-3425',
'ncRNA-inter-chr13-11385',
'ncRNA-inter-chr9-7885',
'ncRNA-as-chr10-9411',
'ncRNA-as-chr9-8419',
'ncRNA-as-chr12-10618',
'ncRNA-inter-chr7-6390',
'ncRNA-inter-chr17-14151',
'ncRNA-as-chr13-11787',
'ncRNA-as-chr2-1965',
'ncRNA-inter-chr6-5721',
'ncRNA-inter-chr6-5822',
'ncRNA-inter-chr11-9635',
'ncRNA-inter-chr11-9965',
'ncRNA-inter-chr4-3052',
'ncRNA-inter-chr17-13857',
'ncRNA-inter-chr13-11201',
'ncRNA-inter-chr6-5249',
'ncRNA-inter-chr19-14851',
'ncRNA-inter-chr6-5638',
'ncRNA-inter-chr17-14130',
'ncRNA-inter-chr9-7993',
'ncRNA-inter-chr18-14656',
'ncRNA-inter-chr9-7992',
'ncRNA-inter-chr1-566',
'ncRNA-inter-chr12-10715',
'ncRNA-inter-chr17-14102',
'ncRNA-inter-chr4-3010',
'ncRNA-inter-chr3-2764',
'ncRNA-inter-chr4-3306',
'ncRNA-inter-chr19-14979',
'ncRNA-inter-chr1-570',
'ncRNA-inter-chr19-14790',
'ncRNA-inter-chr4-3282',
'ncRNA-inter-chr5-4777',
'ncRNA-inter-chr8-7605',
'ncRNA-inter-chr9-8000',
'ncRNA-as-chr15-12920',
'ncRNA-as-chr1-369',
'ncRNA-inter-chr19-14952',
'ncRNA-as-chr9-8393',
'ncRNA-as-chr14-12074',
'ncRNA-inter-chr12-10415',
'ncRNA-inter-chr7-6559',
'ncRNA-inter-chr1-630',
'ncRNA-inter-chr4-3142',
'ncRNA-inter-chr6-5131',
'ncRNA-inter-chr16-13050',
'ncRNA-inter-chr7-6411',
'ncRNA-inter-chr5-4746',
'ncRNA-inter-chr1-633',
'ncRNA-inter-chr10-8461',
'ncRNA-inter-chr2-2016',
'ncRNA-inter-chr6-5137',
'ncRNA-as-chr4-3300',
'ncRNA-inter-chr4-3009',
'ncRNA-inter-chr2-1923',
'ncRNA-inter-chr1-670',
'ncRNA-inter-chr15-12835',
'ncRNA-inter-chr18-14655',
'ncRNA-inter-chr16-13211',
'ncRNA-as-chr7-6050',
'ncRNA-inter-chr7-6508',
'ncRNA-inter-chr11-10206',
'ncRNA-inter-chr13-11399',
'ncRNA-inter-chr8-6757',
'ncRNA-inter-chr1-129',
'ncRNA-inter-chr12-10459',
'ncRNA-inter-chr7-6709',
'ncRNA-inter-chr12-10713',
'ncRNA-inter-chr7-6523',
'ncRNA-inter-chr2-2017',
'ncRNA-inter-chr7-6343',
'ncRNA-inter-chr1-63',
'ncRNA-as-chr3-2936',
'ncRNA-inter-chr18-14691',
'ncRNA-inter-chr7-6097',
'ncRNA-inter-chr6-5723')
# ---- Marker expression plots on the integrated object `combined` ----
# The default assay is switched on `droplet`, while everything below operates
# on `combined`.
# NOTE(review): possibly `combined` was intended here -- confirm.
DefaultAssay(droplet) <- "RNA"
# (alternative) droplet <- NormalizeData(combined, verbose = TRUE, normalization.method = "RC", scale.factor = 1e6)
combined <- NormalizeData(combined, verbose = TRUE)

# Full marker panel and the four core hepatocyte genes.
DotPlot(combined, features = all_genes)
FeaturePlot(combined, features = genes_hep_main, min.cutoff = "q9")

# Dot plots per marker group.
subtissplot <- DotPlot(
  combined,
  features = c(
    genes_hep_main, genes_endo, genes_bec_b_immune,
    genes_kuppfer, genes_nk
  )
)
PC <- DotPlot(combined, features = c(genes_hep_main, genes_zones))
NPC <- DotPlot(combined, features = c(genes_endo, genes_kuppfer, genes_nk))
all <- DotPlot(combined, features = c(all_genes))

### co-expression plots ###
# Blend plot of Cyp2b10 and ncRNA-inter-chr7-5998 on the "mds" reduction of
# KO.cells, split by data set.
f1 <- FeaturePlot(
  KO.cells,
  features = c('Cyp2b10', 'ncRNA-inter-chr7-5998'),
  reduction = "mds",
  order = TRUE,
  split.by = "stim",
  blend = TRUE,
  sort.cell = TRUE,
  max.cutoff = 0.5
)

########## exact averaging formula ##########
# Per-identity average of the RNA counts slot; look up one ncRNA.
x <- (AverageExpression(
  KO.cells,
  verbose = TRUE, assays = "RNA", slot = "counts"
)$RNA)
x["ncRNA-inter-chr7-5998", ]
# Output recorded by the author:
#                         G171B    G171C
#ncRNA-inter-chr7-5998 1.871795 1.091463
########
#Idents(combined) <- factor(Idents(combined), levels = c(0,1,12))
markers.to.plot <- c("Alb", "ncRNA-inter-chr7-5998")
DotPlot(
  combined,
  features = rev(markers.to.plot),
  cols = c("blue", "red"),
  dot.scale = 8,
  split.by = "stim"
) + RotatedAxis()

FeaturePlot(
  combined,
  features = c(
    "Alb", "ncRNA-inter-chr7-5998", "Cyp2b10",
    "dSaCas9", "KRAB", "AAV8-mCherry"
  ),
  split.by = "stim",
  max.cutoff = 3,
  cols = c("grey", "red")
)

######################### violin plots ##########################
# One violin per feature, grouped by cluster and split by data set.
plots <- VlnPlot(
  combined,
  features = c("Alb", "ncRNA-inter-chr7-5998", "Cyp2b10", "dSaCas9"),
  split.by = "stim",
  group.by = "seurat_clusters",
  pt.size = 0,
  combine = FALSE
)
CombinePlots(plots = plots, ncol = 1)

plots <- VlnPlot(
  combined,
  features = c("Lhx4", "Dtna", "Fam189a1", "Galnt16", "Kalrn"),
  split.by = "stim",
  group.by = "seurat_clusters",
  pt.size = 0,
  combine = FALSE
)
CombinePlots(plots = plots, ncol = 1)

# endothelial
DotPlot(combined, features = genes_endo)

# zones
zones <- DotPlot(combined, features = genes_zones)

f1 <- FeaturePlot(
  combined,
  features = c('Cyp2e1', 'Cyp2f2', 'Ass1'),
  min.cutoff = "q9",
  reduction = "tsne"
)

DimPlot(combined, label = TRUE)

# Persist the integrated object.
save(combined, file = "Seurat_smart-drop_integrated.Robj")


################# save raw counts from cluster #####################

# Make the "stim" metadata column the active identity of `combined`.
Idents(combined) <- "stim"

### to average out the matrix from KO cells 

# NOTE(review): this call passes both `ident = 0` and `idents = "stim"`; only one of them can
# bind to WhichCells' `idents` argument, and "stim" is a metadata column name rather than an
# identity level - confirm what selection was intended.
combined.raw.data.0.1 <- as.matrix(GetAssayData(combined, slot = "data")[, WhichCells(combined, ident = 0,idents = "stim")])
# NOTE(review): the next two assignments are identical (both select ident 0 from the "counts"
# slot). The identity was switched to "stim" just above, so presumably a cluster label such
# as 0 no longer exists as an identity here - confirm.
combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])
combined.raw.data.2 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])

#combined.raw.data.[i] <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 1)])
#combined.raw.data.12 <-as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 12)])
#combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts"))
# Average RNA expression of test.combined (an object not created in this part of the file),
# with "stim" appended to the identity via add.ident; only the $RNA element is kept.
x <- AverageExpression(test.combined,assays = "RNA",add.ident = "stim", slot = "data",use.scale = FALSE, use.counts = FALSE)$RNA

#}######## CAR data 
# Average RNA expression per active identity of `combined`; the gene names are copied from
# the row names into a regular `gene` column.
avg.combined.cells <- (AverageExpression(combined, verbose = FALSE)$RNA) 
avg.combined.cells$gene <- rownames(avg.combined.cells)

# Blended two-gene feature plot (Cyp2b10 and Nr1i3) on the UMAP, split by "stim"; stored, not printed.
CAR_FP <- FeaturePlot(combined, features = c('Cyp2b10','Nr1i3'), reduction = "umap", order = TRUE,split.by = "stim", blend = TRUE,sort.cell = TRUE, max.cutoff = 1, min.cutoff = 0, pt.size = 0.5, repel = TRUE)
# Dot plot of Nr1i3 alone, with the colour scale floored at 0.
CAR_DOT_NR <- DotPlot(combined, features = 'Nr1i3', col.min = 0)

# Cyp2b10 on the UMAP with the lower cutoff at 0.
Cyp2b10_FP <- FeaturePlot(combined, features = 'Cyp2b10', reduction = "umap", min.cutoff = 0)
########### tSNE #################################
# Re-normalize `combined` with NormalizeData's defaults and pick 2000 variable features (vst).
combined <- NormalizeData(object = combined)
combined <- FindVariableFeatures(combined, selection.method = "vst", nfeatures = 2000)
###### I used these for merged cells ####

# Work on a copy of ref.integrated.2 (created elsewhere in the file).
G172_TPM_count  <- ref.integrated.2
# Relative-count ("RC") normalization of the RNA assay with a scale factor of 1e6.
G172_TPM_count  <- NormalizeData(G172_TPM_count,assay = "RNA", normalization.method = "RC", scale.factor = 1e6)


# Store "<identity>_<stim>" per cell, keep the previous identity as `celltype`,
# then make the combined label the active identity.
G172_TPM_count $celltype.stim <- paste(Idents(G172_TPM_count ), G172_TPM_count $stim, sep = "_")
G172_TPM_count $celltype <- Idents(G172_TPM_count )
Idents(G172_TPM_count ) <- "celltype.stim"
# Append "-<celltype.stim>" to every cell name so the label is visible in exported column names.
G172_TPM_count <- RenameCells(G172_TPM_count, new.names = paste0(colnames(G172_TPM_count),"-",G172_TPM_count$celltype.stim))


# Subset G172_TPM_count to the cells whose active identity is one of "0".."7".
# NOTE(review): the active identity of G172_TPM_count was set to "celltype.stim"
# ("<cluster>_<stim>") just before this point, so bare cluster labels may not match - confirm.
KO.cells <- subset(G172_TPM_count, idents = c("0","1","2","3","4","5","6","7"))
# Use the "stim" metadata column as the active identity of the subset.
Idents(KO.cells) <- "stim"
# Print the current cell names for inspection.
colnames(KO.cells )
# Prefix every cell name with that cell's own stim label. The previous
# `colnames(KO.cells) <- lapply(...)` pasted the whole stim vector onto each name
# (one vector per cell instead of one string per cell); RenameCells with a
# per-cell character vector does the rename correctly.
KO.cells <- RenameCells(KO.cells, new.names = paste0(KO.cells$stim, "_", colnames(KO.cells)))

### to average out the matrix from KO cells
# One dense matrix of raw counts per identity 0..8 of G172_TPM_count.
# Fixes: raw.data.0 previously requested slot = c("counts","data"), which is not a
# single valid slot name; it now reads "counts" like its siblings. The partially
# matched `ident =` argument is spelled out as `idents =`.
# NOTE(review): the active identity of G172_TPM_count is "celltype.stim" at this
# point in the script, so numeric cluster labels may not exist as identities - confirm.
raw.data.0 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 0)])
raw.data.1 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 1)])
raw.data.2 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 2)])
raw.data.3 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 3)])
raw.data.4 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 4)])
raw.data.5 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 5)])
raw.data.6 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 6)])
raw.data.7 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 7)])
raw.data.8 <- as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, idents = 8)])


# For each of the two identities given, take the cells of G172_TPM_count with that
# identity and bind their raw counts next to their "data"-slot values; the two
# resulting matrices are then bound column-wise in the order given.
merged_counts_and_data <- function(first_ident, second_ident) {
    per_ident <- lapply(c(first_ident, second_ident), function(ident_name) {
        cbind(
            as.matrix(GetAssayData(G172_TPM_count, slot = "counts")[, WhichCells(G172_TPM_count, ident = ident_name)]),
            as.matrix(GetAssayData(G172_TPM_count, slot = "data")[, WhichCells(G172_TPM_count, ident = ident_name)])
        )
    })
    do.call(cbind, per_ident)
}

# Column layout per cluster: counts(M1), data(M1), counts(M2), data(M2).
TPMcount0 <- merged_counts_and_data('0_MergedM1', '0_MergedM2')
TPMcount1 <- merged_counts_and_data('1_MergedM1', '1_MergedM2')
TPMcount2 <- merged_counts_and_data('2_MergedM1', '2_MergedM2')
TPMcount3 <- merged_counts_and_data('3_MergedM1', '3_MergedM2')
TPMcount4 <- merged_counts_and_data('4_MergedM1', '4_MergedM2')
TPMcount5 <- merged_counts_and_data('5_MergedM1', '5_MergedM2')
TPMcount6 <- merged_counts_and_data('6_MergedM1', '6_MergedM2')
TPMcount7 <- merged_counts_and_data('7_MergedM1', '7_MergedM2')


# Exports for the per-sample objects. Unless noted, each matrix holds the raw
# counts followed column-wise by the "data"-slot values of the same object.
# NOTE(review): TPMcountBottom, TPMcountTop and TPMcountG172M1 are all built from
# G172.M1 with identical code - confirm whether "Bottom"/"Top" should use other objects.
TPMcountBottom <- cbind(as.matrix(GetAssayData(G172.M1, slot = "counts")), as.matrix(GetAssayData(G172.M1, slot = "data")))
write.csv(TPMcountBottom, "CountResult/counts_TPMcountBottom")

TPMcountTop <- cbind(as.matrix(GetAssayData(G172.M1, slot = "counts")), as.matrix(GetAssayData(G172.M1, slot = "data")))
write.csv(TPMcountTop, "CountResult/counts_TPMcountTop")

TPMcountTopM2 <- cbind(as.matrix(GetAssayData(G172.M2, slot = "counts")), as.matrix(GetAssayData(G172.M2, slot = "data")))
write.csv(TPMcountTopM2, "CountResult/counts_TPMcountTopG172M2")


TPMcountG172M1 <- cbind(as.matrix(GetAssayData(G172.M1, slot = "counts")), as.matrix(GetAssayData(G172.M1, slot = "data")))
write.csv(TPMcountG172M1, "CountResult/counts_TPMcountG172M1_Full")

# "data" slot only for M2. Fix: this export used to write TPMcountG172M1 to the
# M1 file a second time, so the M2 matrix was never saved.
TPMcountG172M2 <- as.matrix(GetAssayData(G172.M2, slot = "data"))
write.csv(TPMcountG172M2, "CountResult/counts_TPMcountG172M2_Full")

# NOTE(review): the two writes below export *_merge_avg objects that are not
# created in this part of the file, while the matrices computed here
# (TPMcountG172M1_merge / TPMcountG172M2_merge) are not written. The first output
# name also starts with "PMcount" where its sibling uses "TPMcount" - confirm both.
TPMcountG172M1_merge <- as.matrix(GetAssayData(G172.M1.merge, slot = "data"))
write.csv(TPMcountG172M1_merge_avg,"CountResult/PMcountG172M1_merge_avg")


TPMcountG172M2_merge <- as.matrix(GetAssayData(G172.M2.merge, slot = "data"))
write.csv(TPMcountG172M2_merge_avg, "CountResult/TPMcountG172M2_merge_avg")

# For the cells of `seurat_obj` whose identity is `cluster`, return the raw counts
# bound column-wise to the "data"-slot values.
counts_beside_data <- function(seurat_obj, cluster) {
    cbind(
        as.matrix(GetAssayData(seurat_obj, slot = "counts")[, WhichCells(seurat_obj, ident = cluster)]),
        as.matrix(GetAssayData(seurat_obj, slot = "data")[, WhichCells(seurat_obj, ident = cluster)])
    )
}

# Cluster 1 is taken from G172_TPM_count; clusters 2-12 come from `combined`.
TPMcount1 <- counts_beside_data(G172_TPM_count, 1)

TPMcount2 <- counts_beside_data(combined, 2)
TPMcount3 <- counts_beside_data(combined, 3)
TPMcount4 <- counts_beside_data(combined, 4)
TPMcount5 <- counts_beside_data(combined, 5)
TPMcount6 <- counts_beside_data(combined, 6)
TPMcount7 <- counts_beside_data(combined, 7)
TPMcount8 <- counts_beside_data(combined, 8)
TPMcount9 <- counts_beside_data(combined, 9)
TPMcount10 <- counts_beside_data(combined, 10)
TPMcount11 <- counts_beside_data(combined, 11)
TPMcount12 <- counts_beside_data(combined, 12)




# Write each per-cluster matrix to CountResult/ (relative to the working directory).
# Clusters 0-7 use the "counts_" file prefix; clusters 8-12 use "counts." instead.
write.csv(TPMcount0, "CountResult/counts_TPMcount0.csv")
write.csv(TPMcount1, "CountResult/counts_TPMcount1.csv")
write.csv(TPMcount2, "CountResult/counts_TPMcount2.csv")
write.csv(TPMcount3, "CountResult/counts_TPMcount3.csv")
write.csv(TPMcount4, "CountResult/counts_TPMcount4.csv")
write.csv(TPMcount5, "CountResult/counts_TPMcount5.csv")
write.csv(TPMcount6, "CountResult/counts_TPMcount6.csv")
write.csv(TPMcount7, "CountResult/counts_TPMcount7.csv")
write.csv(TPMcount8, "CountResult/counts.TPMcount8.csv")
write.csv(TPMcount9, "CountResult/counts.TPMcount9.csv")
write.csv(TPMcount10, "CountResult/counts.TPMcount10.csv")
write.csv(TPMcount11, "CountResult/counts.TPMcount11.csv")
write.csv(TPMcount12, "CountResult/counts.TPMcount12.csv")



#avg.KO.cells <- log1p(AverageExpression(KO.cells, verbose = FALSE)$RNA)  #original code log transformed
# Average RNA expression per active identity of KO.cells, used as returned
# (the log1p variant above is disabled); gene names are copied into a `gene` column.
avg.KO.cells <- (AverageExpression(KO.cells, verbose = FALSE)$RNA) 
avg.KO.cells$gene <- rownames(avg.KO.cells)

# Single gene to annotate on the scatter plots.
genes.to.label= ("ncRNA-inter-chr7-5998")
#genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
# NOTE(review): the CTRL/STIM aesthetics, the plot titles and avg.cd14.mono look carried over
# from an example workflow. avg.cd14.mono is not created in this part of the file, and the
# columns of avg.KO.cells are presumably named after the identities of KO.cells - confirm.
p1 <- ggplot(avg.KO.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE)
plot_grid(p1, p2)
# Embed and cluster KO.cells on the first 20 dimensions of the "pca" reduction:
# UMAP, neighbour graph, clustering at resolution 0.5, then tSNE.
KO.cells <- RunUMAP(KO.cells, reduction = "pca", dims = 1:20 )
KO.cells <- FindNeighbors(KO.cells, reduction = "pca", dims = 1:20)
KO.cells <- FindClusters(KO.cells, resolution = 0.5 )   
KO.cells <- RunTSNE(KO.cells, reduction = "pca", dims = 1:20)
 

# Same four steps for Hep.cells (an object created elsewhere in the file).
Hep.cells <- RunUMAP(Hep.cells, reduction = "pca", dims = 1:20 )
Hep.cells <- FindNeighbors(Hep.cells, reduction = "pca", dims = 1:20)
Hep.cells <- FindClusters(Hep.cells, resolution = 0.5 )   
Hep.cells <- RunTSNE(Hep.cells, reduction = "pca", dims = 1:20)
 

   
 # Visualization
# UMAP views of KO.cells: coloured by stim (p1), by mouse.sex (p2), and by the
# active identity with labels (p3, p4). Only p1 and p4 are drawn side by side.
p1 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(KO.cells, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(KO.cells, label = TRUE)
plot_grid(p1, p4)
DimPlot(KO.cells, reduction = "umap", split.by = "stim")


#hepatocyte cells

# Same set of views for Hep.cells.
p5 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "stim")
p6 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "mouse.sex")
p7 <- UMAPPlot(Hep.cells, reduction = "umap", label = TRUE)
p8 <- UMAPPlot(Hep.cells, label = TRUE)
plot_grid(p5, p8)
# Fix: the split-by-stim plot of the hepatocyte section drew KO.cells again
# (copy-paste from the section above); it now draws Hep.cells.
DimPlot(Hep.cells, reduction = "umap", split.by = "stim")


# Dense raw-count matrices for the cells of KO.cells in each of identities 0..5.
raw.data.KO.0 <- as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 0)])
raw.data.KO.1 <- as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 1)])
raw.data.KO.2 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 2)])
raw.data.KO.3 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 3)])
raw.data.KO.4 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 4)])
raw.data.KO.5 <-as.matrix(GetAssayData(KO.cells, slot = "counts")[, WhichCells(KO.cells, ident = 5)])


# Written as CSV content, although the file names carry no ".csv" extension.
write.csv(raw.data.KO.0, "CountResult/Markers/raw.data.KO.0")
write.csv(raw.data.KO.1, "CountResult/Markers/raw.data.KO.1")
write.csv(raw.data.KO.2, "CountResult/Markers/raw.data.KO.2")
write.csv(raw.data.KO.3, "CountResult/Markers/raw.data.KO.3")
write.csv(raw.data.KO.4, "CountResult/Markers/raw.data.KO.4")
write.csv(raw.data.KO.5, "CountResult/Markers/raw.data.KO.5")



# lncRNA expression on the KO.cells UMAP, split by stim, shown next to p1 and p4
# (whatever p1/p4 currently hold from the preceding visualization code).
f1 <- FeaturePlot(KO.cells, features = c('ncRNA-inter-chr7-5998'),  reduction = "umap", split.by = "stim")
plot_grid(f1,p1,p4) 

# Switch to the RNA assay and normalize it before plotting expression values.
DefaultAssay(KO.cells) <- "RNA"
KO.cells <- NormalizeData(KO.cells, verbose = FALSE)

# Violin plots per seurat_clusters, split by stim, stacked in a single column.
plots <- VlnPlot(KO.cells, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","Cyp2e1","Cyp2f2"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)

# Sub-cluster a subset of KO.cells.
# NOTE(review): the object is named Three_five_six but only identities "5" and "6" are selected - confirm.
Three_five_six <- subset(KO.cells, idents = c("5","6"))

# Embed and cluster the subset on PCA dimensions 1:20 (clustering resolution 1).
Three_five_six <- RunUMAP(Three_five_six, reduction = "pca", dims = 1:20 )
Three_five_six <- FindNeighbors(Three_five_six, reduction = "pca", dims = 1:20)
Three_five_six <- FindClusters(Three_five_six, resolution = 1 )   
Three_five_six <- RunTSNE(Three_five_six, reduction = "pca", dims = 1:20)

# p1..p4 are reused here and overwrite the plots made for KO.cells.
p1 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(Three_five_six, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(Three_five_six, label=TRUE)
plot_grid(p1,p4) 
DimPlot(Three_five_six, reduction = "umap", split.by = "stim")   

# Stored (not printed) lncRNA feature plot for the subset; overwrites the earlier f1.
f1 <- FeaturePlot(Three_five_six, features = c('ncRNA-inter-chr7-5998'),  reduction = "umap", split.by = "stim")


# Cells of `combined` in which ncRNA-inter-chr7-5998 is above 1.
# Fixes: (1) this selection used to be computed only after it had already been
# used by subset() below; (2) the feature was written as a quoted string, so the
# expression compared a character constant with 1 instead of the feature's
# values - a non-syntactic feature name must be back-quoted.
lnc5998.cells <- WhichCells(object = combined, expression = `ncRNA-inter-chr7-5998` > 1)

# Keep the selected cells that also have identity "1".
lnc5998 <- subset(combined, cells = lnc5998.cells, idents = "1")

# Embed and cluster the subset on PCA dimensions 1:20 (clustering resolution 1).
lnc5998 <- RunUMAP(lnc5998, reduction = "pca", dims = 1:20)
lnc5998 <- FindNeighbors(lnc5998, reduction = "pca", dims = 1:20)
lnc5998 <- FindClusters(lnc5998, resolution = 1)
lnc5998 <- RunTSNE(lnc5998, reduction = "pca", dims = 1:20)

# Visualization (p1..p4 are reused and overwrite earlier plots)
p1 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(lnc5998, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(lnc5998, label = TRUE)
plot_grid(p1, p4)
DimPlot(lnc5998, reduction = "umap", split.by = "stim")

# Fix: a console continuation prompt ("+") had been pasted into the argument
# list, which is a syntax error. The cell list is also restricted to cells that
# are present in the subset, since lnc5998.cells was selected on `combined`.
FeaturePlot(lnc5998, features = c("ncRNA-inter-chr7-5998"), split.by = "stim",
            cols = c("grey", "red"),
            cells = intersect(lnc5998.cells, colnames(lnc5998)),
            min.cutoff = 0.5)


# Switch to the RNA assay and normalize it before plotting expression values.
DefaultAssay(lnc5998) <- "RNA"
lnc5998 <- NormalizeData(lnc5998, verbose = FALSE)

# Violin plots per seurat_clusters, split by stim, stacked in a single column.
plots <- VlnPlot(lnc5998, features = c("Alb", "ncRNA-inter-chr7-5998","Cyp2b10","Cyp2e1","Cyp2f2"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)

 
# Pairwise distances between cells: scale.data is transposed so that each row
# passed to dist() is one cell.
d <- dist(t(GetAssayData(KO.cells, slot = "scale.data")))
# Run the MDS procedure, k determines the number of dimensions
mds <- cmdscale(d = d, k = 2)
# cmdscale returns the cell embeddings, we first label the columns to ensure downstream
# consistency
colnames(mds) <- paste0("MDS_", 1:2)
# We will now store this as a custom dimensional reduction called 'mds'
# (attached to whichever assay is currently the default of KO.cells)
KO.cells[["mds"]] <- CreateDimReducObject(embeddings = mds, key = "MDS_", assay = DefaultAssay(KO.cells))

# We can now use this as you would any other dimensional reduction in all downstream functions
DimPlot(KO.cells, reduction = "mds", pt.size = 0.5)

# Find differential markers

# Label every cell "<identity>_<stim>", keep the previous identity as `celltype`,
# and make the combined label the active identity of KO.cells.
KO.cells$celltype.stim <- paste(Idents(KO.cells), KO.cells$stim, sep = "_")
KO.cells$celltype <- Idents(KO.cells)
Idents(KO.cells) <- "celltype.stim"
# MAST test of identities 0/1/2 in G171B against the same identities in G171C.
# logfc.threshold and min.pct are given as FALSE (presumably acting as 0, i.e. no pre-filtering - confirm).
response3 <- FindMarkers(KO.cells, ident.1 = c("1_G171B","0_G171B","2_G171B"), ident.2 = c("1_G171C", "0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(response3, n = 15)


#DE1: Compare cluster 0+1+12 (that expressed lnc5998) with Other hepatocyte clusters (2+5+8)
#DE2: compare cluster 1 (showed major effects in the KD) vs Cluster 0 (that showed little KD)
#DE3: For KO.cells that formed five subcluster, compare clusters 3+4+5+1 vs 2+0
#DE4: for KO.cells that formed five clusters. Compare cluster 4 vs. 2

# All calls in this section pass logfc.threshold = FALSE and min.pct = FALSE
# (presumably acting as 0, so that no gene is pre-filtered - confirm).
# NOTE(review): the calls below use bare cluster labels ("0", "1", ...), whereas the
# G171B/G171C comparisons at the end of the section use "<cluster>_<stim>" labels. The active
# identity of the object has to match the labels of each call - confirm the intended run order.

# Clusters 0+1+12 of `combined` against 2+5+8, and against all remaining cells (no ident.2).
DE0112.258 <- FindMarkers(combined, ident.1 = c("0","1","12" ), ident.2 = c("2","5","8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE0112.All <- FindMarkers(combined, ident.1 = c("0","1","12" ), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
# Pairwise comparisons of cluster 1 with clusters 2, 5 and 8.
DE1.2 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("2"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.5 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("5"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE1.8 <- FindMarkers(combined, ident.1 = c("1" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


# Pairwise comparisons among clusters 2, 5 and 8.
DE2.5 <- FindMarkers(combined, ident.1 = c("2" ), ident.2 = c("5"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE2.8 <- FindMarkers(combined, ident.1 = c("2" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DE5.8 <- FindMarkers(combined, ident.1 = c("5" ), ident.2 = c("8"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


# KO.cells: clusters 3+4+5+1 against 2+0.
DEK4351.20 <- FindMarkers(KO.cells, ident.1 = c("3","4","5","1" ), ident.2 = c("2","0"),verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)

# KO.cells: cluster 4 against each of the other clusters in turn.
DEK4.3 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "3",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.2 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "2",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.0 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "0",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.1 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "1",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK4.5 <- FindMarkers(KO.cells, ident.1 = "4", ident.2 = "5",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)



##### comparison between G171B vs G171C for KO.cell clusters of 0+1+12 ######

# Within each KO.cells cluster (0-5): G171C cells (ident.1) against G171B cells (ident.2).
DEK4_C.B <- FindMarkers(KO.cells, ident.1 = "4_G171C", ident.2 = "4_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK3_C.B <- FindMarkers(KO.cells, ident.1 = "3_G171C", ident.2 = "3_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK1_C.B <- FindMarkers(KO.cells, ident.1 = "1_G171C", ident.2 = "1_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK2_C.B <- FindMarkers(KO.cells, ident.1 = "2_G171C", ident.2 = "2_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK0_C.B <- FindMarkers(KO.cells, ident.1 = "0_G171C", ident.2 = "0_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
DEK5_C.B <- FindMarkers(KO.cells, ident.1 = "5_G171C", ident.2 = "5_G171B",verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


################################### Write the results #########################
# Each result table is written under CountResult/Markers/ using the object's own
# name as the file name (CSV content, no file extension).

# Cluster comparisons on `combined`.
write.csv(DE0112.258, "CountResult/Markers/DE0112.258")
write.csv(DE0112.All, "CountResult/Markers/DE0112.All")
write.csv(DE1.2, "CountResult/Markers/DE1.2")
write.csv(DE1.5, "CountResult/Markers/DE1.5")
write.csv(DE1.8, "CountResult/Markers/DE1.8")
write.csv(DE2.5, "CountResult/Markers/DE2.5")
write.csv(DE2.8, "CountResult/Markers/DE2.8")
write.csv(DE5.8, "CountResult/Markers/DE5.8")

# Cluster comparisons on KO.cells.
write.csv(DEK4351.20, "CountResult/Markers/DEK4351.20")
write.csv(DEK4.3, "CountResult/Markers/DEK4.3")
write.csv(DEK4.2, "CountResult/Markers/DEK4.2")
write.csv(DEK4.0, "CountResult/Markers/DEK4.0")
write.csv(DEK4.1, "CountResult/Markers/DEK4.1")
write.csv(DEK4.5, "CountResult/Markers/DEK4.5")


# G171C vs G171B within each KO.cells cluster.
write.csv(DEK4_C.B, "CountResult/Markers/DEK4_C.B")
write.csv(DEK3_C.B, "CountResult/Markers/DEK3_C.B")
write.csv(DEK1_C.B, "CountResult/Markers/DEK1_C.B")
write.csv(DEK2_C.B, "CountResult/Markers/DEK2_C.B")
write.csv(DEK0_C.B, "CountResult/Markers/DEK0_C.B")
write.csv(DEK5_C.B, "CountResult/Markers/DEK5_C.B")




# Label every cell of `combined` as "<identity>_<stim>", keep the previous identity
# as `celltype`, and make the combined label the active identity.
combined$celltype.stim <- paste(Idents(combined), combined$stim, sep = "_")
combined$celltype <- Idents(combined)
Idents(combined) <- "celltype.stim"


# Same relabelling for test.combined (an object created elsewhere in the file).
test.combined$celltype.stim <- paste(Idents(test.combined), test.combined$stim, sep = "_")
test.combined$celltype <- Idents(test.combined)
Idents(test.combined) <- "celltype.stim"




# G171B cells of clusters 0-12 (ident.1) against G171C cells of the same clusters (ident.2).
# Note the direction: this call is B vs C, the per-cluster calls below are C vs B.
# logfc.threshold / min.pct are passed as FALSE throughout (presumably acting as 0 - confirm).
Combined_G171B_vs_G171C <- FindMarkers(combined, ident.1 = c("0_G171B","1_G171B","2_G171B","3_G171B","4_G171B","5_G171B","6_G171B","7_G171B","8_G171B","9_G171B","10_G171B","11_G171B","12_G171B" ), ident.2 = c("0_G171C","1_G171C","2_G171C","3_G171C","4_G171C","5_G171C","6_G171C","7_G171C","8_G171C","9_G171C","10_G171C","11_G171C","12_G171C" ), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


# Per-cluster G171C vs G171B: clusters 1, 0, 12, 2, 5 and 8 first ...
Combined_G171C_vs_G171B_Clust1 <- FindMarkers(combined, ident.1 = c("1_G171C"), ident.2 = c("1_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust0 <- FindMarkers(combined, ident.1 = c("0_G171C"), ident.2 = c("0_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust12 <- FindMarkers(combined, ident.1 = c("12_G171C"), ident.2 = c("12_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust2 <- FindMarkers(combined, ident.1 = c("2_G171C"), ident.2 = c("2_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust5 <- FindMarkers(combined, ident.1 = c("5_G171C"), ident.2 = c("5_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust8 <- FindMarkers(combined, ident.1 = c("8_G171C"), ident.2 = c("8_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)


# ... then the remaining clusters 3, 4, 6, 7, 9, 10 and 11.
Combined_G171C_vs_G171B_Clust3 <- FindMarkers(combined, ident.1 = c("3_G171C"), ident.2 = c("3_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust4 <- FindMarkers(combined, ident.1 = c("4_G171C"), ident.2 = c("4_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust6 <- FindMarkers(combined, ident.1 = c("6_G171C"), ident.2 = c("6_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust7 <- FindMarkers(combined, ident.1 = c("7_G171C"), ident.2 = c("7_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust9 <- FindMarkers(combined, ident.1 = c("9_G171C"), ident.2 = c("9_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust10 <- FindMarkers(combined, ident.1 = c("10_G171C"), ident.2 = c("10_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)
Combined_G171C_vs_G171B_Clust11 <- FindMarkers(combined, ident.1 = c("11_G171C"), ident.2 = c("11_G171B"), verbose = TRUE, logfc.threshold = FALSE,min.pct = FALSE)





# Write every per-cluster G171C-vs-G171B table under CountResult/Markers/, named after the object.
# Combined_G171B_vs_G171C (the all-cluster comparison) is not written in this section.
write.csv(Combined_G171C_vs_G171B_Clust1, "CountResult/Markers/Combined_G171C_vs_G171B_Clust1")
write.csv(Combined_G171C_vs_G171B_Clust0, "CountResult/Markers/Combined_G171C_vs_G171B_Clust0")
write.csv(Combined_G171C_vs_G171B_Clust12, "CountResult/Markers/Combined_G171C_vs_G171B_Clust12")
write.csv(Combined_G171C_vs_G171B_Clust2, "CountResult/Markers/Combined_G171C_vs_G171B_Clust2")
write.csv(Combined_G171C_vs_G171B_Clust5, "CountResult/Markers/Combined_G171C_vs_G171B_Clust5")
write.csv(Combined_G171C_vs_G171B_Clust8, "CountResult/Markers/Combined_G171C_vs_G171B_Clust8")

write.csv(Combined_G171C_vs_G171B_Clust3, "CountResult/Markers/Combined_G171C_vs_G171B_Clust3")
write.csv(Combined_G171C_vs_G171B_Clust4, "CountResult/Markers/Combined_G171C_vs_G171B_Clust4")
write.csv(Combined_G171C_vs_G171B_Clust6, "CountResult/Markers/Combined_G171C_vs_G171B_Clust6")
write.csv(Combined_G171C_vs_G171B_Clust7, "CountResult/Markers/Combined_G171C_vs_G171B_Clust7")
write.csv(Combined_G171C_vs_G171B_Clust9, "CountResult/Markers/Combined_G171C_vs_G171B_Clust9")
write.csv(Combined_G171C_vs_G171B_Clust10, "CountResult/Markers/Combined_G171C_vs_G171B_Clust10")
write.csv(Combined_G171C_vs_G171B_Clust11, "CountResult/Markers/Combined_G171C_vs_G171B_Clust11")





# MAST comparisons on KO.cells using "<cluster>_<stim>" identities.
# logfc.threshold / min.pct are passed as FALSE (presumably acting as 0 - confirm).
# Fix applied throughout: each head() now previews the result computed on the
# line above it; three of them previously printed a different or undefined object.

# Clusters 4+3 against clusters 0+2, within G171B only.
PC_vs_PP_G171B <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B"), ident.2 = c("0_G171B","2_G171B"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171B, n = 15)


# Same comparison within G171C only.
PC_vs_PP_G171C <- FindMarkers(KO.cells, ident.1 = c("4_G171C","3_G171C"), ident.2 = c("0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171C, n = 15)


# Same comparison with G171B and G171C cells pooled.
PC_vs_PP_G171BC <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B","4_G171C","3_G171C"), ident.2 = c("0_G171B","2_G171B","0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(PC_vs_PP_G171BC, n = 15)


# Clusters 4+3+5: G171B (ident.1) against G171C (ident.2).
lnc5998_KO_DE_1 <- FindMarkers(KO.cells, ident.1 = c("4_G171B","3_G171B","5_G171B"), ident.2 = c("4_G171C","3_G171C","5_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(lnc5998_KO_DE_1, n = 15)


# The same groups with the direction reversed: G171C (ident.1) against G171B (ident.2).
lnc5998_KO_DE_2 <- FindMarkers(KO.cells, ident.1 = c("4_G171C","3_G171C","5_G171C"), ident.2 =c("4_G171B","3_G171B","5_G171B") , verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(lnc5998_KO_DE_2, n = 15)

# GO-term lookup for the genes tested in PC_vs_PP_G171BC.
# `[1]` keeps the first column as a one-column data frame, so the gene names
# remain available as its row names.
cell.type.genes <- PC_vs_PP_G171BC[1]
# topGOterms is not defined in this part of the file (presumably provided by the
# sourced boilerplate - confirm). Foreground: genes of the comparison;
# background: every gene in the RNA assay's data slot.
# Fix: the first of the two previous calls passed the data frame itself as
# fg.genes and referenced a garbled slot path ("@dataKO.cells@assays$RNA@data");
# only the corrected call is kept.
GOterms <- topGOterms(fg.genes = rownames(cell.type.genes), bg.genes = rownames(KO.cells@assays$RNA@data), organism = "Mouse")
 

#' Average expression per identity class, computed on the values as stored
#'
#' For every requested assay and every identity level of `object`, takes the
#' plain arithmetic mean of each feature over the cells of that identity. The
#' values read from `slot` are averaged as they are; no transformation is
#' applied before or after averaging. The structure looks adapted from Seurat's
#' `AverageExpression` (NOTE(review): confirm against the installed version).
#'
#' @param object A Seurat object.
#' @param assays Names of the assays to average; `NULL` uses every assay found
#'   in `object`. Unknown names are dropped with a warning; if none remain an
#'   error is raised.
#' @param features Features to average; when none of them is present in an
#'   assay (including `NULL`), all features of that assay are used.
#' @param return.seurat If `TRUE`, return a Seurat object built from the
#'   averages (normalized and scaled) instead of a list.
#' @param add.ident Optional metadata variable; its value is appended to each
#'   cell's identity (`"<identity>_<value>"`) before averaging.
#' @param slot Assay slot to read, `"data"` by default.
#' @param use.scale,use.counts Accepted for signature compatibility; the body
#'   never reads them.
#' @param verbose Emit a message after each identity is averaged; also passed
#'   to `NormalizeData`/`ScaleData` when `return.seurat = TRUE`.
#' @param ... Forwarded to `CreateSeuratObject` when `return.seurat = TRUE`.
#' @return A named list with one data frame per assay (features in rows,
#'   identities in columns), or a Seurat object when `return.seurat = TRUE`.
AvergeExpression2 <- function (object, assays = NULL, features = NULL, return.seurat = FALSE, 
          add.ident = NULL, slot = "data", use.scale = FALSE, use.counts = FALSE, 
          verbose = TRUE, ...) 
{
    # The original switch() returned mean() for every slot, so use it directly.
    fxn.average <- mean
    object.assays <- FilterObjects(object = object, classes.keep = "Assay")
    assays <- assays %||% object.assays
    ident.orig <- Idents(object = object)
    orig.levels <- levels(x = ident.orig)
    ident.new <- c()
    if (!all(assays %in% object.assays)) {
        assays <- assays[assays %in% object.assays]
        if (length(x = assays) == 0) {
            stop("None of the requested assays are present in the object")
        }
        else {
            warning("Requested assays that do not exist in object. Proceeding with existing assays only.")
        }
    }
    if (!is.null(x = add.ident)) {
        # Refine the identities with the extra metadata variable.
        new.data <- FetchData(object = object, vars = add.ident)
        new.ident <- paste(Idents(object)[rownames(x = new.data)], 
                           new.data[, 1], sep = "_")
        Idents(object, cells = rownames(new.data)) <- new.ident
    }
    ident.levels <- levels(x = Idents(object))
    data.return <- list()
    # seq_along() instead of 1:length(): no iteration over c(1, 0) when empty.
    for (i in seq_along(along.with = assays)) {
        data.use <- GetAssayData(object = object, assay = assays[i], 
                                 slot = slot)
        features.assay <- features
        if (length(x = intersect(x = features, y = rownames(x = data.use))) < 
            1) {
            features.assay <- rownames(x = data.use)
        }
        # Restrict to features present in this assay BEFORE creating the result
        # frame. Previously the frame was created with all requested features
        # and the filtering happened inside the loop, so a partially matching
        # `features` vector produced rows filled by recycling.
        features.assay <- unique(x = intersect(x = features.assay, 
                                               y = rownames(x = data.use)))
        data.all <- data.frame(row.names = features.assay)
        for (j in ident.levels) {
            temp.cells <- WhichCells(object = object, idents = j)
            if (length(x = temp.cells) == 1) {
                # A single cell is its own average.
                data.temp <- (data.use[features.assay, temp.cells])
            }
            if (length(x = temp.cells) > 1) {
                data.temp <- apply(X = data.use[features.assay, 
                                                temp.cells, drop = FALSE], MARGIN = 1, FUN = fxn.average)
            }
            data.all <- cbind(data.all, data.temp)
            colnames(x = data.all)[ncol(x = data.all)] <- j
            if (verbose) {
                message(paste("Finished averaging", assays[i], 
                              "for cluster", j))
            }
            if (i == 1) {
                # Remember the original identity behind each (possibly refined) level.
                ident.new <- c(ident.new, as.character(x = ident.orig[temp.cells[1]]))
            }
        }
        names(x = ident.new) <- ident.levels
        data.return[[i]] <- data.all
        names(x = data.return)[i] <- assays[[i]]
    }
    if (return.seurat) {
        # First assay seeds the new object; remaining assays are attached to it.
        toRet <- CreateSeuratObject(counts = data.return[[1]], 
                                    project = "Average", assay = names(x = data.return)[1], 
                                    ...)
        if (length(x = data.return) > 1) {
            for (i in 2:length(x = data.return)) {
                toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]])
            }
        }
        if (DefaultAssay(object = object) %in% names(x = data.return)) {
            DefaultAssay(object = toRet) <- DefaultAssay(object = object)
        }
        Idents(toRet, cells = colnames(x = toRet)) <- ident.new[colnames(x = toRet)]
        Idents(object = toRet) <- factor(x = Idents(object = toRet), 
                                         levels = as.character(x = orig.levels), ordered = TRUE)
        toRet <- NormalizeData(object = toRet, verbose = verbose)
        toRet <- ScaleData(object = toRet, verbose = verbose)
        return(toRet)
    }
    else {
        return(data.return)
    }
}

# Find differential expression markers

# Positive markers for every identity of `combined`: expressed in at least 25% of
# cells and passing a log fold-change threshold of 0.25.
# Fix: the threshold was passed as `thresh.use`, which is the Seurat v2 argument
# name; the rest of this script uses the v3 interface, where the argument is
# `logfc.threshold`.
combined.markers <- FindAllMarkers(object = combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
# Top two markers per cluster by average log fold change.
combined.markers %>% group_by(cluster) %>% top_n(2, avg_logFC)

# Same marker search on KO.cells.
KO.markers <- FindAllMarkers(object = KO.cells, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)

# MAST test of identities 0/1/2 in G171B against the same identities in G171C on
# `combined`; this overwrites any earlier `response3`.
# logfc.threshold / min.pct are passed as FALSE (presumably acting as 0 - confirm).
response3 <- FindMarkers(combined, ident.1 = c("1_G171B","0_G171B","2_G171B"), ident.2 = c("1_G171C", "0_G171C","2_G171C"), verbose = TRUE, test.use = "MAST", logfc.threshold = FALSE,min.pct = FALSE)
head(response3, n = 15)

# Visualize top genes in principal components
#
# Later on (in FindClusters and TSNE) you will pick a number of principal components to use. This has the effect of keeping the major directions of variation in the data and, ideally, suppressing noise. There is no single correct number to use, but a decent rule of thumb is to keep adding components until the elbow plot plateaus.

# Elbow plot of the principal components of tiss1 (an object created elsewhere in the file).
PCElbowPlot(object = tiss1)

# Choose the number of principal components to use.

# Set number of principal components. 
# n.pcs is read further down as dims.use = 1:n.pcs by FindClusters and RunTSNE.
n.pcs = 10

# The clustering is performed on a nearest-neighbors graph. Cells that have similar expression will be joined together. The Louvain algorithm looks for groups of cells with high modularity, i.e. more connections within the group than between groups. The resolution parameter determines the scale: a higher resolution gives more clusters, a lower resolution gives fewer.
#
# For the top-level clustering, aim to under-cluster instead of over-cluster. It will be easy to subset groups and further analyze them below.

# Set resolution 
# This value is handed to FindClusters as `resolution`.
res.used <- 4
# Cluster tiss1 on principal components 1..n.pcs at that resolution.
tiss1 <- FindClusters(object = tiss1, reduction.type = "pca", dims.use = 1:n.pcs, 
    resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)

We use TSNE solely to visualize the data.

# t-SNE embedding on the first n.pcs components, for visualisation only.
# Raise the perplexity if cells look too spread out; lower it for small data
# sets, but never below 10.
tiss1 <- RunTSNE(
  object = tiss1,
  dims.use = 1:n.pcs,
  seed.use = 10,
  perplexity = 30
)
TSNEPlot(object = tiss1, do.label = TRUE, pt.size = 1.2, label.size = 4)

Compare to previous annotations

# Copy the earlier annotations onto tiss1 and cross-tabulate them against the
# current cluster identities.
previous_annotation <- read.csv("/Users/kkarri/Documents/Lab/Single_cell_project/dropseq/Liver_droplet_annotation.csv", stringsAsFactors = FALSE)
cols <- c('free_annotation', 'cell_ontology_class')
for (col in cols) {
  previous_col <- paste0('previous_', col)
  # Every cell starts with the literal string "NA"; cells listed in the
  # previous annotation are then overwritten.
  tiss1@meta.data[, previous_col] <- "NA"
  # meta.data rows are addressed by the identifiers in column X of the csv.
  tiss1@meta.data[as.character(previous_annotation$X), previous_col] <- previous_annotation[, col]
  print(table(tiss1@meta.data[, previous_col]))
  # Tabulate against tiss1's own identities. This previously read tiss@ident,
  # but this section works on tiss1 and the loader for `tiss` is commented out
  # at the top of the file.
  print(table(tiss1@meta.data[, previous_col], tiss1@ident))
}
    
# Attach the previous annotations through the project helper, then compare
# them with the current clustering. This stanza used to appear twice; the first
# copy tabulated against `tiss@ident` instead of tiss1, so only the corrected
# copy is kept.
tiss1 <- compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "previous_cell_ontology_class")
table(tiss1@meta.data[, "previous_cell_ontology_class"], tiss1@ident)
# Colour the same embedding by animal sex and by animal id.
TSNEPlot(tiss1, group.by = "mouse.sex")
TSNEPlot(tiss1, group.by = "mouse.id")

Significant genes:

hepatocyte: Alb, Ttr, Apoa1, and Serpina1c; pericentral: Cyp2e1, Glul, Oat, Gulo; midlobular: Ass1, Hamp, Gstp1, Ubb; periportal: Cyp2f2, Pck1, Hal, Cdh1

endothelial cells: Pecam1, Nrp1, Kdr+ and Oit3+; Kupffer cells: Emr1, Clec4f, Cd68, Irf7; NK/NKT cells: Zap70, Il2rb, Nkg7, Cxcr6, Klr1c, Gzma; B cells: Cd79a, Cd79b, Cd74 and Cd19; Immune cells: Ptprc

Dotplots let you see the intensity of expression and the fraction of cells expressing each of your genes of interest. The radius shows you the percent of cells in that cluster with at least one read sequenced from that gene. The color level indicates the average Z-score of gene expression for cells in that cluster, where the scaling is done over all cells in the sample.

We have various immune cell types in the last cluster

Using the markers above, we can confidently label many of the clusters:

19: endothelial cells 20: bile duct epithelial cells 21: immune cells rest are hepatocytes

We will add those cell_ontology_classes to the dataset.

# Save the current identities under the name "cluster.ids".
tiss1 <- StashIdent(object = tiss1, save.name = "cluster.ids")

# Clusters 0..20, in the same order as the two label vectors below.
cluster.ids <- as.numeric(0:20)

# Free-text labels: only clusters 18 and 19 receive one, every other entry is NA.
free_annotation <- c(
  rep(NA, 18),
  "bile duct epithelial cells",
  "endothelial cell of hepatic sinusoid",
  NA
)

# Ontology classes: hepatocyte everywhere except clusters 18 and 19.
cell_ontology_class <- c(
  rep("hepatocyte", 18),
  "duct epithelial cell",
  "endothelial cell of hepatic sinusoid",
  "hepatocyte"
)

# Hand the three parallel vectors to the project helper.
tiss1 <- stash_annotations(tiss1, cluster.ids, free_annotation, cell_ontology_class)

Checking for batch effects

Color by metadata, like plate barcode, to check for batch effects.

# Colour the t-SNE by the "channel" metadata column, then by the free-text
# annotation, to look for structure that follows batch rather than cell type.
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "channel")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "free_annotation")

Subcluster

Let’s drill down on the hepatocytes.

# Hepatocyte sub-analysis: keep the clusters listed in ident.use (0-17 and 20).
subtiss1 <- SubsetData(tiss1, ident.use = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20))

# Re-scale, re-select variable genes and re-run PCA on the subset, one explicit
# step at a time.
subtiss1 <- ScaleData(subtiss1)
subtiss1 <- FindVariableGenes(subtiss1, do.plot = FALSE, x.high.cutoff = Inf, y.cutoff = 0.5)
subtiss1 <- RunPCA(subtiss1, do.print = FALSE)

# Diagnostics used to choose the number of components for the subset.
PCHeatmap(object = subtiss1, pc.use = 1:3, cells.use = 20, do.balanced = TRUE,
          label.columns = FALSE, num.genes = 8)
PCElbowPlot(subtiss1)

sub.n.pcs <- 8
sub.res.use <- 0.5

# Cluster the subset and embed it with t-SNE using the same components.
subtiss1 <- FindClusters(subtiss1, reduction.type = "pca", dims.use = 1:sub.n.pcs,
                         resolution = sub.res.use, print.output = 0, save.SNN = TRUE,
                         force.recalc = TRUE)
subtiss1 <- RunTSNE(subtiss1, dims.use = 1:sub.n.pcs, seed.use = 10, perplexity = 8)

TSNEPlot(object = subtiss1, do.label = TRUE, pt.size = 1, label.size = 4)
BuildClusterTree(subtiss1)

From these genes, it appears that the clusters represent:

0: periportal female 1: midlobular male 2: pericentral female 3: periportal male 4: midlobular male 5: pericentral male 6: midlobular female 7: midlobular female

The multitude of clusters of each type correspond mostly to individual animals/sexes.

# Cross-tabulate animal sex against sub-cluster identity, dropping unused levels.
table(droplevels(FetchData(subtiss1, c('mouse.sex', 'ident'))))

# Sub-clusters 0..7 and their zone/sex labels, position by position.
sub.cluster.ids <- as.numeric(0:7)
sub.free_annotation <- c(
  "periportal female",
  "midlobular male",
  "pericentral female",
  "periportal male",
  "midlobular male",
  "pericentral male",
  "midlobular female",
  "midlobular female"
)
# Every sub-cluster is a hepatocyte at the ontology level.
sub.cell_ontology_class <- rep("hepatocyte", 8)

# Store the labels on the subset, then push them into the full object through
# the project helpers.
subtiss1 <- stash_annotations(subtiss1, sub.cluster.ids, sub.free_annotation, sub.cell_ontology_class)
tiss1 <- stash_subtiss_in_tiss(tiss1, subtiss1)

Liver zonation markers

# Zonation marker panel, grouped as in the marker list earlier in this notebook.
genes_zones <- c(
  'Cyp2e1', 'Glul', 'Oat', 'Gulo',   # pericentral
  'Ass1', 'Hamp', 'Gstp1', 'Ubb',    # midlobular
  'Cyp2f2', 'Pck1', 'Hal', 'Cdh1'    # periportal
)

# Per-cell expression of each marker on the hepatocyte subset.
FeaturePlot(subtiss1, genes_zones, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)

# Per-cluster dot plot of the same panel, with genes on the vertical axis.
DotPlot(subtiss1, genes_zones, plot.legend = TRUE, col.max = 2.5, do.return = TRUE) +
  coord_flip()

# t-SNE of the subset and of the full object, coloured by free-text annotation.
TSNEPlot(object = subtiss1, do.label = TRUE, pt.size = 1, label.size = 4, group.by = "free_annotation")

TSNEPlot(object = tiss1, do.label = TRUE, pt.size = 1, label.size = 4, group.by = "free_annotation")

Find cluster markers for lncRNAs


# Minimum average log fold change and minimum fraction of expressing cells
# required for a gene to be reported as a cluster marker.
MIN_LOGFOLD_CHANGE <- 1
MIN_PCT_CELLS_EXPR_GENE <- 0.1

# Positive markers for every tiss1 cluster, using the "bimod" likelihood-ratio test.
all.markers <- FindAllMarkers(tiss1,
                              min.pct = MIN_PCT_CELLS_EXPR_GENE,
                              logfc.threshold = MIN_LOGFOLD_CHANGE,
                              only.pos = TRUE,
                              test.use = "bimod")

# Take gene IDs from the `gene` column, not from the row names. The row names
# of the combined per-cluster table must be unique, so a gene that marks more
# than one cluster gets a numeric suffix appended, which yields IDs that do not
# exist in the expression matrix. unique() keeps each lncRNA once.
lnc_all_markers <- unique(grep(pattern = "^ncRNA", x = all.markers$gene, value = TRUE))
lnc_all_markers

# Output recorded from an earlier run that matched on row names. The first ID
# looks like a suffixed row name rather than a real gene ID:
#[1] "ncRNA_inter_chr10_92081" "ncRNA_intra_chr16_13383" "ncRNA_inter_chr17_13605" "ncRNA_inter_chr14_11815"
#[5] "ncRNA_inter_chr18_14344"

FeaturePlot(subtiss1, lnc_all_markers, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)

######################### lncRNA markers- CELL TYPE MARKER ############
# Clusters annotated as hepatocyte (0-17, 20) versus clusters 18 and 19.
# The fold-change cut-off is passed as `logfc.threshold`, the argument name used
# elsewhere in this file, instead of the legacy `thresh.use`.
markers.hep <- FindMarkers(object = tiss1, ident.1 = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20), ident.2 = c(18,19), only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
# Row names of a FindMarkers result are gene IDs; keep the lncRNA ones.
lnc_markers_hep <- grep(pattern = "^ncRNA", x = rownames(markers.hep), value = TRUE)
lnc_markers_hep
FeaturePlot(tiss1, lnc_markers_hep, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(tiss1, lnc_markers_hep, plot.legend = TRUE, col.max = 2.5, do.return = TRUE) + coord_flip()
# Output recorded from an earlier run:
#[1] "ncRNA_as_chr11_9423"     "ncRNA_as_chr7_6166"      "ncRNA_inter_chr4_3295"   "ncRNA_inter_chr17_14026"
#[5] "ncRNA_inter_chr3_2915"   "ncRNA_inter_chr5_4547"   "ncRNA_inter_chr15_12684"


# Same contrast with the MAST test and default thresholds; both the full
# statistics table and the plain ID vector are kept for the lncRNA hits.
markers.hep.MAST <- FindMarkers(object = tiss1, ident.1 = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,20), ident.2 = c(18,19), only.pos = TRUE, test.use = "MAST")
lnc_markers_hep_MAST_TABLE <- subset(markers.hep.MAST, grepl("^ncRNA", rownames(markers.hep.MAST)))
lnc_markers_hep_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.hep.MAST), value = TRUE)
lnc_markers_hep_MAST



# Markers of clusters 18 and 19 versus all remaining cells. The 0.5 cut-off is
# passed as `logfc.threshold`, the argument name used elsewhere in this file,
# instead of the legacy `thresh.use`.
# NOTE(review): free_annotation labels cluster 18 as bile duct epithelium and
# only 19 as endothelial, and the MAST call below uses 19 alone. Confirm which
# cluster set "endo" should mean here.
markers.endo <- FindMarkers(object = tiss1, ident.1 = c(18,19), only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5)
lnc_markers_endo <- grep(pattern = "^ncRNA", x = rownames(markers.endo), value = TRUE)
lnc_markers_endo
FeaturePlot(tiss1, lnc_markers_endo, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)
DotPlot(tiss1, lnc_markers_endo, plot.legend = TRUE, col.max = 2.5, do.return = TRUE) + coord_flip()


#"ncRNA_inter_chr15_12770", "ncRNA_inter_chr12_10817", "ncRNA_as_chr13_11451",


# Cluster 19 versus all remaining cells with MAST; keep both the statistics
# table and the ID vector for the lncRNA hits.
markers.endo.MAST <- FindMarkers(object = tiss1, ident.1 = 19, test.use = "MAST", only.pos = TRUE)
lnc_markers_endo_MAST_TABLE <- subset(markers.endo.MAST, grepl("^ncRNA", rownames(markers.endo.MAST)))
lnc_markers_endo_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.endo.MAST), value = TRUE)
lnc_markers_endo_MAST


################## lncRNA expression ########################3

# "ncRNA_inter_chr17_13605" , "ncRNA_intra_chr16_13383"

########## Pericentral markers - zonation markers ############
# Sub-clusters 2 and 5 carry the "pericentral female" / "pericentral male"
# labels in sub.free_annotation; here they are compared with all other
# hepatocyte sub-clusters using very permissive thresholds.
markers.pc <- FindMarkers(object = subtiss1, ident.1 = c(2,5),
                          only.pos = FALSE, min.pct = 0.001, logfc.threshold = 0.001, test.use = "bimod")

# Keep the statistics table under its own name. It was previously assigned to
# lnc_markers_pc and overwritten by the ID vector on the very next line.
lnc_markers_pc_TABLE <- subset(markers.pc, grepl("^ncRNA", rownames(markers.pc)))
lnc_markers_pc <- grep(pattern = "^ncRNA", x = rownames(markers.pc), value = TRUE)
lnc_markers_pc

# Pericentral (2,5) versus periportal (0,3) with MAST. This call used to run
# twice with identical arguments; it now runs once.
markers.pc.MAST <- FindMarkers(object = subtiss1, ident.1 = c(2,5), ident.2 = c(0,3), test.use = "MAST", only.pos = TRUE)
lnc_markers_pc_MAST_TABLE <- subset(markers.pc.MAST, grepl("^ncRNA", rownames(markers.pc.MAST)))
lnc_markers_pc_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.pc.MAST), value = TRUE)
lnc_markers_pc_MAST

DotPlot(tiss1, lnc_markers_pc, plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()
FeaturePlot(subtiss1, lnc_markers_pc, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)


############################### midlobular genes #############

# Sub-clusters 1, 4, 6 and 7 carry the midlobular labels in
# sub.free_annotation; compare them with all other hepatocyte sub-clusters.
markers.mid <- FindMarkers(object = subtiss1, ident.1 = c(1,4,6,7),
                           only.pos = FALSE, min.pct = 0.001, logfc.threshold = 0.05)

# Statistics table and ID vector get separate names; the table used to be
# overwritten by the vector.
lnc_markers_mid_TABLE <- subset(markers.mid, grepl("^ncRNA", rownames(markers.mid)))
lnc_markers_mid <- grep(pattern = "^ncRNA", x = rownames(markers.mid), value = TRUE)
lnc_markers_mid
DotPlot(tiss1, lnc_markers_mid, plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()


FeaturePlot(subtiss1, lnc_markers_mid, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)



# Same contrast with MAST, positive markers only.
markers.mid.MAST <- FindMarkers(object = subtiss1, ident.1 = c(1,4,6,7), test.use = "MAST", only.pos = TRUE)

lnc_markers_mid_MAST_TABLE <- subset(markers.mid.MAST, grepl("^ncRNA", rownames(markers.mid.MAST)))
lnc_markers_mid_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.mid.MAST), value = TRUE)
lnc_markers_mid_MAST


##### periportal marker genes #####

# Sub-clusters 0 and 3 carry the periportal labels in sub.free_annotation;
# compare them with all other hepatocyte sub-clusters.
markers.pp <- FindMarkers(object = subtiss1, ident.1 = c(0,3),
                          only.pos = FALSE, min.pct = 0.001, logfc.threshold = 0.05)


# Statistics table and ID vector get separate names; the table used to be
# overwritten by the vector.
lnc_markers_pp_TABLE <- subset(markers.pp, grepl("^ncRNA", rownames(markers.pp)))
lnc_markers_pp <- grep(pattern = "^ncRNA", x = rownames(markers.pp), value = TRUE)
lnc_markers_pp

# Periportal (0,3) versus pericentral (2,5) with MAST, positive markers only.
markers.pp.MAST <- FindMarkers(object = subtiss1, ident.1 = c(0,3), ident.2 = c(2,5), test.use = "MAST", only.pos = TRUE)

lnc_markers_pp_MAST_TABLE <- subset(markers.pp.MAST, grepl("^ncRNA", rownames(markers.pp.MAST)))
lnc_markers_pp_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.pp.MAST), value = TRUE)
lnc_markers_pp_MAST

FeaturePlot(subtiss1, lnc_markers_pp, cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
# Cyp2e1 and Cyp2f2 are added to the dot plot next to the lncRNA hits.
DotPlot(tiss1, c(lnc_markers_pp,"Cyp2e1","Cyp2f2"), plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()


################## male and female specific ############################

# Sub-clusters labelled female in sub.free_annotation (0, 2, 6, 7) versus all
# other hepatocyte sub-clusters.
markers.female <- FindMarkers(object = subtiss1, ident.1 = c(0,2,6,7),
                              only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1)

# Statistics table and ID vector get separate names; the table used to be
# overwritten by the vector.
lnc_markers_female_TABLE <- subset(markers.female, grepl("^ncRNA", rownames(markers.female)))
lnc_markers_female <- grep(pattern = "^ncRNA", x = rownames(markers.female), value = TRUE)
lnc_markers_female

FeaturePlot(subtiss1, lnc_markers_female, cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_female,"Cyp2e1","Cyp2f2"), plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()



# Sub-clusters labelled male (1, 3, 4, 5) versus all other hepatocyte
# sub-clusters. The thresholds here are much more permissive than for the
# female contrast above; they are kept as written.
markers.male <- FindMarkers(object = subtiss1, ident.1 = c(1,3,4,5),
                            only.pos = TRUE, min.pct = 0.001, logfc.threshold = 0.05)

lnc_markers_male_TABLE <- subset(markers.male, grepl("^ncRNA", rownames(markers.male)))
lnc_markers_male <- grep(pattern = "^ncRNA", x = rownames(markers.male), value = TRUE)
lnc_markers_male

FeaturePlot(subtiss1, lnc_markers_male, cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_male,"Cyp2e1","Cyp2f2"), plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()


############################ Sex- and zone-specific genes (tiss1 clusters) ###################################

# Four parallel contrasts on tiss1. In each, one group of clusters (ident.1) is
# compared with the clusters of the three other groups (ident.2), keeping
# positive markers with min.pct 0.1 and logfc.threshold 1.
# All four now use MAST. markers.periportal.female previously omitted test.use
# and so fell back to the package default test, which made it non-comparable
# with its three siblings.
markers.pericentral.female <- FindMarkers(object = tiss1, ident.1 = c(6,11,14,20), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,12,13,8,5,16), logfc.threshold = 1)

markers.periportal.female <- FindMarkers(object = tiss1, ident.1 = c(2,3,15), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(6,11,14,20,12,13,8,5,16), logfc.threshold = 1)


markers.pericentral.male <- FindMarkers(object = tiss1, ident.1 = c(13,12), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,8,5,16,6,11,14,20), logfc.threshold = 1)


markers.periportal.male <- FindMarkers(object = tiss1, ident.1 = c(8,5,16), test.use = "MAST",
                            only.pos = TRUE, min.pct = 0.1, ident.2 = c(2,3,15,13,12,6,11,14,20), logfc.threshold = 1)




############### xeno-lncs CAR?RXR ##################

# Feature plots on tiss1 for three hand-picked lncRNA IDs, with max.cutoff = 1.
FeaturePlot(tiss1,c("ncRNA_inter_chr15_12684","ncRNA_inter_chr8_7430","ncRNA_inter_chr7_6222"),cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)






#######################################################################

# Positive markers of the "Endothelial" identity in seurat_drop with a strict
# fold-change cut-off of 2. The call previously also passed the legacy
# `thresh.use = 0.25`, a second and conflicting spelling of the same cut-off;
# only `logfc.threshold` is kept.
markers.endo.2 <- FindMarkers(object = seurat_drop, ident.1 = "Endothelial",
                              logfc.threshold = 2, only.pos = TRUE, min.pct = 0.25)

lnc.endo.2 <- grep(pattern = "^ncRNA", x = rownames(markers.endo.2), value = TRUE)
lnc.endo.2

Zonated lncRNAs

# lncRNA IDs listed under "Zonated lncRNAs" (13 entries).
# 'ncRNA_inter_chr10_9264' was missing its closing quote, which made the whole
# vector unparseable.
pp_zontaed <- c(
  'ncRNA_inter_chr14_12016', 'ncRNA_as_chr19_15090', 'ncRNA_inter_chr10_9351', 'ncRNA_inter_chr16_13170',
  'ncRNA_inter_chr3_2697', 'ncRNA_inter_chr1_274', 'ncRNA_as_chr6_5518', 'ncRNA_inter_chr14_12066', 'ncRNA_intra_chr12_10871',
  'ncRNA_inter_chr16_13510', 'ncRNA_inter_chr3_2314', 'ncRNA_inter_chr10_9264', 'ncRNA_inter_chr9_8122'
)

Checking for batch effects

Color by metadata, like plate barcode, to check for batch effects.

# Colour the hepatocyte subset's t-SNE by the "mouse.sex" metadata column.
TSNEPlot(object = subtiss1, do.return = TRUE, group.by = "mouse.sex")

Final coloring

Color by cell ontology class on the original TSNE.

# Colour the full object's t-SNE by the stored cell_ontology_class labels.
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "cell_ontology_class")

Save the Robject for later

# Save the annotated object. The file name is built from tissue_of_interest;
# the previous spelling `tiss1ue_of_interest` is not defined anywhere visible
# and looks like a tiss -> tiss1 search-and-replace artefact.
# NOTE(review): the directory component '04_tiss1ue_robj_generated' may be the
# same artefact. It is left unchanged because the reload below uses the same
# path; confirm the directory name on disk.
filename <- here('00_data_ingest', '04_tiss1ue_robj_generated',
                 paste0("droplet_", tissue_of_interest, "refinedcells_seurat_tiss1.Robj"))
print(filename)
save(tiss1, file = filename)
# To reload a saved object. Note that this loads a different file from the one
# written above.
filename <- here('00_data_ingest', '04_tiss1ue_robj_generated',
                 paste0("droplet_", tissue_of_interest, "seurat_smartdrop-integrated-8272019.Robj"))
load(file = filename)

Export the final metadata

# Export the final metadata through the project helper. The second argument is
# tissue_of_interest; `tiss1ue_of_interest` was an undefined misspelling.
save_annotation_csv(tiss1, tissue_of_interest, "droplet")
---
 title: "Liver Droplet- Raw data Notebook"
 output: html_notebook
---




```{r}
tissue_of_interest = "Liver"
library(here)
source("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/boilerplate.R")
#tiss = load_tissue_droplet(tissue_of_interest)
#library(scater)
library(dplyr)
library(Seurat)
library(cowplot)
#library(MAST)
require(stringr)
require(reshape2)
require(ggplot2)
require(MASS)
library(tools)
require(data.table)
library(ggfortify)
library(tidyverse)
require(dplyr)
library(miscTools)
library(caret)
library(Rtsne)
library(ggrepel)
library(reticulate) # to import anndata use low version of gcc/5.50 in enviroenm sccloud.

```





```{r}
# demux results from demuxEM
# source : https://cumulus-doc.readthedocs.io/en/latest/hashing_cite_seq.html#load-demultiplexing-results-into-python-and-r
# The module name previously carried a stray apostrophe ("pytz'"), which is not
# an importable Python module name.
ad1 <- import("pytz", convert = FALSE)

ad <- import("anndata", convert = FALSE)
#data <- ad$read_h5ad("output_name_demux.h5ad")
data <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_demux.h5ad")
adtdata <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_ADTs.h5ad")
# NOTE(review): this is an .h5sc file read through read_h5ad; confirm the
# reader accepts that format.
scdata <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_demux.h5sc")

# Import the anndata predictions from Python to R and write them out.
# Each table is exported right after its own input is read. Previously `data`
# was overwritten by the Xist/ChrY file before the first export, so both output
# files received the same Xist/ChrY table.
rdata_obs <- py_to_r(data$obs)
write.table(as.data.frame(rdata_obs), "/restricted/projectnb/waxmanlab/kkarri/G172_demux/results/G172_demux_classification")

data <- ad$read_h5ad("/restricted/projectnb/waxmanlab/kkarri/software/Xist_ChrY/G172_M2-M4_Output_demux.h5ad")
rdata_obs <- py_to_r(data$obs)
write.table(as.data.frame(rdata_obs), "/restricted/projectnb/waxmanlab/kkarri/software/Xist_ChrY/G172_M2-M4_Xist-chrY_demux_classification", sep="\t")

```

```{r}

# Alternative input (transcript-level quantification), currently disabled:
#G172.umis <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/filtered_feature_bc_matrix")

# Filtered 10x feature-barcode matrix for G172 (gene-level quantification).
G172.umis <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Gene/outs/filtered_feature_bc_matrix")


# Row names starting with "ncRNA", and per barcode the number of those rows
# with a non-zero count.
ncRNA <- grep(pattern = "^ncRNA", x = rownames(x = G172.umis), value = TRUE)
lncRNA <- Matrix::colSums(G172.umis[ncRNA, ]>0)

#G172.umis.raw <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/10X_TCPO_premRNA_Transcript/outs/raw_feature_bc_matrix/")

# HTO counts restricted to filtered 10x barcodes.
# NOTE(review): G172.htos is assigned again a few lines below from the raw
# barcode results, so this value is overwritten when the chunk runs top to
# bottom. Confirm which input is intended.
G172.htos <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/HASH_Citeseq/HASH_Results_filtered-10X-BC/umi_count", gene.column=1)

# Xist / chrY table; the csv column "X" becomes the row names.
G172.Xist.ChrY <- read.csv("/restricted/projectnb/waxmanlab/kkarri/software/Xist_ChrY/G172_M2_M4_nonzero.csv", header = T, row.names = "X")


# HTO counts for the raw 10x barcodes (replaces the filtered version above).
G172.htos <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/HASH_Citeseq/HASH_Results_raw-10X-BC/umi_count", gene.column=1)


# demuxEM ADT output loaded straight from the h5ad file.
G172.demux.htos <- ReadH5AD("/restricted/projectnb/waxmanlab/kkarri/G172_demux/G172_demuxEM_citeUMI_ADTs.h5ad")
#G172top <- read.table("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/G172_Top_M1.txt", header = T, row.names = "cellbarcode") ### this is only for M1 top
# Per-barcode metadata; the "cellbarcode" column becomes the row names.
G172.hash.metadata <- read.table("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/Top_Bottom_Metadata.txt",header = T, sep = "\t", row.names = "cellbarcode")

# Restore the barcode as an ordinary column as well.
G172.hash.metadata$cellbarcode <- rownames(G172.hash.metadata)

# Rows whose Top.Bottom value matches "Top", then split by hash.ID matching M1 / M2.
G172top <- G172.hash.metadata[grep("Top",G172.hash.metadata$Top.Bottom),]
G172.top.M1 <- G172top[grep("M1",G172top$hash.ID),]
G172.top.M2 <- G172top[grep("M2",G172top$hash.ID),]
#G172bottom <- read.table("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/G172_Bottom_M1.txt", header = T, row.names = "cellbarcode") ## this is only for M1 bottom
# Same split for rows whose Top.Bottom value matches "Bottom".
G172bottom <- G172.hash.metadata[grep("Bottom",G172.hash.metadata$Top.Bottom),]
G172.bottom.M1 <- G172bottom[grep("M1",G172bottom$hash.ID),]
G172.bottom.M2 <- G172bottom[grep("M2",G172bottom$hash.ID),]

############################ All pre-labelled samples from demuxEM and HTodemux (Seurat) ###################
# Each table below is read with its "ID" column as row names; those row names
# are later intersected with the barcodes of G172.umis.
# "both": files named M<n>_both_*.
M1_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M1_both_2656.txt", row.names = "ID")
M2_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M2_both_2868.txt", row.names = "ID")
M3_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M3_both_2376.txt", row.names = "ID")
M4_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M4_both_2679.txt", row.names = "ID")
# "demuxEM": files named M<n>_demuxEM_*.
M1_demuxEM <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M1_demuxEM_1149.txt", row.names = "ID")
M2_demuxEM <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M2_demuxEM_3880.txt", row.names = "ID")
M3_demuxEM <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M3_demuxEM_1276.txt", row.names = "ID")
M4_demuxEM <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M4_demuxEM_1076.txt", row.names = "ID")
# "HTOdemux": files named M<n>_HTOdemux_*.
M1_HTOdemux <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M1_HTOdemux_827.txt", row.names = "ID")
M2_HTOdemux <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M2_HTOdemux_125.txt", row.names = "ID")
M3_HTOdemux <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M3_HTOdemux_827.txt", row.names = "ID")
M4_HTOdemux <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M4_HTOdemux_889.txt", row.names = "ID")

# "clean_both": files named M<n>_clean_both_*.
M1_clean_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M1_clean_both_782.txt", row.names = "ID")

M2_clean_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M2_clean_both_950.txt", row.names = "ID")

M3_clean_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M3_clean_both_852.txt", row.names = "ID")

M4_clean_both <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M4_clean_both_965.txt", row.names = "ID")



# "both_top80": files named M<n>_both_top80.
M1_both_top80 <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M1_both_top80.txt", row.names = "ID")

M2_both_top80 <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M2_both_top80.txt", row.names = "ID")

M3_both_top80 <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M3_both_top80.txt", row.names = "ID")

M4_both_top80 <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/demuxEM_HTodemux_analysis/M4_both_top80.txt", row.names = "ID")

#############################################################################

  
# Select the cell barcodes that the RNA matrix shares with each companion
# data set (HTO counts, Top/Bottom metadata, demuxEM ADTs, Xist/chrY table).
joint.bcs <- intersect(colnames(G172.umis), colnames(G172.htos))
joint.bcs.top <- intersect(colnames(G172.umis), rownames(G172top))
joint.bcs.bottom <- intersect(colnames(G172.umis), rownames(G172bottom))
# This line previously referenced `G172.`, a truncated name. The demuxEM ADT
# object loaded in this chunk is G172.demux.htos.
joint.demux <- intersect(colnames(G172.umis), colnames(G172.demux.htos))
joint.xist.chrY <- intersect(colnames(G172.umis), colnames(G172.Xist.ChrY))

# Subset RNA and HTO counts by joint cell barcodes
# NOTE(review): `df2` is not created in the visible part of this file; confirm
# where it comes from before running this line.
joint_cite1_cite2 <- intersect(colnames(G172.htos), colnames(df2))
# NOTE(review): the second subset indexes the already-subset matrix with
# barcodes computed from the full matrix. It fails if joint.xist.chrY contains
# a barcode that is not in joint.bcs.
G172.umis <- G172.umis[, joint.bcs]
G172.umis <- G172.umis[, joint.xist.chrY]

G172.htos <- as.matrix(G172.htos[, joint.bcs])
# Drop the fifth HTO row. NOTE(review): the row is selected by position, not
# name; confirm which tag that is.
G172.htos <- G172.htos[-5,]
G172.Xist.ChrY <- as.matrix(G172.Xist.ChrY[, joint.xist.chrY])


################################ joint barcodes for preloaded barcodes ###################
# For every pre-labelled table, keep the barcodes that are also columns of the
# (already subset) G172.umis matrix.
joint.bcs.M1.both <- intersect(colnames(G172.umis), rownames(M1_both))
joint.bcs.M2.both <- intersect(colnames(G172.umis), rownames(M2_both))
joint.bcs.M3.both <- intersect(colnames(G172.umis), rownames(M3_both))
joint.bcs.M4.both <- intersect(colnames(G172.umis), rownames(M4_both))
# joint.bcs is redefined here as the concatenation of the four "both" sets,
# replacing the RNA/HTO intersection computed earlier in this chunk.
joint.bcs <- c(joint.bcs.M1.both, joint.bcs.M2.both,joint.bcs.M3.both,joint.bcs.M4.both)

# Barcodes from the M<n>_demuxEM tables.
joint.bcs.M1.demuxEM <- intersect(colnames(G172.umis), rownames(M1_demuxEM))
joint.bcs.M2.demuxEM <- intersect(colnames(G172.umis), rownames(M2_demuxEM))
joint.bcs.M3.demuxEM <- intersect(colnames(G172.umis), rownames(M3_demuxEM))
joint.bcs.M4.demuxEM <- intersect(colnames(G172.umis), rownames(M4_demuxEM))

# Barcodes from the M<n>_HTOdemux tables.
joint.bcs.M1.HTOdemux <- intersect(colnames(G172.umis), rownames(M1_HTOdemux))
joint.bcs.M2.HTOdemux <- intersect(colnames(G172.umis), rownames(M2_HTOdemux))
joint.bcs.M3.HTOdemux <- intersect(colnames(G172.umis), rownames(M3_HTOdemux))
joint.bcs.M4.HTOdemux <- intersect(colnames(G172.umis), rownames(M4_HTOdemux))


# Top / Bottom metadata subsets for M1 and M2.
joint.bcs.M1.top <- intersect(colnames(G172.umis), rownames(G172.top.M1))
joint.bcs.M2.top <- intersect(colnames(G172.umis), rownames(G172.top.M2))
joint.bcs.M1.bottom <- intersect(colnames(G172.umis), rownames(G172.bottom.M1))
joint.bcs.M2.bottom <- intersect(colnames(G172.umis), rownames(G172.bottom.M2))


# Barcodes from the M<n>_clean_both tables.
joint.bcs.M1.clean.both <- intersect(colnames(G172.umis), rownames(M1_clean_both))
joint.bcs.M2.clean.both <- intersect(colnames(G172.umis), rownames(M2_clean_both))
joint.bcs.M3.clean.both <- intersect(colnames(G172.umis), rownames(M3_clean_both))
joint.bcs.M4.clean.both <- intersect(colnames(G172.umis), rownames(M4_clean_both))



# Barcodes from the M<n>_both_top80 tables.
joint.bcs.M1.both.top80 <- intersect(colnames(G172.umis), rownames(M1_both_top80))
joint.bcs.M2.both.top80 <- intersect(colnames(G172.umis), rownames(M2_both_top80))
joint.bcs.M3.both.top80 <- intersect(colnames(G172.umis), rownames(M3_both_top80))
joint.bcs.M4.both.top80 <- intersect(colnames(G172.umis), rownames(M4_both_top80))

  #colnames(raw.data) <- lapply(colnames(raw.data), function(x) paste0(tissue_metadata$channel[1],'_',x))
  
# Confirm that the HTO have the correct names
rownames(G172.htos)
#################### Before making a Seurat object remove sex-biased genes or TCPO-induced genes####################
```

```{r}
# Remove ERCC from count.data
#All.sex.index <- grep(c("Alb|Cyp2b10"), x = rownames(count.data), value = FALSE) 
# Row indices of G172.umis whose names match the first gene-symbol list
# (presumably sex-biased genes, going by the section banner -- confirm).
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap turned "Plscr2"
# into "Pls\ncr2", which can never match a row name.
# NOTE(review): the alternatives are unanchored, so short symbols such as
# "Ar" or "C6" also hit any row name that merely contains them -- confirm
# that this is intended.
x1 <- grep(paste0("1810046K07Rik|2700097O09Rik|A1bg|Abcb1a|Abcc1|Abcc4|Abcd2|Abhd2|Acnat2|Acot2|Acot3|Acot4|Acot6|Acss2|Acss3|Actg1|Adcy1|Adrb2|Aif1|Akna|Akr1b7|Akr1c18|Akr1c20|Akr1d1|Aldh1b1|Aldh3a2|Amigo2|Ankrd55|Anxa6|Ar|Arhgap30|Arhgef19|Arhgef37|Arl13b|Arrdc4|Asb2|Atf3|Atp2b2|Atp6v0d2|Atp8a1|B3galnt1|B430306N03Rik|Batf2|Bche|Bin2|Bmper|C5ar1|Cables1|Camk1d|Camkk2|Cbr1|Ccbe1|Ccdc141|Ccdc68|Cchcr1|Ccl2|Ccl5|Ccnd1|Cd14|Cd163|Cd274|Cd300e|Cd36|Cd3e|Cd44|Cd52|Cd53|Cd72|Cd74|Cd83|Cdcp1|Cdkn1c|Cdx4|Cdyl2|Cenpm|Cep152|Cers6|Ces1b|Ces1c|Cfp|Chic1|Chrna4|Clec12a|Clic3|Cmklr1|Cmpk2|Cnst|Corin|Crls1|Csf1|Ctbp2|Cth|Ctsc|Ctss|Cux2|Cxcl10|Cxcl13|Cxcl9|Cyba|Cybb|Cyp17a1|Cyp2a22|Cyp2a4|Cyp2a5|Cyp2b10|Cyp2b13|Cyp2b9|Cyp2c37|Cyp2c38|Cyp2c39|Cyp2c40|Cyp2c50|Cyp2c54|Cyp2c69|Cyp2g1|Cyp39a1|Cyp3a16|Cyp3a41a|Cyp3a41b|Cyp3a44|Cyp46a1|Cyp4a10|Cyp4a14|Cyp4a31|Cyp4f16|Cyp7a1|D130043K22Rik|Dapk1|Dct|Ddah1|Dhrs7|Dlg4|Dll1|Dntt|Dock10|Dock7|Dqx1|Dram1|Dusp6|Echdc3|Eci3|Eif2s3x|Eif4e3|Elf4|Emp2|Enpp1|Esr1|Esrrg|Evc|Evc2|Evi2a|Fabp7|Fam126a|Fam49a|Fam84b|Fam89a|Fancc|Far1|Fbxl21|Fcer1g|Fgd2|Fibin|Fign|Fmn1|Fmo1|Fmo2|Fmo3|Fmo4|Folr2|Fpr1|Fpr2|Fut8|Fyb|G6pdx|Gadd45b|Gas1|Gas6|Gbp10|Gbp2|Gbp3|Gbp4|Gbp5|Gbp6|Gbp7|Gbp8|Gimap8|Gm11437|Gm4841|Golt1a|Got1|Gpr65|Gpsm1|Gpx7|Grid1|Gsto2|Gstt1|Gstt3|Gypc|Gzmb|H2-Aa|H2-Ab1|H2-DMb1|H2-Eb1|H2-Q1|H2-Q4|H2-T24|Hacl1|Hamp|Hamp2|Hao2|Hbegf|Hck|Hcls1|Hexb|Hist1h4i|Hk3|Hpd|Hsd3b1|Htra4|Id1|Ifi202b|Ifi204|Ifi205|Ifi27l2b|Ifit1|Ifit2|Ifit3|Igfbp1|Igfbp3|Igsf6|Igtp|Il10ra|Il15|Il18bp|Il1b|Il2rg|Il7|Ildr2|Irgm1|Isg15|Itga4|Itga6|Itgal|Itgb2|Jazf1|Kcnj10|Kitl|Klf6|Klhl13|Klra2|Lacc1|Lck|Lect2|Lgals1|Lgr5|Lhpp|Lig1|Lrrc24|Lrrc25|Lrtm1|Lyz2|Maob|Marcksl1|Marco|Mat1a|Mbl2|Mdm1|Me2|Mest|Mfsd2a|Mgst3|Mkx|Mllt3|Mmd2|Mme|Mmrn2|Mpc1|Mroh6|Ms4a4d|Ms4a6c|Myo1f|N4bp2l1|Ncald|Ncf1|Ncf2|Ncf4|Ncmap|Ndrg1|Nfkb2|Ngfr|Nipal1|Nlrp3|Nnmt|Npas2|Nqo1|Nr4a1|Nrp2|Nt5e|Ntrk2|Oasl2|Oat|Orm3|P2ry4|Papss2|Parp11|Pbx1|Pced1b|Pcp4l1|Pcsk5|Pde1a|Pde4d|Pdk4|Pgm5|Pik3r5|Pira2|Pla1a|Pla2g4b|Plek|Plgrkt|Pls",
"cr2|Pnpla3|Pola1|Ppl|Prex1|Prlr|Prom1|Prom2|Psd|Psmb8|Psmb9|Ptgds|Ptpdc1|Ptprc|Pygo1|Rab27a|Rab30|Rac2|Raet1e|Rbl1|Rcan2|Rcan3|Rdh16|Ren1|Rfx4|Rgn|Rgs12|Rnd1|Rsph4a|Rtn4|S100a13|Sall1|Samd9l|Sbk1|Sdc1|Sept1|Serpina3g|Serpina6|Serpinb1a|Serpinb8|Serpine3|Setd4|Sgsm1|Sgsm2|Sh2d4a|Sh3bgrl3|Sh3yl1|Shroom1|Ski|Slc13a3|Slc15a3|Slc16a5|Slc16a7|Slc17a4|Slc22a26|Slc22a27|Slc22a29|Slc25a21|Slc25a24|Slc25a27|Slc25a34|Slc26a10|Slc34a2|Slc39a5|Slc47a1|Slc4a4|Slc6a12|Slco1a4|Slfn8|Smim24|Smtnl2|Snai2|Snx12|Sorl1|Sox7|Sparcl1|Spic|Spice1|Spred1|Src|Srgap3|Srl|Srrm4|St6galnac4|Stambpl1|Stat1|Stk39|Sult1a1|Sult1c2|Sult1d1|Sult1e1|Sult2a1|Sult2a2|Sult2a3|Sult2a5|Sult2a6|Sult3a1|Sybu|Syk|Sytl5|Tbc1d8|Tcf7|Tcn2|Tead1|Tenm3|Tgm1|Tgtp1|Tgtp2|Thbs2|Thy1|Timd4|Tlr13|Tm4sf4|Tm6sf2|Tmem173|Tmem200b|Tmem26|Tmem98|Tmsb4x|Tnfaip8|Tnfaip8l3|Tnfsf10|Tnik|Tox|Treml4|Trim13|Trim24|Trim30a|Trim80|Tspyl4|Ttc21b|Txndc16|Tyrobp|Uba7|Ugt1a1|Ugt1a5|Ugt2b37|Utp14b|Vcam1|Vldlr|Wfdc15b|Wfdc17|Wfdc2|Zap70|Zbp1|Zc2hc1a|Zc3h12d|Zfp292|Zfp36l1|Zfp429|Zfp820|Zfp947|Zfp960|1810055G02Rik|2010003K11Rik|2200002D01Rik|3110082I17Rik|9130409I23Rik|A2m|Aadat|Abcb10|Abcg2|Abhd17b|Abhd5|Acsf2|Acsm2|Adamtsl2|Adarb2|Adora1|Adrb3|Ak4|Alas2|Aldoc|Alpl|Aox1|Aox3|Apoa1|Apoc2|Apol10a|Aqp4|Arhgap44|Arl6ip1|Arsa|Arsg|Asns|Atg16l2|Atp8b4|B3galt1|B4galnt3|Bcl6|Bik|Bmp4|Bmyc|Bok|C6|C7|C8a|C8b|C9|Cad|Caln1|Camk2b|Capn8|Cblc|Ccdc171|Ccne1|Ccnf|Ces1e|Ces2b|Ces2c|Ces3a|Ces3b|Ces4a|Chac1|Chpt1|Chrna2|Ciart|Cib3|Cidec|Clec2h|Clstn3|Col27a1|Col5a3|Comt|Cpne2|Cpne8|Cpsf4l|Crlf2|Crybb3|Csad|Cspg5|Ctps|Cxcl14|Cyp21a1|Cyp2c67|Cyp2d40|Cyp2d9|Cyp2f2|Cyp2j9|Cyp2u1|Cyp3a11|Cyp4a12a|Cyp4a12b|Cyp4a32|Cyp4v3|Cyp7b1|Cyp8b1|Cys1|Dbp|Ddx3y|Derl3|Dirc2|Dnaic1|Dnajc12|Dpy19l1|Dpy19l3|Dsg1c|Efna3|Egfr|Eif2s3y|Eif4ebp3|Elovl3|Enpp2|Enpp3|Ephx1|Ephx2|Eps8l2|Erc2|Ero1lb|F2r|F830016B08Rik"), x = rownames(G172.umis), value = FALSE)
# Row indices of G172.umis whose names match the second gene-symbol list
# (regex alternation; grep() returns indices by default).
x2 <- grep(pattern = "Fabp5|Fam171b|Fam222a|Fam47e|Fbxo21|Fgf21|Fitm1|Fkbp11|Fmn2|Fst|Gcnt4|Gde1|Gdpd1|Gjc3|Glo1|Gm7694|Gna12|Gna14|Gpc1|Gpr12|Grem2|Grhpr|Grm8|Gse1|Gsta2|Gstm3|Gstp1|Gstp2|Gtf2b|Gys2|H1fx|H6pd|Hao1|Haus4|Hba-a1|Hba-a2|Hbb-bs|Hc|Hes6|Hp|Hpx|Hsbp1l1|Hsd3b2|Hsd3b5|Hspa1b|Hspb1|Hspb6|Hunk|Igsf23|Ihh|Ikbkg|Il12rb1|Il1r1|Impa2|Insc|Isyna1|Kctd15|Kdm5d|Keg1|Klhdc7a|Lama3|Lcn2|Ldhd|Lhx6|Lpl|Lrg1|Lrrc20|Magee1|Maml3|Mas1|Mcm10|Mmp15|Mn1|Mrgpre|Mri1|Mrvi1|Mthfd1l|Mthfd2|Mtmr7|Mtnr1a|Mug1|Mup1|Mup10|Mup11|Mup12|Mup13|Mup14|Mup15|Mup16|Mup17|Mup19|Mup20|Mup21|Mup3|Mup5|Mup6|Mup7|Mup9|Mycn|Myef2|Nat8|Nek2|Nhej1|Nlrp12|Nox4|Nudt7|Nuggc|Obp2a|Ociad2|Olfm3|Olfr613|Omd|Orm1|Orm2|Osgin1|Paqr7|Pard3b|Pdilt|Per2|Pfkfb3|Pgp|Pitx3|Pkdrej|Plekha1|Pnlip|Podn|Ppp1r1b|Proca1|Prss8|Prtn3|Rab34|Rarres1|Rassf3|Rbbp4|Retsat|Rnase13|Rnaseh2a|Rnaseh2b|Saa1|Saa2|Saa3|Scara5|Scp2|Sdr16c5|Sebox|Sel1l3|Selenbp2|Serinc2|Serpina11|Serpina12|Serpina1a|Serpina1c|Serpina1d|Serpina3a|Serpina3k|Serpina5|Serpina7|Serpina9|Serpine2|Slc12a4|Slc13a5|Slc15a5|Slc17a8|Slc22a28|Slc22a7|Slc25a33|Slc35e3|Slc41a2|Slc6a8|Slco1a1|Smpd3|Snhg11|Snx29|Sort1|Sox12|Spatc1l|Spon2|Spp1|Srd5a1|Stbd1|Stk19|Susd4|Sycp3|Tcaim|Tdo2|Tff3|Tmc7|Tmem41a|Tnfrsf12a|Tpmt|Tram2|Treh|Trip4|Tsku|Tspan33|Tspan4|Ttc39a|Ttc39c|Uba5|Ugdh|Ugt1a6b|Ugt2b1|Ugt2b35|Ugt2b38|Ugt2b5|Unc119|Unc13b|Unc5b|Usp2|Uty|Vmo1|Wee1|Wipi1|Wnk4|Xlr3a|Zbtb7c|Zdhhc2|Zfp445|Zfp687|Zfp809|Zfp872|Gm47283", x = rownames(G172.umis))
# Row indices of G172.umis whose names match the first ncRNA id list.
# The entry for chrX id 15546 was spelled "chrx" (lowercase) while every other
# X-chromosome id in these lists uses "chrX"; grep() is case-sensitive, so the
# pattern now accepts either spelling ("chr[xX]"). Anything that matched
# before still matches.
# NOTE(review): alternatives are unanchored, so e.g. "ncRNA_inter_chr1_290"
# also hits longer ids that contain it -- confirm that this is intended.
x3 <- grep("ncRNA_as_chr10_9385|ncRNA_as_chr11_10058|ncRNA_as_chr11_10170|ncRNA_as_chr14_11846|ncRNA_as_chr15_12679|ncRNA_as_chr15_12957|ncRNA_as_chr16_13512|ncRNA_as_chr17_13628|ncRNA_as_chr1_757|ncRNA_as_chr2_1125|ncRNA_as_chr2_1343|ncRNA_as_chr4_3230|ncRNA_as_chr4_3843|ncRNA_as_chr5_4731|ncRNA_as_chr5_4745|ncRNA_as_chr7_6007|ncRNA_as_chr8_7290|ncRNA_as_chr8_7332|ncRNA_as_chr9_7824|ncRNA_as_chr[xX]_15546|ncRNA_inter_chr10_9183|ncRNA_inter_chr10_9222|ncRNA_inter_chr10_9223|ncRNA_inter_chr10_9224|ncRNA_inter_chr12_10681|ncRNA_inter_chr12_10922|ncRNA_inter_chr1_290|ncRNA_inter_chr13_11189|ncRNA_inter_chr13_11437|ncRNA_inter_chr13_11438|ncRNA_inter_chr1_341|ncRNA_inter_chr14_11978|ncRNA_inter_chr14_12237|ncRNA_inter_chr15_12585|ncRNA_inter_chr15_12683|ncRNA_inter_chr15_12684|ncRNA_inter_chr16_13062|ncRNA_inter_chr16_13225|ncRNA_inter_chr16_13428|ncRNA_inter_chr16_13510|ncRNA_inter_chr18_14392|ncRNA_inter_chr18_14432|ncRNA_inter_chr18_14590|ncRNA_inter_chr19_14770|ncRNA_inter_chr19_14802|ncRNA_inter_chr1_977|ncRNA_inter_chr2_1422|ncRNA_inter_chr2_1424|ncRNA_inter_chr2_1430|ncRNA_inter_chr2_1432|ncRNA_inter_chr2_2012|ncRNA_inter_chr2_2016|ncRNA_inter_chr2_2017|ncRNA_inter_chr2_2085|ncRNA_inter_chr3_2743|ncRNA_inter_chr3_2915|ncRNA_inter_chr3_2937|ncRNA_inter_chr3_3002|ncRNA_inter_chr4_3141|ncRNA_inter_chr4_3177|ncRNA_inter_chr4_3344|ncRNA_inter_chr5_4115|ncRNA_inter_chr5_4116|ncRNA_inter_chr5_4365|ncRNA_inter_chr5_4527|ncRNA_inter_chr5_4763|ncRNA_inter_chr5_4764|ncRNA_inter_chr6_5138|ncRNA_inter_chr6_5253|ncRNA_inter_chr6_5691|ncRNA_inter_chr7_6113|ncRNA_inter_chr8_6766|ncRNA_inter_chr8_6946|ncRNA_inter_chr8_7105|ncRNA_inter_chr8_7334|ncRNA_inter_chr8_7684|ncRNA_inter_chr9_7878|ncRNA_inter_chr9_8015|ncRNA_inter_chr9_8249", x = rownames(G172.umis), value = FALSE)
# Row indices of G172.umis whose names match the second ncRNA id list
# (regex alternation; grep() returns indices by default).
x4 <- grep(pattern = "ncRNA_inter_chrX_15376|ncRNA_inter_chrX_15394|ncRNA_inter_chrX_15549|ncRNA_intra_chr15_12674|ncRNA_intra_chr15_12675|ncRNA_intra_chr19_15008|ncRNA_intra_chr19_15009|ncRNA_intra_chr19_15014|ncRNA_intra_chr5_4726|ncRNA_intra_chr5_4728|ncRNA_intra_chr5_4730|ncRNA_intra_chr7_5920|ncRNA_intra_chr7_6001|ncRNA_intra_chr8_7308|ncRNA_intra_chr8_7310|ncRNA_as_chr10_9240|ncRNA_as_chr1_231|ncRNA_as_chr14_12074|ncRNA_as_chr15_12874|ncRNA_as_chr19_14711|ncRNA_as_chr19_15054|ncRNA_as_chr3_2192|ncRNA_as_chr3_2800|ncRNA_as_chr4_3297|ncRNA_as_chr6_5601|ncRNA_as_chr6_5851|ncRNA_as_chr7_6302|ncRNA_as_chr8_7087|ncRNA_as_chr9_8142|ncRNA_as_chr9_8420|ncRNA_as_chrX_15318|ncRNA_inter_chr10_8466|ncRNA_inter_chr10_8767|ncRNA_inter_chr10_8829|ncRNA_inter_chr10_8999|ncRNA_inter_chr10_9123|ncRNA_inter_chr10_9138|ncRNA_inter_chr10_9245|ncRNA_inter_chr10_9313|ncRNA_inter_chr10_9349|ncRNA_inter_chr10_9351|ncRNA_inter_chr11_10078|ncRNA_inter_chr11_10159|ncRNA_inter_chr11_10162|ncRNA_inter_chr1_115|ncRNA_inter_chr11_9925|ncRNA_inter_chr12_10423|ncRNA_inter_chr12_10454|ncRNA_inter_chr12_10458|ncRNA_inter_chr12_10562|ncRNA_inter_chr12_10910|ncRNA_inter_chr1_234|ncRNA_inter_chr13_11222|ncRNA_inter_chr13_11553|ncRNA_inter_chr13_11670|ncRNA_inter_chr13_11782|ncRNA_inter_chr1_406|ncRNA_inter_chr14_11858|ncRNA_inter_chr14_11989|ncRNA_inter_chr14_12201|ncRNA_inter_chr1_420|ncRNA_inter_chr1_477|ncRNA_inter_chr15_12341|ncRNA_inter_chr15_12609|ncRNA_inter_chr15_12937|ncRNA_inter_chr16_13170|ncRNA_inter_chr16_13173|ncRNA_inter_chr16_13176|ncRNA_inter_chr16_13177|ncRNA_inter_chr16_13349|ncRNA_inter_chr16_13451|ncRNA_inter_chr16_13470|ncRNA_inter_chr16_13471|ncRNA_inter_chr1_630|ncRNA_inter_chr17_13817|ncRNA_inter_chr17_13827|ncRNA_inter_chr17_13938|ncRNA_inter_chr17_13940|ncRNA_inter_chr17_13983", x = rownames(G172.umis))
# Row indices of G172.umis whose names match the third ncRNA id list
# (regex alternation; grep() returns indices by default).
x5 <- grep(pattern = "ncRNA_inter_chr17_13986|ncRNA_inter_chr17_14028|ncRNA_inter_chr17_14151|ncRNA_inter_chr1_734|ncRNA_inter_chr18_14223|ncRNA_inter_chr19_14853|ncRNA_inter_chr19_14873|ncRNA_inter_chr1_931|ncRNA_inter_chr2_1156|ncRNA_inter_chr2_1157|ncRNA_inter_chr2_1232|ncRNA_inter_chr2_1233|ncRNA_inter_chr2_1462|ncRNA_inter_chr2_1471|ncRNA_inter_chr2_1479|ncRNA_inter_chr2_1502|ncRNA_inter_chr2_1689|ncRNA_inter_chr2_2002|ncRNA_inter_chr2_2037|ncRNA_inter_chr3_2314|ncRNA_inter_chr3_2504|ncRNA_inter_chr3_2786|ncRNA_inter_chr3_2935|ncRNA_inter_chr4_3079|ncRNA_inter_chr4_3468|ncRNA_inter_chr4_3723|ncRNA_inter_chr5_4681|ncRNA_inter_chr5_4787|ncRNA_inter_chr5_4819|ncRNA_inter_chr6_5023|ncRNA_inter_chr6_5248|ncRNA_inter_chr6_5309|ncRNA_inter_chr6_5316|ncRNA_inter_chr6_5551|ncRNA_inter_chr6_5595|ncRNA_inter_chr7_5922|ncRNA_inter_chr7_6087|ncRNA_inter_chr7_6220|ncRNA_inter_chr7_6222|ncRNA_inter_chr7_6509|ncRNA_inter_chr8_6739|ncRNA_inter_chr8_6741|ncRNA_inter_chr8_6876|ncRNA_inter_chr8_6971|ncRNA_inter_chr8_7280|ncRNA_inter_chr8_7420|ncRNA_inter_chr8_7423|ncRNA_inter_chr8_7430|ncRNA_inter_chr9_7753|ncRNA_inter_chr9_8099|ncRNA_inter_chr9_8122|ncRNA_inter_chr9_8383|ncRNA_intra_chr12_10856|ncRNA_intra_chr12_10872|ncRNA_intra_chr15_12911|ncRNA_intra_chr18_14533|ncRNA_intra_chr19_14773|ncRNA_inter_chrX_15394", x = rownames(G172.umis))


# Row indices of G172.umis whose names match the first TCPO gene-symbol list
# (regex alternation; grep() returns indices by default).
TCPO1 <- grep(pattern = "Corin|Ppp1r3c|Xlr3a|Xlr3b|Cyp2a5|Ikbke|Gdf15|Anln|Rarb|Cdca2|Gna14|Kif20b|Tpx2|Cenpe|Arhgef39|Kif2c|Cxcl1|Mmd2|Ephb6|Wipf3|Gprc5d|Ttyh1|Atp4a|Plekha4|Mrgprb3|Bcl7c|Prr36|Fgl1|Slc18a1|Fcho1|Gm10638|St3gal4|Qsox1|Armc2|Dnajc12|Avpr1a|Kif5a|Camk2b|Olfr1393|Sgsm2|Capn8|Gpx6|Dcdc2a|Cxcl14|Lrit2|Sftpa1|Slc39a2|Tspyl5|Cpne8|Npff|Gp5|Prr18|Caskin1|Fam129b|Zfp811|Abcg5|Abcg8|Zbtb7c|Rom1|Cnnm1|Timp1|Xlr3c|Slc1a2|Spag4|Gm826|Rbpjl|Satb2|Gm6525|Ankrd6|Tmem169|Ttll10|Crot|Zfyve28|Ncapg|Ccdc18|Trpm8|Ugt1a5|Rph3a|Fzd10|Akr1d1|Zfp398|Gprin3|Mad2l1|Bmp10|Kbtbd12|Fancd2|Cdca3|Rad51ap1|Foxm1|Ptpn4|Bhlhe41|Lig1|Cyp2g1|Nphs1|Ccne1|Ticrr|Gsta3|Fzd4|Cdk18|Plk1|Kif22|Chid1|Shcbp1|Kif14|Cenpu|Neil3|Aspm|Ankle1|Pdp2|Gins2|Sipa1l2|Mmp12|Sesn3|Fam129a|Ccdc15|Cbl|H2afx|Apoa4|Cyp1a1|Kif23|Zwilch|Pif1|Traip|Fam198a|Kif15|Adgb|Sult3a1|Cdk1|Parpbp|Gas2l3|Kitl|Syt1|Nuf2|Cobl|Uhmk1|Fbxo48|Spdl1|Hmmr|Olfr16|Aurkb|Spag5|Exo1|Ska2|Lrrc46|Brca1|BC030867|Kif18b|Birc5|Rrm2|Rdh11|Plekhh1|Serpina6|Ncapg2|Adarb2|Zfp184|Hist1h2bk|Hist1h2be|Ppp1r3g|Atf3|Zfp369|Cenph|Meig1|Wnt5a|Cdkn3|Dlgap5|Tnfrsf19|Pbk|Klf12|Lmbrd2|Nebl|Recql4|Troap|Sapcd2|Cdc45|Polq|C330027C09Rik|Epha3|Ncam2|Cyyr1|Zfp97|Pkmyt1|Chtf18|Neurl1B|Kifc5b|Kifc1|Emilin2|Ndc80|Strn|Fzd8|Mib1|Stard4|Cdc25c|Kif20a|Ldlrad4|Mc5r|Ska1|Cfap53|Cyb5a|Gstp2|Incenp|Fen1|Ptar1|Kif11|Cep55|Cyp2c66|Lcor|4933411K16Rik|Slc9a7|Kif4|Fancb|Kif18a|Arhgap11a|Bub1b|Ncaph|Bub1|Wfdc16|Ube2c|Aurka|Gm14440|Ccna2|Plk4|Trim59|Tlr2|Iqgap3|Them5|Hist2h2bb|Nbeal1|Psrc1|Pard3b|Depdc1a|Mms22l|Melk|Smc2|Ptgr1|Mup19|Mup5|Mup21|Mup11|Mup15|Acot11|Orc1|Cdkn2c|Stil|Rad54l|Ptch2|Cdca8|Clspn|Aunip", x = rownames(G172.umis))
# Row indices of G172.umis whose names match the second TCPO gene-symbol list
# (regex alternation; grep() returns indices by default).
TCPO2 <- grep(pattern = "Megf6|Vwa1|Cgref1|Sult5a1|Dio1|Ren1|Slc15a2|A1bg|Mup17|Trpv1|Klhl25|Cbr1|Gpcpd1|A2m|Cadm4|Ppp1r14a|Saa3|Rad51b|Fos|Dsg1c|Tchh|Crym|Gmnn|Bik|Trib3|Nnmt|E2f2|Sult1d1|Por|Mad1l1|Cyp3a11|Gadd45a|Cyp2b10|Tsku|Ces2a|4931406C07Rik|Alas1|Gadd45b|9130409I23Rik|Pnpla3|Dtx4|Cyp2c55|Cyp2c29|Rarres1|Gstm3|Pla2g12a|Rdh16|Gadd45g|Ttll13|Rnf186|Tmub1|Fabp2|Sult1e1|Katna1|Cyp21a1|Cyp3a44|Cyp2c40|Cyp2c69|Rdh9|Dbp|Atp2b2|Cdc20|Slc34a2|Akr1b7|Taf1d|Trmt61a|Nedd9|Pfkfb3|Gm14295|Gm14403|Exosc8|D630039A03Rik|Mug2|Pkp3|Derl3|Ndufa4l2|Slc16a11|Sema5b|Bmf|Ell3|Dnaic1|Cyp4a14|Cyp4a10|Azin2|Mt2|Mt1|Fbxo31|Igfbp1|Slc13a5|Pole|Ckap2|Apol10a|Sult1c2|Tcp11l1|Hao2|Mup12|Slc10a2|Colgalt2|Psmc3ip|Ttll8|Ptges|Gstm2|Gstm1|Ctps|Chka|Gjb4|Cdkn1a|Mki67|Cenpf|Ccnb1|Espl1|Knstrn|Thrsp|Shroom1|Hspa2|Pusl1|Hmgn3|Btnl9|Barhl1|Ttbk1|Stk36|Ihh|Saa2|Sebox|Cyb561|Slc7a15|Adamtsl2|Rbm20|Map3k6|Sc5d|Mis18bp1|Mastl|Chaf1a|Slc22a26|Slco1a4|Myc|Mbd1|Orm3|Fgf21|Adrb3|Serpina3n|Arid5a|Apol10b|Mup16|Vps37b|Rhbdd2|Papss2|Cyp26a1|Prss22|E030018B13Rik|Socs2|Upp2|Lnx1|Cux2|Zbed5|Slc12a9|Nxpe5|Sun1|3110082I17Rik|Tmem184a|Cyp3a16|Gpr12|Pon1|Hes6|Akr1b8|Trim24|Atoh8|Pparg|Gucy2c|Gys2|Ldhb|Rep15|Zfp773|Grik5|Phldb3|Lgi4|Nupr1|Adora1|Oat|B4galnt4|Lrrc56|Slc25a22|Cdkn1c|Ano1|Eif4ebp1|Rfxank|Pbx4|Slc27a1|Klf1|Hook2|Spata2L|Agt|Fam89a|Ccdc151|Oaf|1810046K07Rik|Pcbp4|Gnat1|Slc38a3|Mst1r|Col7a1|Plxnb1|Tmie|Acaa1a|Acvr2b|Cyp8b1|Tpd52l1|Fmo3|Col18a1|Tle2|Mterf2|Mgst3|Lrp1|B4galnt1|Itga7|Gls2|Tbc1d10a|Srebf1|Trp53i13|Rara|Arl4d|Rundc3a|Slc16a5|Llgl2|Tmc6|Plekhg3|Acot3|Rps6kl1|Eml1|D130043K22Rik|Foxq1|Fbp1|Ube2ql1|Slc12a7|Ccno|Mcm10|Il17rb|Rpgrip1|Rabggta|Tgm1|Irf9|Gulo|Sorbs3|Dct|Kifc2|Pdxp|Pmm1|Cenpm|Arhgap8|Celsr1|Chkb|Rnd1|Lmbr1l|Csad|Smim22|Dexi|Prodh|Clcn2|Agpat2|Sardh|Crat|Gm9992|Slc22a3|Unc93a|Ass1|Pim1|Slc25a25|St6galnac6|Cyp4f16|Gnmt|Plin5|Egr1|Gnpda1|Ppargc1b|Arhgef37|Pcyox1l|Bad|Macrod1|Slc22a29|Rab3il1|Trpm6|Got1|Col17a1|Gsto1|Plekhs1|Slc7a3|Mum1l1|Pdk1|Pla2g4f|Prom2|Acss2|Neurl2|Slc2a2|Thbs3", x = rownames(G172.umis))
# Row indices of G172.umis whose names match the third TCPO gene-symbol list.
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap produced the
# alternative "Chst3\n", which can never match the row name "Chst3".
# NOTE(review): the alternatives are unanchored, so short symbols such as
# "Xk" or "Jun" also hit any row name that merely contains them -- confirm
# that this is intended.
TCPO3 <- grep(paste0("Il6ra|Selenbp1|Anxa9|Ctsk|Alpk1|Cth|Col27a1|Ccdc30|Hpca|Ptpru|Tmem200b|Cnksr1|Paqr7|Ifnlr1|Alpl|Rap1gap|Klhdc7a|Ugt2b35|Mob1b|Dr1|Ugt1a9|Hsph1|C1galt1|Grm8|Zfp212|Kbtbd8|B4galnt3|Slc15a5|Pla2g4c|Zfp619|Gas2|Cd55|Yod1|Prss23|Acer3|Ipo7|Mrvi1|Dkk3|Mical2|Psma1|Pde3b|Pdilt|Zfp768|Tnks|Ppp1r3b|Zfp617|Calr|Edem3|Swsap1|Zfp872|Hspa8|Tagln|Tex12|Arpp19|Pgm3|Spsb4|Poc1a|Manf|Prkar2a|Epm2aip1|Higd1a|Abhd5|Ginm1|Cacybp|Ddx21|Timp3|Creg1|Arl1|Dusp6|Wif1|Prim1|Irgm1|Hnrnpab|Bcl6b|Psmd11|Pigw|Rad51c|Nme1|Gm11541|Lsm12|Efcab3|Kpna2|Desi2|Zfp750|Rsad2|Dnajb9|Pygl|Pigh|Adam4|Tmed8|Alkbh1|Tc2n|Hsp90aa1|Tnfaip2|Nid1|Inhba|Rnf144b|Lysmd3|F2r|Itga1|Spryd7|Slc25a30|Lifr|Atad2|Ndrg1|St3gal1|Cyp2d9|Cyp2d12|Klhdc7b|Ankrd39|Fgd4|Sdf2l1|Tubb4b|Alg3|Map3k13|Lrrc58|Naa50|Dzip3|Rab44|Glo1|Ppp1r10|Enpp4|Catsperd|Nudt12|Srd5a2|Yipf4|Zbtb26|Psma8|Scai|Rnf125|Tmed7|Slc22a28|Ms4a4d|Mamdc2|Pik3ap1|Cox15|Xk|Pgrmc1|Zbtb33|Klhl15|Yipf6|Cdx4|Chic1|Npas2|Reps2|Mospd2|Cwc22|Sppl2a|Atrn|Pcna|4921509C19Rik|Rbl1|Zgpat|Il7|Supt20|Tcf24|Serp1|P2ry12|Ssr3|Aox1|S100a10|Nop58|Gstm4|5330417C22Rik|Prdm14|Xkr9|Rnpc3|Tmem56|Alg14|Fubp1|Fzd5|Ndufaf4|Mup3|Mup1|Mup10|Dph2|Akirin1|AW011738|Keg1|Per3|Insc|Nrg4|Ptp4a1|Abcc3|1810032O08Rik|Slc46a3|Rnf152|Steap3|Bbc3|Cyp2a4|Cyp2a22|Sh2d4a|Gamt|Tbc1d30|1810010H24Rik|Ccdc57|Syne3|Zfp395|Apol7a|Tef|Igfals|Slc22a7|Hsd3b3|Slc44a3|Uox|Cyp7a1|Enho|Tcea3|Arhgef19|Fam131c|Slc2a5|Lrg1|Cxcl10|Abhd2|Mycn|Nrep|P2ry4|Padi4|Insig2|Prg4|Leap2|Crp|Mal2|Rapgef4|Fam110a|Jun|8430408G22Rik|Lad1|Ces3a|Arrdc3|G0s2|Hsd3b5|Hsd3b2|Cyp2u1|Mreg|Il22ra1|Cyp3a59|Rgs1|Ddit4|Lcn2|1810055G02Rik|Rgs12|Cc2d2a|Ppargc1a|Nipal1|Ugt2b5|Afp|Gbp10|Gbp6|Fgfrl1|Sgsm1|2610524H06Rik|Oasl1|Fbxo21|Tbx3|Oas1c|Stx2|Cyp3a41a|Per2|Irf5|Pask|Zfp467|Avl9|Nat8|Serpinb8|Rassf4|Usp18|Clstn3|Clec2h|Ceacam2|Cd79a|Cyp2b9|Sertad3|Cd22|Tulp2|Svip|Wee1|Eef2k|Aqp8|Cd19|Fcer2a|Cd209c|Htra4|Mtnr1a|Klkb1|Lrp2bp|Nr3c2|Ces1f|Mmp15|Ces2h|Ces3b|Slc9a5|Mvd|Tmem218|Nxpe2|Cd276|Gclc|Nt5e|6430571L13Rik|Acaa1b|Dlec1|Wisp3|Tube1|Chst3",
"|Fmo4|Fmo1|Rab36|Gstt2|Lss|Izumo4|Tjp3|Lin7a|Inhbe|Nab2|Mettl7b|Slc39a5|Osbp2|Ddc|Snrnp25|Pcp4l1|Pttg1|D930048N14Rik|Pctp|Tob1|Nr1d1|Krt23|Abca8a|Cbx8|Cbx4|Cbx2|Fn3k|Psen2|Cys1|Cmpk2|Atxn7l1|Acot4|Zc2hc1c|Serpina9|Akr1c19|Dsp|Rd3|Tppp|Mblac2|Adamts6|Pde4d|Fst|4930452B06Rik|Lrtm1|Lrit1|Fdft1|Chrna2|C1ql3|Prlr|Apol9a|Apol9b|Fam227a|Arsa|Entpd2|Nit2|Hunk|Ccnf|Pde9a|Cyp4f14|Tcf19|Zbtb12"), x = rownames(G172.umis), value = FALSE)
# Row indices of G172.umis whose names match the fourth TCPO gene-symbol list.
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap turned "Lingo4"
# into "Lingo\n4", which can never match a row name.
# NOTE(review): the alternatives are unanchored, so short symbols such as
# "Hp", "Tat" or "Ang" also hit any row name that merely contains them --
# confirm that this is intended.
TCPO4 <- grep(paste0("Dnph1|Aqp4|Dsg2|Lipg|Cacnb4|Hsbp1l1|Acy3|Snx32|Chrm1|Fads3|Kank1|Vldlr|Hhex|Pde6c|Cyp2c38|Mid1ip1|Fign|Sytl4|Bhlhb9|Kantr|Klhl41|Madd|Abtb2|Elf5|Pak6|Itpka|Wdr76|Sord|Nabp1|Rbm38|Slc17a9|Pgap1|Pfn2|Sucnr1|Bche|Aox3|Mab21l2|Hcn3|Fdps|Car14|Hist2h2be|Trim45|Gnat2|Adh4|Ddah1|Coro2a|Col15a1|Mup8|Mpdz|Igfbp5|2610528J11Rik|Ncmap|Gale|Arhgef16|Insig1|Sh3bp2|Stx18|Adamts3|Slc10a6|Mfsd7a|Crybb3|Ung|Glt1d1|Asl|Crcp|Serpine1|Cyp3a57|Cyp3a25|Aass|Lmod2|Atp6v0a4|Snx10|St3gal5|Podxl2|Nop2|Inhbb|Clec9a|Gprc5a|Pik3c2g|Amn1|Nlrp12|Hif3a|Saa1|Klf13|Slc45a3|Adm|Rassf10|Sult1a1|Tacc2|Dmbt1|Ifitm5|Lrrc8e|Irs2|Enpp6|Hp|Tat|Wwox|Map1lc3b|Cdh15|Mmp8|Ppan|Cnn1|Bmper|Adamts15|Ubash3b|Rgs16|Zbtb16|Zkscan7|Map3k5|Ctgf|Adamts14|Slc19a1|Slc35e3|Adcy1|Lgalsl|Rasd1|Apcs|Ccl6|Ypel2|Etv4|Grn|Gfap|Itgb3|H3f3b|Tha1|Lpin1|Prps1l1|Susd6|Ppp4r4|Serpina3i|Serpina3m|Serpina3f|Serpina3g|Slc25a47|Simc1|Klhl3|Ctsl|Rpp38|Hspa14|Itga2|Gnl3|Itih4|Fam35a|Apex1|Zc3h13|Farp1|Nrbp2|Eppk1|Maff|Pim3|Slc38a2|Dnaja3|Socs1|Cebpd|Obp2a|Nr1i2|Cldn14|Dopey2|Rpl10a|Fkbp5|Rsph1|Angptl4|Rpl12|Supt3|Cyp39a1|Fsd1|Mpnd|Dpp9|Myom1|Rbbp8|Nr6a1|Fbn2|Ablim3|2010003K11Rik|Ms4a8a|Sgms1|Acta2|Cyp26c1|Cyp2c70|Rrp12|Ablim1|Gfra1|6030498E09Rik|B3galt1|Il1r1|Mtch2|Thbs1|Tmem87b|Nop56|Thbd|Bcl2l1|Kcnb1|Cebpb|Arhgef26|Tiparp|Gpatch4|Adamtsl4|Trp53inp1|Slc25a51|Tomm5|Ambp|Orm1|Lurap1l|Cyp2j8|Prkag3|Cyp27a1|Mfsd2a|Gja4|Tmem51|Steap4|Abcb1a|Serpina7|Ugt2b34|Stbd1|Fam47e|Prss8|Smpd3|Gstt3|Gstt1|Fam110c|Chaf1b|Cyp2c54|Cyp2c37|Cyp2c50|Podn|Lgr5|Ang|Chac1|Ugt1a1|Tfcp2l1|Tomt|Cyp1a2|Gas1|Ppp1r42|Mup9|Inmt|Ces1d|Setd4|Susd4|Tifa|Fam222a|Cyp26b1|Prtn3|Rtn4rl1|Bhmt|Dntt|Fosl2|Prom1|Nfe2l3|Prc1|Ccnd1|Ces1c|Slc13a2|Ccl9|Top2a|Fam84a|Ahr|Klhl33|Slc25a37|Aldh1a7|Ermn|Elovl3|Timm8a1|Ckap2l|Slc23a2|Fam83d|Ect2|Hgfac|Lgals4|Syt3|Fut1|Rab30|Gas6|Rnf122|6430573F11Rik|Isyna1|Junb|Rfx4|Hsd17b6|Sun3|Per1|Socs3|Ltbp2|Cyp46a1|Nfil3|Lamb3|Itih3|Mat1a|Rnase4|Nuggc|Scara5|Sybu|Aifm3|Mas1|Cables1|Awat2|Ripply1|Slc43a1|Myh7b|D630003M21Rik|Pck1|Fabp5|Lingo",
"4|Etnppl|Sytl1|Extl1|Ugt2b1|Ugt2b36|Ugt2a1|Prkg2|Hrk|Kntc1|Nup205|Jazf1|Reep1|Mob1a|Mcm2|Atg7|Mcm3|Nudt19|Acmsd|Mcm6|Mctp2|Fanci|Acsm2|Zbed6|Nrg1|Lpl|Calr3|Mcm5|Asf1b|Ces2c|Fanca|Lamc2|Aph1b|Bmp5|Ttk|Xirp1|Echdc1|Sgpl1|H2afy2|BC055324|Fignl1|Tgtp2|Sar1b|9530068E07Rik|Slfn9|Cdc6|Ern1|Foxa1|Klhl28|Ifi27l2b|Serpina1d|Serpina1e|Serpina1a|Idi1|Dtl|Nek2|Lect2|Depdc1b|Wdhd1|Socs4|Esco2|Abcc4|Tmtc4|Pdzd2|Arl5b|Ncald|Wisp1|Alg10b|Scn8a|Krt4|Krt79|Mcm4|Sidt1|Hspa13|Nrip1"), x = rownames(G172.umis), value = FALSE)
# Row indices of G172.umis whose names match the fifth TCPO gene-symbol list
# (regex alternation; grep() returns indices by default).
TCPO5 <- grep(pattern = "Adamts1|Uhrf1|Ston1|Pcdhgc3|Prelid2|Clcf1|Fads1|Aldh1a1|Dock8|A1cf|Hells|Cyp2c65|Abcc2|Sorcs3|Pnliprp1|Sytl5|Pola1|Ercc6l|Cenpi|Rad51|Nusap1|Mybl1|Zfp931|Chrna4|Slc10a5|Zfp687|Chd1l|Prok1|Lactb2|Agl|Impad1|Ccne2|Alg2|Mup13|Mup2|Mup7|Mup14|Ttc39b|Adamtsl1|Fndc5|Irs1|Sowahb|Klhl8|Nudt7|Slc16a13|Rab11fip4|Sntg2|Acot6|Sgcg|C9|Fam171b|Gpat2|Srms|Itgb3bp|Ube2u|A630001G21Rik|Shisa3|Ugt2b37|Cxcl13|Gbp8|Slc15a4|Ybx3|Sult2a5|Sult2a2|Sult2a1|Sult2a4|Etnk2|Dpep1|9530077C05Rik|Uck2|Rims2|Cyp4f15|H2-Eb1|Notch4|H2-Q2|Dock11|Rnf24|Tgm2|Coq10b|Kcna2|Gbp2b|Igfbp2|Myom3|Col4a3", x = rownames(G172.umis))

# Row indices of G172.umis whose names match the first TCPO ncRNA id list.
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap turned
# "ncRNA_intra_chr5_4728" into "ncRNA_intra_chr\n5_4728", which can never
# match a row name.
# NOTE(review): the alternatives are unanchored, so e.g.
# "ncRNA_inter_chr1_62" also hits longer ids that contain it -- confirm that
# this is intended.
TCPO6 <- grep(paste0("ncRNA_inter_chr16_13412|ncRNA_inter_chr2_1923|ncRNA_inter_chr5_4197|ncRNA_inter_chr9_8103|ncRNA_inter_chr10_9254|ncRNA_inter_chr18_14655|ncRNA_inter_chr6_5137|ncRNA_as_chr1_369|ncRNA_inter_chr5_4424|ncRNA_inter_chr5_4619|ncRNA_as_chr5_4731|ncRNA_as_chr5_4745|ncRNA_inter_chr5_4781|ncRNA_as_chr6_5038|ncRNA_inter_chr6_5610|ncRNA_inter_chr7_5913|ncRNA_inter_chr7_5914|ncRNA_as_chr7_6050|ncRNA_inter_chr7_6074|ncRNA_inter_chr7_6411|ncRNA_inter_chr1_552|ncRNA_inter_chr1_547|ncRNA_inter_chr8_6741|ncRNA_inter_chr8_6944|ncRNA_inter_chr8_6955|ncRNA_inter_chr9_7766|ncRNA_as_chr9_7843|ncRNA_as_chr9_7880|ncRNA_inter_chr9_7881|ncRNA_inter_chr9_7938|ncRNA_inter_chr9_7994|ncRNA_inter_chr9_7992|ncRNA_as_chr9_8419|ncRNA_inter_chr10_8796|ncRNA_inter_chr10_9195|ncRNA_as_chr10_9385|ncRNA_as_chr10_9411|ncRNA_inter_chr11_9543|ncRNA_inter_chr11_10206|ncRNA_as_chr12_10281|ncRNA_inter_chr12_10630|ncRNA_inter_chr12_10754|ncRNA_as_chr12_10762|ncRNA_inter_chr12_10814|ncRNA_inter_chr12_10916|ncRNA_inter_chr12_10938|ncRNA_inter_chr13_11125|ncRNA_inter_chr13_11153|ncRNA_inter_chr13_11203|ncRNA_inter_chr13_11204|ncRNA_inter_chr13_11201|ncRNA_inter_chr13_11412|ncRNA_inter_chr13_11602|ncRNA_as_chr14_11908|ncRNA_as_chr14_11919|ncRNA_as_chr14_11991|ncRNA_inter_chr14_12061|ncRNA_inter_chr15_12776|ncRNA_inter_chr15_12819|ncRNA_inter_chr16_13477|ncRNA_inter_chr17_14011|ncRNA_as_chr18_14302|ncRNA_as_chr18_14461|ncRNA_inter_chr18_14589|ncRNA_as_chr18_14689|ncRNA_as_chr19_14883|ncRNA_inter_chr19_14949|ncRNA_as_chr19_15129|ncRNA_inter_chr2_1477|ncRNA_as_chr2_1652|ncRNA_inter_chr2_1990|ncRNA_inter_chr3_2232|ncRNA_inter_chr3_2551|ncRNA_inter_chr3_2550|ncRNA_inter_chr3_2629|ncRNA_inter_chr1_66|ncRNA_inter_chr1_63|ncRNA_inter_chr1_62|ncRNA_as_chr3_2878|ncRNA_inter_chr3_2901|ncRNA_inter_chr3_3003|ncRNA_inter_chr4_3157|ncRNA_as_chr4_3800|ncRNA_inter_chr4_3862|ncRNA_inter_chr4_3863|ncRNA_inter_chr5_3988|ncRNA_inter_chr5_4315|ncRNA_inter_chr5_4322|ncRNA_inter_chr5_4335|ncRNA_as_chr1_400|ncRNA_intra_chr",
"5_4728|ncRNA_inter_chr6_4886|ncRNA_as_chr6_5132|ncRNA_inter_chr6_5310|ncRNA_inter_chr6_5421|ncRNA_inter_chr6_5502|ncRNA_inter_chr6_5721"), x = rownames(G172.umis), value = FALSE)

# Row indices of G172.umis whose names match a further TCPO ncRNA id list.
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap turned
# "ncRNA_inter_chr19_14953" into "ncRNA_inter_ch\nr19_14953", which can never
# match a row name.
# NOTE(review): the alternatives are unanchored, so e.g.
# "ncRNA_inter_chr1_570" also hits longer ids that contain it -- confirm that
# this is intended.
TCPO6.2 <- grep(paste0("ncRNA_inter_chr6_5817|ncRNA_as_chr7_5955|ncRNA_inter_chr7_6085|ncRNA_as_chr7_6192|ncRNA_inter_chr7_6367|ncRNA_inter_chr7_6392|ncRNA_inter_chr7_6508|ncRNA_inter_chr7_6510|ncRNA_inter_chr1_570|ncRNA_inter_chr1_571|ncRNA_inter_chr1_566|ncRNA_inter_chr1_568|ncRNA_inter_chr1_567|ncRNA_inter_chr8_6775|ncRNA_inter_chr1_590|ncRNA_as_chr8_7071|ncRNA_inter_chr8_7141|ncRNA_inter_chr1_610|ncRNA_intra_chr1_611|ncRNA_inter_chr8_7169|ncRNA_intra_chr8_7312|ncRNA_intra_chr8_7310|ncRNA_inter_chr1_633|ncRNA_as_chr8_7521|ncRNA_as_chr8_7528|ncRNA_inter_chr8_7612|ncRNA_inter_chr8_7683|ncRNA_inter_chr9_8105|ncRNA_inter_chr9_8099|ncRNA_inter_chr9_8104|ncRNA_as_chr9_8172|ncRNA_inter_chr9_8279|ncRNA_as_chr9_8334|ncRNA_inter_chr9_8350|ncRNA_inter_chr10_8999|ncRNA_inter_chr10_9000|ncRNA_inter_chr10_9222|ncRNA_intra_chr11_9593|ncRNA_inter_chr11_9636|ncRNA_as_chr11_9787|ncRNA_as_chr11_9790|ncRNA_inter_chr11_9995|ncRNA_inter_chr12_10421|ncRNA_inter_chr12_10476|ncRNA_inter_chr12_10549|ncRNA_inter_chr12_10672|ncRNA_intra_chr12_10866|ncRNA_intra_chr12_10859|ncRNA_intra_chr12_10851|ncRNA_inter_chr12_10949|ncRNA_inter_chr13_11074|ncRNA_inter_chr13_11070|ncRNA_inter_chr13_11399|ncRNA_inter_chr13_11438|ncRNA_inter_chr13_11440|ncRNA_inter_chr14_11853|ncRNA_inter_chr14_11911|ncRNA_inter_chr14_11987|ncRNA_inter_chr14_12058|ncRNA_as_chr14_12074|ncRNA_inter_chr14_12191|ncRNA_inter_chr14_12193|ncRNA_inter_chr14_12205|ncRNA_inter_chr14_12291|ncRNA_inter_chr14_12297|ncRNA_inter_chr15_12606|ncRNA_as_chr15_12697|ncRNA_intra_chr15_12771|ncRNA_inter_chr15_12777|ncRNA_inter_chr16_12998|ncRNA_inter_chr16_13190|ncRNA_inter_chr16_13349|ncRNA_inter_chr17_13743|ncRNA_inter_chr17_13808|ncRNA_as_chr17_13828|ncRNA_inter_chr17_13841|ncRNA_inter_chr17_13842|ncRNA_inter_chr17_13876|ncRNA_inter_chr17_13919|ncRNA_inter_chr17_14015|ncRNA_inter_chr17_14085|ncRNA_inter_chr17_14129|ncRNA_inter_chr18_14339|ncRNA_inter_chr18_14656|ncRNA_inter_chr19_14717|ncRNA_inter_chr19_14856|ncRNA_inter_chr19_14854|ncRNA_inter_ch",
"r19_14953|ncRNA_inter_chr19_14952|ncRNA_inter_chr2_1436|ncRNA_inter_chr2_1492|ncRNA_inter_chr2_1577|ncRNA_inter_chr2_1821|ncRNA_inter_chr2_1826|ncRNA_inter_chr2_1827|ncRNA_inter_chr3_2764|ncRNA_as_chr3_2936|ncRNA_inter_chr3_2983|ncRNA_inter_chr4_3142|ncRNA_inter_chr4_3306|ncRNA_inter_chr4_3512|ncRNA_inter_chr4_3521|ncRNA_inter_chr4_3673|ncRNA_inter_chr4_3732|ncRNA_inter_chr4_3778|ncRNA_as_chr4_3843|ncRNA_as_chr10_9015|ncRNA_inter_chr19_14873|ncRNA_inter_chr3_2165|ncRNA_inter_chr10_8471|ncRNA_inter_chr19_15097|ncRNA_inter_chr11_10188|ncRNA_inter_chr1_420|ncRNA_inter_chr8_7363|ncRNA_inter_chr8_7423|ncRNA_inter_chr8_7430|ncRNA_inter_chr11_9864"), x = rownames(G172.umis), value = FALSE)


# Row indices of G172.umis whose names match a further TCPO ncRNA id list
# (regex alternation; grep() returns indices by default).
TCPO6.1 <- grep(pattern = "ncRNA_inter_chr11_9925|ncRNA_inter_chr11_9968|ncRNA_inter_chr1_914|ncRNA_inter_chr16_13176|ncRNA_as_chr18_14602|ncRNA_inter_chr3_2162|ncRNA_inter_chr3_2504|ncRNA_as_chr4_3300|ncRNA_inter_chr6_5793|ncRNA_inter_chr8_7610|ncRNA_as_chr9_7959|ncRNA_inter_chr15_12869|ncRNA_inter_chr18_14442|ncRNA_inter_chr19_15169|ncRNA_inter_chr19_15179|ncRNA_inter_chr19_15177|ncRNA_inter_chr19_15171|ncRNA_inter_chr2_1725|ncRNA_inter_chr3_2543|ncRNA_inter_chr4_3120|ncRNA_as_chr5_4655|ncRNA_inter_chr5_4654|ncRNA_as_chr6_5335|ncRNA_inter_chr7_5998|ncRNA_inter_chr9_8301|ncRNA_as_chr10_8460|ncRNA_as_chr11_9709|ncRNA_inter_chr1_931|ncRNA_inter_chr15_12834|ncRNA_inter_chr16_13509|ncRNA_inter_chr16_13510|ncRNA_as_chr16_13512|ncRNA_inter_chr18_14690|ncRNA_inter_chr19_15002|ncRNA_inter_chr19_15004|ncRNA_inter_chr16_13225|ncRNA_inter_chr2_2017|ncRNA_inter_chr6_5253|ncRNA_inter_chr9_7763|ncRNA_inter_chr5_4777|ncRNA_inter_chr6_5551|ncRNA_inter_chr14_11945|ncRNA_as_chr9_8142|ncRNA_inter_chr2_1471|ncRNA_inter_chr11_10185|ncRNA_inter_chr9_8000|ncRNA_inter_chr4_3282|ncRNA_inter_chr5_4499|ncRNA_inter_chr1_496|ncRNA_inter_chr8_6894|ncRNA_inter_chr8_6896|ncRNA_inter_chr15_12836|ncRNA_inter_chr19_14947|ncRNA_as_chr7_6065|ncRNA_inter_chr13_11031|ncRNA_inter_chr4_3156|ncRNA_inter_chr9_7809|ncRNA_inter_chr13_11385|ncRNA_inter_chr6_5248|ncRNA_inter_chr9_7875|ncRNA_inter_chr1_775|ncRNA_intra_chr11_9594|ncRNA_as_chr15_12959|ncRNA_inter_chr5_4316|ncRNA_inter_chr11_9635|ncRNA_inter_chr13_11100|ncRNA_inter_chr16_13428|ncRNA_inter_chr17_14130|ncRNA_inter_chr3_2721|ncRNA_inter_chr7_6113|ncRNA_as_chr5_4372|ncRNA_inter_chr5_4773|ncRNA_as_chr7_5921|ncRNA_inter_chr7_5935|ncRNA_as_chr7_6007|ncRNA_inter_chr7_6070", x = rownames(G172.umis))

# Row indices of G172.umis whose names match a further TCPO ncRNA id list.
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap turned
# "ncRNA_inter_chr1_113" into "ncRNA_inter_chr\n1_113", which can never match
# a row name.
# NOTE(review): the alternatives are unanchored, so e.g.
# "ncRNA_inter_chr1_129" also hits longer ids that contain it -- confirm that
# this is intended.
TCPO7 <- grep(paste0("ncRNA_inter_chr9_7989|ncRNA_inter_chr9_8417|ncRNA_as_chr10_9016|ncRNA_inter_chr10_9418|ncRNA_inter_chr13_11163|ncRNA_inter_chr15_12319|ncRNA_inter_chr16_13050|ncRNA_inter_chr16_13211|ncRNA_inter_chr17_14188|ncRNA_inter_chr19_14987|ncRNA_as_chr2_1343|ncRNA_inter_chr2_1594|ncRNA_as_chr3_2968|ncRNA_as_chr8_7332|ncRNA_inter_chr11_9965|ncRNA_inter_chr13_11622|ncRNA_inter_chr2_1996|ncRNA_as_chr5_4325|ncRNA_inter_chr7_6087|ncRNA_inter_chr7_6094|ncRNA_inter_chr7_6692|ncRNA_inter_chr1_591|ncRNA_inter_chr9_7878|ncRNA_inter_chr9_8118|ncRNA_inter_chr10_9123|ncRNA_inter_chr10_9313|ncRNA_inter_chr12_10454|ncRNA_inter_chr13_11437|ncRNA_inter_chr17_13889|ncRNA_inter_chr2_2037|ncRNA_inter_chr3_2168|ncRNA_inter_chr4_3549|ncRNA_inter_chr9_8302|ncRNA_inter_chr10_9256|ncRNA_inter_chr11_9442|ncRNA_as_chr19_14777|ncRNA_as_chr19_14779|ncRNA_inter_chr19_14967|ncRNA_inter_chr2_1430|ncRNA_inter_chr4_3618|ncRNA_as_chr19_14976|ncRNA_inter_chr5_4338|ncRNA_as_chr7_5999|ncRNA_as_chr8_7359|ncRNA_inter_chr3_2988|ncRNA_inter_chr2_2016|ncRNA_inter_chr6_5316|ncRNA_inter_chr1_290|ncRNA_inter_chr5_4775|ncRNA_inter_chr7_6220|ncRNA_inter_chr7_6222|ncRNA_inter_chr9_8122|ncRNA_inter_chr11_9599|ncRNA_inter_chr12_10910|ncRNA_as_chr15_12920|ncRNA_inter_chr16_13170|ncRNA_inter_chr16_13177|ncRNA_inter_chr17_14151|ncRNA_inter_chr17_14162|ncRNA_inter_chr3_2166|ncRNA_inter_chr4_3651|ncRNA_inter_chr5_3974|ncRNA_inter_chr9_8067|ncRNA_inter_chr10_9181|ncRNA_inter_chr1_129|ncRNA_inter_chr15_12796|ncRNA_as_chr2_1207|ncRNA_inter_chr19_15175|ncRNA_inter_chr2_2012|ncRNA_inter_chr2_2011|ncRNA_inter_chr1_365|ncRNA_inter_chr5_4491|ncRNA_as_chr6_5635|ncRNA_inter_chr6_5710|ncRNA_inter_chr7_6110|ncRNA_inter_chr7_6489|ncRNA_inter_chr7_6587|ncRNA_inter_chr7_6639|ncRNA_inter_chr8_6738|ncRNA_inter_chr8_6759|ncRNA_inter_chr8_6766|ncRNA_inter_chr8_6942|ncRNA_inter_chr8_6946|ncRNA_inter_chr8_7074|ncRNA_inter_chr8_7180|ncRNA_inter_chr8_7432|ncRNA_inter_chr9_7691|ncRNA_inter_chr9_7819|ncRNA_inter_chr9_7813|ncRNA_inter_chr",
"1_113|ncRNA_inter_chr9_7885|ncRNA_as_chr9_8043|ncRNA_inter_chr9_8049|ncRNA_as_chr9_8317|ncRNA_inter_chr10_8697"), x = rownames(G172.umis), value = FALSE)

# Row indices of G172.umis whose names match a further TCPO ncRNA id list
# (regex alternation; grep() returns indices by default).
TCPO7.1 <- grep(pattern = "ncRNA_inter_chr10_8776|ncRNA_inter_chr10_9125|ncRNA_inter_chr10_9366|ncRNA_inter_chr10_9363|ncRNA_intra_chr11_9623|ncRNA_as_chr1_814|ncRNA_inter_chr11_9743|ncRNA_inter_chr1_913|ncRNA_inter_chr12_10509|ncRNA_inter_chr12_10539|ncRNA_inter_chr12_10713|ncRNA_inter_chr12_10943|ncRNA_as_chr13_11127|ncRNA_as_chr1_978|ncRNA_inter_chr13_11222|ncRNA_inter_chr13_11361|ncRNA_inter_chr13_11572|ncRNA_as_chr13_11787|ncRNA_inter_chr2_1050|ncRNA_inter_chr14_11938|ncRNA_as_chr14_11957|ncRNA_inter_chr14_12034|ncRNA_inter_chr14_12032|ncRNA_inter_chr14_12031|ncRNA_inter_chr15_12446|ncRNA_as_chr15_12818|ncRNA_inter_chr15_12890|ncRNA_as_chr15_12942|ncRNA_inter_chr16_13173|ncRNA_as_chr1_153|ncRNA_inter_chr17_13857|ncRNA_inter_chr17_13924|ncRNA_inter_chr17_13983|ncRNA_inter_chr17_14026|ncRNA_as_chr17_14041|ncRNA_inter_chr17_14102|ncRNA_as_chr18_14369|ncRNA_as_chr19_14782|ncRNA_inter_chr2_1310|ncRNA_inter_chr19_14874|ncRNA_inter_chr19_14880|ncRNA_inter_chrX_15449|ncRNA_inter_chr2_1423|ncRNA_inter_chr2_1497|ncRNA_inter_chr2_1501|ncRNA_inter_chr1_31|ncRNA_inter_chr2_1829|ncRNA_as_chr2_1870|ncRNA_inter_chr2_1958|ncRNA_inter_chr2_2124|ncRNA_inter_chr3_2140|ncRNA_inter_chr3_2311|ncRNA_inter_chr3_2505|ncRNA_inter_chr3_2743|ncRNA_inter_chr3_2940|ncRNA_inter_chr4_3009|ncRNA_inter_chr4_3010|ncRNA_inter_chr4_3163|ncRNA_as_chr4_3654|ncRNA_inter_chr4_3689|ncRNA_inter_chr4_3867|ncRNA_inter_chr5_4010|ncRNA_inter_chr5_4123|ncRNA_inter_chr5_4138|ncRNA_inter_chr5_4336|ncRNA_inter_chr5_4337|ncRNA_inter_chr5_4656|ncRNA_inter_chr5_4685|ncRNA_as_chr5_4687|ncRNA_inter_chr6_4828|ncRNA_inter_chr6_5154|ncRNA_inter_chr6_5675|ncRNA_inter_chr6_5684|ncRNA_inter_chr6_5723|ncRNA_as_chr6_5861|ncRNA_inter_chr7_6097|ncRNA_as_chr7_6384|ncRNA_inter_chr7_6559|ncRNA_inter_chr7_6709|ncRNA_inter_chr8_6887|ncRNA_inter_chr8_7072|ncRNA_inter_chr8_7105", x = rownames(G172.umis))
# Row indices of G172.umis whose names match a further TCPO ncRNA id list.
# The pattern is assembled with paste0() so the long literal can wrap without
# putting a newline inside the regex; the previous hard wrap turned
# "ncRNA_inter_chr18_14223" into "ncRNA_inter_\nchr18_14223", which can never
# match a row name.
# NOTE(review): the alternatives are unanchored, so e.g.
# "ncRNA_inter_chr1_635" also hits longer ids that contain it -- confirm that
# this is intended.
TCPO7.2 <- grep(paste0("ncRNA_as_chr8_7190|ncRNA_intra_chr8_7324|ncRNA_inter_chr1_635|ncRNA_inter_chr1_630|ncRNA_inter_chr8_7512|ncRNA_inter_chr1_648|ncRNA_inter_chr1_678|ncRNA_inter_chr9_7996|ncRNA_inter_chr9_7995|ncRNA_inter_chr9_8022|ncRNA_inter_chr9_8100|ncRNA_intra_chr9_8171|ncRNA_inter_chr10_9128|ncRNA_inter_chr10_9199|ncRNA_intra_chr11_9595|ncRNA_inter_chr11_9600|ncRNA_inter_chr11_9651|ncRNA_as_chr11_9663|ncRNA_as_chr11_9684|ncRNA_inter_chr11_9780|ncRNA_inter_chr11_9784|ncRNA_inter_chr11_9991|ncRNA_inter_chr11_10051|ncRNA_inter_chr12_10415|ncRNA_intra_chr12_10508|ncRNA_inter_chr12_10543|ncRNA_inter_chr12_10628|ncRNA_intra_chr12_10870|ncRNA_intra_chr12_10872|ncRNA_intra_chr12_10863|ncRNA_intra_chr12_10854|ncRNA_intra_chr12_10856|ncRNA_intra_chr12_10857|ncRNA_inter_chr12_10895|ncRNA_inter_chr12_10978|ncRNA_inter_chr13_11199|ncRNA_inter_chr14_11851|ncRNA_inter_chr14_12016|ncRNA_inter_chr14_12145|ncRNA_inter_chr14_12154|ncRNA_inter_chr14_12239|ncRNA_inter_chr14_12290|ncRNA_as_chr14_12314|ncRNA_inter_chr15_12368|ncRNA_inter_chr2_1097|ncRNA_as_chr2_1101|ncRNA_as_chr15_12682|ncRNA_as_chr16_13145|ncRNA_inter_chr16_13270|ncRNA_inter_chr16_13316|ncRNA_inter_chr16_13317|ncRNA_inter_chr17_13539|ncRNA_as_chr17_13731|ncRNA_inter_chr17_14132|ncRNA_inter_chr18_14338|ncRNA_inter_chr18_14336|ncRNA_inter_chr18_14337|ncRNA_inter_chr18_14674|ncRNA_inter_chr19_14790|ncRNA_inter_chr19_14892|ncRNA_inter_chr19_14999|ncRNA_inter_chr19_15136|ncRNA_inter_chrX_15248|ncRNA_inter_chrX_15240|ncRNA_inter_chrX_15389|ncRNA_inter_chr2_1686|ncRNA_inter_chr2_1736|ncRNA_intra_chr2_1796|ncRNA_inter_chr2_2112|ncRNA_inter_chr3_2574|ncRNA_inter_chr4_3161|ncRNA_inter_chr4_3184|ncRNA_as_chr4_3206|ncRNA_as_chr4_3310|ncRNA_inter_chr4_3424|ncRNA_inter_chr4_3422|ncRNA_inter_chr4_3723|ncRNA_as_chr8_7333|ncRNA_as_chr5_4744|ncRNA_inter_chr5_4746|ncRNA_inter_chr6_5638|ncRNA_inter_chr7_6108|ncRNA_inter_chr7_6109|ncRNA_as_chr13_11025|ncRNA_inter_chr14_12201|ncRNA_inter_chr15_12439|ncRNA_inter_chr15_12815|ncRNA_inter_",
"chr18_14223|ncRNA_as_chr1_222|ncRNA_inter_chr8_6744|ncRNA_inter_chr8_7334|ncRNA_inter_chr2_1425|ncRNA_inter_chr2_1426|ncRNA_inter_chr3_2413|ncRNA_inter_chr3_2410|ncRNA_inter_chr3_2411|ncRNA_inter_chr5_4407|ncRNA_inter_chr6_5249|ncRNA_inter_chr8_7511|ncRNA_inter_chr9_7874|ncRNA_inter_chr9_8056|ncRNA_as_chr9_8393|ncRNA_inter_chr13_11254|ncRNA_intra_chr2_1057|ncRNA_as_chr14_12315|ncRNA_inter_chr15_12684|ncRNA_inter_chr15_12937|ncRNA_inter_chr17_13692|ncRNA_inter_chr17_14175|ncRNA_inter_chr18_14688|ncRNA_intra_chr19_14773|ncRNA_inter_chr3_2169|ncRNA_as_chr4_3537"), x = rownames(G172.umis), value = FALSE)

# TCPO8: integer row indices of G172.umis (ascending, as grep(value = FALSE)
# returned) whose row name equals one of the feature IDs listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation: as a regex, "ncRNA_inter_chr1_14" would also select
# rows such as "ncRNA_inter_chr1_140". The IDs are kept in short
# "|"-separated chunks: the former pattern literal continued across a line
# break, which put a newline inside "ncRNA_inter_chr9_8123" so that ID could
# never match.
TCPO8 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_inter_chr5_4750|ncRNA_inter_chr7_6390|ncRNA_inter_chr9_8300",
  "ncRNA_inter_chr13_11116|ncRNA_inter_chr15_12835|ncRNA_inter_chr6_5318",
  "ncRNA_inter_chr11_9914|ncRNA_as_chr12_10974|ncRNA_inter_chr14_12199",
  "ncRNA_inter_chr15_12824|ncRNA_intra_chr19_15008|ncRNA_as_chr17_13834",
  "ncRNA_inter_chr19_14946|ncRNA_inter_chr5_4150|ncRNA_inter_chr5_4149",
  "ncRNA_as_chr5_4157|ncRNA_as_chr5_4156|ncRNA_inter_chr5_4180",
  "ncRNA_inter_chr5_4242|ncRNA_inter_chr1_366|ncRNA_inter_chr5_4273",
  "ncRNA_as_chr5_4276|ncRNA_as_chr5_4370|ncRNA_intra_chr5_4388",
  "ncRNA_inter_chr5_4402|ncRNA_inter_chr5_4507|ncRNA_inter_chr5_4532",
  "ncRNA_inter_chr5_4564|ncRNA_inter_chr5_4560|ncRNA_as_chr1_404",
  "ncRNA_as_chr5_4712|ncRNA_inter_chr5_4774|ncRNA_inter_chr5_4776",
  "ncRNA_inter_chr5_4785|ncRNA_inter_chr5_4784|ncRNA_inter_chr5_4783",
  "ncRNA_as_chr6_5114|ncRNA_inter_chr6_5117|ncRNA_inter_chr6_5125",
  "ncRNA_inter_chr6_5127|ncRNA_inter_chr6_5159|ncRNA_inter_chr1_444",
  "ncRNA_as_chr6_5250|ncRNA_inter_chr1_463|ncRNA_inter_chr6_5304",
  "ncRNA_inter_chr6_5313|ncRNA_inter_chr6_5323|ncRNA_inter_chr6_5370",
  "ncRNA_inter_chr6_5429|ncRNA_inter_chr6_5552|ncRNA_inter_chr6_5557",
  "ncRNA_inter_chr6_5563|ncRNA_inter_chr6_5582|ncRNA_as_chr6_5587",
  "ncRNA_inter_chr6_5630|ncRNA_inter_chr6_5673|ncRNA_as_chr7_5939",
  "ncRNA_inter_chr7_5990|ncRNA_as_chr7_6012|ncRNA_inter_chr7_6022",
  "ncRNA_inter_chr7_6101|ncRNA_inter_chr7_6118|ncRNA_inter_chr7_6115",
  "ncRNA_inter_chr7_6119|ncRNA_inter_chr7_6121|ncRNA_inter_chr7_6114",
  "ncRNA_inter_chr7_6223|ncRNA_inter_chr7_6221|ncRNA_as_chr7_6288",
  "ncRNA_inter_chr7_6338|ncRNA_inter_chr8_6861|ncRNA_as_chr8_6963",
  "ncRNA_inter_chr8_7012|ncRNA_inter_chr8_7256|ncRNA_inter_chr8_7280",
  "ncRNA_as_chr8_7331|ncRNA_inter_chr8_7330|ncRNA_inter_chr8_7362",
  "ncRNA_as_chr8_7369|ncRNA_inter_chr8_7394|ncRNA_inter_chr8_7424",
  "ncRNA_inter_chr8_7603|ncRNA_inter_chr8_7649|ncRNA_inter_chr8_7650",
  "ncRNA_inter_chr9_7754|ncRNA_inter_chr9_7873|ncRNA_as_chr9_7931",
  "ncRNA_as_chr9_7947|ncRNA_as_chr9_7949|ncRNA_inter_chr9_8016",
  "ncRNA_inter_chr9_8015|ncRNA_inter_chr9_8117|ncRNA_inter_chr9_8123",
  "ncRNA_inter_chr9_8121|ncRNA_inter_chr9_8132|ncRNA_as_chr9_8271",
  "ncRNA_inter_chr9_8396|ncRNA_inter_chr1_115|ncRNA_inter_chr1_14",
  "ncRNA_inter_chr10_8829|ncRNA_inter_chr10_8950|ncRNA_inter_chr10_8951",
  "ncRNA_inter_chr10_9011|ncRNA_as_chr10_9007|ncRNA_as_chr10_9023",
  "ncRNA_inter_chr10_9138|ncRNA_as_chr10_9240|ncRNA_inter_chr10_9245",
  "ncRNA_as_chr10_9389|ncRNA_inter_chr10_9409|ncRNA_inter_chr11_9670",
  "ncRNA_inter_chr11_9674|ncRNA_as_chr11_9688|ncRNA_as_chr11_9692",
  "ncRNA_inter_chr11_9833|ncRNA_inter_chr11_9862|ncRNA_inter_chr11_9873",
  "ncRNA_inter_chr11_9926|ncRNA_as_chr11_9928|ncRNA_inter_chr11_9938",
  "ncRNA_as_chr11_9940|ncRNA_inter_chr11_9939|ncRNA_inter_chr1_889"
), split = "|", fixed = TRUE)))

# TCPO8.1: integer row indices of G172.umis (ascending, as grep(value = FALSE)
# returned) whose row name equals one of the feature IDs listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation, because a regex also selects any longer row name
# that merely contains a listed ID (e.g. "ncRNA_inter_chr1_915" would match a
# row named "ncRNA_inter_chr1_9150" if one exists). The IDs are kept in short
# "|"-separated chunks so no string literal has to span a line break.
TCPO8.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_inter_chr11_10087|ncRNA_as_chr11_10132|ncRNA_inter_chr11_10148",
  "ncRNA_as_chr11_10149|ncRNA_inter_chr11_10202|ncRNA_inter_chr11_10201",
  "ncRNA_inter_chr11_10211|ncRNA_inter_chr1_915|ncRNA_inter_chr12_10412",
  "ncRNA_inter_chr12_10411|ncRNA_inter_chr1_928|ncRNA_inter_chr12_10573",
  "ncRNA_inter_chr12_10629|ncRNA_as_chr12_10655|ncRNA_inter_chr12_10657",
  "ncRNA_inter_chr12_10665|ncRNA_inter_chr12_10681|ncRNA_as_chr12_10764",
  "ncRNA_inter_chr1_948|ncRNA_inter_chr1_949|ncRNA_inter_chr12_10819",
  "ncRNA_inter_chr12_10881|ncRNA_inter_chr12_10893|ncRNA_as_chr13_11068",
  "ncRNA_inter_chr13_11069|ncRNA_inter_chr13_11237|ncRNA_inter_chr13_11236",
  "ncRNA_inter_chr13_11216|ncRNA_inter_chr13_11242|ncRNA_inter_chr13_11255",
  "ncRNA_inter_chr1_1000|ncRNA_inter_chr13_11439|ncRNA_as_chr13_11486",
  "ncRNA_inter_chr13_11651|ncRNA_inter_chr13_11752|ncRNA_inter_chr13_11771",
  "ncRNA_inter_chr2_1049|ncRNA_inter_chr14_11950|ncRNA_inter_chr14_11944",
  "ncRNA_inter_chr14_11946|ncRNA_inter_chr14_11949|ncRNA_inter_chr14_11989",
  "ncRNA_inter_chr14_11998|ncRNA_inter_chr14_11999|ncRNA_as_chr14_12111",
  "ncRNA_as_chr14_12166|ncRNA_inter_chr14_12198|ncRNA_inter_chr14_12200",
  "ncRNA_inter_chr2_1077|ncRNA_inter_chr2_1078|ncRNA_inter_chr15_12338",
  "ncRNA_inter_chr15_12489|ncRNA_inter_chr15_12514|ncRNA_as_chr15_12542",
  "ncRNA_inter_chr15_12633|ncRNA_as_chr15_12677|ncRNA_inter_chr15_12685",
  "ncRNA_as_chr15_12706|ncRNA_intra_chr15_12715|ncRNA_inter_chr15_12719",
  "ncRNA_inter_chr15_12718"
), split = "|", fixed = TRUE)))

# TCPO8.2: integer row indices of G172.umis (ascending, as grep(value = FALSE)
# returned) whose row name equals one of the feature IDs listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation: as a regex, "ncRNA_inter_chr1_44" would also select
# rows such as "ncRNA_inter_chr1_444". The IDs are kept in short
# "|"-separated chunks: the former pattern literal continued across a line
# break, which put a newline inside "ncRNA_inter_chr4_3391" so that ID could
# never match.
TCPO8.2 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_inter_chr15_12720|ncRNA_as_chr15_12721|ncRNA_inter_chr15_12745",
  "ncRNA_inter_chr15_12823|ncRNA_inter_chr15_12875|ncRNA_inter_chr15_12916",
  "ncRNA_inter_chr15_12936|ncRNA_inter_chr15_12953|ncRNA_inter_chr15_12954",
  "ncRNA_inter_chr16_13049|ncRNA_inter_chr16_13171|ncRNA_as_chr16_13227",
  "ncRNA_inter_chr16_13340|ncRNA_inter_chr16_13434|ncRNA_inter_chr16_13470",
  "ncRNA_inter_chr17_13681|ncRNA_intra_chr17_13894|ncRNA_as_chr17_13905",
  "ncRNA_inter_chr17_13939|ncRNA_inter_chr17_13938|ncRNA_inter_chr17_14163",
  "ncRNA_as_chr17_14173|ncRNA_inter_chr17_14176|ncRNA_inter_chr18_14317",
  "ncRNA_inter_chr18_14321|ncRNA_inter_chr18_14323|ncRNA_inter_chr18_14331",
  "ncRNA_inter_chr18_14330|ncRNA_inter_chr18_14328|ncRNA_inter_chr18_14324",
  "ncRNA_as_chr18_14408|ncRNA_inter_chr18_14534|ncRNA_inter_chr18_14590",
  "ncRNA_inter_chr18_14650|ncRNA_inter_chr2_1303|ncRNA_as_chr2_1305",
  "ncRNA_inter_chr2_1302|ncRNA_inter_chr19_14802|ncRNA_as_chr19_14823",
  "ncRNA_inter_chr19_14822|ncRNA_inter_chr19_14991|ncRNA_inter_chr19_14990",
  "ncRNA_intra_chr19_15006|ncRNA_as_chr19_15010|ncRNA_as_chr19_15007",
  "ncRNA_as_chrX_15387|ncRNA_inter_chrX_15422|ncRNA_inter_chrX_15550",
  "ncRNA_inter_chr2_1481|ncRNA_inter_chr2_1502|ncRNA_inter_chr2_1528",
  "ncRNA_inter_chr2_1600|ncRNA_inter_chr2_1782|ncRNA_inter_chr2_1792",
  "ncRNA_inter_chr2_1790|ncRNA_inter_chr2_1793|ncRNA_inter_chr1_196",
  "ncRNA_inter_chr1_195|ncRNA_inter_chr2_1989|ncRNA_inter_chr2_2038",
  "ncRNA_inter_chr3_2167|ncRNA_inter_chr1_44|ncRNA_inter_chr3_2407",
  "ncRNA_inter_chr3_2437|ncRNA_as_chr1_221|ncRNA_inter_chr3_2507",
  "ncRNA_inter_chr3_2546|ncRNA_as_chr3_2544|ncRNA_inter_chr1_238",
  "ncRNA_as_chr3_2603|ncRNA_inter_chr3_2636|ncRNA_inter_chr3_2768",
  "ncRNA_as_chr3_2781|ncRNA_as_chr3_2778|ncRNA_inter_chr3_2790",
  "ncRNA_inter_chr3_2791|ncRNA_inter_chr3_2798|ncRNA_inter_chr3_2889",
  "ncRNA_inter_chr3_2896|ncRNA_as_chr3_2894|ncRNA_inter_chr3_2904",
  "ncRNA_inter_chr4_3027|ncRNA_as_chr4_3224|ncRNA_inter_chr4_3308",
  "ncRNA_inter_chr4_3350|ncRNA_inter_chr4_3381|ncRNA_inter_chr4_3380",
  "ncRNA_inter_chr4_3389|ncRNA_inter_chr4_3391|ncRNA_inter_chr4_3390",
  "ncRNA_as_chr1_86|ncRNA_inter_chr4_3575|ncRNA_inter_chr4_3652",
  "ncRNA_inter_chr4_3690|ncRNA_inter_chr4_3764|ncRNA_inter_chr4_3775",
  "ncRNA_inter_chr4_3905|ncRNA_as_chr4_3910|ncRNA_inter_chr4_3923",
  "ncRNA_inter_chr5_4066|ncRNA_inter_chr5_4067|ncRNA_inter_chr5_4065",
  "ncRNA_inter_chr5_4116|ncRNA_inter_chr5_4472|ncRNA_inter_chr5_4500",
  "ncRNA_as_chr5_4627|ncRNA_inter_chr5_4749|ncRNA_inter_chr6_4993",
  "ncRNA_inter_chr6_5281|ncRNA_inter_chr6_5282|ncRNA_as_chr6_5722",
  "ncRNA_inter_chr1_494|ncRNA_inter_chr1_495|ncRNA_as_chr7_5880",
  "ncRNA_inter_chr7_6091|ncRNA_inter_chr7_6090|ncRNA_inter_chr7_6080",
  "ncRNA_as_chr7_6174|ncRNA_inter_chr1_520|ncRNA_as_chr7_6377",
  "ncRNA_as_chr7_6389|ncRNA_inter_chr7_6415|ncRNA_inter_chr7_6433",
  "ncRNA_as_chr7_6467"
), split = "|", fixed = TRUE)))

# TCPO9: integer row indices of G172.umis (ascending, as grep(value = FALSE)
# returned) whose row name equals one of the feature IDs listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation, because a regex also selects any longer row name
# that merely contains a listed ID. The IDs are kept in short "|"-separated
# chunks: the former pattern literal continued across a line break, which put
# a newline inside "ncRNA_inter_chr2_1427" so that ID could never match.
TCPO9 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_as_chr7_6656|ncRNA_inter_chr8_6742|ncRNA_as_chr8_6747",
  "ncRNA_inter_chr8_6740|ncRNA_inter_chr8_6743|ncRNA_inter_chr8_6895",
  "ncRNA_inter_chr8_6953|ncRNA_inter_chr8_6989|ncRNA_inter_chr8_6990",
  "ncRNA_inter_chr8_7336|ncRNA_inter_chr8_7399|ncRNA_inter_chr8_7398",
  "ncRNA_as_chr8_7632|ncRNA_as_chr9_7778|ncRNA_inter_chr9_7810",
  "ncRNA_inter_chr9_7923|ncRNA_as_chr9_7924|ncRNA_inter_chr9_8169",
  "ncRNA_inter_chr9_8299|ncRNA_inter_chr9_8297|ncRNA_inter_chr9_8387",
  "ncRNA_inter_chr10_8469|ncRNA_inter_chr10_8777|ncRNA_as_chr10_9182",
  "ncRNA_inter_chr10_9193|ncRNA_inter_chr10_9209|ncRNA_inter_chr10_9210",
  "ncRNA_inter_chr1_792|ncRNA_inter_chr10_9351|ncRNA_as_chr11_9447",
  "ncRNA_as_chr11_9449|ncRNA_inter_chr1_125|ncRNA_inter_chr1_126",
  "ncRNA_inter_chr11_9693|ncRNA_inter_chr1_842|ncRNA_inter_chr11_9894",
  "ncRNA_inter_chr11_9960|ncRNA_inter_chr11_9967|ncRNA_inter_chr11_9966",
  "ncRNA_inter_chr11_10019|ncRNA_inter_chr11_10091|ncRNA_as_chr11_10193",
  "ncRNA_inter_chr12_10324|ncRNA_inter_chr12_10322|ncRNA_inter_chr12_10477",
  "ncRNA_inter_chr12_10562|ncRNA_as_chr12_10847|ncRNA_as_chr12_10889",
  "ncRNA_as_chr12_10887|ncRNA_as_chr12_10885|ncRNA_intra_chr12_10888",
  "ncRNA_as_chr12_10852|ncRNA_intra_chr12_10886|ncRNA_inter_chr12_10988",
  "ncRNA_inter_chr13_11071|ncRNA_inter_chr13_11128|ncRNA_inter_chr13_11381",
  "ncRNA_inter_chr13_11386|ncRNA_inter_chr13_11425|ncRNA_as_chr13_11491",
  "ncRNA_inter_chr13_11490|ncRNA_inter_chr13_11603|ncRNA_as_chr13_11785",
  "ncRNA_as_chr13_11784|ncRNA_inter_chr14_12018|ncRNA_inter_chr2_1072",
  "ncRNA_as_chr15_12330|ncRNA_inter_chr15_12644|ncRNA_inter_chr15_12695",
  "ncRNA_inter_chr15_12830|ncRNA_as_chr15_12837|ncRNA_inter_chr15_12832",
  "ncRNA_inter_chr15_12831|ncRNA_inter_chr15_12833|ncRNA_inter_chr15_12849",
  "ncRNA_as_chr16_13514|ncRNA_inter_chr17_13787|ncRNA_inter_chr17_13875",
  "ncRNA_inter_chr17_14006|ncRNA_inter_chr17_14023|ncRNA_inter_chr17_14125",
  "ncRNA_as_chr19_14728|ncRNA_intra_chr19_15028|ncRNA_as_chr19_15027",
  "ncRNA_inter_chr19_15058|ncRNA_inter_chr19_15187|ncRNA_as_chr19_15188",
  "ncRNA_as_chrX_15318|ncRNA_inter_chr2_1427|ncRNA_inter_chr2_1419",
  "ncRNA_inter_chr2_1421|ncRNA_inter_chr2_1420|ncRNA_inter_chr2_1424",
  "ncRNA_inter_chr2_1422|ncRNA_inter_chr2_1432|ncRNA_inter_chr2_1431",
  "ncRNA_as_chr1_173|ncRNA_inter_chr1_171|ncRNA_inter_chr1_169",
  "ncRNA_inter_chr2_1851|ncRNA_intra_chr2_1852|ncRNA_inter_chr2_1877",
  "ncRNA_inter_chr2_1883|ncRNA_inter_chr2_1887|ncRNA_inter_chr2_1888",
  "ncRNA_inter_chr2_1926|ncRNA_inter_chr2_2009|ncRNA_inter_chr2_2078",
  "ncRNA_inter_chr2_2079|ncRNA_inter_chr2_2084|ncRNA_inter_chr2_2085",
  "ncRNA_inter_chr3_2207|ncRNA_as_chr3_2409|ncRNA_as_chr3_2418",
  "ncRNA_inter_chr3_2481|ncRNA_inter_chr3_2549|ncRNA_inter_chr3_2579",
  "ncRNA_inter_chr1_256|ncRNA_inter_chr1_257|ncRNA_inter_chr3_2705",
  "ncRNA_inter_chr3_2887|ncRNA_inter_chr4_3056|ncRNA_inter_chr4_3079",
  "ncRNA_as_chr4_3210|ncRNA_as_chr4_3275|ncRNA_as_chr4_3313"
), split = "|", fixed = TRUE)))

# TCPO10: integer row indices of G172.umis (ascending, as grep(value = FALSE)
# returned) whose row name equals one of the feature IDs listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation: as a regex, short IDs such as "ncRNA_inter_chr1_47"
# would also select rows such as "ncRNA_inter_chr1_470". The IDs are kept in
# short "|"-separated chunks: the former pattern literal continued across a
# line break, which put a newline directly in front of
# "ncRNA_inter_chr8_6757" so that ID could never match.
TCPO10 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_as_chr1_312|ncRNA_inter_chr4_3534|ncRNA_inter_chr4_3584",
  "ncRNA_inter_chr4_3685|ncRNA_inter_chr4_3884|ncRNA_as_chr7_6302",
  "ncRNA_intra_chr8_7322|ncRNA_intra_chr8_7321|ncRNA_intra_chr8_7319",
  "ncRNA_intra_chr8_7317|ncRNA_intra_chr8_7314|ncRNA_intra_chr8_7313",
  "ncRNA_intra_chr8_7323|ncRNA_inter_chr10_8461|ncRNA_as_chr1_782",
  "ncRNA_inter_chr12_10621|ncRNA_inter_chr12_10715|ncRNA_as_chr16_13511",
  "ncRNA_inter_chr18_14691|ncRNA_inter_chr19_15003|ncRNA_intra_chr19_15014",
  "ncRNA_intra_chr19_15011|ncRNA_as_chr9_7767|ncRNA_as_chr19_14977",
  "ncRNA_as_chr2_1965|ncRNA_as_chr1_483|ncRNA_inter_chr8_6961",
  "ncRNA_inter_chr13_11436|ncRNA_inter_chr2_1098|ncRNA_inter_chr3_2269",
  "ncRNA_inter_chr4_3183|ncRNA_as_chr5_4694|ncRNA_as_chr15_12817",
  "ncRNA_as_chr19_15130|ncRNA_inter_chr2_1963|ncRNA_inter_chr4_3468",
  "ncRNA_intra_chr7_5920|ncRNA_inter_chr3_2137|ncRNA_inter_chr14_12024",
  "ncRNA_inter_chr1_401|ncRNA_inter_chr6_5138|ncRNA_inter_chr6_5824",
  "ncRNA_inter_chr7_6511|ncRNA_inter_chr9_8249|ncRNA_inter_chr10_8767",
  "ncRNA_as_chr10_8927|ncRNA_inter_chr19_14853|ncRNA_inter_chr4_3052",
  "ncRNA_inter_chr5_4577|ncRNA_inter_chr5_4578|ncRNA_inter_chr8_6816",
  "ncRNA_inter_chr9_7993|ncRNA_as_chr9_8050|ncRNA_inter_chr9_8147",
  "ncRNA_as_chr11_10151|ncRNA_inter_chr11_10195|ncRNA_inter_chr12_10942",
  "ncRNA_inter_chr1_987|ncRNA_inter_chr14_12026|ncRNA_inter_chr18_14293",
  "ncRNA_inter_chr18_14604|ncRNA_inter_chr19_14979|ncRNA_inter_chrX_15549",
  "ncRNA_inter_chr1_73|ncRNA_inter_chr1_51|ncRNA_inter_chr1_53",
  "ncRNA_inter_chr1_47|ncRNA_inter_chr1_56|ncRNA_as_chr4_3235",
  "ncRNA_inter_chr11_9996|ncRNA_inter_chr2_1498|ncRNA_inter_chr2_1491",
  "ncRNA_inter_chr4_3178|ncRNA_inter_chr5_4137|ncRNA_inter_chr5_4192",
  "ncRNA_inter_chr5_4330|ncRNA_inter_chr5_4329|ncRNA_as_chr5_4662",
  "ncRNA_inter_chr5_4674|ncRNA_inter_chr6_5118|ncRNA_inter_chr6_5131",
  "ncRNA_as_chr6_5336|ncRNA_inter_chr6_5595|ncRNA_inter_chr6_5822",
  "ncRNA_inter_chr7_6343|ncRNA_inter_chr7_6391|ncRNA_inter_chr7_6509",
  "ncRNA_inter_chr7_6522|ncRNA_inter_chr7_6523|ncRNA_inter_chr7_6524",
  "ncRNA_inter_chr7_6534|ncRNA_inter_chr8_6757|ncRNA_intra_chr1_604",
  "ncRNA_intra_chr8_7327|ncRNA_inter_chr1_631|ncRNA_inter_chr8_7605",
  "ncRNA_inter_chr8_7606|ncRNA_as_chr9_8401|ncRNA_inter_chr11_9433",
  "ncRNA_inter_chr11_9655|ncRNA_inter_chr12_10459|ncRNA_as_chr12_10896",
  "ncRNA_inter_chr12_10979|ncRNA_inter_chr13_11120|ncRNA_inter_chr13_11294",
  "ncRNA_as_chr13_11596|ncRNA_inter_chr14_12214|ncRNA_inter_chr14_12250",
  "ncRNA_inter_chr2_1099|ncRNA_inter_chr15_12609|ncRNA_inter_chr15_12752",
  "ncRNA_inter_chr15_12774|ncRNA_inter_chr15_12793|ncRNA_inter_chr15_12792",
  "ncRNA_inter_chr16_13508|ncRNA_inter_chr19_14851|ncRNA_inter_chr19_14855",
  "ncRNA_inter_chr19_15030|ncRNA_as_chr19_15054|ncRNA_inter_chr19_15190",
  "ncRNA_as_chr2_1457|ncRNA_inter_chr3_2303|ncRNA_as_chr3_2567",
  "ncRNA_inter_chr3_2722|ncRNA_inter_chr3_2937|ncRNA_inter_chr4_3307",
  "ncRNA_inter_chr4_3425|ncRNA_inter_chr4_3779|ncRNA_inter_chr11_9631",
  "ncRNA_inter_chr13_11122|ncRNA_inter_chr13_11383|ncRNA_inter_chr15_12855",
  "ncRNA_as_chr18_14310|ncRNA_inter_chr3_2488|ncRNA_inter_chr3_2744",
  "ncRNA_inter_chr4_3859|ncRNA_inter_chr5_4115|ncRNA_as_chr6_5601",
  "ncRNA_inter_chr1_670|ncRNA_inter_chr10_9183|ncRNA_inter_chr11_10217",
  "ncRNA_as_chr12_10618|ncRNA_inter_chr16_13137|ncRNA_inter_chr4_3285",
  "ncRNA_inter_chr4_3286"
), split = "|", fixed = TRUE)))


```


G172M1M3 is the correct batch; its DE genes are listed below.

```{r}
# DE1.M1M3.0.1: integer row indices of G172.umis (ascending, as
# grep(value = FALSE) returned) whose row name equals one of the feature IDs
# listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation, because a regex also selects any longer row name
# that merely contains a listed ID. The IDs are kept in short "|"-separated
# chunks: the former pattern literal continued across a line break, which put
# a newline inside "ncRNA_inter_chr5_4052" so that ID could never match.
DE1.M1M3.0.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_as_chr7_5999|ncRNA_as_chr15_12340|ncRNA_inter_chrX_15446",
  "ncRNA_inter_chr2_1419|ncRNA_inter_chr19_14746|ncRNA_inter_chr17_13909",
  "ncRNA_as_chr8_7528|ncRNA_inter_chr9_7809|ncRNA_as_chr4_3532",
  "ncRNA_as_chr19_14823|ncRNA_as_chr19_14883|ncRNA_as_chr8_7521",
  "ncRNA_inter_chr10_8697|ncRNA_intra_chr6_5593|ncRNA_as_chr6_5589",
  "ncRNA_as_chr8_6954|ncRNA_inter_chr5_3974|ncRNA_as_chr5_4325",
  "ncRNA_inter_chr8_6743|ncRNA_as_chr12_10281|ncRNA_as_chr7_6166",
  "ncRNA_as_chr12_10697|ncRNA_inter_chr11_9512|ncRNA_as_chr14_12038",
  "ncRNA_inter_chr5_4221|ncRNA_as_chr5_4744|ncRNA_inter_chr3_2988",
  "ncRNA_inter_chr17_13928|ncRNA_inter_chr13_11227|ncRNA_inter_chr15_12608",
  "ncRNA_inter_chr12_10270|ncRNA_inter_chr4_3436|ncRNA_inter_chr2_1302",
  "ncRNA_inter_chr2_1077|ncRNA_inter_chr1_292|ncRNA_as_chr1_373",
  "ncRNA_as_chr19_14977|ncRNA_as_chr1_625|ncRNA_intra_chr14_11961",
  "ncRNA_as_chr5_4334|ncRNA_as_chr10_8962|ncRNA_inter_chr2_1491",
  "ncRNA_inter_chr13_11070|ncRNA_as_chr6_5634|ncRNA_as_chrX_15320",
  "ncRNA_inter_chr10_9000|ncRNA_inter_chr5_4220|ncRNA_inter_chr11_9434",
  "ncRNA_inter_chr10_9264|ncRNA_as_chr15_12817|ncRNA_as_chr9_7702",
  "ncRNA_as_chr11_10075|ncRNA_as_chr19_14882|ncRNA_inter_chr3_2156",
  "ncRNA_as_chr7_6192|ncRNA_inter_chr8_7363|ncRNA_inter_chr10_9193",
  "ncRNA_inter_chr13_11752|ncRNA_inter_chr5_4395|ncRNA_as_chr17_13708",
  "ncRNA_as_chr3_2894|ncRNA_inter_chr15_12365|ncRNA_inter_chr14_11945",
  "ncRNA_inter_chrX_15437|ncRNA_inter_chr10_9086|ncRNA_as_chr18_14602",
  "ncRNA_as_chr1_252|ncRNA_inter_chr8_6726|ncRNA_inter_chr13_11602",
  "ncRNA_as_chr4_3083|ncRNA_inter_chr5_4593|ncRNA_as_chr17_14018",
  "ncRNA_inter_chr12_10880|ncRNA_inter_chr3_2768|ncRNA_as_chr12_10521",
  "ncRNA_inter_chr4_3294|ncRNA_as_chr8_7632|ncRNA_as_chr2_1440",
  "ncRNA_inter_chr8_6759|ncRNA_as_chr2_1543|ncRNA_inter_chr11_9922",
  "ncRNA_as_chr2_1884|ncRNA_as_chr17_13683|ncRNA_as_chr1_978",
  "ncRNA_as_chr7_6467|ncRNA_inter_chr17_14026|ncRNA_inter_chr17_13801",
  "ncRNA_inter_chr12_10476|ncRNA_as_chr4_3235|ncRNA_as_chr7_5921",
  "ncRNA_inter_chr1_291|ncRNA_inter_chr1_161|ncRNA_inter_chr5_4052",
  "ncRNA_inter_chr11_9911|ncRNA_inter_chr7_6501|ncRNA_as_chr4_3559",
  "ncRNA_as_chr6_5860|ncRNA_inter_chr7_6318|ncRNA_as_chr1_222",
  "ncRNA_inter_chr10_8469|ncRNA_as_chr5_4281|ncRNA_as_chr11_9956",
  "ncRNA_as_chr2_1695|ncRNA_as_chr4_3224|ncRNA_as_chr15_12467",
  "ncRNA_as_chr17_13834|ncRNA_as_chr10_9389|ncRNA_inter_chr7_6369",
  "ncRNA_inter_chr11_9926|ncRNA_as_chr15_12818|ncRNA_inter_chr1_496",
  "ncRNA_inter_chr6_5862|ncRNA_as_chr1_400|ncRNA_inter_chr10_9208",
  "ncRNA_inter_chr17_14128|ncRNA_as_chr8_7331|ncRNA_as_chr6_5722",
  "ncRNA_as_chr19_14782|ncRNA_inter_chr3_2697|ncRNA_inter_chr12_10462",
  "ncRNA_inter_chr11_9923|ncRNA_inter_chr1_846|ncRNA_inter_chr2_2079",
  "ncRNA_inter_chr9_7999|ncRNA_inter_chr4_3458|ncRNA_inter_chr8_7654",
  "ncRNA_as_chr18_14332|ncRNA_inter_chr3_2166|ncRNA_as_chr2_1439",
  "ncRNA_as_chr19_14976|ncRNA_as_chr12_10245|ncRNA_inter_chr5_3975",
  "ncRNA_inter_chr1_840|ncRNA_as_chr4_3298|ncRNA_inter_chr6_4980",
  "ncRNA_as_chr1_762|ncRNA_inter_chr7_6189|ncRNA_inter_chr19_14990",
  "ncRNA_as_chr13_11068|ncRNA_inter_chr19_14880|ncRNA_inter_chr11_9948",
  "ncRNA_inter_chr6_4887|ncRNA_as_chr13_11149|ncRNA_as_chr2_1550",
  "ncRNA_inter_chr4_3698|ncRNA_inter_chr10_9366|ncRNA_inter_chr5_4065",
  "ncRNA_as_chr8_7375|ncRNA_as_chr19_14713|ncRNA_inter_chr9_8121",
  "ncRNA_as_chr2_1677|ncRNA_inter_chr5_4022|ncRNA_as_chr2_1966",
  "ncRNA_as_chr19_15130|ncRNA_inter_chr12_10561|ncRNA_inter_chr4_3819",
  "ncRNA_as_chr19_14732|ncRNA_inter_chrX_15297|ncRNA_inter_chr11_9635",
  "ncRNA_as_chr1_221|ncRNA_inter_chr8_6878|ncRNA_inter_chr9_7885",
  "ncRNA_as_chr11_9990|ncRNA_inter_chr14_11845|ncRNA_as_chr11_10222",
  "ncRNA_inter_chr3_2161|ncRNA_inter_chr2_1958|ncRNA_inter_chr11_10189",
  "ncRNA_intra_chr8_6800|ncRNA_inter_chr1_852|ncRNA_as_chr7_6161",
  "ncRNA_inter_chr1_109|ncRNA_inter_chr16_13456|ncRNA_inter_chr19_15132"
), split = "|", fixed = TRUE)))
# DE2.M1M3.0.1: integer row indices of G172.umis (ascending, as
# grep(value = FALSE) returned) whose row name equals one of the feature IDs
# listed below.
# IDs are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation, because a regex also selects any longer row name
# that merely contains a listed ID. The IDs are kept in short "|"-separated
# chunks: the former pattern literal continued across a line break, which put
# a newline inside "ncRNA_as_chr18_14628" so that ID could never match.
DE2.M1M3.0.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "ncRNA_inter_chr19_14954|ncRNA_inter_chr3_2411|ncRNA_inter_chr16_13190",
  "ncRNA_inter_chr15_12770|ncRNA_as_chr5_4384|ncRNA_inter_chr6_5510",
  "ncRNA_as_chr11_10149|ncRNA_inter_chr9_8350|ncRNA_as_chr2_1145",
  "ncRNA_inter_chr6_5542|ncRNA_inter_chrX_15278|ncRNA_inter_chr13_11385",
  "ncRNA_as_chr2_1579|ncRNA_inter_chr6_4875|ncRNA_as_chr10_9365",
  "ncRNA_inter_chr7_6338|ncRNA_as_chr8_7096|ncRNA_inter_chr14_12199",
  "ncRNA_inter_chr1_812|ncRNA_inter_chr14_11979|ncRNA_as_chr17_14069",
  "ncRNA_inter_chr1_120|ncRNA_as_chr17_13658|ncRNA_as_chr14_12055",
  "ncRNA_as_chr16_13150|ncRNA_inter_chr6_5310|ncRNA_as_chr9_7957",
  "ncRNA_inter_chr1_119|ncRNA_inter_chr2_2066|ncRNA_intra_chr7_5919",
  "ncRNA_inter_chr14_12030|ncRNA_inter_chr17_13792|ncRNA_as_chr14_11948",
  "ncRNA_as_chr2_1073|ncRNA_inter_chr8_6742|ncRNA_inter_chr6_5610",
  "ncRNA_inter_chr7_6334|ncRNA_inter_chr10_9320|ncRNA_as_chr7_6032",
  "ncRNA_as_chr6_5508|ncRNA_as_chr5_3947|ncRNA_inter_chr6_4981",
  "ncRNA_inter_chrX_15240|ncRNA_as_chr13_11779|ncRNA_inter_chr4_3793",
  "ncRNA_inter_chr10_8794|ncRNA_as_chr11_10121|ncRNA_as_chr9_8224",
  "ncRNA_as_chr4_3820|ncRNA_as_chr19_15151|ncRNA_inter_chr15_12845",
  "ncRNA_as_chr11_9812|ncRNA_inter_chr6_5318|ncRNA_as_chr2_2108",
  "ncRNA_inter_chr13_11254|ncRNA_inter_chr12_10973|ncRNA_inter_chr19_14964",
  "ncRNA_inter_chr6_5582|ncRNA_as_chr10_9147|ncRNA_as_chr15_12321",
  "ncRNA_inter_chr2_1171|ncRNA_inter_chr3_2165|ncRNA_as_chr11_9897",
  "ncRNA_intra_chr8_6802|ncRNA_inter_chr8_6788|ncRNA_inter_chr16_13443",
  "ncRNA_as_chr1_253|ncRNA_as_chr8_7410|ncRNA_inter_chr12_10269",
  "ncRNA_as_chr7_6655|ncRNA_inter_chr8_7682|ncRNA_as_chr9_8316",
  "ncRNA_inter_chr8_7267|ncRNA_inter_chr2_1237|ncRNA_inter_chr17_13939",
  "ncRNA_inter_chr11_10233|ncRNA_as_chr11_9838|ncRNA_inter_chr9_8000",
  "ncRNA_inter_chr13_11669|ncRNA_inter_chr19_14774|ncRNA_inter_chr10_9210",
  "ncRNA_inter_chr4_3549|ncRNA_as_chr8_6874|ncRNA_inter_chr16_13171",
  "ncRNA_as_chr3_2545|ncRNA_inter_chr6_5045|ncRNA_inter_chr10_9263",
  "ncRNA_as_chr10_9048|ncRNA_inter_chr7_6182|ncRNA_inter_chrX_15244",
  "ncRNA_inter_chr12_10649|ncRNA_as_chr18_14628|ncRNA_as_chr9_8428",
  "ncRNA_as_chr19_15065|ncRNA_inter_chr3_2572|ncRNA_as_chr5_4771",
  "ncRNA_inter_chr17_14187|ncRNA_inter_chr12_10411|ncRNA_inter_chr19_14874",
  "ncRNA_as_chr11_9767|ncRNA_as_chr16_13458|ncRNA_as_chr8_7367",
  "ncRNA_as_chr7_6293|ncRNA_inter_chr15_12653|ncRNA_inter_chr11_10211",
  "ncRNA_as_chr17_13856|ncRNA_as_chr2_1542|ncRNA_inter_chr4_3285",
  "ncRNA_inter_chr3_2653|ncRNA_inter_chr7_6397|ncRNA_inter_chr9_8104",
  "ncRNA_as_chr3_2554|ncRNA_as_chr14_12134|ncRNA_inter_chr17_13551",
  "ncRNA_as_chr8_7523|ncRNA_as_chr15_12460|ncRNA_as_chr2_1101",
  "ncRNA_inter_chr8_7519|ncRNA_as_chr17_13822|ncRNA_inter_chr4_3738",
  "ncRNA_inter_chr2_1830|ncRNA_inter_chr5_4321|ncRNA_as_chr2_1694",
  "ncRNA_inter_chr17_14182|ncRNA_inter_chr16_13352|ncRNA_inter_chr3_2742",
  "ncRNA_inter_chr11_9503|ncRNA_inter_chr6_5724|ncRNA_inter_chr1_591",
  "ncRNA_inter_chr13_11523|ncRNA_as_chr4_3316|ncRNA_inter_chr3_2345",
  "ncRNA_as_chr3_2544"
), split = "|", fixed = TRUE)))

# DE3.M1M3.0.1: integer row indices of G172.umis (ascending, as
# grep(value = FALSE) returned) whose row name equals one of the gene symbols
# listed below.
# Symbols are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation: as a regex, short symbols such as "Cp", "Lars" or
# "Eif1" also select every row name that merely contains them (e.g. "Cpt2",
# "Lars2", "Eif1a"). The symbols are kept in short "|"-separated chunks: the
# former pattern literal continued across a line break, which put a newline
# directly in front of "Rffl" so that symbol could never match.
DE3.M1M3.0.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "Gm26992|Dpyd|Cwc22|Cps1|Upp2|Rad51b|Carmil1|Clpx|Fgb|Ang4",
  "Fga|Gclc|Fgg|Fam13a|Sult3a2|Abcc2|Gsta3|Kcnt2|Irf9|Meiob",
  "Crot|Gadd45g|G6pc|Pzp|Fgl1|Chrm3|Cfh|Etfbkmt|Sntg2|Cxcl1",
  "Setbp1|Tymp|Crtc3|Pah|Kmo|Vmp1|Nfia|Rbpms|Slc7a2|Fam210b",
  "Slc16a10|Plin2|St3gal1|Sox5|Ly6e|Sc5d|Wwox|mt-Atp6|Tacc2|Chka",
  "Qsox1|Ass1|Pxmp4|Fam214a|Rnf125|Adgrv1|Por|Mbl1|Zbtb16|Adk",
  "mt-Co3|Rrbp1|Map2k6|Pdgfc|Nfib|2810459M11Rik|Slc25a30|Adck5|Mindy3",
  "Ypel2|Nrp1|Cryl1|mt-Cytb|Fbf1|Gas2|Otc|C4bp|Ldha|Aldh1l1",
  "Garem1|Adh4|mt-Nd4|Inmt|mt-Nd2|Gm42906|Slc38a3|St3gal4|Ugt2b36|Ptprd",
  "Insig1|Immp2l|Gon4l|Desi2|Tacc1|Cyb5a|Vwa8|Klf12|Gnmt|Prpf6",
  "Cfi|Pter|Rapgef4|Fggy|Pitpnc1|Lbp|Peak1|Cdo1|Diaph2|Adh1",
  "Mapk15|Slc30a10|Sdr9c7|Mamdc2|Rims2|Litaf|Ahcyl2|Abcb11|mt-Co2|Cyp2c29",
  "Crp|Slc25a47|Slc37a1|Pnrc1|Hgd|Rai14|Prg4|Gcnt7|mt-Nd3|Rint1",
  "Fgfr2|Gclm|Glt1d1|Ddc|Vtn|Eva1a|Trpm3|Zc3h13|Trp53inp2|Saa4",
  "Nr3c2|Zbtb20|Cebpb|Cpb2|Nr2c2ap|Gsap|Itih3|Suds3|Insig2|Igf1",
  "Hhex|Lnx2|Vcl|Sugct|Tmem243|mt-Nd4l|Chn2|Clock|Mthfr|Agmo",
  "Pid1|Fech|Gm4952|Skap2|Pdia5|Onecut1|Acat1|Kynu|Tmed5|mt-Nd1",
  "Nos1ap|Acaca|Serpina10|Ptpn3|Tifa|Bhmt|Clec2d|Nfil3|Abca6|Ern1",
  "Ces1d|Pcyt2|Ftcd|Slc38a4|Gspt1|Inhba|Larp4b|Simc1|Traf5|Gtpbp4",
  "Pik3c2g|Phf20l1|Timm23|Rpl36|Senp5|Slc17a5|Fabp1|Gldc|Ripor2|Lrpprc",
  "Tab2|Zpr1|Etfdh|Thrsp|Ube2f|Peg3|Slc24a5|Gm3839|Tgoln1|Agmat",
  "Kpna4|Fetub|St5|Ypel3|Sf3b3|Rlf|Pkhd1|Slco1b2|Gfod2|Ccdc12",
  "Sco2|Exoc5|Ftl1|Zfp697|Eif2s2|Fnbp1|Nek6|Txndc15|Mastl|Bach2",
  "Wsb1|Map3k14|Abcg8|Zbtb7a|Pawr|Tmem219|Gpd1|Erlin1|Gpc4|Fpgs",
  "Eif1|Ap3s1|Akr1a1|Macrod2|mt-Co1|Slc25a17|Pck1|A230050P20Rik|Zkscan1",
  "Cxadr|Gm6614|Ldah|Ppp1r3c|B3gat3|Fermt2|Cpn1|Mpp6|D430042O09Rik|Rtf2",
  "Itih4|Dbi|Prodh|Lgals9|Bend7|Tstd1|Gm4756|Ptges3|Hsd17b13|March2",
  "Fam210a|Cox8a|Lars2|Hsd17b12|Sqstm1|Col6a6|Etnppl|Ppfia1|Tbc1d5|Zc3h15",
  "Calr|Sigmar1|Ptpn1|Iscu|Tmem248|Tecpr2|Ccdc152|Gabarapl1|Mrpl38|Cep44",
  "2810474O19Rik|Ftl1-ps1|Grina|Lars|Serinc1|Slc25a51|Slc25a25|Ppp4r3b",
  "Nsd3|Lcp1|Socs3|Usp6nl|Fth1|Pms1|Hmgcl|Sik1|Clpp|Ech1",
  "Kifc3|Ssu72|Clic4|Dnajc3|Ei24|Stx16|Prr16|Rffl|Rasal2|Cfap54",
  "Scai|Serping1|Mettl26|C4b|Hspa4|4931406P16Rik|Dnmt3b|Chd4|Cul1|Mal2",
  "Ncoa5|Scarb1|Dcaf5|Akap8|Plk3|Tor1aip1|Strbp|Inhbc|Zfp410|Nedd4l",
  "Gm11639|Catspere2|Sh3bgrl2|1600014C10Rik|Slc11a2|Creb3l3|Atp7a|Taok3",
  "Ank3|Dab1|Zfp141|Nr1i3|Dmgdh|Ube2k|Clcn3|Amn1|Retreg3|Etnk1",
  "Ergic1|Gata4|Coq3|Coq10b|Eif3c|Rpl24|Pfn1|Hint1|Trmt112|Brip1",
  "Ifi35|Serpina3n|Helz2|Saysd1|Msmo1|Atox1|Cox19|Lactb|Errfi1|Rabac1",
  "Fam168b|Ndufa4l2|Cyp2e1|Slc15a4|Sntb1|Glyat|Ranbp2|Tpd52l2|Hectd2|Dcaf7",
  "Cox7a2l|Pcsk7|Kdm5b|Rb1|Mcl1|mt-Nd5|Fam135a|Pnkd|Zfhx2|Dcaf6",
  "Nasp|Timm9|Kras|Cox6a1|Ifrd1|Adap2|Plpp3|Znrf2|Rexo1|Eif4e",
  "Nckap5|Pxmp2|Ddx39b|Gm28305|Krt18|5830473C10Rik|Lrig1|Amacr|Agpat2|Sod2",
  "Dhrs4|Slc25a20|Cggbp1|Irak4|Tm9sf2|Traf6|Ghr|Myo10|Serp1|Wdr37",
  "Atp1a1|Nr2f2|Nck1|Prpf4b|Frrs1|Gnas|Tmem30a|Ndrg3|Ttc7b|Abhd17c",
  "Cpeb4|Drg1|Fam160a2|Zmynd8|Smim13|Gfer|Otulin|Ahsa2|Mphosph8|Nedd8",
  "Rc3h1|Slc25a42|Creld1|Lap3|Gsdme|Fn1|Exd1|Nrbp2|Serf2|Abhd14b",
  "Nars2|Cdh1|Caprin1|Fam162a|Akirin1|Selenot|Eps15|Dtnbp1|Syvn1|Luzp1",
  "Cebpz|Slk|Plbd2|Lsm6|Prr3|Eif4e2|Ghitm|Ggps1|Rpl6|Pigl",
  "Neb|Cabyr|Park7|Cab39|Creg1|Chd7|Elavl1|Hspa5|Fem1c|Plcxd2",
  "Gtf2ird1|Nup98|Hdlbp|C1ra|AI182371|Psmd11|Srsf7|Cyp2c23|Cyp3a25|Pycrl",
  "Nostrin|Bag3|Apobec1|Nipa2|Uqcr10|Alkbh5|Susd1|Tra2b|C8g|Sdf2",
  "Baiap2l1|Gbe1|Plcb1|Tmed2|Ttr|2210408I21Rik|Dnm1l|Sema6b|Aldh1a1|Eif4h",
  "Cyp2j6|Gm37240|Palld|Sat1|Wdr20|Pnpla7|Parva|Urgcp|Top1|Rab6a",
  "Cyp2j5|B4galt5|Cp|Zfp970|Elob|Dnaja2|Ndufa4|B4galt1|Gltpd2|Lasp1",
  "Nufip2|Sftpd|Pak2|Nup54|Mtfr1|Slc10a2|Cep170|Midn|Rnf11|Micall1",
  "1110008F13Rik|Sardh|Peli1|Tmem238|Slc30a9|Polr2a|Sgpl1|Ormdl3|Slc3a1",
  "Aldh1a7|Etv6|Hmgcs2|Xbp1|Nectin2|Rap1b|Gatad2a|Acsl1|Golga5|Bcl7c",
  "Fam111a|Mapkapk2|Polr2j|Rtp3|Inpp5a|Frmd4b|Slc35b3|Naca|Mical2|Ppm1g",
  "Mapre3|Id2|Srpr|Cpox|Eif4b|Ntan1|Zfp810"
), split = "|", fixed = TRUE)))
# DE4.M1M3.0.1: integer row indices of G172.umis (ascending, as
# grep(value = FALSE) returned) whose row name equals one of the gene symbols
# listed below.
# Symbols are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation: as a regex, short symbols such as "F9", "Fer",
# "Rnf2" or "Vcp" also select every row name that merely contains them (e.g.
# "Fermt2", "Rnf24", "Vcpip1"). The symbols are kept in short "|"-separated
# chunks: the former pattern literal continued across a line break, which put
# a newline inside "E130311K13Rik" so that symbol could never match.
DE4.M1M3.0.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "Cox7a2|Astn2|Smad1|Serbp1|Rusc2|Foxa3|Ergic2|Spin1|Cela1|Nudt12",
  "Rnf24|Tex2|Chdh|Zkscan3|Shtn1|Slc35d1|Cd81|Zfp799|Vcp|Akap1",
  "Chuk|Gphn|Lonp2|Etf1|Bace1|Slc39a11|Dnah8|Ccni|Alkbh1|Mpp7",
  "Hook2|Yy1|Mob4|Scyl2|Gatad1|Rere|Parg|Zfp182|Il6st|Ap1ar",
  "Acadvl|Atxn7l3b|Stra6l|Btbd1|Yaf2|Brd1|Vtcn1|Rpl11|Orc3|Ndufv3",
  "Hibadh|Hsd17b6|Tmx4|Ppp2ca|Ubr4|Nek1|Morn1|Herc4|Ubqln1|Gm10563",
  "Dzip3|Tank|Farp1|Tcta|Papola|Bmpr1a|Hspa9|Eif5|Tmem131|Bri3",
  "Pank1|Mttp|1110059E24Rik|Lrfn3|Hivep1|Neu1|Tbce|Efna5|Zcchc6|Actr2",
  "Slc16a1|Sec22b|Ythdf2|Tef|Ybx1|Preb|Zbtb1|Pld1|F9|Rarb",
  "Gmeb2|Rad23b|Rcc2|Gorasp2|Lztfl1|Fam104a|Sfpq|Smagp|Ppil4|Edf1",
  "Nhsl1|Ints7|Zfp385b|Psmb1|Timm13|Zfp652|Itsn1|Mrps34|Eef1b2|Lurap1l",
  "Gsr|Ddx50|Wdr53|Sec62|Gls2|Cnot7|Xdh|Cluh|Atxn7l1|Tmem150a",
  "Net1|Senp6|Fam120c|Gpx4|Ethe1|Commd7|Fbxo42|Mrps26|Cltb|Car1",
  "Kansl3|Pspc1|Tbc1d22a|Pex13|Rnf2|B230219D22Rik|Dclre1c|1700019A02Rik",
  "Acox1|Wipi2|Vezt|Dpf3|Gm14403|Gmcl1|Hsdl2|Eif2b1|Emc3|Rap1gap",
  "Trit1|Pqlc1|Nf1|Scaf8|Shq1|Larp4|Ngrn|Ubb|Vcpip1|Ube2j2",
  "Lrrc29|Snx3|Mast2|Atp5l|Rnf19a|Atp6v1f|Eif4g2|Klhl2|Slc7a7|Cox7b",
  "Adam11|Mbd1|Yap1|Grb10|Mprip|Lin7c|Zbtb11|Pura|Gabarap|Srsf6",
  "8430408G22Rik|Oma1|Akr1c19|H2-T23|Szrd1|Mrnip|Nsd2|Usmg5|Rab18|Sppl2a",
  "Mcm7|Kif5b|Thrap3|Tpst2|Blvrb|Samm50|Fbxw8|Lztr1|Ppp2r2d|Actl6a",
  "Btaf1|Bcl3|Atr|Clu|Erp29|Pla2g12b|Csnk2a2|Sdf4|Sdr42e1|Ttpal",
  "Rsbn1l|Gm379|Ubxn1|Ythdc1|H2afj|Prickle1|Abat|Nolc1|Nrip1|Acbd5",
  "Spata24|Ror1|Mtmr14|Igfals|Map7d1|Plin5|Casp8ap2|Rnf10|Gin1|Tc2n",
  "Nr2f6|Acp5|Tgfa|Zfp24|Gpx1|Dnaja3|Ifitm3|Usp16|1700037C18Rik|Idnk",
  "Rp9|Cdip1|Rpn1|Cdv3|Eif4a1|Hmgcs1|Smad9|9030624G23Rik|Sult5a1|Oplah",
  "Zfp106|Mtmr1|Colgalt2|Rai1|Dgkh|Ppp4r3a|Ddx23|Dstn|Stat5a|Hmces",
  "Ythdf1|Adipor2|Dynll2|Gcgr|Dhx9|Asb13|Eci1|Zhx2|Eif2b4|Upb1",
  "Psmb2|Foxn2|Zbtb5|Rad54l2|Cpeb2|Tmed4|Kdm3a|Metap1|Ankrd46|Akirin2",
  "Lpin2|Mrps7|Nprl3|Ctsz|Soat2|Atp6v1h|Caml|Upf3b|Map4k5|Pdia3",
  "4932438A13Rik|Dnajb11|Smurf2|Mtif2|Cyp4f17|Paip2|Slc29a1|Tars|Vamp8",
  "G3bp1|Adh7|Vwce|Anapc16|Scyl3|Cops6|Snw1|Ranbp9|Gosr2|Chordc1",
  "Mrps10|Ncam2|E130311K13Rik|Wars|Eif5b|Znhit1|Actb|Gmfb|Crk|Hipk2",
  "Crem|Nr0b2|Dusp1|Sdf2l1|Hadha|Atp11a|Ddb1|Prkd3|Htatip2|Ints8",
  "Qprt|Rcbtb2|Greb1l|Klf9|Pithd1|Ugt3a1|Mrpl52|Psmd4|Ppib|Clns1a",
  "Rad54b|Smarcad1|Vps72|Ttc14|Hadhb|Xiap|Ceacam1|Sec63|Cox11|Clptm1",
  "Atp11b|Cox7c|Shmt1|Abl2|Ngef|Mgat2|Dus1l|Serpina3c|Tmem259|Rhoa",
  "Rpl27a|Ndufb11|Brwd3|Fam53a|Hmgcr|2310030G06Rik|Med13|Fer|Reln|Uxt",
  "Junb|Pdia6|P4ha1|Ndufa10|Atp5c1|C1s2|Wdr4|Cebpg|Stk38|Nadsyn1",
  "Dexi|Fam120b|Trabd|Elmo1|Ahnak|Tmem234|Stk24|Ywhaz|Ppdpf|Tpp2",
  "Mreg|Nphp3|Gpkow|Prelid2|Slc30a5|Derl1|Stx5a|Stat3|Pyurf|Ppp2r1b",
  "Tbca|Phrf1|Utp14a|Tmed7|Patl1|Pebp1|Vps50|Caap1|Ssb|Pdrg1",
  "Gnl3|Farsa|Ufc1|Tubb4b|Foxj3|Ubn1|Cox6c|Rspry1|Kpna1|Btd",
  "Dcaf10|Mbd2|4921524J17Rik|Uqcrq|Pdia4|Paip1|Shpk|Stard13|Plxnb2|Utp4",
  "Bsg|Rnf114|Cfap97|Tmem127|Rnps1|Map3k10|Riok1|Nop58|Gcsh|Rac1",
  "Fyco1|Trappc4|Bclaf1|Ptpn2|Ddx19b|Hs2st1|Zswim8|Gm5617|Dcun1d3|Actn1",
  "4930556J24Rik|Lrp5|Cdk7|Asgr1|Supt5|Pabpc1|Hnrnpl|Lamp1|Emc6|R3hdm1",
  "Bckdhb|Srsf11|Wdr7|Tmem143|Aatf|Pef1|Acad8|Togaram1|Sympk|Mrpl34",
  "Myl12b|C1galt1|Sync|Zfp867|Snap47|Tma7|Mtss1|Rest|Tssc4|Creb1",
  "Fkbp8|Large1|Imp3|Sdsl|Eif3e|Rpl10a|Atp6v0b|Prpsap1|Cdc42bpb|Actr1a",
  "Mocos|Rab2b|Lcat|Scaf1|Sf3b6|Atp13a1|Ndufb5|Mat2a|Igf2bp3|Prdx5",
  "Hnmt|Cox5a|Ighmbp2|Setdb2|Tmem19|Actn4|Oxld1|Maged1|Sox6|Gjb2",
  "Lsm12|Sec61a1|Baz1b|Psmd5|1190002N15Rik|Ndufb9|Abhd3|Dicer1|Cldn1",
  "Mindy1|Acads|Dhps|Rpl19|Baz1a|Iws1|Supt16|Ccnt1|Atg2a|Fbxw7",
  "Rps3a1|Wdr45b|Dock4|Fam25c|Sdhaf3|Swt1|Serpinc1|Gan|Sbds|Med6",
  "Pcf11|Coq10a|Smap1|Gpr68|Psmd8|Txn2|Ascc2|Fbxo31|Vti1b|Taf11",
  "Fam222b|Ldlr|Tle3|Acbd3|Lrrc58|Sirt1|Lmna|Slc40a1|Gpt|Ccdc122",
  "Tex30|Cpsf1|Mrpl2|Esco1|Cdc42bpa|Xylt2|Nlrp6|Mrpl57|Zfp451|Gpatch2l",
  "4833439L19Rik|Prpf31|Csnk1d|Rabep2|Gria3|Ndufs3|Rapgef5|Shoc2|Rnf217",
  "Chd9|Rbm4b|Ap2a2|Myl6|Cars|Mlxip|Tbc1d20|Calm1|Col4a1|Fam207a"
), split = "|", fixed = TRUE)))
# DE5.M1M3.0.1: integer row indices of G172.umis (ascending, as
# grep(value = FALSE) returned) whose row name equals one of the gene symbols
# listed below.
# Symbols are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation: as a regex, short symbols such as "Ddx1", "Mapk1"
# or "Tst" also select every row name that merely contains them (e.g.
# "Ddx10", "Mapk15", "Tstd1"). The symbols are kept in short "|"-separated
# chunks: the former pattern literal continued across a line break, which put
# a newline directly after "Fam118b" so that symbol could never match.
DE5.M1M3.0.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "Ppard|Rps11|Wdtc1|Vapa|Neto2|Kpna3|Mrpl4|Nf2|Mtor|Tm9sf4",
  "Wdr82|Cox18|Nrn1|Zfp444|Mrpl17|Tm2d1|Angptl4|Prok1|Mcph1|Ulk1",
  "Tmem214|Taf1a|Sem1|Fndc3b|Fchsd2|Olfm2|Gm11808|Tpk1|Itgb1|Mkl1",
  "Fbxo38|Rras2|Rnf111|Tsc22d2|Tmem216|Nr2c2|Aaed1|Apoo|Hyal2|Atp2c1",
  "Pi4kb|Slc39a14|Tst|Kdelr1|Ube2a|Clpb|Coa3|Ap5z1|Ddx21|Nop10",
  "Gimap9|Wsb2|Abce1|Ccar1|Inca1|Tgm2|Ogfod1|Mthfs|Bdp1|Il13ra1",
  "Rdh14|Cgrrf1|Hnrnpk|Zfp830|Stk16|Vav2|Gm7298|Trp53inp1|Dpagt1|Rnf138",
  "Tcim|Sec11a|Cdkn1a|Ttc19|Faf2|Atg14|H3f3a|Thumpd3|Mrpl49|Cdc123",
  "Srp9|Lrp1|Rabgef1|Spcs2|Lamp2|Sfr1|Rexo2|Anxa7|Pcgf6|Bbox1",
  "Ube2d2a|Nploc4|Lonrf3|Car14|Psmc3|Malsu1|Ints6|Dnajc15|Sec13|Lpar6",
  "Fbxw4|Zfp512|Heatr1|Cdkn2aip|Ganab|Bbs9|Pla2g6|Tmem141|Manba|D17Wsu92e",
  "Cpt2|Cldn2|Thoc7|Clta|Gtf2a2|Grpel1|Atg2b|Hdac4|Sec24c|Snrnp48",
  "Zfp773|Atp5a1|Zfp664|Cmah|Mogs|Ddost|Cyp26a1|Trrap|Ext2|Eif3a",
  "Canx|Rplp1|Mul1|Nudt5|Nono|Slc35a2|Irak2|Fkrp|Lrig2|Brms1l",
  "Dek|Heatr6|Morf4l1|Ddhd1|Zcchc11|Tbl1x|Mcfd2|Map3k20|Ston1|Rabggtb",
  "Steap4|Minos1|U2af1|Bmt2|Crip2|March5|Gtf2e1|Snrnp70|Edem3|Ehd1",
  "Cyc1|Rbm10|Ing4|Xpo4|Sidt2|Ankrd11|Rbm18|Vps37a|Mob1b|Abcf2",
  "Ginm1|Tmem68|Zcchc14|C1rl|Dad1|Usp15|Ube2v2|Ssfa2|Kxd1|Ttll4",
  "Zdhhc18|Tmem163|Hfe|Pex6|Ezh2|Mapk1|Scarb2|Ostc|Brix1|Atp6ap2",
  "Ddhd2|Apcs|Spint2|Tbck|Tsen34|Rpl5|Nat10|Mtfr1l|Ninj1|Ano6",
  "Fnta|Fgfr4|Pgk1|Ccdc28a|Nipsnap2|Nudt13|Xpr1|Cct2|Ppm1d|Atp9a",
  "Upf2|Gak|Kdm3b|Hnrnph1|Trim44|Agrn|Csde1|Olfr16|Ppip5k2|Dennd5b",
  "Pdcd6ip|Pop1|Tmem176b|Efl1|Eif5a|Otud5|Hist1h2bc|Zfp740|Dip2b|Alkbh8",
  "Pfdn5|Npc1|Gabarapl2|Ppil2|Tmem167|Calm2|Ppp1r13b|Abca8b|Mcm9|Proc",
  "Mrps21|Cnot8|Pxylp1|Alkbh2|Ptpn11|Zfand5|Tom1l2|Tspo|Sra1|Rps10",
  "Fam76b|Zfp131|Cldn3|Mrps12|Ifnar2|Ces1f|Nfe2l1|Ube2n|0610030E20Rik|Cog4",
  "Mbnl2|Camk2n1|Gatad2b|H2afy|Vezf1|Pkd2|Ddx1|Trappc8|Wwc2|Rnf38",
  "Aak1|Atp5o|Hmgxb3|Slx4ip|Capza1|mt-Atp8|Mon1a|Smim10l1|Gspt2|Baz2a",
  "Snx16|Cenpa|Riok3|Rrn3|Cwc15|Susd6|Oxsm|Apol9b|Ccm2|Cpq",
  "Ccdc47|Fdft1|Cope|Mtf2|Zc3h7a|Psmg2|Tedc2|Tert|Rab11a|Rps13",
  "Slc25a15|Fbxw2|Ercc4|Atg12|Ciapin1|Efr3a|Atp6v0a2|1110032A03Rik|Osbp",
  "Fam118b|Pphln1|Pex19|Erbin|Aff4|Mlec|mt-Nd6|Trpc4ap|Zfp568|Zcchc24",
  "Fnbp1l|Fam213b|Aimp1|Vac14|Ncor1|Akr7a5|Atp8b1|Cldn14|Ccdc25|Phf3",
  "Supt6|Heatr5a|Jade2|Mif|Usf2|Tmem64|Chid1|Psmd7|Scaf11|Tex12",
  "Mettl2|Gm11273|Rpl18|Alg11|Spsb2|Ube2z|Xrn2|Icam1|Ddx46|Sephs1",
  "Slc35a3|Tram1|Bcl7b|Med27|Nrf1|Cntrl|Sec24b|Nol8|Xrcc4|Sumf1",
  "Ier2|Hdac1|Heatr3|Mfn1|Ccdc91|Ssna1|Eif2s1|Nars|Cyp2j8|Cacybp",
  "Txndc9|Spaca6|Zdhhc14|Samd8|Elmod3|Erh|Naa50|Oaz1|Me1|Glmp",
  "Apon|Acbd4|Ngdn|Ufd1|Pou6f1|Prkar1a|Coq5|Eif3j1|Hspe1|Grpel2",
  "Fxr2|Rab5a|Spata5|Senp2|Atp6v1a|Eri3|Xylb|Ago3|Agt|Tmem131l",
  "Pllp|Atp6v1e1|Scand1|Tmem56|Gstm1|Psmd3|Tbc1d14|Faim|2210016F16Rik",
  "Tmem208|Zfp768|Atat1|Timm8b|Crebzf|Pdhx|Copb2|Prkag2|Slc30a7|Timm17a",
  "Dhx33|Nlk|Ppp1cb|Acbd6|Zfp326|Pqlc2|Leng9|Stk38l|Tmem126a|Hdgfl2",
  "Slc25a26|Flii|Higd2a|C1s1|Lsg1|Puf60|Zfp655|Ribc1|Dhx40|Ppp1r37",
  "Hoga1|Paqr9|Ccs|Nxf1|Ssr1|Fhit|Nme7|Pnpo|Kctd20|Eapp",
  "Nras|Hectd1|Zfr2|Krt8|Ttf1|Ecd|Vps54|Mical3|Rprd1b|Zfx",
  "Pcbp2|Ankrd13c|Cry1|Ikbkap|Agpat3|Mrpl40|Tat|Rtraf|Cdc26|Mrfap1",
  "Ppp1r2|Vnn1|Ivns1abp|Alg14|Mrpl50|Gchfr|Cnpy3|Ost4|Ppp1r42|Dnase2a",
  "Mrpl43|Vkorc1l1|Clp1|Slc25a5|Gm49356|Snip1|Slc35f5|Acad11|Polr3h|Tmem29",
  "Zfc3h1|Usp9x|Utp11|Eny2|Irgm2|Ilf2|Zfp654|Anxa5|Klf3|Prpf19",
  "Bcas3|Ccnl1|Pcbd1|Mospd3|Ctbs|Gm17660|Tprkb|Camta2|Mrpl20|Dusp11",
  "Zfp513|Rlim|Mif4gd|Foxq1|Inppl1|Prr14|Cyb561|Chd8|Rab40c|Fbxw11",
  "Dna2|Usp3|Klhdc8b|Cmas|Proser1|Nvl|Rrp1|Synj1|Sin3a|Hnrnpll",
  "Taf2|Slc39a1|Cpsf2|Slain2|Ndufs7|Uqcrc1|Cmtm6|Hnrnpf|Mat2b|Ndst2",
  "Gtf3c1|2410015M20Rik|Tomm22|AA986860|Immp1l|Top1mt|Qrich1|Ubc|Pex11a",
  "Slc6a9|Mink1|Ppp2cb|Eif1a|Mrps24|BC005561|Slc26a1|Ctnnbl1|Rrp8|Pon1",
  "Mrpl14|Tirap|Echdc2|Ppp4c|Hnf1b|Uso1|Carhsp1|Pcm1|Hspa13|Ehmt1",
  "Dnajb4|Ep300|Apopt1|Atp2a2|Polk|Usp12|Pde3b|Tspan31|Qtrt1|Golim4",
  "Tfr2|Zfp511|Hmbox1|Snx13|Atraid|Lyrm2|Paxx|Rps15"
), split = "|", fixed = TRUE)))
# DE6.M1M3.0.1: integer row indices of G172.umis (ascending, as
# grep(value = FALSE) returned) whose row name equals one of the gene symbols
# listed below.
# Symbols are compared exactly with %in% rather than passed to grep() as one
# unanchored alternation. This matters most here because the list contains the
# single-letter entry "a" (presumably the mouse nonagouti gene symbol --
# TODO confirm): as a regex alternative it selected every row name containing
# a lowercase "a". The symbols are kept in short "|"-separated chunks: the
# former pattern literal continued across a line break, which put a newline
# inside "Nup155" so that symbol could never match.
DE6.M1M3.0.1 <- which(rownames(G172.umis) %in% unlist(strsplit(c(
  "Smim4|Prdm2|Pdss2|Vps53|Usp50|Acin1|Zbtb44|Atp6v1g1|Ppp2r5c|Ilvbl",
  "Kdm5a|Wdr91|Oaz2|Pskh1|Sharpin|Fabp2|Suclg1|Slc39a10|Rab22a",
  "3110043O21Rik|Med28|Lyrm1|Xpo7|Leprotl1|Zfr|Atf1|Tkfc|Tmed9|Mettl7a1",
  "Dhx32|Tcp11l2|Tfeb|Acyp1|Ppp6c|Golga3|Pdpk1|Ptov1|Pfdn6|Snd1",
  "Rps6ka1|Cul5|Rad17|Dnajb12|Emc7|Phf11c|Stt3b|Zfp592|Hist1h1c|Tdrd7",
  "Ttyh2|Timp3|Rsf1|Lrrc28|Cdk17|Immt|Dnajc11|Cfap20|S100pbp|Btg1",
  "Ube2e1|March7|Asap3|Fam98a|Naa30|Taf13|Tipin|Pip5k1c|Pole4|Gucd1",
  "Smim14|Pcgf2|Maf|Mrps33|Ssr2|Fgfr1|Rnh1|Rpusd3|Igsf5|Pom121",
  "Utp20|Lyn|E2f4|Cbfb|Lpgat1|Ncaph2|Spop|Srp19|Uox|3110021N24Rik",
  "Atp5f1|Myo6|Farsb|Lsm1|Rpl18a|Ndufaf7|Selenof|Thoc1|Lypla2|Tgfbr1",
  "Gm43064|Mtpn|Mrps18a|Aars|Cirbp|Tcp1|Phkg1|Slc25a39|Epn1|Ranbp17",
  "Casd1|Mapk1ip1l|Pkdcc|Sept7|Cand1|a|Pitpnb|Lrpap1|Tmem186|Yme1l1",
  "Gpbp1l1|Sirpa|Rpl23|Commd3|Vps28|Adam10|Gmps|Sec23ip|Naa25|Tgfbrap1",
  "Pecr|Mars|Sgf29|Slc25a46|Rfc2|Hnrnpa1|Psma7|Foxp4|Parn|Txnrd1",
  "4930453N24Rik|Prorsd1|Atg4a|Aqp8|Znrf1|Cdk11b|Man2a2|Sgta|Btbd10|Ddx31",
  "Eci2|Spr|Iars2|Cast|Med15|Zranb2|Trim11|Psmc6|Rex1bd|Tspan12",
  "Ankrd36|Pdcd7|Tut1|Rnf126|Dnajc5|Mroh1|Plcg1|Nucks1|Cct7|Bid",
  "Cdadc1|Cmtm4|Exosc7|Abcf1|Tbc1d12|Zbtb43|Kif1c|Sbno1|Srpk1|Myh9",
  "Plaa|Rack1|Akr1c6|Rps16|Phc3|Nmnat1|Ogdh|Uggt1|Epb41l4b|Herpud2",
  "Mfap3|Cactin|Ddx42|Cxxc1|Mctp2|2310039H08Rik|Tm9sf3|Rps5|Trim25|Mrps11",
  "Uckl1|Aes|Wdr12|Zfp367|Nudcd1|Pigyl|Hnrnpa0|Ep400|Flcn|Serpina1b",
  "1110065P20Rik|Phf8|Eif2ak1|Cops3|Pdzrn3|Nr5a2|2010107E04Rik|Tmem175",
  "Rin2|Usp39|Eef1d|Psmf1|1700123O20Rik|Trub2|Atp5h|Mad2l2|Lrrc3|Smdt1",
  "Zbtb24|Wdr89|Ublcp1|Epas1|Fermt1|Tuba4a|Rps19|Abi2|Ccdc82|Zkscan7",
  "Srrm2|Wdr61|Txlng|Ntmt1|Nfs1|Uros|Ccdc77|1700017B05Rik|Ubap2|Ppp1r15b",
  "Cct4|Mt1|BC031181|Rnf216|Dhx29|Ablim3|Synrg|Zfp263|Nol9|Nup153",
  "Habp2|Mphosph10|Rad50|Rpsa|C87436|Mocs1|Fastkd1|Dcaf17|Lmbrd1|Foxk2",
  "Ccdc107|Epg5|Hars|Eif2ak2|Hras|Stt3a|Epo|Efna1|Uri1|Epc2",
  "Stx8|Hyou1|Zfp646|1700109H08Rik|Dnajc21|Klhdc10|Ap4m1|Tmem70|Dync2h1",
  "Mtmr10|Mob1a|Bop1|Alas1|P2rx4|Thap2|Mylip|H13|Ndufb10|Nelfa",
  "Prelid1|Iqcb1|Snx33|Zhx3|Nup155|Mrap|Gm14325|Clk2|Eml3|Suz12",
  "Necap1|Tmem123|Map3k7|Taz|Rab3gap1|Lrrc51|Fam32a|Wars2|Skp1a|Vipas39",
  "Pcif1|Slc16a4|Ndufa8|Cyp2d26|Jmy|Tmem222|Tigd2|Eef2|Nabp1|Ssrp1",
  "Thnsl2|Dhx16|Pgam5|Tsfm|Syn3|Pnkp|Uxs1|B3galnt2|Ggnbp2|Apmap",
  "Cdc42se2|Lss|Mrpl27|Kctd2|Lsm4|Polr3f|Atp5d|Rps17|Trim33|Wdcp",
  "Rpl32|Rab9|Slc18a1|Rab11b|Nceh1|Clec16a|Pepd|Trappc13|Rabgap1l|Psmc1",
  "Nectin1|Ascc3|Aftph|Mtch2|Grip1|Map1lc3a|Fancl|Gemin5|Shprh|Adam19",
  "Lage3|Nom1|Dnajc24|Pgs1|Prrg2|Ctdspl2|Grk5|Pik3c3|Slc35e2|Stap2",
  "Uvssa|Mvb12a|Slc8b1|Rhbdd1|Ptpn9|Nck2|Csnk1g2|Laptm4a|Pus10|Ube2g2",
  "Pir|Wrap53|Idh3g|Med8|Pomp|BC005624|Pcbp1|Polr2c|Klc4|F8",
  "Ywhae|Brd7|Ppp4r1|Adnp2|Dennd5a|Prrc1|Ino80d|Zswim5|Rnf6|Otud4",
  "Prr14l|Tmem106a|Cdc37|Col4a3bp|Tbc1d15|Sprtn|Phtf1|Bbs7|Mapk8ip3|Sugp1",
  "Depdc7|Yipf5|Gpd2|Calu|Wbp2|Pnldc1|Slc35d2|Sf3b1|Tmem258|Gpr89",
  "Pank2|Rps2|Ip6k1|Dolk|Ube2m|Smc5|Mrpl48|Cmc4|Maml1|Ccdc138",
  "Rps4x|Pik3c2a|Emsy|Med4|Gng5|Rps26|Mettl27|Pak7|Psmb5|Poc1b",
  "Cideb|Ppp1r10|Cbwd1|Ddx10|Lman2l|Dld|Mecr|Ptgr2|Cep57|Phb2",
  "Cnpy2|Scamp2|Mdn1|Fip1l1|Parp4|Pxk|Bola3|Ube2j1|Stxbp3|Ahsg",
  "Dgkq|Zcchc9|Mfsd11|Sh3gl1|Tpi1|Sdccag3|Tdrd3|Hnf4g|Polr1d|Hopx",
  "Gid4|Tmod3|Rpl12|Safb|Btbd9|Vdac3|Akap9|Cep97|Rfwd3|March8",
  "Smim12|Ccdc117|Abraxas1|Hbp1|Pdzd8|Fbxw9|Gapvd1|Thap3|Kat6a|Setdb1",
  "Fam241a|Dph5|Slc25a13|Trim6|Abcf3|Tnfrsf1b|Desi1|Paxbp1|Slc38a10|Nedd1",
  "AU040320|H2-Ke6|Tmem161b|Surf4|Cul2|Fat1|Adcy6|Trim41|Naa15|Rtf1",
  "Dnajb6|Csnk2b|Cep350|Grn|Zfp282|Zfp346|Amdhd1|Acox3|Ubxn7|Tardbp",
  "E130308A19Rik|Selenok|Eif3b|Vegfd|Pcnp|Irf3|Api5|Ndufv1|Zbtb41|Dcaf8",
  "2510039O18Rik|Nfic|Dalrd3|Smarcc1|Ino80b|Abcc6|Sft2d3|Sipa1l3|Cpn2",
  "Ola1|Gnb1|Galc|Ube2q2|Ncbp2|Vps26a|Prr13|Hace1|Zfp260|Coq7",
  "Hagh|Mief1|Lias|Elf2"
), split = "|", fixed = TRUE)))

```






```{r}
# Row indices of G172.umis whose names are in the DE1.M2M4.0.1 gene list.
# Matching is by exact name: the previous unanchored regex also selected every row
# whose name merely contained a listed symbol (e.g. "C3", "Atr", "ncRNA-inter-chr1-161"),
# and a line break inside the string literal split "Stard5" so it could never match.
DE1.M2M4.0.1 <- which(rownames(G172.umis) %in% strsplit("Gm26992|Etfbkmt|ncRNA-inter-chrX-15446|ncRNA-inter-chr8-6726|Sult3a2|ncRNA-as-chr6-5589|ncRNA-intra-chr6-5593|Gm3839|Acsl1|ncRNA-inter-chr3-2156|Tymp|Errfi1|Ghr|Carmil1|ncRNA-as-chr15-12340|Cyp2c23|Pkhd1|Car1|G6pc|Adgrv1|ncRNA-as-chr4-3532|Dpyd|ncRNA-inter-chr11-9512|Alb|ncRNA-as-chr15-12321|Gm4756|C3|Apoe|Sugct|Dlgap1|Pbld1|Fga|Slc7a2|Vwa8|Nckap5|Klf9|Gm6614|Nfia|Rbpms|Hmgcs1|Mtif2|Peak1|ncRNA-inter-chr17-13909|Sult2a8|Clpx|Syne1|Baiap2l1|ncRNA-as-chr10-8791|ncRNA-inter-chr18-14605|Zfp697|ncRNA-as-chr2-1904|Cxadr|Sox5|Mapk15|Gm5934|Gm28305|ncRNA-inter-chr15-12608|Gm42906|Lpin2|Abcb11|Echdc2|ncRNA-as-chrX-15320|Abhd14b|Hmgcs2|ncRNA-as-chr15-12323|Tmem219|Gm37240|ncRNA-as-chr6-5860|ncRNA-as-chr19-14772|Eva1a|Glud1|Adra1b|Bri3|Acat1|ncRNA-as-chr4-3298|Fgfr4|Mrnip|Adh1|Fmo5|Iigp1|Sdr9c7|Slc39a14|Ttc7b|Suds3|ncRNA-inter-chr5-4221|Igf2bp3|ncRNA-inter-chr11-9948|Adck5|Sirt3|Col6a6|Chrm3|ncRNA-inter-chr4-3819|Fgfr2|Iah1|Immp2l|ncRNA-as-chr8-6954|Rint1|Saa4|Nhsl1|Gcnt7|Abcb4|Sh3d19|ncRNA-inter-chr3-2161|ncRNA-as-chr10-8962|Slc10a1|Timm9|ncRNA-as-chr2-1543|Tmem126a|ncRNA-inter-chr6-4887|Gbe1|Fam214a|Trp53inp2|Fech|Thnsl2|ncRNA-as-chr16-13178|Tmem150a|Eif1|Neb|Vmp1|ncRNA-as-chr17-13683|Nfib|Gm33543|ncRNA-as-chr7-6547|Herpud1|ncRNA-intra-chr6-5591|Cps1|Zbtb20|Zfp707|Ap3m1|Mettl23|Atp5o|Blm|Sdr42e1|Vcl|Ppara|Mab21l3|Prpf4b|Igf1|Preb|Tshz2|Shtn1|Nedd4l|Cabyr|ncRNA-as-chr4-3500|Zdhhc14|ncRNA-as-chr9-7702|Slc25a42|ncRNA-inter-chr13-11227|Stim2|Gm4952|ncRNA-intra-chr7-5919|Mecr|ncRNA-inter-chr12-10880|ncRNA-inter-chr4-3698|St3gal3|Hsd17b13|Plin2|Strbp|4833420G17Rik|Thap2|Fbf1|Zfand6|Ugt2a3|Olfr56|ncRNA-as-chr5-4334|Sco2|Numb|Bmp1|Dab1|ncRNA-as-chr2-1884|Adipor2|Irgm2|ncRNA-as-chr2-1145|ncRNA-as-chr12-10521|ncRNA-as-chr11-10075|Rnf25|Atr|Etfdh|F11|ncRNA-inter-chr10-9264|Bcas3|Acox2|Kyat1|ncRNA-inter-chr4-3294|Sqstm1|Proser1|Tmem243|Pid1|Btbd9|Me1|ncRNA-as-chr16-13380|Smad9|Smim13|Gucd1|Drg1|Phyh|Pde4c|ncRNA-inter-chr10-8794|Nr1h4|Mtss1|Elovl5|Slc37a4|Lztfl1|Stard5|Dnase2b|C1rl|Xiap|Msmo1|Ypel3|ncRNA-as-chr17-14018|Aaed1|Ggact|ncRNA-inter-chr1-846|ncRNA-inter-chr1-161|Ces1g|Sdc4|Ppp1r9a|Ap3s1|Mettl26|Trim7|Dhps|Rbfox1|Trim28|ncRNA-inter-chr11-10189|Eci2|Stx16|Gm10563|Dhrs4|Slc27a2|Gin1|Atp6v0b|Cpt1a|Nrp1|ncRNA-as-chr18-14628|Pxmp4|Rora|ncRNA-as-chr7-6161|ncRNA-inter-chr11-9922|Ttc14|Tfr2|Ifnk|Oxld1|Hadha|Nos1ap|Sf3b3|ncRNA-inter-chr12-10595|ncRNA-inter-chr15-12365|Fyco1|ncRNA-inter-chr12-10270", split = "|", fixed = TRUE)[[1]])
# Row indices of G172.umis whose names are in the DE2.M2M4.0.1 gene list.
# Matching is by exact name: the previous unanchored regex also selected every row
# whose name merely contained a listed symbol (e.g. "Son", "Lpp", "ncRNA-inter-chr1-109"),
# and a line break inside the string literal split "Gch1" so it could never match.
DE2.M2M4.0.1 <- which(rownames(G172.umis) %in% strsplit("Fam210b|ncRNA-as-chr1-979|Aldh1l1|ncRNA-inter-chr8-6975|ncRNA-inter-chr17-13928|Dnaja2|Plcb1|Net1|ncRNA-inter-chr6-5862|Lonp2|ncRNA-as-chr9-8316|Peli1|Them4|2810474O19Rik|Gclm|Zpr1|ncRNA-as-chr9-7825|ncRNA-inter-chr12-10561|Gria3|ncRNA-inter-chr13-11669|Ackr4|Rnf169|Ttpa|Gsdme|ncRNA-inter-chr12-10555|Slc7a8|Ggps1|1810022K09Rik|Slc24a5|ncRNA-as-chr4-3083|Rtp3|Son|Ddhd2|Dclre1c|ncRNA-as-chr19-15060|ncRNA-inter-chr5-4395|ncRNA-as-chr2-1439|Astn2|Traf6|Ssbp2|Slc35e2|ncRNA-as-chr6-5261|Ror1|Osbpl9|Pdrg1|ncRNA-as-chr11-10136|Nme7|1700037C18Rik|Cdip1|ncRNA-inter-chr1-109|Lims2|Hopx|Eef1akmt1|Pld1|Slc38a4|Esco1|Cox19|ncRNA-inter-chrX-15437|Ppdpf|Mrps10|Inca1|Bet1|ncRNA-inter-chr15-12713|ncRNA-as-chr1-373|Apobec1|Plcxd2|Sash1|Zfp385b|ncRNA-inter-chr1-292|Ocel1|ncRNA-inter-chr17-14182|Aldh9a1|Oaz1|Stat3|Tpk1|Zkscan1|Fbxl20|Mettl7a1|ncRNA-inter-chr11-9911|P2ry14|Pycrl|Prkd3|ncRNA-as-chr6-5634|Sigmar1|ncRNA-as-chr7-6100|ncRNA-as-chr11-9956|Myo6|Lpp|ncRNA-inter-chr4-3458|Slc16a4|Acsm3|ncRNA-as-chr2-1073|Dmac2|Zfr2|Rbp4|Gpt2|Agmo|ncRNA-inter-chr19-14876|ncRNA-as-chr17-14065|Rsph3b|Nek1|Slc25a20|Aacs|Tap1|Gm11639|Akr1c6|2810459M11Rik|ncRNA-inter-chr19-14746|ncRNA-inter-chr6-5561|Stk16|Dlat|Psmd7|Pibf1|Phlpp1|ncRNA-inter-chr19-15180|Nfx1|Ip6k2|Abi2|Zfp951|Gon4l|Gm12185|Mettl27|Lactb|Chdh|9030624G23Rik|ncRNA-inter-chr4-3352|Gpat3|Ppp3cc|Nr2c2ap|Tlcd1|Morn1|ncRNA-inter-chr5-4052|Catspere2|Zfp646|Adam11|ncRNA-inter-chr17-14128|Spg20|Cenpv|B4galt5|ncRNA-inter-chr3-2128|Ncoa5|BC049762|Zscan26|Apob|Fuca1|Sirpa|Rnps1|Pawr|Vps9d1|Ddx39b|Crebzf|Vwce|ncRNA-as-chr1-252|Rnf4|Cfap54|Snw1|Evi5|Mib2|Pex19|Ak2|Pura|ncRNA-inter-chr4-3707|Yipf5|Ergic2|Plpp3|Ifi35|Rab11b|ncRNA-as-chr5-3947|Ptprg|Tmem106a|Polr1a|Amacr|Prpsap1|ncRNA-inter-chr6-4853|Crtc3|ncRNA-inter-chr3-2134|Dip2c|Gm7298|Saysd1|Deptor|Dpf3|H2-D1|ncRNA-inter-chr19-15138|ncRNA-as-chr13-11149|Apoh|Pdcd4|Mpp1|Apbb2|Slc16a10|Timm23|Wsb1|ncRNA-as-chr15-12821|2310039H08Rik|Phf7|Scamp1|B3gat3|ncRNA-as-chr2-1550|Pan3|Gch1|Hmgcl|Tom1l2|Srsf7|ncRNA-inter-chr1-120|Ttc19|ncRNA-as-chr4-3023|Fam111a|Slc35b3|ncRNA-inter-chr1-840|Pls3|ncRNA-as-chr5-4079|Asic5|ncRNA-inter-chr8-6878|ncRNA-inter-chr12-10462|Serf2|Hibadh|Ndfip1|Trpm3|A230050P20Rik|Zfp799|Akap8l|ncRNA-inter-chr12-10403|Leng8|Mat2a|Lars2|ncRNA-as-chr6-5726|Zfp141|Slc22a23|AI182371|Pqlc1|ncRNA-inter-chr6-5510|Ulk2|ncRNA-as-chr10-9073|ncRNA-inter-chr2-1959|Ppil2|ncRNA-inter-chr12-10973|Herc6|Ocln|Uvrag|ncRNA-inter-chr5-4278", split = "|", fixed = TRUE)[[1]])
# Row indices of G172.umis whose names are in the DE3.M2M4.0.1 gene list.
# Matching is by exact name: the previous unanchored regex also selected every row
# whose name merely contained a listed symbol (e.g. "Gne", "Lars", "Pts"), treated the
# "." in "AC149090.1" as a wildcard, and carried a line break inside the string
# literal directly after "Amot", so that alternative could never match.
DE3.M2M4.0.1 <- which(rownames(G172.umis) %in% strsplit("Mfsd11|Clock|Dcakd|ncRNA-as-chr18-14332|ncRNA-inter-chr6-5384|Gm765|Atp11b|ncRNA-inter-chr12-10930|Senp5|ncRNA-as-chr6-5266|Palmd|D230025D16Rik|Zc3h15|ncRNA-as-chr5-4281|Tuft1|Cyp4f13|Nmnat3|Ccdc174|Dact2|Gne|mt-Co3|ncRNA-intra-chr19-15016|Dido1|Itpr2|ncRNA-as-chr5-4600|Tkfc|ncRNA-as-chr16-13310|Myo1e|ncRNA-inter-chr1-119|Hnrnpl|Brd1|Aven|Dnah7a|AC149090.1|Fpgs|Ddx50|Hmces|Sec24c|Eci1|Paip2|Thoc1|Mylk|Klhl2|Zc3h6|Dnase2a|Fahd1|Acbd5|Nudcd2|Rmdn1|Wdr82|Myo10|Fbxw9|Slc17a5|Sharpin|Cabin1|Txndc15|AU022252|Srek1|Coq3|ncRNA-inter-chr14-11988|Irf3|Zcchc6|Slc25a13|Mrpl18|Pdia5|Ldlr|Tmed9|Klhdc8b|Rcbtb1|Adap2|B2m|Klhl24|Lbp|ncRNA-as-chr8-7337|ncRNA-as-chr10-8953|Sf3b6|Slc33a1|Lrpprc|Srsf4|Mindy3|Gstp3|Vps37a|Dnajb4|ncRNA-as-chr19-15065|Mast3|Skp1a|Zbtb1|Ndufa10|Psip1|ncRNA-as-chr8-7367|ncRNA-inter-chr11-9923|Btbd1|Slc38a11|Sema6b|ncRNA-inter-chr5-4321|Hsf2|Zmynd8|Luzp1|Pdgfa|ncRNA-inter-chr4-3038|Ube2b|Lamp2|Atl3|Prdx6|Ei24|Cecr2|Zc3h14|Cdc40|Ccdc125|Gspt2|Gpd1|ncRNA-as-chr5-4384|mt-Nd2|Casc3|Ell2|Rpl36|Sh3rf1|Lrrc29|Edf1|ncRNA-as-chr10-8939|A430033K04Rik|Lrig1|Ctsh|ncRNA-as-chr11-10222|Smap1|Ntan1|Clint1|Atxn1|Thap3|Irak4|Ugt3a1|Hnrnph3|Pja1|Btg1|Tgfbr2|Cox11|Tmem29|Brap|Gdap2|Rnf214|F13b|ncRNA-as-chr16-13146|Tpst2|Pik3c2a|ncRNA-inter-chr19-14964|Bcdin3d|Ahnak|Smim4|Commd7|Top2b|ncRNA-inter-chr11-10215|ncRNA-inter-chr13-11374|ncRNA-as-chr16-13153|ncRNA-as-chr8-7096|ncRNA-as-chr4-3820|Kxd1|Mzt1|Tmem143|Eif4ebp2|Mbtd1|Rbm47|Nipsnap3b|ncRNA-as-chr10-9130|ncRNA-inter-chr17-14177|Dhx40|Akap8|Pls1|Srsf11|Pex13|Cldn2|Osbpl1a|Cfap20|Mrpl52|ncRNA-as-chr7-6300|Zfp933|1110059E24Rik|Tab2|Gle1|Yaf2|Kmo|Lcp1|Gnl2|ncRNA-as-chr15-12467|Lsm6|Eps15|ncRNA-inter-chr17-14187|ncRNA-as-chr19-14904|Ighmbp2|ncRNA-as-chr2-1542|Atat1|Mtmr1|ncRNA-as-chr8-7410|Depdc7|Ccs|Scyl2|mt-Co2|Lars|Pts|Rexo1|Tssc4|Usp53|ncRNA-as-chr4-3559|Gfod1|Nars2|ncRNA-inter-chr4-3579|Eif3j1|ncRNA-as-chr1-795|ncRNA-inter-chr8-6981|Pde4a|ncRNA-inter-chr3-2881|Slc39a10|E030030I06Rik|Camk1|Ppp2r1b|Amot|ncRNA-inter-chr17-14010|Cntn5|Ube2h|Fbxo36|Wbp1l|ncRNA-as-chr2-2108|Zbtb44|Hectd2|Mgrn1", split = "|", fixed = TRUE)[[1]])

```




```{r}
# Combine the row-index sets of genes to exclude and build the filtered UMI matrices.
# NOTE(review): x1..x5, TCPO* and DE*.M1M3.0.1 are defined outside this section and
# are assumed to be positive integer row indices into G172.umis — confirm.
All.sex.index <- c(x1, x2, x3, x4, x5)
All.sex.TCPO.index <- c(
  x1, x2, x3, x4, x5,
  TCPO1, TCPO2, TCPO3, TCPO4, TCPO5, TCPO6, TCPO6.1, TCPO6.2,
  TCPO7, TCPO7.1, TCPO7.2, TCPO8, TCPO8.1, TCPO8.2, TCPO9, TCPO10
)

DE.G172M1M3.0.1 <- c(
  DE1.M1M3.0.1, DE2.M1M3.0.1, DE3.M1M3.0.1, DE4.M1M3.0.1, DE5.M1M3.0.1,
  DE6.M1M3.0.1, DE7.M1M3.0.1, DE8.M1M3.0.1, DE9.M1M3.0.1, DE10.M1M3.0.1
)
# NOTE(review): only DE1..DE6 enter the batch index, although DE.G172M1M3.0.1
# collects DE1..DE10 — confirm that this is intended.
All.sex.index.batch <- c(
  x1, x2, x3, x4, x5,
  DE1.M1M3.0.1, DE2.M1M3.0.1, DE3.M1M3.0.1, DE4.M1M3.0.1, DE5.M1M3.0.1, DE6.M1M3.0.1
)

DE.G172M2M4.0.1 <- c(DE1.M2M4.0.1, DE2.M2M4.0.1, DE3.M2M4.0.1)
All.sex.TCPO.index.batch <- c(All.sex.TCPO.index, DE.G172M2M4.0.1)

# Remove the rows listed in `drop_idx` from `counts`.
# `counts[-drop_idx, ]` returns zero rows when `drop_idx` is empty (a negated empty
# index is still an empty index), so the rows to keep are computed explicitly.
drop_gene_rows <- function(counts, drop_idx) {
  keep_idx <- setdiff(seq_len(nrow(counts)), drop_idx)
  counts[keep_idx, , drop = FALSE]
}

G172.umis.sex <- drop_gene_rows(G172.umis, All.sex.index)
G172.umis.sex.batch <- drop_gene_rows(G172.umis, All.sex.index.batch)
G172.umis.sex.TCPO.batch <- drop_gene_rows(G172.umis, All.sex.TCPO.index.batch)
G172.umis.sex.TCPO <- drop_gene_rows(G172.umis, All.sex.TCPO.index)

# Build the Seurat objects: one from the complete UMI matrix (annotated with the
# `lncRNA` metadata as column "nlncRNA") and one per filtered UMI matrix.
# Alternative input for the main object: CreateSeuratObject(counts = G172.umis.sex)
G172.hashtag <- AddMetaData(
  object = CreateSeuratObject(counts = G172.umis),
  metadata = lncRNA,
  col.name = "nlncRNA"
)

G172.hashtag.sex.batch <- CreateSeuratObject(counts = G172.umis.sex.batch)
G172.hashtag.TCPO <- CreateSeuratObject(counts = G172.umis.sex.TCPO)
G172.hashtag.sex.TCPO.batch <- CreateSeuratObject(counts = G172.umis.sex.TCPO.batch)


# Log-normalise the full object, pick 2000 variable features ("vst"), scale them,
# run a 30-component PCA and use all 30 PCs for UMAP, neighbours and clustering.
G172.hashtag <- G172.hashtag %>%
  NormalizeData() %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)

G172.hashtag.p1 <- UMAPPlot(G172.hashtag, reduction = "umap", label = TRUE, label.size = 5)
G172.hashtag.p1

DefaultAssay(G172.hashtag) <- "RNA"
G172.hashtag <- NormalizeData(G172.hashtag, verbose = TRUE)
# NOTE(review): `All_` is not defined in the visible part of the file and looks like a
# truncated identifier — confirm which gene vector is meant.
d1 <- DotPlot(G172.hashtag, features = All_) + RotatedAxis()
# Pair the dot plot with the UMAP of this same object. The previous call used
# G172.M1.p1, which belongs to the M1 subset and is only created further down.
plot_grid(G172.hashtag.p1, d1)
FeaturePlot(G172.hashtag, features = "Cyp2c55")
FeaturePlot(G172.hashtag, features = "ncRNA-inter-chrX-15394")

# Store the hashtag counts in a separate "HTO" assay next to RNA.
# Alternative input: CreateAssayObject(counts = G172.htos)
G172.hashtag[["HTO"]] <- CreateAssayObject(counts = G172.Xist.ChrY)

# CLR-normalise the HTO assay, then demultiplex with HTODemux using the 0.99
# positive quantile and the k-medoid "clara" clustering function.
G172.hashtag <- G172.hashtag %>%
  NormalizeData(assay = "HTO", normalization.method = "CLR") %>%
  HTODemux(assay = "HTO", positive.quantile = 0.99, kfunc = "clara")

# Tabulate the global classes and the per-hashtag assignment.
table(G172.hashtag$HTO_classification.global)
table(G172.hashtag$hash.ID)

# Use the hashtag assignment as cell identity and plot the first four HTO features.
Idents(G172.hashtag) <- "hash.ID"
RidgePlot(
  G172.hashtag,
  assay = "HTO",
  features = rownames(G172.hashtag[["HTO"]])[1:4],
  ncol = 2,
  nrow = 2
)
G172.hashtag$HTO_classification <- G172.hashtag$hash.ID
# Alternative identity: Idents(G172.hashtag) <- "HTO_classification.global"

# Keep every cell whose identity is neither "Negative" nor "Doublet".
G172.hashtag.subset <- subset(
  G172.hashtag,
  idents = c("Negative", "Doublet"),
  invert = TRUE
)

# Pairwise scatter plots of the four hashtag features, in the order
# M1-M2, M1-M3, M1-M4, M2-M3, M2-M4, M3-M4.
hto_tags <- c(
  "M1-ATGATGAACAGCCAG",
  "M2-TGACGCCGTTGTTGT",
  "M3-GCCTAGTATGATCCA",
  "M4-AGTCACAGTATTCCA"
)
for (tag_pair in combn(hto_tags, 2, simplify = FALSE)) {
  # print() is needed because plots are not auto-printed inside a loop
  print(FeatureScatter(G172.hashtag.subset, feature1 = tag_pair[1], feature2 = tag_pair[2]))
}


# Cell-to-cell distance matrix computed on the HTO assay of the retained cells.
hto.dist.mtx <- G172.hashtag.subset %>%
  GetAssayData(assay = "HTO") %>%
  t() %>%
  dist() %>%
  as.matrix()

# Normalise, select variable features ("mean.var.plot"), scale, run a 30-component
# PCA, then compute the tSNE embedding from the HTO distance matrix.
G172.hashtag.subset <- G172.hashtag.subset %>%
  NormalizeData() %>%
  FindVariableFeatures(selection.method = "mean.var.plot") %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunTSNE(distance.matrix = hto.dist.mtx, perplexity = 100)

DimPlot(G172.hashtag.subset)
HTOHeatmap(G172.hashtag, assay = "HTO", ncells = 3030)


########################## Rescue doublets ########################
# Re-run HTO demultiplexing on the cells whose identity is "Negative".
# NOTE(review): the objects are named "doublet", but the subset selects "Negative"
# cells — confirm which class is meant to be rescued.
G172.doublet <- subset(G172.hashtag, idents = "Negative")
G172.doublet.rescue <- HTODemux(
  G172.doublet,
  assay = "HTO",
  positive.quantile = 0.99,
  kfunc = "clara"
)
Idents(G172.doublet.rescue) <- "hash.ID"
RidgePlot(
  G172.doublet.rescue,
  assay = "HTO",
  features = rownames(G172.doublet.rescue[["HTO"]])[1:4],
  ncol = 2,
  nrow = 2
)
###############################################################################


# Extract the M1 singlets.
# The QC-filtered subset that used to be computed first was overwritten by the
# unfiltered one on the next line, so only the unfiltered subset is built here.
# NOTE(review): the M2/M3/M4 sections filter on nFeature_RNA > 500 & nCount_RNA > 1000;
# confirm whether M1 should use the same filter:
#   subset(G172.hashtag.subset, idents = "M1-ATGATGAACAGCCAG",
#          subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M1 <- subset(G172.hashtag.subset, idents = "M1-ATGATGAACAGCCAG")
# Alternative: G172.M1 <- G172.hashtag

G172.M1$stim <- "G172M1"
DefaultAssay(G172.M1) <- "RNA"

# SCTransform, then log-normalise, pick 2000 variable features, scale them and run a
# 30-component PCA; the first 25 PCs feed UMAP, neighbour search and clustering.
# NOTE(review): the steps after SCTransform act on whichever assay SCTransform leaves
# as the default — confirm that this is intended.
G172.M1 <- G172.M1 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M1.p1 <- UMAPPlot(G172.M1, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.p1

# Switch back to the RNA assay and re-normalise it before the marker dot plot.
DefaultAssay(G172.M1) <- "RNA"
G172.M1 <- NormalizeData(G172.M1, verbose = TRUE)
d1 <- DotPlot(G172.M1, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.p1, d1)


 # Pseudobulk the M1 clusters: within every cluster, consecutive groups of five cells
 # are summed per gene, and the pooled columns of all clusters are bound into merge_df.
# The previous code spelled out clusters 0..9 by hand, so it failed (or reused stale
# p*/mydf* objects) with fewer than ten clusters and silently ignored any beyond the
# tenth. All clusters reported by seurat_clusters are now processed.

# Sum the RNA counts of consecutive groups of five cells (columns) of a Seurat object.
pool_counts_by_five <- function(seurat_obj) {
  counts_df <- as.data.frame(seurat_obj@assays$RNA@counts)
  pool_id <- 0:(length(counts_df) - 1) %/% 5
  sapply(split.default(counts_df, pool_id), rowSums)
}

m1_cluster_ids <- seq_len(nlevels(G172.M1@meta.data$seurat_clusters)) - 1
m1_pooled <- vector("list", length(m1_cluster_ids))
for (k in seq_along(m1_cluster_ids)) {
  i <- m1_cluster_ids[k]
  cluster_obj <- subset(G172.M1, idents = i)
  pooled <- pool_counts_by_five(cluster_obj)
  colnames(pooled) <- paste(paste0("C", i), colnames(pooled), sep = "_")
  # Keep the per-cluster objects available under their former names (p0, mydf0, ...).
  assign(paste0("p", i), cluster_obj)
  assign(paste0("mydf", i), pooled)
  m1_pooled[[k]] <- pooled
}

merge_df <- do.call(cbind, m1_pooled)


# Seurat object built from the pooled (pseudobulk) M1 count matrix.
G172.M1.merge <- CreateSeuratObject(counts = merge_df)
DefaultAssay(G172.M1) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M1.merge <- G172.M1.merge %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M1.p1.merge <- UMAPPlot(G172.M1.merge, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.p1.merge

# Back to the RNA assay for the marker dot plot.
DefaultAssay(G172.M1.merge) <- "RNA"
G172.M1.merge <- NormalizeData(G172.M1.merge, verbose = TRUE)
d1 <- DotPlot(G172.M1.merge, features = all_genes, cols = c("white", "blue")) + RotatedAxis()
plot_grid(G172.M1.p1.merge, d1)

################################## Extract the singlet for M2 ################################################
# M2 cells with more than 500 detected features and more than 1000 RNA counts.
G172.M2 <- subset(
  G172.hashtag,
  idents = "M2-TGACGCCGTTGTTGT",
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M2$stim <- "G172M2"
DefaultAssay(G172.M2) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M2 <- G172.M2 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M2.p1 <- UMAPPlot(G172.M2, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.p1

# Back to the RNA assay for the marker dot plot.
DefaultAssay(G172.M2) <- "RNA"
G172.M2 <- NormalizeData(G172.M2, verbose = TRUE)
d2 <- DotPlot(G172.M2, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.p1, d2)

# Pseudobulk the M2 clusters: within every cluster, consecutive groups of five cells
# are summed per gene, and the pooled columns of all clusters are bound into merge_df2.
# Two defects of the hand-written version are fixed: the columns of cluster 8 were
# prefixed "C8" instead of "Q8", and exactly nine clusters (0..8) were assumed.

# Sum the RNA counts of consecutive groups of five cells (columns) of a Seurat object.
pool_counts_by_five <- function(seurat_obj) {
  counts_df <- as.data.frame(seurat_obj@assays$RNA@counts)
  pool_id <- 0:(length(counts_df) - 1) %/% 5
  sapply(split.default(counts_df, pool_id), rowSums)
}

m2_cluster_ids <- seq_len(nlevels(G172.M2@meta.data$seurat_clusters)) - 1
m2_pooled <- vector("list", length(m2_cluster_ids))
for (k in seq_along(m2_cluster_ids)) {
  i <- m2_cluster_ids[k]
  cluster_obj <- subset(G172.M2, idents = i)
  pooled <- pool_counts_by_five(cluster_obj)
  colnames(pooled) <- paste(paste0("Q", i), colnames(pooled), sep = "_")
  # Keep the per-cluster objects available under their former names (q0, dfq0, ...).
  assign(paste0("q", i), cluster_obj)
  assign(paste0("dfq", i), pooled)
  m2_pooled[[k]] <- pooled
}

merge_df2 <- do.call(cbind, m2_pooled)
# Seurat object built from the pooled (pseudobulk) M2 count matrix.
G172.M2.merge <- CreateSeuratObject(counts = merge_df2)
DefaultAssay(G172.M2) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M2.merge <- G172.M2.merge %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M2.p1.merge <- UMAPPlot(G172.M2.merge, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.p1.merge

# Back to the RNA assay for the marker dot plot.
DefaultAssay(G172.M2.merge) <- "RNA"
G172.M2.merge <- NormalizeData(G172.M2.merge, verbose = TRUE)
d1 <- DotPlot(G172.M2.merge, features = all_genes, cols = c("white", "blue")) + RotatedAxis()
plot_grid(G172.M2.p1.merge, d1)

#######################################################################################


############################# Extract the singlet for M3 ##############################

# M3 cells with more than 500 detected features and more than 1000 RNA counts.
G172.M3 <- subset(
  G172.hashtag,
  idents = "M3-GCCTAGTATGATCCA",
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
DefaultAssay(G172.M3) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M3 <- G172.M3 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M3.p1 <- UMAPPlot(G172.M3, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.p1

# Back to the RNA assay for the marker dot plot.
DefaultAssay(G172.M3) <- "RNA"
G172.M3 <- NormalizeData(G172.M3, verbose = TRUE)
d3 <- DotPlot(G172.M3, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.p1, d3)




##### Extract the singlet for M4 #####
# M4 cells with more than 500 detected features and more than 1000 RNA counts.
G172.M4 <- subset(
  G172.hashtag,
  idents = "M4-AGTCACAGTATTCCA",
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
DefaultAssay(G172.M4) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M4 <- G172.M4 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M4.p1 <- UMAPPlot(G172.M4, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.p1

# Back to the RNA assay for the marker dot plot.
DefaultAssay(G172.M4) <- "RNA"
G172.M4 <- NormalizeData(G172.M4, verbose = TRUE)
d4 <- DotPlot(G172.M4, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.p1, d4)


############## Combined ####################33
# All four hashtag singlet groups together, with the same QC thresholds
# (more than 500 detected features and more than 1000 RNA counts).
G172.M5 <- subset(
  G172.hashtag,
  idents = c(
    "M1-ATGATGAACAGCCAG",
    "M2-TGACGCCGTTGTTGT",
    "M3-GCCTAGTATGATCCA",
    "M4-AGTCACAGTATTCCA"
  ),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
DefaultAssay(G172.M5) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M5 <- G172.M5 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M5.p1 <- UMAPPlot(G172.M5, reduction = "umap", label = TRUE, label.size = 5)
G172.M5.p1

# Back to the RNA assay; the dot plot adds Cyp2b10 and Cyp2d9 to the marker list.
DefaultAssay(G172.M5) <- "RNA"
G172.M5 <- NormalizeData(G172.M5, verbose = TRUE)
d5 <- DotPlot(G172.M5, features = c(all_genes, "Cyp2b10", "Cyp2d9")) + RotatedAxis()
plot_grid(G172.M5.p1, d5)


### control 
# M1 and M3 cells (labelled "control" in this section) taken from the singlet-only
# object, with more than 500 detected features and more than 1000 RNA counts.
G172.M6 <- subset(
  G172.hashtag.subset,
  idents = c("M1-ATGATGAACAGCCAG", "M3-GCCTAGTATGATCCA"),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
DefaultAssay(G172.M6) <- "RNA"

# No SCTransform here (disabled alternative: SCTransform(G172.M6, verbose = TRUE)).
# Log-normalise, pick 2000 variable features, scale them, run a 30-component PCA,
# then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M6 <- G172.M6 %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M6.p1 <- UMAPPlot(G172.M6, reduction = "umap", label = TRUE, label.size = 5)
G172.M6.p1

# Re-normalise the RNA assay; the dot plot adds Cyp2b10 and Cyp2d9 to the marker list.
DefaultAssay(G172.M6) <- "RNA"
G172.M6 <- NormalizeData(G172.M6, verbose = TRUE)
d6 <- DotPlot(G172.M6, features = c(all_genes, "Cyp2b10", "Cyp2d9")) + RotatedAxis()
plot_grid(G172.M6.p1, d6)


#### TCPO 
# M2 and M4 cells (labelled "TCPO" in this section) with more than 500 detected
# features and more than 1000 RNA counts.
G172.M7 <- subset(
  G172.hashtag,
  idents = c("M2-TGACGCCGTTGTTGT", "M4-AGTCACAGTATTCCA"),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
DefaultAssay(G172.M7) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use the first 25 PCs for UMAP, neighbours and clustering.
G172.M7 <- G172.M7 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.5)

G172.M7.p1 <- UMAPPlot(G172.M7, reduction = "umap", label = TRUE, label.size = 5)
G172.M7.p1

# Back to the RNA assay; the dot plot adds Cyp2b10 and Cyp2d9 to the marker list.
DefaultAssay(G172.M7) <- "RNA"
G172.M7 <- NormalizeData(G172.M7, verbose = TRUE)
d7 <- DotPlot(G172.M7, features = c(all_genes, "Cyp2b10", "Cyp2d9")) + RotatedAxis()
plot_grid(G172.M7.p1, d7, nrow = 2)


```


G172 cells already classified by demuxEM or HTODemux (Seurat), used to calculate TPM
```{r}

# Subset G172.hashtag by the pre-computed barcode lists: barcodes in the `both` lists,
# in the `demuxEM` lists, and in the `HTOdemux` lists, one object per sample (M1..M4).
# NOTE(review): the joint.bcs.* vectors are defined outside this section.
G172.M1.both <- subset(G172.hashtag, cells = joint.bcs.M1.both)
G172.M2.both <- subset(G172.hashtag, cells = joint.bcs.M2.both)
G172.M3.both <- subset(G172.hashtag, cells = joint.bcs.M3.both)
G172.M4.both <- subset(G172.hashtag, cells = joint.bcs.M4.both)

G172.M1.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M1.demuxEM)
G172.M2.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M2.demuxEM)
# Fixed: M3 was previously built from the M4 barcode list (joint.bcs.M4.demuxEM),
# which made G172.M3.demuxEM a copy of G172.M4.demuxEM.
G172.M3.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M3.demuxEM)
G172.M4.demuxEM <- subset(G172.hashtag, cells = joint.bcs.M4.demuxEM)

G172.M1.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M1.HTOdemux)
G172.M2.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M2.HTOdemux)
G172.M3.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M3.HTOdemux)
G172.M4.HTOdemux <- subset(G172.hashtag, cells = joint.bcs.M4.HTOdemux)


# Relative-count ("RC") normalisation of the RNA assay, scaled to 1e6 per cell.
normalize_rc_million <- function(seurat_obj) {
  NormalizeData(
    seurat_obj,
    assay = "RNA",
    normalization.method = "RC",
    scale.factor = 1e6
  )
}

# Per-gene mean of the normalised RNA data across all cells of an object.
mean_rna_data <- function(seurat_obj) {
  rowMeans(as.matrix(seurat_obj@assays$RNA@data))
}

# Cells from the `both` barcode lists
G172.M1.both <- normalize_rc_million(G172.M1.both)
G172.M2.both <- normalize_rc_million(G172.M2.both)
G172.M3.both <- normalize_rc_million(G172.M3.both)
G172.M4.both <- normalize_rc_million(G172.M4.both)

G172.M1.both.avg <- mean_rna_data(G172.M1.both)
G172.M2.both.avg <- mean_rna_data(G172.M2.both)
G172.M3.both.avg <- mean_rna_data(G172.M3.both)
G172.M4.both.avg <- mean_rna_data(G172.M4.both)

# Cells from the `demuxEM` barcode lists
G172.M1.demuxEM <- normalize_rc_million(G172.M1.demuxEM)
G172.M2.demuxEM <- normalize_rc_million(G172.M2.demuxEM)
G172.M3.demuxEM <- normalize_rc_million(G172.M3.demuxEM)
G172.M4.demuxEM <- normalize_rc_million(G172.M4.demuxEM)

G172.M1.demuxEM.avg <- mean_rna_data(G172.M1.demuxEM)
G172.M2.demuxEM.avg <- mean_rna_data(G172.M2.demuxEM)
G172.M3.demuxEM.avg <- mean_rna_data(G172.M3.demuxEM)
G172.M4.demuxEM.avg <- mean_rna_data(G172.M4.demuxEM)

# Cells from the `HTOdemux` barcode lists
G172.M1.HTOdemux <- normalize_rc_million(G172.M1.HTOdemux)
G172.M2.HTOdemux <- normalize_rc_million(G172.M2.HTOdemux)
G172.M3.HTOdemux <- normalize_rc_million(G172.M3.HTOdemux)
G172.M4.HTOdemux <- normalize_rc_million(G172.M4.HTOdemux)

G172.M1.HTOdemux.avg <- mean_rna_data(G172.M1.HTOdemux)
G172.M2.HTOdemux.avg <- mean_rna_data(G172.M2.HTOdemux)
G172.M3.HTOdemux.avg <- mean_rna_data(G172.M3.HTOdemux)
G172.M4.HTOdemux.avg <- mean_rna_data(G172.M4.HTOdemux)

# Top/bottom barcode subsets for M1 and M2: normalise each to relative counts scaled
# to 1e6, then export both the normalised ("TPM") and the raw count matrices.
G172.M1.Top <- subset(G172.hashtag, cells = joint.bcs.M1.top)
G172.M2.Top <- subset(G172.hashtag, cells = joint.bcs.M2.top)
G172.M1.bottom <- subset(G172.hashtag, cells = joint.bcs.M1.bottom)
G172.M2.bottom <- subset(G172.hashtag, cells = joint.bcs.M2.bottom)

G172.M1.Top <- NormalizeData(G172.M1.Top, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.Top <- NormalizeData(G172.M2.Top, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M1.bottom <- NormalizeData(G172.M1.bottom, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)
G172.M2.bottom <- NormalizeData(G172.M2.bottom, assay = "RNA", normalization.method = "RC", scale.factor = 1e6)

# Fixed: the TPM tables were read from the raw `counts` slot, which made them exact
# duplicates of the count tables below. The normalised values are in the `data` slot.
G172_TPM_M1_Top <- as.matrix(G172.M1.Top@assays$RNA@data)
G172_TPM_M2_Top <- as.matrix(G172.M2.Top@assays$RNA@data)
G172_TPM_M1_bottom <- as.matrix(G172.M1.bottom@assays$RNA@data)
G172_TPM_M2_bottom <- as.matrix(G172.M2.bottom@assays$RNA@data)

G172_counts_M1_Top <- as.matrix(G172.M1.Top@assays$RNA@counts)
G172_counts_M2_Top <- as.matrix(G172.M2.Top@assays$RNA@counts)
G172_counts_M1_bottom <- as.matrix(G172.M1.bottom@assays$RNA@counts)
G172_counts_M2_bottom <- as.matrix(G172.M2.bottom@assays$RNA@counts)

write.table(G172_TPM_M1_Top, "demuxEM_HTodemux_analysis/G172_TPM_M1_Top")
write.table(G172_TPM_M2_Top, "demuxEM_HTodemux_analysis/G172_TPM_M2_Top")
write.table(G172_TPM_M1_bottom, "demuxEM_HTodemux_analysis/G172_TPM_M1_bottom")
write.table(G172_TPM_M2_bottom, "demuxEM_HTodemux_analysis/G172_TPM_M2_bottom")

write.table(G172_counts_M1_Top, "demuxEM_HTodemux_analysis/G172_counts_M1_Top")
write.table(G172_counts_M2_Top, "demuxEM_HTodemux_analysis/G172_counts_M2_Top")
write.table(G172_counts_M1_bottom, "demuxEM_HTodemux_analysis/G172_counts_M1_bottom")
write.table(G172_counts_M2_bottom, "demuxEM_HTodemux_analysis/G172_counts_M2_bottom")


# One column of per-gene averages per (labelling method, sample) combination.
combined_avg_prelabelled <- cbind(
  G172.M1.both.avg, G172.M2.both.avg, G172.M3.both.avg, G172.M4.both.avg,
  G172.M1.demuxEM.avg, G172.M2.demuxEM.avg, G172.M3.demuxEM.avg, G172.M4.demuxEM.avg,
  G172.M1.HTOdemux.avg, G172.M2.HTOdemux.avg, G172.M3.HTOdemux.avg, G172.M4.HTOdemux.avg
)

write.table(combined_avg_prelabelled, "demuxEM_HTodemux_analysis/combined_avgTPM_prelabelled")

# Per-cell normalised data of M1 and M2 side by side.
# Fixed: the second operand used to be G172.M1.both again, so the M1 cells were
# duplicated and no M2 cell was included.
combined_M1_M2 <- cbind(
  as.matrix(G172.M1.both@assays$RNA@data),
  as.matrix(G172.M2.both@assays$RNA@data)
)
```


```{r}

# M1 cells from the `both` barcode list that also have more than 500 detected
# features and more than 1000 RNA counts.
# Alternatives: subset(G172.hashtag.sex.batch, cells = joint.bcs.M1.both)
#               subset(G172.hashtag, cells = joint.bcs.M1.both)
G172.M1.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M1.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M1.both$stim <- "G172M1_both"
DefaultAssay(G172.M1.both) <- "RNA"

# SCTransform, log-normalise, pick 2000 variable features, scale them, run a
# 30-component PCA, then use all 30 PCs for UMAP, neighbours and clustering.
G172.M1.both <- G172.M1.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(., features = VariableFeatures(.)) %>%
  RunPCA(., npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)

G172.M1.both.p1 <- UMAPPlot(G172.M1.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.both.p1

# Back to the RNA assay for the marker dot plot.
DefaultAssay(G172.M1.both) <- "RNA"
G172.M1.both <- NormalizeData(G172.M1.both, verbose = TRUE)
d2 <- DotPlot(G172.M1.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.both.p1, d2)

################### M1 clean hepatocytes ##################
# Barcodes in joint.bcs.M1.clean.both, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M1.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M1.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M1.clean.both$stim <- "G172M1_clean_both"
DefaultAssay(G172.M1.clean.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.5.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M1.clean.both <- G172.M1.clean.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.5)
G172.M1.clean.both.p1 <- UMAPPlot(G172.M1.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.clean.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M1.clean.both) <- "RNA"
G172.M1.clean.both <- NormalizeData(G172.M1.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M1.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.clean.both.p1, d2)

################ M1 both, top80 ################
# Barcodes in joint.bcs.M1.both.top80, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M1.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M1.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M1.both.top80$stim <- "G172M1_both_top80"
DefaultAssay(G172.M1.both.top80) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-25 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M1.both.top80 <- G172.M1.both.top80 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.25)
G172.M1.both.top80.p1 <- UMAPPlot(G172.M1.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M1.both.top80.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M1.both.top80) <- "RNA"
G172.M1.both.top80 <- NormalizeData(G172.M1.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M1.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M1.both.top80.p1, d2)



############################################## G172 M2 both ######################
# Barcodes in joint.bcs.M2.both; no feature/count QC filter is applied here
# (the filtered variant is kept below as a commented-out alternative).
#G172.M2.both <- subset(G172.hashtag, cells= joint.bcs.M2.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M2.both <- subset(G172.hashtag, cells = joint.bcs.M2.both)
#G172.M2.both <- subset(G172.hashtag.TCPO, cells= joint.bcs.M2.both)
#G172.M2.both <- subset(G172.hashtag.sex.TCPO.batch, cells= joint.bcs.M2.both)
G172.M2.both$stim <- "G172M2_both"
DefaultAssay(G172.M2.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-20 at resolution 0.35.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M2.both <- G172.M2.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:20) %>%
  FindNeighbors(reduction = "pca", dims = 1:20) %>%
  FindClusters(resolution = 0.35)
G172.M2.both.p1 <- UMAPPlot(G172.M2.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M2.both) <- "RNA"
G172.M2.both <- NormalizeData(G172.M2.both, verbose = TRUE)
d2 <- DotPlot(G172.M2.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.both.p1, d2)

############# M2 clean ########################
# Barcodes in joint.bcs.M2.clean.both, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M2.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M2.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): stim reuses the plain "G172M2_both" label, whereas the M1
# clean subset gets its own "G172M1_clean_both" label -- confirm intended.
G172.M2.clean.both$stim <- "G172M2_both"
DefaultAssay(G172.M2.clean.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.35.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M2.clean.both <- G172.M2.clean.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.35)
G172.M2.clean.both.p1 <- UMAPPlot(G172.M2.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.clean.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M2.clean.both) <- "RNA"
G172.M2.clean.both <- NormalizeData(G172.M2.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M2.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.clean.both.p1, d2)

################# M2 both, top80 ################
# Barcodes in joint.bcs.M2.both.top80, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M2.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M2.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): stim reuses the plain "G172M2_both" label, whereas the M1
# top80 subset gets its own "G172M1_both_top80" label -- confirm intended.
G172.M2.both.top80$stim <- "G172M2_both"
DefaultAssay(G172.M2.both.top80) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.35.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M2.both.top80 <- G172.M2.both.top80 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.35)
G172.M2.both.top80.p1 <- UMAPPlot(G172.M2.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M2.both.top80.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M2.both.top80) <- "RNA"
G172.M2.both.top80 <- NormalizeData(G172.M2.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M2.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M2.both.top80.p1, d2)


############# M3 both #############
# Barcodes in joint.bcs.M3.both, without a feature/count QC filter.
# The QC-filtered subset used to be computed first and was then overwritten by
# the unfiltered one on the next statement; that dead computation is now kept
# only as a commented-out alternative, so the resulting object is unchanged.
#G172.M3.both <- subset(G172.hashtag, cells= joint.bcs.M3.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
#G172.M3.both <- subset(G172.hashtag.sex.batch, cells= joint.bcs.M3.both)
G172.M3.both <- subset(G172.hashtag, cells = joint.bcs.M3.both)
G172.M3.both$stim <- "G172M3_both"
DefaultAssay(G172.M3.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.35.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M3.both <- G172.M3.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.35)
G172.M3.both.p1 <- UMAPPlot(G172.M3.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M3.both) <- "RNA"
G172.M3.both <- NormalizeData(G172.M3.both, verbose = TRUE)
d2 <- DotPlot(G172.M3.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.both.p1, d2)

################### M3 clean ##########################
# Barcodes in joint.bcs.M3.clean.both, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M3.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M3.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): stim reuses the plain "G172M3_both" label, whereas the M1
# clean subset gets its own "G172M1_clean_both" label -- confirm intended.
G172.M3.clean.both$stim <- "G172M3_both"
DefaultAssay(G172.M3.clean.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.4.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M3.clean.both <- G172.M3.clean.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.4)
G172.M3.clean.both.p1 <- UMAPPlot(G172.M3.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.clean.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M3.clean.both) <- "RNA"
G172.M3.clean.both <- NormalizeData(G172.M3.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M3.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.clean.both.p1, d2)

######################## M3 both, top80 #################
# Barcodes in joint.bcs.M3.both.top80, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M3.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M3.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): stim reuses the plain "G172M3_both" label, whereas the M1
# top80 subset gets its own "G172M1_both_top80" label -- confirm intended.
G172.M3.both.top80$stim <- "G172M3_both"
DefaultAssay(G172.M3.both.top80) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.4.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M3.both.top80 <- G172.M3.both.top80 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.4)
G172.M3.both.top80.p1 <- UMAPPlot(G172.M3.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M3.both.top80.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M3.both.top80) <- "RNA"
G172.M3.both.top80 <- NormalizeData(G172.M3.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M3.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M3.both.top80.p1, d2)


############# M4 both #############
# Barcodes in joint.bcs.M4.both, taken from G172.hashtag.sex.TCPO.batch with
# no feature/count QC filter (other variants kept as commented alternatives).
#G172.M4.both <- subset(G172.hashtag, cells= joint.bcs.M4.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
#G172.M4.both <- subset(G172.hashtag.TCPO, cells= joint.bcs.M4.both)
G172.M4.both <- subset(G172.hashtag.sex.TCPO.batch, cells = joint.bcs.M4.both)
G172.M4.both$stim <- "G172M4_both"
DefaultAssay(G172.M4.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-25 at resolution 0.35.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M4.both <- G172.M4.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.35)
G172.M4.both.p1 <- UMAPPlot(G172.M4.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M4.both) <- "RNA"
G172.M4.both <- NormalizeData(G172.M4.both, verbose = TRUE)
d2 <- DotPlot(G172.M4.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.both.p1, d2)


################################ M4 clean ##############
# Barcodes in joint.bcs.M4.clean.both, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M4.clean.both <- subset(
  G172.hashtag,
  cells = joint.bcs.M4.clean.both,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): stim reuses the plain "G172M4_both" label, whereas the M1
# clean subset gets its own "G172M1_clean_both" label -- confirm intended.
G172.M4.clean.both$stim <- "G172M4_both"
DefaultAssay(G172.M4.clean.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M4.clean.both <- G172.M4.clean.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M4.clean.both.p1 <- UMAPPlot(G172.M4.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.clean.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M4.clean.both) <- "RNA"
G172.M4.clean.both <- NormalizeData(G172.M4.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M4.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.clean.both.p1, d2)

############# M4 both, top80 ###########
# Barcodes in joint.bcs.M4.both.top80, keeping cells with more than 500
# detected features and more than 1000 RNA counts.
G172.M4.both.top80 <- subset(
  G172.hashtag,
  cells = joint.bcs.M4.both.top80,
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
# NOTE(review): stim reuses the plain "G172M4_both" label, whereas the M1
# top80 subset gets its own "G172M1_both_top80" label -- confirm intended.
G172.M4.both.top80$stim <- "G172M4_both"
DefaultAssay(G172.M4.both.top80) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-25 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M4.both.top80 <- G172.M4.both.top80 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.25)
G172.M4.both.top80.p1 <- UMAPPlot(G172.M4.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M4.both.top80.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M4.both.top80) <- "RNA"
G172.M4.both.top80 <- NormalizeData(G172.M4.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M4.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M4.both.top80.p1, d2)



############### M6: M1 + M3 both ###########

# M6 is the merge of the already-built M1 and M3 objects. A subset of
# G172.hashtag used to be computed first and was overwritten by this merge on
# the next statement; it is kept only as a commented-out alternative.
#G172.M6.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both,joint.bcs.M3.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M6.both <- merge(G172.M1.both, G172.M3.both)
DefaultAssay(G172.M6.both) <- "RNA"
#G172.M6.both <- SCTransform(G172.M6.both,verbose =TRUE)
# Normalise, pick 2000 variable features (vst), scale them, compute 30 PCs,
# then embed and cluster on PCs 1-30 at resolution 0.25.
G172.M6.both <- G172.M6.both %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M6.both.p1 <- UMAPPlot(G172.M6.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M6.both.p1
# Same embedding coloured by the per-mouse `stim` label. The argument is spelled
# out as `group.by`; the old `group=` only worked through partial matching.
G172.M6.both.p2 <- UMAPPlot(G172.M6.both, reduction = "umap", group.by = "stim", label = TRUE, label.size = 5)
G172.M6.both.p2
DefaultAssay(G172.M6.both) <- "RNA"
G172.M6.both <- NormalizeData(G172.M6.both, verbose = TRUE)
d6 <- DotPlot(G172.M6.both, features = c(all_genes, 'Cyp2b10', 'Cyp2d9')) + RotatedAxis()
plot_grid(G172.M6.both.p1, d6)

# Differential expression between clusters 0 and 1 with both pre-filters
# disabled (thresholds written as numeric 0 instead of FALSE).
# NOTE(review): presumably clusters 0/1 are the male/female hepatocyte
# clusters, and the output name says "G173" although the data are G172 --
# confirm both.
DE.sex.hep <- FindMarkers(G172.M6.both, ident.1 = 0, ident.2 = 1, verbose = TRUE, logfc.threshold = 0, min.pct = 0)
write.table(DE.sex.hep, "DE_Analysis/DE_sex_hep_G173_M1-M3_for_sex_batch_depleted", sep="\t")

################### M7: M2 + M4 both ####
# M7 is the merge of the already-built M2 and M4 objects. A subset of
# G172.hashtag used to be computed first and was overwritten by this merge on
# the next statement; it is kept only as a commented-out alternative.
#G172.M7.both <- subset(G172.hashtag, cells = c(joint.bcs.M2.both,joint.bcs.M4.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M7.both <- merge(G172.M2.both, G172.M4.both)
DefaultAssay(G172.M7.both) <- "RNA"
#G172.M7.both <- SCTransform(G172.M7.both,verbose =TRUE)
# Normalise, pick 2000 variable features (vst), scale them, compute 30 PCs,
# then embed and cluster on PCs 1-25 at resolution 0.3.
G172.M7.both <- G172.M7.both %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.3)
G172.M7.both.p1 <- UMAPPlot(G172.M7.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M7.both.p1
# Same embedding coloured by the per-mouse `stim` label. The argument is spelled
# out as `group.by`; the old `group=` only worked through partial matching.
G172.M7.both.p2 <- UMAPPlot(G172.M7.both, reduction = "umap", group.by = "stim", label = TRUE, label.size = 5)
G172.M7.both.p2
DefaultAssay(G172.M7.both) <- "RNA"
G172.M7.both <- NormalizeData(G172.M7.both, verbose = TRUE)
d7 <- DotPlot(G172.M7.both, features = c(all_genes, 'Cyp2b10', 'Cyp2d9')) + RotatedAxis()
plot_grid(G172.M7.both.p1, d7)
# Differential expression between clusters 0 and 1 with both pre-filters
# disabled (thresholds written as numeric 0 instead of FALSE).
DE.sex.hep.M7 <- FindMarkers(G172.M7.both, ident.1 = 0, ident.2 = 1, verbose = TRUE, logfc.threshold = 0, min.pct = 0)
write.table(DE.sex.hep.M7, "DE_Analysis/DE_sex_hep_G172_M2-M4_for_sexdepleated", sep="\t")


######################## M8: clean M1 + M3 #########################
# Union of joint.bcs.M1.clean.both and joint.bcs.M3.clean.both, keeping cells
# with more than 500 detected features and more than 1000 RNA counts.
G172.M8.clean.both <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M1.clean.both, joint.bcs.M3.clean.both),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M8.clean.both$stim <- "G172M8_both"
DefaultAssay(G172.M8.clean.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M8.clean.both <- G172.M8.clean.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M8.clean.both.p1 <- UMAPPlot(G172.M8.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M8.clean.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M8.clean.both) <- "RNA"
G172.M8.clean.both <- NormalizeData(G172.M8.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M8.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M8.clean.both.p1, d2)

###### M9: clean M2 + M4 ######
# Union of joint.bcs.M2.clean.both and joint.bcs.M4.clean.both, keeping cells
# with more than 500 detected features and more than 1000 RNA counts.
G172.M9.clean.both <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M2.clean.both, joint.bcs.M4.clean.both),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M9.clean.both$stim <- "G172M9_both"
DefaultAssay(G172.M9.clean.both) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M9.clean.both <- G172.M9.clean.both %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M9.clean.both.p1 <- UMAPPlot(G172.M9.clean.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M9.clean.both.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M9.clean.both) <- "RNA"
G172.M9.clean.both <- NormalizeData(G172.M9.clean.both, verbose = TRUE)
d2 <- DotPlot(G172.M9.clean.both, features = all_genes) + RotatedAxis()
plot_grid(G172.M9.clean.both.p1, d2)

################### M10: M1 + M3 both, top80

# Union of joint.bcs.M1.both.top80 and joint.bcs.M3.both.top80, keeping cells
# with more than 500 detected features and more than 1000 RNA counts.
G172.M10.both.top80 <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M1.both.top80, joint.bcs.M3.both.top80),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M10.both.top80$stim <- "G172M10_both"
DefaultAssay(G172.M10.both.top80) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-30 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M10.both.top80 <- G172.M10.both.top80 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M10.both.top80.p1 <- UMAPPlot(G172.M10.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M10.both.top80.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M10.both.top80) <- "RNA"
G172.M10.both.top80 <- NormalizeData(G172.M10.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M10.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M10.both.top80.p1, d2)


############ M11: M2 + M4 both, top80
# Union of joint.bcs.M2.both.top80 and joint.bcs.M4.both.top80, keeping cells
# with more than 500 detected features and more than 1000 RNA counts.
G172.M11.both.top80 <- subset(
  G172.hashtag,
  cells = c(joint.bcs.M2.both.top80, joint.bcs.M4.both.top80),
  subset = nFeature_RNA > 500 & nCount_RNA > 1000
)
G172.M11.both.top80$stim <- "G172M11_both"
DefaultAssay(G172.M11.both.top80) <- "RNA"
# SCTransform, normalise, pick 2000 variable features (vst), scale them,
# compute 30 PCs, then embed and cluster on PCs 1-25 at resolution 0.25.
# NOTE(review): the steps after SCTransform presumably run on the assay it
# leaves as default -- confirm.
G172.M11.both.top80 <- G172.M11.both.top80 %>%
  SCTransform(verbose = TRUE) %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:25) %>%
  FindNeighbors(reduction = "pca", dims = 1:25) %>%
  FindClusters(resolution = 0.25)
G172.M11.both.top80.p1 <- UMAPPlot(G172.M11.both.top80, reduction = "umap", label = TRUE, label.size = 5)
G172.M11.both.top80.p1
# Switch back to RNA and re-normalise before drawing the marker dot plot.
DefaultAssay(G172.M11.both.top80) <- "RNA"
G172.M11.both.top80 <- NormalizeData(G172.M11.both.top80, verbose = TRUE)
d2 <- DotPlot(G172.M11.both.top80, features = all_genes) + RotatedAxis()
plot_grid(G172.M11.both.top80.p1, d2)


#################################### M12: M1+M2+M3+M4 (merge of M6 and M7) #########################
# Built by merging the two pairwise objects rather than by re-subsetting
# G172.hashtag (the subset variants are kept as commented-out alternatives).
#G172.M12.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both,joint.bcs.M3.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M12.both <- merge(G172.M6.both, G172.M7.both)
#G172.M12.both <- subset(G172.M12.both, subset = nFeature_RNA > 500 & nCount_RNA > 1000)

DefaultAssay(G172.M12.both) <- "RNA"
#G172.M12.both <- SCTransform(G172.M12.both,verbose =TRUE)
# Normalise, pick 2000 variable features (vst), scale them, compute 30 PCs,
# then embed and cluster on PCs 1-30 at resolution 0.25.
G172.M12.both <- G172.M12.both %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M12.both.p1 <- UMAPPlot(G172.M12.both, reduction = "umap", label = TRUE, label.size = 5)
G172.M12.both.p1
# Same embedding coloured by the per-mouse `stim` label. The argument is spelled
# out as `group.by`; the old `group=` only worked through partial matching.
G172.M12.both.p2 <- UMAPPlot(G172.M12.both, reduction = "umap", group.by = "stim", label = TRUE, label.size = 5)
G172.M12.both.p2
plot_grid(G172.M12.both.p1, G172.M12.both.p2)
FeaturePlot(G172.M12.both, features = "Cyp2c55")
FeaturePlot(G172.M12.both, features = "ncRNA-inter-chrX-15394")
# Dot plot is stored in d12 but not displayed in this section.
d12 <- DotPlot(G172.M12.both, features = c(all_genes_new)) + RotatedAxis()

#################################### M13: G172 M1+M2+M3+M4 subset ######################
# All four "both" barcode sets taken directly from G172.hashtag, without a
# feature/count QC filter. The QC-filtered subset used to be computed first and
# was overwritten on the next statement; it remains as the commented-out line.
#G172.M13.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both,joint.bcs.M3.both,joint.bcs.M2.both,joint.bcs.M4.both), subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G172.M13.both <- subset(G172.hashtag, cells = c(joint.bcs.M1.both, joint.bcs.M3.both, joint.bcs.M2.both, joint.bcs.M4.both))

DefaultAssay(G172.M13.both) <- "RNA"
#G172.M13.both <- SCTransform(G172.M13.both,verbose =TRUE)
# Normalise, pick 2000 variable features (vst), scale them, compute 30 PCs,
# then embed and cluster on PCs 1-30 at resolution 0.25.
G172.M13.both <- G172.M13.both %>%
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = VariableFeatures(.)) %>%
  RunPCA(npcs = 30, features = VariableFeatures(.)) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.25)
G172.M13.both.p1 <- UMAPPlot(G172.M13.both, reduction = "umap", label = TRUE, label.size = 5)
# NOTE(review): G172.M13.both.p2 is not created anywhere in this section, so
# this call fails (or shows a stale plot) unless it was defined elsewhere --
# confirm which grouping the second panel should show.
plot_grid(G172.M13.both.p1, G172.M13.both.p2)
FeaturePlot(G172.M13.both, features = "Cyp2c55")
FeaturePlot(G172.M13.both, features = "ncRNA-inter-chrX-15394")
d13 <- DotPlot(G172.M13.both, features = c(all_genes_new)) + RotatedAxis()
plot_grid(G172.M13.both.p1, d13)
FeaturePlot(G172.M13.both, features = "Cyp2c55")
FeaturePlot(G172.M13.both, features = "ncRNA-inter-chrX-15394")

```



```{r}
########## inlined body of load_tissue_droplet ############
# Builds the `droplet` Seurat object for `tissue_of_interest`: read the channel
# metadata, load every 10X channel, prefix each barcode with its channel name,
# attach the per-channel annotations, drop ERCC rows and create the object.
droplet_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/metadata_droplet_liver.csv", sep=",", header = TRUE)
colnames(droplet_metadata)[1] <- "channel"
tissue_metadata <- filter(droplet_metadata, tissue == tissue_of_interest)[, c('channel','tissue','subtissue','mouse.sex', 'mouse.id')]

# First channel.
# NOTE(review): the directory is hard-coded while the barcode prefix uses
# channel[1] -- presumably channel[1] is "10X_P4_2"; confirm.
raw.data <- Read10X("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/Liver-10X_P4_2/")
# paste0 is vectorised, so the barcodes are prefixed in one call.
colnames(raw.data) <- paste0(tissue_metadata$channel[1], '_', colnames(raw.data))
meta.data1 <- data.frame(row.names = colnames(raw.data))
meta.data1['channel'] <- tissue_metadata$channel[1]

# Remaining channels. seq_len(...)[-1] is empty when the tissue has a single
# channel, so no separate length guard is needed.
for (i in seq_len(nrow(tissue_metadata))[-1]) {
  subfolder <- paste0("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Refined_cellmatrices/",tissue_of_interest, '-', tissue_metadata$channel[i])
  new.data1 <- Read10X(data.dir = subfolder)
  colnames(new.data1) <- paste0(tissue_metadata$channel[i], '_', colnames(new.data1))

  new.metadata1 <- data.frame(row.names = colnames(new.data1))
  new.metadata1['channel'] <- tissue_metadata$channel[i]

  raw.data <- cbind(raw.data, new.data1)
  meta.data1 <- rbind(meta.data1, new.metadata1)
}

# Attach the per-channel annotations by direct lookup. merge(sort = FALSE)
# leaves the row order unspecified, so re-assigning the saved row names by
# position could pair a cell with another channel's annotations.
rnames <- row.names(meta.data1)
channel_rows <- match(meta.data1$channel, tissue_metadata$channel)
annotation_cols <- setdiff(colnames(tissue_metadata), "channel")
meta.data1 <- cbind(meta.data1, tissue_metadata[channel_rows, annotation_cols, drop = FALSE])
row.names(meta.data1) <- rnames

# Order the cells alphabetically to ensure consistency.
ordered_cell_names <- order(colnames(raw.data))
raw.data <- raw.data[, ordered_cell_names]
meta.data1 <- meta.data1[ordered_cell_names, ]

# Find ERCC rows, compute the per-cell ERCC fraction, and drop them.
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
erccs <- rownames(raw.data)[ercc.index]
# drop = FALSE keeps a matrix even when exactly one ERCC row is present.
percent.ercc <- Matrix::colSums(raw.data[erccs, , drop = FALSE]) / Matrix::colSums(raw.data)
# Negative indexing with an empty index would select zero rows, i.e. discard
# every gene, so only drop rows when ERCC rows were actually found.
if (length(ercc.index) > 0) {
  raw.data <- raw.data[-ercc.index, ]
}

# Create the Seurat object with all the data
droplet <- CreateSeuratObject(raw.data)   # dropseq
droplet <- AddMetaData(object = droplet, meta.data1)
droplet@meta.data$tech <- "droplet"

# ---- Droplet data: QC filter, normalisation, PCA/UMAP/tSNE, clustering ----
#n.pcs = 10
#droplet <- SubsetData(droplet,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000))  # old version of seurat
# Keep cells with more than 500 detected genes and more than 1000 UMIs.
droplet <- subset(droplet, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
droplet <- NormalizeData(droplet, verbose = FALSE)
droplet <- FindVariableFeatures(droplet, selection.method = "vst", nfeatures = 2000)
droplet <- ScaleData(droplet, verbose = FALSE)
#droplet <- RunPCA(droplet, npcs = 10, verbose = FALSE)
# Labels used later to tell the datasets apart after integration.
droplet$stim <- "droplet"
droplet$cond <- "ctrl"
# droplet <- ScaleData(droplet, verbose = FALSE)
droplet <- RunPCA(droplet, npcs = 30, verbose = FALSE)
droplet <- RunUMAP(droplet, reduction = "pca", dims = 1:25)
droplet <- FindNeighbors(droplet, reduction = "pca", dims = 1:10)
droplet <- FindClusters(droplet, resolution = 0.5)
p1 <- UMAPPlot(droplet, reduction = "umap", group.by = "channel", label = TRUE, label.size = 5)
p2 <- UMAPPlot(droplet, label = TRUE, label.size = 6)
p3 <- UMAPPlot(droplet, reduction = "umap", group.by = "mouse.sex", label = TRUE, label.size = 5)

res.used <- 1
#droplet <- FindClusters(object = droplet, reduction.type = "pca", dims.use = 1:n.pcs, resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)

# Seurat v3 names this argument `dims` (`dims.use` was the v2 name) and `n.pcs`
# is only defined in a commented-out line above; 1:10 matches that commented
# value and the dims used for FindNeighbors.
droplet <- RunTSNE(object = droplet, dims = 1:10, seed.use = 10, perplexity = 30)
# `label` replaces the Seurat v2 `do.label` argument.
TSNEPlot(object = droplet, label = TRUE, pt.size = 1.2, label.size = 4)


# Manual cluster annotation.
# NOTE(review): the mapping assumes the clustering above yields clusters 0-9 in this order - confirm after re-running.
droplet <- RenameIdents(droplet, `0` = "Hep-Mid-M", `1` = "Hep-PC-F", `2` = "Hep-PP-F", `3` = "Hep-PP-M", `4` = "Hep-Mid-M", `5` = "Hep-PC-M", `6` = "Hep-Mid-F", `7` = "Hep-Mid-F", `8` = "Endo-F", `9` = "Bileduct-F")



### SCTransform normalisation (used by the SCT-based integration workflow) ###
droplet <- SCTransform(droplet, verbose = TRUE)


###### Smart-seq2 (FACS) data: load, annotate, preprocess ######

# Per-cell annotation table; its first column is renamed so it can serve as the join key.
plate_metadata <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/Liver_facs_annotation.csv", sep = ",", header = TRUE)
colnames(plate_metadata)[1] <- "plate.barcode"

# Expression table: rows are features, columns are cells.
raw.data <- read.csv("/restricted/projectnb/waxmanlab/kkarri/scRNAseq_data_integration/liver_facs_scrna_data.csv", sep = ",", row.names = 1)

# Second '.'-separated field of the part of each cell name before the first '_'.
cell.names <- colnames(raw.data)
plate.barcodes <- vapply(
  cell.names,
  function(x) strsplit(strsplit(x, "_")[[1]][1], ".", fixed = TRUE)[[1]][2],
  character(1),
  USE.NAMES = FALSE
)

# 'plate.barcode' holds the full cell name (the join key); 'plate.barcode1' the parsed field.
barcode.df <- data.frame(
  plate.barcode = cell.names,
  plate.barcode1 = plate.barcodes,
  row.names = cell.names,
  stringsAsFactors = FALSE
)
rnames <- row.names(barcode.df)

# Look the annotation up with match() so every row stays aligned with its cell.
# merge(sort = FALSE) returns rows in an unspecified order, and re-attaching
# row names afterwards could pair cells with the wrong annotation (or fail
# outright when some cells have no annotation row).
annot.row <- match(barcode.df$plate.barcode, plate_metadata$plate.barcode)
annotated <- !is.na(annot.row)
if (!all(annotated)) {
  warning(sum(!annotated), " cells have no row in the annotation table and are dropped", call. = FALSE)
}
annot.cols <- setdiff(colnames(plate_metadata), "plate.barcode")
meta.data <- cbind(
  barcode.df[annotated, , drop = FALSE],
  plate_metadata[annot.row[annotated], annot.cols, drop = FALSE]
)
row.names(meta.data) <- rnames[annotated]

# Sort cells by cell name
meta.data <- meta.data[order(rownames(meta.data)), ]
raw.data <- raw.data[, rownames(meta.data)]

# Create the Seurat object with all the data
smartseq <- CreateSeuratObject(raw.data)
smartseq <- AddMetaData(object = smartseq, meta.data)
#smartseq@meta.data$tech <- "smartseq"
smartseq$stim <- "smartseq"
smartseq$cond <- "ctrl"
#smartseq <- SubsetData(smartseq,subset.names = c("nGene", "nUMI"),low.thresholds = c(500, 1000))  #old version of seurat
# Keep cells with more than 500 detected genes and more than 1000 counts.
smartseq <- subset(smartseq, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
#smartseq <- SCTransform(smartseq)
smartseq <- NormalizeData(smartseq, verbose = FALSE)
smartseq <- FindVariableFeatures(smartseq, selection.method = "vst", nfeatures = 2000)
smartseq <- ScaleData(smartseq, verbose = FALSE)
smartseq <- RunPCA(smartseq, npcs = 30, verbose = FALSE)
smartseq <- RunUMAP(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindNeighbors(smartseq, reduction = "pca", dims = 1:25)
smartseq <- FindClusters(smartseq, resolution = 0.5)
#p4<- UMAPPlot(smartseq, reduction = "umap", group.by = "channel", label=TRUE, label.size=5)
p5 <- UMAPPlot(smartseq, label = TRUE, label.size = 6)
p6 <- UMAPPlot(smartseq, reduction = "umap", group.by = "mouse.sex", label = TRUE, label.size = 5)

########## sctransform ###################
smartseq <- SCTransform(smartseq)

############################### Integration: regular workflow ############################
# NOTE(review): the *.list objects used here are created by the SplitObject()
# calls in the reference-based integration section further down this file, so
# that section presumably has to be run first - confirm the execution order.
anchors <- FindIntegrationAnchors(object.list = c(droplet.list, smartseq.list, G171B.list, G172.M1.list, G172.M2.list), dims = 1:50, anchor.features = 3000)
combined <- IntegrateData(anchorset = anchors, dims = 1:50)

DefaultAssay(combined) <- "integrated"
# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)

# UMAP, t-SNE and clustering
combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20)
combined <- FindClusters(combined, resolution = 0.5)
# The tSNE plots below need a "tsne" reduction, so the embedding has to be computed.
combined <- RunTSNE(combined, reduction = "pca", dims = 1:20)

# Visualization (UMAP)
p1 <- DimPlot(combined, reduction = "umap", group.by = "stim")
p2 <- DimPlot(combined, reduction = "umap", group.by = "mouse.sex")
p3 <- DimPlot(combined, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(combined, label = TRUE)
plot_grid(p2, p3, p4)
DimPlot(combined, reduction = "umap", split.by = "stim")

# Visualization (tSNE)
p5 <- DimPlot(combined, reduction = "tsne", group.by = "stim")
p6 <- DimPlot(combined, reduction = "tsne", group.by = "mouse.sex")
p7 <- DimPlot(combined, reduction = "tsne", label = TRUE)
p8 <- TSNEPlot(combined, label = TRUE)
plot_grid(p6, p7, p8)
DimPlot(combined, reduction = "tsne", split.by = "stim")

#################################### Reference-based integration ################################
# Split every dataset into the objects that take part in the integration:
# droplet by mouse sex, all other datasets by their `stim` label.
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by = "stim")
ctrl.list <- c(droplet.list, smartseq.list)

# SCTransform each control object; lapply() keeps the list names.
ctrl.list <- lapply(ctrl.list, SCTransform, verbose = TRUE)

ctrl.list1 <- c(ctrl.list, G171B.list)
#dims = 1:50
# The feature count is set with `nfeatures`; `dims` / `anchor.features` are not
# parameters of SelectIntegrationFeatures, so the intended 3000 features were
# silently replaced by the default.
ref.features <- SelectIntegrationFeatures(object.list = ctrl.list1, nfeatures = 3000)
ref.list <- PrepSCTIntegration(object.list = ctrl.list1, anchor.features = ref.features)
# The first three objects in the list act as the integration reference.
ref.anchors <- FindIntegrationAnchors(object.list = ref.list, normalization.method = "SCT", anchor.features = ref.features, reference = c(1, 2, 3))
ref.integrated <- IntegrateData(anchorset = ref.anchors, normalization.method = "SCT")

DefaultAssay(ref.integrated) <- "integrated"
ref.integrated <- ScaleData(ref.integrated, verbose = FALSE)
ref.integrated <- RunPCA(object = ref.integrated, npcs = 30, verbose = FALSE)
# `perplexity` is a tSNE parameter, not a UMAP one, so it is no longer passed here.
ref.integrated <- RunUMAP(object = ref.integrated, dims = 1:30, seed.use = 10)
ref.integrated <- FindNeighbors(ref.integrated, reduction = "pca", dims = 1:30)
ref.integrated <- FindClusters(ref.integrated, resolution = 0.2)

r1 <- UMAPPlot(ref.integrated, split.by = "stim")
r2 <- UMAPPlot(ref.integrated, label = TRUE)
r3 <- UMAPPlot(ref.integrated, split.by = c("mouse.sex"))


# Collect the UMAP panels before restyling them; `plots` was previously used
# here without being defined in this section.
plots <- list(r1, r2, r3)
plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4,
    byrow = TRUE, override.aes = list(size = 2.5))))
CombinePlots(plots)


# Per-cell "<cluster>_<stim>" label; the cluster ids are stored in
# `seurat_clusters` before the identities are switched to the combined label.
ref.integrated$celltype.stim <- paste(Idents(ref.integrated), ref.integrated$stim, sep = "_")
ref.integrated$seurat_clusters <- Idents(ref.integrated)
Idents(ref.integrated) <- "celltype.stim"

fp <- FeaturePlot(ref.integrated, features = "ncRNA-as-chr10-8460", cols = c("lightgrey", "darkred"), order = TRUE, shape.by = "stim", pt.size = 2)

# Expression plots are drawn from the RNA assay, re-normalised and re-scaled.
DefaultAssay(ref.integrated) <- "RNA"
ref.integrated <- NormalizeData(ref.integrated)
ref.integrated <- ScaleData(ref.integrated)

# `all_genes` is defined in the marker-gene chunk further down this file.
dot1 <- DotPlot(ref.integrated, features = all_genes) + RotatedAxis()
d1 <- DoHeatmap(ref.integrated, features = c(all_genes, "Mup20", "Xist"), group.by = "seurat_clusters", assay = 'RNA')
#d1 <- DoHeatmap(ref.integrated, features = highTPM, group.by = "celltype.stim", assay= 'RNA',raster = F, disp.max = 0.5)+scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd")) ) + guides(color=FALSE)


############ Integrate G172 M1 and G172 M2 (SCT workflow) #############
droplet.list <- SplitObject(droplet, split.by = "mouse.sex")
smartseq.list <- SplitObject(smartseq, split.by = "stim")
G172.M1.list <- SplitObject(G172.M1, split.by = "stim")
G172.M2.list <- SplitObject(G172.M2, split.by = "stim")
G172.M6.list <- SplitObject(G172.M6, split.by = "stim")
G171B.list <- SplitObject(G171B, split.by = "stim")
ctrl.list.2 <- c(G172.M1.list, G172.M2.list)
G172.M1.merge.list <- SplitObject(G172.M1.merge, split.by = "stim")
G172.M2.merge.list <- SplitObject(G172.M2.merge, split.by = "stim")

# Integrated separately in the section that follows this one.
merged.list <- c(G172.M1.merge.list, G172.M2.merge.list)

# SCTransform each object; lapply() keeps the list names.
ctrl.list.2 <- lapply(ctrl.list.2, SCTransform, verbose = TRUE)

# The feature count is set with `nfeatures`; `dims` / `anchor.features` are not
# parameters of SelectIntegrationFeatures, so the intended 3000 features were
# silently replaced by the default.
ref.features.1 <- SelectIntegrationFeatures(object.list = ctrl.list.2, nfeatures = 3000)
ref.list.1 <- PrepSCTIntegration(object.list = ctrl.list.2, anchor.features = ref.features.1)


ref.anchors.1 <- FindIntegrationAnchors(object.list = ref.list.1, normalization.method = "SCT",
    anchor.features = ref.features.1, reference = c(1, 2))
ref.integrated.1 <- IntegrateData(anchorset = ref.anchors.1, normalization.method = "SCT")

DefaultAssay(ref.integrated.1) <- "integrated"
ref.integrated.1 <- ScaleData(ref.integrated.1, verbose = FALSE)
ref.integrated.1 <- RunPCA(object = ref.integrated.1, npcs = 30, verbose = FALSE)
# `perplexity` is a tSNE parameter, not a UMAP one, so it is no longer passed here.
ref.integrated.1 <- RunUMAP(object = ref.integrated.1, dims = 1:30, seed.use = 10)
# The TSNEPlot below needs a "tsne" reduction; Seurat v3 names the argument `dims`.
ref.integrated.1 <- RunTSNE(object = ref.integrated.1, dims = 1:30, seed.use = 10, perplexity = 30)
ref.integrated.1 <- FindNeighbors(ref.integrated.1, reduction = "pca", dims = 1:30)
ref.integrated.1 <- FindClusters(ref.integrated.1, resolution = 0.2)

rG172M1.1 <- UMAPPlot(ref.integrated.1, split.by = "stim", label = TRUE)
rG172M1.2 <- UMAPPlot(ref.integrated.1, label = TRUE, combine = FALSE)
rG172M1.3 <- UMAPPlot(ref.integrated.1, split.by = c("mouse.sex"))

rG172M1.4 <- TSNEPlot(ref.integrated.1, split.by = "stim")


# Expression plots and marker tests use the RNA assay, re-normalised and re-scaled.
DefaultAssay(ref.integrated.1) <- "RNA"
ref.integrated.1 <- NormalizeData(ref.integrated.1)
ref.integrated.1 <- ScaleData(ref.integrated.1)

# `all_genes` is defined in the marker-gene chunk further down this file.
DoHeatmap(ref.integrated.1, features = c(all_genes, 'Sox9', 'Mup20'), group.by = "seurat_clusters", assay = 'RNA', raster = FALSE) + scale_fill_gradientn(colors = (RColorBrewer::brewer.pal(n = 9, name = "PuRd"))) + guides(color = FALSE)


# Store the cluster annotation as per-cell metadata. RenameIdents() returns a
# Seurat object, which cannot be assigned to a metadata column; Idents() of the
# renamed object gives the per-cell labels while the object's own identities
# (the cluster numbers) stay untouched for the "<cluster>_<stim>" labels below.
ref.integrated.1$celltype <- Idents(RenameIdents(ref.integrated.1, `0` = "PP", `1` = "PC", `2` = "Endo", `3` = "HSC", `4` = "Kupffer", `5` = "Dividing", `6` = "Immune", `7` = "B-NK", `8` = "NA"))


# Positive markers of every cluster.
ref.integrated.1.markers <- FindAllMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)

write.csv(ref.integrated.1.markers, "ref_integrated_G172M1-M2_Allmarker")

# Switch the identities to "<cluster>_<stim>". The cluster numbers remain
# available in `seurat_clusters`, so `celltype` keeps the annotation set above
# instead of being overwritten with the cluster ids.
ref.integrated.1$celltype.stim <- paste(Idents(ref.integrated.1), ref.integrated.1$stim, sep = "_")
Idents(ref.integrated.1) <- "celltype.stim"

DoHeatmap(ref.integrated.1, features = TPM4, group.by = "seurat_clusters", assay = 'RNA')

# Markers per "<cluster>_<stim>" group. FindMarkers() requires `ident.1` and
# returns no `cluster` column, so the all-groups variant is what the summary
# below needs.
ref.integrated.1.markers <- FindAllMarkers(ref.integrated.1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
ref.integrated.1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)


##################################################################################################3

# ---- SCT integration of the merged G172 objects held in `merged.list` ----
# SCTransform each object; lapply() keeps the list names.
merged.list <- lapply(merged.list, SCTransform, verbose = TRUE)

# The feature count is set with `nfeatures`; `dims` / `anchor.features` are not
# parameters of SelectIntegrationFeatures, so the intended 3000 features were
# silently replaced by the default.
ref.features.2 <- SelectIntegrationFeatures(object.list = merged.list, nfeatures = 3000)
ref.list.2 <- PrepSCTIntegration(object.list = merged.list, anchor.features = ref.features.2)


# The first object in the list acts as the integration reference.
ref.anchors.2 <- FindIntegrationAnchors(object.list = ref.list.2, normalization.method = "SCT",
    anchor.features = ref.features.2, reference = 1)
ref.integrated.2 <- IntegrateData(anchorset = ref.anchors.2, normalization.method = "SCT")

DefaultAssay(ref.integrated.2) <- "integrated"
ref.integrated.2 <- ScaleData(ref.integrated.2, verbose = FALSE)
ref.integrated.2 <- RunPCA(object = ref.integrated.2, npcs = 30, verbose = FALSE)
# `perplexity` is a tSNE parameter, not a UMAP one, so it is no longer passed here.
ref.integrated.2 <- RunUMAP(object = ref.integrated.2, dims = 1:30, seed.use = 10)
# Seurat v3 names this argument `dims`; the v2 name `dims.use` is not
# recognised, which left tSNE running on the default dimensions.
ref.integrated.2 <- RunTSNE(object = ref.integrated.2, dims = 1:30, seed.use = 10, perplexity = 30)
ref.integrated.2 <- FindNeighbors(ref.integrated.2, reduction = "pca", dims = 1:30)
ref.integrated.2 <- FindClusters(ref.integrated.2, resolution = 0.5)

rG172M1.1 <- UMAPPlot(ref.integrated.2, split.by = "stim", label = TRUE)
rG172M1.2 <- UMAPPlot(ref.integrated.2, label = TRUE)
rG172M1.3 <- TSNEPlot(ref.integrated.2, split.by = "stim")
rG172M1.4 <- TSNEPlot(ref.integrated.2, label = TRUE)


# Marker dot plot from the RNA assay, re-normalised and re-scaled.
DefaultAssay(ref.integrated.2) <- "RNA"
ref.integrated.2 <- NormalizeData(ref.integrated.2)
ref.integrated.2 <- ScaleData(ref.integrated.2)
# `all_genes` is defined in the marker-gene chunk further down this file.
d3 <- DotPlot(ref.integrated.2, features = all_genes) + RotatedAxis()
plot_grid(rG172M1.2, rG172M1.1, d3)


```

```{r}
# ---- SCT integration of G172 M1 and G172 M3 ----
G172.M1.list <- SplitObject(G172.M1.both, split.by = "stim")
G172.M3.list <- SplitObject(G172.M3.both, split.by = "stim")
merged.list <- c(G172.M1.list, G172.M3.list)

# SCTransform each object; lapply() keeps the list names.
merged.list <- lapply(merged.list, SCTransform, verbose = TRUE)

# The feature count is set with `nfeatures`; `dims` / `anchor.features` are not
# parameters of SelectIntegrationFeatures.
ref.features.1 <- SelectIntegrationFeatures(object.list = merged.list, nfeatures = 2000)
ref.list.1 <- PrepSCTIntegration(object.list = merged.list, anchor.features = ref.features.1)

ref.anchors.1 <- FindIntegrationAnchors(object.list = ref.list.1, normalization.method = "SCT",
    anchor.features = ref.features.1)
ref.integrated.1 <- IntegrateData(anchorset = ref.anchors.1, normalization.method = "SCT")

DefaultAssay(ref.integrated.1) <- "integrated"
ref.integrated.1 <- ScaleData(ref.integrated.1, verbose = FALSE)
ref.integrated.1 <- RunPCA(object = ref.integrated.1, npcs = 30, verbose = FALSE)
# `perplexity` is a tSNE parameter, not a UMAP one, so it is no longer passed here.
ref.integrated.1 <- RunUMAP(object = ref.integrated.1, dims = 1:30, seed.use = 10)
#ref.integrated.1 <- RunTSNE(object = ref.integrated.1, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
ref.integrated.1 <- FindNeighbors(ref.integrated.1, reduction = "pca", dims = 1:30)
ref.integrated.1 <- FindClusters(ref.integrated.1, resolution = 0.2)

rG172M1.1 <- UMAPPlot(ref.integrated.1, group.by = "stim")
rG172M1.2 <- UMAPPlot(ref.integrated.1, label = TRUE)
#rG172M1.4 <- TSNEPlot(ref.integrated.1, split.by="stim")

# Marker dot plot from the RNA assay, re-normalised and re-scaled.
DefaultAssay(ref.integrated.1) <- "RNA"
ref.integrated.1 <- NormalizeData(ref.integrated.1)
ref.integrated.1 <- ScaleData(ref.integrated.1)
# `all_genes` is defined in the marker-gene chunk further down this file.
d3 <- DotPlot(ref.integrated.1, features = all_genes) + RotatedAxis()
plot_grid(rG172M1.2, rG172M1.1, d3)

########## Standard (anchor-based) integration workflow on merged.list ##########
anchors <- FindIntegrationAnchors(
  object.list = merged.list,
  dims = 1:50,
  anchor.features = 2000
)
#anchors <- FindIntegrationAnchors(object.list = list(lnc5998, droplet1), dims = 1:50, anchor.features = 3000)
combined <- IntegrateData(anchorset = anchors, dims = 1:50)
DefaultAssay(combined) <- "integrated"

# Scale, reduce, embed and cluster the integrated assay in one pipeline.
combined <- combined %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(npcs = 30, verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:30) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.2)

# UMAP panels: cells coloured by dataset label, and by cluster with labels.
p1 <- UMAPPlot(combined, reduction = "umap", group.by = "stim")
p4 <- UMAPPlot(combined, label = TRUE, label.size = 6)

# Marker dot plot drawn from the re-normalised, re-scaled RNA assay.
DefaultAssay(combined) <- "RNA"
combined <- combined %>%
  NormalizeData() %>%
  ScaleData()
d3 <- DotPlot(combined, features = all_genes) + RotatedAxis()
plot_grid(p1, p4, d3)

```


Differential expression (DE) analysis: G172 M1 vs. G172 M2
```{r}

# Differential expression between groups of "<cluster>_<stim>" identities of
# ref.integrated.1. The thresholds are numeric 0 (no logFC / min.pct
# pre-filtering); the logical FALSE used before only worked through coercion.
find_unfiltered_markers <- function(ident.1, ident.2) {
  FindMarkers(
    ref.integrated.1,
    ident.1 = ident.1,
    ident.2 = ident.2,
    verbose = TRUE,
    logfc.threshold = 0,
    min.pct = 0
  )
}

# All clusters pooled: M1 vs. M2.
DE_All_M1_M2 <- find_unfiltered_markers(paste0(0:8, "_G172M1"), paste0(0:8, "_G172M2"))
write.csv(DE_All_M1_M2, "/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G172/analysis/marker/DE_G172_All_M1_M2")

# Within M2: clusters 0-1 vs. clusters 2-7.
DE_hep_NPC <- find_unfiltered_markers(paste0(0:1, "_G172M2"), paste0(2:7, "_G172M2"))

# Clusters 2-7 pooled: M1 vs. M2.
DE_NPC_M1_M2 <- find_unfiltered_markers(paste0(2:7, "_G172M1"), paste0(2:7, "_G172M2"))

# Same comparison as DE_hep_NPC, so the result is reused instead of recomputed.
DE_Hep0.1_NPC_M2 <- DE_hep_NPC

write.csv(DE_Hep0.1_NPC_M2, "G172_markers/DE_Hep01_NPC_M2")

# Per-cluster M1 vs. M2 comparisons (each computed once).
DE0.1 <- find_unfiltered_markers("0_G172M1", "0_G172M2")

DE1.1 <- find_unfiltered_markers("1_G172M1", "1_G172M2")

DE2.1 <- find_unfiltered_markers("2_G172M1", "2_G172M2")

DE3.1 <- find_unfiltered_markers("3_G172M1", "3_G172M2")

DE4.1 <- find_unfiltered_markers("4_G172M1", "4_G172M2")

DE5.1 <- find_unfiltered_markers("5_G172M1", "5_G172M2")

DE6.1 <- find_unfiltered_markers("6_G172M1", "6_G172M2")

DE7.1 <- find_unfiltered_markers("7_G172M1", "7_G172M2")
```



```{r}
################# G171 TCPO exposed ########
# Channel annotation for the G171B sample; the first column is renamed so it can serve as the join key.
G171B_metadata_G171B <- read.csv("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/G171_metadata_droplet_liver.csv", sep = ",", header = TRUE)
colnames(G171B_metadata_G171B)[1] <- "channel"
tissue_metadata_G171B <- filter(G171B_metadata_G171B, tissue == tissue_of_interest)[, c('channel', 'tissue', 'subtissue', 'mouse.sex', 'mouse.id')]

# Single-channel dataset: barcodes are prefixed with the channel name.
raw.data <- Read10X("/net/waxman-server/mnt/data/waxmanlabvm_home/kkarri/G171/Analysis/Transcript_Refined/Liver-10X_G171B/")
colnames(raw.data) <- paste0(tissue_metadata_G171B$channel[1], '_', colnames(raw.data))
meta.data1 <- data.frame(row.names = colnames(raw.data))
meta.data1['channel'] <- tissue_metadata_G171B$channel[1]

# Attach the channel annotation with a match()-based lookup so the rows stay in
# cell order; merge(sort = FALSE) returns rows in an unspecified order.
rnames <- row.names(meta.data1)
meta.data1 <- as.data.frame(tissue_metadata_G171B)[match(meta.data1$channel, tissue_metadata_G171B$channel), , drop = FALSE]
row.names(meta.data1) <- rnames

# Order the cells alphabetically to ensure consistency.
ordered_cell_names <- order(colnames(raw.data))
raw.data <- raw.data[, ordered_cell_names]
meta.data1 <- meta.data1[ordered_cell_names, ]

# Find ERCC spike-ins, compute the per-cell ERCC fraction, and drop them from the raw data.
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
erccs <- rownames(raw.data)[ercc.index]
percent.ercc <- Matrix::colSums(raw.data[erccs, , drop = FALSE]) / Matrix::colSums(raw.data)
# Guard: x[-integer(0), ] selects zero rows, i.e. it would drop every gene
# when the matrix contains no ERCC features.
if (length(ercc.index) > 0) {
  raw.data <- raw.data[-ercc.index, ]
}

# Per-cell fraction of counts from features whose name starts with "ncRNA";
# drop = FALSE keeps a matrix even when exactly one feature matches.
ncRNA.genes <- grep(pattern = "^ncRNA", x = rownames(x = raw.data), value = TRUE)
percent.ncRNA <- Matrix::colSums(raw.data[ncRNA.genes, , drop = FALSE]) / Matrix::colSums(raw.data)
KRAB.genes <- grep(pattern = "^KRAB", x = rownames(x = raw.data), value = TRUE)
cherry.genes <- grep(pattern = "^mcherry", x = rownames(x = raw.data), value = TRUE)

# Create the Seurat object with all the data
G171B <- CreateSeuratObject(raw.data)   # dropseq
G171B <- AddMetaData(object = G171B, meta.data1)
G171B@meta.data$tech <- "G171B"

#G171B <- SubsetData(G171B,subset.names = c("nGene", "nUMI"), low.thresholds = c(500, 1000))  # old version of seurat
#G171B <-  subset(G171B, subset = nFeature_RNA > 500 & nCount_RNA > 1000)
G171B <- NormalizeData(G171B, verbose = FALSE)
G171B <- FindVariableFeatures(G171B, selection.method = "vst", nfeatures = 2000)
G171B$stim <- "G171B"

### SCTransform normalisation (used by the SCT-based integration workflow) ###
G171B <- SCTransform(G171B, verbose = TRUE)


```


Diffusion plot code

```{r}
# Classical MDS embedding of the cells in `combined`.
# Step 1: pairwise Euclidean distances between cells, computed on the
# scale.data slot (cells become rows after the transpose).
d <- combined %>%
  GetAssayData(slot = "scale.data") %>%
  t() %>%
  dist()

# Step 2: two-dimensional MDS; the columns are named to match the reduction key.
mds <- cmdscale(d = d, k = 2)
colnames(mds) <- paste0("MDS_", seq_len(2))

# Step 3: register the embedding as a custom reduction called 'mds' so it can
# be used like any other reduction in downstream functions.
combined[["mds"]] <- CreateDimReducObject(
  embeddings = mds,
  key = "MDS_",
  assay = DefaultAssay(combined)
)

DimPlot(combined, reduction = "mds", pt.size = 0.5)
```



```{r}
# ---- Marker gene lists used by the DotPlot / DoHeatmap / FeaturePlot calls in this file ----
# Hepatocyte markers (core set, then the full set).
genes_hep_main <- c('Alb', 'Ttr', 'Apoa1', 'Serpina1c')
genes_hep <- c('Alb', 'Ttr', 'Apoa1', 'Serpina1c',
               'Cyp2e1', 'Glul', 'Oat', 'Gulo',
               'Ass1', 'Hamp', 'Gstp1', 'Ubb',
               'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')
# Non-parenchymal cell markers.
genes_endo <- c('Pecam1', 'Nrp1', 'Kdr', 'Oit3', 'Igfbp7', 'Aqp1')
genes_kuppfer <- c('Clec4f', 'Cd68')
genes_nk <- c('Il2rb', 'Nkg7', 'Cxcr6', 'Gzma')
genes_b <- c('Cd79a', 'Cd79b')
genes_bec <- c('Epcam', 'Krt19', 'Krt7')
genes_immune <- 'Ptprc'
HSC <- c("Dcn", "Lama1", "Nes")
Dividing <- "Top2a"
Bplasma <- "Jchain"
Mac <- "Csf1r"
Chol <- "Sox9"

# Sex-related genes.
Y_genes <- c("Uty", "Ddx3y", "Kdm5d", "Eif2s3y", "Gm47283")
# 'Cyp2d9' was listed twice; each gene now appears once so the vector can be
# used as a feature list without duplicated entries.
sex <- c("Cyp2d9", "ncRNA-inter-chrX-15394", "Cyp2c69", 'Mup20', 'Mup1', 'Mup12', 'Mup21')
F_sex <- c('Sult3a1', 'A1bg', 'Fmo3', 'Cyp2b9', 'Sult2a1', 'Cyp2b13')

# Combined panels.
all_genes <- c(genes_hep, genes_endo, genes_kuppfer, Mac, genes_nk, genes_b, genes_bec, genes_immune, HSC, Dividing)
genes_bec_b_immune <- c(genes_bec, genes_b, genes_immune)
# Same genes as entries 5-16 of genes_hep.
genes_zones <- c('Cyp2e1', 'Glul', 'Oat', 'Gulo',
                 'Ass1', 'Hamp', 'Gstp1', 'Ubb',
                 'Cyp2f2', 'Pck1', 'Hal', 'Cdh1')

All_sex_genes <- c('Mup11', 'Mup7', 'Elovl3', 'Cyp4a12a', 'Tff3', 'Scara5', 'Mup14', 'Gstp2', 'Cyp4a12b', 'Mup20', 'Mup12', 'Mup21', 'Cyp2d9', 'Mup1',
                   'Sult2a5', 'Sult2a2', 'Sult2a3', 'Cyp3a16', 'Sult2a6', 'Cyp3a41b', 'Cyp3a44', 'Cyp3a41a', 'Cyp2a4', 'Slc22a26', 'Sult3a1',
                   'Sult2a1', 'Cyp2b13', 'A1bg', 'Fmo3', 'Cyp2c69', 'Cyp2b9')

#All_sex_genes <- c('Mup11|Mup7|Elovl3|Cyp4a12a|Tff3|Scara5|Mup14|Gstp2|Cyp4a12b|Mup20|Mup12|Mup21|Cyp2d9|Mup1|
                  # Sult2a5|Sult2a2|Sult2a3|Cyp3a16|Sult2a6|Cyp3a41b|Cyp3a44|Cyp3a41a|Cyp2a4|Slc22a26|Sult3a1|
#Sult2a1|Cyp2b13|A1bg|Fmo3|Cyp2c69|Cyp2b9')


All_sex_genes_all <- c(Y_genes, "ncRNA-inter-chrX-15394", All_sex_genes)

receptor_KO <- c("ncRNA-inter-chr7-5998", "Cyp2b10", "Nr1i2", "Nr1i3", "Ppara", "Pparg", "Ppargc1b", "Ppard")
cell <- c("Stab2", "Csf1r", "Cd3g", "Ebf1", "Irf8", "Sox9", "Apoc3", "Top2a", "Dcn")
```

```{r}
# TPM4: character vector of 'ncRNA-...' feature identifiers. It is passed as
# `features = TPM4` to DoHeatmap() elsewhere in this file.
# NOTE(review): the name suggests lncRNAs selected by a TPM cutoff - confirm the selection criterion.
TPM4<- c('ncRNA-inter-chr7-6524',
'ncRNA-inter-chr19-14853',
'ncRNA-inter-chr6-5675',
'ncRNA-inter-chr4-3468',
'ncRNA-inter-chr9-8122',
'ncRNA-inter-chr12-10476',
'ncRNA-inter-chr17-14026',
'ncRNA-as-chr2-1457',
'ncRNA-as-chr19-14883',
'ncRNA-as-chr10-8460',
'ncRNA-as-chr5-4325',
'ncRNA-inter-chr7-5998',
'ncRNA-as-chr9-7843',
'ncRNA-as-chr9-8142',
'ncRNA-inter-chr11-9925',
'ncRNA-inter-chr19-14873',
'ncRNA-as-chr9-8172',
'ncRNA-inter-chr4-3779',
'ncRNA-inter-chr3-2504',
'ncRNA-as-chr19-15054',
'ncRNA-inter-chr8-7423',
'ncRNA-as-chr7-6302',
'ncRNA-inter-chr10-9418',
'ncRNA-inter-chr12-10454',
'ncRNA-as-chr7-5999',
'ncRNA-as-chr6-5335',
'ncRNA-inter-chr19-14987',
'ncRNA-inter-chr16-13170',
'ncRNA-inter-chr3-2988',
'ncRNA-inter-chr8-7430',
'ncRNA-inter-chr3-2168',
'ncRNA-inter-chr9-7874',
'ncRNA-inter-chr4-3778',
'ncRNA-inter-chr2-2011',
'ncRNA-inter-chr5-4335',
'ncRNA-inter-chr9-8301',
'ncRNA-inter-chr16-13510',
'ncRNA-as-chr9-8401',
'ncRNA-as-chr16-13512',
'ncRNA-as-chr12-10896',
'ncRNA-as-chr8-7359',
'ncRNA-as-chr5-4744',
'ncRNA-inter-chr5-4499',
'ncRNA-inter-chr16-13509',
'ncRNA-inter-chr17-13692',
'ncRNA-as-chr17-13834',
'ncRNA-inter-chr9-8147',
'ncRNA-inter-chr10-8697',
'ncRNA-inter-chr6-5551',
'ncRNA-as-chr9-8317',
'ncRNA-inter-chr7-6509',
'ncRNA-as-chr19-14977',
'ncRNA-inter-chr6-5318',
'ncRNA-inter-chr5-4578',
'ncRNA-inter-chr12-10509',
'ncRNA-as-chr10-9015',
'ncRNA-inter-chr3-2411',
'ncRNA-inter-chr9-7875',
'ncRNA-inter-chr5-4336',
'ncRNA-inter-chr12-10910',
'ncRNA-as-chr1-782',
'ncRNA-inter-chr17-13924',
'ncRNA-intra-chr7-5920',
'ncRNA-inter-chr16-13225',
'ncRNA-inter-chr3-2269',
'ncRNA-inter-chr14-12016',
'ncRNA-as-chr4-3800',
'ncRNA-as-chr5-4655',
'ncRNA-inter-chr9-7989',
'ncRNA-intra-chr5-4728',
'ncRNA-inter-chrX-15248',
'ncRNA-inter-chr10-8767',
'ncRNA-inter-chr19-14717',
'ncRNA-inter-chr8-6766',
'ncRNA-inter-chr13-11122',
'ncRNA-inter-chr7-6074',
'ncRNA-inter-chr15-12439',
'ncRNA-as-chr11-9787',
'ncRNA-inter-chr2-1827',
'ncRNA-as-chr19-14976',
'ncRNA-inter-chr19-14947',
'ncRNA-inter-chr6-5248',
'ncRNA-inter-chr2-1098',
'ncRNA-inter-chr8-7180',
'ncRNA-inter-chr9-8118',
'ncRNA-inter-chr14-12199',
'ncRNA-as-chr6-5336',
'ncRNA-inter-chr4-3867',
'ncRNA-inter-chr10-9000',
'ncRNA-inter-chr14-12290',
'ncRNA-inter-chr2-1491',
'ncRNA-inter-chr15-12606',
'ncRNA-inter-chr10-9222',
'ncRNA-inter-chr5-4322',
'ncRNA-inter-chr12-10942',
'ncRNA-inter-chr18-14690',
'ncRNA-inter-chr2-1471',
'ncRNA-inter-chr3-2410',
'ncRNA-inter-chr19-14880',
'ncRNA-inter-chr13-11074',
'ncRNA-inter-chr9-8056',
'ncRNA-inter-chr5-4654',
'ncRNA-inter-chr3-2166',
'ncRNA-inter-chr8-6944',
'ncRNA-as-chr7-6065',
'ncRNA-inter-chr8-6896',
'ncRNA-inter-chr2-1963',
'ncRNA-inter-chr4-3425',
'ncRNA-inter-chr13-11385',
'ncRNA-inter-chr9-7885',
'ncRNA-as-chr10-9411',
'ncRNA-as-chr9-8419',
'ncRNA-as-chr12-10618',
'ncRNA-inter-chr7-6390',
'ncRNA-inter-chr17-14151',
'ncRNA-as-chr13-11787',
'ncRNA-as-chr2-1965',
'ncRNA-inter-chr6-5721',
'ncRNA-inter-chr6-5822',
'ncRNA-inter-chr11-9635',
'ncRNA-inter-chr11-9965',
'ncRNA-inter-chr4-3052',
'ncRNA-inter-chr17-13857',
'ncRNA-inter-chr13-11201',
'ncRNA-inter-chr6-5249',
'ncRNA-inter-chr19-14851',
'ncRNA-inter-chr6-5638',
'ncRNA-inter-chr17-14130',
'ncRNA-inter-chr9-7993',
'ncRNA-inter-chr18-14656',
'ncRNA-inter-chr9-7992',
'ncRNA-inter-chr1-566',
'ncRNA-inter-chr12-10715',
'ncRNA-inter-chr17-14102',
'ncRNA-inter-chr4-3010',
'ncRNA-inter-chr3-2764',
'ncRNA-inter-chr4-3306',
'ncRNA-inter-chr19-14979',
'ncRNA-inter-chr1-570',
'ncRNA-inter-chr19-14790',
'ncRNA-inter-chr4-3282',
'ncRNA-inter-chr5-4777',
'ncRNA-inter-chr8-7605',
'ncRNA-inter-chr9-8000',
'ncRNA-as-chr15-12920',
'ncRNA-as-chr1-369',
'ncRNA-inter-chr19-14952',
'ncRNA-as-chr9-8393',
'ncRNA-as-chr14-12074',
'ncRNA-inter-chr12-10415',
'ncRNA-inter-chr7-6559',
'ncRNA-inter-chr1-630',
'ncRNA-inter-chr4-3142',
'ncRNA-inter-chr6-5131',
'ncRNA-inter-chr16-13050',
'ncRNA-inter-chr7-6411',
'ncRNA-inter-chr5-4746',
'ncRNA-inter-chr1-633',
'ncRNA-inter-chr10-8461',
'ncRNA-inter-chr2-2016',
'ncRNA-inter-chr6-5137',
'ncRNA-as-chr4-3300',
'ncRNA-inter-chr4-3009',
'ncRNA-inter-chr2-1923',
'ncRNA-inter-chr1-670',
'ncRNA-inter-chr15-12835',
'ncRNA-inter-chr18-14655',
'ncRNA-inter-chr16-13211',
'ncRNA-as-chr7-6050',
'ncRNA-inter-chr7-6508',
'ncRNA-inter-chr11-10206',
'ncRNA-inter-chr13-11399',
'ncRNA-inter-chr8-6757',
'ncRNA-inter-chr1-129',
'ncRNA-inter-chr12-10459',
'ncRNA-inter-chr7-6709',
'ncRNA-inter-chr12-10713',
'ncRNA-inter-chr7-6523',
'ncRNA-inter-chr2-2017',
'ncRNA-inter-chr7-6343',
'ncRNA-inter-chr1-63',
'ncRNA-as-chr3-2936',
'ncRNA-inter-chr18-14691',
'ncRNA-inter-chr7-6097',
'ncRNA-inter-chr6-5723')
```


```{r}
# ---- Marker / expression plots on the integrated object `combined` ----
DefaultAssay(droplet) <- "RNA"
# Every call below works on `combined`, so its default assay has to be RNA as
# well; otherwise normalisation and the plots run on whatever assay was active.
DefaultAssay(combined) <- "RNA"
#droplet <- NormalizeData(combined, verbose = TRUE, normalization.method = "RC", scale.factor = 1e6)
combined <- NormalizeData(combined, verbose = TRUE)

DotPlot(combined, features = all_genes)
FeaturePlot(combined, features = genes_hep_main, min.cutoff = "q9")
#hepatocytes
subtissplot <- DotPlot(combined, features = c(genes_hep_main, genes_endo, genes_bec_b_immune, genes_kuppfer, genes_nk))
PC <- DotPlot(combined, features = c(genes_hep_main, genes_zones))
NPC <- DotPlot(combined, features = c(genes_endo, genes_kuppfer, genes_nk))
all <- DotPlot(combined, features = c(all_genes))

### coexpression plots ####

# NOTE(review): `KO.cells` is created in a later chunk of this file and the
# "mds" reduction is added to `combined` in the MDS chunk - confirm that
# KO.cells carries an "mds" reduction before running this.
f1 <- FeaturePlot(KO.cells, features = c('Cyp2b10', 'ncRNA-inter-chr7-5998'), reduction = "mds", order = TRUE, split.by = "stim", blend = TRUE, sort.cell = TRUE, max.cutoff = 0.5)


########## this is exact averaging formula ###############
x <- (AverageExpression(KO.cells, verbose = TRUE, assays = "RNA", slot = "counts")$RNA)
x["ncRNA-inter-chr7-5998", ]
#                         G171B    G171C
#ncRNA-inter-chr7-5998 1.871795 1.091463
########
#Idents(combined) <- factor(Idents(combined), levels = c(0,1,12))
markers.to.plot <- c("Alb", "ncRNA-inter-chr7-5998")
DotPlot(combined, features = rev(markers.to.plot), cols = c("blue", "red"), dot.scale = 8,
    split.by = "stim") + RotatedAxis()

FeaturePlot(combined, features = c("Alb", "ncRNA-inter-chr7-5998", "Cyp2b10", "dSaCas9", "KRAB", "AAV8-mCherry"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red"))


######################### vlnplot ##########################

plots <- VlnPlot(combined, features = c("Alb", "ncRNA-inter-chr7-5998", "Cyp2b10", "dSaCas9"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)

plots <- VlnPlot(combined, features = c("Lhx4", "Dtna", "Fam189a1", "Galnt16", "Kalrn"), split.by = "stim", group.by = "seurat_clusters", pt.size = 0, combine = FALSE)
CombinePlots(plots = plots, ncol = 1)


#endothelial
DotPlot(combined, features = genes_endo)

#zones
zones <- DotPlot(combined, features = genes_zones)

# NOTE(review): this needs a "tsne" reduction on `combined` - confirm RunTSNE has been run on it.
f1 <- FeaturePlot(combined, features = c('Cyp2e1', 'Cyp2f2', 'Ass1'), min.cutoff = "q9", reduction = "tsne")

DimPlot(combined, label = TRUE)

save(combined, file = "Seurat_smart-drop_integrated.Robj")


################# save raw counts from cluster #####################
# Extracts per-identity expression matrices from `combined`, computes average
# expression tables, and builds the Cyp2b10 / Nr1i3 (CAR) plots.

# Identities are switched to the `stim` metadata column.
Idents(combined) <- "stim"

### to average out the matrix from KO cells 

# NOTE(review): identities were just set to `stim`, so identity 0 presumably no
# longer refers to a cluster; this call also passes both `ident` and `idents` - confirm the intended selection.
combined.raw.data.0.1 <- as.matrix(GetAssayData(combined, slot = "data")[, WhichCells(combined, ident = 0,idents = "stim")])
# NOTE(review): the next two statements are identical (same slot, same identity) - confirm whether .2 should select a different identity.
combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])
combined.raw.data.2 <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 0)])

#combined.raw.data.[i] <- as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 1)])
#combined.raw.data.12 <-as.matrix(GetAssayData(combined, slot = "counts")[, WhichCells(combined, ident = 12)])
#combined.raw.data.1 <- as.matrix(GetAssayData(combined, slot = "counts"))
# NOTE(review): `test.combined` is not defined anywhere in the visible part of this file - confirm the object name.
x <- AverageExpression(test.combined,assays = "RNA",add.ident = "stim", slot = "data",use.scale = FALSE, use.counts = FALSE)$RNA

#}######## CAR data 
# Average RNA expression per identity, with the gene names added as a column.
avg.combined.cells <- (AverageExpression(combined, verbose = FALSE)$RNA) 
avg.combined.cells$gene <- rownames(avg.combined.cells)

# Blended co-expression plot of Cyp2b10 and Nr1i3 on the UMAP, split by `stim`.
CAR_FP <- FeaturePlot(combined, features = c('Cyp2b10','Nr1i3'), reduction = "umap", order = TRUE,split.by = "stim", blend = TRUE,sort.cell = TRUE, max.cutoff = 1, min.cutoff = 0, pt.size = 0.5, repel = TRUE)
CAR_DOT_NR <- DotPlot(combined, features = 'Nr1i3', col.min = 0)

Cyp2b10_FP <- FeaturePlot(combined, features = 'Cyp2b10', reduction = "umap", min.cutoff = 0)
########### tSNE #################################
# Re-normalise and re-select variable features on the currently active assay.
combined <- NormalizeData(object = combined)
combined <- FindVariableFeatures(combined, selection.method = "vst", nfeatures = 2000)

```


```{r}
###### I used these for merged cells ####

# Work on a copy of the integrated object and re-normalize its RNA assay with
# relative counts scaled to 1e6.
G172_TPM_count <- ref.integrated.2
G172_TPM_count <- NormalizeData(
  G172_TPM_count,
  assay = "RNA",
  normalization.method = "RC",
  scale.factor = 1e6
)

# Keep the current identities in `celltype`, build "<identity>_<stim>" labels
# and make those labels the active identities.
G172_TPM_count$celltype.stim <- paste(Idents(G172_TPM_count), G172_TPM_count$stim, sep = "_")
G172_TPM_count$celltype <- Idents(G172_TPM_count)
Idents(G172_TPM_count) <- "celltype.stim"
# Suffix every cell name with its "<identity>_<stim>" label.
G172_TPM_count <- RenameCells(
  G172_TPM_count,
  new.names = paste0(colnames(G172_TPM_count), "-", G172_TPM_count$celltype.stim)
)


# Select clusters 0-7 through the `celltype` column: the active identities are
# now "<identity>_<stim>" labels, so asking for idents "0".."7" would not match.
ko_cluster_ids <- as.character(0:7)
ko_cell_names <- colnames(G172_TPM_count)[
  as.character(G172_TPM_count$celltype) %in% ko_cluster_ids
]
KO.cells <- subset(G172_TPM_count, cells = ko_cell_names)
Idents(KO.cells) <- "stim"
colnames(KO.cells)
# Prefix each cell name with that cell's own stim label. Pasting the whole stim
# vector onto every name (as an lapply over the names would) yields a list of
# vectors, not one name per cell.
KO.cells <- RenameCells(
  KO.cells,
  new.names = paste0(KO.cells$stim, "_", colnames(KO.cells))
)

### Export count matrices (raw counts and normalized data) per cluster / sample

# NOTE(review): this section looks cells up by plain cluster id (0, 1, ...) and
# by "<cluster>_MergedM1" / "<cluster>_MergedM2" labels on the same object.
# Both cannot be the active identities at once, so Idents() has to be switched
# between the parts — confirm the intended run order.

# Dense cbind(counts, data) matrix for `object`. When `idents` is given, only
# cells carrying those identities are kept; otherwise all cells are used.
counts_and_data <- function(object, idents = NULL) {
  counts <- GetAssayData(object, slot = "counts")
  normalized <- GetAssayData(object, slot = "data")
  if (!is.null(idents)) {
    cells <- WhichCells(object, idents = idents)
    counts <- counts[, cells]
    normalized <- normalized[, cells]
  }
  cbind(as.matrix(counts), as.matrix(normalized))
}

# Raw counts per cluster: raw.data.0 ... raw.data.8. A single slot name is
# passed for every cluster (cluster 0 previously received two slot names).
tpm_raw_counts <- GetAssayData(G172_TPM_count, slot = "counts")
for (cluster_id in 0:8) {
  assign(
    paste0("raw.data.", cluster_id),
    as.matrix(tpm_raw_counts[, WhichCells(G172_TPM_count, idents = cluster_id)])
  )
}

# TPMcount0 ... TPMcount7: counts and data for the MergedM1 cells followed by
# counts and data for the MergedM2 cells of each cluster.
for (cluster_id in 0:7) {
  assign(
    paste0("TPMcount", cluster_id),
    cbind(
      counts_and_data(G172_TPM_count, paste0(cluster_id, "_MergedM1")),
      counts_and_data(G172_TPM_count, paste0(cluster_id, "_MergedM2"))
    )
  )
}

# Whole-object exports.
# NOTE(review): TPMcountBottom and TPMcountTop are both built from G172.M1 and
# are therefore identical — confirm which objects were meant.
TPMcountBottom <- counts_and_data(G172.M1)
write.csv(TPMcountBottom, "CountResult/counts_TPMcountBottom")

TPMcountTop <- counts_and_data(G172.M1)
write.csv(TPMcountTop, "CountResult/counts_TPMcountTop")

TPMcountTopM2 <- counts_and_data(G172.M2)
write.csv(TPMcountTopM2, "CountResult/counts_TPMcountTopG172M2")

TPMcountG172M1 <- counts_and_data(G172.M1)
write.csv(TPMcountG172M1, "CountResult/counts_TPMcountG172M1_Full")

# Write the M2 matrix to its own file; the M1 matrix was previously written a
# second time here and the M2 matrix was never saved.
TPMcountG172M2 <- as.matrix(GetAssayData(G172.M2, slot = "data"))
write.csv(TPMcountG172M2, "CountResult/counts_TPMcountG172M2_Full")

# NOTE(review): the *_merge_avg objects written below are not created in this
# section (only the *_merge matrices are), and the first file name starts with
# "PMcount" rather than "TPMcount" — confirm both.
TPMcountG172M1_merge <- as.matrix(GetAssayData(G172.M1.merge, slot = "data"))
write.csv(TPMcountG172M1_merge_avg, "CountResult/PMcountG172M1_merge_avg")

TPMcountG172M2_merge <- as.matrix(GetAssayData(G172.M2.merge, slot = "data"))
write.csv(TPMcountG172M2_merge_avg, "CountResult/TPMcountG172M2_merge_avg")

# Per-cluster counts and data by plain cluster id. These assignments replace
# TPMcount1 ... TPMcount7 computed above; TPMcount0 keeps its MergedM1/M2 form.
TPMcount1 <- counts_and_data(G172_TPM_count, 1)
for (cluster_id in 2:12) {
  assign(paste0("TPMcount", cluster_id), counts_and_data(combined, cluster_id))
}

# One CSV per cluster. Files 0-7 use the "counts_" prefix and files 8-12 use
# "counts."; the names are kept unchanged so existing outputs keep their paths.
for (cluster_id in 0:12) {
  file_prefix <- if (cluster_id <= 7) "counts_" else "counts."
  write.csv(
    get(paste0("TPMcount", cluster_id)),
    paste0("CountResult/", file_prefix, "TPMcount", cluster_id, ".csv")
  )
}



#avg.KO.cells <- log1p(AverageExpression(KO.cells, verbose = FALSE)$RNA)  #original code log transformed
# Per-identity RNA averages for KO.cells (not log-transformed), with the gene
# names copied into a `gene` column.
avg.KO.cells <- (AverageExpression(KO.cells, verbose = FALSE)$RNA) 
avg.KO.cells$gene <- rownames(avg.KO.cells)

# Gene to annotate on the scatter plots.
genes.to.label= ("ncRNA-inter-chr7-5998")
#genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
# NOTE(review): aes(CTRL, STIM) needs columns named CTRL and STIM in
# avg.KO.cells, and the plot titles mention T cells / monocytes — these look
# like leftovers from a template; confirm the column names and titles.
p1 <- ggplot(avg.KO.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
# NOTE(review): avg.cd14.mono is not created anywhere in this section — verify.
p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE)
plot_grid(p1, p2)
```


```{r}
# UMAP, neighbour graph, clustering (resolution 0.5) and tSNE on the first 20
# PCs of the KO cells.
KO.cells <- RunUMAP(KO.cells, reduction = "pca", dims = 1:20)
KO.cells <- FindNeighbors(KO.cells, reduction = "pca", dims = 1:20)
KO.cells <- FindClusters(KO.cells, resolution = 0.5)
KO.cells <- RunTSNE(KO.cells, reduction = "pca", dims = 1:20)


# Same pipeline for the hepatocyte cells.
Hep.cells <- RunUMAP(Hep.cells, reduction = "pca", dims = 1:20)
Hep.cells <- FindNeighbors(Hep.cells, reduction = "pca", dims = 1:20)
Hep.cells <- FindClusters(Hep.cells, resolution = 0.5)
Hep.cells <- RunTSNE(Hep.cells, reduction = "pca", dims = 1:20)



# Visualization: KO cells
p1 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(KO.cells, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(KO.cells, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(KO.cells, label = TRUE)
plot_grid(p1, p4)
DimPlot(KO.cells, reduction = "umap", split.by = "stim")


# hepatocyte cells

p5 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "stim")
p6 <- UMAPPlot(Hep.cells, reduction = "umap", group.by = "mouse.sex")
p7 <- UMAPPlot(Hep.cells, reduction = "umap", label = TRUE)
p8 <- UMAPPlot(Hep.cells, label = TRUE)
plot_grid(p5, p8)
# Split view of the hepatocyte object (this call previously plotted KO.cells
# again, repeating the plot from the KO section).
DimPlot(Hep.cells, reduction = "umap", split.by = "stim")


# Raw count matrix of each KO.cells cluster (0-5), stored as raw.data.KO.<id>.
ko_raw_counts <- GetAssayData(KO.cells, slot = "counts")
ko_cluster_ids <- 0:5
for (cluster_id in ko_cluster_ids) {
  assign(
    paste0("raw.data.KO.", cluster_id),
    as.matrix(ko_raw_counts[, WhichCells(KO.cells, idents = cluster_id)])
  )
}


# Write every matrix only after all of them have been extracted.
for (cluster_id in ko_cluster_ids) {
  matrix_name <- paste0("raw.data.KO.", cluster_id)
  write.csv(get(matrix_name), file.path("CountResult/Markers", matrix_name))
}



# lncRNA expression on the KO UMAP, next to the stim and cluster views.
f1 <- FeaturePlot(
  KO.cells,
  features = c("ncRNA-inter-chr7-5998"),
  reduction = "umap",
  split.by = "stim"
)
plot_grid(f1, p1, p4)

# Violin plots are drawn from freshly normalized RNA values.
DefaultAssay(KO.cells) <- "RNA"
KO.cells <- NormalizeData(KO.cells, verbose = FALSE)

plots <- VlnPlot(
  KO.cells,
  features = c("Alb", "ncRNA-inter-chr7-5998", "Cyp2b10", "Cyp2e1", "Cyp2f2"),
  split.by = "stim",
  group.by = "seurat_clusters",
  pt.size = 0,
  combine = FALSE
)
CombinePlots(plots = plots, ncol = 1)

# Re-embed and re-cluster a subset of the KO clusters.
# NOTE(review): the object is named Three_five_six but only identities "5" and
# "6" are selected — confirm whether "3" should be included.
Three_five_six <- subset(KO.cells, idents = c("5", "6"))

Three_five_six <- RunUMAP(Three_five_six, reduction = "pca", dims = 1:20)
Three_five_six <- FindNeighbors(Three_five_six, reduction = "pca", dims = 1:20)
Three_five_six <- FindClusters(Three_five_six, resolution = 1)
Three_five_six <- RunTSNE(Three_five_six, reduction = "pca", dims = 1:20)

p1 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(Three_five_six, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(Three_five_six, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(Three_five_six, label = TRUE)
plot_grid(p1, p4)
DimPlot(Three_five_six, reduction = "umap", split.by = "stim")

f1 <- FeaturePlot(
  Three_five_six,
  features = c("ncRNA-inter-chr7-5998"),
  reduction = "umap",
  split.by = "stim"
)


# Cells of `combined` whose ncRNA-inter-chr7-5998 value is above 1. This is
# defined before the subset that uses it. The feature name is back-quoted so it
# is evaluated as a variable; written as a quoted string it is a constant that
# is compared to 1 and the test is the same for every cell.
lnc5998.cells <- WhichCells(object = combined, expression = `ncRNA-inter-chr7-5998` > 1)

# Keep only the expressing cells that carry identity "1".
# NOTE(review): this assumes the active identities of `combined` are cluster
# ids at this point — confirm.
lnc5998 <- subset(combined, cells = lnc5998.cells, idents = "1")

lnc5998 <- RunUMAP(lnc5998, reduction = "pca", dims = 1:20)
lnc5998 <- FindNeighbors(lnc5998, reduction = "pca", dims = 1:20)
lnc5998 <- FindClusters(lnc5998, resolution = 1)
lnc5998 <- RunTSNE(lnc5998, reduction = "pca", dims = 1:20)

# Visualization
p1 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "stim")
p2 <- UMAPPlot(lnc5998, reduction = "umap", group.by = "mouse.sex")
p3 <- UMAPPlot(lnc5998, reduction = "umap", label = TRUE)
p4 <- UMAPPlot(lnc5998, label = TRUE)
plot_grid(p1, p4)
DimPlot(lnc5998, reduction = "umap", split.by = "stim")

# The stray console continuation "+" inside this call has been removed (it made
# the chunk unparseable). Only cells still present in the subset are requested,
# since lnc5998.cells can also contain cells outside identity "1".
FeaturePlot(
  lnc5998,
  features = c("ncRNA-inter-chr7-5998"),
  split.by = "stim",
  cols = c("grey", "red"),
  cells = intersect(lnc5998.cells, colnames(lnc5998)),
  min.cutoff = 0.5
)


DefaultAssay(lnc5998) <- "RNA"
lnc5998 <- NormalizeData(lnc5998, verbose = FALSE)

plots <- VlnPlot(
  lnc5998,
  features = c("Alb", "ncRNA-inter-chr7-5998", "Cyp2b10", "Cyp2e1", "Cyp2f2"),
  split.by = "stim",
  group.by = "seurat_clusters",
  pt.size = 0,
  combine = FALSE
)
CombinePlots(plots = plots, ncol = 1)

 
```

```{r}
# Pairwise distances between cells, computed on the scaled expression values
# (cells become rows after the transpose).
scaled_expression <- GetAssayData(KO.cells, slot = "scale.data")
d <- dist(t(scaled_expression))
# Classical multidimensional scaling into k = 2 dimensions.
mds <- cmdscale(d = d, k = 2)
# Name the embedding columns with the same prefix as the reduction key.
colnames(mds) <- c("MDS_1", "MDS_2")
# Store the embedding on the object as a custom reduction called "mds".
KO.cells[["mds"]] <- CreateDimReducObject(
  embeddings = mds,
  key = "MDS_",
  assay = DefaultAssay(KO.cells)
)

# The custom reduction can be plotted like any built-in one.
DimPlot(KO.cells, reduction = "mds", pt.size = 0.5)
```

Find differential markers

```{r}
# Label every KO cell as "<cluster>_<stim>", keep the cluster identity in
# `celltype`, and make the combined label the active identity.
KO.cells$celltype.stim <- paste(Idents(KO.cells), KO.cells$stim, sep = "_")
KO.cells$celltype <- Idents(KO.cells)
Idents(KO.cells) <- "celltype.stim"
response3 <- FindMarkers(
  KO.cells,
  ident.1 = c("1_G171B", "0_G171B", "2_G171B"),
  ident.2 = c("1_G171C", "0_G171C", "2_G171C"),
  verbose = TRUE,
  test.use = "MAST",
  logfc.threshold = 0,
  min.pct = 0
)
head(response3, n = 15)


#DE1: Compare cluster 0+1+12 (that expressed lnc5998) with other hepatocyte clusters (2+5+8)
#DE2: Compare cluster 1 (showed major effects in the KD) vs cluster 0 (that showed little KD)
#DE3: For KO.cells that formed five subclusters, compare clusters 3+4+5+1 vs 2+0
#DE4: For KO.cells that formed five clusters, compare cluster 4 vs. 2

# FindMarkers with the fold-change and detection-rate thresholds set to 0.
# The thresholds were previously given as FALSE, which is used as the number 0.
markers_unfiltered <- function(object, ident.1, ident.2 = NULL) {
  FindMarkers(
    object,
    ident.1 = ident.1,
    ident.2 = ident.2,
    verbose = TRUE,
    logfc.threshold = 0,
    min.pct = 0
  )
}

# The comparisons below name clusters by id, so the cluster labels must be the
# active identities.
# NOTE(review): "seurat_clusters" is assumed to hold the 0-12 clustering of
# `combined` (it is the grouping used for its violin plots) — confirm.
Idents(combined) <- "seurat_clusters"
DE0112.258 <- markers_unfiltered(combined, c("0", "1", "12"), c("2", "5", "8"))
DE0112.All <- markers_unfiltered(combined, c("0", "1", "12"))
DE1.2 <- markers_unfiltered(combined, "1", "2")
DE1.5 <- markers_unfiltered(combined, "1", "5")
DE1.8 <- markers_unfiltered(combined, "1", "8")

DE2.5 <- markers_unfiltered(combined, "2", "5")
DE2.8 <- markers_unfiltered(combined, "2", "8")
DE5.8 <- markers_unfiltered(combined, "5", "8")

# Cluster-level comparisons on KO.cells: switch back from "<cluster>_<stim>"
# to the plain cluster labels saved in `celltype` above.
Idents(KO.cells) <- "celltype"
DEK4351.20 <- markers_unfiltered(KO.cells, c("3", "4", "5", "1"), c("2", "0"))

DEK4.3 <- markers_unfiltered(KO.cells, "4", "3")
DEK4.2 <- markers_unfiltered(KO.cells, "4", "2")
DEK4.0 <- markers_unfiltered(KO.cells, "4", "0")
DEK4.1 <- markers_unfiltered(KO.cells, "4", "1")
DEK4.5 <- markers_unfiltered(KO.cells, "4", "5")


##### comparison between G171C and G171B within each KO.cells cluster #####

# These comparisons use the "<cluster>_<stim>" labels again.
Idents(KO.cells) <- "celltype.stim"
DEK4_C.B <- markers_unfiltered(KO.cells, "4_G171C", "4_G171B")
DEK3_C.B <- markers_unfiltered(KO.cells, "3_G171C", "3_G171B")
DEK1_C.B <- markers_unfiltered(KO.cells, "1_G171C", "1_G171B")
DEK2_C.B <- markers_unfiltered(KO.cells, "2_G171C", "2_G171B")
DEK0_C.B <- markers_unfiltered(KO.cells, "0_G171C", "0_G171B")
DEK5_C.B <- markers_unfiltered(KO.cells, "5_G171C", "5_G171B")


################################### Write the results #########################
# Each table is written to CountResult/Markers/<object name>.
marker_table_names <- c(
  "DE0112.258", "DE0112.All", "DE1.2", "DE1.5", "DE1.8", "DE2.5", "DE2.8", "DE5.8",
  "DEK4351.20", "DEK4.3", "DEK4.2", "DEK4.0", "DEK4.1", "DEK4.5",
  "DEK4_C.B", "DEK3_C.B", "DEK1_C.B", "DEK2_C.B", "DEK0_C.B", "DEK5_C.B"
)
for (table_name in marker_table_names) {
  write.csv(get(table_name), file.path("CountResult/Markers", table_name))
}




# Tag every cell of `combined` with "<identity>_<stim>", keep the previous
# identity in `celltype`, and activate the combined label.
combined$celltype.stim <- paste(Idents(combined), combined$stim, sep = "_")
combined$celltype <- Idents(combined)
Idents(combined) <- "celltype.stim"


# Same labelling for `test.combined`.
test.combined$celltype.stim <- paste(Idents(test.combined), test.combined$stim, sep = "_")
test.combined$celltype <- Idents(test.combined)
Idents(test.combined) <- "celltype.stim"




# All G171B cells (clusters 0-12) against all G171C cells.
Combined_G171B_vs_G171C <- FindMarkers(
  combined,
  ident.1 = paste0(0:12, "_G171B"),
  ident.2 = paste0(0:12, "_G171C"),
  verbose = TRUE,
  logfc.threshold = FALSE,
  min.pct = FALSE
)


# G171C vs G171B within each cluster; results are stored as
# Combined_G171C_vs_G171B_Clust<id>, computed in the listed order.
cluster_order <- c(1, 0, 12, 2, 5, 8, 3, 4, 6, 7, 9, 10, 11)
for (cluster_id in cluster_order) {
  assign(
    paste0("Combined_G171C_vs_G171B_Clust", cluster_id),
    FindMarkers(
      combined,
      ident.1 = paste0(cluster_id, "_G171C"),
      ident.2 = paste0(cluster_id, "_G171B"),
      verbose = TRUE,
      logfc.threshold = FALSE,
      min.pct = FALSE
    )
  )
}


# Write the per-cluster tables once all comparisons have finished.
for (cluster_id in cluster_order) {
  table_name <- paste0("Combined_G171C_vs_G171B_Clust", cluster_id)
  write.csv(get(table_name), file.path("CountResult/Markers", table_name))
}





# MAST comparisons on KO.cells using the "<cluster>_<stim>" identities, with
# the fold-change and detection-rate thresholds set to 0 (previously written
# as FALSE, which is used as the number 0).

# Clusters 4+3 vs 0+2 within G171B.
PC_vs_PP_G171B <- FindMarkers(
  KO.cells,
  ident.1 = c("4_G171B", "3_G171B"),
  ident.2 = c("0_G171B", "2_G171B"),
  verbose = TRUE, test.use = "MAST", logfc.threshold = 0, min.pct = 0
)
head(PC_vs_PP_G171B, n = 15)


# Clusters 4+3 vs 0+2 within G171C.
PC_vs_PP_G171C <- FindMarkers(
  KO.cells,
  ident.1 = c("4_G171C", "3_G171C"),
  ident.2 = c("0_G171C", "2_G171C"),
  verbose = TRUE, test.use = "MAST", logfc.threshold = 0, min.pct = 0
)
head(PC_vs_PP_G171C, n = 15)


# Clusters 4+3 vs 0+2 with both samples pooled.
PC_vs_PP_G171BC <- FindMarkers(
  KO.cells,
  ident.1 = c("4_G171B", "3_G171B", "4_G171C", "3_G171C"),
  ident.2 = c("0_G171B", "2_G171B", "0_G171C", "2_G171C"),
  verbose = TRUE, test.use = "MAST", logfc.threshold = 0, min.pct = 0
)
# Preview the pooled result (the G171C table was previously shown here again).
head(PC_vs_PP_G171BC, n = 15)


# G171B vs G171C within clusters 4+3+5, and the reverse direction.
lnc5998_KO_DE_1 <- FindMarkers(
  KO.cells,
  ident.1 = c("4_G171B", "3_G171B", "5_G171B"),
  ident.2 = c("4_G171C", "3_G171C", "5_G171C"),
  verbose = TRUE, test.use = "MAST", logfc.threshold = 0, min.pct = 0
)
# Preview the table created just above (the previews used to reference an
# object named lnc5998_KO_DE, which this section never creates).
head(lnc5998_KO_DE_1, n = 15)


lnc5998_KO_DE_2 <- FindMarkers(
  KO.cells,
  ident.1 = c("4_G171C", "3_G171C", "5_G171C"),
  ident.2 = c("4_G171B", "3_G171B", "5_G171B"),
  verbose = TRUE, test.use = "MAST", logfc.threshold = 0, min.pct = 0
)
head(lnc5998_KO_DE_2, n = 15)

# GO terms for the genes in the pooled comparison. Taking the first column
# keeps the gene names as row names; no significance filter is applied here.
# The background is every gene in the RNA data slot. An earlier duplicate of
# this call with a garbled slot path has been removed.
cell.type.genes <- (PC_vs_PP_G171BC[1])
GOterms = topGOterms(fg.genes = rownames(cell.type.genes), bg.genes = rownames(KO.cells@assays$RNA@data), organism = "Mouse")
 
```

```{r}

# Average feature values per identity class using a plain arithmetic mean of
# the values stored in `slot` (no back-transformation is applied).
#
# Arguments:
#   object         Seurat object.
#   assays         Assay names to average; NULL uses every Assay in `object`.
#                  Unknown names are dropped with a warning; it is an error if
#                  none of them exist.
#   features       Features to average. If none of them is present in an assay,
#                  all features of that assay are used.
#   return.seurat  If TRUE, return a Seurat object built from the averages
#                  (normalized and scaled); otherwise a list of data.frames.
#   add.ident      Optional variable fetched with FetchData(); its value is
#                  appended to each cell's identity as "<identity>_<value>".
#   slot           Assay slot to read.
#   use.scale, use.counts  Accepted for call compatibility; not used.
#   verbose        Print a message after each identity class is averaged.
#   ...            Passed to CreateSeuratObject() when return.seurat is TRUE.
#
# Returns: a list named by assay, each element a data.frame with one row per
# feature and one column per identity class, or a Seurat object (see above).
AvergeExpression2 <- function (object, assays = NULL, features = NULL, return.seurat = FALSE, 
          add.ident = NULL, slot = "data", use.scale = FALSE, use.counts = FALSE, 
          verbose = TRUE, ...) 
{
    object.assays <- FilterObjects(object = object, classes.keep = "Assay")
    assays <- assays %||% object.assays
    ident.orig <- Idents(object = object)
    orig.levels <- levels(x = Idents(object = object))
    ident.new <- c()
    if (!all(assays %in% object.assays)) {
        assays <- assays[assays %in% object.assays]
        if (length(assays) == 0) {
            stop("None of the requested assays are present in the object")
        }
        else {
            warning("Requested assays that do not exist in object. Proceeding with existing assays only.")
        }
    }
    if (!is.null(x = add.ident)) {
        # Extend every identity with the value of the extra variable.
        new.data <- FetchData(object = object, vars = add.ident)
        new.ident <- paste(Idents(object)[rownames(x = new.data)], 
                           new.data[, 1], sep = "_")
        Idents(object, cells = rownames(new.data)) <- new.ident
    }
    data.return <- list()
    for (i in seq_along(assays)) {
        data.use <- GetAssayData(object = object, assay = assays[i], 
                                 slot = slot)
        # Resolve the feature set once, before the result frame is created, so
        # its rows always match the averaged values. Restricting the features
        # only inside the identity loop left the frame with rows for features
        # that are absent from the assay.
        features.assay <- unique(x = intersect(x = features, y = rownames(x = data.use)))
        if (length(x = features.assay) < 1) {
            features.assay <- rownames(x = data.use)
        }
        data.all <- data.frame(row.names = features.assay)
        for (j in levels(x = Idents(object))) {
            temp.cells <- WhichCells(object = object, idents = j)
            if (length(x = temp.cells) == 0) {
                # No cells for this level: report NaN (the mean of nothing)
                # instead of reusing the previous level's values.
                warning("No cells found for identity ", j, "; returning NaN for it.")
                data.temp <- rep(NaN, length(x = features.assay))
            }
            else if (length(x = temp.cells) == 1) {
                # A single cell is its own average.
                data.temp <- data.use[features.assay, temp.cells]
            }
            else {
                data.temp <- apply(X = data.use[features.assay, 
                                                temp.cells, drop = FALSE], MARGIN = 1, FUN = mean)
            }
            data.all <- cbind(data.all, data.temp)
            colnames(x = data.all)[ncol(x = data.all)] <- j
            if (verbose) {
                message(paste("Finished averaging", assays[i], 
                              "for cluster", j))
            }
            if (i == 1) {
                # Remember the original identity of the first cell in each class.
                ident.new <- c(ident.new, as.character(x = ident.orig[temp.cells[1]]))
            }
        }
        names(x = ident.new) <- levels(x = Idents(object))
        data.return[[i]] <- data.all
        names(x = data.return)[i] <- assays[[i]]
    }
    if (return.seurat) {
        # Build an object whose "cells" are the identity classes.
        toRet <- CreateSeuratObject(counts = data.return[[1]], 
                                    project = "Average", assay = names(x = data.return)[1], 
                                    ...)
        if (length(x = data.return) > 1) {
            for (i in 2:length(x = data.return)) {
                toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]])
            }
        }
        if (DefaultAssay(object = object) %in% names(x = data.return)) {
            DefaultAssay(object = toRet) <- DefaultAssay(object = object)
        }
        Idents(toRet, cells = colnames(x = toRet)) <- ident.new[colnames(x = toRet)]
        Idents(object = toRet) <- factor(x = Idents(object = toRet), 
                                         levels = as.character(x = orig.levels), ordered = TRUE)
        toRet <- NormalizeData(object = toRet, verbose = verbose)
        toRet <- ScaleData(object = toRet, verbose = verbose)
        return(toRet)
    }
    else {
        return(data.return)
    }
}
```



Find differential expression markers

```{r}
# Positive markers per identity, requiring detection in at least 25% of cells
# and a log fold-change of at least 0.25. The fold-change cut-off is passed as
# `logfc.threshold`, the argument name used by the other marker calls in this
# file (it was previously given as `thresh.use`).
combined.markers <- FindAllMarkers(
  object = combined,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
# Top two markers per cluster by average log fold-change.
combined.markers %>% group_by(cluster) %>% top_n(2, avg_logFC)

KO.markers <- FindAllMarkers(
  object = KO.cells,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

# G171B vs G171C in clusters 1, 0 and 2 of `combined` (MAST, thresholds at 0).
# NOTE(review): this needs "<cluster>_<stim>" labels as the active identities
# of `combined` — confirm they are set before this chunk runs.
response3 <- FindMarkers(
  combined,
  ident.1 = c("1_G171B", "0_G171B", "2_G171B"),
  ident.2 = c("1_G171C", "0_G171C", "2_G171C"),
  verbose = TRUE,
  test.use = "MAST",
  logfc.threshold = 0,
  min.pct = 0
)
head(response3, n = 15)

```




Visualize top genes in principal components

```{r, echo=FALSE, fig.height=4, fig.width=8}
# Heatmaps of the top 8 genes for PCs 1-3, drawn from 500 cells.
PCHeatmap(
  object = tiss1,
  pc.use = 1:3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE,
  num.genes = 8
)
```

Later on (in FindClusters and TSNE) you will pick a number of principal components to use. This has the effect of keeping the major directions of variation in the data and, ideally, suppressing noise. There is no correct answer to the number to use, but a decent rule of thumb is to go until the plot plateaus.

```{r}
# Elbow plot used to choose the number of principal components.
PCElbowPlot(tiss1)
```

Choose the number of principal components to use.
```{r}
# Number of principal components used by the clustering and tSNE steps.
n.pcs <- 10
```

The clustering is performed based on a nearest neighbors graph. Cells that have similar expression will be joined together. The Louvain algorithm looks for groups of cells with high modularity--more connections within the group than between groups. The resolution parameter determines the scale. Higher resolution will give more clusters, lower resolution will give fewer.

For the top-level clustering, aim to under-cluster instead of over-cluster. It will be easy to subset groups and further analyze them below.

```{r}
# Clustering resolution: higher values give more clusters.
res.used <- 4
# Cluster on the first n.pcs principal components, always recomputing the
# graph and keeping the SNN matrix on the object.
tiss1 <- FindClusters(
  object = tiss1,
  reduction.type = "pca",
  dims.use = 1:n.pcs,
  resolution = res.used,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
```

We use TSNE solely to visualize the data.
```{r}
# Raise the perplexity if cells look too spread out; lower it when there are
# few cells (but never below 10). The seed is fixed for reproducibility.
tiss1 <- RunTSNE(
  object = tiss1,
  dims.use = 1:n.pcs,
  seed.use = 10,
  perplexity = 30
)
```

```{r}
# t-SNE embedding with cluster labels drawn on top.
TSNEPlot(
  object = tiss1,
  do.label = TRUE,
  pt.size = 1.2,
  label.size = 4
)
```
## Compare to previous annotations
```{r}
# Attach the earlier annotations to tiss1 and cross-tabulate them against the
# current cluster identities.
previous_annotation <- read.csv("/Users/kkarri/Documents/Lab/Single_cell_project/dropseq/Liver_droplet_annotation.csv", stringsAsFactors = FALSE)

# Only cells still present in tiss1 may be written to: assigning to an unknown
# row name would append a new row to the meta.data data frame.
annotated_cells <- as.character(previous_annotation$X)
in_object <- annotated_cells %in% rownames(tiss1@meta.data)

cols <- c('free_annotation', 'cell_ontology_class')
for (col in cols) {
  previous_col <- paste0('previous_', col)
  # Cells without an earlier annotation keep the literal string "NA".
  tiss1@meta.data[, previous_col] <- "NA"
  tiss1@meta.data[annotated_cells[in_object], previous_col] <- previous_annotation[in_object, col]
  print(table(tiss1@meta.data[, previous_col]))
  # Compare against the identities of tiss1 itself (the object being annotated).
  print(table(tiss1@meta.data[, previous_col], tiss1@ident))
}

tiss1 <- compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(object = tiss1, do.return = TRUE, group.by = "previous_cell_ontology_class")
table(tiss1@meta.data[, "previous_cell_ontology_class"], tiss1@ident)
```


```{r}
# Re-run the helper-based comparison with the earlier droplet annotations,
# plot the previous ontology classes and tabulate them per current cluster.
tiss1 <- compare_previous_annotation(tiss1, tissue_of_interest, "droplet")
TSNEPlot(
  object = tiss1,
  do.return = TRUE,
  group.by = "previous_cell_ontology_class"
)
table(
  tiss1@meta.data[, "previous_cell_ontology_class"],
  tiss1@ident
)
```


```{r}
# Colour the embedding by animal sex and by animal id.
TSNEPlot(tiss1, group.by = "mouse.sex")
TSNEPlot(tiss1, group.by = "mouse.id")
```


Significant genes:

hepatocyte: Alb, Ttr, Apoa1, and Serpina1c
pericentral: Cyp2e1, Glul, Oat, Gulo
midlobular: Ass1, Hamp, Gstp1, Ubb
periportal: Cyp2f2, Pck1, Hal, Cdh1

endothelial cells: Pecam1, Nrp1, Kdr+ and Oit3+
Kupffer cells: Emr1, Clec4f, Cd68, Irf7
NK/NKT cells: Zap70, Il2rb, Nkg7, Cxcr6, Klr1c, Gzma
B cells: Cd79a, Cd79b, Cd74 and Cd19
Immune cells: Ptprc




```{r, echo=FALSE, fig.height=16, fig.width=12}
# One feature-plot panel per marker set (grey = low, red = high expression).

# Hepatocyte markers
FeaturePlot(
  tiss1, c(genes_hep),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)

# Endothelial markers
FeaturePlot(
  tiss1, c(genes_endo),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)

# Kupffer cell markers
FeaturePlot(
  tiss1, c(genes_kuppfer),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)

# NK / NKT cell markers
FeaturePlot(
  tiss1, c(genes_nk),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)

# B cell markers
FeaturePlot(
  tiss1, c(genes_b),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)

# Bile duct epithelial cell markers
FeaturePlot(
  tiss1, c(genes_bec),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)

# General immune markers
FeaturePlot(
  tiss1, c(genes_immune),
  pt.size = 1, nCol = 4, cols.use = c("grey", "red")
)


```

Dotplots let you see the intensity of expression and the fraction of cells expressing each of your genes of interest.
The radius shows you the percent of cells in that cluster with at least one read sequenced from that gene. The color level indicates the average
Z-score of gene expression for cells in that cluster, where the scaling is done over all cells in the sample.

#We have various immune cell types in the last cluster
```{r, echo=FALSE, fig.height=4, fig.width=10}
# Dot plot of the immune marker sets, with genes on the vertical axis.
DotPlot(
  tiss1,
  c(genes_kuppfer, genes_nk, genes_b, "Ptprc"),
  plot.legend = TRUE,
  col.max = 2.5,
  do.return = TRUE
) +
  coord_flip()
```

```{r, echo=FALSE, fig.height=8, fig.width=10}
# Dot plot across all marker sets, with genes on the vertical axis.
DotPlot(
  tiss1,
  c(genes_hep_main, genes_endo, genes_nk, genes_kuppfer, genes_bec_b_immune),
  plot.legend = TRUE,
  col.max = 2.5,
  do.return = TRUE
) +
  coord_flip()
```

Using the markers above, we can confidently label many of the clusters:

19: endothelial cells
20: bile duct epithelial cells
21: immune cells
rest are hepatocytes

We will add those cell_ontology_classes to the dataset.

```{r}
# Keep the numeric cluster identities under "cluster.ids" before annotating.
tiss1 <- StashIdent(object = tiss1, save.name = "cluster.ids")

# Clusters 0-20; the two annotation vectors below are positionally aligned
# with this vector (element i describes cluster i - 1).
cluster.ids <- as.numeric(0:20)

# Free-text labels: only clusters 18 and 19 get one at this level.
free_annotation <- c(
  rep(NA, 18),
  "bile duct epithelial cells",
  "endothelial cell of hepatic sinusoid",
  NA
)

# Ontology classes: everything except clusters 18 and 19 is a hepatocyte.
cell_ontology_class <- c(
  rep("hepatocyte", 18),
  "duct epithelial cell",
  "endothelial cell of hepatic sinusoid",
  "hepatocyte"
)

tiss1 <- stash_annotations(tiss1, cluster.ids, free_annotation, cell_ontology_class)
```

## Checking for batch effects

Color by metadata, like plate barcode, to check for batch effects.
```{r}
# Colour by sequencing channel (batch check) and by the free-text annotation.
TSNEPlot(
  object = tiss1,
  do.return = TRUE,
  group.by = "channel"
)
TSNEPlot(
  object = tiss1,
  do.return = TRUE,
  group.by = "free_annotation"
)

```

## Subcluster

Let's drill down on the hepatocytes.

```{r}
# Keep only the hepatocyte clusters: 0-17 and 20 (18 and 19 are the
# non-hepatocyte clusters in the annotation above).
subtiss1 <- SubsetData(tiss1, ident.use = c(0:17, 20))


```

```{r}
# Re-scale, re-select variable genes and re-run PCA on the hepatocyte subset.
subtiss1 <- ScaleData(subtiss1)
subtiss1 <- FindVariableGenes(
  subtiss1,
  do.plot = FALSE,
  x.high.cutoff = Inf,
  y.cutoff = 0.5
)
subtiss1 <- RunPCA(subtiss1, do.print = FALSE)
```

```{r}
# PC heatmap (top 8 genes, 20 cells, PCs 1-3) and elbow plot for the subset.
PCHeatmap(
  object = subtiss1,
  pc.use = 1:3,
  cells.use = 20,
  do.balanced = TRUE,
  label.columns = FALSE,
  num.genes = 8
)
PCElbowPlot(subtiss1)
```


```{r}
# Subclustering parameters for the hepatocyte subset.
sub.n.pcs <- 8
sub.res.use <- 0.5

# Cluster, then embed with t-SNE on the same principal components.
subtiss1 <- FindClusters(
  subtiss1,
  reduction.type = "pca",
  dims.use = 1:sub.n.pcs,
  resolution = sub.res.use,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
subtiss1 <- RunTSNE(
  subtiss1,
  dims.use = 1:sub.n.pcs,
  seed.use = 10,
  perplexity = 8
)
TSNEPlot(object = subtiss1, do.label = TRUE, pt.size = 1, label.size = 4)
```

```{r, echo=FALSE, fig.height=25, fig.width=25}
# Hepatocyte markers on the subset embedding.
FeaturePlot(
  subtiss1,
  genes_hep,
  cols.use = c("grey", "red"),
  pt.size = 4,
  nCol = 4
)
```

```{r, echo=FALSE, fig.height=8, fig.width=10}
# Dot plot of every marker gene across the hepatocyte subclusters.
DotPlot(
  subtiss1,
  all_genes,
  col.max = 2.5,
  plot.legend = TRUE,
  do.return = TRUE
) +
  coord_flip()
```

```{r}
# Cluster tree for the hepatocyte subclusters (result is not assigned back).
BuildClusterTree(object = subtiss1)
```

```{r, echo=FALSE, fig.height=10, fig.width=8}
# Female genes have lower expression in cluster 6 relative to the other female
# clusters, especially Xist.
FeaturePlot(
  subtiss1,
  c('Mup20', 'Mup1','Mup12', 'Mup21', 'Cyp2d9', 'Xist', 'A1bg', 'Cyp2c69'),
  cols.use = c("grey", "red"),
  pt.size = 3,
  nCol = 2
)

# Same genes as a dot plot over the top-level clusters.
DotPlot(
  tiss1,
  c('Mup20', 'Mup1','Mup12', 'Mup21', 'Cyp2d9', 'Xist', 'A1bg', 'Cyp2c69'),
  plot.legend = TRUE,
  col.max = 2.5,
  do.return = TRUE
) +
  coord_flip()

```


From these genes, it appears that the clusters represent:

0: periportal female
1: midlobular male
2: pericentral female
3: periportal male
4: midlobular male
5: pericentral male
6: midlobular female
7: midlobular female

The multitude of clusters of each type correspond mostly to individual animals/sexes.

```{r}
# Contingency table of animal sex versus subcluster, with unused levels dropped.
subtiss1 %>%
  FetchData(c('mouse.sex','ident')) %>%
  droplevels() %>%
  table()
```

```{r}
# Subclusters 0-7; the annotation vectors are positionally aligned with them.
sub.cluster.ids <- as.numeric(0:7)
sub.free_annotation <- c(
  "periportal female",
  "midlobular male",
  "pericentral female",
  "periportal male",
  "midlobular male",
  "pericentral male",
  "midlobular female",
  "midlobular female"
)
# Every subcluster is a hepatocyte.
sub.cell_ontology_class <- rep("hepatocyte", 8)

# Annotate the subset, then copy its annotations back onto the full object.
subtiss1 <- stash_annotations(subtiss1, sub.cluster.ids, sub.free_annotation, sub.cell_ontology_class)
tiss1 <- stash_subtiss_in_tiss(tiss1, subtiss1)
```

Liver zonation markers

```{r}
# Zonation markers, four per zone, in the order pericentral, midlobular,
# periportal (as listed in the "Significant genes" notes earlier in this file).
genes_zones <- c(
  'Cyp2e1', 'Glul', 'Oat', 'Gulo',
  'Ass1', 'Hamp', 'Gstp1', 'Ubb',
  'Cyp2f2', 'Pck1', 'Hal', 'Cdh1'
)

FeaturePlot(
  subtiss1,
  c(genes_zones),
  cols.use = c("grey", "red"),
  pt.size = 1,
  nCol = 4
)

DotPlot(
  subtiss1,
  c(genes_zones),
  plot.legend = TRUE,
  col.max = 2.5,
  do.return = TRUE
) +
  coord_flip()

# Free-text annotations on the subset and on the full embedding.
TSNEPlot(
  object = subtiss1,
  do.label = TRUE,
  pt.size = 1,
  label.size = 4,
  group.by = "free_annotation"
)

TSNEPlot(
  object = tiss1,
  do.label = TRUE,
  pt.size = 1,
  label.size = 4,
  group.by = "free_annotation"
)

```



##########
Find cluster markers for lncRNAs
```{r}

# Minimum average log fold change required for a gene to be reported.
MIN_LOGFOLD_CHANGE <- 1
# Minimum fraction of cells that must express the gene.
MIN_PCT_CELLS_EXPR_GENE <- 0.1

# Positive markers for every top-level cluster.
all.markers <- FindAllMarkers(tiss1,
                              min.pct = MIN_PCT_CELLS_EXPR_GENE,
                              logfc.threshold = MIN_LOGFOLD_CHANGE,
                              only.pos = TRUE,
                              test.use = "bimod") # likelihood ratio test

# Take gene names from the `gene` column, not from rownames: FindAllMarkers
# makes rownames unique, so a gene that marks several clusters gets a suffix
# and would no longer be a valid feature name. Deduplicate for plotting.
lnc_all_markers <- unique(grep(pattern = "^ncRNA",
                               x = as.character(all.markers$gene),
                               value = TRUE))
lnc_all_markers

#[1] "ncRNA_inter_chr10_92081" "ncRNA_intra_chr16_13383" "ncRNA_inter_chr17_13605" "ncRNA_inter_chr14_11815"
#[5] "ncRNA_inter_chr18_14344"

FeaturePlot(subtiss1, lnc_all_markers, cols.use = c("grey", "red"), pt.size = 1, nCol = 4)

######################### lncRNA markers- CELL TYPE MARKER ############
# Hepatocyte clusters (0-17 and 20) versus clusters 18 and 19, using
# FindMarkers' default test.
markers.hep <- FindMarkers(
  object = tiss1,
  ident.1 = c(0:17, 20),
  ident.2 = c(18, 19),
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)
lnc_markers_hep <- grep(pattern = "^ncRNA", x = rownames(markers.hep), value = TRUE)
lnc_markers_hep
FeaturePlot(
  tiss1,
  c(lnc_markers_hep),
  cols.use = c("grey", "red"),
  pt.size = 1,
  nCol = 4
)
DotPlot(
  tiss1,
  lnc_markers_hep,
  plot.legend = TRUE,
  col.max = 2.5,
  do.return = TRUE
) +
  coord_flip()
#[1] "ncRNA_as_chr11_9423"     "ncRNA_as_chr7_6166"      "ncRNA_inter_chr4_3295"   "ncRNA_inter_chr17_14026"
#[5] "ncRNA_inter_chr3_2915"   "ncRNA_inter_chr5_4547"   "ncRNA_inter_chr15_12684"


# Same comparison with MAST; keep both the lncRNA rows and their names.
markers.hep.MAST <- FindMarkers(
  object = tiss1,
  ident.1 = c(0:17, 20),
  ident.2 = c(18, 19),
  only.pos = TRUE,
  test.use = "MAST"
)
is_lnc_hep_MAST <- grepl("^ncRNA", rownames(markers.hep.MAST))
lnc_markers_hep_MAST_TABLE <- markers.hep.MAST[is_lnc_hep_MAST, , drop = FALSE]
lnc_markers_hep_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.hep.MAST), value = TRUE)
lnc_markers_hep_MAST



# Clusters 18 and 19 against all remaining cells (ident.2 is omitted).
# NOTE(review): the MAST call below uses cluster 19 only, while this one also
# includes cluster 18 (annotated as bile duct above) - confirm which is meant.
markers.endo <- FindMarkers(
  object = tiss1,
  ident.1 = c(18, 19),
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.5
)
lnc_markers_endo <- grep(pattern = "^ncRNA", x = rownames(markers.endo), value = TRUE)
lnc_markers_endo
FeaturePlot(
  tiss1,
  c(lnc_markers_endo),
  cols.use = c("grey", "red"),
  pt.size = 1,
  nCol = 4
)
DotPlot(
  tiss1,
  lnc_markers_endo,
  plot.legend = TRUE,
  col.max = 2.5,
  do.return = TRUE
) +
  coord_flip()


#"ncRNA_inter_chr15_12770", "ncRNA_inter_chr12_10817", "ncRNA_as_chr13_11451",


# Cluster 19 against all remaining cells with MAST; keep rows and names.
markers.endo.MAST <- FindMarkers(
  object = tiss1,
  ident.1 = 19,
  test.use = "MAST",
  only.pos = TRUE
)
is_lnc_endo_MAST <- grepl("^ncRNA", rownames(markers.endo.MAST))
lnc_markers_endo_MAST_TABLE <- markers.endo.MAST[is_lnc_endo_MAST, , drop = FALSE]
lnc_markers_endo_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.endo.MAST), value = TRUE)
lnc_markers_endo_MAST


################## lncRNA expression ########################3

# "ncRNA_inter_chr17_13605" , "ncRNA_intra_chr16_13383"

########## Pericentral markers- zonation markers ############
# Pericentral subclusters (2, 5) against all remaining hepatocyte subclusters.
markers.pc <- FindMarkers(object = subtiss1, ident.1 = c(2, 5),
                          only.pos = FALSE, min.pct = 0.001, thresh.use = 0.001, test.use = "bimod")

# Keep the lncRNA rows under their own name (matching the *_TABLE convention
# used for the other comparisons) so the statistics are not overwritten by
# the name vector.
lnc_markers_pc_TABLE <- subset(markers.pc, grepl("^ncRNA", rownames(markers.pc)))
lnc_markers_pc <- grep(pattern = "^ncRNA", x = rownames(markers.pc), value = TRUE)
lnc_markers_pc

# Pericentral (2, 5) versus periportal (0, 3) with MAST, computed once.
markers.pc.MAST <- FindMarkers(object = subtiss1, ident.1 = c(2, 5), ident.2 = c(0, 3), test.use = "MAST", only.pos = TRUE)
lnc_markers_pc_MAST_TABLE <- subset(markers.pc.MAST, grepl("^ncRNA", rownames(markers.pc.MAST)))
lnc_markers_pc_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.pc.MAST), value = TRUE)
lnc_markers_pc_MAST

DotPlot(tiss1, lnc_markers_pc, plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()
FeaturePlot(subtiss1, c(lnc_markers_pc), cols.use = c("grey", "red"), pt.size = 1, nCol = 4)


############################### midlobular genes #############

# Midlobular subclusters (1, 4, 6, 7) against all remaining hepatocyte
# subclusters.
markers.mid <- FindMarkers(object = subtiss1, ident.1 = c(1, 4, 6, 7),
                           only.pos = FALSE, min.pct = 0.001, thresh.use = 0.05)

# Keep the lncRNA rows under their own name so the statistics are not
# overwritten by the name vector.
lnc_markers_mid_TABLE <- subset(markers.mid, grepl("^ncRNA", rownames(markers.mid)))
lnc_markers_mid <- grep(pattern = "^ncRNA", x = rownames(markers.mid), value = TRUE)
lnc_markers_mid
DotPlot(tiss1, lnc_markers_mid, plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()


FeaturePlot(subtiss1, c(lnc_markers_mid), cols.use = c("grey", "red"), pt.size = 1, nCol = 4)



# Same comparison with MAST, positive markers only.
markers.mid.MAST <- FindMarkers(object = subtiss1, ident.1 = c(1, 4, 6, 7), test.use = "MAST", only.pos = TRUE)

lnc_markers_mid_MAST_TABLE <- subset(markers.mid.MAST, grepl("^ncRNA", rownames(markers.mid.MAST)))
lnc_markers_mid_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.mid.MAST), value = TRUE)
lnc_markers_mid_MAST


##### periportal marker genes #############

# Periportal subclusters (0, 3) against all remaining hepatocyte subclusters.
markers.pp <- FindMarkers(object = subtiss1, ident.1 = c(0, 3),
                          only.pos = FALSE, min.pct = 0.001, thresh.use = 0.05)


# Keep the lncRNA rows under their own name so the statistics are not
# overwritten by the name vector.
lnc_markers_pp_TABLE <- subset(markers.pp, grepl("^ncRNA", rownames(markers.pp)))
lnc_markers_pp <- grep(pattern = "^ncRNA", x = rownames(markers.pp), value = TRUE)
lnc_markers_pp

# Periportal (0, 3) versus pericentral (2, 5) with MAST, positive markers only.
markers.pp.MAST <- FindMarkers(object = subtiss1, ident.1 = c(0, 3), ident.2 = c(2, 5), test.use = "MAST", only.pos = TRUE)

lnc_markers_pp_MAST_TABLE <- subset(markers.pp.MAST, grepl("^ncRNA", rownames(markers.pp.MAST)))
lnc_markers_pp_MAST <- grep(pattern = "^ncRNA", x = rownames(markers.pp.MAST), value = TRUE)
lnc_markers_pp_MAST

FeaturePlot(subtiss1, c(lnc_markers_pp), cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_pp, "Cyp2e1", "Cyp2f2"), plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()


################## male and female specific ############################

# Female subclusters (0, 2, 6, 7) against all remaining hepatocyte subclusters.
markers.female <- FindMarkers(object = subtiss1, ident.1 = c(0, 2, 6, 7),
                              only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1)

# Keep the lncRNA rows under their own name so the statistics are not
# overwritten by the name vector.
lnc_markers_female_TABLE <- subset(markers.female, grepl("^ncRNA", rownames(markers.female)))
lnc_markers_female <- grep(pattern = "^ncRNA", x = rownames(markers.female), value = TRUE)
lnc_markers_female

FeaturePlot(subtiss1, c(lnc_markers_female), cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_female, "Cyp2e1", "Cyp2f2"), plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()



# Male subclusters (1, 3, 4, 5) against all remaining hepatocyte subclusters.
# NOTE(review): the filters here (min.pct = 0.001, thresh.use = 0.05) are far
# looser than for the female comparison above - confirm this is intended.
markers.male <- FindMarkers(object = subtiss1, ident.1 = c(1, 3, 4, 5),
                            only.pos = TRUE, min.pct = 0.001, thresh.use = 0.05)

lnc_markers_male_TABLE <- subset(markers.male, grepl("^ncRNA", rownames(markers.male)))
lnc_markers_male <- grep(pattern = "^ncRNA", x = rownames(markers.male), value = TRUE)
lnc_markers_male

FeaturePlot(subtiss1, c(lnc_markers_male), cols.use = c("grey", "red"), pt.size = 1, nCol = 4, max.cutoff = 1)
DotPlot(tiss1, c(lnc_markers_male, "Cyp2e1", "Cyp2f2"), plot.legend = TRUE, col.max = 2.5, do.return = TRUE, group.by = "free_annotation") + coord_flip()


############################ Female zonate specific genes ###################################
# Four comparisons on the top-level clusters of tiss1; each contrasts one
# group of clusters with the union of the other three groups.

markers.pericentral.female <- FindMarkers(
  object = tiss1,
  ident.1 = c(6,11,14,20),
  test.use = "MAST",
  only.pos = TRUE,
  min.pct = 0.1,
  ident.2 = c(2,3,15,12,13,8,5,16),
  logfc.threshold = 1
)

# NOTE(review): this is the only one of the four calls without
# test.use = "MAST", so it runs FindMarkers' default test - confirm intent.
markers.periportal.female <- FindMarkers(
  object = tiss1,
  ident.1 = c(2,3,15),
  only.pos = TRUE,
  min.pct = 0.1,
  ident.2 = c(6,11,14,20,12,13,8,5,16),
  logfc.threshold = 1
)


markers.pericentral.male <- FindMarkers(
  object = tiss1,
  ident.1 = c(13,12),
  test.use = "MAST",
  only.pos = TRUE,
  min.pct = 0.1,
  ident.2 = c(2,3,15,8,5,16,6,11,14,20),
  logfc.threshold = 1
)


markers.periportal.male <- FindMarkers(
  object = tiss1,
  ident.1 = c(8,5,16),
  test.use = "MAST",
  only.pos = TRUE,
  min.pct = 0.1,
  ident.2 = c(2,3,15,13,12,6,11,14,20),
  logfc.threshold = 1
)




############### xeno-lncs CAR?RXR ##################

# Expression of three selected lncRNAs on the full embedding, capped at 1.
FeaturePlot(
  tiss1,
  c("ncRNA_inter_chr15_12684","ncRNA_inter_chr8_7430","ncRNA_inter_chr7_6222"),
  cols.use = c("grey", "red"),
  pt.size = 1,
  nCol = 4,
  max.cutoff = 1
)






#######################################################################

# Positive markers of the "Endothelial" identity in seurat_drop.
# NOTE(review): both logfc.threshold (2) and thresh.use (0.25) are supplied;
# confirm which one the installed Seurat version applies.
markers.endo.2 <- FindMarkers(
  object = seurat_drop,
  logfc.threshold = 2,
  ident.1 = "Endothelial",
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)

lnc.endo.2 <- grep(pattern = "^ncRNA", x = rownames(markers.endo.2), value = TRUE)
lnc.endo.2





```



Zonated lncRNAs 

```{r}
# lncRNAs listed as periportal-zonated. The closing quote after
# 'ncRNA_inter_chr10_9264' was missing, which made the chunk unparseable.
pp_zontaed <- c(
  'ncRNA_inter_chr14_12016', 'ncRNA_as_chr19_15090', 'ncRNA_inter_chr10_9351',
  'ncRNA_inter_chr16_13170', 'ncRNA_inter_chr3_2697', 'ncRNA_inter_chr1_274',
  'ncRNA_as_chr6_5518', 'ncRNA_inter_chr14_12066', 'ncRNA_intra_chr12_10871',
  'ncRNA_inter_chr16_13510', 'ncRNA_inter_chr3_2314', 'ncRNA_inter_chr10_9264',
  'ncRNA_inter_chr9_8122'
)





```

## Checking for batch effects

Color by metadata, like plate barcode, to check for batch effects.
```{r}
# Hepatocyte subset coloured by animal sex.
TSNEPlot(
  object = subtiss1,
  do.return = TRUE,
  group.by = "mouse.sex"
)

```

# Final coloring

Color by cell ontology class on the original TSNE.

```{r}
# Full embedding coloured by the assigned cell ontology class.
TSNEPlot(
  object = tiss1,
  do.return = TRUE,
  group.by = "cell_ontology_class"
)
```

# Save the Robject for later

```{r}
# Save the annotated object. The tissue name comes from `tissue_of_interest`
# (defined at the top of the file); `tiss1ue_of_interest` was never defined.
# NOTE(review): the directory name '04_tiss1ue_robj_generated' looks like the
# same rename artefact - confirm the directory actually exists on disk.
filename <- here('00_data_ingest', '04_tiss1ue_robj_generated',
                 paste0("droplet_", tissue_of_interest, "refinedcells_seurat_tiss1.Robj"))
print(filename)
save(tiss1, file = filename)
```

```{r}
# Reload a previously saved object from the project data directory.
filename <- here(
  '00_data_ingest',
  '04_tiss1ue_robj_generated',
  paste0("droplet_", tissue_of_interest, "seurat_smartdrop-integrated-8272019.Robj")
)
load(file = filename)
```


# Export the final metadata


```{r}
# Export the final per-cell annotations. The tissue name comes from
# `tissue_of_interest`; `tiss1ue_of_interest` was never defined.
save_annotation_csv(tiss1, tissue_of_interest, "droplet")
```
