library(pathfindR) library(biomaRt) setwd('~/Documents/desktop-tutorial/Sonja/SAN_GSE30936179/pathway') # getPlot_human = function(de,color='grey'){ # output_df <- run_pathfindR(de) # sig = output_df[grep('signaling',output_df$Term_Description),c('Term_Description','highest_p')] # #row.names(sig) = sig$Term_Description # sig$log10p = -log10(sig$highest_p) # sig = sig[order(sig$log10p),] # par(mar=c(3,15,2,2)) # barplot(sig$log10p,horiz=T,names.arg=sig$Term_Description,las=2,cex.names =0.5,col = color) # return(output_df) # } load("/Users/zhezhenwang/Documents/desktop-tutorial/Sonja/frCarlos/HTseq_DESeq_LA_padj0.05LFC0.5.rData") de = subset(resHTdata,abs(log2FoldChange)>0.5 & padj<0.05) depawy = getPlot_human(de[,c('gene_id','log2FoldChange','padj')]) justPlot = function(output_df,color='grey'){ sig = output_df[grep('signaling',output_df$Term_Description),c('Term_Description','highest_p')] #row.names(sig) = sig$Term_Description sig$log10p = -log10(sig$highest_p) sig = sig[order(sig$log10p),] par(mar=c(3,15,2,2)) barplot(sig$log10p,horiz=T,names.arg=sig$Term_Description,las=2,cex.names =0.5,col = color) } # load("/Users/zhezhenwang/Documents/desktop-tutorial/Sonja/SAN_GSE30936179/Tbx5KO_de_pathway_human.rData") # justPlot(depawy,color = 'purple') # save(depawy,file='Tbx5KO_de_pathway_human.rData') # dev.copy2pdf(file='Tbx5KO_de_pathway_human.pdf') # # up = subset(resHTdata,log2FoldChange>0.5 & padj<0.05) # uppawy = getPlot_human(up[,c('gene_id','log2FoldChange','padj')]) # save(uppawy,file='Tbx5KO_up_pathway_human.rData') # justPlot(uppawy,color = 'darkviolet') # dev.copy2pdf(file='Tbx5KO_up_pathway_human.pdf') # # set.seed(123) # down = subset(resHTdata,log2FoldChange< -0.5 & padj<0.05) # dnpawy = getPlot_human(down[,c('gene_id','log2FoldChange','padj')],color = 'mediumpurple') # save(dnpawy,file='Tbx5KO_dn_pathway_human.rData') # dev.copy2pdf(file='Tbx5KO_dn_pathway_human.pdf') ###! compared and found mouse pathways are better #### tbx5_mm=read.table('~/Documents/desktop-tutorial/Sonja/SAN_GSE30936179/pathway/Tbx5KO_de_pathway.txt') justPlot(tbx5_mm,color = 'purple') san_mm=read.table('~/Documents/desktop-tutorial/Sonja/SAN_GSE30936179/pathway/SAN_de_pathway.txt') justPlot(san_mm,color = 'gold') #### previous result # gsets_list <- get_gene_sets_list(source = "KEGG", # org_code = "mmu") # mmu_kegg_genes <- gsets_list$gene_sets # mmu_kegg_descriptions <- gsets_list$descriptions ## Save both as RDS files for later use #saveRDS(mmu_kegg_genes, "mmu_kegg_genes.RDS") #saveRDS(mmu_kegg_descriptions, "mmu_kegg_descriptions.RDS") mmu_kegg_genes <- readRDS("mmu_kegg_genes.RDS") mmu_kegg_descriptions <- readRDS("mmu_kegg_descriptions.RDS") url <- "https://stringdb-static.org/download/protein.links.v11.0/10090.protein.links.v11.0.txt.gz" path2file <- file.path(tempdir(check = TRUE), "STRING.txt.gz") download.file(url, path2file) ## read STRING pin file mmu_string_df <- read.table(path2file, header = TRUE) ## filter using combined_score cut-off value of 800 mmu_string_df <- mmu_string_df[mmu_string_df$combined_score >= 800, ] ## fix ids mmu_string_pin <- data.frame(Interactor_A = sub("^10090\\.", "", mmu_string_df$protein1), Interactor_B = sub("^10090\\.", "", mmu_string_df$protein2)) head(mmu_string_pin, 2) mmu_ensembl <- useMart("ensembl", dataset = "mmusculus_gene_ensembl") converted <- getBM(attributes = c("ensembl_peptide_id", "mgi_symbol"), filters = "ensembl_peptide_id", values = unique(unlist(mmu_string_pin)), mart = mmu_ensembl) mmu_string_pin$Interactor_A <- converted$mgi_symbol[match(mmu_string_pin$Interactor_A, converted$ensembl_peptide_id)] mmu_string_pin$Interactor_B <- converted$mgi_symbol[match(mmu_string_pin$Interactor_B, converted$ensembl_peptide_id)] mmu_string_pin <- mmu_string_pin[!is.na(mmu_string_pin$Interactor_A) & !is.na(mmu_string_pin$Interactor_B), ] mmu_string_pin <- mmu_string_pin[mmu_string_pin$Interactor_A != "" & mmu_string_pin$Interactor_B != "", ] head(mmu_string_pin, 2) # remove self interactions self_intr_cond <- mmu_string_pin$Interactor_A == mmu_string_pin$Interactor_B mmu_string_pin <- mmu_string_pin[!self_intr_cond, ] # remove duplicated inteactions (including symmetric ones) mmu_string_pin <- unique(t(apply(mmu_string_pin, 1, sort))) # this will return a matrix object mmu_string_pin <- data.frame(A = mmu_string_pin[, 1], pp = "pp", B = mmu_string_pin[, 2]) path2SIF <- file.path('~/Documents/desktop-tutorial/Sonja/SAN_GSE30936179', "mmusculusPIN.sif") write.table(mmu_string_pin, file = path2SIF, col.names = FALSE, row.names = FALSE, sep = "\t", quote = FALSE) path2SIF <- normalizePath(path2SIF) de = subset(resHTdata,abs(log2FoldChange)>0.5 & padj<0.05) # dim(output_df) # [1] 158 9 getPlot = function(de){ output_df <- run_pathfindR(de,convert2alias = FALSE, gene_sets = "Custom",custom_genes = mmu_kegg_genes, custom_descriptions = mmu_kegg_descriptions,pin_name_path = path2SIF) sig = output_df[grep('signaling',output_df$Term_Description),c('Term_Description','highest_p')] #row.names(sig) = sig$Term_Description sig$log10p = log10(sig$highest_p) sig = sig[order(sig$log10p),] par(mar=c(3,15,2,2)) barplot(sig$log10p,horiz=T,names.arg=sig$Term_Description,las=2,cex.names =0.5) # clustered_df <- cluster_enriched_terms(output_df) #kegg <- get_kegg(species = "mmu", path = "~/Documents/desktop-tutorial/Sonja/data/") #combine_pathfindR_results return(output_df) } # up = subset(resHTdata,log2FoldChange>0.5 & padj<0.05) # write.table(up,file = 'Tbx5KO_up_pathway.txt',sep='\t') # uppawy = getPlot(up[,c('gene_id','log2FoldChange','padj')]) # dev.copy2pdf(file = 'Tbx5KO_up_pathway.pdf') # # down = subset(resHTdata,log2FoldChange< -0.5 & padj<0.05) # downpawy = getPlot(down[,c('gene_id','log2FoldChange','padj')]) # write.table(down,file = 'Tbx5KO_down_pathway.txt',sep='\t') # dev.copy2pdf(file = 'Tbx5KO_down_pathway.pdf') # # nodalref = read.table('~/Documents/desktop-tutorial/Sonja/data/nodal_Srivastava/305913r1_online_table_ii.txt',sep = '\t',head = T) # nodalref = nodalref[-1,] # colnames(nodalref)[2:3] = c('log2FC','log2CPM') # # up = subset(nodalref,log2FC>0.5 & FDR<0.05) # write.table(up,file = 'Tbx5KO_up_pathway.txt',sep='\t') # uppawy = getPlot(up[,c('Gene','log2FC','FDR')]) # dev.copy2pdf(file = 'SAN_up_pathway.pdf') # # down = subset(nodalref,log2FC< -0.5 & FDR<0.05) # downpawy = getPlot(down[,c('Gene','log2FC','FDR')]) # write.table(down,file = 'SAN_down_pathway.txt',sep='\t') # dev.copy2pdf(file = 'SAN_down_pathway.pdf') depawy = getPlot(de[,c('gene_id','log2FoldChange','padj')]) write.table(depawy,file = 'Tbx5KO_de_pathway.txt',sep='\t') # dev.copy2pdf(file = 'Tbx5KO_de_pathway.pdf') depawy_hm = getPlot_human(de[,c('gene_id','log2FoldChange','padj')]) nodalref = read.table('~/Documents/desktop-tutorial/Sonja/data/nodal_Srivastava/305913r1_online_table_ii.txt',sep = '\t',head = T) nodalref = nodalref[-1,] colnames(nodalref)[2:3] = c('log2FC','log2CPM') de = subset(nodalref,abs(log2FC)>0.5 & FDR<0.05) depawy = getPlot(de[,c('Gene','log2FC','FDR')]) write.table(depawy,file = 'SAN_de_pathway.txt',sep='\t') dev.copy2pdf(file = 'SAN_de_pathway.pdf')