Script
#a<-unique(biomart$entrezgene)
biomart<-read.table(genes_mapped,header=TRUE, sep="\t")
mat<-matrix(NA, nrow=dim(biomart)[1], ncol=10)
colnames(mat)<-c(colnames(biomart),"promoter_start","promoter_end")
for (i in 1:dim(biomart)[1] ) {
if(biomart[i,3] > 0 ) {
promoter_start<-as.numeric(biomart$transcript_start[i])-upstream
promoter_end<-as.numeric(biomart$transcript_start[i])+downstream
mat[i,]<-c(biomart[i,1],as.character(biomart[i,2]),biomart[i,3],as.character(biomart[i,4]), biomart[i,5],biomart[i,6],biomart[i,7],biomart[i,8],promoter_start,promoter_end)
}
if(biomart[i,3] < 0 ) {
promoter_start<-as.numeric(biomart$transcript_end[i])+upstream
promoter_end<-as.numeric(biomart$transcript_end[i])-downstream
mat[i,]<-c(biomart[i,1],as.character(biomart[i,2]),biomart[i,3],as.character(biomart[i,4]), biomart[i,5],biomart[i,6],biomart[i,7],biomart[i,8],promoter_end,promoter_start)
}
}
write.table(mat, promoter_file, col.names=TRUE, row.names=FALSE,sep="\t")
#dput(mat,promoter_file)
epigen_file<-read.table(epigenetic_file, header=TRUE, sep="\t")
epigen_file[,2]<-gsub("chr","",epigen_file[,2])
genes<-data.frame(mat,stringsAsFactors=FALSE)
library(GenomicRanges)
t1 <- GRanges(seqnames = genes$chromosome_name, ranges = IRanges(as.numeric(genes$promoter_start), as.numeric(genes$promoter_end) , names = genes$entrezgene))
t2 <- GRanges(seqnames = epigen_file[,inter_chr],ranges = IRanges(epigen_file[,inter_start], epigen_file[,inter_end]))
ov1<-findOverlaps(t1,t2,minoverlap=overlap)
print("ovdone")
genes_pvalues<-read.table(gene_expr_file,header=TRUE, sep="\t")
idx<-match(genes$entrezgene[as.matrix(ov1)[,1]],genes_pvalues$gene_id )
overlapping_genes<-genes$entrezgene[as.matrix(ov1)[,1]]
non_overlapping_genes<-genes$entrezgene[-(as.matrix(ov1)[,1])]
list_non_overlapping<-unique(non_overlapping_genes)
list_overlapping<-unique(overlapping_genes)
idx_com<-match(list_non_overlapping,list_overlapping)
idx_non_overlapping_without_overlapping_idx<-which(is.na(idx_com)==TRUE)
list_non_overlapping_new<-list_non_overlapping[idx_non_overlapping_without_overlapping_idx]
idx_overlapping<- match(list_overlapping, genes_pvalues$gene_id)
overlapping_genes_pvals<-genes_pvalues[idx_overlapping,]
#overlapping_genes_pvals_out<- paste("overlapping_genes",Sys.time(),".txt",sep="")
idx_non_overlapping<- match(list_non_overlapping_new, genes_pvalues$gene_id)
non_overlapping_genes_pvals<-genes_pvalues[idx_non_overlapping,]
#non_overlapping_genes_pvals_out<-paste("non_overlapping_genes",Sys.time(),".txt",sep="")
write.table(overlapping_genes_pvals,overlapping_genes_pvals_file, col.names=TRUE, row.names=FALSE,sep="\t")
#dput(overlapping_genes_pvals,overlapping_genes_pvals_file)
#dput(non_overlapping_genes_pvals,non_overlapping_genes_pvals_file)
write.table(non_overlapping_genes_pvals,non_overlapping_genes_pvals_file, col.names=TRUE, row.names=FALSE,sep="\t")
#write.table(overlapping_genes_pvals,overlapping_genes_pvals_out,col.names=TRUE, row.names=FALSE, sep="\t")
#write.table(non_overlapping_genes_pvals,non_overlapping_genes_pvals_out ,col.names=TRUE,row.names=FALSE,sep="\t")
png(filename=figure, height=400, width=400, bg="white");
plot(ecdf(as.numeric((overlapping_genes_pvals[,4]))),xlab="adjusted p-value", main="",ylab= "proportion of genes <= x", col.main="black");
lines(ecdf(as.numeric((non_overlapping_genes_pvals[,4]))), col="red")
legend("bottomright", inset=.05, c("overlapping genes","non_overlapping genes"), lwd=2, lty=c(1, 1, 1, 2), col=c("black","red"))
dev.off()
ks_test<-ks.test(overlapping_genes_pvals[,4],non_overlapping_genes_pvals[,4])
ks_test_unlist<-unlist(ks_test)
ks_D<-ks_test_unlist[[1]]
ks_p<-ks_test_unlist[[2]]
Comments (0)
No comments yet
Log in to make a comment