miRNA GFF to entrez gene
Created: 2011-01-26 11:10:40
Last updated: 2012-01-11 14:32:54
This workflow reads a GFF file of miRNA coordinates and uses BioMart to search human Ensembl genes for the gene that codes for each miRNA. The workflow returns a list of miRNAid, chromosome, start, stop, strand, entrez gene id, gene name, gene strand. Example input file here: ftp://mirbase.org/pub/mirbase/CURRENT/genomes/hsa.gff
Preview
Run
Run this Workflow in the Taverna Workbench...
Workflow Components
Authors (0)
Titles (0)
Descriptions (0)
Dependencies (0)
Inputs (1)
Name |
Description |
Beanshell_url |
|
Processors (5)
Name |
Type |
Description |
get_File_from_Web |
beanshell |
Script// Downloads the resource at `url` and exposes its full text as `contents`.
//
// Inputs : url  - address to fetch; resolved against `base` when `base` is set.
//          base - optional; `void` (unset) when the workflow supplies no such
//                 variable, in which case `url` is used on its own.
// Output : contents - everything read from the connection, as one String.
URL inputURL = null;
if (base != void) {
  inputURL = new URL(new URL(base), url);
}
else {
  inputURL = new URL(url);
}
URLConnection con = inputURL.openConnection();
// NOTE(review): the reader decodes with the platform default charset; confirm
// that is acceptable for the files being fetched.
Reader inReader = new BufferedReader(new InputStreamReader(con.getInputStream()));
StringBuffer buf = new StringBuffer();
try {
  int ch;
  while ((ch = inReader.read()) > -1) {
    buf.append((char) ch);
  }
}
finally {
  // Close the stream even when a read fails part-way, so it is not leaked.
  inReader.close();
}
contents = buf.toString();
|
split_mirbase_GFF |
beanshell |
Script// Splits the text of a GFF file (`file`) into parallel lists, one entry per
// data row:
//   chr     - column 1
//   start   - column 4
//   end     - column 5
//   strand  - column 7
//   region  - "chr:start:end" built from the three columns above
//   miRNAid - value of the ID="..." attribute in column 9 ("" when absent)
// Lines starting with "#" and lines with fewer than nine tab-separated
// columns (blank or truncated rows) are skipped.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

String[] gffLines = file.split("\n");
chr = new ArrayList();
start = new ArrayList();
end = new ArrayList();
miRNAid = new ArrayList();
region = new ArrayList();
strand = new ArrayList();

// Example attribute column: ACC="MI0006363"; ID="hsa-mir-1302-2";
// [^\"]* stops at the ID value's own closing quote; a greedy .* would run on
// to the last quote on the line if another quoted attribute followed.
Pattern idPattern = Pattern.compile("ID=\"([^\"]*)\"");

for (int line = 0; line < gffLines.length; line++) {
  String record = gffLines[line];
  if (record.startsWith("#")) {
    continue;
  }
  String[] cols = record.split("\t");
  if (cols.length < 9) {
    // Not a complete GFF row; indexing cols[8] would fail.
    continue;
  }
  chr.add(cols[0]);
  start.add(cols[3]);
  end.add(cols[4]);
  strand.add(cols[6]);
  // The region string carries no strand component (an earlier variant of this
  // script appended ":1" / ":-1" depending on column 7).
  region.add(cols[0] + ":" + cols[3] + ":" + cols[4]);
  // Always add exactly one miRNAid entry per row so that all six lists stay
  // the same length and index i refers to the same row in each of them.
  Matcher m = idPattern.matcher(cols[8]);
  miRNAid.add(m.find() ? m.group(1) : "");
}
|
hsapiens_gene_ensembl |
biomart |
|
Beanshell |
beanshell |
Script// Builds `output`: one tab-separated, newline-terminated row for every
// non-null entry of `geneid`, laid out as
//   mirnaid, chr, start, stop, strand, <geneid entry>, genename, genestrand
// NOTE(review): genename and genestrand are concatenated as-is into every
// row; if those ports carry whole lists they are rendered via toString() -
// confirm the port depths configured in the workflow.
output = new ArrayList();
Iterator ids = geneid.iterator();
while (ids.hasNext()) {
  String entrezId = (String) ids.next();
  if (entrezId == null) {
    continue;
  }
  String row = mirnaid + "\t" + chr + "\t" + start + "\t" + stop + "\t" + strand + "\t" + entrezId + "\t" + genename + "\t" + genestrand + "\n";
  output.add(row);
}
|
Flatten_List |
localworker |
Script// Copies the elements of `inputlist` into a new list `outputlist`, expanding
// nested collections one level deep.
//
// flatten(inputs, outputs, depth): appends each element of `inputs` to
// `outputs`; an element that is itself a Collection is expanded recursively
// while `depth` is still greater than zero, otherwise it is appended whole.
flatten(inputs, outputs, depth) {
  Iterator it = inputs.iterator();
  while (it.hasNext()) {
    Object element = it.next();
    if (!(element instanceof Collection) || depth <= 0) {
      outputs.add(element);
      continue;
    }
    flatten(element, outputs, depth - 1);
  }
}
outputlist = new ArrayList();
flatten(inputlist, outputlist, 1); |
Beanshells (3)
Name |
Description |
Inputs |
Outputs |
get_File_from_Web |
|
url
|
contents
|
split_mirbase_GFF |
|
file
|
chr
start
end
miRNAid
region
strand
|
Beanshell |
|
mirnaid
geneid
chr
start
stop
strand
genestrand
genename
|
output
|
Outputs (4)
Name |
Description |
hsapiens_gene_ensembl_hsapiens_gene_ensembl.entrezgene |
|
Beanshell_output |
|
split_mirbase_GFF_miRNAid |
|
split_mirbase_GFF_region |
|
Datalinks (16)
Source |
Sink |
Beanshell_url |
get_File_from_Web:url |
get_File_from_Web:contents |
split_mirbase_GFF:file |
split_mirbase_GFF:region |
hsapiens_gene_ensembl:hsapiens_gene_ensembl.chromosomal_region_filter |
split_mirbase_GFF:miRNAid |
Beanshell:mirnaid |
hsapiens_gene_ensembl:hsapiens_gene_ensembl.entrezgene |
Beanshell:geneid |
split_mirbase_GFF:chr |
Beanshell:chr |
split_mirbase_GFF:end |
Beanshell:stop |
split_mirbase_GFF:start |
Beanshell:start |
split_mirbase_GFF:strand |
Beanshell:strand |
hsapiens_gene_ensembl:hsapiens_gene_ensembl.strand |
Beanshell:genestrand |
hsapiens_gene_ensembl:hsapiens_gene_ensembl.external_gene_id |
Beanshell:genename |
Beanshell:output |
Flatten_List:inputlist |
hsapiens_gene_ensembl:hsapiens_gene_ensembl.entrezgene |
hsapiens_gene_ensembl_hsapiens_gene_ensembl.entrezgene |
Flatten_List:outputlist |
Beanshell_output |
split_mirbase_GFF:miRNAid |
split_mirbase_GFF_miRNAid |
split_mirbase_GFF:region |
split_mirbase_GFF_region |
Uploader
License
All versions of this Workflow are
licensed under:
Version 1
(of 1)
Credits (0)
(People/Groups)
None
Attributions (0)
(Workflows/Files)
None
Shared with Groups (1)
Featured In Packs (0)
None
Log in to add to one of your Packs
Attributed By (0)
(Workflows/Files)
None
Favourited By (0)
No one
Statistics
Other workflows that use similar services
(0)
There are no workflows in myExperiment that use similar services to this Workflow.
Comments (0)
No comments yet
Log in to make a comment