NCBI Gi to Kegg Pathway Descriptions
Created: 2012-01-12 09:46:31
Last updated: 2013-01-30 13:36:27
This workflow accepts a list of genbank gene identifiers and returns descriptions of gene functions and a list of all pathways each gene is involved in, according to the KEGG database.
Preview
Run
Run this Workflow in the Taverna Workbench...
Workflow Components
Authors (1)
Paul Fisher and Katy Wolstencroft |
Titles (1)
NCBI Gi to Kegg Pathway Descriptions |
Descriptions (1)
This workflow accepts a list of genbank gene identifiers and returns descriptions of gene functions and a list of all pathways each gene is involved in, according to the KEGG database. |
Dependencies (0)
Inputs (1)
Name |
Description |
Gi_numbers |
Accepts a list of genbank gene identifiers without the gi: prefix
|
Processors (14)
Name |
Type |
Description |
regex |
stringconstant |
Value: \n |
add_ncbi_to_string |
beanshell |
ScriptString[] split;
// Builds `output`: one "ncbi-gi:<id>" line for every identifier found in `input`.
// `input` is cut on the first delimiter that occurs in it, checked in this
// order: the literal marker "__cr____cn__", a comma, otherwise a newline.
if (input.indexOf("__cr____cn__") != -1) {
    split = input.split("__cr____cn__");
} else if (input.indexOf(",") != -1) {
    split = input.split(",");
} else {
    split = input.split("\n");
}
StringBuffer prefixed = new StringBuffer();
for (int i = 0; i < split.length; i++) {
    String trimmed = split[i].trim();
    // Skip blank entries (empty lines, doubled delimiters, a stray "\r") so
    // that no bare "ncbi-gi:" line with a missing identifier is emitted.
    if (trimmed.length() > 0) {
        prefixed.append("ncbi-gi:").append(trimmed).append("\n");
    }
}
String output = prefixed.toString(); |
extract_gene_ids |
beanshell |
ScriptString[] split = input.split("\n");
// Builds `output`: for every line of `input` that has more than two
// tab-separated fields, the second field (trimmed) followed by a newline.
// Lines with fewer fields are dropped.
// NOTE(review): three fields are required although only the second is read;
// presumably this matches the layout of the upstream reply - confirm.
StringBuffer collected = new StringBuffer();
for (int row = 0; row < split.length; row++) {
    String[] columns = split[row].trim().split("\t");
    if (columns.length <= 2) {
        continue;
    }
    collected.append(columns[1].trim()).append("\n");
}
String output = collected.toString(); |
remove_nulls_2 |
beanshell |
ScriptString[] split = input.split("\n");
// Builds `output`: every non-blank line of `input`, trimmed, each followed
// by a newline.
StringBuffer kept = new StringBuffer();
for (int i = 0; i < split.length; i++) {
    // Trim BEFORE testing for emptiness: a whitespace-only line (for example
    // the "\r" left over from CRLF input) must be dropped, not emitted as a
    // blank line.
    String trimmed = split[i].trim();
    if (trimmed.length() > 0) {
        kept.append(trimmed).append("\n");
    }
}
String output = kept.toString(); |
remove_Nulls |
beanshell |
ScriptString[] split = input.split("\n");
// Builds `output`: every non-blank line of `input`, trimmed, each followed
// by a newline.
StringBuffer kept = new StringBuffer();
for (int i = 0; i < split.length; i++) {
    // Trim BEFORE testing for emptiness: a whitespace-only line (for example
    // the "\r" left over from CRLF input) must be dropped, not emitted as a
    // blank line.
    String trimmed = split[i].trim();
    if (trimmed.length() > 0) {
        kept.append(trimmed).append("\n");
    }
}
String output = kept.toString(); |
btit1 |
wsdl |
Wsdl: http://soap.genome.jp/KEGG.wsdl; Wsdl Operation: btit |
btit |
wsdl |
Wsdl: http://soap.genome.jp/KEGG.wsdl; Wsdl Operation: btit |
bconv |
wsdl |
Wsdl: http://soap.genome.jp/KEGG.wsdl; Wsdl Operation: bconv |
get_pathways_by_genes |
wsdl |
Wsdl: http://soap.genome.jp/KEGG.wsdl; Wsdl Operation: get_pathways_by_genes |
merge_pathways |
localworker |
ScriptString seperatorString = "\n";
// Joins the strings in `stringlist` into `concatenated`, placing the
// separator between consecutive items (none after the last one).
// The separator is "\n" unless the `seperator` variable has been set.
if (seperator != void) {
    seperatorString = seperator;
}
StringBuffer joined = new StringBuffer();
Iterator cursor = stringlist.iterator();
while (cursor.hasNext()) {
    String element = (String) cursor.next();
    joined.append(element);
    if (cursor.hasNext()) {
        joined.append(seperatorString);
    }
}
concatenated = joined.toString();
|
Remove_duplicate_pathways |
localworker |
ScriptList strippedlist = new ArrayList();
// Copies `stringlist` into `strippedlist`, keeping only the first occurrence
// of each string and preserving the original order.
Iterator cursor = stringlist.iterator();
while (cursor.hasNext()) {
    String candidate = (String) cursor.next();
    if (!strippedlist.contains(candidate)) {
        strippedlist.add(candidate);
    }
}
|
split_by_regex |
localworker |
ScriptList split = new ArrayList();
// Fills `split` with the pieces of `string` obtained by splitting it on a
// regular expression: `regex` when that variable has been set, otherwise ",".
// An empty `string` leaves `split` empty.
if (string.length() > 0) {
    String pattern;
    if (regex == void) {
        pattern = ",";
    } else {
        pattern = regex;
    }
    String[] pieces = string.split(pattern);
    for (int idx = 0; idx < pieces.length; idx++) {
        split.add(pieces[idx]);
    }
}
|
merge_descriptions |
localworker |
ScriptString seperatorString = "\n";
// Joins the strings in `stringlist` into `concatenated`, placing the
// separator between consecutive items (none after the last one).
// The separator is "\n" unless the `seperator` variable has been set.
if (seperator != void) {
    seperatorString = seperator;
}
StringBuffer joined = new StringBuffer();
Iterator cursor = stringlist.iterator();
while (cursor.hasNext()) {
    String element = (String) cursor.next();
    joined.append(element);
    if (cursor.hasNext()) {
        joined.append(seperatorString);
    }
}
concatenated = joined.toString();
|
merge_pathways_2 |
localworker |
ScriptString seperatorString = "\n";
// Joins the strings in `stringlist` into `concatenated`, placing the
// separator between consecutive items (none after the last one).
// The separator is "\n" unless the `seperator` variable has been set.
if (seperator != void) {
    seperatorString = seperator;
}
StringBuffer joined = new StringBuffer();
Iterator cursor = stringlist.iterator();
while (cursor.hasNext()) {
    String element = (String) cursor.next();
    joined.append(element);
    if (cursor.hasNext()) {
        joined.append(seperatorString);
    }
}
concatenated = joined.toString();
|
Beanshells (4)
Name |
Description |
Inputs |
Outputs |
add_ncbi_to_string |
|
input
|
output
|
extract_gene_ids |
|
input
|
output
|
remove_nulls_2 |
|
input
|
output
|
remove_Nulls |
|
input
|
output
|
Outputs (3)
Name |
Description |
gene_descriptions |
Returns a list of gene descriptions from genbank
|
pathway_by_genes |
Returns a list of KEGG pathway identifiers for all pathways the query gene is involved with
|
pathway_descriptions |
Returns a list of KEGG pathway identifiers and descriptions for each pathway the query gene is involved with
|
Datalinks (17)
Source |
Sink |
Gi_numbers |
add_ncbi_to_string:input |
bconv:return |
extract_gene_ids:input |
merge_pathways_2:concatenated |
remove_nulls_2:input |
merge_descriptions:concatenated |
remove_Nulls:input |
merge_pathways:concatenated |
btit1:string |
extract_gene_ids:output |
btit:string |
add_ncbi_to_string:output |
bconv:string |
split_by_regex:split |
get_pathways_by_genes:genes_id_list |
get_pathways_by_genes:return |
merge_pathways:stringlist |
merge_pathways:concatenated |
Remove_duplicate_pathways:stringlist |
regex:value |
split_by_regex:regex |
extract_gene_ids:output |
split_by_regex:string |
btit:return |
merge_descriptions:stringlist |
Remove_duplicate_pathways:strippedlist |
merge_pathways_2:stringlist |
remove_Nulls:output |
gene_descriptions |
remove_nulls_2:output |
pathway_by_genes |
btit1:return |
pathway_descriptions |
Uploader
License
All versions of this Workflow are
licensed under:
Version 2
(of 5)
Credits (1)
(People/Groups)
Attributions (1)
(Workflows/Files)
Shared with Groups (0)
None
Featured In Packs (1)
Log in to add to one of your Packs
Attributed By (2)
(Workflows/Files)
Favourited By (2)
Statistics
Other workflows that use similar services
(0)
There are no workflows in myExperiment that use similar services to this Workflow.
Comments (1)
Log in to make a comment
hi Katy,
If you use "genes" rather than "mmu" as the target value, it is possible to obtain the KEGG ID even when the target species is not known.
Greetings
Massimo