This workflow filters terms labeled as protein_molecule from an input string (or list of strings). The result is a tagged list of proteins (disregarding false positives in the input).
Internal information:
This workflow is a copy of 'filter_protein_molecule_MR3' used for the NBIC poster (now in Archive).
(?=<protein_molecule>)|(?<=</protein_molecule>)
org.embl.ebi.escience.scuflworkers.java.StringStripDuplicates
org.embl.ebi.escience.scuflworkers.java.SplitByRegex
<protein_molecule>\w*</protein_molecule>
.+
org.embl.ebi.escience.scuflworkers.java.FilterStringList
// Reduce uniprotIDlist to a single value: the string form of its first
// element, or the sentinel string "False" when the list is empty.
// NOTE(review): uniprotIDlist and uniprotID_or_False are presumably the
// script's input and output ports bound by the workflow engine — confirm.
if (uniprotIDlist.isEmpty()) {
    uniprotID_or_False = "False";
} else {
    // toString() already yields a String, so no cast is needed.
    uniprotID_or_False = uniprotIDlist.iterator().next().toString();
}
uniprotIDlist
uniprotID_or_False
http://bubbles.biosemantics.org:8180/axis/services/SynsetServer/SynsetServer.jws?wsdl
getUniprotID
org.embl.ebi.escience.scuflworkers.java.FilterStringList
// Copy protein and uniprot to true_protein and true_uniprot unless uniprot is
// the sentinel string "False"; when it is, neither variable is assigned.
// The comparison must be by value: `!=` on Strings tests object identity, so a
// "False" string that is a different object from the literal would never be
// recognised. Putting the literal first also keeps a null uniprot from
// throwing (it is treated as "not False", as before).
if (!"False".equals(uniprot)) {
    true_protein = protein;
    true_uniprot = uniprot;
}
protein
uniprot
true_protein
true_uniprot
.+
org.embl.ebi.escience.scuflworkers.java.FilterStringList
import java.util.regex.*;
// Delete every substring of tagged_term that matches the tag regex below
// (an opening or closing tag such as <name> or </name>) and store what
// remains in term.
String term = Pattern.compile("</?[\\w\\d-]+>").matcher(tagged_term).replaceAll("");
tagged_term
term