Data Refinement Workflow v17
This workflow brings together various sub-workflows in one integrated workflow. It includes: - Synonym Expansion / Occurrence Retrieval - BioSTIF Data Selection Tool - Google Refine Data Quality. Installation instructions for this workflow can be found here.
This workflow depends on DCWorkflow.jar, which must be installed in Taverna's local user lib directory.
Preview
Run
Run this Workflow in the Taverna Workbench...
Option 1:
Copy and paste this link into File > 'Open workflow location...'
http://myexperiment.org/workflows/2874/download?version=4
[ More Info ]
Taverna is available from http://taverna.sourceforge.net/
If you are having problems downloading it in Taverna, you may need to provide your username and password in the URL so that Taverna can access the Workflow:
Replace http:// in the link above with http://yourusername:yourpassword@
Workflow Components
None
None
None
json-simple-1.1.1.jar |
DCWorkflow.jar |
None
Name | Type | Description |
---|---|---|
Select_File | localworker |
Scriptimport java.awt.CardLayout; import java.awt.Image; import java.awt.Toolkit; import java.io.File; import java.util.HashMap; import java.util.Map; import javax.swing.ImageIcon; import javax.swing.JEditorPane; import javax.swing.JFileChooser; import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.filechooser.FileFilter; class FileExtFilter extends FileFilter { public FileExtFilter(String ext, String label, boolean includeDir) { this.ext = ext; this.label = label; this.includeDir = includeDir; } public String getDescription() { return this.label; } public boolean accept(File file) { if (file.isDirectory() && includeDir) { return true; } else { return file.getName().endsWith(this.ext); } } String ext, label; boolean includeDir; } if (title == void) { title = null; } if ((fileExtensions == void) || (fileExtensions == null)) { fileExtensions = ""; } if ((fileExtLabels == void) || (fileExtLabels == null)) { fileExtLabels = ""; } JFileChooser chooser = new JFileChooser(); chooser.setDialogTitle(title); String[] fileTypeList = fileExtensions.split(","); String[] filterLabelList = fileExtLabels.split(","); if (fileTypeList != null && filterLabelList != null && fileTypeList.length != filterLabelList.length) { throw new RuntimeException("The list of extensions and file filter labels must be the same length"); } // create the file filters for (int i = 0; i < fileTypeList.length; i++) { FileExtFilter filter = new FileExtFilter(fileTypeList[i], filterLabelList[i], true); chooser.setFileFilter(filter); } chooser.showOpenDialog(null); File file = chooser.getSelectedFile(); selectedFile = file.getAbsolutePath(); |
title_value | stringconstant |
ValueChoose input file |
Read_Text_File | localworker |
ScriptBufferedReader getReader (String fileUrl, String encoding) throws IOException { InputStreamReader reader; try { if (encoding == null) { reader = new FileReader(fileUrl); } else { reader = new InputStreamReader(new FileInputStream(fileUrl),encoding); } } catch (FileNotFoundException e) { // try a real URL instead URL url = new URL(fileUrl); if (encoding == null) { reader = new InputStreamReader (url.openStream()); } else { reader = new InputStreamReader (url.openStream(), encoding); } } return new BufferedReader(reader); } StringBuffer sb = new StringBuffer(4000); if (encoding == void) { encoding = null; } BufferedReader in = getReader(fileurl, encoding); String str; String lineEnding = System.getProperty("line.separator"); while ((str = in.readLine()) != null) { sb.append(str); sb.append(lineEnding); } in.close(); filecontents = sb.toString(); |
Data_Cleaning_Worklow_Loop | workflow | |
Merge_String_List_to_a_String | localworker |
ScriptString seperatorString = "\n"; if (seperator != void) { seperatorString = seperator; } StringBuffer sb = new StringBuffer(); for (Iterator i = stringlist.iterator(); i.hasNext();) { String item = (String) i.next(); sb.append(item); if (i.hasNext()) { sb.append(seperatorString); } } concatenated = sb.toString(); |
no_separator | stringconstant |
Value |
Write_Text_File | localworker |
ScriptwriteOK = "false"; BufferedWriter out; if (encoding == void) { out = new BufferedWriter(new FileWriter(outputFile)); } else { out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), encoding)); } out.write(filecontents); out.flush(); out.close(); writeOK = "true"; |
Select_Output_CSV_File | localworker |
Scriptimport java.awt.CardLayout; import java.awt.Image; import java.awt.Toolkit; import java.io.File; import java.util.HashMap; import java.util.Map; import javax.swing.ImageIcon; import javax.swing.JEditorPane; import javax.swing.JFileChooser; import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.filechooser.FileFilter; class FileExtFilter extends FileFilter { public FileExtFilter(String ext, String label, boolean includeDir) { this.ext = ext; this.label = label; this.includeDir = includeDir; } public String getDescription() { return this.label; } public boolean accept(File file) { if (file.isDirectory() && includeDir) { return true; } else { return file.getName().endsWith(this.ext); } } String ext, label; boolean includeDir; } if (title == void) { title = null; } if ((fileExtensions == void) || (fileExtensions == null)) { fileExtensions = ""; } if ((fileExtLabels == void) || (fileExtLabels == null)) { fileExtLabels = ""; } JFileChooser chooser = new JFileChooser(); chooser.setDialogTitle(title); String[] fileTypeList = fileExtensions.split(","); String[] filterLabelList = fileExtLabels.split(","); if (fileTypeList != null && filterLabelList != null && fileTypeList.length != filterLabelList.length) { throw new RuntimeException("The list of extensions and file filter labels must be the same length"); } // create the file filters for (int i = 0; i < fileTypeList.length; i++) { FileExtFilter filter = new FileExtFilter(fileTypeList[i], filterLabelList[i], true); chooser.setFileFilter(filter); } chooser.showOpenDialog(null); File file = chooser.getSelectedFile(); selectedFile = file.getAbsolutePath(); |
output_filechooser_title | stringconstant |
ValueChoose Output CSV File |
Name | Description | Inputs | Outputs |
---|---|---|---|
AssignInputOutput | in | out | |
Parse_Project | jsonStr |
jsonErr projectID percent |
|
slw_filter_generator | sciName | filter | |
NameStatusConditional | nameStatus |
synpass_flags synfail_flags |
|
CreateID | name | id_param | |
FirstElement_From_XPath |
xpath xmltext |
nodeVal | |
Col_Copyright_Conditional | copyright_answer | col_copyright_conditional | |
DC_Choose_Sub_Flow | internalCSVData |
synExpOccRetCSVData dataSelCSVData dataQualCSVData endWFlowCSVData endWFlow csvData |
|
OccTargetConditional | sciNameList |
emptyOutputList gbifList slwList gbifChosen |
|
Occ_Credit_Checker |
gbif_agreement_conditional gbif_names_list slw_names_list |
gbif_names_list slw_names_list |
|
End_Workflow | csvData | csv_output | |
Parse_Job | jsonStr |
jsonErr jobID |
|
Format_Options | jobID | options | |
Delete_Data_Options | projectID | options | |
Export_Data_Conditional | gref_answer |
save_true cancel_true |
|
nameParser | taxonSearchJSON | synResponse | |
Empty_Response_Service | emptyResponse | ||
trimRESTurlResult | url | resultUrl | |
checkDataUpload | status |
dataUpload_ok dataUpload_failed uploadStatus |
|
FirstElement_From_XPath |
xpath xmltext |
nodeVal | |
GBIF_Data_Use_Conditional | copyright_answer | gbif_data_use_conditional | |
passthrough | csvin | csvout | |
SynCheckGUI | synreqres_list | out | |
DCSynExpInputParser | csvData |
synonymRequest incorrectRecords |
|
GBIFCheckListParser | gbifChkListJSON | gbifChkList | |
DCSynExpInputDialog |
gbifChkLists synonymRequest |
colSynReq gbifSynReq gbifSelChkListIDs colChosen gbifChosen |
|
Syn_Credit_Checker |
col_copyright_conditional colSynReq gbif_data_use_conditional gbifSynReq |
colSynReq gbifSynReq |
|
Merge_Syn_Responses |
colSynResList gbifSynResList |
synResList | |
gbifNameSearchParser | gbifNameSerachJSON |
taxonIDList emptyTaxonIDList |
|
gbifTaxonSearchParser | gbifTaxonSerachJSON |
synTaxonIDList datasetName acceptedNameResponse rank datasetID |
|
Concat_Response |
accNameRes datasetName synRes datasetID |
concatResponse | |
Parse_Data_Upload | jsonStr |
jsonErr upload_ok |
|
GBIF_Agreement_Conditional | gbif_agreement_answer | gbif_agreement_conditional | |
Export_Data_Options | projectID | options |
Name | Description |
---|---|
endWFlow | |
csv_output | |
file_write_ok |
Source | Sink |
---|---|
title_value:value | Select_File:title |
Select_File:selectedFile | Read_Text_File:fileurl |
Read_Text_File:filecontents | Data_Cleaning_Worklow_Loop:internalCSVData |
no_separator:value | Merge_String_List_to_a_String:seperator |
Data_Cleaning_Worklow_Loop:internalCSVData | Merge_String_List_to_a_String:stringlist |
Select_Output_CSV_File:selectedFile | Write_Text_File:outputFile |
Merge_String_List_to_a_String:concatenated | Write_Text_File:filecontents |
output_filechooser_title:value | Select_Output_CSV_File:title |
Data_Cleaning_Worklow_Loop:endWFlow | endWFlow |
Merge_String_List_to_a_String:concatenated | csv_output |
Write_Text_File:writeOK | file_write_ok |
Controller | Target |
---|---|
Merge_String_List_to_a_String | Select_Output_CSV_File |
Workflow Type
Version 4 (of 17)
- biostif
- |
- catalogue of life col
- |
- data quality and filtering
- |
- edit platform for cybertaxonomy
- |
- gbif
- |
- geo-temporal data selection and filtering
- |
- google refine
- |
- historical analysis
- |
- occurrence retrieval
- |
- openrefine
- |
- pan-european species directories infrastructure pesi
- |
- spatio-temporal analysis
- |
- species distribution analysis
- |
- species occurrence
- |
- species richness and diversity
- |
- species2000
- |
- synonym expansion
- |
- taxonomic data cleaning and refinement
- |
- taxonomic name resolution
- |
- taxonomy
- |
- world register of marine species worms
None
Log in to add Tags
Shared with Groups (2)
Statistics
In chronological order:
-
Created by Cherian Mathew on Wednesday 11 April 2012 10:08:25 (UTC)
Last edited by Cherian Mathew on Wednesday 11 April 2012 10:09:19 (UTC)
-
Created by Cherian Mathew on Wednesday 13 June 2012 10:31:04 (UTC)
Last edited by Cherian Mathew on Wednesday 13 June 2012 10:32:17 (UTC)
Revision comment: Integrated a new version of the Synonym Expansion / Occurrence Retrieval part of the workflow
-
Created by Cherian Mathew on Tuesday 26 June 2012 12:47:53 (UTC)
-
Created by Cherian Mathew on Tuesday 26 June 2012 13:22:41 (UTC)
Last edited by Cherian Mathew on Thursday 28 June 2012 08:00:16 (UTC)
Revision comment: Added a new version of the BioSTIF workflow which calls the web service over HTTP, removing the need for credentials in Taverna.
-
Created by Cherian Mathew on Thursday 26 July 2012 14:50:56 (UTC)
Last edited by Cherian Mathew on Thursday 26 July 2012 14:54:16 (UTC)
Revision comment: Added a new version of the taxonomic name expansion nested workflow
-
Created by Cherian Mathew on Tuesday 07 August 2012 08:14:45 (UTC)
Last edited by Cherian Mathew on Tuesday 07 August 2012 08:16:55 (UTC)
-
Created by Cherian Mathew on Friday 28 September 2012 13:56:56 (UTC)
Last edited by Cherian Mathew on Friday 28 September 2012 13:57:17 (UTC)
-
Created by Cherian Mathew on Thursday 01 November 2012 09:15:21 (UTC)
-
Created by Cherian Mathew on Thursday 08 November 2012 13:12:58 (UTC)
-
Created by Cherian Mathew on Thursday 31 January 2013 09:02:16 (UTC)
-
Created by Cherian Mathew on Tuesday 02 April 2013 11:47:04 (UTC)
Revision comment: This version of the workflow is compatible with the newly created DRF Plugin, which installs the dependency jars using the plugin framework of Taverna.
-
Created by Cherian Mathew on Thursday 01 August 2013 13:31:20 (UTC)
Revision comment: Added checks for Google Refine.
Changed copyright notices to be popups.
Added BGBM EDIT Platform as a target.
Misc. look and feel changes
-
Created by Cherian Mathew on Tuesday 04 March 2014 11:13:25 (UTC)
Revision comment: This new version of the data refinement workflow has:
* Access to the new GBIF API for taxonomic name resolution and for retrieval of occurrence points.
* Integration of PESI name resolution web service.
* Added search filter for aggregated name checklists
* Minor changes in GUI, mainly in the TNRS subworkflow
* Minor bugs fixed
-
Created by Biodiversity eLaboratory on Thursday 08 May 2014 12:52:48 (UTC)
-
Created by Biodiversity eLaboratory on Tuesday 02 September 2014 08:35:48 (UTC)
Revision comment: This new version of the data refinement workflow has:
* The inclusion of the World Register of Marine Species (WoRMS) as a new checklist to resolve species names in the Taxonomic Name Resolution subworkflow. The WoRMS checklist is queried by using the WoRMS web service created by VLIZ.
* Minor bugs fixed
-
Created by Biodiversity eLaboratory on Wednesday 17 December 2014 12:51:15 (UTC)
Revision comment: Changed the BioSTIF services from Fraunhofer to EGI
Reviews (0)
Other workflows that use similar services (0)
There are no workflows in myExperiment that use similar services to this Workflow.
Comments (0)
No comments yet
Log in to make a comment