Get Element ID from Gzipped GenBank file

None

/sciobj/bio_sequence/@element_id
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.seq.io.agave.AgaveWriter;

/**
 * Opens a buffered character reader on the given location.
 *
 * The argument is first treated as a local file path. If no such file can
 * be opened, it is parsed as a URL and the URL's stream is read instead.
 *
 * @param fileUrl local file path, or a URL string
 * @return a BufferedReader over the file or URL contents
 * @throws IOException if the URL is malformed or its stream cannot be opened
 */
BufferedReader getReader (String fileUrl) throws IOException {
	try {
		return new BufferedReader(new FileReader(fileUrl));
	}
	catch (FileNotFoundException notALocalFile) {
		// not a readable local file: fall back to treating it as a URL
		URL remote = new URL(fileUrl);
		return new BufferedReader(new InputStreamReader(remote.openStream()));
	}
}

// Read_Gen_Bank_File: parses the GenBank file named by the `fileUrl` input
// and stores the AgaveWriter output for all of its sequences in the
// `genbankdata` output.

// `fileUrl == void` is the BeanShell test for a variable that was never set.
if ((fileUrl == void) || (fileUrl == null)) {
    throw new RuntimeException("The fileUrl must be specified");
}

BufferedReader br = getReader(fileUrl);
ByteArrayOutputStream os = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(os);

try {
	// read the GenBank File
	SequenceIterator sequences = SeqIOTools.readGenbank(br);
	AgaveWriter writer = new AgaveWriter();

	// Every sequence is written to the same in-memory stream. The buffer is
	// converted to a string only once, after the loop: converting it on each
	// iteration (without resetting the stream) would re-append every earlier
	// sequence and duplicate the output.
	while (sequences.hasNext()) {
		Sequence seq = sequences.nextSequence();
		writer.writeSequence(seq, ps);
	}
	ps.flush();
}
finally {
	// Release both streams even when parsing or writing fails.
	ps.close();
	br.close();
}

genbankdata = os.toString();
  

// Create_and_populate_temporary_file: writes the `content` input to a new
// temporary file and exposes that file's canonical path as `filePath`.
// The file is not deleted here; its path is handed on as this script's output.
File f = File.createTempFile("taverna", ".tmp");
BufferedWriter writer = new BufferedWriter (new FileWriter(f));
try {
	writer.write(content);
}
finally {
	// Close (and flush) even if the write fails, so the file handle is not leaked.
	writer.close();
}

String filePath = f.getCanonicalPath();
//
// Import modules;
//
import java.io.*;
import java.util.zip.*;
import java.io.FileInputStream;
// import java.io.FileOutputStream;
import java.io.InputStream;
// import java.io.OutputStream;
import java.util.zip.GZIPInputStream;
// import java.util.zip.GZIPOutputStream;

// import java.io.ByteArrayInputStream;
//
// Main script.
//
// String vError = new String("");
 // Decompress_Gzipped_File: gunzips the file named by the `gzipfile` input
 // and stores the decompressed text in `newFile`.
 // Text is decoded with the platform default charset. Each line is re-joined
 // with the platform line separator, so the result always ends with one.
 String newFile = "";
 String lineEnding = System.getProperty("line.separator");
 StringBuffer sb = new StringBuffer(100000);

 InputStream stream = new FileInputStream(gzipfile);
 try {
 	// If the gzip header is invalid this constructor throws; `stream` then
 	// still refers to the raw file stream, which the finally block closes.
 	stream = new GZIPInputStream(stream);
 	BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

 	String str;
 	while ((str = reader.readLine()) != null) {
 		sb.append(str);
 		sb.append(lineEnding);
 	}
 }
 finally {
 	// Closing the outermost stream also closes the underlying file stream.
 	stream.close();
 }
 newFile = sb.toString();

Name	Type	Description
ElementID_XPath_Service	xpath	This XPath service takes the XML result of the previous service and reports the ElementID as both an XML element and plain text. The Read_Gen_Bank_File service generates the XML such that it references the agave.dtd. My Taverna could not find the agave.dtd; it was looking here: /Applications/Taverna 2.3.0.app/Contents/Resources/Java. I made my own agave.dtd from INSD_INSDSeq.dtd: I just downloaded it and renamed it to agave.dtd. This DTD references 2 other modules, so those will need to be downloaded and saved to the above location. Xpath Expression /sciobj/bio_sequence/@element_id
Read_Gen_Bank_File	localworker	This service takes the temporary file and converts it to XML via the agavewriter. The agavewriter does not get all fields it ignores the annotation fields at the top of the gbk file. A *.seq file generates an error when run through this service: Sourced file: inline evaluation of: ``import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.SequenceIterator . . . '' : Method Invocation writer.writeSequence : at Line: 36 : in file: inline evaluation of: ``import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.SequenceIterator . . . '' : writer .writeSequence ( seq , ps ) Script import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.SequenceIterator; import org.biojava.bio.seq.io.SeqIOTools; import org.biojava.bio.seq.io.agave.AgaveWriter; BufferedReader getReader (String fileUrl) throws IOException { InputStreamReader reader; try { reader = new FileReader(fileUrl); } catch (FileNotFoundException e) { // try a real URL instead URL url = new URL(fileUrl); reader = new InputStreamReader (url.openStream()); } return new BufferedReader(reader); } if ((fileUrl == void) \|\| (fileUrl == null)) { throw new RuntimeException("The fileUrl must be specified"); } BufferedReader br = getReader(fileUrl); // read the GenBank File SequenceIterator sequences = SeqIOTools.readGenbank(br); // iterate through the sequences ByteArrayOutputStream os = new ByteArrayOutputStream(); StringBuffer sb = new StringBuffer(); AgaveWriter writer = new AgaveWriter(); PrintStream ps = new PrintStream(os); while (sequences.hasNext()) { Sequence seq = sequences.nextSequence(); writer.writeSequence(seq, ps); sb.append(os.toString()); } genbankdata = sb.toString(); br.close(); ps.close();
Create_and_populate_temporary_file	beanshell	This service, borrowed from Alan Williams, takes the newly gunzipped file and creates a temporary file and outputs the filepath. Script File f = File.createTempFile("taverna", ".tmp"); BufferedWriter writer = new BufferedWriter (new FileWriter(f)); writer.write(content); writer.close(); String filePath = f.getCanonicalPath();
Decompress_Gzipped_File	beanshell	This service gunzips the supplied file and the result is a string. Script // // Import modules; // import java.io.*; import java.util.zip.*; import java.io.FileInputStream; // import java.io.FileOutputStream; import java.io.InputStream; // import java.io.OutputStream; import java.util.zip.GZIPInputStream; // import java.util.zip.GZIPOutputStream; // import java.io.ByteArrayInputStream; // // Main script. // // String vError = new String(""); String newFile = new String(""); // Decoded = "false"; FileInputStream fis = new FileInputStream(gzipfile); GZIPInputStream oGIS = new GZIPInputStream(fis); // BufferedReader oBR = new BufferedReader(new InputStreamReader(oGIS)); BufferedReader in = new BufferedReader(new InputStreamReader(oGIS)); StringBuffer sb = new StringBuffer(100000); String str; String lineEnding = System.getProperty("line.separator"); while ((str = in.readLine()) != null) { sb.append(str); sb.append(lineEnding); } in.close(); newFile = sb.toString();

Name	Description	Inputs	Outputs
Create_and_populate_temporary_file	This service, borrowed from Alan Williams, takes the newly gunzipped file and creates a temporary file and outputs the filepath.	content	filePath
Decompress_Gzipped_File	This service gunzips the supplied file and the result is a string.	gzipfile	newFile

Name	Description
text	Plain text element ID
XML	XML out of the Element ID

Source	Sink
Read_Gen_Bank_File:genbankdata	ElementID_XPath_Service:xml_text
Create_and_populate_temporary_file:filePath	Read_Gen_Bank_File:fileUrl
Decompress_Gzipped_File:newFile	Create_and_populate_temporary_file:content
gzippedFile	Decompress_Gzipped_File:gzipfile
ElementID_XPath_Service:nodelist	text
ElementID_XPath_Service:nodelistAsXML	XML

Get Element ID from Gzipped GenBank file

Preview

Run

Run this Workflow in the Taverna Workbench...

Workflow Components

Xpath Expression

Script

Script

Script

Reviews (0)

Comments (0)

Other workflows that use similar services (0)