commit/galaxy-central: kanwei: Improve BLAST parser tool error handling, and improve elementtree import as well.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/26bffb1b7d57/ changeset: 26bffb1b7d57 user: kanwei date: 2011-07-13 00:14:56 summary: Improve BLAST parser tool error handling, and improve elementtree import as well. affected #: 3 files (260 bytes) --- a/tools/metag_tools/megablast_xml_parser.py Tue Jul 12 17:04:16 2011 -0400 +++ b/tools/metag_tools/megablast_xml_parser.py Tue Jul 12 18:14:56 2011 -0400 @@ -2,12 +2,12 @@ import sys, os, re -assert sys.version_info[:2] >= ( 2, 4 ) - if sys.version_info[:2] >= ( 2, 5 ): - import xml.etree.cElementTree as cElementTree + import xml.etree.cElementTree as ElementTree else: - import cElementTree + from galaxy import eggs + import pkg_resources; pkg_resources.require( "elementtree" ) + from elementtree import ElementTree def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) @@ -34,7 +34,7 @@ # get an iterable try: - context = cElementTree.iterparse( source, events=( "start", "end" ) ) + context = ElementTree.iterparse( source, events=( "start", "end" ) ) except: stop_err( "Invalid data format." ) # turn it into an iterator @@ -46,7 +46,7 @@ stop_err( "Invalid data format." ) outfile = open( sys.argv[2], 'w' ) - try: + try: for event, elem in context: # for every <Iteration> tag if event == "end" and elem.tag == "Iteration": @@ -71,7 +71,7 @@ elem.clear() except: outfile.close() - stop_err( "The input data contains tags that are not recognizable by the tool." ) + stop_err( "The input data is malformed, or there is more than one dataset in the input file. Error: %s" % sys.exc_info()[1] ) outfile.close() --- a/tools/metag_tools/megablast_xml_parser.xml Tue Jul 12 17:04:16 2011 -0400 +++ b/tools/metag_tools/megablast_xml_parser.xml Tue Jul 12 18:14:56 2011 -0400 @@ -2,26 +2,23 @@ <description></description><command interpreter="python">megablast_xml_parser.py $input1 $output1</command><inputs> - <param name="input1" type="data" format="blastxml" label="Megablast XML output" /> + <param name="input1" type="data" format="blastxml" label="Megablast XML output" /></inputs><outputs> - <data name="output1" format="tabular"/> + <data name="output1" format="tabular"/></outputs> -<requirements> - <requirement type="python-module">cElementTree</requirement> -</requirements><tests> - <test> - <param name="input1" value="megablast_xml_parser_test1.gz" ftype="blastxml" /> - <output name="output1" file="megablast_xml_parser_test1_out.tabular" ftype="tabular" /> - </test> + <test> + <param name="input1" value="megablast_xml_parser_test1.gz" ftype="blastxml" /> + <output name="output1" file="megablast_xml_parser_test1_out.tabular" ftype="tabular" /> + </test></tests><help> **What it does** This tool processes the XML output of any NCBI blast tool (if you run your own blast jobs, the XML output can be generated with **-m 7** option). - + ----- **Output fields** --- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Jul 12 17:04:16 2011 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Jul 12 18:14:56 2011 -0400 @@ -5,7 +5,7 @@ BLAST filename, output format (std for standard 12 columns, or ext for the extended 24 columns offered in the BLAST+ wrappers). -The 12 colums output are 'qseqid sseqid pident length mismatch gapopen qstart +The 12 columns output are 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' or 'std' at the BLAST+ command line, which mean: @@ -51,22 +51,23 @@ Be aware that the sequence in the extended tabular output or XML direct from BLAST+ may or may not use XXXX masking on regions of low complexity. This can throw the off the calculation of percentage identity and gap openings. -[In fact, both BLAST 2.2.24+ and 2.2.25+ have a sutle bug in this regard, +[In fact, both BLAST 2.2.24+ and 2.2.25+ have a subtle bug in this regard, with these numbers changing depending on whether or not the low complexity filter is used.] -This script attempts to produce idential output to what BLAST+ would have done. +This script attempts to produce identical output to what BLAST+ would have done. However, check this with "diff -b ..." since BLAST+ sometimes includes an extra space character (probably a bug). """ import sys import re -assert sys.version_info[:2] >= ( 2, 4 ) if sys.version_info[:2] >= ( 2, 5 ): - import xml.etree.cElementTree as cElementTree + import xml.etree.cElementTree as ElementTree else: - import cElementTree + from galaxy import eggs + import pkg_resources; pkg_resources.require( "elementtree" ) + from elementtree import ElementTree def stop_err( msg ): sys.stderr.write("%s\n" % msg) @@ -90,7 +91,7 @@ # get an iterable try: - context = cElementTree.iterparse(in_file, events=("start", "end")) + context = ElementTree.iterparse(in_file, events=("start", "end")) except: stop_err("Invalid data format.") # turn it into an iterator Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket