[galaxy-dev] [hg] galaxy 3167: Updated megablast_wrapper tool to allow for date to be included with database name and displayed

11 Dec 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/7e1e7c5d8dbe
changeset: 3167:7e1e7c5d8dbe
user:      Kelly Vincent <kpvincent@bx.psu.edu>
date:      Wed Dec 09 18:50:12 2009 -0500
description:
Updated the megablast_wrapper tool to allow a date to be included with the database name and displayed (so the user knows how current the database is)

diffstat:

 tool-data/blastdb.loc.sample            |  19 ++++++++++++-------
 tools/metag_tools/megablast_wrapper.py  |  30 +++++++++++++++---------------
 tools/metag_tools/megablast_wrapper.xml |   5 +++--
 3 files changed, 30 insertions(+), 24 deletions(-)

diffs (138 lines):

diff -r 0ba4a2b77f65 -r 7e1e7c5d8dbe tool-data/blastdb.loc.sample

--- a/tool-data/blastdb.loc.sample	Wed Dec 09 16:20:12 2009 -0500
+++ b/tool-data/blastdb.loc.sample	Wed Dec 09 18:50:12 2009 -0500
@@ -1,14 +1,18 @@
 #This is a sample file distributed with Galaxy that is used by some
-#short read tools.  The blastdb.loc file has this format (white space 
-#characters are TAB characters):
+#short read tools.  The blastdb.loc file has this format:
 #
-#<database>	<path to base name> 
+#<database> <build_date>	<path to base name> 
+#
+#where a single space separates <database> from <build_date> and a tab separates <build_date> from <path to base name>.
+#It is important that the actual database name does not have a space in it,
+#and that the first tab that appears in the line is right before the path.
+#The <build_date> can look any way you want.
 #
 #So, for example, if your database is nt and the path to your base name 
 #is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry 
 #would look like this:
 #
-#nt      /depot/data2/galaxy/blastdb/nt/nt.chunk
+#nt 02 Dec 2009	/depot/data2/galaxy/blastdb/nt/nt.chunk
 #
 #and your /depot/data2/galaxy/blastdb/nt directory would contain all of 
 #your "base names" (e.g.):
@@ -16,11 +20,12 @@
 #-rw-r--r--  1 wychung galaxy  23437408 2008-04-09 11:26 nt.chunk.00.nhr
 #-rw-r--r--  1 wychung galaxy   3689920 2008-04-09 11:26 nt.chunk.00.nin
 #-rw-r--r--  1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq
+#...etc...
 #
 #Your blastdb.loc file should include an entry per line for each "base name" 
 #you have stored.  For example:
 #
-#nt	/depot/data2/galaxy/blastdb/nt/nt.chunk
-#wgs	/depot/data2/galaxy/blastdb/wgs/wgs.chunk
-#test	/depot/data2/galaxy/blastdb/test/test.fa
+#nt 02 Dec 2009	/depot/data2/galaxy/blastdb/nt/nt.chunk
+#wgs 30 Nov 2009	/depot/data2/galaxy/blastdb/wgs/wgs.chunk
+#test 20 Sep 2008	/depot/data2/galaxy/blastdb/test/test
 #...etc...
diff -r 0ba4a2b77f65 -r 7e1e7c5d8dbe tools/metag_tools/megablast_wrapper.py
--- a/tools/metag_tools/megablast_wrapper.py	Wed Dec 09 16:20:12 2009 -0500
+++ b/tools/metag_tools/megablast_wrapper.py	Wed Dec 09 18:50:12 2009 -0500
@@ -30,7 +30,7 @@
     #Parse Command Line
     options, args = doc_optparse.parse( __doc__ )
     
-    db_build = options.db_build
+    db_build = options.db_build.split( ' ' )[0]
     query_filename = options.input.strip()
     output_filename = options.output.strip()
     mega_word_size = options.word_size        # -W
@@ -43,33 +43,33 @@
 
     # megablast parameters
     try:
-        int(mega_word_size)    
+        int( mega_word_size )    
     except:
-        stop_err('Invalid value for word size')
+        stop_err( 'Invalid value for word size' )
     try:
         float(mega_iden_cutoff)
     except:
-        stop_err('Invalid value for identity cut-off')
+        stop_err( 'Invalid value for identity cut-off' )
     try:
         float(mega_evalue_cutoff)
     except:
-        stop_err('Invalid value for Expectation value')
+        stop_err( 'Invalid value for Expectation value' )
 
     # prepare the database
     db = {}
     for i, line in enumerate( file( DB_LOC ) ):
         line = line.rstrip( '\r\n' )
-        if not line or line.startswith('#'):
+        if not line or line.startswith( '#' ):
             continue
-        fields = line.split()
-        if len(fields) == 2:
-            db[(fields[0])] = fields[1]
+        fields = line.split( '\t' )
+        if len( fields ) == 2:
+            db[ fields[0].split( ' ' )[0] ] = fields[1]
 
-    if not db.has_key(db_build):
-        stop_err('Cannot locate the target database. Please check your location file.')
+    if not db.has_key( db_build ):
+        stop_err( 'Cannot locate the target database. Please check your location file.' )
     
     # arguments for megablast    
-    chunk = db[(db_build)]
+    chunk = db[ ( db_build ) ]
     megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null 2>&1 " \
         % ( chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter ) 
     
@@ -80,16 +80,16 @@
     except Exception, e:
         stop_err( str( e ) )
 
-    output = open(output_filename,'w')
+    output = open( output_filename, 'w' )
     invalid_lines = 0
     for i, line in enumerate( file( mega_temp_output ) ):
         line = line.rstrip( '\r\n' )
         fields = line.split()
         try:
             # get gi and length of that gi seq
-            gi, gi_len = fields[1].split('_')
+            gi, gi_len = fields[1].split( '_' )
             # convert the last column (causing problem in filter tool) to float
-            fields[-1] = float(fields[-1])
+            fields[-1] = float( fields[-1] )
             
             new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] )
         except:
diff -r 0ba4a2b77f65 -r 7e1e7c5d8dbe tools/metag_tools/megablast_wrapper.xml
--- a/tools/metag_tools/megablast_wrapper.xml	Wed Dec 09 16:20:12 2009 -0500
+++ b/tools/metag_tools/megablast_wrapper.xml	Wed Dec 09 18:50:12 2009 -0500
@@ -1,5 +1,5 @@
-<tool id="megablast_wrapper" name="Megablast" version="1.0.0">
-	<description> compare short reads against nt and wgs databases</description>
+<tool id="megablast_wrapper" name="Megablast" version="1.0.5">
+	<description> compare short reads against htgs, nt, and wgs databases</description>
 	<command interpreter="python">
 	  megablast_wrapper.py 
 	   --db_build=$source_select
@@ -39,6 +39,7 @@
 	<tests>
 		<test>
 			<param name="input_query" value="megablast_wrapper_test1.fa" ftype="fasta"/>
+			<!-- source_select needs to match the entry in the blastdb.loc file, which includes the last update date if appropriate --> 
 			<param name="source_select" value="phiX" />
 			<param name="word_size" value="28" />
 			<param name="iden_cutoff" value="99.0" />

    

[galaxy-dev] [hg] galaxy 3167: Updated megablast_wrapper tool to allow for date to be included with database name and displayed

Greg Von Kuster