details: http://www.bx.psu.edu/hg/galaxy/rev/7e1e7c5d8dbe changeset: 3167:7e1e7c5d8dbe user: Kelly Vincent <kpvincent@bx.psu.edu> date: Wed Dec 09 18:50:12 2009 -0500 description: Updated megablast_wrapper tool to allow for date to be included with database name and displayed (so user knows how current it is) diffstat: tool-data/blastdb.loc.sample | 19 ++++++++++++------- tools/metag_tools/megablast_wrapper.py | 30 +++++++++++++++--------------- tools/metag_tools/megablast_wrapper.xml | 5 +++-- 3 files changed, 30 insertions(+), 24 deletions(-) diffs (138 lines): diff -r 0ba4a2b77f65 -r 7e1e7c5d8dbe tool-data/blastdb.loc.sample --- a/tool-data/blastdb.loc.sample Wed Dec 09 16:20:12 2009 -0500 +++ b/tool-data/blastdb.loc.sample Wed Dec 09 18:50:12 2009 -0500 @@ -1,14 +1,18 @@ #This is a sample file distributed with Galaxy that is used by some -#short read tools. The blastdb.loc file has this format (white space -#characters are TAB characters): +#short read tools. The blastdb.loc file has this format: # -#<database> <path to base name> +#<database> <build_date> <path to base name> +# +#where a single space separates the first two and a tab the last two. +#It is important that the actual database name does not have a space in it, +#and that the first tab that appears in the line is right before the path. +#The <build_date> can look any way you want. # #So, for example, if your database is nt and the path to your base name #is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry #would look like this: # -#nt /depot/data2/galaxy/blastdb/nt/nt.chunk +#nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk # #and your /depot/data2/galaxy/blastdb/nt directory would contain all of #your "base names" (e.g.): @@ -16,11 +20,12 @@ #-rw-r--r-- 1 wychung galaxy 23437408 2008-04-09 11:26 nt.chunk.00.nhr #-rw-r--r-- 1 wychung galaxy 3689920 2008-04-09 11:26 nt.chunk.00.nin #-rw-r--r-- 1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq +#...etc... # #Your blastdb.loc file should include an entry per line for each "base name" #you have stored. For example: # -#nt /depot/data2/galaxy/blastdb/nt/nt.chunk -#wgs /depot/data2/galaxy/blastdb/wgs/wgs.chunk -#test /depot/data2/galaxy/blastdb/test/test.fa +#nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +#wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk +#test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test #...etc... diff -r 0ba4a2b77f65 -r 7e1e7c5d8dbe tools/metag_tools/megablast_wrapper.py --- a/tools/metag_tools/megablast_wrapper.py Wed Dec 09 16:20:12 2009 -0500 +++ b/tools/metag_tools/megablast_wrapper.py Wed Dec 09 18:50:12 2009 -0500 @@ -30,7 +30,7 @@ #Parse Command Line options, args = doc_optparse.parse( __doc__ ) - db_build = options.db_build + db_build = options.db_build.split( ' ' )[0] query_filename = options.input.strip() output_filename = options.output.strip() mega_word_size = options.word_size # -W @@ -43,33 +43,33 @@ # megablast parameters try: - int(mega_word_size) + int( mega_word_size ) except: - stop_err('Invalid value for word size') + stop_err( 'Invalid value for word size' ) try: float(mega_iden_cutoff) except: - stop_err('Invalid value for identity cut-off') + stop_err( 'Invalid value for identity cut-off' ) try: float(mega_evalue_cutoff) except: - stop_err('Invalid value for Expectation value') + stop_err( 'Invalid value for Expectation value' ) # prepare the database db = {} for i, line in enumerate( file( DB_LOC ) ): line = line.rstrip( '\r\n' ) - if not line or line.startswith('#'): + if not line or line.startswith( '#' ): continue - fields = line.split() - if len(fields) == 2: - db[(fields[0])] = fields[1] + fields = line.split( '\t' ) + if len( fields ) == 2: + db[ fields[0].split( ' ' )[0] ] = fields[1] - if not db.has_key(db_build): - stop_err('Cannot locate the target database. Please check your location file.') + if not db.has_key( db_build ): + stop_err( 'Cannot locate the target database. Please check your location file.' ) # arguments for megablast - chunk = db[(db_build)] + chunk = db[ ( db_build ) ] megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null 2>&1 " \ % ( chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter ) @@ -80,16 +80,16 @@ except Exception, e: stop_err( str( e ) ) - output = open(output_filename,'w') + output = open( output_filename, 'w' ) invalid_lines = 0 for i, line in enumerate( file( mega_temp_output ) ): line = line.rstrip( '\r\n' ) fields = line.split() try: # get gi and length of that gi seq - gi, gi_len = fields[1].split('_') + gi, gi_len = fields[1].split( '_' ) # convert the last column (causing problem in filter tool) to float - fields[-1] = float(fields[-1]) + fields[-1] = float( fields[-1] ) new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] ) except: diff -r 0ba4a2b77f65 -r 7e1e7c5d8dbe tools/metag_tools/megablast_wrapper.xml --- a/tools/metag_tools/megablast_wrapper.xml Wed Dec 09 16:20:12 2009 -0500 +++ b/tools/metag_tools/megablast_wrapper.xml Wed Dec 09 18:50:12 2009 -0500 @@ -1,5 +1,5 @@ -<tool id="megablast_wrapper" name="Megablast" version="1.0.0"> - <description> compare short reads against nt and wgs databases</description> +<tool id="megablast_wrapper" name="Megablast" version="1.0.5"> + <description> compare short reads against htgs, nt, and wgs databases</description> <command interpreter="python"> megablast_wrapper.py --db_build=$source_select @@ -39,6 +39,7 @@ <tests> <test> <param name="input_query" value="megablast_wrapper_test1.fa" ftype="fasta"/> + <!-- source_select needs to match the entry in the blastdb.loc file, which includes the last update date if appropriate --> <param name="source_select" value="phiX" /> <param name="word_size" value="28" /> <param name="iden_cutoff" value="99.0" />