[hg] galaxy 1537: add error msg for shrimp_wrapper when memory e...
details: http://www.bx.psu.edu/hg/galaxy/rev/1dbd88fb9e57 changeset: 1537:1dbd88fb9e57 user: wychung date: Tue Sep 30 16:40:42 2008 -0400 description: add error msg for shrimp_wrapper when memory error and zero hits. convert last column of megablast output to float. 3 file(s) affected in this change: tools/metag_tools/megablast_wrapper.py tools/metag_tools/shrimp_wrapper.py tools/metag_tools/shrimp_wrapper.xml diffs (111 lines): diff -r 8ab38aa72998 -r 1dbd88fb9e57 tools/metag_tools/megablast_wrapper.py --- a/tools/metag_tools/megablast_wrapper.py Tue Sep 30 16:17:12 2008 -0400 +++ b/tools/metag_tools/megablast_wrapper.py Tue Sep 30 16:40:42 2008 -0400 @@ -55,7 +55,9 @@ chunk = db[(db_build)] megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null 2>&1 " \ % ( chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter ) - + + print megablast_command + try: os.system( megablast_command ) except Exception, e: @@ -67,8 +69,12 @@ line = line.rstrip( '\r\n' ) fields = line.split() try: + # get gi and length of that gi seq gi, gi_len = fields[1].split('_') - new_line = "%s\t%s\t%s\t%s" % ( fields[0], gi, gi_len, '\t'.join( fields[2:] ) ) + # convert the last column (causing problem in filter tool) to float + fields[-1] = float(fields[-1]) + + new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] ) except: new_line = line invalid_lines += 1 diff -r 8ab38aa72998 -r 1dbd88fb9e57 tools/metag_tools/shrimp_wrapper.py --- a/tools/metag_tools/shrimp_wrapper.py Tue Sep 30 16:17:12 2008 -0400 +++ b/tools/metag_tools/shrimp_wrapper.py Tue Sep 30 16:40:42 2008 -0400 @@ -1,15 +1,21 @@ #! /usr/bin/python """ +TODO +1. decrease memory usage +2. multi-fasta fastq file, ex. 454 +3. split reads into small chuncks? + SHRiMP wrapper Inputs: - reference seq and reads +1. reference seq +2. reads Outputs: - table of 8 columns: +1. table of 8 columns: chrom ref_loc read_id read_loc ref_nuc read_nuc quality coverage - SHRiMP output +2. SHRiMP output Parameters: -s Spaced Seed (default: 111111011111) @@ -37,13 +43,9 @@
7:2:1147:982/1 chr3 + 95338194 95338225 4 35 36 2700 9T7C14 7:2:587:93/1 chr3 + 14913541 14913577 1 35 36 2960 19--16
-Testing: -%python shrimp_wrapper.py single ~/Desktop/shrimp_wrapper/phix_anc.fa tmp tmp1 ~/Desktop/shrimp_wrapper/phix.10.solexa.fastq -%python shrimp_wrapper.py paired ~/Desktop/shrimp_wrapper/eca_ref_chrMT.fa tmp tmp1 ~/Desktop/shrimp_wrapper/eca.5.solexa_1.fastq ~/Desktop/shrimp_wrapper/eca.5.solexa_2.fastq - """ -import os, sys, tempfile, os.path +import os, sys, tempfile, os.path, re assert sys.version_info[:2] >= (2.4) @@ -575,6 +577,29 @@ if os.path.exists(query_qual_end1): os.remove(query_qual_end1) if os.path.exists(query_qual_end2): os.remove(query_qual_end2) stop_err(str(e)) + + # check SHRiMP output: count number of lines + num_hits = 0 + if shrimp_outfile: + for i, line in enumerate(file(shrimp_outfile)): + line = line.rstrip('\r\n') + if not line or line.startswith('#'): continue + try: + fields = line.split() + num_hits += 1 + except Exception, e: + stop_err(str(e)) + + if num_hits == 0: # no hits generated + err_msg = '' + if shrimp_log: + for i, line in enumerate(file(shrimp_log)): + if line.startswith('error'): # deal with memory error: + err_msg += line # error: realloc failed: Cannot allocate memory + if re.search('Reads Matched', line): # deal with zero hits + if int(line[8:].split()[2]) == 0: + err_msg = 'Zero hits found.\n' + stop_err('SHRiMP Failed due to:\n' + err_msg) # convert to table if type_of_reads == 'single': diff -r 8ab38aa72998 -r 1dbd88fb9e57 tools/metag_tools/shrimp_wrapper.xml --- a/tools/metag_tools/shrimp_wrapper.xml Tue Sep 30 16:17:12 2008 -0400 +++ b/tools/metag_tools/shrimp_wrapper.xml Tue Sep 30 16:40:42 2008 -0400 @@ -1,5 +1,5 @@ <tool id="shrimp_wrapper" name="SHRiMP" version="1.0.0"> - <description>SHort Read Mapping Package</description> + <description>: SHort Read Mapping Package</description> <command interpreter="python"> #if ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="skip"):#shrimp_wrapper.py $input_target $output1 $output2 $input_query #elif ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="skip"):#shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size
participants (1)
-
Greg Von Kuster