details: http://www.bx.psu.edu/hg/galaxy/rev/0291f870f2c9 changeset: 3470:0291f870f2c9 user: Greg Von Kuster <greg@bx.psu.edu> date: Wed Mar 03 13:40:26 2010 -0500 description: Error conditions will skip lines instead of stopping the tool for the lastz paired reads, and clean up the tool pages for both lastz tools. diffstat: tools/sr_mapping/lastz_paired_reads_wrapper.py | 134 ++++++++++++++++++----- tools/sr_mapping/lastz_paired_reads_wrapper.xml | 102 +----------------- tools/sr_mapping/lastz_wrapper.xml | 18 +- 3 files changed, 118 insertions(+), 136 deletions(-) diffs (464 lines): diff -r 8e9aa1709c6c -r 0291f870f2c9 tools/sr_mapping/lastz_paired_reads_wrapper.py --- a/tools/sr_mapping/lastz_paired_reads_wrapper.py Wed Mar 03 12:07:39 2010 -0500 +++ b/tools/sr_mapping/lastz_paired_reads_wrapper.py Wed Mar 03 13:40:26 2010 -0500 @@ -78,11 +78,49 @@ # Keep track of all created temporary files so they can be deleted global tmp_file_names tmp_file_names = [] +# The values in the skipped_lines dict are tuples consisting of: +# - the number of skipped lines for that error +# If not a sequence error: +# - the 1st line number on which the error was found +# - the text of the 1st line on which the error was found +# If a sequence error: +# - The number of the sequence in the file +# - the sequence name on which the error occurred +# We may need to improve dealing with file position and text as +# much of it comes from temporary files that are created from the +# inputs, and not the inputs themselves, so this could be confusing +# to the user. +global skipped_lines +skipped_lines = dict( bad_interval=( 0, 0, '' ), + inconsistent_read_lengths=( 0, 0, '' ), + inconsistent_reads=( 0, 0, '' ), + inconsistent_sizes=( 0, 0, '' ), + missing_mate=( 0, 0, '' ), + missing_quals=( 0, 0, '' ), + missing_seq=( 0, 0, '' ), + multiple_seqs=( 0, 0, '' ), + no_header=( 0, 0, '' ), + num_fields=( 0, 0, '' ), + reads_paired=( 0, 0, '' ), + sam_flag=( 0, 0, '' ), + sam_headers=( 0, 0, '' ), + sam_min_columns=( 0, 0, '' ), + two_mate_names=( 0, 0, '' ), + wrong_seq_len=( 0, 0, '' ) ) +global total_skipped_lines +total_skipped_lines = 0 def stop_err( msg ): sys.stderr.write( "%s" % msg ) sys.exit() +def skip_line( error_key, position, text ): + if not skipped_lines[ error_key ][2]: + skipped_lines[ error_key ][1] = position + skipped_lines[ error_key ][2] = text + skipped_lines[ error_key ][0] += 1 + total_skipped_lines += 1 + def get_tmp_file_name( dir=None, suffix=None ): """ Return a unique temporary file name that can be managed. The @@ -150,16 +188,16 @@ line = line.split( "#", 1 )[0].rstrip() fields = line.split() if len( fields ) != 4: - # TODO: Do we want to err out here or just skip the line? - stop_err( "Wrong number of fields ( must be 4 ) in line %d: %s" % ( i+1, line ) ) + skip_line( 'num_fields', i+1, line ) + continue name, start, length, size = fields start = int( start ) length = int( length ) size = int( size ) end = start + length if end > size: - # TODO: Do we want to err out here or just skip the line? - stop_err( "Bad interval in line %d: %s" % ( i+1, line ) ) + skip_line[ 'bad_interval' ] += 1 + continue if name not in read_to_linker_dict: read_to_linker_dict[ name ] = ( start, end, size ) continue @@ -168,9 +206,8 @@ continue ( s, e, sz ) = read_to_linker_dict[ name ] if sz != size: - # This should never occur - # TODO: Do we want to err out here or just skip the line? - stop_err( "Inconsistent sizes for %s" % name ) + skip_line( 'inconsistent_sizes', i+1, name ) + continue if s > end or e < start: # Non-overlapping intervals, so skip this sequence read_to_linker_dict[ name ] = None @@ -194,18 +231,15 @@ read_to_linker_dict[ seq.name ] = "" continue if read_to_linker_dict[ seq.name ] == "": - # TODO: Do we want to err out here or just skip the line? - stop_err( "Multiple sequences named %s" % seq.name ) + skip_line( 'multiple_seqs', seqs, seq.name ) + continue if read_to_linker_dict[ seq.name ] == None: # Read previously marked as non-overlapping intervals, so skip this sequence - see above continue ( start, end, size ) = read_to_linker_dict[ seq.name ] if seq.length != size: - # TODO: Do we want to err out here or just skip the line? - combined_linker_file.close() - mates_file.close() - mates_mapping_file.close() - stop_err( "Sequence disagrees with size for sequence %s, size: %s seq.length: %s" % ( seq.name, str( size ), str( seq.length ) ) ) + skip_line( 'wrong_seq_len', seqs, seq.name ) + continue left = seq.text[ :start ] right = seq.text[ end: ] left_is_small = len( left ) <= seq_len_lower_threshold @@ -272,7 +306,9 @@ if not seq: break seqs += 1 - # Create a temporary file to contain the current sequence as input to lastz + # Create a temporary file to contain the current sequence as input to lastz. + # We're doing this a bit differently here since we could be generating a huge + # number of temporary files. tmp_in_fd, tmp_in_file_name = tempfile.mkstemp( suffix='seq_%d_in' % seqs ) tmp_in_file = os.fdopen( tmp_in_fd, 'w+b' ) tmp_in_file.write( '>%s\n%s\n' % ( seq.name, seq.text ) ) @@ -441,10 +477,12 @@ if not line.startswith( "#" ): fields = line.split() if len( fields ) != 4: - stop_err( "Incorrect number of fields (must be 4) in line %s of file %s" % ( i+1, tmp_mates_mapping_file_name ) ) + skip_line( "num_fields", i+1, line ) + continue mate_name, read_name, s_offset, e_offset = fields if mate_name in mate_to_read_dict: - stop_err( "%s is in the mate_to_read_dict when it should not be." % mate_name ) + skip_line( 'two_mate_names', i+1, mate_name ) + continue mate_to_read_dict[ mate_name ] = ( read_name, int( s_offset ), int( e_offset ) ) # Read sequence data read_to_nucs_dict = {} @@ -458,9 +496,8 @@ seq_text_upper = seq.text.upper() if seq.name in read_to_nucs_dict: if seq_text_upper != read_to_nucs_dict[ seq.name ]: - # TODO: Should we err out here or just skip the line? - stop_err( "Inconsistent reads named %s (second occurs at line %d in file %s)" % ( seq.name, seqs, input2 ) ) - #continue + skip_line( 'inconsistent_reads', seqs, seq.name ) + continue read_to_nucs_dict[ seq.name ] = seq_text_upper # Read quality data def quality_sequences( f ): @@ -477,7 +514,8 @@ seq_line = line_number seq_quals = [] elif seq_name is None: - stop_err( "First quality sequence has no header" ) + skip_line( 'no_header', line_number, line ) + continue else: seq_quals += [ int( q ) for q in line.split() ] if seq_name is not None: @@ -494,11 +532,11 @@ quals = samify_phred_scores( quals ) if seq_name in read_to_quals_dict: if quals != read_to_quals_dict[ seq_name ]: - stop_err( "Inconsistent quality sequences named %s (second occurs at line %d in %s)" % ( seq_name, line_number, input4 ) ) + skip_line( 'inconsistent_reads', line_number, seq_name ) continue if len( quals ) != len( read_to_nucs_dict[ seq_name ] ): - stop_err( "Inconsistent read/quality lengths for %s, quals: %s, read_to_nucs_dict[ seq_name ]: %s" % \ - ( seq_name, quals, read_to_nucs_dict[ seq_name ] ) ) + skip_line( 'inconsistent_read_lengths', line_number, seq_name ) + continue read_to_quals_dict[ seq_name ] = quals # process the SAM file tmp_align_file_names = ' '.join( tmp_align_file_name_list ) @@ -512,21 +550,25 @@ line = line.strip() if line.startswith( "@" ): if has_non_header: - stop_err( "Input SAM contains headers in several places (e.g., line %d) in file %s" % ( i+1, combined_chrom_file_name ) ) + skip_line( 'sam_headers', i+1, line ) + continue fout.write( "%s\n" % line ) continue has_non_header = True fields = line.split() num_fields = len( fields ) if num_fields < SAM_MIN_COLUMNS: - stop_err( "Not enough columns at line %d (%d, expected %d)" % ( i+1, num_fields, SAM_MIN_COLUMNS ) ) + skip_line( 'sam_min_columns', i+1, line ) + continue # Set flags for mates try: flag = int( fields[ SAM_FLAG_COLUMN ] ) except ValueError: - stop_err( "Bad SAM flag at line %d: %s" % ( i+1, line ) ) + skip_line( 'sam_flag', i+1, line ) + continue if not( flag & ( BAM_FPAIRED + BAM_FREAD1 + BAM_FREAD2 ) == 0 ): - stop_err( "SAM flag indicates reads already paired, at line %d\n%s" % ( i+1, line ) ) + skip_line( 'reads_paired', i+1, line ) + continue mate_name = fields[ SAM_QNAME_COLUMN ] unmap_it = False half = None @@ -548,7 +590,8 @@ try: read_name, s_offset, e_offset = mate_to_read_dict[ mate_name ] except KeyError: - stop_err( "'%s' doesn't appear in the mapping file." % mate_name ) + skip_line( 'missing_mate', i+1, mate_name ) + continue cigar = fields[ SAM_CIGAR_COLUMN ] cigar_prefix = None cigar_suffix = None @@ -598,14 +641,16 @@ fields[ SAM_CIGAR_COLUMN ] = cigar # Fetch sequence and quality values, and flip/clip them if read_name not in read_to_nucs_dict: - stop_err( "Missing sequence for '%s'" % read_name ) + skip_line( 'missing_seq', i+1, read_name ) + continue nucs = read_to_nucs_dict[ read_name ] if not on_plus_strand: nucs = reverse_complement( nucs ) quals = None if read_to_quals_dict != None: if read_name not in read_to_quals_dict: - stop_err( "Missing quality values for '%s'" % read_name ) + skip_line( 'missing_quals', i+1, read_name ) + continue quals = read_to_quals_dict[ read_name ] if not on_plus_strand: quals = reverse_string( quals ) @@ -752,5 +797,32 @@ # Delete all temporary files for file_name in tmp_file_names: os.remove( file_name ) + # Handle any invalid lines in the input data + if total_skipped_lines: + msgs = dict( bad_interval="Bad interval in line", + inconsistent_read_lengths="Inconsistent read/quality lengths for seq #", + inconsistent_reads="Inconsistent reads for seq #", + inconsistent_sizes="Inconsistent sizes for seq #", + missing_mate="Mapping file does not include mate on line", + missing_quals="Missing quality values for name on line", + missing_seq="Missing sequence for name on line", + multiple_seqs="Multiple names for seq #", + no_header="First quality sequence has no header", + num_fields="Must have 4 fields in line", + reads_paired="SAM flag indicates reads already paired on line", + sam_flag="Bad SAM flag on line", + sam_headers="SAM headers on line", + sam_min_columns="Need 11 columns on line", + two_mate_names="Mate name already seen, line", + wrong_seq_len="Size differs from length of seq #" ) + print "Skipped %d invalid lines: " + msg = "" + for k, v in skipped_lines.items(): + if v[0]: + # v[0] is the number of times the error occurred + # v[1] is the position of the line or sequence in the file + # v[2] is the name of the sequence or the text of the line + msg += "(%d)%s %d:%s. " % ( v[0], msgs[k], v[1], v[2] ) + print msg if __name__=="__main__": __main__() diff -r 8e9aa1709c6c -r 0291f870f2c9 tools/sr_mapping/lastz_paired_reads_wrapper.xml --- a/tools/sr_mapping/lastz_paired_reads_wrapper.xml Wed Mar 03 12:07:39 2010 -0500 +++ b/tools/sr_mapping/lastz_paired_reads_wrapper.xml Wed Mar 03 13:40:26 2010 -0500 @@ -38,7 +38,7 @@ <param name="input3" format="fasta" type="data" label="Linker file" /> <param name="input4" format="qual454" type="data" label="Select a base quality score 454 dataset" /> <conditional name="seq_name"> - <param name="how_to_name" type="select" label="Do you want to modify reference name?"> + <param name="how_to_name" type="select" label="Do you want to modify the reference name?"> <option value="no">No</option> <option value="yes">Yes</option> </param> @@ -75,9 +75,9 @@ **What it does** -**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. The Galaxy version of LASTZ is geared towards aligning of short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) reads against a reference sequence. There is excellent, extensive `documentation`__ on LASTZ available, although it hasn't been updated for the version of LASTZ that Galaxy is running (the key changes have to do with output formats, so it is still extremely helpful). +**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) paired reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_. - .. __: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.01.50/README.lastz-1.01... + .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02... ------ @@ -89,7 +89,7 @@ **Outputs** -LASTZ generates one output. Depending on the choice you make in *Select output format* drop-down LASTZ will produce a SAM file showing sequence alignments, a list of differences between the reads and reference (Polymorphisms), or a general table with one line per alignment block (Tabular). Examples of these outputs are shown below. +This LASTZ tool produces a SAM file showing sequence alignments. **SAM output** @@ -132,102 +132,11 @@ 0x0080 the read is the second read in a pair 0x0100 the alignment is not primary -**Polymorphism (SNP or differences) output** - -Polymorphism output contains 14 columns:: - - 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - -------------------------------------------------------------------------------------------------------------------------------------------------------------- - chrM 2490 2491 + 5386 HWI-EAS91_1_306UPAAXX:6:1:486:822 10 11 - 36 C A ACCTGTTTTACAGACACCTAAAGCTACATCGTCAAC ACCTGTTTTAAAGACACCTAAAGCTACATCGTCAAC - chrM 2173 2174 + 5386 HWI-EAS91_1_306UPAAXX:6:1:259:1389 26 27 + 36 G T GCGTACTTATTCGCCACCATGATTATGACCAGTGTT GCGTACTTATTCGCCACCATGATTATTACCAGTGTT - -where:: - - 1. (chrM) - Reference sequence id - 2. (2490) - Start position of the difference in the reference - 3. (2491) - End position of the difference in the reference - 4. (+) - Strand of the reference (always plus) - 5. (5386) - Length of the reference sequence - 6. (HWI...) - read id - 7. (10) - Start position of the difference in the read - 8. (11) - End position of the difference in the read - 9. (+) - Strand of the read - 10. (36) - Length of the read - 11. (C) - Nucleotide in the reference - 12. (A) - Nucleotide in the read - 13. (ACC...) - Reference side os the alignment - 14. (ACC...) - Read side of the alignment - -**Tabular output** - -Tabular output is a tab-separated format with 30 columns:: - - 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 - ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 14 PHIX174 + 5386 4648 4647 4661 14 ATTTTCGTGATATT EYKX4VC01BV8HS + 204 154 153 167 154 153 167 14 ATTTTCGTGATATT .............. 14M 14/14 100.0% 14/204 6.9% 0/14 0.0% 4494 NA - 16 PHIX174 + 5386 3363 3362 3378 16 GACGCCGGATTTGAGA EYKX4VC01AWJ88 - 259 36 35 51 209 208 224 16 GACGCCGGATTTGAGA ................ 16M 16/16 100.0% 16/259 6.2% 0/16 0.0% 3327 NA - -The following columns are present:: - - Field Meaning - ---------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 1. score Score of the alignment block. The scale and meaning of this number will vary, depending on the final stage performed and other command-line options. - 2. name1 Name of the target sequence. - 3. strand1 Target sequence strand, either "+" or "−". - 4. size1 Size of the entire target sequence. - 5. start1 Starting position of the alignment block in the target, origin-one. - 6. zstart1 Starting position of the alignment block in the target, origin-zero. - 7. end1 Ending position of the alignment block in the target, expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems). - 8. length1 Length of the alignment block in the target (excluding gaps). - 9. text1 Aligned characters in the target, including gap characters. - 10. name2 Name of the query sequence. - 11. strand2 Query sequence strand, either "+" or "−". - 12. size2 Size of the entire query sequence. - 13. start2 Starting position of the alignment block in the query, origin-one. - 14. zstart2 Starting position of the alignment block in the query, origin-zero. - 15. end2 Ending position of the alignment block in the query, expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems). - 16. start2+ Starting position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), origin-one. Note that if strand2 is "−", then this is the other end of the block from start2. - 17. zstart2+ Starting position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), origin-zero. Note that if strand2 is "−", then this is the other end of the block from zstart2. - 18. end2+ Ending position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems). Note that if strand2 is "−", then this is the other end of the block from end2. - 19. length2 Length of the alignment block in the query (excluding gaps). - 20. text2 Aligned characters in the query, including gap characters. - 21. diff Differences between what would be written for text1 and text2. Matches are written as . (period), transitions as : (colon), transversions as X, and gaps as - (hyphen). - 22. cigar A CIGAR-like representation of the alignment's path through the Dynamic Programming matrix. This is the short representation, without spaces, described in the Ensembl CIGAR specification. - 23./24. identity Fraction of aligned bases in the block that are matches (see Identity). This is written as two fields. The first field is a fraction, written as <n>/<d>. The second field contains the same value, computed as a percentage. - 25./26. coverage Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (see Coverage). This is written as two fields. The first field is a fraction, written as <n>/<d>. The second field contains the same value, computed as a percentage. - 27./28. gaprate Rate of gaps (also called indels) in the alignment block. This is written as two fields. The first field is a fraction, written as <n>/<d>, with the numerator being the number of alignment columns containing gaps and the denominator being the number without gaps. The second field contains the same value, computed as a percentage. - 29. diagonal The diagonal of the start of the alignment block in the dynamic programming matrix, expressed as an identifying number start1-start2. - 30. shingle A measurement of the shingle overlap between the target and the query. This is intended for the case where both the target and query are relatively short, and their ends are expected to overlap. - -------- - -**LASTZ Settings** - -There are two setting modes: (1) **Commonly used settings** and (2) **Full Parameter List**. - -**Commonly used settings** - -There are seven modes:: - - Illumina-Solexa/AB-SOLiD 95% identity - Illumina-Solexa/AB-SOLiD 85% identity - Roche-454 98% identity - Roche-454 95% identity - Roche-454 90% identity - Roche-454 85% identity - Roche-454 75% identity - -when deciding which one to use consider the following: a 36 bp read with two difference will be 34/36 = 94% identical to the reference. - -**Full Parameter List** - -This modes gives you a fuller control over lastz. The description of these and other parameters is found at the end of this page. Note, that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu. - ------ -**Do you want to modify reference name?** +**Do you want to modify the reference name?** -This option allows you set the name of the reference sequence manually. This is helpful when, for example, you would like to make reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at UCSC Genome Browser. +This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser. ------ diff -r 8e9aa1709c6c -r 0291f870f2c9 tools/sr_mapping/lastz_wrapper.xml --- a/tools/sr_mapping/lastz_wrapper.xml Wed Mar 03 12:07:39 2010 -0500 +++ b/tools/sr_mapping/lastz_wrapper.xml Wed Mar 03 13:40:26 2010 -0500 @@ -97,7 +97,7 @@ </when> </conditional> <conditional name="seq_name"> - <param name="how_to_name" type="select" label="Do you want to modify reference name?"> + <param name="how_to_name" type="select" label="Do you want to modify the reference name?"> <option value="no">No</option> <option value="yes">Yes</option> </param> @@ -213,9 +213,9 @@ **What it does** -**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. The Galaxy version of LASTZ is geared towards aligning of short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) reads against a reference sequence. There is excellent, extensive `documentation`__ on LASTZ available, although it hasn't been updated for the version of LASTZ that Galaxy is running (the key changes have to do with output formats, so it is still extremely helpful). +**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_. - .. __: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.01.50/README.lastz-1.01... + .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02... ------ @@ -227,7 +227,7 @@ **Outputs** -LASTZ generates one output. Depending on the choice you make in *Select output format* drop-down LASTZ will produce a SAM file showing sequence alignments, a list of differences between the reads and reference (Polymorphisms), or a general table with one line per alignment block (Tabular). Examples of these outputs are shown below. +LASTZ generates one output. Depending on the choice you make in the *Select output format* drop-down, LASTZ will produce a SAM file showing sequence alignments, a list of differences between the reads and reference (Polymorphisms), or a general table with one line per alignment block (Tabular). Examples of these outputs are shown below. **SAM output** @@ -355,23 +355,23 @@ Roche-454 85% identity Roche-454 75% identity -when deciding which one to use consider the following: a 36 bp read with two difference will be 34/36 = 94% identical to the reference. +When deciding which one to use, consider the following: a 36 bp read with two differences will be 34/36 = 94% identical to the reference. **Full Parameter List** -This modes gives you a fuller control over lastz. The description of these and other parameters is found at the end of this page. Note, that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu. +This mode gives you fuller control over lastz. The description of these and other parameters is found at the end of this page. Note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu. ------ -**Do you want to modify reference name?** +**Do you want to modify the reference name?** -This option allows you set the name of the reference sequence manually. This is helpful when, for example, you would like to make reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at UCSC Genome Browser. +This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser. ------ **LASTZ parameter list** -This is an exhaustive list of LASTZ options. Once again, please note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu:: +This is an exhaustive list of LASTZ options. Once again, please note that not all options are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu:: target[[s..e]][-] spec/file containing target sequence (fasta or nib) [s..e] defines a subrange of the file