galaxy-dev
April 2010: 37 participants, 148 discussions
details: http://www.bx.psu.edu/hg/galaxy/rev/ab319f94495c
changeset: 3622:ab319f94495c
user: rc
date: Thu Apr 08 16:21:53 2010 -0400
description:
forms: fixed a bug in default values
diffstat:
lib/galaxy/model/__init__.py | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diffs (12 lines):
diff -r 196b3b8bb6d8 -r ab319f94495c lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py Thu Apr 08 16:09:58 2010 -0400
+++ b/lib/galaxy/model/__init__.py Thu Apr 08 16:21:53 2010 -0400
@@ -1349,7 +1349,7 @@
value = False
else:
# Set other field types to the default value of the field
- value = field['default']
+ value = field.get('default', '')
# create the field widget
field_widget = eval( field[ 'type' ] )( field_name )
if field[ 'type' ] == 'TextField':
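For context on the change above: replacing field['default'] with field.get('default', '') means a form field definition that omits a 'default' key now falls back to an empty string instead of raising KeyError. A minimal standalone sketch of that difference (illustrative only; the field dicts below are made up and are not Galaxy's own structures):

field_with_default = {'type': 'TextField', 'default': 'foo'}
field_without_default = {'type': 'TextField'}   # hypothetical definition lacking 'default'

def old_default(field):
    # Pre-fix behaviour: raises KeyError when 'default' is absent.
    return field['default']

def new_default(field):
    # Post-fix behaviour: a missing 'default' falls back to an empty string.
    return field.get('default', '')

print(new_default(field_with_default))       # foo
print(new_default(field_without_default))    # (empty string)
try:
    old_default(field_without_default)
except KeyError as exc:
    print('old lookup fails:', exc)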
details: http://www.bx.psu.edu/hg/galaxy/rev/196b3b8bb6d8
changeset: 3621:196b3b8bb6d8
user: rc
date: Thu Apr 08 16:09:58 2010 -0400
description:
removed unused file.
diffstat:
scripts/galaxy_messaging/server/daemon.py | 34 -------------------------------
1 files changed, 0 insertions(+), 34 deletions(-)
diffs (38 lines):
diff -r 81de8629eeb6 -r 196b3b8bb6d8 scripts/galaxy_messaging/server/daemon.py
--- a/scripts/galaxy_messaging/server/daemon.py Thu Apr 08 15:25:38 2010 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-"""
-
-Data Transfer Script Daemon
-
-This script is called from Galaxy LIMS once the lab admin starts the data
-transfer process using the user interface. This creates a child process and
-this child process starts the data_transfer.py script as a new process
-
-This script passes all the arguments from Galaxy LIMS to the data_transfer.py script
-
-Usage:
-
-python daemon.py <sequencer_host>
- <username>
- <password>
- <source_file>
- <sample_id>
- <dataset_index>
- <library_id>
- <folder_id>
-"""
-
-import sys, os
-# Perform first fork.
-try:
- pid = os.fork( )
- if pid > 0:
- sys.exit(0) # Exit first parent.
-except OSError, e:
- sys.stderr.write("fork #1 failed: (%d) %s\n" % (e.errno, e.strerror))
- sys.exit(2)
-os.execv(os.path.join( os.getcwd(), "scripts/galaxy_messaging/server/data_transfer.py"), sys.argv)
-
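The removed daemon.py implemented a plain fork-then-exec pattern: fork once, have the parent get out of the way immediately (the original exits, the sketch below just returns), and let the child replace itself with the data transfer script via os.execv so the transfer runs detached from the LIMS request. A minimal Python 3 sketch of that pattern (illustrative only; spawn_detached and its arguments are hypothetical, and os.fork is POSIX-only):

import os
import sys

def spawn_detached(script_path, argv):
    # Fork once; the parent returns right away so the caller is not blocked.
    try:
        pid = os.fork()
    except OSError as exc:
        sys.stderr.write('fork failed: (%d) %s\n' % (exc.errno, exc.strerror))
        sys.exit(2)
    if pid > 0:
        return                      # parent: the child carries on independently
    # Child: replace this process image with the target script, passing
    # the caller's arguments straight through.
    os.execv(script_path, [script_path] + list(argv))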
details: http://www.bx.psu.edu/hg/galaxy/rev/81de8629eeb6
changeset: 3620:81de8629eeb6
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Thu Apr 08 15:25:38 2010 -0400
description:
Add FASTQ Quality Trimmer by sliding window tool.
diffstat:
test-data/sanger_full_range_quality_trimmed_out_1.fastqsanger | 8 +
test-data/sanger_full_range_quality_trimmed_out_2.fastqsanger | 8 +
test-data/sanger_full_range_quality_trimmed_out_3.fastqsanger | 8 +
tool_conf.xml.main | 1 +
tool_conf.xml.sample | 1 +
tools/fastq/fastq_trimmer_by_quality.py | 121 ++++++++++
tools/fastq/fastq_trimmer_by_quality.xml | 112 +++++++++
7 files changed, 259 insertions(+), 0 deletions(-)
diffs (299 lines):
diff -r 61b09dc1dff2 -r 81de8629eeb6 test-data/sanger_full_range_quality_trimmed_out_1.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sanger_full_range_quality_trimmed_out_1.fastqsanger Thu Apr 08 15:25:38 2010 -0400
@@ -0,0 +1,8 @@
+@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
++
+56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
+CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
++
+~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:98765
diff -r 61b09dc1dff2 -r 81de8629eeb6 test-data/sanger_full_range_quality_trimmed_out_2.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sanger_full_range_quality_trimmed_out_2.fastqsanger Thu Apr 08 15:25:38 2010 -0400
@@ -0,0 +1,8 @@
+@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
++
+56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
+CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
++
+~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"!
diff -r 61b09dc1dff2 -r 81de8629eeb6 test-data/sanger_full_range_quality_trimmed_out_3.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sanger_full_range_quality_trimmed_out_3.fastqsanger Thu Apr 08 15:25:38 2010 -0400
@@ -0,0 +1,8 @@
+@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
++
+!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
+CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
++
+~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:98765
diff -r 61b09dc1dff2 -r 81de8629eeb6 tool_conf.xml.main
--- a/tool_conf.xml.main Wed Apr 07 11:12:00 2010 -0400
+++ b/tool_conf.xml.main Thu Apr 08 15:25:38 2010 -0400
@@ -295,6 +295,7 @@
<label text="Generic FASTQ manipulation" id="generic_fastq" />
<tool file="fastq/fastq_filter.xml" />
<tool file="fastq/fastq_trimmer.xml" />
+ <tool file="fastq/fastq_trimmer_by_quality.xml" />
<tool file="fastq/fastq_manipulation.xml" />
<tool file="fastq/fastq_to_fasta.xml" />
<tool file="fastq/fastq_to_tabular.xml" />
diff -r 61b09dc1dff2 -r 81de8629eeb6 tool_conf.xml.sample
--- a/tool_conf.xml.sample Wed Apr 07 11:12:00 2010 -0400
+++ b/tool_conf.xml.sample Thu Apr 08 15:25:38 2010 -0400
@@ -209,6 +209,7 @@
<label text="Generic FASTQ manipulation" id="generic_fastq" />
<tool file="fastq/fastq_filter.xml" />
<tool file="fastq/fastq_trimmer.xml" />
+ <tool file="fastq/fastq_trimmer_by_quality.xml" />
<tool file="fastq/fastq_manipulation.xml" />
<tool file="fastq/fastq_to_fasta.xml" />
<tool file="fastq/fastq_to_tabular.xml" />
diff -r 61b09dc1dff2 -r 81de8629eeb6 tools/fastq/fastq_trimmer_by_quality.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastq/fastq_trimmer_by_quality.py Thu Apr 08 15:25:38 2010 -0400
@@ -0,0 +1,121 @@
+#Dan Blankenberg
+from optparse import OptionParser
+from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
+
+def mean( score_list ):
+ return float( sum( score_list ) ) / float( len( score_list ) )
+
+ACTION_METHODS = { 'min':min, 'max':max, 'sum':sum, 'mean':mean }
+
+def compare( aggregated_value, operator, threshold_value ):
+ if operator == '>':
+ return aggregated_value > threshold_value
+ elif operator == '>=':
+ return aggregated_value >= threshold_value
+ elif operator == '==':
+ return aggregated_value == threshold_value
+ elif operator == '<':
+ return aggregated_value < threshold_value
+ elif operator == '<=':
+ return aggregated_value <= threshold_value
+ elif operator == '!=':
+ return aggregated_value != threshold_value
+
+def exclude( value_list, exclude_indexes ):
+ rval = []
+ for i, val in enumerate( value_list ):
+ if i not in exclude_indexes:
+ rval.append( val )
+ return rval
+
+def exclude_and_compare( aggregate_action, aggregate_list, operator, threshold_value, exclude_indexes = None ):
+ if not aggregate_list or compare( aggregate_action( aggregate_list ), operator, threshold_value ):
+ return True
+ if exclude_indexes:
+ for exclude_index in exclude_indexes:
+ excluded_list = exclude( aggregate_list, exclude_index )
+ if not excluded_list or compare( aggregate_action( excluded_list ), operator, threshold_value ):
+ return True
+ return False
+
+def main():
+ usage = "usage: %prog [options] input_file output_file"
+ parser = OptionParser( usage=usage )
+ parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'cssanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
+ parser.add_option( '-s', '--window_size', type="int", dest='window_size', default='1', help='Window size' )
+ parser.add_option( '-t', '--window_step', type="int", dest='window_step', default='1', help='Window step' )
+ parser.add_option( '-e', '--trim_ends', type="choice", dest='trim_ends', default='53', choices=('5','3','53','35' ), help='Ends to Trim' )
+ parser.add_option( '-a', '--aggregation_action', type="choice", dest='aggregation_action', default='min', choices=('min','max','sum','mean' ), help='Aggregate action for window' )
+ parser.add_option( '-x', '--exclude_count', type="int", dest='exclude_count', default='0', help='Maximum number of bases to exclude from the window during aggregation' )
+ parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='>=', choices=('>','>=','==','<', '<=', '!=' ), help='Keep read when aggregate score is' )
+ parser.add_option( '-q', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
+ parser.add_option( "-k", "--keep_zero_length", action="store_true", dest="keep_zero_length", default=False, help="Keep reads with zero length")
+ ( options, args ) = parser.parse_args()
+
+ if len ( args ) != 2:
+ parser.error( "Need to specify an input file and an output file" )
+
+ #determine an exhaustive list of window indexes that can be excluded from aggregation
+ exclude_window_indexes = []
+ last_exclude_indexes = []
+ for exclude_count in range( min( options.exclude_count, options.window_size ) ):
+ if last_exclude_indexes:
+ new_exclude_indexes = []
+ for exclude_list in last_exclude_indexes:
+ for window_index in range( options.window_size ):
+ if window_index not in exclude_list:
+ new_exclude = sorted( exclude_list + [ window_index ] )
+ if new_exclude not in exclude_window_indexes + new_exclude_indexes:
+ new_exclude_indexes.append( new_exclude )
+ exclude_window_indexes += new_exclude_indexes
+ last_exclude_indexes = new_exclude_indexes
+ else:
+ for window_index in range( options.window_size ):
+ last_exclude_indexes.append( [ window_index ] )
+ exclude_window_indexes = list( last_exclude_indexes )
+
+ out = fastqWriter( open( args[1], 'wb' ), format = options.format )
+ action = ACTION_METHODS[ options.aggregation_action ]
+ window_step = abs( options.window_step )
+
+ num_reads = None
+ num_reads_excluded = 0
+ for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ):
+ for trim_end in options.trim_ends:
+ quality_list = fastq_read.get_decimal_quality_scores()
+ if trim_end == '5':
+ lwindow_position = 0 #left position of window
+ while True:
+ if lwindow_position >= len( quality_list ):
+ fastq_read.sequence = ''
+ fastq_read.quality = ''
+ break
+ if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ):
+ fastq_read = fastq_read.slice( lwindow_position, None )
+ break
+ lwindow_position += window_step
+ else:
+ rwindow_position = len( quality_list ) #right position of window
+ while True:
+ lwindow_position = rwindow_position - options.window_size #left position of window
+ if rwindow_position <= 0 or lwindow_position < 0:
+ fastq_read.sequence = ''
+ fastq_read.quality = ''
+ break
+ if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ):
+ fastq_read = fastq_read.slice( None, rwindow_position )
+ break
+ rwindow_position -= window_step
+ if options.keep_zero_length or len( fastq_read ):
+ out.write( fastq_read )
+ else:
+ num_reads_excluded += 1
+ out.close()
+ if num_reads is None:
+ print "No valid FASTQ reads could be processed."
+ else:
+ print "%i FASTQ reads were processed." % ( num_reads + 1 )
+ if num_reads_excluded:
+ print "%i reads of zero length were excluded from the output." % num_reads_excluded
+
+if __name__ == "__main__": main()
diff -r 61b09dc1dff2 -r 81de8629eeb6 tools/fastq/fastq_trimmer_by_quality.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastq/fastq_trimmer_by_quality.xml Thu Apr 08 15:25:38 2010 -0400
@@ -0,0 +1,112 @@
+<tool id="fastq_quality_trimmer" name="FASTQ Quality Trimmer" version="1.0.0">
+ <description>by sliding window</description>
+ <command interpreter="python">fastq_trimmer_by_quality.py '$input_file' '$output_file' -f '${input_file.extension[len( 'fastq' ):]}' -s '$window_size'
+ -t '$step_size' -e '$trim_ends' -a '$aggregation_action' -x '$exclude_count' -c '$score_comparison' -q '$quality_score'
+ #if $keep_zero_length.value:
+ -k
+ #end if
+ </command>
+ <inputs>
+ <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
+ <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
+ <param name="trim_ends" type="select" label="Trim ends">
+ <option value="53" selected="True">5' and 3'</option>
+ <option value="5">5' only</option>
+ <option value="3">3' only</option>
+ </param>
+ <param name="window_size" type="integer" value="1" label="Window size"/>
+ <param name="step_size" type="integer" value="1" label="Step Size" />
+ <param name="exclude_count" label="Maximum number of bases to exclude from the window during aggregation" value="0" type="integer" />
+ <param name="aggregation_action" type="select" label="Aggregate action for window">
+ <option value="min" selected="True">min score</option>
+ <option value="max">max score</option>
+ <option value="sum">sum of scores</option>
+ <option value="mean">mean of scores</option>
+ </param>
+ <param name="score_comparison" type="select" label="Trim until aggregate score is">
+ <sanitizer>
+ <valid initial="none">
+ <add value="<>=!"/> <!-- only allow lt, gt, e, le, ge, ne for this parameter; will be single-quote escaped on commandline -->
+ </valid>
+ </sanitizer>
+ <option value=">">></option>
+ <option value=">=" selected="true">>=</option>
+ <option value="==">==</option>
+ <option value="!=">!=</option>
+ <option value="<"><</option>
+ <option value="<="><=</option>
+ </param>
+ <param name="quality_score" label="Quality Score" value="0" type="float" />
+ </inputs>
+ <outputs>
+ <data name="output_file" format="input" />
+ </outputs>
+ <tests>
+ <test>
+ <!-- Trim until window size 1 >= 20;both ends -->
+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+ <param name="keep_zero_length" value="exclude_zero_length" />
+ <param name="trim_ends" value="53"/>
+ <param name="window_size" value="1"/>
+ <param name="step_size" value="1"/>
+ <param name="exclude_count" value="0"/>
+ <param name="aggregation_action" value="min"/>
+ <param name="score_comparison" value=">="/>
+ <param name="quality_score" value="20"/>
+ <output name="output_file" file="sanger_full_range_quality_trimmed_out_1.fastqsanger" />
+ </test>
+ <test>
+ <!-- Trim until window size 1 >= 20; 5' end only -->
+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+ <param name="keep_zero_length" value="exclude_zero_length" />
+ <param name="trim_ends" value="5"/>
+ <param name="window_size" value="1"/>
+ <param name="step_size" value="1"/>
+ <param name="exclude_count" value="0"/>
+ <param name="aggregation_action" value="min"/>
+ <param name="score_comparison" value=">="/>
+ <param name="quality_score" value="20"/>
+ <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" />
+ </test>
+ <test>
+ <!-- Trim until window size 1 >= 20; 3' end only -->
+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+ <param name="keep_zero_length" value="exclude_zero_length" />
+ <param name="trim_ends" value="3"/>
+ <param name="window_size" value="1"/>
+ <param name="step_size" value="1"/>
+ <param name="exclude_count" value="0"/>
+ <param name="aggregation_action" value="min"/>
+ <param name="score_comparison" value=">="/>
+ <param name="quality_score" value="20"/>
+ <output name="output_file" file="sanger_full_range_quality_trimmed_out_3.fastqsanger" />
+ </test>
+ <test>
+ <!-- Trim until window size 2 >= 1;both ends, 1 deviant score -->
+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+ <param name="keep_zero_length" value="exclude_zero_length" />
+ <param name="trim_ends" value="53"/>
+ <param name="window_size" value="2"/>
+ <param name="step_size" value="1"/>
+ <param name="exclude_count" value="1"/>
+ <param name="aggregation_action" value="min"/>
+ <param name="score_comparison" value=">="/>
+ <param name="quality_score" value="1"/>
+ <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
+ </test>
+ </tests>
+ <help>
+This tool allows you to trim the ends of reads based upon the aggregate value of quality scores found within a sliding window; a sliding window of size 1 is equivalent to 'simple' trimming of the ends.
+
+The user specifies the aggregating action (min, max, sum, mean) to perform on the quality score values found within the sliding window to be used with the user defined comparison operation and comparison value.
+
+The user can provide a maximum count of bases that can be excluded from the aggregation within the window. When set, this tool will first check the aggregation of the entire window, then after removing 1 value, then after removing 2 values, up to the number declared. Setting this value to be equal to or greater than the window size will cause no trimming to occur.
+
+-----
+
+.. class:: warningmark
+
+Trimming a color space read will cause any adapter base to be lost.
+
+ </help>
+</tool>
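The tool help above describes the trimming algorithm: slide a window in from a read end, aggregate the quality scores inside the window (min, max, sum or mean), and keep trimming until the aggregate satisfies the chosen comparison against the threshold; a window of size 1 degenerates to simple per-base end trimming. A condensed standalone sketch of 5'-end trimming with the default min/>= settings (illustrative only; trim_5prime is a hypothetical helper, not the tool's own code path):

def trim_5prime(qualities, window_size=1, step=1, threshold=20.0, aggregate=min):
    '''Return the index at which the read should start after 5' trimming.

    The window slides in from the 5' end; trimming stops at the first
    position where the aggregated window score meets the threshold.
    '''
    pos = 0
    while pos < len(qualities):
        window = qualities[pos:pos + window_size]
        if aggregate(window) >= threshold:
            return pos
        pos += step
    return len(qualities)           # nothing met the threshold: trim the whole read

scores = [2, 5, 11, 24, 30, 31, 9]
start = trim_5prime(scores, window_size=1, threshold=20)
print(scores[start:])               # [24, 30, 31, 9]

Running the same scan from the 3' end, and optionally dropping up to exclude_count bases from each window before aggregating as the tool allows, gives the full behaviour described in the help text.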
details: http://www.bx.psu.edu/hg/galaxy/rev/61b09dc1dff2
changeset: 3619:61b09dc1dff2
user: rc
date: Wed Apr 07 11:12:00 2010 -0400
description:
sff converter tool
diffstat:
tool_conf.xml.sample | 1 +
tools/filters/sff_extract.py | 1505 +++++++++++++++++++++++++++++++++++++++
tools/filters/sff_extractor.xml | 22 +
3 files changed, 1528 insertions(+), 0 deletions(-)
diffs (1546 lines):
diff -r ebfc9236bf5a -r 61b09dc1dff2 tool_conf.xml.sample
--- a/tool_conf.xml.sample Wed Apr 07 08:44:57 2010 -0400
+++ b/tool_conf.xml.sample Wed Apr 07 11:12:00 2010 -0400
@@ -78,6 +78,7 @@
<tool file="fasta_tools/tabular_to_fasta.xml" />
<tool file="fastx_toolkit/fastq_to_fasta.xml" />
<tool file="filters/wiggle_to_simple.xml" />
+ <tool file="filters/sff_extractor.xml" />
</section>
<section name="Extract Features" id="features">
<tool file="filters/ucsc_gene_bed_to_exon_bed.xml" />
diff -r ebfc9236bf5a -r 61b09dc1dff2 tools/filters/sff_extract.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/filters/sff_extract.py Wed Apr 07 11:12:00 2010 -0400
@@ -0,0 +1,1505 @@
+#!/usr/bin/python
+'''This software extracts the seq, qual and ancillary information from an sff
+file, like the ones used by the 454 sequencer.
+
+Optionally, it can also split paired-end reads if given the linker sequence.
+The splitting is done with maximum match, i.e., every occurence of the linker
+sequence will be removed, even if occuring multiple times.'''
+
+#copyright Jose Blanca and Bastien Chevreux
+#COMAV institute, Universidad Politecnica de Valencia (UPV)
+#Valencia, Spain
+
+# additions to handle paired end reads by Bastien Chevreux
+# bugfixes for linker specific lengths: Lionel Guy
+
+#This program is free software: you can redistribute it and/or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation, either version 3 of the License, or
+#(at your option) any later version.
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#GNU General Public License for more details.
+#You should have received a copy of the GNU General Public License
+#along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+__author__ = 'Jose Blanca and Bastien Chevreux'
+__copyright__ = 'Copyright 2008, Jose Blanca, COMAV, and Bastien Chevreux'
+__license__ = 'GPLv3 or later'
+__version__ = '0.2.8'
+__email__ = 'jblanca(a)btc.upv.es'
+__status__ = 'beta'
+
+import struct
+import sys
+import os
+import subprocess
+import tempfile
+
+
+fake_sff_name = 'fake_sff_name'
+
+
+# readname as key: lines with matches from SSAHA, one best match
+ssahapematches = {}
+# linker readname as key: length of linker sequence
+linkerlengths = {}
+
+# set to true if something really fishy is going on with the sequences
+stern_warning = True
+
+def read_bin_fragment(struct_def, fileh, offset=0, data=None,
+ byte_padding=None):
+ '''It reads a chunk of a binary file.
+
+ You have to provide the struct, a file object, the offset (where to start
+ reading).
+ Also you can provide an optional dict that will be populated with the
+ extracted data.
+ If a byte_padding is given the number of bytes read will be a multiple of
+ that number, adding the required pad at the end.
+ It returns the number of bytes reads and the data dict.
+ '''
+ if data is None:
+ data = {}
+
+ #we read each item
+ bytes_read = 0
+ for item in struct_def:
+ #we go to the place and read
+ fileh.seek(offset + bytes_read)
+ n_bytes = struct.calcsize(item[1])
+ buffer = fileh.read(n_bytes)
+ read = struct.unpack('>' + item[1], buffer)
+ if len(read) == 1:
+ read = read[0]
+ data[item[0]] = read
+ bytes_read += n_bytes
+
+ #if there is byte_padding the bytes_to_read should be a multiple of the
+ #byte_padding
+ if byte_padding is not None:
+ pad = byte_padding
+ bytes_read = ((bytes_read + pad - 1) // pad) * pad
+
+ return (bytes_read, data)
+
+
+def check_magic(magic):
+ '''It checks that the magic number of the file matches the sff magic.'''
+ if magic != 779314790:
+ raise RuntimeError('This file does not seems to be an sff file.')
+
+def check_version(version):
+ '''It checks that the version is supported, otherwise it raises an error.'''
+ supported = ('\x00', '\x00', '\x00', '\x01')
+ i = 0
+ for item in version:
+ if version[i] != supported[i]:
+ raise RuntimeError('SFF version not supported. Please contact the author of the software.')
+ i += 1
+
+def read_header(fileh):
+ '''It reads the header from the sff file and returns a dict with the
+ information'''
+ #first we read the first part of the header
+ head_struct = [
+ ('magic_number', 'I'),
+ ('version', 'cccc'),
+ ('index_offset', 'Q'),
+ ('index_length', 'I'),
+ ('number_of_reads', 'I'),
+ ('header_length', 'H'),
+ ('key_length', 'H'),
+ ('number_of_flows_per_read', 'H'),
+ ('flowgram_format_code', 'B'),
+ ]
+ data = {}
+ first_bytes, data = read_bin_fragment(struct_def=head_struct, fileh=fileh,
+ offset=0, data=data)
+ check_magic(data['magic_number'])
+ check_version(data['version'])
+ #now that we know the number_of_flows_per_read and the key_length
+ #we can read the second part of the header
+ struct2 = [
+ ('flow_chars', str(data['number_of_flows_per_read']) + 'c'),
+ ('key_sequence', str(data['key_length']) + 'c')
+ ]
+ read_bin_fragment(struct_def=struct2, fileh=fileh, offset=first_bytes,
+ data=data)
+ return data
+
+
+def read_sequence(header, fileh, fposition):
+ '''It reads one read from the sff file located at the fposition and
+ returns a dict with the information.'''
+ header_length = header['header_length']
+ index_offset = header['index_offset']
+ index_length = header['index_length']
+
+ #the sequence struct
+ read_header_1 = [
+ ('read_header_length', 'H'),
+ ('name_length', 'H'),
+ ('number_of_bases', 'I'),
+ ('clip_qual_left', 'H'),
+ ('clip_qual_right', 'H'),
+ ('clip_adapter_left', 'H'),
+ ('clip_adapter_right', 'H'),
+ ]
+ def read_header_2(name_length):
+ '''It returns the struct definition for the second part of the header'''
+ return [('name', str(name_length) +'c')]
+ def read_data(number_of_bases):
+ '''It returns the struct definition for the read data section.'''
+ #size = {'c': 1, 'B':1, 'H':2, 'I':4, 'Q':8}
+ if header['flowgram_format_code'] == 1:
+ flow_type = 'H'
+ else:
+ raise Error('file version not supported')
+ number_of_bases = str(number_of_bases)
+ return [
+ ('flowgram_values', str(header['number_of_flows_per_read']) +
+ flow_type),
+ ('flow_index_per_base', number_of_bases + 'B'),
+ ('bases', number_of_bases + 'c'),
+ ('quality_scores', number_of_bases + 'B'),
+ ]
+
+ data = {}
+ #we read the first part of the header
+ bytes_read, data = read_bin_fragment(struct_def=read_header_1,
+ fileh=fileh, offset=fposition, data=data)
+
+ read_bin_fragment(struct_def=read_header_2(data['name_length']),
+ fileh=fileh, offset=fposition + bytes_read, data=data)
+ #we join the letters of the name
+ data['name'] = ''.join(data['name'])
+ offset = data['read_header_length']
+ #we read the sequence and the quality
+ read_data_st = read_data(data['number_of_bases'])
+ bytes_read, data = read_bin_fragment(struct_def=read_data_st,
+ fileh=fileh, offset=fposition + offset,
+ data=data, byte_padding=8)
+ #we join the bases
+ data['bases'] = ''.join(data['bases'])
+
+ #print data
+ #print "pre cqr: ", data['clip_qual_right']
+ #print "pre car: ", data['clip_adapter_right']
+ #print "pre cql: ", data['clip_qual_left']
+ #print "pre cal: ", data['clip_adapter_left']
+
+ # correct for the case the right clip is <= than the left clip
+ # in this case, left clip is 0 are set to 0 (right clip == 0 means
+ # "whole sequence")
+ if data['clip_qual_right'] <= data['clip_qual_left'] :
+ data['clip_qual_right'] = 0
+ data['clip_qual_left'] = 0
+ if data['clip_adapter_right'] <= data['clip_adapter_left'] :
+ data['clip_adapter_right'] = 0
+ data['clip_adapter_left'] = 0
+
+ #the clipping section follows the NCBI's guidelines Trace Archive RFC
+ #http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=rfc&m=doc&s=rfc
+ #if there's no adapter clip: qual -> vector
+ #else: qual-> qual
+ # adapter -> vector
+
+ if not data['clip_adapter_left']:
+ data['clip_adapter_left'], data['clip_qual_left'] = data['clip_qual_left'], data['clip_adapter_left']
+ if not data['clip_adapter_right']:
+ data['clip_adapter_right'], data['clip_qual_right'] = data['clip_qual_right'], data['clip_adapter_right']
+
+ # see whether we have to override the minimum left clips
+ if config['min_leftclip'] > 0:
+ if data['clip_adapter_left'] >0 and data['clip_adapter_left'] < config['min_leftclip']:
+ data['clip_adapter_left'] = config['min_leftclip']
+ if data['clip_qual_left'] >0 and data['clip_qual_left'] < config['min_leftclip']:
+ data['clip_qual_left'] = config['min_leftclip']
+
+
+ #print "post cqr: ", data['clip_qual_right']
+ #print "post car: ", data['clip_adapter_right']
+ #print "post cql: ", data['clip_qual_left']
+ #print "post cal: ", data['clip_adapter_left']
+
+
+ # for handling the -c (clip) option gently, we already clip here
+ # and set all clip points to the sequence end points
+ if config['clip']:
+ data['bases'], data['quality_scores'] = clip_read(data)
+
+ data['number_of_bases']=len(data['bases'])
+ data['clip_qual_right'] = data['number_of_bases']
+ data['clip_adapter_right'] = data['number_of_bases']
+ data['clip_qual_left'] = 0
+ data['clip_adapter_left'] = 0
+
+ return data['read_header_length'] + bytes_read, data
+
+
+def sequences(fileh, header):
+ '''It returns a generator with the data for each read.'''
+ #now we can read all the sequences
+ fposition = header['header_length'] #position in the file
+ reads_read = 0
+ while True:
+ if fposition == header['index_offset']:
+ #we have to skip the index section
+ fposition += index_length
+ continue
+ else:
+ bytes_read, seq_data = read_sequence(header=header, fileh=fileh,
+ fposition=fposition)
+ yield seq_data
+ fposition += bytes_read
+ reads_read += 1
+ if reads_read >= header['number_of_reads']:
+ break
+
+
+def remove_last_xmltag_in_file(fname, tag=None):
+ '''Given an xml file name and a tag, it removes the last tag of the
+ file if it matches the given tag. Tag removal is performed via file
+ truncation.
+
+ It the given tag is not the last in the file, a RunTimeError will be
+ raised.
+
+ The resulting xml file will be not xml valid. This function is a hack
+ that allows to append records to xml files in a quick and dirty way.
+ '''
+
+ fh = open(fname, 'r+')
+ #we have to read from the end to the start of the file and keep the
+ #string enclosed by </ >
+ i = -1
+ last_tag = [] #the chars that form the last tag
+ start_offset = None #in which byte does the last tag starts?
+ end_offset = None #in which byte does the last tag ends?
+ while True:
+ fh.seek(i, 2)
+ char = fh.read(1)
+ if not char.isspace():
+ last_tag.append(char)
+ if char == '>':
+ end_offset = i
+ if char == '<':
+ start_offset = i
+ break
+ i -= 1
+
+ #we have read the last tag backwards
+ last_tag = ''.join(last_tag[::-1])
+ #we remove the </ and >
+ last_tag = last_tag.rstrip('>').lstrip('</')
+
+ #we check that we're removing the asked tag
+ if tag is not None and tag != last_tag:
+ raise RuntimeError("The given xml tag wasn't the last one in the file")
+
+ # while we are at it: also remove all white spaces in that line :-)
+ i -= 1
+ while True:
+ fh.seek(i, 2)
+ char = fh.read(1)
+ if not char == ' ' and not char == '\t':
+ break;
+ if fh.tell() == 1:
+ break;
+ i -= 1
+
+ fh.truncate();
+
+ fh.close()
+ return last_tag
+
+
+def create_basic_xml_info(readname, fname):
+ '''Formats a number of read specific infos into XML format.
+ Currently formated: name and the tags set from command line
+ '''
+ to_print = [' <trace>\n']
+ to_print.append(' <trace_name>')
+ to_print.append(readname)
+ to_print.append('</trace_name>\n')
+
+ #extra information
+ #do we have extra info for this file?
+ info = None
+ if config['xml_info']:
+ #with this name?
+ if fname in config['xml_info']:
+ info = config['xml_info'][fname]
+ else:
+ #with no name?
+ try:
+ info = config['xml_info'][fake_sff_name]
+ except KeyError:
+ pass
+ #we print the info that we have
+ if info:
+ for key in info:
+ to_print.append(' <' + key + '>' + info[key] + \
+ '</' + key +'>\n')
+
+ return ''.join(to_print)
+
+
+def create_clip_xml_info(readlen, adapl, adapr, quall, qualr):
+ '''Takes the clip values of the read and formats them into XML
+ Corrects "wrong" values that might have resulted through
+ simplified calculations earlier in the process of conversion
+ (especially during splitting of paired-end reads)
+ '''
+
+ to_print = [""]
+
+ # if right borders are >= to read length, they don't need
+ # to be printed
+ if adapr >= readlen:
+ adapr = 0
+ if qualr >= readlen:
+ qualr = 0
+
+ # BaCh
+ # when called via split_paired_end(), some values may be < 0
+ # (when clip values were 0 previously)
+ # instead of putting tons of if clauses for different calculations there,
+ # I centralise corrective measure here
+ # set all values <0 to 0
+
+ if adapr < 0:
+ adapr = 0
+ if qualr <0:
+ qualr = 0
+ if adapl < 0:
+ adapl = 0
+ if quall <0:
+ quall = 0
+
+ if quall:
+ to_print.append(' <clip_quality_left>')
+ to_print.append(str(quall))
+ to_print.append('</clip_quality_left>\n')
+ if qualr:
+ to_print.append(' <clip_quality_right>')
+ to_print.append(str(qualr))
+ to_print.append('</clip_quality_right>\n')
+ if adapl:
+ to_print.append(' <clip_vector_left>')
+ to_print.append(str(adapl))
+ to_print.append('</clip_vector_left>\n')
+ if adapr:
+ to_print.append(' <clip_vector_right>')
+ to_print.append(str(adapr))
+ to_print.append('</clip_vector_right>\n')
+ return ''.join(to_print)
+
+
+def create_xml_for_unpaired_read(data, fname):
+ '''Given the data for one read it returns an str with the xml ancillary
+ data.'''
+ to_print = [create_basic_xml_info(data['name'],fname)]
+ #clippings in the XML only if we do not hard clip
+ if not config['clip']:
+ to_print.append(create_clip_xml_info(data['number_of_bases'],data['clip_adapter_left'], data['clip_adapter_right'], data['clip_qual_left'], data['clip_qual_right']));
+ to_print.append(' </trace>\n')
+ return ''.join(to_print)
+
+
+def format_as_fasta(name,seq,qual):
+ name_line = ''.join(('>', name,'\n'))
+ seqstring = ''.join((name_line, seq, '\n'))
+ qual_line = ' '.join([str(q) for q in qual])
+ qualstring = ''.join((name_line, qual_line, '\n'))
+ return seqstring, qualstring
+
+def format_as_fastq(name,seq,qual):
+ qual_line = ''.join([chr(q+33) for q in qual])
+ #seqstring = ''.join(('@', name,'\n', seq, '\n+', name,'\n', qual_line, '\n'))
+ seqstring = ''.join(('@', name,'\n', seq, '\n+\n', qual_line, '\n'))
+ return seqstring
+
+
+def get_read_data(data):
+ '''Given the data for one read it returns 2 strs with the fasta seq
+ and fasta qual.'''
+ #seq and qual
+ if config['mix_case']:
+ seq = sequence_case(data)
+ qual = data['quality_scores']
+ else :
+ seq = data['bases']
+ qual = data['quality_scores']
+
+ return seq, qual
+
+def extract_read_info(data, fname):
+ '''Given the data for one read it returns 3 strs with the fasta seq, fasta
+ qual and xml ancillary data.'''
+
+ seq,qual = get_read_data(data)
+ seqstring, qualstring = format_as_fasta(data['name'],seq,qual)
+
+ #name_line = ''.join(('>', data['name'],'\n'))
+ #seq = ''.join((name_line, seq, '\n'))
+ #qual_line = ' '.join([str(q) for q in qual])
+ #qual = ''.join((name_line, qual_line, '\n'))
+
+ xmlstring = create_xml_for_unpaired_read(data, fname)
+
+ return seqstring, qualstring, xmlstring
+
+def write_sequence(name,seq,qual,seq_fh,qual_fh):
+ '''Write sequence and quality FASTA and FASTA qual filehandles
+ (or into FASTQ and XML)
+ if sequence length is 0, don't write'''
+
+ if len(seq) == 0 : return
+
+ if qual_fh is None:
+ seq_fh.write(format_as_fastq(name,seq,qual))
+ else:
+ seqstring, qualstring = format_as_fasta(name,seq,qual)
+ seq_fh.write(seqstring)
+ qual_fh.write(qualstring)
+ return
+
+def write_unpaired_read(data, sff_fh, seq_fh, qual_fh, xml_fh):
+ '''Writes an unpaired read into FASTA, FASTA qual and XML filehandles
+ (or into FASTQ and XML)
+ if sequence length is 0, don't write'''
+
+ seq,qual = get_read_data(data)
+ if len(seq) == 0 : return
+
+ write_sequence(data['name'],seq,qual,seq_fh,qual_fh)
+
+ anci = create_xml_for_unpaired_read(data, sff_fh.name)
+ if anci is not None:
+ xml_fh.write(anci)
+ return
+
+
+def reverse_complement(seq):
+ '''Returns the reverse complement of a DNA sequence as string'''
+
+ compdict = {
+ 'a': 't',
+ 'c': 'g',
+ 'g': 'c',
+ 't': 'a',
+ 'u': 't',
+ 'm': 'k',
+ 'r': 'y',
+ 'w': 'w',
+ 's': 's',
+ 'y': 'r',
+ 'k': 'm',
+ 'v': 'b',
+ 'h': 'd',
+ 'd': 'h',
+ 'b': 'v',
+ 'x': 'x',
+ 'n': 'n',
+ 'A': 'T',
+ 'C': 'G',
+ 'G': 'C',
+ 'T': 'A',
+ 'U': 'T',
+ 'M': 'K',
+ 'R': 'Y',
+ 'W': 'W',
+ 'S': 'S',
+ 'Y': 'R',
+ 'K': 'M',
+ 'V': 'B',
+ 'H': 'D',
+ 'D': 'H',
+ 'B': 'V',
+ 'X': 'X',
+ 'N': 'N',
+ '*': '*'
+ }
+
+ complseq = ''.join([compdict[base] for base in seq])
+ # python hack to reverse a list/string/etc
+ complseq = complseq[::-1]
+ return complseq
+
+
+def mask_sequence(seq, maskchar, fpos, tpos):
+ '''Given a sequence, mask it with maskchar starting at fpos (including) and
+ ending at tpos (excluding)
+ '''
+
+ if len(maskchar) > 1:
+ raise RuntimeError("Internal error: more than one character given to mask_sequence")
+ if fpos<0:
+ fpos = 0
+ if tpos > len(seq):
+ tpos = len(seq)
+
+ newseq = ''.join((seq[:fpos],maskchar*(tpos-fpos), seq[tpos:]))
+
+ return newseq
+
+
+def fragment_sequences(sequence, qualities, splitchar):
+ '''Works like split() on strings, except it does this on a sequence
+ and the corresponding list with quality values.
+ Returns a tuple for each fragment, each sublist has the fragment
+ sequence as first and the fragment qualities as second elemnt'''
+
+ # this is slow (due to zip and list appends... use an iterator over
+ # the sequence find find variations and splices on seq and qual
+
+ if len(sequence) != len(qualities):
+ print sequence, qualities
+ raise RuntimeError("Internal error: length of sequence and qualities don't match???")
+
+ retlist = ([])
+ if len(sequence) == 0:
+ return retlist
+
+ actseq = ([])
+ actqual = ([])
+ if sequence[0] != splitchar:
+ inseq = True
+ else:
+ inseq = False
+ for char,qual in zip(sequence,qualities):
+ if inseq:
+ if char != splitchar:
+ actseq.append(char)
+ actqual.append(qual)
+ else:
+ retlist.append((''.join(actseq), actqual))
+ actseq = ([])
+ actqual = ([])
+ inseq = False
+ else:
+ if char != splitchar:
+ inseq = True
+ actseq.append(char)
+ actqual.append(qual)
+
+ if inseq and len(actseq):
+ retlist.append((''.join(actseq), actqual))
+
+ return retlist
+
+
+def calc_subseq_boundaries(maskedseq, maskchar):
+ '''E.g.:
+ ........xxxxxxxx..........xxxxxxxxxxxxxxxxxxxxx.........
+ to
+ (0,8),(8,16),(16,26),(26,47),(47,56)
+ '''
+
+ blist = ([])
+ if len(maskedseq) == 0:
+ return blist
+
+ inmask = True
+ if maskedseq[0] != maskchar:
+ inmask = False
+
+ start = 0
+ for spos in range(len(maskedseq)):
+ if inmask and maskedseq[spos] != maskchar:
+ blist.append(([start,spos]))
+ start = spos
+ inmask = False
+ elif not inmask and maskedseq[spos] == maskchar:
+ blist.append(([start,spos]))
+ start = spos
+ inmask = True
+
+ blist.append(([start,spos+1]))
+
+ return blist
+
+
+def correct_for_smallhits(maskedseq, maskchar, linkername):
+ '''If partial hits were found, take preventive measure: grow
+ the masked areas by 20 bases in each direction
+ Returns either unchanged "maskedseq" or a new sequence
+ with some more characters masked.
+ '''
+ global linkerlengths
+
+ CEBUG = 0
+
+ if CEBUG : print "correct_for_smallhits"
+ if CEBUG : print "Masked seq\n", maskedseq
+ if CEBUG : print "Linkername: ", linkername
+
+ if len(maskedseq) == 0:
+ return maskedseq
+
+ growl=40
+ growl2=growl/2
+
+ boundaries = calc_subseq_boundaries(maskedseq,maskchar)
+ if CEBUG : print "Boundaries: ", boundaries
+
+ foundpartial = False
+ for bounds in boundaries:
+ if CEBUG : print "\tbounds: ", bounds
+ left, right = bounds
+ if left != 0 and right != len(maskedseq):
+ if maskedseq[left] == maskchar:
+ # allow 10% discrepancy
+ # -linkerlengths[linkername]/10
+ # that's a kind of safety net if there are slight sequencing
+ # errors in the linker itself
+ if right-left < linkerlengths[linkername]-linkerlengths[linkername]/10:
+ if CEBUG : print "\t\tPartial: found " + str(right-left) + " gaps, " + linkername + " is " + str(linkerlengths[linkername]) + " nt long."
+ foundpartial = True
+
+ if not foundpartial:
+ return maskedseq
+
+ # grow
+ newseq = ""
+ for bounds in boundaries:
+ if CEBUG : print "Bounds: ", bounds
+ left, right = bounds
+ if maskedseq[left] == maskchar:
+ newseq += maskedseq[left:right]
+ else:
+ clearstart = 0
+ if left > 0 :
+ clearstart = left+growl2
+ clearstop = len(maskedseq)
+ if right < len(maskedseq):
+ clearstop = right-growl2
+
+ if CEBUG : print "clearstart, clearstop: ",clearstart, clearstop
+
+ if clearstop <= clearstart:
+ newseq += maskchar * (right-left)
+ else:
+ if clearstart != left:
+ newseq += maskchar * growl2
+ newseq += maskedseq[clearstart:clearstop]
+ if clearstop != right:
+ newseq += maskchar * growl2
+
+ #print "newseq\n",newseq
+
+ return newseq
+
+
+def split_paired_end(data, sff_fh, seq_fh, qual_fh, xml_fh):
+ '''Splits a paired end read and writes sequences into FASTA, FASTA qual
+ and XML traceinfo file. Returns the number of sequences created.
+
+ As the linker sequence may be anywhere in the read, including the ends
+ and overlapping with bad quality sequence, we need to perform some
+ computing and eventually set new clip points.
+
+ If the resulting split yields only one sequence (because linker
+ was not present or overlapping with left or right clip), only one
+ sequence will be written with ".fn" appended to the name.
+
+ If the read can be split, two reads will be written. The side left of
+ the linker will be named ".r" and will be written in reverse complement
+ into the file to conform with what approximately all assemblers expect
+ when reading paired-end data: reads in forward direction in file. The side
+ right of the linker will be named ".f"
+
+ If SSAHA found partial linker (linker sequences < length of linker),
+ the sequences will get a "_pl" furthermore be cut back thoroughly.
+
+ If SSAHA found multiple occurences of the linker, the names will get an
+ additional "_mlc" within the name to show that there was "multiple
+ linker contamination".
+
+ For multiple or partial linker, the "good" parts of the reads are
+ stored with a ".part<number>" name, additionally they will not get
+ template information in the XML
+ '''
+
+ global ssahapematches
+
+ CEBUG = 0
+
+ maskchar = "#"
+
+ if CEBUG : print "Need to split: " + data['name']
+
+ numseqs = 0;
+ readname = data['name']
+ readlen = data['number_of_bases']
+
+ leftclip, rightclip = return_merged_clips(data)
+ seq, qual = get_read_data(data)
+
+ if CEBUG : print "Original read:\n",seq
+
+ maskedseq = seq
+ if leftclip > 0:
+ maskedseq = mask_sequence(maskedseq, maskchar, 0, leftclip-1)
+ if rightclip < len(maskedseq):
+ maskedseq = mask_sequence(maskedseq, maskchar, rightclip, len(maskedseq))
+
+ leftclip, rightclip = return_merged_clips(data)
+ readlen = data['number_of_bases']
+
+ if CEBUG : print "Readname:", readname
+ if CEBUG : print "Readlen:", readlen
+ if CEBUG : print "Num matches:", str(len(ssahapematches[data['name']]))
+ if CEBUG : print "matches:", ssahapematches[data['name']]
+
+ for match in ssahapematches[data['name']]:
+ score = int(match[0])
+ linkername = match[2]
+ leftreadhit = int(match[3])
+ rightreadhit = int(match[4])
+ #leftlinkerhit = int(match[5])
+ #rightlinkerhit = int(match[6])
+ #direction = match[7]
+ #hitlen = int(match[8])
+ #hitidentity = float(match[9])
+
+ if CEBUG : print match
+ if CEBUG : print "Match with score:", score
+ if CEBUG : print "Read before:\n", maskedseq
+ maskedseq = mask_sequence(maskedseq, maskchar, leftreadhit-1, rightreadhit)
+ if CEBUG : print "Masked seq:\n", maskedseq
+
+ correctedseq = correct_for_smallhits(maskedseq, maskchar, linkername)
+
+ if len(maskedseq) != len(correctedseq):
+ raise RuntimeError("Internal error: maskedseq != correctedseq")
+
+ partialhits = False
+ if correctedseq != maskedseq:
+ if CEBUG : print "Partial hits in", readname
+ if CEBUG : print "Original seq:\n", seq
+ if CEBUG : print "Masked seq:\n", maskedseq
+ if CEBUG : print "Corrected seq\n", correctedseq
+ partialhits = True
+ readname += "_pl"
+ maskedseq = correctedseq
+
+ fragments = fragment_sequences(maskedseq, qual, maskchar)
+
+ if CEBUG : print "Fragments (", len(fragments), "): ", fragments
+
+ mlcflag = False
+ #if len(ssahapematches[data['name']]) > 1:
+ # #print "Multi linker contamination"
+ # mlcflag = True
+ # readname += "_mlc"
+
+ if len(fragments) > 2:
+ if CEBUG : print "Multi linker contamination"
+ mlcflag = True
+ readname += "_mlc"
+
+
+ #print fragments
+ if mlcflag or partialhits:
+ fragcounter = 1
+ readname += ".part"
+ for frag in fragments:
+ actseq = frag[0]
+ if len(actseq) >= 20:
+ actqual = frag[1]
+ oname = readname + str(fragcounter)
+ #seq_fh.write(">"+oname+"\n")
+ #seq_fh.write(actseq+"\n")
+ #qual_fh.write(">"+oname+"\n")
+ #qual_fh.write(' '.join((str(q) for q in actqual)))
+ #qual_fh.write("\n")
+ write_sequence(oname,actseq,actqual,seq_fh,qual_fh)
+ to_print = [create_basic_xml_info(oname,sff_fh.name)]
+ # No clipping in XML ... the multiple and partial fragments
+ # are clipped "hard"
+ # No template ID and trace_end: we don't know the
+ # orientation of the frahments. Even if it were
+ # only two, the fact we had multiple linkers
+ # says something went wrong, so simply do not
+ # write any paired-end information for all these fragments
+ to_print.append(' </trace>\n')
+ xml_fh.write(''.join(to_print))
+ numseqs += 1
+ fragcounter += 1
+ else:
+ if len(fragments) >2:
+ raise RuntimeError("Unexpected: more than two fragments detected in " + readname + ". please contact the authors.")
+ # nothing will happen for 0 fragments
+ if len(fragments) == 1:
+ #print "Tada1"
+ boundaries = calc_subseq_boundaries(maskedseq,maskchar)
+ if len(boundaries) < 1 or len(boundaries) >3:
+ raise RuntimeError("Unexpected case: ", str(len(boundaries)), "boundaries for 1 fragment of ", readname)
+ if len(boundaries) == 3:
+ # case: mask char on both sides of sequence
+ #print "bounds3"
+ data['clip_adapter_left']=1+boundaries[0][1]
+ data['clip_adapter_right']=boundaries[2][0]
+ elif len(boundaries) == 2:
+ # case: mask char left or right of sequence
+ #print "bounds2",
+ if maskedseq[0] == maskchar :
+ # case: mask char left
+ #print "left"
+ data['clip_adapter_left']=1+boundaries[0][1]
+ else:
+ # case: mask char right
+ #print "right"
+ data['clip_adapter_right']=boundaries[1][0]
+ data['name'] = data['name'] + ".fn"
+ write_unpaired_read(data, sff_fh, seq_fh, qual_fh, xml_fh)
+ numseqs = 1
+ elif len(fragments) == 2:
+ #print "Tada2"
+ oname = readname + ".r"
+ seq, qual = get_read_data(data)
+
+ startsearch = False
+ for spos in range(len(maskedseq)):
+ if maskedseq[spos] != maskchar:
+ startsearch = True;
+ else:
+ if startsearch:
+ break
+
+ #print "\nspos: ", spos
+ lseq=seq[:spos]
+ #print "lseq:", lseq
+ actseq = reverse_complement(lseq)
+ lreadlen = len(actseq)
+ lqual = qual[:spos];
+ # python hack to reverse a list/string/etc
+ lqual = lqual[::-1];
+
+ #seq_fh.write(">"+oname+"\n")
+ #seq_fh.write(actseq+"\n")
+ #qual_fh.write(">"+oname+"\n")
+ #qual_fh.write(' '.join((str(q) for q in lqual)))
+ #qual_fh.write("\n")
+
+ write_sequence(oname,actseq,lqual,seq_fh,qual_fh)
+
+ to_print = [create_basic_xml_info(oname,sff_fh.name)]
+ to_print.append(create_clip_xml_info(lreadlen, 0, lreadlen+1-data['clip_adapter_left'], 0, lreadlen+1-data['clip_qual_left']));
+ to_print.append(' <template_id>')
+ to_print.append(readname)
+ to_print.append('</template_id>\n')
+ to_print.append(' <trace_end>r</trace_end>\n')
+ to_print.append(' </trace>\n')
+ xml_fh.write(''.join(to_print))
+
+ oname = readname + ".f"
+ startsearch = False
+ for spos in range(len(maskedseq)-1,-1,-1):
+ if maskedseq[spos] != maskchar:
+ startsearch = True;
+ else:
+ if startsearch:
+ break
+
+ actseq = seq[spos+1:]
+ actqual = qual[spos+1:];
+
+ #print "\nspos: ", spos
+ #print "rseq:", actseq
+
+ #seq_fh.write(">"+oname+"\n")
+ #seq_fh.write(actseq+"\n")
+ #qual_fh.write(">"+oname+"\n")
+ #qual_fh.write(' '.join((str(q) for q in actqual)))
+ #qual_fh.write("\n")
+ write_sequence(oname,actseq,actqual,seq_fh,qual_fh)
+
+ rreadlen = len(actseq)
+ to_print = [create_basic_xml_info(oname,sff_fh.name)]
+ to_print.append(create_clip_xml_info(rreadlen, 0, rreadlen-(readlen-data['clip_adapter_right']), 0, rreadlen-(readlen-data['clip_qual_right'])));
+ to_print.append(' <template_id>')
+ to_print.append(readname)
+ to_print.append('</template_id>\n')
+ to_print.append(' <trace_end>f</trace_end>\n')
+ to_print.append(' </trace>\n')
+ xml_fh.write(''.join(to_print))
+ numseqs = 2
+
+ return numseqs
+
+
+
+def extract_reads_from_sff(config, sff_files):
+ '''Given the configuration and the list of sff_files it writes the seqs,
+ qualities and ancillary data into the output file(s).
+
+ If file for paired-end linker was given, first extracts all sequences
+ of an SFF and searches these against the linker(s) with SSAHA2 to
+ create needed information to split reads.
+ '''
+
+ global ssahapematches
+
+
+ if len(sff_files) == 0 :
+ raise RuntimeError("No SFF file given?")
+
+ #we go through all input files
+ for sff_file in sff_files:
+ if not os.path.getsize(sff_file):
+ raise RuntimeError('Empty file? : ' + sff_file)
+ fh = open(sff_file, 'r')
+ fh.close()
+
+ openmode = 'w'
+ if config['append']:
+ openmode = 'a'
+
+ seq_fh = open(config['seq_fname'], openmode)
+ xml_fh = open(config['xml_fname'], openmode)
+ if config['want_fastq']:
+ qual_fh = None
+ try:
+ os.remove(config['qual_fname'])
+ except :
+ python_formattingwithoutbracesisdumb_dummy = 1
+ else:
+ qual_fh = open(config['qual_fname'], openmode)
+
+ if not config['append']:
+ xml_fh.write('<?xml version="1.0"?>\n<trace_volume>\n')
+ else:
+ remove_last_xmltag_in_file(config['xml_fname'], "trace_volume")
+
+ #we go through all input files
+ for sff_file in sff_files:
+ #print "Working on '" + sff_file + "':"
+ ssahapematches.clear()
+
+ seqcheckstore = ([])
+
+ debug = 0
+
+ if not debug and config['pelinker_fname']:
+ #print "Creating temporary sequences from reads in '" + sff_file + "' ... ",
+ sys.stdout.flush()
+
+ if 0 :
+ # for debugging
+ pid = os.getpid()
+ tmpfasta_fname = 'sffe.tmp.'+ str(pid)+'.fasta'
+ tmpfasta_fh = open(tmpfasta_fname, 'w')
+ else:
+ tmpfasta_fh = tempfile.NamedTemporaryFile(prefix = 'sffeseqs_',
+ suffix = '.fasta')
+
+ sff_fh = open(sff_file, 'rb')
+ header_data = read_header(fileh=sff_fh)
+ for seq_data in sequences(fileh=sff_fh, header=header_data):
+ seq,qual = get_read_data(seq_data)
+ seqstring, qualstring = format_as_fasta(seq_data['name'],seq,qual)
+ tmpfasta_fh.write(seqstring)
+ #seq, qual, anci = extract_read_info(seq_data, sff_fh.name)
+ #tmpfasta_fh.write(seq)
+ #print "done."
+ tmpfasta_fh.seek(0)
+
+ if 0 :
+ # for debugging
+ tmpssaha_fname = 'sffe.tmp.'+str(pid)+'.ssaha2'
+ tmpssaha_fh = open(tmpssaha_fname, 'w+')
+ else:
+ tmpssaha_fh = tempfile.NamedTemporaryFile(prefix = 'sffealig_',
+ suffix = '.ssaha2')
+
+ launch_ssaha(config['pelinker_fname'], tmpfasta_fh.name, tmpssaha_fh)
+ tmpfasta_fh.close()
+
+ tmpssaha_fh.seek(0)
+ read_ssaha_data(tmpssaha_fh)
+ tmpssaha_fh.close()
+
+ if debug:
+ tmpssaha_fh = open("sffe.tmp.10634.ssaha2", 'r')
+ read_ssaha_data(tmpssaha_fh)
+
+ #print "Converting '" + sff_file + "' ... ",
+ sys.stdout.flush()
+ sff_fh = open(sff_file, 'rb')
+ #read_header(infile)
+ header_data = read_header(fileh=sff_fh)
+
+ #now convert all reads
+ nseqs_sff = 0
+ nseqs_out = 0
+ for seq_data in sequences(fileh=sff_fh, header=header_data):
+ nseqs_sff += 1
+
+ seq, qual = clip_read(seq_data)
+ seqcheckstore.append(seq[0:50])
+
+ #if nseqs_sff >1000:
+ # check_for_dubious_startseq(seqcheckstore,sff_file,seq_data)
+ # sys.exit()
+
+ if ssahapematches.has_key(seq_data['name']):
+ #print "Paired end:",seq_data['name']
+ nseqs_out += split_paired_end(seq_data, sff_fh, seq_fh, qual_fh, xml_fh)
+ else:
+ #print "Normal:",seq_data['name']
+ if config['pelinker_fname']:
+ seq_data['name'] = seq_data['name'] + ".fn"
+ write_unpaired_read(seq_data, sff_fh, seq_fh, qual_fh, xml_fh)
+ nseqs_out += 1
+ #print "done."
+ #print 'Converted', str(nseqs_sff), 'reads into', str(nseqs_out), 'sequences.'
+ sff_fh.close()
+
+ check_for_dubious_startseq(seqcheckstore,sff_file,seq_data)
+ seqcheckstore = ([])
+
+ xml_fh.write('</trace_volume>\n')
+
+ xml_fh.close()
+ seq_fh.close()
+ if qual_fh is not None:
+ qual_fh.close()
+
+ return
+
+def check_for_dubious_startseq(seqcheckstore, sffname,seqdata):
+
+ global stern_warning
+
+ foundproblem = ""
+ for checklen in range(1,len(seqcheckstore[0])):
+ foundinloop = False
+ seqdict = {}
+ for seq in seqcheckstore:
+ shortseq = seq[0:checklen]
+ if shortseq in seqdict:
+ seqdict[shortseq] += 1
+ else:
+ seqdict[shortseq] = 1
+
+ for shortseq, count in seqdict.items():
+ if float(count)/len(seqcheckstore) >= 0.5:
+ foundinloop = True
+ stern_warning
+ foundproblem = "\n"+"*" * 80
+ foundproblem += "\nWARNING: "
+ foundproblem += "weird sequences in file " + sffname + "\n\n"
+ foundproblem += "After applying left clips, " + str(count) + " sequences (="
+ foundproblem += '%.0f'%(100.0*float(count)/len(seqcheckstore))
+ foundproblem += "%) start with these bases:\n" + shortseq
+ foundproblem += "\n\nThis does not look sane.\n\n"
+ foundproblem += "Countermeasures you *probably* must take:\n"
+ foundproblem += "1) Make your sequence provider aware of that problem and ask whether this can be\n corrected in the SFF.\n"
+ foundproblem += "2) If you decide that this is not normal and your sequence provider does not\n react, use the --min_left_clip of sff_extract.\n"
+ left,right = return_merged_clips(seqdata)
+ foundproblem += " (Probably '--min_left_clip="+ str(left+len(shortseq))+"' but you should cross-check that)\n"
+ foundproblem += "*" * 80 + "\n"
+ if not foundinloop :
+ break
+ if len(foundproblem):
+ print foundproblem
+
+
+def parse_extra_info(info):
+ '''It parses the information that will go in the xml file.
+
+ There are two formats accepted for the extra information:
+ key1:value1, key2:value2
+ or:
+ file1.sff{key1:value1, key2:value2};file2.sff{key3:value3}
+ '''
+ if not info:
+ return info
+ finfos = info.split(';') #information for each file
+ data_for_files = {}
+ for finfo in finfos:
+ #we split the file name from the rest
+ items = finfo.split('{')
+ if len(items) == 1:
+ fname = fake_sff_name
+ info = items[0]
+ else:
+ fname = items[0]
+ info = items[1]
+ #now we get each key,value pair in the info
+ info = info.replace('}', '')
+ data = {}
+ for item in info.split(','):
+ key, value = item.strip().split(':')
+ key = key.strip()
+ value = value.strip()
+ data[key] = value
+ data_for_files[fname] = data
+ return data_for_files
+
+
+def return_merged_clips(data):
+ '''It returns the left and right positions to clip.'''
+ def max(a, b):
+ '''It returns the max of the two given numbers.
+
+ It won't take into account the zero values.
+ '''
+ if not a and not b:
+ return None
+ if not a:
+ return b
+ if not b:
+ return a
+ if a >= b:
+ return a
+ else:
+ return b
+ def min(a, b):
+ '''It returns the min of the two given numbers.
+
+ It won't take into account the zero values.
+ '''
+ if not a and not b:
+ return None
+ if not a:
+ return b
+ if not b:
+ return a
+ if a <= b:
+ return a
+ else:
+ return b
+ left = max(data['clip_adapter_left'], data['clip_qual_left'])
+ right = min(data['clip_adapter_right'], data['clip_qual_right'])
+ #maybe both clips where zero
+ if left is None:
+ left = 1
+ if right is None:
+ right = data['number_of_bases']
+ return left, right
+
+def sequence_case(data):
+ '''Given the data for one read it returns the seq with mixed case.
+
+ The regions to be clipped will be lower case and the rest upper case.
+ '''
+ left, right = return_merged_clips(data)
+ seq = data['bases']
+ new_seq = ''.join((seq[:left-1].lower(), seq[left-1:right], seq[right:].lower()))
+ return new_seq
+
+def clip_read(data):
+ '''Given the data for one read it returns clipped seq and qual.'''
+
+ qual = data['quality_scores']
+ left, right = return_merged_clips(data)
+ seq = data['bases']
+ qual = data['quality_scores']
+ new_seq = seq[left-1:right]
+ new_qual = qual[left-1:right]
+
+ return new_seq, new_qual
+
+
+
+def tests_for_ssaha(linker_fname):
+ '''Tests whether SSAHA2 can be successfully called.'''
+
+ try:
+ print "Testing whether SSAHA2 is installed and can be launched ... ",
+ sys.stdout.flush()
+ fh = open('/dev/null', 'w')
+ retcode = subprocess.call(["ssaha2", "-v"], stdout = fh)
+ fh.close()
+ print "ok."
+ except :
+ print "nope? Uh oh ...\n\n"
+ raise RuntimeError('Could not launch ssaha2. Have you installed it? Is it in your path?')
+
+
+def load_linker_sequences(linker_fname):
+ '''Loads all linker sequences into memory, storing only the length
+ of each linker.'''
+
+ global linkerlengths
+
+ if not os.path.getsize(linker_fname):
+ raise RuntimeError("File empty? '" + linker_fname + "'")
+ fh = open(linker_fname, 'r')
+ linkerseqs = read_fasta(fh)
+ if len(linkerseqs) == 0:
+ raise RuntimeError(linker_fname + ": no sequence found?")
+ for i in linkerseqs:
+ if linkerlengths.has_key(i.name):
+ raise RuntimeError(linker_fname + ": sequence '" + i.name + "' present multiple times. Aborting.")
+ linkerlengths[i.name] = len(i.sequence)
+ fh.close()
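+ # Illustrative example (made-up linker name, not part of the original patch):
+ # for a FASTA file with a single entry '>pelinker' followed by a 44 bp
+ # sequence, this leaves linkerlengths == {'pelinker': 44}.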
+
+
+def launch_ssaha(linker_fname, query_fname, output_fh):
+ '''Launches SSAHA2 on the linker and query file, storing the SSAHA2 output
+ in the given output filehandle'''
+
+ try:
+ print "Searching linker sequences with SSAHA2 (this may take a while) ... ",
+ sys.stdout.flush()
+ retcode = subprocess.call(["ssaha2", "-output", "ssaha2", "-solexa", "-kmer", "4", "-skip", "1", linker_fname, query_fname], stdout = output_fh)
+ if retcode:
+ raise RuntimeError('Oops.')
+ else:
+ print "ok."
+ except:
+ print "\n"
+ raise RuntimeError('An error occurred during the SSAHA2 execution, aborting.')
+
+def read_ssaha_data(ssahadata_fh):
+ '''Given a file handle, reads the file generated by SSAHA2 (with default
+ output format) and stores all matches in the ssahapematches
+ (SSAHA paired-end matches) dictionary, one list of hits per read'''
+
+ global ssahapematches
+
+ print "Parsing SSAHA2 result file ... ",
+ sys.stdout.flush()
+
+ for line in ssahadata_fh:
+ if line.startswith('ALIGNMENT'):
+ ml = line.split()
+ if len(ml) != 12 :
+ print "\n", line,
+ raise RuntimeError('Expected 12 elements in the SSAHA2 line with ALIGNMENT keyword, but found ' + str(len(ml)))
+ if not ssahapematches.has_key(ml[2]) :
+ ssahapematches[ml[2]] = ([])
+ if ml[8] == 'F':
+ #print line,
+
+ # store everything except the first element (output
+ # format name (ALIGNMENT)) and the last element
+ # (length)
+ ssahapematches[ml[2]].append(ml[1:-1])
+ else:
+ #print ml
+ ml[4],ml[5] = ml[5],ml[4]
+ #print ml
+ ssahapematches[ml[2]].append(ml[1:-1])
+
+ print "done."
+
+
+##########################################################################
+#
+# BaCh: This block was shamelessly copied from
+# http://python.genedrift.org/2007/07/04/reading-fasta-files-conclusion/
+# and then subsequently modified to read fasta correctly
+ # It's still not foolproof, but should be good enough
+#
+##########################################################################
+
+class Fasta:
+ def __init__(self, name, sequence):
+ self.name = name
+ self.sequence = sequence
+
+def read_fasta(file):
+ items = []
+ aninstance = Fasta('', '')
+ linenum = 0
+ for line in file:
+ linenum += 1
+ if line.startswith(">"):
+ if len(aninstance.sequence):
+ items.append(aninstance)
+ aninstance = Fasta('', '')
+ # name == all characters until the first whitespace
+ # (split()[0]) but without the starting ">" ([1:])
+ aninstance.name = line.split()[0][1:]
+ aninstance.sequence = ''
+ if len(aninstance.name) == 0:
+ raise RuntimeError(file.name + ': no name in line ' + str(linenum) + '?')
+
+ else:
+ if len(aninstance.name) == 0:
+ raise RuntimeError(file.name + ': no sequence header at line ' + str(linenum) + '?')
+ aninstance.sequence += line.strip()
+
+ if len(aninstance.name) and len(aninstance.sequence):
+ items.append(aninstance)
+
+ return items
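+ # Illustrative example (made-up read, not part of the original patch):
+ # read_fasta() on a file containing
+ #   >read1 first test read
+ #   ACGT
+ #   TT
+ # yields a single Fasta instance with name 'read1' and sequence 'ACGTTT'.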
+##########################################################################
+
+def version_string ():
+ return "sff_extract " + __version__
+
+def read_config():
+ '''It reads the configuration options from the command line arguments and
+ it returns a dict with them.'''
+ from optparse import OptionParser, OptionGroup
+ usage = "usage: %prog [options] sff1 sff2 ..."
+ desc = "Extract sequences from 454 SFF files into FASTA, FASTA quality"\
+ " and XML traceinfo format. When a paired-end linker sequence"\
+ " is given (-l), use SSAHA2 to scan the sequences for the linker,"\
+ " then split the sequences, removing the linker."
+ parser = OptionParser(usage = usage, version = version_string(), description = desc)
+ parser.add_option('-a', '--append', action="store_true", dest='append',
+ help='append output to existing files', default=False)
+ parser.add_option('-i', '--xml_info', dest='xml_info',
+ help='extra info to write in the xml file')
+ parser.add_option("-l", "--linker_file", dest="pelinker_fname",
+ help="FASTA file with paired-end linker sequences", metavar="FILE")
+
+ group = OptionGroup(parser, "Sequence processing options","")
+ group.add_option('-c', '--clip', action="store_true", dest='clip',
+ help='clip (completely remove) ends with low qual and/or adaptor sequence', default=False)
+ group.add_option('-u', '--upper_case', action="store_false", dest='mix_case',
+ help='all bases in upper case, including clipped ends', default=True)
+ group.add_option('', '--min_left_clip', dest='min_leftclip',
+ metavar="INTEGER", type = "int",
+ help='if the left clip coming from the SFF is smaller than this value, override it', default=0)
+ group.add_option('-Q', '--fastq', action="store_true", dest='want_fastq',
+ help='store as FASTQ file instead of FASTA + FASTA quality file', default=False)
+ parser.add_option_group(group)
+
+ group = OptionGroup(parser, "File name options","")
+ group.add_option("-o", "--out_basename", dest="basename",
+ help="base name for all output files")
+ group.add_option("-s", "--seq_file", dest="seq_fname",
+ help="output sequence file name", metavar="FILE")
+ group.add_option("-q", "--qual_file", dest="qual_fname",
+ help="output quality file name", metavar="FILE")
+ group.add_option("-x", "--xml_file", dest="xml_fname",
+ help="output ancillary xml file name", metavar="FILE")
+ parser.add_option_group(group)
+
+ #default fnames
+ #is there an sff file?
+ basename = 'reads'
+ if sys.argv[-1][-4:].lower() == '.sff':
+ basename = sys.argv[-1][:-4]
+ def_seq_fname = basename + '.fasta'
+ def_qual_fname = basename + '.fasta.qual'
+ def_xml_fname = basename + '.xml'
+ def_pelinker_fname = ''
+ parser.set_defaults(seq_fname = def_seq_fname)
+ parser.set_defaults(qual_fname = def_qual_fname)
+ parser.set_defaults(xml_fname = def_xml_fname)
+ parser.set_defaults(pelinker_fname = def_pelinker_fname)
+
+ #we parse the cmd line
+ (options, args) = parser.parse_args()
+
+ #we put the result in a dict
+ global config
+ config = {}
+ for property in dir(options):
+ if property[0] == '_' or property in ('ensure_value', 'read_file',
+ 'read_module'):
+ continue
+ config[property] = getattr(options, property)
+
+ if config['basename'] is None:
+ config['basename']=basename
+
+ #if no file name was given with -s, -q or -x, derive it from the basename
+ if config['want_fastq']:
+ config['qual_fname'] = ''
+ if config['seq_fname'] == def_seq_fname:
+ config['seq_fname'] = config['basename'] + '.fastq'
+ else:
+ if config['seq_fname'] == def_seq_fname:
+ config['seq_fname'] = config['basename'] + '.fasta'
+ if config['qual_fname'] == def_qual_fname:
+ config['qual_fname'] = config['basename'] + '.fasta.qual'
+
+ if config['xml_fname'] == def_xml_fname:
+ config['xml_fname'] = config['basename'] + '.xml'
+
+ #we parse the extra info for the xml file
+ config['xml_info'] = parse_extra_info(config['xml_info'])
+ return config, args
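+ # Illustrative invocation (made-up file names, not part of the original patch):
+ #   python sff_extract.py -c -Q -o mysample run1.sff run2.sff
+ # clips the reads and writes them as FASTQ to mysample.fastq, with the
+ # ancillary clipping information in mysample.xml.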
+
+
+
+##########################################################################
+
+
+def testsome():
+ sys.exit()
+ return
+
+
+def debug():
+ try:
+ dummy = 1
+ #debug()
+ #testsome()
+
+ config, args = read_config()
+ load_linker_sequences(config['pelinker_fname'])
+
+ #pid = os.getpid()
+ pid = 15603
+
+ #tmpfasta_fname = 'sffe.tmp.'+ str(pid)+'.fasta'
+ #tmpfasta_fh = open(tmpfasta_fname, 'w')
+ tmpfasta_fname = 'FLVI58L05.fa'
+ tmpfasta_fh = open(tmpfasta_fname, 'r')
+
+ tmpssaha_fname = 'sffe.tmp.'+str(pid)+'.ssaha2'
+ tmpssaha_fh = open(tmpssaha_fname, 'w')
+
+ launch_ssaha(config['pelinker_fname'], tmpfasta_fh.name, tmpssaha_fh)
+
+ tmpssaha_fh = open("sffe.tmp.15603.ssaha2", 'r')
+ read_ssaha_data(tmpssaha_fh)
+
+ sys.exit()
+
+ extract_reads_from_sff(config, args)
+
+ except (OSError, IOError, RuntimeError), errval:
+ print errval
+ sys.exit()
+
+ sys.exit()
+
+
+def main():
+
+ argv = sys.argv
+ if len(argv) == 1:
+ sys.argv.append('-h')
+ read_config()
+ sys.exit()
+ try:
+ #debug();
+
+ config, args = read_config()
+
+ if config['pelinker_fname']:
+ #tests_for_ssaha(config['pelinker_fname'])
+ load_linker_sequences(config['pelinker_fname'])
+ if len(args) == 0:
+ raise RuntimeError("No SFF file given?")
+ extract_reads_from_sff(config, args)
+ except (OSError, IOError, RuntimeError), errval:
+ print errval
+ return 1
+
+ if stern_warning:
+ return 1
+
+ return 0
+
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff -r ebfc9236bf5a -r 61b09dc1dff2 tools/filters/sff_extractor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/filters/sff_extractor.xml Wed Apr 07 11:12:00 2010 -0400
@@ -0,0 +1,22 @@
+<tool id="Sff_extractor" name="SFF converter" version="1.0.0">
+ <description></description>
+ <command interpreter="python">sff_extract.py -s $out_file1 -q $out_file2 -x $out_file3 $input</command>
+ <inputs>
+ <param format="sff" name="input" type="data" label="Extract from this dataset"/>
+ </inputs>
+ <outputs>
+ <data format="fasta" name="out_file1" />
+ <data format="qual" name="out_file2" />
+ <data format="xml" name="out_file3" />
+ </outputs>
+ <help>
+**What it does**
+
+This tool extracts data from the 454 Sequencer SFF format and creates three files containing the:
+ Sequences (FASTA),
+ Qualities (QUAL) and
+ Clippings (XML)
+ </help>
+</tool>
+
+
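For reference, a sketch of the command line the wrapper above produces once Galaxy has substituted the parameters (the file names are placeholders; Galaxy supplies its own dataset paths):
python sff_extract.py -s reads.fasta -q reads.qual -x reads.xml input.sff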
details: http://www.bx.psu.edu/hg/galaxy/rev/ebfc9236bf5a
changeset: 3618:ebfc9236bf5a
user: Anton Nekrutenko <anton(a)bx.psu.edu>
date: Wed Apr 07 08:44:57 2010 -0400
description:
Updated output format for solid2fastq converter
diffstat:
tools/next_gen_conversion/solid2fastq.xml | 4 ++--
tools/samtools/pileup_parser.xml | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diffs (26 lines):
diff -r 8fa4e8c12dfc -r ebfc9236bf5a tools/next_gen_conversion/solid2fastq.xml
--- a/tools/next_gen_conversion/solid2fastq.xml Tue Apr 06 19:00:39 2010 -0400
+++ b/tools/next_gen_conversion/solid2fastq.xml Wed Apr 07 08:44:57 2010 -0400
@@ -35,8 +35,8 @@
</param>
</inputs>
<outputs>
- <data format="fastqsanger" name="out_file1"/>
- <data format="fastqsanger" name="out_file2">
+ <data format="fastqcssanger" name="out_file1"/>
+ <data format="fastqcssanger" name="out_file2">
<filter>is_run['paired'] == 'yes'</filter>
</data>
</outputs>
diff -r 8fa4e8c12dfc -r ebfc9236bf5a tools/samtools/pileup_parser.xml
--- a/tools/samtools/pileup_parser.xml Tue Apr 06 19:00:39 2010 -0400
+++ b/tools/samtools/pileup_parser.xml Wed Apr 07 08:44:57 2010 -0400
@@ -353,7 +353,7 @@
**Example 3**: Report everything and print total number of differences
-If you set the **Print total number of differences?** to **Yes** the tool will print an additional column with the total number of reads where a devinat base is above the quality threshold cutoff. So, seetiing parametrs like this:
+If you set the **Print total number of differences?** to **Yes**, the tool will print an additional column with the total number of reads where a deviant base is above the quality threshold. So, setting parameters like this:
.. image:: ../static/images/pileup_parser_help3.png
details: http://www.bx.psu.edu/hg/galaxy/rev/8fa4e8c12dfc
changeset: 3617:8fa4e8c12dfc
user: Kanwei Li <kanwei(a)gmail.com>
date: Tue Apr 06 19:00:39 2010 -0400
description:
Add BedGraph datatype (wiggle-like data for intervals)
trackster:
- Fix various UI issues
- BedGraph -> array_tree converter
diffstat:
datatypes_conf.xml.sample | 3 +
lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py | 55 ++++++++++
lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.xml | 14 ++
lib/galaxy/datatypes/interval.py | 9 +-
lib/galaxy/visualization/tracks/data/array_tree.py | 2 +-
lib/galaxy/web/controllers/tracks.py | 6 +-
static/scripts/packed/trackster.js | 2 +-
static/scripts/trackster.js | 34 +++---
templates/tracks/browser.mako | 19 +--
9 files changed, 110 insertions(+), 34 deletions(-)
diffs (334 lines):
diff -r 2d415fb320b4 -r 8fa4e8c12dfc datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample Tue Apr 06 15:29:23 2010 -0400
+++ b/datatypes_conf.xml.sample Tue Apr 06 19:00:39 2010 -0400
@@ -16,6 +16,9 @@
<!-- <display file="ucsc/interval_as_bed.xml" /> -->
<display file="genetrack.xml" />
</datatype>
+ <datatype extension="bedgraph" type="galaxy.datatypes.interval:BedGraph" display_in_upload="true">
+ <converter file="bedgraph_to_array_tree_converter.xml" target_datatype="array_tree"/>
+ </datatype>
<datatype extension="bedstrict" type="galaxy.datatypes.interval:BedStrict" />
<datatype extension="bed6" type="galaxy.datatypes.interval:Bed6">
<converter file="bed_to_genetrack_converter.xml" target_datatype="genetrack"/>
diff -r 2d415fb320b4 -r 8fa4e8c12dfc lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py Tue Apr 06 19:00:39 2010 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from __future__ import division
+
+import sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.arrays.array_tree import *
+# from bx.arrays.wiggle import BedReader
+
+BLOCK_SIZE = 100
+
+class BedGraphReader:
+ def __init__( self, f ):
+ self.f = f
+
+ def __iter__( self ):
+ return self
+
+ def next( self ):
+ while True:
+ line = self.f.readline()
+ if not line:
+ raise StopIteration()
+ if line.isspace():
+ continue
+ if line[0] == "#":
+ continue
+ if line[0].isalpha():
+ if line.startswith( "track" ) or line.startswith( "browser" ):
+ continue
+
+ feature = line.strip().split()
+ chrom = feature[0]
+ chrom_start = int(feature[1])
+ chrom_end = int(feature[2])
+ score = int(feature[3])
+ return chrom, chrom_start, chrom_end, None, score
+def main():
+
+ input_fname = sys.argv[1]
+ out_fname = sys.argv[2]
+
+ reader = BedGraphReader( open( input_fname ) )
+
+ # Fill array from reader
+ d = array_tree_dict_from_reader( reader, {}, block_size = BLOCK_SIZE )
+
+ for array_tree in d.itervalues():
+ array_tree.root.build_summary()
+
+ FileArrayTreeDict.dict_to_file( d, open( out_fname, "w" ) )
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
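A quick sketch of what the new reader yields, assuming it is run inside the converter module above (the BedGraph content is made up for illustration):
from StringIO import StringIO
reader = BedGraphReader( StringIO( "track type=bedGraph\nchr1\t100\t200\t5\n" ) )
print reader.next() # -> ('chr1', 100, 200, None, 5): chrom, start, end, no strand, score
Note that the score column is parsed with int(), so this sketch assumes integer-valued BedGraph data.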
diff -r 2d415fb320b4 -r 8fa4e8c12dfc lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.xml Tue Apr 06 19:00:39 2010 -0400
@@ -0,0 +1,14 @@
+<tool id="CONVERTER_BedGraph_0" name="Index BedGraph for Track Viewer">
+ <!-- Used internally to generate track indexes -->
+ <command interpreter="python">bedgraph_to_array_tree_converter.py $input $output</command>
+ <inputs>
+ <page>
+ <param format="bedgraph" name="input" type="data" label="Choose BedGraph"/>
+ </page>
+ </inputs>
+ <outputs>
+ <data format="array_tree" name="output"/>
+ </outputs>
+ <help>
+ </help>
+</tool>
\ No newline at end of file
diff -r 2d415fb320b4 -r 8fa4e8c12dfc lib/galaxy/datatypes/interval.py
--- a/lib/galaxy/datatypes/interval.py Tue Apr 06 15:29:23 2010 -0400
+++ b/lib/galaxy/datatypes/interval.py Tue Apr 06 19:00:39 2010 -0400
@@ -345,7 +345,14 @@
def get_track_resolution( self, dataset, start, end):
return None
-
+class BedGraph( Interval ):
+ """Tab delimited chrom/start/end/datavalue dataset"""
+
+ file_ext = "bedgraph"
+
+ def get_track_type( self ):
+ return "LineTrack", {"data": "array_tree"}
+
class Bed( Interval ):
"""Tab delimited data in BED format"""
file_ext = "bed"
diff -r 2d415fb320b4 -r 8fa4e8c12dfc lib/galaxy/visualization/tracks/data/array_tree.py
--- a/lib/galaxy/visualization/tracks/data/array_tree.py Tue Apr 06 15:29:23 2010 -0400
+++ b/lib/galaxy/visualization/tracks/data/array_tree.py Tue Apr 06 19:00:39 2010 -0400
@@ -19,7 +19,7 @@
chrom_array_tree = d[chrom]
except KeyError:
f.close()
- return "no data"
+ return None
root_summary = chrom_array_tree.get_summary( 0, chrom_array_tree.levels )
diff -r 2d415fb320b4 -r 8fa4e8c12dfc lib/galaxy/web/controllers/tracks.py
--- a/lib/galaxy/web/controllers/tracks.py Tue Apr 06 15:29:23 2010 -0400
+++ b/lib/galaxy/web/controllers/tracks.py Tue Apr 06 19:00:39 2010 -0400
@@ -209,11 +209,13 @@
return messages.NO_CONVERTER
# Need to check states again for the converted version
+ if converted_dataset and converted_dataset.state == model.Dataset.states.ERROR:
+ return messages.ERROR
+
if not converted_dataset or converted_dataset.state != model.Dataset.states.OK:
return messages.PENDING
- if converted_dataset.state == model.Dataset.states.ERROR:
- return messages.ERROR
+
extra_info = None
if 'index' in data_sources:
diff -r 2d415fb320b4 -r 8fa4e8c12dfc static/scripts/packed/trackster.js
--- a/static/scripts/packed/trackster.js Tue Apr 06 15:29:23 2010 -0400
+++ b/static/scripts/packed/trackster.js Tue Apr 06 19:00:39 2010 -0400
@@ -1,1 +1,1 @@
[The old and new minified one-line builds of packed/trackster.js are omitted here: they are machine-generated from static/scripts/trackster.js and were mangled beyond readability by the mail archive's line wrapping. The corresponding readable source changes appear in the trackster.js diff below.]
diff -r 2d415fb320b4 -r 8fa4e8c12dfc static/scripts/trackster.js
--- a/static/scripts/trackster.js Tue Apr 06 15:29:23 2010 -0400
+++ b/static/scripts/trackster.js Tue Apr 06 19:00:39 2010 -0400
@@ -238,7 +238,7 @@
} else if (result === "no converter") {
track.container_div.addClass("error");
track.content_div.text(DATA_NOCONVERTER);
- } else if ( (result.data && result.data.length === 0) || result === "no data") {
+ } else if (result.data && result.data.length === 0 || result.data === null) {
track.container_div.addClass("nodata");
track.content_div.text(DATA_NONE);
} else if (result === "pending") {
@@ -354,10 +354,10 @@
this.dataset_id = dataset_id;
this.data_cache = new Cache(CACHED_DATA);
this.tile_cache = new Cache(CACHED_TILES_LINE);
- this.prefs = { 'min_value': undefined, 'max_value': undefined, 'mode': 'line' };
+ this.prefs = { 'min_value': undefined, 'max_value': undefined, 'mode': 'Line' };
if (prefs.min_value !== undefined) { this.prefs.min_value = prefs.min_value; }
if (prefs.max_value !== undefined) { this.prefs.max_value = prefs.max_value; }
- if (prefs.max_value !== undefined) { this.prefs.mode = prefs.mode; }
+ if (prefs.mode !== undefined) { this.prefs.mode = prefs.mode; }
};
$.extend( LineTrack.prototype, TiledTrack.prototype, {
init: function() {
@@ -431,12 +431,13 @@
canvas = $("<canvas class='tile'></canvas>"),
key = resolution + "_" + tile_index;
- if (!this.data_cache.get(key)) {
+ if (this.data_cache.get(key) === undefined) {
this.get_data( resolution, tile_index );
return;
}
var data = this.data_cache.get(key);
+ if (data === null) { return; }
canvas.css( {
position: "absolute",
@@ -467,7 +468,7 @@
var x = data[i][0] - tile_low;
var y = data[i][1];
- if ( this.prefs.mode == "intensity" ) {
+ if ( this.prefs.mode == "Intensity" ) {
// DRAW INTENSITY
if (y === null) {
continue;
@@ -480,7 +481,7 @@
}
y = 255 - Math.floor( (y - min_value) / vertical_range * 255 );
ctx.fillStyle = "rgb(" +y+ "," +y+ "," +y+ ")";
- ctx.fillRect(x, 0, delta_x_px, 30);
+ ctx.fillRect(x, 0, delta_x_px, this.height_px);
}
else {
// Missing data causes us to stop drawing
@@ -523,10 +524,11 @@
max_val = (this.prefs.max_value === undefined ? "" : this.prefs.max_value),
max_input = $('<input></input>').attr("id", maxval).val(max_val),
mode_label = $('<label></label>').attr("for", mode).text("Display mode:"),
- mode_val = (this.prefs.mode === undefined ? "line" : this.prefs.mode),
- mode_input = $('<select id="' +mode+ '"><option value="line" id="mode_line">Line</option><option value="intensity" id="mode_intensity">Intensity</option></select>');
- $("#" + mode + " #mode_"+mode_val).attr('selected', 'selected');
-
+ mode_val = (this.prefs.mode === undefined ? "Line" : this.prefs.mode),
+ mode_input = $('<select id="' +mode+ '"><option value="Line" id="mode_Line">Line</option><option value="Intensity" id="mode_Intensity">Intensity</option></select>');
+
+ mode_input.children("#mode_"+mode_val).attr('selected', 'selected');
+
return container.append(min_label).append(min_input).append(max_label).append(max_input).append(mode_label).append(mode_input);
}, update_options: function(track_id) {
var min_value = $('#track_' + track_id + '_minval').val(),
@@ -567,7 +569,7 @@
this.tile_cache = new Cache(CACHED_TILES_FEATURE);
this.data_cache = new Cache(20);
- this.prefs = { 'block_color': 'black', 'label_color': 'black', 'show_counts': true };
+ this.prefs = { 'block_color': 'black', 'label_color': 'black', 'show_counts': false };
if (prefs.block_color !== undefined) { this.prefs.block_color = prefs.block_color; }
if (prefs.label_color !== undefined) { this.prefs.label_color = prefs.label_color; }
if (prefs.show_counts !== undefined) { this.prefs.show_counts = prefs.show_counts; }
@@ -581,9 +583,6 @@
high: track.view.max_high, dataset_id: track.dataset_id,
chrom: track.view.chrom, resolution: this.view.resolution }, function (result) {
track.data_cache.set(key, result);
- // track.values = result;
- // track.calc_slots();
- // track.slots = track.zo_slots;
track.draw();
});
},
@@ -691,7 +690,7 @@
var k = tile_low + '_' + tile_high;
var data = this.data_cache.get(k);
- if (!data) {
+ if (data === undefined) {
this.data_queue[ [tile_low, tile_high] ] = true;
this.get_data(tile_low, tile_high);
return;
@@ -866,9 +865,10 @@
label_color_input = $('<input></input>').attr("id", label_color).attr("name", label_color).val(this.prefs.label_color),
show_count = 'track_' + track_id + '_show_count',
show_count_label = $('<label></label>').attr("for", show_count).text("Show summary counts"),
- show_count_input = $('<input type="checkbox" style="float:left;"></input>').attr("id", show_count).attr("name", show_count).attr("checked", this.prefs.show_counts);
+ show_count_input = $('<input type="checkbox" style="float:left;"></input>').attr("id", show_count).attr("name", show_count).attr("checked", this.prefs.show_counts),
+ show_count_div = $('<div></div>').append(show_count_input).append(show_count_label);
- return container.append(block_color_label).append(block_color_input).append(label_color_label).append(label_color_input).append(show_count_input).append(show_count_label);
+ return container.append(block_color_label).append(block_color_input).append(label_color_label).append(label_color_input).append(show_count_div);
}, update_options: function(track_id) {
var block_color = $('#track_' + track_id + '_block_color').val(),
label_color = $('#track_' + track_id + '_label_color').val(),
diff -r 2d415fb320b4 -r 8fa4e8c12dfc templates/tracks/browser.mako
--- a/templates/tracks/browser.mako Tue Apr 06 15:29:23 2010 -0400
+++ b/templates/tracks/browser.mako Tue Apr 06 19:00:39 2010 -0400
@@ -45,7 +45,7 @@
</div>
</div>
<div id="nav-controls">
- <form name="chr" id="chr" method="get">
+ <form action="#">
<select id="chrom" name="chrom" style="width: 15em;">
<option value="">Loading</option>
</select>
@@ -68,14 +68,14 @@
<div class="unified-panel-header" unselectable="on">
<div class="unified-panel-header-inner">Configuration</div>
</div>
- <form action="${h.url_for( action='update_config' )}">
+ <form action="#" onsubmit="view.update_options();return false;">
## <input name="title" id="title" value="${config.title}" />
<div id="show-hide-move">
<ul id="sortable-ul"></ul>
</div>
- <input type="button" id="refresh-button" value="Refresh" />
+ <input type="submit" id="refresh-button" value="Refresh" />
<input type="button" id="save-button" value="Save" />
- <input id="add-track" type="button" value="Add Track" />
+ <input id="add-track" type="button" value="Add Tracks" />
</form>
</%def>
@@ -189,10 +189,6 @@
view.redraw();
});
- $("#refresh-button").bind( "click", function(e) {
- view.update_options();
- });
-
// Use a popup grid to add more tracks
$("#add-track").bind( "click", function(e) {
$.ajax({
@@ -311,16 +307,15 @@
del_icon = $('<a href="#" class="icon-button delete" />'),
edit_icon = $('<a href="#" class="icon-button edit" />'),
body = $('<div class="historyItemBody"></div>'),
- li = $('<li class="sortable"></li>').attr("id", "track_" + track_id),
+ li = $('<li class="sortable"></li>').attr("id", "track_" + track_id + "_li"),
div = $('<div class="historyItemContainer historyItem"></div>'),
editable = $('<div style="display:none"></div>').attr("id", "track_" + track_id + "_editable");
edit_icon.bind("click", function() {
$("#track_" + track_id + "_editable").toggle();
});
-
del_icon.bind("click", function() {
- li.fadeOut('slow', function() { $(this).remove(); });
+ $("#track_" + track_id + "_li").fadeOut('slow', function() { $("#track_" + track_id).remove(); });
view.remove_track(track);
view.update_options();
});
@@ -329,7 +324,7 @@
if (track.gen_options) {
editable.append(track.gen_options(track_id)).appendTo(body);
}
- div.append(title).append(body).appendTo(li)
+ div.append(title).append(body).appendTo(li);
$("ul#sortable-ul").append(li);
}
};
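The cache change in trackster.js above is the interesting part: the old test !this.data_cache.get(key) treated "tile never requested" and "tile requested but empty" the same, so tiles whose data came back as null could be re-requested indefinitely. Comparing against undefined and null separately splits the two cases. A minimal Python sketch of the same idea, using a sentinel object; all names here are illustrative and not part of the Galaxy code:

# Sketch only: distinguish "key never cached" from "key cached with no data",
# the same distinction the === undefined / === null checks above make in JS.
_MISSING = object()   # sentinel returned when a key has never been stored

class TileCache(object):
    def __init__(self):
        self._store = {}
    def get(self, key):
        return self._store.get(key, _MISSING)
    def set(self, key, value):
        self._store[key] = value

def draw_tile(cache, key, fetch, render):
    data = cache.get(key)
    if data is _MISSING:
        fetch(key)    # never requested: kick off the request and return
        return
    if data is None:
        return        # requested before, but there is nothing to draw
    render(data)

Keeping the "empty" marker in the cache is what stops the view from hammering the server for regions it already knows contain no data.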
details: http://www.bx.psu.edu/hg/galaxy/rev/2d415fb320b4
changeset: 3616:2d415fb320b4
user: fubar: ross Lazarus at gmail period com
date: Tue Apr 06 15:29:23 2010 -0400
description:
When running a converter, deps may be None in datatypes/data.py.
diffstat:
lib/galaxy/datatypes/data.py | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diffs (12 lines):
diff -r 61f1556d6e88 -r 2d415fb320b4 lib/galaxy/datatypes/data.py
--- a/lib/galaxy/datatypes/data.py Tue Apr 06 12:10:20 2010 -0400
+++ b/lib/galaxy/datatypes/data.py Tue Apr 06 15:29:23 2010 -0400
@@ -262,7 +262,7 @@
#determine input parameter name and add to params
input_name = 'input1'
for key, value in converter.inputs.items():
- if value.name in deps:
+ if (deps) and (value.name in deps):
params[value.name] = deps[value.name]
elif value.type == 'data':
input_name = key
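The guard short-circuits the membership test when no dependencies were passed in. A hedged illustration of the same pattern as a standalone function (the function name is hypothetical; the loop body mirrors the actual change):

# Sketch: resolve converter inputs when the dependency dict may be None.
def resolve_converter_inputs(converter_inputs, deps):
    params = {}
    input_name = 'input1'
    for key, value in converter_inputs.items():
        # deps is None when the converter has no dependencies, so test it
        # before testing membership to avoid a TypeError.
        if deps and value.name in deps:
            params[value.name] = deps[value.name]
        elif value.type == 'data':
            input_name = key
    return params, input_name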
details: http://www.bx.psu.edu/hg/galaxy/rev/61f1556d6e88
changeset: 3615:61f1556d6e88
user: jeremy goecks <jeremy.goecks(a)emory.edu>
date: Tue Apr 06 12:10:20 2010 -0400
description:
Fix unicode bug for large files. Fixes #244
diffstat:
templates/dataset/large_file.mako | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diffs (10 lines):
diff -r 44366a1123b0 -r 61f1556d6e88 templates/dataset/large_file.mako
--- a/templates/dataset/large_file.mako Tue Apr 06 09:24:33 2010 -0400
+++ b/templates/dataset/large_file.mako Tue Apr 06 12:10:20 2010 -0400
@@ -7,5 +7,5 @@
</div>
<pre>
-${ truncated_data | h }
+${ unicode( truncated_data, 'utf-8' ) | h }
</pre>
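The fix decodes the raw bytes to unicode before they reach the h (HTML-escape) filter, which is where the original error surfaced. A minimal Python 2 sketch of the same decode-then-escape order (function and variable names are illustrative, not Galaxy API):

# Sketch (Python 2, matching the codebase of this era): decode raw file
# bytes to unicode before HTML-escaping, so multi-byte UTF-8 sequences in
# a large dataset preview do not break the template's escape filter.
import cgi

def preview_html(truncated_data):
    text = unicode(truncated_data, 'utf-8')   # bytes -> unicode, as in the diff
    return u'<pre>%s</pre>' % cgi.escape(text)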
details: http://www.bx.psu.edu/hg/galaxy/rev/44366a1123b0
changeset: 3614:44366a1123b0
user: rc
date: Tue Apr 06 09:24:33 2010 -0400
description:
lims: fixed a delete/undelete request bug; all of a request's samples are now deleted (or undeleted) along with the request.
diffstat:
lib/galaxy/web/controllers/requests.py | 8 ++++++++
lib/galaxy/web/controllers/requests_admin.py | 8 ++++++++
2 files changed, 16 insertions(+), 0 deletions(-)
diffs (50 lines):
diff -r 6f7528445c64 -r 44366a1123b0 lib/galaxy/web/controllers/requests.py
--- a/lib/galaxy/web/controllers/requests.py Mon Apr 05 15:11:04 2010 -0400
+++ b/lib/galaxy/web/controllers/requests.py Tue Apr 06 09:24:33 2010 -0400
@@ -924,6 +924,10 @@
else:
request.deleted = True
trans.sa_session.add( request )
+ # delete all the samples belonging to this request
+ for s in request.samples:
+ s.deleted = True
+ trans.sa_session.add( s )
trans.sa_session.flush()
if not len(delete_failed):
message = '%i request(s) has been deleted.' % len(id_list)
@@ -951,6 +955,10 @@
**kwd) )
request.deleted = False
trans.sa_session.add( request )
+ # undelete all the samples belonging to this request
+ for s in request.samples:
+ s.deleted = False
+ trans.sa_session.add( s )
trans.sa_session.flush()
return trans.response.send_redirect( web.url_for( controller='requests',
action='list',
diff -r 6f7528445c64 -r 44366a1123b0 lib/galaxy/web/controllers/requests_admin.py
--- a/lib/galaxy/web/controllers/requests_admin.py Mon Apr 05 15:11:04 2010 -0400
+++ b/lib/galaxy/web/controllers/requests_admin.py Tue Apr 06 09:24:33 2010 -0400
@@ -385,6 +385,10 @@
**kwd) )
request.deleted = True
trans.sa_session.add( request )
+ # delete all the samples belonging to this request
+ for s in request.samples:
+ s.deleted = True
+ trans.sa_session.add( s )
trans.sa_session.flush()
message = '%i request(s) has been deleted.' % len(id_list)
status = 'done'
@@ -407,6 +411,10 @@
**kwd) )
request.deleted = False
trans.sa_session.add( request )
+ # undelete all the samples belonging to this request
+ for s in request.samples:
+ s.deleted = False
+ trans.sa_session.add( s )
trans.sa_session.flush()
return trans.response.send_redirect( web.url_for( controller='requests_admin',
action='list',
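Since the same cascade now appears in both requests.py and requests_admin.py, one way to keep the two controllers in sync would be a shared helper; a hedged sketch (this helper does not exist in the codebase, the body just restates the loop added above):

# Sketch: toggle a request's deleted flag and cascade it to its samples.
def set_request_deleted(sa_session, request, deleted=True):
    request.deleted = deleted
    sa_session.add(request)
    # Cascade the flag to every sample that belongs to this request.
    for sample in request.samples:
        sample.deleted = deleted
        sa_session.add(sample)
    sa_session.flush()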
details: http://www.bx.psu.edu/hg/galaxy/rev/6f7528445c64
changeset: 3613:6f7528445c64
user: Kanwei Li <kanwei(a)gmail.com>
date: Mon Apr 05 15:11:04 2010 -0400
description:
trackster: don't allow adding datasets that are in an error state
diffstat:
lib/galaxy/web/controllers/tracks.py | 10 ++++++----
1 files changed, 6 insertions(+), 4 deletions(-)
diffs (23 lines):
diff -r 7bdc5202c67e -r 6f7528445c64 lib/galaxy/web/controllers/tracks.py
--- a/lib/galaxy/web/controllers/tracks.py Mon Apr 05 14:15:19 2010 -0400
+++ b/lib/galaxy/web/controllers/tracks.py Mon Apr 05 15:11:04 2010 -0400
@@ -61,13 +61,15 @@
grids.TextColumn( "Name", key="name", model_class=model.HistoryDatasetAssociation ),
grids.GridColumn( "Filetype", key="extension" ),
]
+
+ def build_initial_query( self, session ):
+ return session.query( self.model_class ).join( model.History.table).join( model.Dataset.table )
def apply_default_filter( self, trans, query, **kwargs ):
if self.available_tracks is None:
self.available_tracks = trans.app.datatypes_registry.get_available_tracks()
- return query.select_from( model.HistoryDatasetAssociation.table \
- .join( model.History.table ) ) \
- .filter( model.History.user == trans.user ) \
- .filter( model.HistoryDatasetAssociation.extension.in_(self.available_tracks) )
+ return query.filter( model.History.user == trans.user ) \
+ .filter( model.HistoryDatasetAssociation.extension.in_(self.available_tracks) ) \
+ .filter( model.Dataset.state != "error")
class TracksController( BaseController ):
"""