details: http://www.bx.psu.edu/hg/galaxy/rev/14214b45db3f changeset: 3153:14214b45db3f user: rc date: Mon Dec 07 14:45:47 2009 -0500 description: Merge with 24f195c617da80ae71dd8281b75179733016bcb6 diffstat: buildbot_setup.sh | 16 +- lib/galaxy/datatypes/binary.py | 49 ++- lib/galaxy/datatypes/sequence.py | 36 +-- lib/galaxy/datatypes/test/1.bam | lib/galaxy/tools/__init__.py | 47 +--- lib/galaxy/tools/parameters/basic.py | 71 +++-- lib/galaxy/tools/parameters/input_translation.py | 106 ++++++++ lib/galaxy/tools/parameters/sanitize.py | 167 +++++++++++++ lib/galaxy/tools/util/maf_utilities.py | 11 +- lib/galaxy/util/__init__.py | 66 +----- lib/galaxy/web/controllers/async.py | 2 +- lib/galaxy/web/controllers/root.py | 2 +- lib/galaxy/web/controllers/tool_runner.py | 9 +- test-data/1.bam | test-data/2.sam | 10 + test-data/2.tabular | 20 +- test-data/3.sam | 10 + test-data/chrM.fa | 335 --------------------------- test-data/chr_m.fasta | 335 +++++++++++++++++++++++++++ test-data/joiner_out3.bed | 32 +- test-data/sam_to_bam_in1.sam | 10 - test-data/sam_to_bam_in2.sam | 10 - test-data/sam_to_bam_out1.bam | test-data/sam_to_bam_out2.bam | tools/data_source/biomart.xml | 17 +- tools/data_source/biomart_test.xml | 17 +- tools/data_source/data_source.py | 2 + tools/data_source/epigraph_import.xml | 3 +- tools/data_source/epigraph_import_test.xml | 3 +- tools/data_source/eupathdb.xml | 19 +- tools/data_source/flymine.xml | 3 +- tools/data_source/flymine_test.xml | 3 +- tools/data_source/gramene_mart.xml | 17 +- tools/data_source/modmine.xml | 3 +- tools/data_source/ratmine.xml | 3 +- tools/data_source/ucsc_tablebrowser.xml | 19 +- tools/data_source/ucsc_tablebrowser_archaea.xml | 23 +- tools/data_source/ucsc_tablebrowser_test.xml | 23 +- tools/data_source/upload.py | 57 +++- tools/data_source/wormbase.xml | 17 +- tools/data_source/wormbase_test.xml | 17 +- tools/emboss_5/emboss_fuzznuc.xml | 12 +- tools/filters/grep.py | 41 ++- tools/filters/grep.xml | 13 +- tools/filters/joiner.xml | 2 +- tools/plotting/xy_plot.xml | 7 +- tools/samtools/sam_to_bam.py | 134 ++++++---- tools/samtools/sam_to_bam.xml | 43 +-- tools/sr_mapping/bowtie_wrapper.xml | 4 +- tools/sr_mapping/lastz_wrapper.xml | 31 +- 50 files changed, 1084 insertions(+), 793 deletions(-) diffs (2808 lines): diff -r b01c8245ef74 -r 14214b45db3f buildbot_setup.sh --- a/buildbot_setup.sh Mon Dec 07 14:45:09 2009 -0500 +++ b/buildbot_setup.sh Mon Dec 07 14:45:47 2009 -0500 @@ -2,13 +2,22 @@ cd `dirname $0` +: ${HOSTTYPE:=`uname -m`} + +# link to HYPHY is arch-dependent +case "$OSTYPE" in + linux-gnu) + kernel=`uname -r | cut -f1,2 -d.` + HYPHY="/galaxy/software/linux$kernel-$HOSTTYPE/hyphy" + ;; +esac + LINKS=" /depot/data2/galaxy/alignseq.loc /depot/data2/galaxy/binned_scores.loc /depot/data2/galaxy/blastdb.loc /depot/data2/galaxy/bowtie_indices.loc /depot/data2/galaxy/encode_datasets.loc -/home/universe/linux-i686/HYPHY /depot/data2/galaxy/lastz_seqs.loc /depot/data2/galaxy/liftOver.loc /depot/data2/galaxy/maf_index.loc @@ -44,6 +53,11 @@ ln -sf $link tool-data done +if [ -d "$HYPHY" ]; then + echo "Linking $HYPHY" + ln -sf $HYPHY tool-data/HYPHY +fi + for sample in $SAMPLES; do file=`echo $sample | sed -e 's/\.sample$//'` echo "Copying $sample to $file" diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/datatypes/binary.py Mon Dec 07 14:45:47 2009 -0500 @@ -7,7 +7,7 @@ from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * from urllib import urlencode, quote_plus -import zipfile +import zipfile, gzip import os, subprocess, tempfile log = logging.getLogger(__name__) @@ -54,32 +54,35 @@ def init_meta( self, dataset, copy_from=None ): Binary.init_meta( self, dataset, copy_from=copy_from ) - """ - GVK 12/2/09: just noticed this - not good and doesn't work, so commenting out for now. - def set_meta( self, dataset, overwrite = True, **kwd ): - # Sets index for BAM file. - index_file = dataset.metadata.bam_index - if not index_file: - index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset ) + def set_meta( self, dataset, overwrite = True, **kwd ): + """ Sets index for BAM file. """ + # These metadata values are not accessible by users, always overwrite + index_file = dataset.metadata.bam_index + if not index_file: + index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset ) + try: + # Using a symlink from ~/database/files/dataset_XX.dat, create a temporary file + # to store the indexex generated from samtools, something like ~/tmp/dataset_XX.dat.bai tmp_dir = tempfile.gettempdir() - tmpf1 = tempfile.NamedTemporaryFile( dir=tmp_dir ) - tmpf1bai = '%s.bai' % tmpf1.name - try: - os.system( 'cd %s' % tmp_dir ) - os.system( 'cp %s %s' % ( dataset.file_name, tmpf1.name ) ) - os.system( 'samtools index %s' % tmpf1.name ) - os.system( 'cp %s %s' % ( tmpf1bai, index_file.file_name ) ) - except Exception, ex: - sys.stderr.write( 'There was a problem creating the index for the BAM file\n%s\n' + str( ex ) ) - tmpf1.close() - if os.path.exists( tmpf1bai ): - os.remove( tmpf1bai ) - dataset.metadata.bam_index = index_file - """ + tmp_file_path = os.path.join( tmp_dir, os.path.basename( dataset.file_name ) ) + # Here tmp_file_path looks something like /tmp/dataset_XX.dat + os.symlink( dataset.file_name, tmp_file_path ) + command = 'samtools index %s' % tmp_file_path + proc = subprocess.Popen( args=command, shell=True ) + proc.wait() + except: + err_msg = 'Error creating index file (%s) for BAM file (%s)' % ( str( tmp_file_path ), str( dataset.file_name ) ) + log.exception( err_msg ) + sys.stderr.write( err_msg ) + # Move the temporary index file ~/tmp/dataset_XX.dat.bai to be ~/database/files/_metadata_files/dataset_XX.dat + shutil.move( '%s.bai' % ( tmp_file_path ), index_file.file_name ) + os.unlink( tmp_file_path ) + dataset.metadata.bam_index = index_file def sniff( self, filename ): + # BAM is compressed in the BGZF format, and must not be uncompressed in Galaxy. # The first 4 bytes of any bam file is 'BAM\1', and the file is binary. try: - header = open( filename ).read(4) + header = gzip.open( filename ).read(4) if binascii.b2a_hex( header ) == binascii.hexlify( 'BAM\1' ): return True return False diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/datatypes/sequence.py Mon Dec 07 14:45:47 2009 -0500 @@ -309,35 +309,21 @@ return #this is not a MAF file dataset.metadata.species = species dataset.metadata.blocks = blocks - #only overwrite the contents if our newly determined chromosomes don't match stored + + #write species chromosomes to a file chrom_file = dataset.metadata.species_chromosomes - compare_chroms = {} - if chrom_file: - try: - for line in open( chrom_file.file_name ): - fields = line.split( "\t" ) - if fields: - spec = fields.pop( 0 ) - if spec: - compare_chroms[spec] = fields - except: - pass - #write out species chromosomes again only if values are different - if not species_chromosomes or compare_chroms != species_chromosomes: - tmp_file = tempfile.TemporaryFile( 'w+b' ) - for spec, chroms in species_chromosomes.items(): - tmp_file.write( "%s\t%s\n" % ( spec, "\t".join( chroms ) ) ) - - if not chrom_file: - chrom_file = dataset.metadata.spec['species_chromosomes'].param.new_file( dataset = dataset ) - tmp_file.seek( 0 ) - open( chrom_file.file_name, 'wb' ).write( tmp_file.read() ) - dataset.metadata.species_chromosomes = chrom_file - tmp_file.close() + if not chrom_file: + chrom_file = dataset.metadata.spec['species_chromosomes'].param.new_file( dataset = dataset ) + chrom_out = open( chrom_file.file_name, 'wb' ) + for spec, chroms in species_chromosomes.items(): + chrom_out.write( "%s\t%s\n" % ( spec, "\t".join( chroms ) ) ) + chrom_out.close() + dataset.metadata.species_chromosomes = chrom_file + index_file = dataset.metadata.maf_index if not index_file: index_file = dataset.metadata.spec['maf_index'].param.new_file( dataset = dataset ) - indexes.write( open( index_file.file_name, 'w' ) ) + indexes.write( open( index_file.file_name, 'wb' ) ) dataset.metadata.maf_index = index_file def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/datatypes/test/1.bam Binary file lib/galaxy/datatypes/test/1.bam has changed diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/tools/__init__.py Mon Dec 07 14:45:47 2009 -0500 @@ -17,6 +17,7 @@ from parameters import * from parameters.grouping import * from parameters.validation import LateValidationError +from parameters.input_translation import ToolInputTranslator from galaxy.util.expressions import ExpressionContext from galaxy.tools.test import ToolTestBuilder from galaxy.tools.actions import DefaultToolAction @@ -299,48 +300,10 @@ self.tool_type = root.get( "tool_type", None ) #Force history to fully refresh after job execution for this tool. Useful i.e. when an indeterminate number of outputs are created by a tool. self.force_history_refresh = util.string_as_bool( root.get( 'force_history_refresh', 'False' ) ) - # data_source tool - if self.tool_type == "data_source": - self.URL_method = root.get( "URL_method", "get" ) # get is the default - self.param_trans_dict = {} - req_param_trans = root.find( "request_param_translation" ) - if req_param_trans is not None: - for req_param in req_param_trans.findall( "request_param" ): - # req_param tags must look like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="" /> - trans_list = [] - remote_name = req_param.get( "remote_name" ) - trans_list.append( req_param.get( "galaxy_name" ) ) - trans_list.append( req_param.get( "missing" ) ) - if req_param.get( "galaxy_name" ) == "data_type": - # The req_param tag for data_type is special in that it can contain another tag set like - # <data_type_translation> - # <format galaxy_format="tabular" remote_format="selectedFields" /> - # </data_type_translation> - format_trans = req_param.find( "data_type_translation" ) - if format_trans is not None: - format_trans_dict = {} - for format in format_trans.findall( "format" ): - remote_format = format.get( "remote_format" ) - galaxy_format = format.get( "galaxy_format" ) - format_trans_dict[ remote_format ] = galaxy_format - trans_list.append( format_trans_dict ) - elif req_param.get( "galaxy_name" ) == "URL": - # Some remote data sources ( e.g., Gbrowse ) send parameters back to - # Galaxy in the initial response that must be added to URL prior to - # Galaxy sending the secondary request to the URL. The tag set looks - # asomething like: - # <add_to_url> - # <param_from_source name="d" missing="" /> - # </add_to_url> - add_to_url = req_param.find( "add_to_url" ) - if add_to_url is not None: - add_to_url_dict = {} - for param_from_source in add_to_url.findall( "param_from_source" ): - name = param_from_source.get( "name" ) - value = param_from_source.get( "missing" ) # only used if the source doesn't send the param name - add_to_url_dict[ name ] = value - trans_list.append( add_to_url_dict ) - self.param_trans_dict[ remote_name ] = trans_list + #load input translator, used by datasource tools to change names/values of incoming parameters + self.input_translator = root.find( "request_param_translation" ) + if self.input_translator: + self.input_translator = ToolInputTranslator.from_element( self.input_translator ) # Command line (template). Optional for tools that do not invoke a local program command = root.find("command") if command is not None and command.text is not None: diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/tools/parameters/basic.py Mon Dec 07 14:45:47 2009 -0500 @@ -7,6 +7,8 @@ from galaxy import config, datatypes, util from galaxy.web import form_builder from galaxy.util.bunch import Bunch +from galaxy.util import string_as_bool, sanitize_param +from sanitize import ToolParameterSanitizer import validation, dynamic_options # For BaseURLToolParameter from galaxy.web import url_for @@ -28,7 +30,9 @@ self.type = param.get("type") self.label = util.xml_text(param, "label") self.help = util.xml_text(param, "help") - self.unsanitize = param.get( "unsanitize", None ) + self.sanitizer = param.find( "sanitizer" ) + if self.sanitizer is not None: + self.sanitizer = ToolParameterSanitizer.from_element( self.sanitizer ) self.html = "no html set" self.repeat = param.get("repeat", None) self.condition = param.get( "condition", None ) @@ -122,7 +126,13 @@ return value def to_param_dict_string( self, value, other_values={} ): - return str( value ) + value = str( value ) + if self.tool is None or self.tool.options.sanitize: + if self.sanitizer: + value = self.sanitizer.sanitize_param( value ) + else: + value = sanitize_param( value ) + return value def validate( self, value, history=None ): for validator in self.validators: @@ -154,7 +164,7 @@ self.name = elem.get( 'name' ) self.size = elem.get( 'size' ) self.value = elem.get( 'value' ) - self.area = str_bool( elem.get( 'area', False ) ) + self.area = string_as_bool( elem.get( 'area', False ) ) def get_html_field( self, trans=None, value=None, other_values={} ): if self.area: return form_builder.TextArea( self.name, self.size, value or self.value ) @@ -262,7 +272,7 @@ self.truevalue = elem.get( 'truevalue', 'true' ) self.falsevalue = elem.get( 'falsevalue', 'false' ) self.name = elem.get( 'name' ) - self.checked = str_bool( elem.get( 'checked' ) ) + self.checked = string_as_bool( elem.get( 'checked' ) ) def get_html_field( self, trans=None, value=None, other_values={} ): checked = self.checked if value: @@ -299,7 +309,7 @@ """ ToolParameter.__init__( self, tool, elem ) self.name = elem.get( 'name' ) - self.ajax = str_bool( elem.get( 'ajax-upload' ) ) + self.ajax = string_as_bool( elem.get( 'ajax-upload' ) ) def get_html_field( self, trans=None, value=None, other_values={} ): return form_builder.FileField( self.name, ajax = self.ajax, value = value ) def from_html( self, value, trans=None, other_values={} ): @@ -474,7 +484,7 @@ """ def __init__( self, tool, elem, context=None ): ToolParameter.__init__( self, tool, elem ) - self.multiple = str_bool( elem.get( 'multiple', False ) ) + self.multiple = string_as_bool( elem.get( 'multiple', False ) ) self.display = elem.get( 'display', None ) self.separator = elem.get( 'separator', ',' ) self.legal_values = set() @@ -492,7 +502,7 @@ for index, option in enumerate( elem.findall( "option" ) ): value = option.get( "value" ) self.legal_values.add( value ) - selected = str_bool( option.get( "selected", False ) ) + selected = string_as_bool( option.get( "selected", False ) ) self.static_options.append( ( option.text, value, selected ) ) self.is_dynamic = ( ( self.dynamic_options is not None ) or ( self.options is not None ) ) def get_options( self, trans, other_values ): @@ -571,11 +581,19 @@ if value is None: return "None" if isinstance( value, list ): - if not(self.repeat): + if not( self.repeat ): assert self.multiple, "Multiple values provided but parameter is not expecting multiple values" - return self.separator.join( map( str, value ) ) + value = map( str, value ) else: - return str(value) + value = str( value ) + if self.tool is None or self.tool.options.sanitize: + if self.sanitizer: + value = self.sanitizer.sanitize_param( value ) + else: + value = sanitize_param( value ) + if isinstance( value, list ): + value = self.separator.join( value ) + return value def value_to_basic( self, value, app ): if isinstance( value, UnvalidatedValue ): return { "__class__": "UnvalidatedValue", "value": value.value } @@ -741,9 +759,9 @@ def __init__( self, tool, elem ): SelectToolParameter.__init__( self, tool, elem ) self.tool = tool - self.numerical = str_bool( elem.get( "numerical", False )) - self.force_select = str_bool( elem.get( "force_select", True )) - self.accept_default = str_bool( elem.get( "accept_default", False )) + self.numerical = string_as_bool( elem.get( "numerical", False )) + self.force_select = string_as_bool( elem.get( "force_select", True )) + self.accept_default = string_as_bool( elem.get( "accept_default", False )) self.data_ref = elem.get( "data_ref", None ) self.is_dynamic = True def get_column_list( self, trans, other_values ): @@ -894,11 +912,11 @@ def __init__( self, tool, elem, context=None ): def recurse_option_elems( cur_options, option_elems ): for option_elem in option_elems: - selected = str_bool( option_elem.get( 'selected', False ) ) + selected = string_as_bool( option_elem.get( 'selected', False ) ) cur_options.append( { 'name':option_elem.get( 'name' ), 'value': option_elem.get( 'value'), 'options':[], 'selected':selected } ) recurse_option_elems( cur_options[-1]['options'], option_elem.findall( 'option' ) ) ToolParameter.__init__( self, tool, elem ) - self.multiple = str_bool( elem.get( 'multiple', False ) ) + self.multiple = string_as_bool( elem.get( 'multiple', False ) ) self.display = elem.get( 'display', None ) self.hierarchy = elem.get( 'hierarchy', 'exact' ) #exact or recurse self.separator = elem.get( 'separator', ',' ) @@ -1019,7 +1037,13 @@ if len( rval ) > 1: if not( self.repeat ): assert self.multiple, "Multiple values provided but parameter is not expecting multiple values" - return self.separator.join( rval ) + rval = self.separator.join( rval ) + if self.tool is None or self.tool.options.sanitize: + if self.sanitizer: + rval = self.sanitizer.sanitize_param( rval ) + else: + rval = sanitize_param( rval ) + return rval def get_initial_value( self, trans, context ): def recurse_options( initial_values, options ): @@ -1094,7 +1118,7 @@ def __init__( self, tool, elem ): ToolParameter.__init__( self, tool, elem ) # Add metadata validator - if not str_bool( elem.get( 'no_validation', False ) ): + if not string_as_bool( elem.get( 'no_validation', False ) ): self.validators.append( validation.MetadataValidator() ) # Build tuple of classes for supported data formats formats = [] @@ -1108,9 +1132,9 @@ else: formats.append( tool.app.datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ ) self.formats = tuple( formats ) - self.multiple = str_bool( elem.get( 'multiple', False ) ) + self.multiple = string_as_bool( elem.get( 'multiple', False ) ) # Optional DataToolParameters are used in tools like GMAJ and LAJ - self.optional = str_bool( elem.get( 'optional', False ) ) + self.optional = string_as_bool( elem.get( 'optional', False ) ) # TODO: Enhance dynamic options for DataToolParameters. Currently, # only the special case key='build' of type='data_meta' is # a valid filter @@ -1366,13 +1390,4 @@ runtime. """ pass - -def str_bool(in_str): - """ - returns true/false of a string, since bool(str), always returns true if string is not empty - default action is to return false - """ - if str(in_str).lower() == 'true' or str(in_str).lower() == 'yes': - return True - return False diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/tools/parameters/input_translation.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/tools/parameters/input_translation.py Mon Dec 07 14:45:47 2009 -0500 @@ -0,0 +1,106 @@ +""" +Tool Input Translation. +""" + +import logging +from galaxy.util.bunch import Bunch + +log = logging.getLogger( __name__ ) + +class ToolInputTranslator( object ): + """ + Handles Tool input translation. + This is used for data source tools + + >>> from galaxy.util import Params + >>> from elementtree.ElementTree import XML + >>> translator = ToolInputTranslator.from_element( XML( + ... ''' + ... <request_param_translation> + ... <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> + ... <request_param galaxy_name="URL" remote_name="URL" missing="" > + ... <append_param separator="&" first_separator="?" join="="> + ... <value name="_export" missing="1" /> + ... <value name="GALAXY_URL" missing="0" /> + ... </append_param> + ... </request_param> + ... <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> + ... <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> + ... <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> + ... <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> + ... <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > + ... <value_translation> + ... <value galaxy_value="tabular" remote_value="primaryTable" /> + ... <value galaxy_value="tabular" remote_value="selectedFields" /> + ... <value galaxy_value="wig" remote_value="wigData" /> + ... <value galaxy_value="interval" remote_value="tab" /> + ... <value galaxy_value="html" remote_value="hyperlinks" /> + ... <value galaxy_value="fasta" remote_value="sequence" /> + ... </value_translation> + ... </request_param> + ... </request_param_translation> + ... ''' ) ) + >>> params = Params( { 'db':'hg17', 'URL':'URL_value', 'org':'Human', 'hgta_outputType':'primaryTable' } ) + >>> translator.translate( params ) + >>> print params + {'hgta_outputType': 'primaryTable', 'data_type': 'tabular', 'table': 'unknown table', 'URL': 'URL_value?GALAXY_URL=0&_export=1', 'org': 'Human', 'URL_method': 'post', 'db': 'hg17', 'organism': 'Human', 'dbkey': 'hg17', 'description': 'no description'} + """ + @classmethod + def from_element( cls, elem ): + """Loads the proper filter by the type attribute of elem""" + rval = ToolInputTranslator() + for req_param in elem.findall( "request_param" ): + # req_param tags must look like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="" /> + #trans_list = [] + remote_name = req_param.get( "remote_name" ) + galaxy_name = req_param.get( "galaxy_name" ) + missing = req_param.get( "missing" ) + value_trans = {} + append_param = None + + value_trans_elem = req_param.find( 'value_translation' ) + if value_trans_elem: + for value_elem in value_trans_elem.findall( 'value' ): + remote_value = value_elem.get( "remote_value" ) + galaxy_value = value_elem.get( "galaxy_value" ) + if None not in [ remote_value, galaxy_value ]: + value_trans[ remote_value ] = galaxy_value + + append_param_elem = req_param.find( "append_param" ) + if append_param_elem: + separator = append_param_elem.get( 'separator', ',' ) + first_separator = append_param_elem.get( 'first_separator', None ) + join_str = append_param_elem.get( 'join', '=' ) + append_dict = {} + for value_elem in append_param_elem.findall( 'value' ): + value_name = value_elem.get( 'name' ) + value_missing = value_elem.get( 'missing' ) + if None not in [ value_name, value_missing ]: + append_dict[ value_name ] = value_missing + append_param = Bunch( separator = separator, first_separator = first_separator, join_str = join_str, append_dict = append_dict ) + + rval.param_trans_dict[ remote_name ] = Bunch( galaxy_name = galaxy_name, missing = missing, value_trans = value_trans, append_param = append_param ) + + return rval + + def __init__( self ): + self.param_trans_dict = {} + + def translate( self, params ): + """ + update params in-place + """ + for remote_name, translator in self.param_trans_dict.iteritems(): + galaxy_name = translator.galaxy_name #NB: if a param by name galaxy_name is provided, it is always thrown away unless galaxy_name == remote_name + value = params.get( remote_name, translator.missing ) #get value from input params, or use default value specified in tool config + if translator.value_trans and value in translator.value_trans: + value = translator.value_trans[ value ] + if translator.append_param: + for param_name, missing_value in translator.append_param.append_dict.iteritems(): + param_value = params.get( param_name, missing_value ) + if translator.append_param.first_separator and translator.append_param.first_separator not in value: + sep = translator.append_param.first_separator + else: + sep = translator.append_param.separator + value += '%s%s%s%s' % ( sep, param_name, translator.append_param.join_str, param_value ) + params.update( { galaxy_name: value } ) diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/tools/parameters/sanitize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/tools/parameters/sanitize.py Mon Dec 07 14:45:47 2009 -0500 @@ -0,0 +1,167 @@ +""" +Tool Parameter specific sanitizing. +""" + +import logging +import string +import galaxy.util + +log = logging.getLogger( __name__ ) + +class ToolParameterSanitizer( object ): + """ + Handles tool parameter specific sanitizing. + + >>> from elementtree.ElementTree import XML + >>> sanitizer = ToolParameterSanitizer.from_element( XML( + ... ''' + ... <sanitizer invalid_char=""> + ... <valid initial="string.letters"/> + ... </sanitizer> + ... ''' ) ) + >>> sanitizer.sanitize_param( string.printable ) == string.letters + True + >>> slash = chr( 92 ) + >>> sanitizer = ToolParameterSanitizer.from_element( XML( + ... ''' + ... <sanitizer> + ... <valid initial="none"> + ... <add preset="string.printable"/> + ... <remove value="""/> + ... <remove value="%s"/> + ... </valid> + ... <mapping initial="none"> + ... <add source=""" target="%s""/> + ... <add source="%s" target="%s%s"/> + ... </mapping> + ... </sanitizer> + ... ''' % ( slash, slash, slash, slash, slash ) ) ) + >>> text = '%s"$rm!' % slash + >>> [ c for c in sanitizer.sanitize_param( text ) ] == [ slash, slash, slash, '"', '$', 'r', 'm', '&', '#', '!' ] + True + """ + + VALID_PRESET = { 'default':( string.letters + string.digits +" -=_.()/+*^,:?!" ), 'none':'' } + MAPPING_PRESET = { 'default':galaxy.util.mapped_chars, 'none':{} } + DEFAULT_INVALID_CHAR = 'X' + + #class methods + @classmethod + def from_element( cls, elem ): + """Loads the proper filter by the type attribute of elem""" + #TODO: Add ability to generically specify a method to use for sanitizing input via specification in tool XML + rval = ToolParameterSanitizer() + rval._invalid_char = elem.get( 'invalid_char', cls.DEFAULT_INVALID_CHAR ) + rval.sanitize = galaxy.util.string_as_bool( elem.get( 'sanitize', 'True' ) ) + for valid_elem in elem.findall( 'valid' ): + rval._valid_chars = rval.get_valid_by_name( valid_elem.get( 'initial', 'default' ) ) + for action_elem in valid_elem: + preset = rval.get_valid_by_name( action_elem.get( 'preset', 'none' ) ) + valid_value = [ val for val in action_elem.get( 'value', [] ) ] + if action_elem.tag.lower() == 'add': + for val in ( preset + valid_value ): + if val not in rval._valid_chars: + rval._valid_chars.append( val ) + elif action_elem.tag.lower() == 'remove': + for val in ( preset + valid_value ): + while val in rval._valid_chars: + rval._valid_chars.remove( val ) + else: + log.debug( 'Invalid action tag in valid: %s' % action_elem.tag ) + for mapping_elem in elem.findall( 'mapping' ): + rval._mapped_chars = rval.get_mapping_by_name( mapping_elem.get( 'initial', 'default' ) ) + for action_elem in mapping_elem: + map_source = action_elem.get( 'source', None ) + map_target = action_elem.get( 'target', None ) + preset = rval.get_mapping_by_name( action_elem.get( 'preset', 'none' ) ) + if action_elem.tag.lower() == 'add': + rval._mapped_chars.update( preset ) + if None not in [ map_source, map_target ]: + rval._mapped_chars[ map_source ] = map_target + elif action_elem.tag.lower() == 'remove': + for map_key in preset.keys(): + if map_key in rval._mapped_chars: + del rval._mapped_chars[ map_key ] + if map_source is not None and map_key in rval._mapped_chars: + del rval._mapped_chars[ map_key ] + else: + log.debug( 'Invalid action tag in mapping: %s' % action_elem.tag ) + return rval + + @classmethod + def get_valid_by_name( cls, name ): + rval = [] + for split_name in name.split( ',' ): + split_name = split_name.strip() + value = [] + if split_name.startswith( 'string.' ): + try: + value = eval( split_name ) + except NameError, e: + log.debug( 'Invalid string preset specified: %s' % e ) + elif split_name in cls.VALID_PRESET: + value = cls.VALID_PRESET[ split_name ] + else: + log.debug( 'Invalid preset name specified: %s' % split_name ) + rval.extend( [ val for val in value if val not in rval ] ) + return rval + + @classmethod + def get_mapping_by_name( cls, name ): + rval = {} + for split_name in name.split( ',' ): + split_name = split_name.strip() + if split_name in cls.MAPPING_PRESET: + rval.update( cls.MAPPING_PRESET[ split_name ] ) + else: + log.debug( 'Invalid preset name specified: %s' % split_name ) + return rval + #end class methods + + def __init__( self ): + self._valid_chars = [] #List of valid characters + self._mapped_chars = {} #Replace a char with a any number of characters + self._invalid_char = self.DEFAULT_INVALID_CHAR #Replace invalid characters with this character + self.sanitize = True #Simply pass back the passed in value + + def restore_text( self, text ): + """Restores sanitized text""" + if self.sanitize: + for key, value in self._mapped_chars.iteritems(): + text = text.replace( value, key ) + return text + + def restore_param( self, value ): + if self.sanitize: + if isinstance( value, basestring ): + return self.restore_text( value ) + elif isinstance( value, list ): + return map( self.restore_text, value ) + else: + raise Exception, 'Unknown parameter type (%s:%s)' % ( type( value ), value ) + return value + + def sanitize_text( self, text ): + """Restricts the characters that are allowed in a text""" + if not self.sanitize: + return text + rval = [] + for c in text: + if c in self._valid_chars: + rval.append( c ) + elif c in self._mapped_chars: + rval.append( self._mapped_chars[ c ] ) + else: + rval.append( self._invalid_char ) + return ''.join( rval ) + + def sanitize_param( self, value ): + """Clean incoming parameters (strings or lists)""" + if not self.sanitize: + return value + if isinstance( value, basestring ): + return self.sanitize_text( value ) + elif isinstance( value, list ): + return map( self.sanitize_text, value ) + else: + raise Exception, 'Unknown parameter type (%s:%s)' % ( type( value ), value ) diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/tools/util/maf_utilities.py --- a/lib/galaxy/tools/util/maf_utilities.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/tools/util/maf_utilities.py Mon Dec 07 14:45:47 2009 -0500 @@ -191,13 +191,16 @@ def build_maf_index_species_chromosomes( filename, index_species = None ): species = [] species_chromosomes = {} - indexes = bx.interval_index_file.Indexes() + indexes = bx.interval_index_file.Indexes() + blocks = 0 try: maf_reader = bx.align.maf.Reader( open( filename ) ) while True: pos = maf_reader.file.tell() block = maf_reader.next() - if block is None: break + if block is None: + break + blocks += 1 for c in block.components: spec = c.src chrom = None @@ -225,11 +228,11 @@ #most likely a bad MAF log.debug( 'Building MAF index on %s failed: %s' % ( filename, e ) ) return ( None, [], {} ) - return ( indexes, species, species_chromosomes ) + return ( indexes, species, species_chromosomes, blocks ) #builds and returns ( index, index_filename ) for specified maf_file def build_maf_index( maf_file, species = None ): - indexes, found_species, species_chromosomes = build_maf_index_species_chromosomes( maf_file, species ) + indexes, found_species, species_chromosomes, blocks = build_maf_index_species_chromosomes( maf_file, species ) if indexes is not None: fd, index_filename = tempfile.mkstemp() out = os.fdopen( fd, 'w' ) diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/util/__init__.py Mon Dec 07 14:45:47 2009 -0500 @@ -174,70 +174,16 @@ [('status', 'on'), ('symbols', 'alpha'), ('symbols', '__lt____gt__'), ('symbols', 'XrmXX!')] """ - # HACK: Need top prevent sanitizing certain parameter types. The - # better solution I think is to more responsibility for - # sanitizing into the tool parameters themselves so that - # different parameters can be sanitized in different ways. + # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers? NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths'] - def __init__( self, params, safe=True, sanitize=True, tool=None ): - if safe: + def __init__( self, params, sanitize=True ): + if sanitize: for key, value in params.items(): - # Check to see if we should translate certain parameter names. For example, - # in data_source tools, the external data source application may send back - # parameter names like GENOME which is translated to dbkey in Galaxy. - # param_trans_dict looks like { "GENOME" : [ "dbkey" "?" ] } - new_key = key - new_value = value - if tool and tool.tool_type == 'data_source': - if key in tool.param_trans_dict: - new_key = tool.param_trans_dict[ key ][0] - if new_key == 'data_type': - try: - # The Galaxy "data_type entry is special in that it can include the ability - # to translate the format to a Galaxy supported format. In the dict, this entry - # looks something like: - # {'hgta_outputType': ['data_type', 'bed', {'selectedFields': 'tabular'}] } - format_trans_dict = tool.param_trans_dict[ key ][2] - if value in format_trans_dict: - new_value = format_trans_dict[ value ] - except: - pass - elif new_key == 'URL': - # As above, the URL can include a set of params from the remote data source - # that must be appended to the URL prior to the post. In this case, the - # dict entry would look something like: - # ['URL', '', {'q': '', 's': '', 'd': '', 'dbkey': '', 't': ''}] - try: - add_to_url_dict = tool.param_trans_dict[ key ][2] - if new_value.count( '?' ) == 0: - sep = '?' - else: - sep = '&' - for param_name, missing_value in add_to_url_dict.items(): - param_value = params.get( param_name, None ) - if not param_value and missing_value: - param_value = missing_value - if param_value: - new_value += '%s%s=%s' % ( sep, param_name, param_value ) - sep = '&' - except: - pass - if not value and not new_value: - new_value = tool.param_trans_dict[ key ][1] - if sanitize and not ( key in self.NEVER_SANITIZE or True in [ key.endswith( "|%s" % nonsanitize_parameter ) for nonsanitize_parameter in self.NEVER_SANITIZE ] ): #sanitize check both ungrouped and grouped parameters by name - self.__dict__[ new_key ] = sanitize_param( new_value ) + if key not in self.NEVER_SANITIZE and True not in [ key.endswith( "|%s" % nonsanitize_parameter ) for nonsanitize_parameter in self.NEVER_SANITIZE ]: #sanitize check both ungrouped and grouped parameters by name. Anything relying on NEVER_SANITIZE should be changed to not require this and NEVER_SANITIZE should be removed. + self.__dict__[ key ] = sanitize_param( value ) else: - self.__dict__[ new_key ] = new_value - if tool and tool.tool_type == 'data_source': - # Add the tool's URL_method to params - self.__dict__[ 'URL_method' ] = tool.URL_method - for key, value in tool.param_trans_dict.items(): - # Make sure that all translated values used in Galaxy are added to the params - galaxy_name = tool.param_trans_dict[ key ][0] - if galaxy_name not in self.__dict__: - # This will set the galaxy_name to the "missing" value - self.__dict__[ galaxy_name ] = tool.param_trans_dict[ key ][1] + self.__dict__[ key ] = value else: self.__dict__.update(params) diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/web/controllers/async.py --- a/lib/galaxy/web/controllers/async.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/web/controllers/async.py Mon Dec 07 14:45:47 2009 -0500 @@ -32,7 +32,7 @@ return trans.response.send_redirect( "/index" ) history = trans.get_history( create=True ) - params = util.Params(kwd, safe=False) + params = util.Params(kwd, sanitize=False) STATUS = params.STATUS URL = params.URL data_id = params.data_id diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/web/controllers/root.py Mon Dec 07 14:45:47 2009 -0500 @@ -270,7 +270,7 @@ if trans.app.security_agent.can_access_dataset( roles, data.dataset ): if data.state == trans.model.Dataset.states.UPLOAD: return trans.show_error_message( "Please wait until this dataset finishes uploading before attempting to edit its metadata." ) - params = util.Params( kwd, safe=False ) + params = util.Params( kwd, sanitize=False ) if params.change: # The user clicked the Save button on the 'Change data type' form if data.datatype.allow_datatype_change and trans.app.datatypes_registry.get_datatype_by_extension( params.datatype ).allow_datatype_change: diff -r b01c8245ef74 -r 14214b45db3f lib/galaxy/web/controllers/tool_runner.py --- a/lib/galaxy/web/controllers/tool_runner.py Mon Dec 07 14:45:09 2009 -0500 +++ b/lib/galaxy/web/controllers/tool_runner.py Mon Dec 07 14:45:47 2009 -0500 @@ -43,7 +43,10 @@ log.error( "index called with tool id '%s' but no such tool exists", tool_id ) trans.log_event( "Tool id '%s' does not exist" % tool_id ) return "Tool '%s' does not exist, kwd=%s " % (tool_id, kwd) - params = util.Params( kwd, sanitize=tool.options.sanitize, tool=tool ) + params = util.Params( kwd, sanitize = False ) #Sanitize parameters when substituting into command line via input wrappers + #do param translation here, used by datasource tools + if tool.input_translator: + tool.input_translator.translate( params ) history = trans.get_history() template, vars = tool.handle_input( trans, params.__dict__ ) if len(params) > 0: @@ -73,7 +76,7 @@ # Get the associated job, if any. If this hda was copied from another, # we need to find the job that created the origial hda job_hda = data - while job_hda.copied_from_history_dataset_association: + while job_hda.copied_from_history_dataset_association:#should this check library datasets as well? job_hda = job_hda.copied_from_history_dataset_association if not job_hda.creating_job_associations: error( "Could not find the job for this dataset" ) @@ -153,7 +156,7 @@ tool = self.get_toolbox().tools_by_id.get( tool_id, None ) if not tool: return False # bad tool_id - nonfile_params = util.Params( kwd, sanitize=tool.options.sanitize, tool=tool ) + nonfile_params = util.Params( kwd, sanitize=False ) if kwd.get( 'tool_state', None ) not in ( None, 'None' ): encoded_state = util.string_to_object( kwd["tool_state"] ) tool_state = DefaultToolState() diff -r b01c8245ef74 -r 14214b45db3f test-data/1.bam Binary file test-data/1.bam has changed diff -r b01c8245ef74 -r 14214b45db3f test-data/2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.sam Mon Dec 07 14:45:47 2009 -0500 @@ -0,0 +1,10 @@ +HWI-EAS91_1_30788AAXX:1:1:1095:605 0 chrM 23 25 36M * 0 0 AAGCAAGNNACTGAAAATGCCTAGATGAGTATTCTT IIIIIII""IIIIIIIIIIIIIIIEIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1650:1185 0 chrM 14956 25 36M * 0 0 ACCCCAGNNAACCCTCTCAGCACTCCCCCTCATATT IIIIIII""IIIIIIIIIIII6IIIIIIIII5I-II NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:799:192 16 chrM 8421 25 36M * 0 0 CCTGTAGCCCTAGCCGTGCGGCTAACCNNTAACATT II%::I<IIIIIEIII8IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1082:719 16 chrM 7191 25 36M * 0 0 TAAATTAACCCATACCAGCACCATAGANNCTCAAGA <III0EII3+3I29I>III8AIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1746:1180 16 chrM 12013 25 36M * 0 0 CCTAAGCTTCAAACTAGATTACTTCTCNNTAATTTT IIIIIIIIFIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:606:460 0 chrM 4552 25 36M * 0 0 TTAATTTNNATTATAATAACACTCACAATATTCATA IIIIIII""IIIIIIIIIIIIIIIIII?I6IIIII6 NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1059:362 16 chrM 7348 25 36M * 0 0 GGCCACCAATGATACTGAAGCTACGAGNNTACCGAT II/<)2IIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1483:1161 16 chrM 15080 25 36M * 0 0 TCCTGATCCTAGCACTCATCCCCACCCNNCACATAT HIIIIIFIIAIHIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1273:600 16 chrM 13855 25 36M * 0 0 GTATTAGACACCCATACCTCAGGATACNNCTCAGTA IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1190:1283 16 chrM 15338 25 36M * 0 0 TATATCGCACATTACCCTGGTCTTGTANNCCAGAAA EIII?-IIIIIAIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 diff -r b01c8245ef74 -r 14214b45db3f test-data/2.tabular --- a/test-data/2.tabular Mon Dec 07 14:45:09 2009 -0500 +++ b/test-data/2.tabular Mon Dec 07 14:45:47 2009 -0500 @@ -1,10 +1,10 @@ -1 68 4.1 -2 71 4.6 -3 62 3.8 -4 75 4.4 -5 58 3.2 -6 60 3.1 -7 67 3.8 -8 68 4.1 -9 71 4.3 -10 69 3.7 +1 68 4.1 +2 71 4.6 +3 62 3.8 +4 75 4.4 +5 58 3.2 +6 60 3.1 +7 67 3.8 +8 68 4.1 +9 71 4.3 +10 69 3.7 diff -r b01c8245ef74 -r 14214b45db3f test-data/3.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.sam Mon Dec 07 14:45:47 2009 -0500 @@ -0,0 +1,10 @@ +HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1698:516 16 chrM 2735 25 36M * 0 0 TTTACACTCAGAGGTTCAACTCCTCTCNNTAACAAC I9IIIII5IIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1491:637 16 chrM 10864 25 36M * 0 0 TGTAGAAGCCCCAATTGCCGGATCCATNNTGCTAGC DBAIIIIIIIIIIIFIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1711:249 16 chrM 10617 25 36M * 0 0 ACCAAACAGAACGCCTGAACGCAGGCCNNTACTTCC IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1634:211 0 chrM 9350 25 36M * 0 0 GAAGCAGNNGCTTGATACTGACACTTCGTCGACGTA IIIIIII""IIIIIIIIIIIIIIIIIIIIII9IIDF NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1218:141 16 chrM 14062 25 36M * 0 0 ACAAAACTAACAACAAAAATAACACTCNNAATAAAC I+IIII1IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1398:854 16 chrM 3921 25 36M * 0 0 CACCCTTCCCGTACTAATAAATCCCCTNNTCTTCAC IIIII=AIIIIIIIIIIIIIIBIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1310:991 16 chrM 10002 25 36M * 0 0 CTCCTATGCCTAGAAGGAATAATACTANNACTATTC I:2IEI:IIDIIIIII4IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1716:413 0 chrM 6040 25 36M * 0 0 GATCCAANNCTTTATCAACACCTATTCTGATTCTTC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1630:59 16 chrM 12387 25 36M * 0 0 TCATACTCGACCCCAACCTTACCAACCNNCCGCTCC FIIHII;IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 diff -r b01c8245ef74 -r 14214b45db3f test-data/chrM.fa --- a/test-data/chrM.fa Mon Dec 07 14:45:09 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,335 +0,0 @@ ->chrM -GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA -GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT -TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT -CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA -AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG -ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG -GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA -ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA -AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA -AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA -TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA -AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT -GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC -CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT -CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA -AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT -TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC -TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT -GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT -CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA -CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG -TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA -TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC -TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG -AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG -ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT -AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC -CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT -ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG -AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA -ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA -AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA -ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG -TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA -ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG -TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA -CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA -TCATCTATTTAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA -AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT -ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC -AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca -taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt -tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg -cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG -ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT -TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC -CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA -TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG -CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG -TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT -CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT -CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT -AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA -AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg -tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt -caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT -GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT -CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG -GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA -CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT -AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC -CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC -CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA -CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG -TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC -ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC -ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA -ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA -TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA -ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG -GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC -ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA -CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC -TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA -CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAGTT -ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact -acaggaattgaacctgctcctgagaattcaaaatcctccgtgctaccgaa -ttacaccatgtcctaCAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCAT -ACCCCGAAAATGTTGGATTACACCCTTCCCGTACTAATAAATCCCCTTAT -CTTCACAACTATTCTAATAACAGTTCTTCTAGGAACTATAATCGTTATAA -TAAGCTCACACTGACTAATAATCTGAATCGGATTTGAAATAAATCTACTA -GCCATTATCCCTATCCTAATAAAAAAGTACAATCCCCGAACCATAGAAGC -CTCCACCAAATATTTTCTAACCCAAGCCACCGCATCAATACTCCTCATAA -TAGCGATCATCATTAACCTCATACACTCAGGCCAATGAACAATCACAAAA -GTCTTCAACCCCACAGCGTCCATCATTATAACTTCAGCTCTCGCCATAAA -ACTTGGACTCACACCATTCCACTTCTGAGTACCCGAAGTCACACAGGGCA -TCTCATTAACATCAGGTCTCATCCTACTTACATGACAAAAACTAGCCCCA -ATATCAATCCTATATCAAATCTCACCCTCAATTAACCTAAATATCTTATT -AACTATAGCCGTACTGTCAATCCTAGTAGGAGGCTGAGGCGGTCTCAACC -AAACCCAACTACGAAAAATCATAGCATACTCGTCAATCGCGCATATAGGA -TGAATAACAGCTGTCCTAGTATATAACCCAACACTAACAATACTAAACAT -ATTAATTTACATTATAATAACACTCACAATATTCATACTATTTATCCACA -GCTCCTCTACTACAACACTATCACTCTCCCACACATGAAACAAAATACCT -CTAACCACTACACTAATCTTAATTACCTTACTATCCATAGGAGGCCTCCC -CCCACTATCAGGATTCATACCCAAATGAATAATCATTCAAGAGCTCACCA -AAAATAGCAGCATCATCCTCCCCACACTAATAGCCATTATAGCACTACTC -AACCTCTACTTCTACATACGACTAACCTATTCCACCTCACTGACCATATT -CCCATCCACAAACAACATAAAAATAAAATGACAATTCGAAACCAAACGAA -TTACTCTCTTACCCCCGTTAATTGTTATATCCTCCCTACTCCTCCCCCTA -ACCCCCATACTATCAATTTTGGACTAGGAATTTAGGTTAACATCCCAGAC -CAAGAGCCTTCAAAGCTCTAAGCAAGTGAATCCACTTAATTCCTGCATAC -TAAGGACTGCGAGACTCTATCTCACATCAATTGAACGCAAATCAAACTCT -TTTATTAAGCTAAGCCCTTACTAGATTGGTGGGCTACCATCCCACGAAAT -TTTAGTTAACAGCTAAATACCCTAATCAACTGGCTTCAATCTACTTCTCC -CGCCGCCTAGAAAAAAAGGCGGGAGAAGCCCCGGCAGAAATTGAAGCTGC -TCCTTTGAATTTGCAATTCAATGTGAAAATTCACCACGGGACTTGATAAG -AAGAGGATTCCAACCCCTGTCTTTAGATTTACAGTCTAATGCTTACTCAG -CCATCTTACCTATGTTCATCAACCGCTGACTATTTTCAACTAACCACAAA -GACATCGGCACTCTGTACCTCCTATTCGGCGCTTGAGCTGGAATAGTAGG -AACTGCCCTAAGCCTCCTAATCCGTGCTGAATTAGGCCAACCTGGGACCC -TACTAGGAGATGATCAGATCTACAATGTCATTGTAACCGCCCATGCATTC -GTAATAATTTTCTTTATGGTCATACCCATTATAATCGGAGGATTCGGAAA -CTGATTAGTCCCCCTGATAATTGGAGCACCTGATATAGCTTTCCCCCGAA -TAAACAACATAAGCTTCTGATTACTTCCCCCATCATTCCTACTTCTTCTC -GCTTCCTCAATAATTGAAGCAGGTGCCGGAACAGGCTGAACCGTATATCC -TCCTCTAGCTGGAAATCTGGCGCATGCAGGAGCCTCTGTTGACTTAACCA -TTTTCTCTCTCCACCTAGCTGGGGTGTCCTCGATTTTAGGTGCCATCAAC -TTTATTACCACAATCATTAACATAAAACCACCAGCCCTATCCCAATATCA -AACCCCCCTATTCGTTTGATCTGTCCTTATTACGGCAGTACTCCTTCTCC -TAGCCCTCCCGGTCCTAGCAGCAGGCATTACCATGCTTCTCACAGACCGT -AACCTGAACACTACTTTCTTCGACCCCGCAGGAGGAGGGGATCCAATCCT -TTATCAACACCTATTCTGATTCTTCGGACACCCCGAAGTCTATATTCTTA -TCCTACCAGGCTTCGGTATAATCTCACACATCGTCACATACTACTCAGGT -AAAAAGGAACCTTTTGGCTACATGGGTATAGTGTGAGCTATAATATCCAT -TGGCTTTCTAGGCTTCATCGTATGGGCTCACCACATGTTTACAGTAGGGA -TAGACGTTGACACACGAGCATACTTCACATCAGCTACCATAATCATCGCT -ATCCCTACTGGTGTAAAAGTATTCAGCTGACTAGCCACCCTGCACGGAGG -AAATATCAAATGATCTCCAGCTATACTCTGAGCTCTAGGCTTCATCTTCT -TATTCACAGTAGGAGGTCTAACAGGAATCGTCCTAGCTAACTCATCCCTA -GATATTGTTCTCCACGATACTTATTATGTAGTAGCACATTTCCATTATGT -CCTGTCTATAGGAGCAGTCTTCGCCATTATGGGGGGATTTGTACACTGAT -TCCCTCTATTCTCAGGATACACACTCAACCAAACCTGAGCAAAAATCCAC -TTTACAATTATATTCGTAGGGGTAAATATAACCTTCTTCCCACAACATTT -CCTTGGCCTCTCAGGAATGCCACGACGCTATTCTGATTATCCAGACGCAT -ATACAACATGAAATACCATCTCATCCATAGGATCTTTTATCTCACTTACA -GCAGTGATACTAATAATTTTCATAATTTGAGAAGCGTTCGCATCCAAACG -AGAAGTGTCTACAGTAGAATTAACCTCAACTAATCTGGAATGACTACACG -GATGCCCCCCACCATACCACACATTTGAAGAACCCACCTACGTAAACCTA -AAAtaagaaaggaaggaatcgaaccccctctaactggtttcaagccaata -tcataaccactatgtctttctcCATCAATTGAGGTATTAGTAAAAATTAC -ATGACTTTGTCAAAGTTAAATTATAGGTTAAACCCCTATATACCTCTATG -GCCTACCCCTTCCAACTAGGATTCCAAGACGCAACATCCCCTATTATAGA -AGAACTCCTACACTTCCACGACCACACACTAATAATCGTATTCCTAATTA -GCTCTCTAGTATTATATATTATCTCATCAATACTAACAACTAAATTAACC -CATACCAGCACCATAGATGCTCAAGAAGTAGAGACAATTTGAACGATTTT -ACCAGCCATCATCCTTATTCTAATCGCCCTCCCATCCCTACGAATTCTAT -ATATAATAGATGAAATCAATAATCCGTCCCTCACAGTCAAAACAATAGGC -CACCAATGATACTGAAGCTACGAGTATACCGATTACGAAGACTTGACCTT -TGACTCCTACATGATCCCCACATCAGACCTAAAACCAGGAGAATTACGTC -TTCTAGAAGTCGACAATCGAGTGGTTCTCCCCATAGAAATAACCATCCGA -ATGCTAATTTCATCCGAAGACGTCCTACACTCATGAGCTGTGCCCTCCCT -AGGCCTAAAAACAGACGCTATCCCTGGGCGCCTAAATCAGACAACTCTCG -TGGCCTCTCGACCAGGACTTTACTACGGTCAATGCTCAGAGATCTGCGGA -TCAAACCACAGCTTTATACCAATTGTCCTTGAACTAGTTCCACTGAAACA -CTTCGAAGAATGATCTGCATCAATATTATAAAGTCACTAAGAAGCTATTA -TAGCATTAACCTTTTAAGTTAAAGATTGAGGGTTCAACCCCCTCCCTAGT -GATATGCCACAGTTGGATACATCAACATGATTTATTAATATCGTCTCAAT -AATCCTAACTCTATTTATTGTATTTCAACTAAAAATCTCAAAGCACTCCT -ATCCGACACACCCAGAAGTAAAGACAACCAAAATAACAAAACACTCTGCC -CCTTGAGAATCAAAATGAACGAAAATCTATTCGCCTCTTTCGCTACCCCA -ACAATAGTAGGCCTCCCTATTGTAATTCTGATCATCATATTTCCCAGCAT -CCTATTCCCCTCACCCAACCGACTAATCAACAATCGCCTAATCTCAATTC -AACAATGGCTAGTCCAACTTACATCAAAACAAATAATAGCTATCCATAAC -AGCAAAGGACAAACCTGAACTCTTATACTCATATCACTGATCCTATTCAT -TGGCTCAACAAACTTATTAGGCCTACTACCTCACTCATTTACACCAACAA -CACAACTATCAATAAACCTAGGCATAGCTATTCCCCTATGGGCAGGGACA -GTATTCATAGGCTTTCGTCACAAAACAAAAGCAGCCCTAGCCCACTTTCT -ACCTCAAGGGACGCCCATTTTCCTCATCCCCATACTAGTAATTATCGAGA -CTATCAGCCTATTTATTCAACCTGTAGCCCTAGCCGTGCGGCTAACCGCT -AACATTACCGCCGGACACCTCCTAATACACCTCATCGGAGGGGCAACACT -AGCCCTCATAAGCATCAGCCCCTCAACAGCCCTTATTACGTTTATCATCC -TAATTCTACTAACTATCCTCGAATTCGCAGTAGCTATAATCCAAGCCTAC -GTATTCACTCTCCTGGTAAGCCTTTACTTACACGACAACACCTAATGACC -CACCAAACCCACGCTTACCACATAGTAAACCCCAGCCCATGACCACTTAC -AGGAGCCCTATCAGCCCTCCTGATAACATCAGGACTAGCCATGTGATTTC -ACTTTAACTCAACCTTACTTCTAGCTATAGGGCTATTAACTAACATCCTT -ACCATATATCAATGATGACGAGACATCATCCGAGAAAGCACATTCCAAGG -CCATCACACATCAATCGTTCAAAAGGGACTCCGATATGGCATAATCCTTT -TTATTATCTCAGAAGTCTTCTTCTTCTCTGGCTTCTTCTGAGCCTTTTAC -CACTCAAGCCTAGCCCCCACACCCGAACTAGGCGGCTGCTGACCACCCAC -AGGTATCCACCCCTTAAACCCCCTAGAAGTCCCCTTACTCAACACCTCAG -TGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCACCATAGCCTAATA -GAAGGAAACCGTAAAAATATGCTCCAAGGCCTATTCATCACAATTTCACT -AGGCGTATACTTCACCCTTCTCCAAGCCTCAGAATACTATGAAGCCTCAT -TTACTATTTCAGATGGAGTATACGGATCAACATTTTTCGTAGCAACAGGG -TTCCACGGACTACACGTAATTATCGGATCTACCTTCCTCATTGTATGTTT -CCTACGCCAACTAAAATTCCACTTTACATCCAGCCACCACTTCGGATTCG -AAGCAGCCGCTTGATACTGACACTTCGTCGACGTAGTCTGACTATTCTTG -TACGTCTCTATTTATTGATGAGGATCCTATTCTTTTAGTATTGACCAGTA -CAATTGACTTCCAATCAATCAGCTTCGGTATAACCCGAAAAAGAATAATA -AACCTCATACTGACACTCCTCACTAACACATTACTAGCCTCGCTACTCGT -ACTCATCGCATTCTGACTACCACAACTAAACATCTATGCAGAAAAAACCA -GCCCATATGAATGCGGATTTGACCCTATAGGGTCAGCACGCCTCCCCTTC -TCAATAAAATTTTTCTTAGTGGCCATTACATTTCTGCTATTCGACTTAGA -AATTGCCCTCCTATTACCCCTTCCATGAGCATCCCAAACAACTAACCTAA -ACACTATACTTATCATAGCACTAGTCCTAATCTCTCTTCTAGCCATCAGC -CTAGCCTACGAATGAACCCAAAAAGGACTAGAATGAACTGAGTATGGTAA -TTAGTTTAAACCAAAACAAATGATTTCGACTCATTAAACTATGATTAACT -TCATAATTACCAACATGTCACTAGTCCATATTAATATCTTCCTAGCATTC -ACAGTATCCCTCGTAGGCCTACTAATGTACCGATCCCACCTAATATCCTC -ACTCCTATGCCTAGAAGGAATAATACTATCACTATTCGTCATAGCAACCA -TAATAGTCCTAAACACCCACTTCACACTAGCTAGTATAATACCTATCATC -TTACTAGTATTTGCTGCCTGCGAACGAGCTCTAGGATTATCCCTACTAGT -CATAGTCTCCAATACTTATGGAGTAGACCACGTACAAAACCTTAACCTCC -TCCAATGCTAAAAATTATCATTCCCACAATCATACTTATGCCCCTTACAT -GACTATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATAGTCTA -TTAATCAGCCTTATCAGCCTATCCCTCCTAAACCAACCTAGCAACAATAG -CCTAAACTTCTCACTAATATTCTTCTCCGATCCCCTATCAGCCCCACTTC -TGGTGTTGACAACATGACTACTGCCACTAATACTCATAGCCAGCCAACAC -CATCTATCTAAGGAACCACTAATCCGAAAAAAACTCTACATCACCATGCT -AACCATACTTCAAACTTTCCTAATCATGACTTTTACCGCCACAGAACTAA -TCTCCTTCTACATCCTATTTGAAGCCACATTAGTTCCAACACTAATTATC -ATCACCCGCTGAGGCAACCAAACAGAACGCCTGAACGCAGGCCTCTACTT -CCTATTCTACACACTAATAGGTTCCCTCCCACTCTTAGTTGCACTAATCT -CTATCCAAAACCTAACAGGCTCACTAAACTTCCTATTAATTCAATACTGA -AACCAAGCACTACCCGACTCTTGATCCAATATTTTCCTATGACTAGCATG -TATAATAGCATTCATAGTCAAAATACCGGTATATGGTCTTCACCTCTGAC -TCCCAAAAGCCCATGTAGAAGCCCCAATTGCCGGATCCATAGTGCTAGCA -GCCATTCTACTAAAACTAGGAGGCTACGGAATACTACGAATTACAACAAT -ACTAAACCCCCAAACTAGCTTTATAGCCTACCCCTTCCTCATACTATCCC -TGTGAGGAATAATCATAACTAGTTCCATCTGCTTGCGACAAACCGATCTA -AAATCACTTATTGCATACTCCTCTGTCAGCCACATAGCCCTAGTAATCGT -AGCCGTCCTCATCCAAACACCATGAAGTTATATAGGAGCTACAGCCCTAA -TAATCGCTCACGGCCTTACATCATCAATACTATTCTGCCTGGCAAACTCA -AATTACGAACGTACCCATAGCCGAACTATAATCCTAGCCCGCGGGCTTCA -AACACTTCTTCCCCTTATAGCAGCCTGATGACTATTAGCCAGCCTAACCA -ACCTGGCCCTCCCTCCCAGCATTAACCTAATTGGAGAGCTATTCGTAGTA -ATATCATCATTCTCATGATCAAATATTACCATTATCCTAATAGGAGCCAA -TATCACCATCACCGCCCTCTACTCCCTATACATACTAATCACAACACAAC -GAGGGAAATACACACACCATATCAACAGCATTAAACCTTCATTTACACGA -GAAAACGCACTCATGGCCCTCCACATGACTCCCCTACTACTCCTATCACT -TAACCCTAAAATTATCCTAGGCTTTACGTACTGTAAATATAGTTTAACAA -AAACACTAGATTGTGGATCTAGAAACAGAAACTTAATATTTCTTATTTAC -CGAGAAAGTATGCAAGAACTGCTAATTCATGCCCCCATGTCCAACAAACA -TGGCTCTCTCAAACTTTTAAAGGATAGGAGCTATCCGTTGGTCTTAGGAA -CCAAAAAATTGGTGCAACTCCAAATAAAAGTAATCAACATGTTCTCCTCC -CTCATACTAGTTTCACTATTAGTACTAACCCTCCCAATCATATTATCAAT -CTTCAATACCTACAAAAACAGCACGTTCCCGCATCATGTAAAAAACACTA -TCTCATATGCCTTCATTACTAGCCTAATTCCCACTATAATATTTATTCAC -TCTGGACAAGAAACAATTATCTCAAACTGACACTGAATAACCATACAAAC -CCTCAAACTATCCCTAAGCTTCAAACTAGATTACTTCTCAATAATTTTCG -TACCAGTAGCCCTATTCGTAACATGATCTATTATGGAATTCTCCCTATGA -TACATGCACTCAGATCCTTACATTACTCGATTTTTTAAATACTTACTTAC -ATTCCTCATCACTATAATAATTCTAGTCACAGCTAACAACCTTTTCCAAC -TGTTCATCGGATGGGAGGGAGTAGGCATCATGTCATTCTTACTAATCGGA -TGATGATACGGCCGAACAGATGCCAACACCGCGGCCCTTCAAGCAATCCT -TTATAACCGCATCGGGGATATCGGCTTCATCATGGCCATAGCCTGATTCC -TATTCAACACCAACACATGAGACCTCCAACAAATCTTCATACTCGACCCC -AACCTTACCAACCTCCCGCTCCTAGGCCTCCTCCTAGCCGCAACTGGCAA -ATCCGCTCAATTTGGACTCCACCCATGACTTCCTTCAGCCATAGAGGGCC -CTACACCAGTCTCAGCCCTACTCCACTCCAGCACAATAGTTGTAGCAGGC -GTCTTCCTGCTAATCCGCTTCCATCCACTAATAGAAAACAACAAAACAAT -CCAGTCACTTACCCTATGCCTAGGAGCCATCACCACACTATTCACAGCAA -TCTGCGCACTCACTCAAAACGATATCAAAAAAATCATTGCTTTCTCCACC -TCCAGCCAACTAGGCCTGATAATCGTAACCATCGGTATCAATCAACCCTA -CCTAGCATTCCTCCACATTTGCACTCACGCATTCTTCAAAGCTATACTAT -TTATATGTTCCGGATCCATTATCCACAGCCTAAATGACGAGCAAGATATC -CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT -AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT -ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC -TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG -TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC -TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC -CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA -CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG -CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG -ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC -CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG -CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC -TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC -CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT -CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG -TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA -ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC -CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA -CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT -AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC -AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG -TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC -ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC -CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG -AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA -ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT -GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC -GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA -CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG -AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT -ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA -GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC -AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG -GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC -ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT -ATCCTTTTGAGGAGCAACAGTCATCACGAACCTCCTATCAGCAATTCCCT -ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC -AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT -CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT -CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC -CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT -CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA -ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA -TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT -AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC -CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC -CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT -CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT -CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC -ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT -ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC -TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC -TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT -CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC -CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG -TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA -GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC -AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC -GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG -CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT -TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA -ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA -CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT -CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA -CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA -GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac -ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct -gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt -gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc -acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac -ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA -CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT -GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT -AACCCTCGCATGCCAACCATAATAACTCAACACACCTAACAATCTTAACA -GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT -TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC -TTCTTCCCCC diff -r b01c8245ef74 -r 14214b45db3f test-data/chr_m.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chr_m.fasta Mon Dec 07 14:45:47 2009 -0500 @@ -0,0 +1,335 @@ +>chrM +GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA +GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT +TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT +CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA +AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG +ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG +GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA +ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA +AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA +AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA +TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA +AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT +GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC +CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT +CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA +AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT +TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC +TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT +GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT +CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA +CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG +TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA +TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC +TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG +AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG +ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT +AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC +CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT +ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG +AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA +ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA +AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA +ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG +TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA +ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG +TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA +CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA +TCATCTATTTAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA +AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT +ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC +AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca +taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt +tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg +cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG +ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT +TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC +CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA +TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG +CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG +TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT +CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT +CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT +AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA +AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg +tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt +caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT +GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT +CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG +GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA +CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT +AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC +CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC +CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA +CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG +TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC +ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC +ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA +ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA +TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA +ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG +GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC +ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA +CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC +TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA +CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAGTT +ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact +acaggaattgaacctgctcctgagaattcaaaatcctccgtgctaccgaa +ttacaccatgtcctaCAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCAT +ACCCCGAAAATGTTGGATTACACCCTTCCCGTACTAATAAATCCCCTTAT +CTTCACAACTATTCTAATAACAGTTCTTCTAGGAACTATAATCGTTATAA +TAAGCTCACACTGACTAATAATCTGAATCGGATTTGAAATAAATCTACTA +GCCATTATCCCTATCCTAATAAAAAAGTACAATCCCCGAACCATAGAAGC +CTCCACCAAATATTTTCTAACCCAAGCCACCGCATCAATACTCCTCATAA +TAGCGATCATCATTAACCTCATACACTCAGGCCAATGAACAATCACAAAA +GTCTTCAACCCCACAGCGTCCATCATTATAACTTCAGCTCTCGCCATAAA +ACTTGGACTCACACCATTCCACTTCTGAGTACCCGAAGTCACACAGGGCA +TCTCATTAACATCAGGTCTCATCCTACTTACATGACAAAAACTAGCCCCA +ATATCAATCCTATATCAAATCTCACCCTCAATTAACCTAAATATCTTATT +AACTATAGCCGTACTGTCAATCCTAGTAGGAGGCTGAGGCGGTCTCAACC +AAACCCAACTACGAAAAATCATAGCATACTCGTCAATCGCGCATATAGGA +TGAATAACAGCTGTCCTAGTATATAACCCAACACTAACAATACTAAACAT +ATTAATTTACATTATAATAACACTCACAATATTCATACTATTTATCCACA +GCTCCTCTACTACAACACTATCACTCTCCCACACATGAAACAAAATACCT +CTAACCACTACACTAATCTTAATTACCTTACTATCCATAGGAGGCCTCCC +CCCACTATCAGGATTCATACCCAAATGAATAATCATTCAAGAGCTCACCA +AAAATAGCAGCATCATCCTCCCCACACTAATAGCCATTATAGCACTACTC +AACCTCTACTTCTACATACGACTAACCTATTCCACCTCACTGACCATATT +CCCATCCACAAACAACATAAAAATAAAATGACAATTCGAAACCAAACGAA +TTACTCTCTTACCCCCGTTAATTGTTATATCCTCCCTACTCCTCCCCCTA +ACCCCCATACTATCAATTTTGGACTAGGAATTTAGGTTAACATCCCAGAC +CAAGAGCCTTCAAAGCTCTAAGCAAGTGAATCCACTTAATTCCTGCATAC +TAAGGACTGCGAGACTCTATCTCACATCAATTGAACGCAAATCAAACTCT +TTTATTAAGCTAAGCCCTTACTAGATTGGTGGGCTACCATCCCACGAAAT +TTTAGTTAACAGCTAAATACCCTAATCAACTGGCTTCAATCTACTTCTCC +CGCCGCCTAGAAAAAAAGGCGGGAGAAGCCCCGGCAGAAATTGAAGCTGC +TCCTTTGAATTTGCAATTCAATGTGAAAATTCACCACGGGACTTGATAAG +AAGAGGATTCCAACCCCTGTCTTTAGATTTACAGTCTAATGCTTACTCAG +CCATCTTACCTATGTTCATCAACCGCTGACTATTTTCAACTAACCACAAA +GACATCGGCACTCTGTACCTCCTATTCGGCGCTTGAGCTGGAATAGTAGG +AACTGCCCTAAGCCTCCTAATCCGTGCTGAATTAGGCCAACCTGGGACCC +TACTAGGAGATGATCAGATCTACAATGTCATTGTAACCGCCCATGCATTC +GTAATAATTTTCTTTATGGTCATACCCATTATAATCGGAGGATTCGGAAA +CTGATTAGTCCCCCTGATAATTGGAGCACCTGATATAGCTTTCCCCCGAA +TAAACAACATAAGCTTCTGATTACTTCCCCCATCATTCCTACTTCTTCTC +GCTTCCTCAATAATTGAAGCAGGTGCCGGAACAGGCTGAACCGTATATCC +TCCTCTAGCTGGAAATCTGGCGCATGCAGGAGCCTCTGTTGACTTAACCA +TTTTCTCTCTCCACCTAGCTGGGGTGTCCTCGATTTTAGGTGCCATCAAC +TTTATTACCACAATCATTAACATAAAACCACCAGCCCTATCCCAATATCA +AACCCCCCTATTCGTTTGATCTGTCCTTATTACGGCAGTACTCCTTCTCC +TAGCCCTCCCGGTCCTAGCAGCAGGCATTACCATGCTTCTCACAGACCGT +AACCTGAACACTACTTTCTTCGACCCCGCAGGAGGAGGGGATCCAATCCT +TTATCAACACCTATTCTGATTCTTCGGACACCCCGAAGTCTATATTCTTA +TCCTACCAGGCTTCGGTATAATCTCACACATCGTCACATACTACTCAGGT +AAAAAGGAACCTTTTGGCTACATGGGTATAGTGTGAGCTATAATATCCAT +TGGCTTTCTAGGCTTCATCGTATGGGCTCACCACATGTTTACAGTAGGGA +TAGACGTTGACACACGAGCATACTTCACATCAGCTACCATAATCATCGCT +ATCCCTACTGGTGTAAAAGTATTCAGCTGACTAGCCACCCTGCACGGAGG +AAATATCAAATGATCTCCAGCTATACTCTGAGCTCTAGGCTTCATCTTCT +TATTCACAGTAGGAGGTCTAACAGGAATCGTCCTAGCTAACTCATCCCTA +GATATTGTTCTCCACGATACTTATTATGTAGTAGCACATTTCCATTATGT +CCTGTCTATAGGAGCAGTCTTCGCCATTATGGGGGGATTTGTACACTGAT +TCCCTCTATTCTCAGGATACACACTCAACCAAACCTGAGCAAAAATCCAC +TTTACAATTATATTCGTAGGGGTAAATATAACCTTCTTCCCACAACATTT +CCTTGGCCTCTCAGGAATGCCACGACGCTATTCTGATTATCCAGACGCAT +ATACAACATGAAATACCATCTCATCCATAGGATCTTTTATCTCACTTACA +GCAGTGATACTAATAATTTTCATAATTTGAGAAGCGTTCGCATCCAAACG +AGAAGTGTCTACAGTAGAATTAACCTCAACTAATCTGGAATGACTACACG +GATGCCCCCCACCATACCACACATTTGAAGAACCCACCTACGTAAACCTA +AAAtaagaaaggaaggaatcgaaccccctctaactggtttcaagccaata +tcataaccactatgtctttctcCATCAATTGAGGTATTAGTAAAAATTAC +ATGACTTTGTCAAAGTTAAATTATAGGTTAAACCCCTATATACCTCTATG +GCCTACCCCTTCCAACTAGGATTCCAAGACGCAACATCCCCTATTATAGA +AGAACTCCTACACTTCCACGACCACACACTAATAATCGTATTCCTAATTA +GCTCTCTAGTATTATATATTATCTCATCAATACTAACAACTAAATTAACC +CATACCAGCACCATAGATGCTCAAGAAGTAGAGACAATTTGAACGATTTT +ACCAGCCATCATCCTTATTCTAATCGCCCTCCCATCCCTACGAATTCTAT +ATATAATAGATGAAATCAATAATCCGTCCCTCACAGTCAAAACAATAGGC +CACCAATGATACTGAAGCTACGAGTATACCGATTACGAAGACTTGACCTT +TGACTCCTACATGATCCCCACATCAGACCTAAAACCAGGAGAATTACGTC +TTCTAGAAGTCGACAATCGAGTGGTTCTCCCCATAGAAATAACCATCCGA +ATGCTAATTTCATCCGAAGACGTCCTACACTCATGAGCTGTGCCCTCCCT +AGGCCTAAAAACAGACGCTATCCCTGGGCGCCTAAATCAGACAACTCTCG +TGGCCTCTCGACCAGGACTTTACTACGGTCAATGCTCAGAGATCTGCGGA +TCAAACCACAGCTTTATACCAATTGTCCTTGAACTAGTTCCACTGAAACA +CTTCGAAGAATGATCTGCATCAATATTATAAAGTCACTAAGAAGCTATTA +TAGCATTAACCTTTTAAGTTAAAGATTGAGGGTTCAACCCCCTCCCTAGT +GATATGCCACAGTTGGATACATCAACATGATTTATTAATATCGTCTCAAT +AATCCTAACTCTATTTATTGTATTTCAACTAAAAATCTCAAAGCACTCCT +ATCCGACACACCCAGAAGTAAAGACAACCAAAATAACAAAACACTCTGCC +CCTTGAGAATCAAAATGAACGAAAATCTATTCGCCTCTTTCGCTACCCCA +ACAATAGTAGGCCTCCCTATTGTAATTCTGATCATCATATTTCCCAGCAT +CCTATTCCCCTCACCCAACCGACTAATCAACAATCGCCTAATCTCAATTC +AACAATGGCTAGTCCAACTTACATCAAAACAAATAATAGCTATCCATAAC +AGCAAAGGACAAACCTGAACTCTTATACTCATATCACTGATCCTATTCAT +TGGCTCAACAAACTTATTAGGCCTACTACCTCACTCATTTACACCAACAA +CACAACTATCAATAAACCTAGGCATAGCTATTCCCCTATGGGCAGGGACA +GTATTCATAGGCTTTCGTCACAAAACAAAAGCAGCCCTAGCCCACTTTCT +ACCTCAAGGGACGCCCATTTTCCTCATCCCCATACTAGTAATTATCGAGA +CTATCAGCCTATTTATTCAACCTGTAGCCCTAGCCGTGCGGCTAACCGCT +AACATTACCGCCGGACACCTCCTAATACACCTCATCGGAGGGGCAACACT +AGCCCTCATAAGCATCAGCCCCTCAACAGCCCTTATTACGTTTATCATCC +TAATTCTACTAACTATCCTCGAATTCGCAGTAGCTATAATCCAAGCCTAC +GTATTCACTCTCCTGGTAAGCCTTTACTTACACGACAACACCTAATGACC +CACCAAACCCACGCTTACCACATAGTAAACCCCAGCCCATGACCACTTAC +AGGAGCCCTATCAGCCCTCCTGATAACATCAGGACTAGCCATGTGATTTC +ACTTTAACTCAACCTTACTTCTAGCTATAGGGCTATTAACTAACATCCTT +ACCATATATCAATGATGACGAGACATCATCCGAGAAAGCACATTCCAAGG +CCATCACACATCAATCGTTCAAAAGGGACTCCGATATGGCATAATCCTTT +TTATTATCTCAGAAGTCTTCTTCTTCTCTGGCTTCTTCTGAGCCTTTTAC +CACTCAAGCCTAGCCCCCACACCCGAACTAGGCGGCTGCTGACCACCCAC +AGGTATCCACCCCTTAAACCCCCTAGAAGTCCCCTTACTCAACACCTCAG +TGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCACCATAGCCTAATA +GAAGGAAACCGTAAAAATATGCTCCAAGGCCTATTCATCACAATTTCACT +AGGCGTATACTTCACCCTTCTCCAAGCCTCAGAATACTATGAAGCCTCAT +TTACTATTTCAGATGGAGTATACGGATCAACATTTTTCGTAGCAACAGGG +TTCCACGGACTACACGTAATTATCGGATCTACCTTCCTCATTGTATGTTT +CCTACGCCAACTAAAATTCCACTTTACATCCAGCCACCACTTCGGATTCG +AAGCAGCCGCTTGATACTGACACTTCGTCGACGTAGTCTGACTATTCTTG +TACGTCTCTATTTATTGATGAGGATCCTATTCTTTTAGTATTGACCAGTA +CAATTGACTTCCAATCAATCAGCTTCGGTATAACCCGAAAAAGAATAATA +AACCTCATACTGACACTCCTCACTAACACATTACTAGCCTCGCTACTCGT +ACTCATCGCATTCTGACTACCACAACTAAACATCTATGCAGAAAAAACCA +GCCCATATGAATGCGGATTTGACCCTATAGGGTCAGCACGCCTCCCCTTC +TCAATAAAATTTTTCTTAGTGGCCATTACATTTCTGCTATTCGACTTAGA +AATTGCCCTCCTATTACCCCTTCCATGAGCATCCCAAACAACTAACCTAA +ACACTATACTTATCATAGCACTAGTCCTAATCTCTCTTCTAGCCATCAGC +CTAGCCTACGAATGAACCCAAAAAGGACTAGAATGAACTGAGTATGGTAA +TTAGTTTAAACCAAAACAAATGATTTCGACTCATTAAACTATGATTAACT +TCATAATTACCAACATGTCACTAGTCCATATTAATATCTTCCTAGCATTC +ACAGTATCCCTCGTAGGCCTACTAATGTACCGATCCCACCTAATATCCTC +ACTCCTATGCCTAGAAGGAATAATACTATCACTATTCGTCATAGCAACCA +TAATAGTCCTAAACACCCACTTCACACTAGCTAGTATAATACCTATCATC +TTACTAGTATTTGCTGCCTGCGAACGAGCTCTAGGATTATCCCTACTAGT +CATAGTCTCCAATACTTATGGAGTAGACCACGTACAAAACCTTAACCTCC +TCCAATGCTAAAAATTATCATTCCCACAATCATACTTATGCCCCTTACAT +GACTATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATAGTCTA +TTAATCAGCCTTATCAGCCTATCCCTCCTAAACCAACCTAGCAACAATAG +CCTAAACTTCTCACTAATATTCTTCTCCGATCCCCTATCAGCCCCACTTC +TGGTGTTGACAACATGACTACTGCCACTAATACTCATAGCCAGCCAACAC +CATCTATCTAAGGAACCACTAATCCGAAAAAAACTCTACATCACCATGCT +AACCATACTTCAAACTTTCCTAATCATGACTTTTACCGCCACAGAACTAA +TCTCCTTCTACATCCTATTTGAAGCCACATTAGTTCCAACACTAATTATC +ATCACCCGCTGAGGCAACCAAACAGAACGCCTGAACGCAGGCCTCTACTT +CCTATTCTACACACTAATAGGTTCCCTCCCACTCTTAGTTGCACTAATCT +CTATCCAAAACCTAACAGGCTCACTAAACTTCCTATTAATTCAATACTGA +AACCAAGCACTACCCGACTCTTGATCCAATATTTTCCTATGACTAGCATG +TATAATAGCATTCATAGTCAAAATACCGGTATATGGTCTTCACCTCTGAC +TCCCAAAAGCCCATGTAGAAGCCCCAATTGCCGGATCCATAGTGCTAGCA +GCCATTCTACTAAAACTAGGAGGCTACGGAATACTACGAATTACAACAAT +ACTAAACCCCCAAACTAGCTTTATAGCCTACCCCTTCCTCATACTATCCC +TGTGAGGAATAATCATAACTAGTTCCATCTGCTTGCGACAAACCGATCTA +AAATCACTTATTGCATACTCCTCTGTCAGCCACATAGCCCTAGTAATCGT +AGCCGTCCTCATCCAAACACCATGAAGTTATATAGGAGCTACAGCCCTAA +TAATCGCTCACGGCCTTACATCATCAATACTATTCTGCCTGGCAAACTCA +AATTACGAACGTACCCATAGCCGAACTATAATCCTAGCCCGCGGGCTTCA +AACACTTCTTCCCCTTATAGCAGCCTGATGACTATTAGCCAGCCTAACCA +ACCTGGCCCTCCCTCCCAGCATTAACCTAATTGGAGAGCTATTCGTAGTA +ATATCATCATTCTCATGATCAAATATTACCATTATCCTAATAGGAGCCAA +TATCACCATCACCGCCCTCTACTCCCTATACATACTAATCACAACACAAC +GAGGGAAATACACACACCATATCAACAGCATTAAACCTTCATTTACACGA +GAAAACGCACTCATGGCCCTCCACATGACTCCCCTACTACTCCTATCACT +TAACCCTAAAATTATCCTAGGCTTTACGTACTGTAAATATAGTTTAACAA +AAACACTAGATTGTGGATCTAGAAACAGAAACTTAATATTTCTTATTTAC +CGAGAAAGTATGCAAGAACTGCTAATTCATGCCCCCATGTCCAACAAACA +TGGCTCTCTCAAACTTTTAAAGGATAGGAGCTATCCGTTGGTCTTAGGAA +CCAAAAAATTGGTGCAACTCCAAATAAAAGTAATCAACATGTTCTCCTCC +CTCATACTAGTTTCACTATTAGTACTAACCCTCCCAATCATATTATCAAT +CTTCAATACCTACAAAAACAGCACGTTCCCGCATCATGTAAAAAACACTA +TCTCATATGCCTTCATTACTAGCCTAATTCCCACTATAATATTTATTCAC +TCTGGACAAGAAACAATTATCTCAAACTGACACTGAATAACCATACAAAC +CCTCAAACTATCCCTAAGCTTCAAACTAGATTACTTCTCAATAATTTTCG +TACCAGTAGCCCTATTCGTAACATGATCTATTATGGAATTCTCCCTATGA +TACATGCACTCAGATCCTTACATTACTCGATTTTTTAAATACTTACTTAC +ATTCCTCATCACTATAATAATTCTAGTCACAGCTAACAACCTTTTCCAAC +TGTTCATCGGATGGGAGGGAGTAGGCATCATGTCATTCTTACTAATCGGA +TGATGATACGGCCGAACAGATGCCAACACCGCGGCCCTTCAAGCAATCCT +TTATAACCGCATCGGGGATATCGGCTTCATCATGGCCATAGCCTGATTCC +TATTCAACACCAACACATGAGACCTCCAACAAATCTTCATACTCGACCCC +AACCTTACCAACCTCCCGCTCCTAGGCCTCCTCCTAGCCGCAACTGGCAA +ATCCGCTCAATTTGGACTCCACCCATGACTTCCTTCAGCCATAGAGGGCC +CTACACCAGTCTCAGCCCTACTCCACTCCAGCACAATAGTTGTAGCAGGC +GTCTTCCTGCTAATCCGCTTCCATCCACTAATAGAAAACAACAAAACAAT +CCAGTCACTTACCCTATGCCTAGGAGCCATCACCACACTATTCACAGCAA +TCTGCGCACTCACTCAAAACGATATCAAAAAAATCATTGCTTTCTCCACC +TCCAGCCAACTAGGCCTGATAATCGTAACCATCGGTATCAATCAACCCTA +CCTAGCATTCCTCCACATTTGCACTCACGCATTCTTCAAAGCTATACTAT +TTATATGTTCCGGATCCATTATCCACAGCCTAAATGACGAGCAAGATATC +CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT +AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT +ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC +TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG +TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC +TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC +CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA +CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG +CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG +ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC +CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG +CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC +TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC +CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT +CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG +TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA +ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC +CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA +CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT +AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC +AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG +TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC +ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC +CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG +AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA +ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT +GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC +GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA +CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG +AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT +ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA +GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC +AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG +GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC +ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT +ATCCTTTTGAGGAGCAACAGTCATCACGAACCTCCTATCAGCAATTCCCT +ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC +AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT +CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT +CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC +CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT +CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA +ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA +TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT +AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC +CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC +CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT +CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT +CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC +ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT +ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC +TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC +TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT +CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC +CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG +TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA +GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC +AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC +GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG +CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT +TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA +ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA +CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT +CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA +CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA +GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac +ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct +gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt +gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc +acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac +ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA +CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT +GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT +AACCCTCGCATGCCAACCATAATAACTCAACACACCTAACAATCTTAACA +GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT +TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC +TTCTTCCCCC diff -r b01c8245ef74 -r 14214b45db3f test-data/joiner_out3.bed --- a/test-data/joiner_out3.bed Mon Dec 07 14:45:09 2009 -0500 +++ b/test-data/joiner_out3.bed Mon Dec 07 14:45:47 2009 -0500 @@ -6,46 +6,46 @@ chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - chr11 116124407 116124501 AK057832_cds_0_0_chr11_116124408_r 0 - chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + chr11 116206508 116206563 NM_000040_cds_1_0_chr11_116206509_f 0 + chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - chr11 116211733 116212337 BC005380_cds_0_0_chr11_116211734_r 0 - -chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + X X X X X X +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + ~ ~ ~ ~ ~ ~ chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - chr12 38440094 38440321 NM_052885_cds_0_0_chr12_38440095_r 0 - chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + chr13 112381694 112381953 NM_207440_cds_1_0_chr13_112381695_f 0 + chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - chr14 98710240 98712285 NM_022898_cds_0_0_chr14_98710241_r 0 - chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - chr15 41486872 41487060 BX537418_cds_0_0_chr15_41486873_r 0 - chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + chr15 41673708 41673857 AK223365_cds_0_0_chr15_41673709_f 0 + chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - chr15 41679161 41679250 NM_153700_cds_0_0_chr15_41679162_r 0 - -chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + X X X X X X +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + ~ ~ ~ ~ ~ ~ chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + chr16 142908 143003 NM_005332_cds_0_0_chr16_142909_f 0 + -chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - X X X X X X +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - ~ ~ ~ ~ ~ ~ chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + chr16 244413 244681 AK057165_cds_2_0_chr16_244414_f 0 + chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - chr16 259268 259383 AB016929_cds_0_0_chr16_259269_r 0 - chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - chr18 23786114 23786321 NM_001792_cds_0_0_chr18_23786115_r 0 - chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + chr18 59406881 59407046 NM_012397_cds_1_0_chr18_59406882_f 0 + chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - chr18 59455932 59456337 AB046400_cds_0_0_chr18_59455933_r 0 - -chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + X X X X X X +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + ~ ~ ~ ~ ~ ~ chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + chr19 59068595 59069564 BC013995_cds_1_0_chr19_59068596_f 0 + chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - chr19 59236026 59236146 NM_198481_cds_0_0_chr19_59236027_r 0 - chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + chr19 59297998 59298008 NM_004542_cds_0_0_chr19_59297999_f 0 + -chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - X X X X X X +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - ~ ~ ~ ~ ~ ~ chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + chr2 118288583 118288668 NM_006773_cds_0_0_chr2_118288584_f 0 + -chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - X X X X X X -chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + X X X X X X +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - ~ ~ ~ ~ ~ ~ +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + ~ ~ ~ ~ ~ ~ chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - chr2 220229609 220230869 NM_024536_cds_0_0_chr2_220229610_r 0 - chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - chr20 33330413 33330423 NM_181466_cds_0_0_chr20_33330414_r 0 - chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + chr20 33513606 33513792 AF022655_cds_1_0_chr20_33513607_f 0 + -chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - X X X X X X -chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + X X X X X X -chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + X X X X X X +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - ~ ~ ~ ~ ~ ~ +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + ~ ~ ~ ~ ~ ~ +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + ~ ~ ~ ~ ~ ~ chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - chr21 32869641 32870022 NM_018277_cds_3_0_chr21_32869642_r 0 - chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + chr21 33321040 33322012 NM_005806_cds_1_0_chr21_33321041_f 0 + -chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - X X X X X X +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - ~ ~ ~ ~ ~ ~ chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + chr22 30120223 30120265 NM_004147_cds_0_0_chr22_30120224_f 0 + chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - chr22 30160419 30160661 BC032941_cds_0_0_chr22_30160420_r 0 - -chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + X X X X X X -chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - X X X X X X +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + ~ ~ ~ ~ ~ ~ +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - ~ ~ ~ ~ ~ ~ chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + chr5 131424298 131424460 NM_000588_cds_0_0_chr5_131424299_f 0 + chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - chr5 131556601 131556672 BC035813_cds_0_0_chr5_131556602_r 0 - chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + chr5 131621326 131621419 BC003096_cds_0_0_chr5_131621327_f 0 + -chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - X X X X X X +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - ~ ~ ~ ~ ~ ~ chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - chr6 108299600 108299744 NM_007214_cds_0_0_chr6_108299601_r 0 - chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + chr6 108594662 108594687 NM_003269_cds_0_0_chr6_108594663_f 0 + chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - chr6 108640045 108640151 NM_003795_cds_0_0_chr6_108640046_r 0 - @@ -57,8 +57,8 @@ chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - chr8 118881131 118881317 NM_000127_cds_0_0_chr8_118881132_r 0 - chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + chr9 128764156 128764189 BC051300_cds_0_0_chr9_128764157_f 0 + chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - chr9 128787519 128789136 NM_014908_cds_0_0_chr9_128787520_r 0 - -chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + X X X X X X -chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - X X X X X X +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + ~ ~ ~ ~ ~ ~ +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - ~ ~ ~ ~ ~ ~ chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + chrX 122745047 122745924 NM_001167_cds_1_0_chrX_122745048_f 0 + chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - chrX 152648964 152649196 NM_000425_cds_0_0_chrX_152648965_r 0 - chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + chrX 152691446 152691471 AF101728_cds_0_0_chrX_152691447_f 0 + diff -r b01c8245ef74 -r 14214b45db3f test-data/sam_to_bam_in1.sam --- a/test-data/sam_to_bam_in1.sam Mon Dec 07 14:45:09 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1698:516 16 chrM 2735 25 36M * 0 0 TTTACACTCAGAGGTTCAACTCCTCTCNNTAACAAC I9IIIII5IIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1491:637 16 chrM 10864 25 36M * 0 0 TGTAGAAGCCCCAATTGCCGGATCCATNNTGCTAGC DBAIIIIIIIIIIIFIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1711:249 16 chrM 10617 25 36M * 0 0 ACCAAACAGAACGCCTGAACGCAGGCCNNTACTTCC IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1634:211 0 chrM 9350 25 36M * 0 0 GAAGCAGNNGCTTGATACTGACACTTCGTCGACGTA IIIIIII""IIIIIIIIIIIIIIIIIIIIII9IIDF NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1218:141 16 chrM 14062 25 36M * 0 0 ACAAAACTAACAACAAAAATAACACTCNNAATAAAC I+IIII1IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1398:854 16 chrM 3921 25 36M * 0 0 CACCCTTCCCGTACTAATAAATCCCCTNNTCTTCAC IIIII=AIIIIIIIIIIIIIIBIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1310:991 16 chrM 10002 25 36M * 0 0 CTCCTATGCCTAGAAGGAATAATACTANNACTATTC I:2IEI:IIDIIIIII4IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1716:413 0 chrM 6040 25 36M * 0 0 GATCCAANNCTTTATCAACACCTATTCTGATTCTTC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1630:59 16 chrM 12387 25 36M * 0 0 TCATACTCGACCCCAACCTTACCAACCNNCCGCTCC FIIHII;IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 diff -r b01c8245ef74 -r 14214b45db3f test-data/sam_to_bam_in2.sam --- a/test-data/sam_to_bam_in2.sam Mon Dec 07 14:45:09 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -HWI-EAS91_1_30788AAXX:1:1:1095:605 0 chrM 23 25 36M * 0 0 AAGCAAGNNACTGAAAATGCCTAGATGAGTATTCTT IIIIIII""IIIIIIIIIIIIIIIEIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1650:1185 0 chrM 14956 25 36M * 0 0 ACCCCAGNNAACCCTCTCAGCACTCCCCCTCATATT IIIIIII""IIIIIIIIIIII6IIIIIIIII5I-II NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:799:192 16 chrM 8421 25 36M * 0 0 CCTGTAGCCCTAGCCGTGCGGCTAACCNNTAACATT II%::I<IIIIIEIII8IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1082:719 16 chrM 7191 25 36M * 0 0 TAAATTAACCCATACCAGCACCATAGANNCTCAAGA <III0EII3+3I29I>III8AIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1746:1180 16 chrM 12013 25 36M * 0 0 CCTAAGCTTCAAACTAGATTACTTCTCNNTAATTTT IIIIIIIIFIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:606:460 0 chrM 4552 25 36M * 0 0 TTAATTTNNATTATAATAACACTCACAATATTCATA IIIIIII""IIIIIIIIIIIIIIIIII?I6IIIII6 NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1059:362 16 chrM 7348 25 36M * 0 0 GGCCACCAATGATACTGAAGCTACGAGNNTACCGAT II/<)2IIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1483:1161 16 chrM 15080 25 36M * 0 0 TCCTGATCCTAGCACTCATCCCCACCCNNCACATAT HIIIIIFIIAIHIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1273:600 16 chrM 13855 25 36M * 0 0 GTATTAGACACCCATACCTCAGGATACNNCTCAGTA IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 -HWI-EAS91_1_30788AAXX:1:1:1190:1283 16 chrM 15338 25 36M * 0 0 TATATCGCACATTACCCTGGTCTTGTANNCCAGAAA EIII?-IIIIIAIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 diff -r b01c8245ef74 -r 14214b45db3f test-data/sam_to_bam_out1.bam Binary file test-data/sam_to_bam_out1.bam has changed diff -r b01c8245ef74 -r 14214b45db3f test-data/sam_to_bam_out2.bam Binary file test-data/sam_to_bam_out2.bam has changed diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/biomart.xml --- a/tools/data_source/biomart.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/biomart.xml Mon Dec 07 14:45:47 2009 -0500 @@ -7,7 +7,7 @@ TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. --> -<tool name="BioMart" id="biomart" tool_type="data_source" URL_method="get" version="1.0.1"> +<tool name="BioMart" id="biomart" tool_type="data_source" version="1.0.1"> <description>Central server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> @@ -16,16 +16,17 @@ </inputs> <request_param_translation> <request_param galaxy_name="URL" remote_name="URL" missing=""> - <add_to_url> - <param_from_source name="_export" missing="1" /> - <param_from_source name="GALAXY_URL" missing="0" /> - </add_to_url> + <append_param separator="&" first_separator="?" join="="> + <value name="_export" missing="1" /> + <value name="GALAXY_URL" missing="0" /> + </append_param> </request_param> <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > - <data_type_translation> - <format galaxy_format="tabular" remote_format="TSV" /> - </data_type_translation> + <value_translation> + <value galaxy_value="tabular" remote_value="TSV" /> + </value_translation> </request_param> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/biomart_test.xml --- a/tools/data_source/biomart_test.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/biomart_test.xml Mon Dec 07 14:45:47 2009 -0500 @@ -7,7 +7,7 @@ TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. --> -<tool name="BioMart" id="biomart_test" tool_type="data_source" URL_method="get" version="1.0.1"> +<tool name="BioMart" id="biomart_test" tool_type="data_source" version="1.0.1"> <description>Test server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> @@ -16,16 +16,17 @@ </inputs> <request_param_translation> <request_param galaxy_name="URL" remote_name="URL" missing=""> - <add_to_url> - <param_from_source name="_export" missing="1" /> - <param_from_source name="GALAXY_URL" missing="0" /> - </add_to_url> + <append_param separator="&" first_separator="?" join="="> + <value name="_export" missing="1" /> + <value name="GALAXY_URL" missing="0" /> + </append_param> </request_param> <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > - <data_type_translation> - <format galaxy_format="tabular" remote_format="TSV" /> - </data_type_translation> + <value_translation> + <value galaxy_value="tabular" remote_value="TSV" /> + </value_translation> </request_param> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/data_source.py --- a/tools/data_source/data_source.py Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/data_source.py Mon Dec 07 14:45:47 2009 -0500 @@ -12,6 +12,7 @@ sys.exit() def check_gzip( filename ): + # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py ) temp = open( filename, "U" ) magic_check = temp.read( 2 ) temp.close() @@ -66,6 +67,7 @@ out.write( chunk ) out.close() if check_gzip( filename ): + # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py ) fd, uncompressed = tempfile.mkstemp() gzipped_file = gzip.GzipFile( filename ) while 1: diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/epigraph_import.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source" URL_method="get"> +<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> <description> server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> @@ -12,6 +12,7 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/epigraph_import_test.xml --- a/tools/data_source/epigraph_import_test.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/epigraph_import_test.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source" URL_method="get"> +<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source"> <description> test server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> @@ -12,6 +12,7 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/eupathdb.xml --- a/tools/data_source/eupathdb.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/eupathdb.xml Mon Dec 07 14:45:47 2009 -0500 @@ -3,7 +3,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="EuPathDB" id="eupathdb" tool_type="data_source" url_method="post"> +<tool name="EuPathDB" id="eupathdb" tool_type="data_source"> <description>server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://galaxy.eupathdb.org/eupathdb.galaxy/queries_tools.jsp" check_values="false" method="post"> @@ -11,15 +11,16 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=eupathdb" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing=""> - <add_to_url> - <param_from_source name="dbkey" missing="?" /> - <param_from_source name="wdk_history_id" missing="" /> - <param_from_source name="wdkReportFormat" missing="tabular" /> - <param_from_source name="selectedFields" missing="" /> - <param_from_source name="includeHeader" missing="yes" /> - <param_from_source name="downloadType" missing="plain" /> - </add_to_url> + <append_param separator="&" first_separator="?" join="="> + <value name="dbkey" missing="?" /> + <value name="wdk_history_id" missing="" /> + <value name="wdkReportFormat" missing="tabular" /> + <value name="selectedFields" missing="" /> + <value name="includeHeader" missing="yes" /> + <value name="downloadType" missing="plain" /> + </append_param> </request_param> <request_param galaxy_name="format" remote_name="wdkReportFormat" missing="tabular" /> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/flymine.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="Flymine" id="flymine" tool_type="data_source" URL_method="post"> +<tool name="Flymine" id="flymine" tool_type="data_source"> <description>server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.flymine.org" check_values="false" method="get" target="_top"> @@ -12,6 +12,7 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/flymine_test.xml --- a/tools/data_source/flymine_test.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/flymine_test.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="Flymine test" id="flymine_test" tool_type="data_source" URL_method="post"> +<tool name="Flymine test" id="flymine_test" tool_type="data_source"> <description>server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top"> @@ -12,6 +12,7 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/gramene_mart.xml --- a/tools/data_source/gramene_mart.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/gramene_mart.xml Mon Dec 07 14:45:47 2009 -0500 @@ -7,7 +7,7 @@ TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. --> -<tool name="GrameneMart" id="gramenemart" tool_type="data_source" URL_method="get" version="1.0.1"> +<tool name="GrameneMart" id="gramenemart" tool_type="data_source" version="1.0.1"> <description> Central server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.gramene.org/biomart/martview" check_values="false" method="get" target="_top"> @@ -16,16 +16,17 @@ </inputs> <request_param_translation> <request_param galaxy_name="URL" remote_name="URL" missing=""> - <add_to_url> - <param_from_source name="_export" missing="1" /> - <param_from_source name="GALAXY_URL" missing="0" /> - </add_to_url> + <append_param separator="&" first_separator="?" join="="> + <value name="_export" missing="1" /> + <value name="GALAXY_URL" missing="0" /> + </append_param> </request_param> <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular"> - <data_type_translation> - <format galaxy_format="tabular" remote_format="TSV" /> - </data_type_translation> + <value_translation> + <value galaxy_value="tabular" remote_value="TSV" /> + </value_translation> </request_param> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/modmine.xml --- a/tools/data_source/modmine.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/modmine.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="modMine" id="modmine" tool_type="data_source" URL_method="post"> +<tool name="modMine" id="modmine" tool_type="data_source"> <description>server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://intermine.modencode.org/" check_values="false" method="get" target="_top"> @@ -12,6 +12,7 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=modmine" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/ratmine.xml --- a/tools/data_source/ratmine.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/ratmine.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="Ratmine" id="ratmine" tool_type="data_source" URL_method="post"> +<tool name="Ratmine" id="ratmine" tool_type="data_source"> <description>server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get" target="_top"> @@ -12,6 +12,7 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="organism" missing="" /> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/ucsc_tablebrowser.xml --- a/tools/data_source/ucsc_tablebrowser.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/ucsc_tablebrowser.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source" URL_method="post"> +<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source"> <description>table browser</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> @@ -16,20 +16,21 @@ <param name="hgta_outputType" type="hidden" value="bed" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <data_type_translation> - <format galaxy_format="tabular" remote_format="primaryTable" /> - <format galaxy_format="tabular" remote_format="selectedFields" /> - <format galaxy_format="wig" remote_format="wigData" /> - <format galaxy_format="interval" remote_format="tab" /> - <format galaxy_format="html" remote_format="hyperlinks" /> - <format galaxy_format="fasta" remote_format="sequence" /> - </data_type_translation> + <value_translation> + <value galaxy_value="tabular" remote_value="primaryTable" /> + <value galaxy_value="tabular" remote_value="selectedFields" /> + <value galaxy_value="wig" remote_value="wigData" /> + <value galaxy_value="interval" remote_value="tab" /> + <value galaxy_value="html" remote_value="hyperlinks" /> + <value galaxy_value="fasta" remote_value="sequence" /> + </value_translation> </request_param> </request_param_translation> <uihints minwidth="800"/> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/ucsc_tablebrowser_archaea.xml --- a/tools/data_source/ucsc_tablebrowser_archaea.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/ucsc_tablebrowser_archaea.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source" URL_method="post"> +<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source"> <description>table browser</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> @@ -16,20 +16,21 @@ <param name="hgta_outputType" type="hidden" value="bed" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_track" missing="" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" /> + <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> + <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <data_type_translation> - <format galaxy_format="tabular" remote_format="primaryTable" /> - <format galaxy_format="tabular" remote_format="selectedFields" /> - <format galaxy_format="wig" remote_format="wigdata" /> - <format galaxy_format="interval" remote_format="tab" /> - <format galaxy_format="html" remote_format="hyperlinks" /> - <format galaxy_format="fasta" remote_format="sequence" /> - </data_type_translation> + <value_translation> + <value galaxy_value="tabular" remote_value="primaryTable" /> + <value galaxy_value="tabular" remote_value="selectedFields" /> + <value galaxy_value="wig" remote_value="wigData" /> + <value galaxy_value="interval" remote_value="tab" /> + <value galaxy_value="html" remote_value="hyperlinks" /> + <value galaxy_value="fasta" remote_value="sequence" /> + </value_translation> </request_param> </request_param_translation> <uihints minwidth="800"/> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/ucsc_tablebrowser_test.xml --- a/tools/data_source/ucsc_tablebrowser_test.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/ucsc_tablebrowser_test.xml Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ the initial response. If value of 'URL_method' is 'post', any additional params coming back in the initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. --> -<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source" URL_method="post"> +<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source"> <description>table browser</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> @@ -16,20 +16,21 @@ <param name="hgta_outputType" type="hidden" value="bed" /> </inputs> <request_param_translation> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="URL" remote_name="URL" missing="" /> <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_track" missing="" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" /> + <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> + <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <data_type_translation> - <format galaxy_format="tabular" remote_format="primaryTable" /> - <format galaxy_format="tabular" remote_format="selectedFields" /> - <format galaxy_format="wig" remote_format="wigdata" /> - <format galaxy_format="interval" remote_format="tab" /> - <format galaxy_format="html" remote_format="hyperlinks" /> - <format galaxy_format="fasta" remote_format="sequence" /> - </data_type_translation> + <value_translation> + <value galaxy_value="tabular" remote_value="primaryTable" /> + <value galaxy_value="tabular" remote_value="selectedFields" /> + <value galaxy_value="wig" remote_value="wigData" /> + <value galaxy_value="interval" remote_value="tab" /> + <value galaxy_value="html" remote_value="hyperlinks" /> + <value galaxy_value="fasta" remote_value="sequence" /> + </value_translation> </request_param> </request_param_translation> <uihints minwidth="800"/> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/upload.py --- a/tools/data_source/upload.py Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/upload.py Mon Dec 07 14:45:47 2009 -0500 @@ -4,7 +4,7 @@ # WARNING: Changes in this tool (particularly as related to parsing) may need # to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools -import urllib, sys, os, gzip, tempfile, shutil, re, gzip, zipfile, codecs +import urllib, sys, os, gzip, tempfile, shutil, re, gzip, zipfile, codecs, binascii from galaxy import eggs # need to import model before sniff to resolve a circular import dependency import galaxy.model @@ -18,7 +18,6 @@ def stop_err( msg, ret=1 ): sys.stderr.write( msg ) sys.exit( ret ) - def file_err( msg, dataset, json_file ): json_file.write( to_json_string( dict( type = 'dataset', ext = 'data', @@ -28,7 +27,6 @@ os.remove( dataset.path ) except: pass - def safe_dict(d): """ Recursively clone json structure with UTF-8 dictionary keys @@ -40,7 +38,6 @@ return [safe_dict(x) for x in d] else: return d - def check_html( temp_name, chunk=None ): if chunk is None: temp = open(temp_name, "U") @@ -64,7 +61,6 @@ if chunk is None: temp.close() return False - def check_binary( temp_name, chunk=None ): if chunk is None: temp = open( temp_name, "U" ) @@ -85,21 +81,42 @@ if chunk is None: temp.close() return False - def check_gzip( temp_name ): + # This is sort of hacky. BAM is compressed in the BGZF format, and must + # not be uncompressed in upon upload ( it will be detected as gzipped ). + # The tuple we're returning from here contains boolean values for + # ( is_compressed, is_valid, is_bam ). temp = open( temp_name, "U" ) magic_check = temp.read( 2 ) temp.close() if magic_check != util.gzip_magic: - return ( False, False ) + return ( False, False, False ) CHUNK_SIZE = 2**15 # 32Kb gzipped_file = gzip.GzipFile( temp_name ) chunk = gzipped_file.read( CHUNK_SIZE ) gzipped_file.close() - if check_html( temp_name, chunk=chunk ) or check_binary( temp_name, chunk=chunk ): - return( True, False ) - return ( True, True ) - + if check_html( temp_name, chunk=chunk ): + return ( True, False, False ) + if check_binary( temp_name, chunk=chunk ): + # We do support some binary data types, so check if the compressed binary file is valid + # We currently only check for [ 'sff', 'bam' ] + # TODO: this should be fixed to more easily support future-supported binary data types. + # This is currently just copied from the sniff methods. + # The first 4 bytes of any bam file is 'BAM\1', and the file is binary. + try: + header = gzip.open( temp_name ).read(4) + if binascii.b2a_hex( header ) == binascii.hexlify( 'BAM\1' ): + return ( True, True, True ) + except: + pass + try: + header = gzip.open( temp_name ).read(4) + if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ): + return ( True, True, False ) + except: + pass + return ( True, False, False ) + return ( True, True, False ) def check_zip( temp_name ): if not zipfile.is_zipfile( temp_name ): return ( False, False, None ) @@ -116,14 +133,12 @@ if ext != test_ext: return ( True, False, test_ext ) return ( True, True, test_ext ) - def parse_outputs( args ): rval = {} for arg in args: id, files_path, path = arg.split( ':', 2 ) rval[int( id )] = ( path, files_path ) return rval - def add_file( dataset, json_file, output_path ): data_type = None line_count = None @@ -153,15 +168,19 @@ ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) else: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress - is_gzipped, is_valid = check_gzip( dataset.path ) + is_gzipped, is_valid, is_bam = check_gzip( dataset.path ) if is_gzipped and not is_valid: file_err( 'The uploaded file contains inappropriate content', dataset, json_file ) return - elif is_gzipped and is_valid: - # We need to uncompress the temp_name file + elif is_gzipped and is_valid and is_bam: + ext = 'bam' + data_type = 'bam' + elif is_gzipped and is_valid and not is_bam: + # We need to uncompress the temp_name file, but BAM files must remain compressed + # in order for samtools to function on them CHUNK_SIZE = 2**20 # 1Mb - fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( dataset.path ) ) - gzipped_file = gzip.GzipFile( dataset.path ) + fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( dataset.path ), text=False ) + gzipped_file = gzip.GzipFile( dataset.path, 'rb' ) while 1: try: chunk = gzipped_file.read( CHUNK_SIZE ) @@ -229,7 +248,7 @@ if check_html( dataset.path ): file_err( 'The uploaded file contains inappropriate content', dataset, json_file ) return - if data_type != 'binary' and data_type != 'zip': + if data_type != 'bam' and data_type != 'binary' and data_type != 'zip': if dataset.space_to_tab: line_count = sniff.convert_newlines_sep2tabs( dataset.path ) else: diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/wormbase.xml --- a/tools/data_source/wormbase.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/wormbase.xml Mon Dec 07 14:45:47 2009 -0500 @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool name="Wormbase" id="wormbase" tool_type="data_source" URL_method="post"> +<tool name="Wormbase" id="wormbase" tool_type="data_source"> <description>server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.wormbase.org/db/seq/gbgff/c_elegans/" check_values="false" target="_top"> @@ -8,14 +8,15 @@ </inputs> <request_param_translation> <request_param galaxy_name="URL" remote_name="URL" missing=""> - <add_to_url> - <param_from_source name="d" missing="" /> - <param_from_source name="dbkey" missing="" /> - <param_from_source name="q" missing="" /> - <param_from_source name="s" missing="" /> - <param_from_source name="t" missing="" /> - </add_to_url> + <append_param separator="&" first_separator="?" join="="> + <value name="d" missing="" /> + <value name="dbkey" missing="" /> + <value name="q" missing="" /> + <value name="s" missing="" /> + <value name="t" missing="" /> + </append_param> </request_param> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> </request_param_translation> <uihints minwidth="800"/> diff -r b01c8245ef74 -r 14214b45db3f tools/data_source/wormbase_test.xml --- a/tools/data_source/wormbase_test.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/data_source/wormbase_test.xml Mon Dec 07 14:45:47 2009 -0500 @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool name="Wormbase" id="wormbase_test" tool_type="data_source" URL_method="post"> +<tool name="Wormbase" id="wormbase_test" tool_type="data_source"> <description>test server</description> <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://dev.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" target="_top"> @@ -8,14 +8,15 @@ </inputs> <request_param_translation> <request_param galaxy_name="URL" remote_name="URL" missing=""> - <add_to_url> - <param_from_source name="d" missing="" /> - <param_from_source name="dbkey" missing="" /> - <param_from_source name="q" missing="" /> - <param_from_source name="s" missing="" /> - <param_from_source name="t" missing="" /> - </add_to_url> + <append_param separator="&" first_separator="?" join="="> + <value name="d" missing="" /> + <value name="dbkey" missing="" /> + <value name="q" missing="" /> + <value name="s" missing="" /> + <value name="t" missing="" /> + </append_param> </request_param> + <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> </request_param_translation> <uihints minwidth="800"/> diff -r b01c8245ef74 -r 14214b45db3f tools/emboss_5/emboss_fuzznuc.xml --- a/tools/emboss_5/emboss_fuzznuc.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/emboss_5/emboss_fuzznuc.xml Mon Dec 07 14:45:47 2009 -0500 @@ -1,12 +1,20 @@ -<tool id="EMBOSS: fuzznuc37" name="fuzznuc" version="5.0.0"> +<tool id="EMBOSS: fuzznuc37" name="fuzznuc" version="5.0.1"> <description>Nucleic acid pattern search</description> - <command>fuzznuc -sequence $input1 -outfile $out_file1 -pattern "$pattern" -pmismatch $mismatch -complement $complement -rformat2 $out_format1 -auto</command> + <command>fuzznuc -sequence $input1 -outfile $out_file1 -pattern '$pattern' -pmismatch $mismatch -complement $complement -rformat2 $out_format1 -auto</command> <inputs> <param format="fasta" name="input1" type="data"> <label>Sequences</label> </param> <param name="pattern" size="5" type="text" value=""> <label>Search pattern</label> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target=""/> + </mapping> + </sanitizer> </param> <param name="mismatch" size="5" type="text" value="0"> <label>Number of mismatches</label> diff -r b01c8245ef74 -r 14214b45db3f tools/filters/grep.py --- a/tools/filters/grep.py Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/filters/grep.py Mon Dec 07 14:45:47 2009 -0500 @@ -13,9 +13,11 @@ # -v true or false (output NON-matching lines) import sys +import os import re import string import commands +from tempfile import NamedTemporaryFile # This function is exceedingly useful, perhaps package for reuse? def getopts(argv): @@ -72,17 +74,15 @@ '[' :'__ob__', ']' :'__cb__', '{' :'__oc__', - '}' :'__cc__', - + '}' :'__cc__' } - + + #with new sanitizing we only need to replace for single quote, but this needs to remain for backwards compatibility for key, value in mapped_chars.items(): pattern = pattern.replace(value, key) - - pattern = pattern.replace('\'', '') - - fileRegEx = re.compile("^[A-Za-z0-9./\-_]+$") - invertRegEx = re.compile("(true)|(false)") + + fileRegEx = re.compile("^[A-Za-z0-9./\-_]+$") #why? + invertRegEx = re.compile("(true)|(false)") #why? if not fileRegEx.match(outputfile): print "Illegal output filename." @@ -94,16 +94,29 @@ print "Illegal invert option." return -7 - # grep + # invert grep search? if invert == "true": - invertflag = " -v" + invertflag = " -v" + print "Not matching pattern: %s" % pattern else: - invertflag = "" - - commandline = "grep -E"+invertflag+" '"+pattern+"' "+inputfile+" > "+outputfile - + invertflag = "" + print "Matching pattern: %s" % pattern + + #Create temp file holding pattern + #By using a file to hold the pattern, we don't have worry about sanitizing grep commandline and can include single quotes in pattern + pattern_file_name = NamedTemporaryFile().name + open( pattern_file_name, 'w' ).write( pattern ) + + #generate grep command + commandline = "grep -E %s -f %s %s > %s" % ( invertflag, pattern_file_name, inputfile, outputfile ) + + #run grep errorcode, stdout = commands.getstatusoutput(commandline) + #remove temp pattern file + os.unlink( pattern_file_name ) + + #return error code return errorcode if __name__ == "__main__": diff -r b01c8245ef74 -r 14214b45db3f tools/filters/grep.xml --- a/tools/filters/grep.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/filters/grep.xml Mon Dec 07 14:45:47 2009 -0500 @@ -1,4 +1,4 @@ -<tool id="Grep1" name="Select"> +<tool id="Grep1" name="Select" version="1.0.1"> <description>lines that match an expression</description> <command interpreter="python">grep.py -i $input -o $out_file1 -pattern '$pattern' -v $invert</command> <inputs> @@ -7,7 +7,16 @@ <option value="false">Matching</option> <option value="true">NOT Matching</option> </param> - <param name="pattern" size="40" type="text" value="^chr([0-9A-Za-z])+" label="the pattern" help="here you can enter text or regular expression (for syntax check lower part of this frame)"/> + <param name="pattern" size="40" type="text" value="^chr([0-9A-Za-z])+" label="the pattern" help="here you can enter text or regular expression (for syntax check lower part of this frame)"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> </inputs> <outputs> <data format="input" name="out_file1" metadata_source="input"/> diff -r b01c8245ef74 -r 14214b45db3f tools/filters/joiner.xml --- a/tools/filters/joiner.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/filters/joiner.xml Mon Dec 07 14:45:47 2009 -0500 @@ -1,4 +1,4 @@ -<tool id="join1" name="Join two Queries" version="2.0.1"> +<tool id="join1" name="Join two Queries" version="2.0.2"> <description>side by side on a specified field</description> <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command> <inputs> diff -r b01c8245ef74 -r 14214b45db3f tools/plotting/xy_plot.xml --- a/tools/plotting/xy_plot.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/plotting/xy_plot.xml Mon Dec 07 14:45:47 2009 -0500 @@ -94,9 +94,7 @@ <outputs> <data format="pdf" name="out_file1" /> </outputs> - <!-- - TODO: figure out how to change the submit_form() method to correctly handle refreshing the - form when the input for refresh is within a repeat construct ( like this tool ) + <tests> <test> <param name="main" value="Example XY Plot"/> @@ -106,13 +104,12 @@ <param name="xcol" value="1"/> <param name="ycol" value="2"/> <param name="type" value="line"/> - <param name="lty" value="dashed"/> + <param name="lty" value="2"/> <param name="col" value="2"/> <param name="lwd" value="1.0"/> <output name="out_file1" file="XY_Plot_1_out.pdf"/> </test> </tests> - --> <help> .. class:: infomark diff -r b01c8245ef74 -r 14214b45db3f tools/samtools/sam_to_bam.py --- a/tools/samtools/sam_to_bam.py Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/samtools/sam_to_bam.py Mon Dec 07 14:45:47 2009 -0500 @@ -1,31 +1,27 @@ #! /usr/bin/python - """ -Converts SAM data to BAM format. - -usage: %prog [options] - -i, --input1=i: SAM file to be converted - -d, --dbkey=d: dbkey value - -r, --ref_file=r: Reference file if choosing from history - -o, --output1=o: BAM output - -x, --index_dir=x: Index directory - -usage: %prog input_file dbkey ref_list output_file +Converts SAM data to sorted BAM data. +usage: sam_to_bam.py [options] + --input1: SAM file to be converted + --dbkey: dbkey value + --ref_file: Reference file if choosing from history + --output1: output dataset in bam format + --index_dir: GALAXY_DATA_INDEX_DIR """ -import os, sys, tempfile +import optparse, os, sys, subprocess, tempfile, shutil, gzip from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) from bx.cookbook import doc_optparse +from galaxy import util def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() -def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): - seq_file = "%s/sam_fa_indices.loc" % GALAXY_DATA_INDEX_DIR +def check_seq_file( dbkey, cached_seqs_pointer_file ): seq_path = '' - for line in open( seq_file ): + for line in open( cached_seqs_pointer_file ): line = line.rstrip( '\r\n' ) if line and not line.startswith( "#" ) and line.startswith( 'index' ): fields = line.split( '\t' ) @@ -38,48 +34,80 @@ def __main__(): #Parse Command Line - options, args = doc_optparse.parse( __doc__ ) - seq_path = check_seq_file( options.dbkey, options.index_dir ) + parser = optparse.OptionParser() + parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) + parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) + parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) + parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) + parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + ( options, args ) = parser.parse_args() + + cached_seqs_pointer_file = "%s/sam_fa_indices.loc" % options.index_dir + if not os.path.exists( cached_seqs_pointer_file ): + stop_err( "The required file (%s) does not exist." % cached_seqs_pointer_file ) + # If found for the dbkey, seq_path will look something like /depot/data2/galaxy/equCab2/sam_index/equCab2.fa, + # and the equCab2.fa file will contain fasta sequences. + seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) tmp_dir = tempfile.gettempdir() - os.chdir(tmp_dir) - tmpf1 = tempfile.NamedTemporaryFile(dir=tmp_dir) - tmpf1fai = '%s.fai' % tmpf1.name - tmpf2 = tempfile.NamedTemporaryFile(dir=tmp_dir) - tmpf3 = tempfile.NamedTemporaryFile(dir=tmp_dir) - tmpf3bam = '%s.bam' % tmpf3.name if options.ref_file == "None": - full_path = "%s.fai" % seq_path - if not os.path.exists( full_path ): - stop_err( "No sequences are available for '%s', request them by reporting this error." % options.dbkey ) - cmd1 = "cp %s %s; cp %s %s" % (seq_path, tmpf1.name, full_path, tmpf1fai) + # We're using locally cached reference sequences( e.g., /depot/data2/galaxy/equCab2/sam_index/equCab2.fa ). + # The indexes for /depot/data2/galaxy/equCab2/sam_index/equCab2.fa will be contained in + # a file named /depot/data2/galaxy/equCab2/sam_index/equCab2.fa.fai + fai_index_file_path = "%s.fai" % seq_path + if not os.path.exists( fai_index_file_path ): + stop_err( "No sequences are available for build (%s), request them by reporting this error." % options.dbkey ) else: - cmd1 = "cp %s %s; samtools faidx %s 2>/dev/null" % (options.ref_file, tmpf1.name, tmpf1.name) - cmd2 = "samtools view -bt %s -o %s %s 2>/dev/null" % (tmpf1fai, tmpf2.name, options.input1) - cmd3 = "samtools sort %s %s 2>/dev/null" % (tmpf2.name, tmpf3.name) - cmd4 = "cp %s %s" % (tmpf3bam, options.output1) - # either create index based on fa file or copy provided index to temp directory + try: + # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will: + # - index reference sequence in the FASTA format or extract subsequence from indexed reference sequence + # - if no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk + # - if regions are specified, the subsequences will be retrieved and printed to stdout in the FASTA format + # - the input file can be compressed in the RAZF format. + # IMPORTANT NOTE: a real weakness here is that we are creating indexes for the history dataset + # every time we run this tool. It would be nice if we could somehow keep track of user's specific + # index files so they could be re-used. + fai_index_file_path = os.path.join( tmp_dir, os.path.basename( options.ref_file ) ) + # At this point, fai_index_file_path will look something like /tmp/dataset_13.dat + os.symlink( options.ref_file, fai_index_file_path ) + command = "samtools faidx %s 2>/dev/null" % fai_index_file_path + proc = subprocess.Popen( args=command, shell=True ) + proc.wait() + except Exception, e: + stop_err( 'Error creating indexes from reference (%s), %s' % ( options.ref_file, str( e ) ) ) try: - os.system(cmd1) - except Exception, eq: - stop_err("Error creating the reference list index.\n" + str(eq)) - # create original bam file + # Extract all alignments from the input SAM file to BAM format ( since no region is specified, all the alignments will be extracted ). + tmp_aligns_file = tempfile.NamedTemporaryFile() + tmp_aligns_file_name = tmp_aligns_file.name + tmp_aligns_file.close() + # IMPORTANT NOTE: for some reason the samtools view command gzips the resulting bam file without warning, + # and the docs do not currently state that this occurs ( very bad ). + command = "samtools view -bt %s -o %s %s 2>/dev/null" % ( fai_index_file_path, tmp_aligns_file_name, options.input1 ) + proc = subprocess.Popen( args=command, shell=True ) + proc.wait() + except Exception, e: + stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) ) try: - os.system(cmd2) - except Exception, eq: - stop_err("Error running view command.\n" + str(eq)) - # sort original bam file to produce sorted output bam file - try: - os.system(cmd3) - os.system(cmd4) - except Exception, eq: - stop_err("Error sorting data and creating output file.\n" + str(eq)) - # cleanup temp files - tmpf1.close() - tmpf2.close() - tmpf3.close() - if os.path.exists(tmpf1fai): - os.remove(tmpf1fai) - if os.path.exists(tmpf3bam): - os.remove(tmpf3bam) + # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command + # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted + # into memory ( controlled by option -m ). + tmp_sorted_aligns_file = tempfile.NamedTemporaryFile() + tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name + tmp_sorted_aligns_file.close() + command = "samtools sort %s %s 2>/dev/null" % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name ) + proc = subprocess.Popen( args=command, shell=True ) + proc.wait() + except Exception, e: + stop_err( 'Error sorting alignments from (%s), %s' % ( tmp_aligns_file_name, str( e ) ) ) + # Move tmp_aligns_file_name to our output dataset location + sorted_bam_file = '%s.bam' % tmp_sorted_aligns_file_name + shutil.move( sorted_bam_file, options.output1 ) + if options.ref_file != "None": + # Remove the symlink from /tmp/dataset_13.dat to ~/database/files/000/dataset_13.dat + os.unlink( fai_index_file_path ) + # Remove the index file + index_file_name = '%s.fai' % fai_index_file_path + os.unlink( index_file_name ) + # Remove the tmp_aligns_file_name + os.unlink( tmp_aligns_file_name ) if __name__=="__main__": __main__() diff -r b01c8245ef74 -r 14214b45db3f tools/samtools/sam_to_bam.xml --- a/tools/samtools/sam_to_bam.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/samtools/sam_to_bam.xml Mon Dec 07 14:45:47 2009 -0500 @@ -1,32 +1,29 @@ <tool id="sam_to_bam" name="SAM-to-BAM" version="1.0.0"> <description>converts SAM format to BAM format</description> <command interpreter="python"> - sam_to_bam.py - --input1=$source.input1 - --dbkey=${input1.metadata.dbkey} - #if $source.indexSource == "history": - --ref_file=$ref_file - #else - --ref_file="None" - #end if - --output1=$output1 - --index_dir=${GALAXY_DATA_INDEX_DIR} +sam_to_bam.py --input1=$source.input1 --dbkey=${input1.metadata.dbkey} +#if $source.index_source == "history": +--ref_file=$source.ref_file +#else +--ref_file="None" +#end if +--output1=$output1 --index_dir=${GALAXY_DATA_INDEX_DIR} </command> <inputs> <conditional name="source"> - <param name="indexSource" type="select" label="Choose the source for the reference list"> - <option value="built_in">Built-in</option> + <param name="index_source" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> <option value="history">History</option> </param> - <when value="built_in"> + <when value="cached"> <param name="input1" type="data" format="sam" label="SAM File to Convert"> <validator type="unspecified_build" /> <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> </param> </when> <when value="history"> - <param name="input1" type="data" format="sam" label="SAM File to Convert" /> - <param name="ref_file" type="data" format="fasta" label="Choose the reference file" /> + <param name="input1" type="data" format="sam" label="Convert SAM file" /> + <param name="ref_file" type="data" format="fasta" label="Using reference file" /> </when> </conditional> </inputs> @@ -34,19 +31,16 @@ <data name="output1" format="bam"/> </outputs> <tests> + <!-- + # IMPORTANT NOTE: for some reason the samtools view command gzips the resulting bam file without warning, + # and the docs do not currently state that this occurs ( very bad ). + --> <test> - <param name="indexSource" value="history" /> - <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" /> - <param name="ref_file" value="chrM.fa" ftype="fasta" /> + <param name="index_source" value="history" /> + <param name="input1" value="3.sam" ftype="sam" /> + <param name="ref_file" value="chr_m.fasta" ftype="fasta" /> <output name="output1" file="sam_to_bam_out1.bam" /> </test> -<!-- chrM is not a built-in dbkey (in the builds.txt list) so can't be tested - <test> - <param name="indexSource" value="built_in" /> - <param name="input1" value="sam_to_bam_in2.sam" ftype="sam" dbkey="chrM" /> - <output name="output1" file="sam_to_bam_out2.bam" /> - </test> ---> </tests> <help> diff -r b01c8245ef74 -r 14214b45db3f tools/sr_mapping/bowtie_wrapper.xml --- a/tools/sr_mapping/bowtie_wrapper.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/sr_mapping/bowtie_wrapper.xml Mon Dec 07 14:45:47 2009 -0500 @@ -338,7 +338,7 @@ </test> <test> <param name="genomeSource" value="history" /> - <param name="ownFile" value="chrM.fa" /> + <param name="ownFile" value="chr_m.fasta" /> <param name="index_settings" value="index_pre_set" /> <param name="sPaired" value="paired" /> <param name="input1" ftype="fastqsanger" value="bowtie_in2.fastq" /> @@ -349,7 +349,7 @@ </test> <test> <param name="genomeSource" value="history" /> - <param name="ownFile" value="chrM.fa" /> + <param name="ownFile" value="chr_m.fasta" /> <param name="index_settings" value="index_full" /> <param name="auto_b" value="set" /> <param name="packed" value="unpacked" /> diff -r b01c8245ef74 -r 14214b45db3f tools/sr_mapping/lastz_wrapper.xml --- a/tools/sr_mapping/lastz_wrapper.xml Mon Dec 07 14:45:09 2009 -0500 +++ b/tools/sr_mapping/lastz_wrapper.xml Mon Dec 07 14:45:47 2009 -0500 @@ -125,10 +125,13 @@ <requirement type="binary">lastz</requirement> </requirements> <tests> - <test> <!-- Lastz command: lastz phiX.2bit/PHIX174[nickname=Ref] test-data/b1.fasta +nogfextend +nochain +gapped +strand=both +seed=12of19 +transition O=400 E=30 X=910 Y=9370 K=3000 L=3000 +noentropy +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=sam- > lastz_wrapper_out2.sam - You need to point to phiX.2bit somewhere on your system. b1.fasta is located in galaxy's test-data - You will have to replace all the pluses before the commands with 2 dashes, - as double-dash can't appear in an XML comment --> + <test> + <!-- + Lastz command: + lastz phiX.2bit/PHIX174[nickname=Ref] test-data/b1.fasta +nogfextend +nochain +gapped +strand=both +seed=12of19 +transition O=400 E=30 X=910 Y=9370 K=3000 L=3000 +noentropy +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=sam- > lastz_wrapper_out2.sam + You need to point to phiX.2bit somewhere on your system. b1.fasta is located in galaxy's test-data. You will have to replace all the pluses before the + commands with 2 dashes, as double-dash can't appear in an XML comment. + --> <param name="input2" value="b1.fasta" ftype="fasta" /> <param name="ref_source" value="cached" /> <param name="input1_2bit" value="phiX" /> @@ -156,10 +159,13 @@ <param name="num_threads" value="4" /> <output name="output1" file="lastz_wrapper_out2.sam" /> </test> - <test> <!-- Lastz command: lastz test-data/phiX.fasta test-data/b1.fasta[fullnames] +yasra95short +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=diffs > lastz_wrapper_out3.tabular - phiX.fasta and b1.fasta are located in galaxy's test-data - You will have to replace all the pluses before the commands with 2 dashes, - as double-dash can't appear in an XML comment --> + <test> + <!-- + Lastz command: + lastz test-data/phiX.fasta test-data/b1.fasta[fullnames] +yasra95short +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=diffs > lastz_wrapper_out3.tabular + phiX.fasta and b1.fasta are located in galaxy's test-data. You will have to replace all the pluses before the commands with 2 dashes, + as double-dash can't appear in an XML comment. + --> <param name="input2" value="b1.fasta" ftype="fasta" /> <param name="ref_source" value="history" /> <param name="input1" value="phiX.fasta" ftype="fasta" /> @@ -173,14 +179,7 @@ <param name="num_threads" value="4" /> <output name="output1" file="lastz_wrapper_out3.tabular" /> </test> - <test> <!-- Lastz command: first you will need to split the file phiX_split.fasta into two files, - phiX1.fasta and phiX2.fasta, each with 1 sequence (phiX1 and phiX2, respectively). Then: - lastz phiX1.fasta test-data/b1.fasta *yasra95short *ambiguousn *nolaj *identity=0..100 *coverage=0 *format=general:score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle > lastz_wrapper_out4.tabular - lastz phiX2.fasta test-data/b1.fasta *yasra95short *ambiguousn *nolaj *identity=0..100 *coverage=0 *format=general:score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle >> lastz_wrapper_out4.tabular - You need to point to phiX1.fasta and phiX2.fasta somewhere on your system. - phiX_split.fasta and b1.fasta are located in galaxy's test-data - You will have to replace all the asterisks before the commands with 2 dashes, - as double-dash can't appear in an XML comment --> + <test> <param name="input2" value="b1.fasta" ftype="fasta" /> <param name="ref_source" value="history" /> <param name="input1" value="phiX_split.fasta" ftype="fasta" />