# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User Dan Blankenberg <dan@bx.psu.edu> # Date 1286485181 14400 # Node ID d85159be56e854731822a314a2e982e0c6987389 # Parent 5038f0863ab9a8286fd97c718c63945bfd442dd0 Add basic support for bowtie indexes as a datatype (bowtie_base_index, bowtie_color_index), available via datatype conversion. Currently, the indexes need to be converted manually from the FASTA file before use in bowtie, but they can be reused. More work is required to allow the one-off indexes built by bowtie to become Galaxy datasets; alternatively, the custom genome selection could be limited to the index datatype for input (and not allow fasta directly), which would allow implicit datatype conversion to occur when a fasta file is selected as input, but this would prevent the index tuning that is currently available when currently selecting a fasta file. --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -45,10 +45,14 @@ <datatype extension="csv" type="galaxy.datatypes.tabular:Tabular"/><!-- End MSI added Datatypes --><datatype extension="customtrack" type="galaxy.datatypes.interval:CustomTrack"/> + <datatype extension="bowtie_color_index" type="galaxy.datatypes.ngsindex:BowtieColorIndex" mimetype="text/html" display_in_upload="False"/> + <datatype extension="bowtie_base_index" type="galaxy.datatypes.ngsindex:BowtieBaseIndex" mimetype="text/html" display_in_upload="False"/><datatype extension="csfasta" type="galaxy.datatypes.sequence:csFasta" display_in_upload="true"/><datatype extension="data" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream" max_optional_metadata_filesize="1048576" /><datatype extension="fasta" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true"><converter file="fasta_to_tabular_converter.xml" target_datatype="tabular"/> + <converter file="fasta_to_bowtie_base_index_converter.xml" target_datatype="bowtie_base_index"/> + 
<converter file="fasta_to_bowtie_color_index_converter.xml" target_datatype="bowtie_color_index"/></datatype><datatype extension="fastq" type="galaxy.datatypes.sequence:Fastq" display_in_upload="true"/><datatype extension="fastqsanger" type="galaxy.datatypes.sequence:FastqSanger" display_in_upload="true"/> --- a/tools/sr_mapping/bowtie_wrapper.xml +++ b/tools/sr_mapping/bowtie_wrapper.xml @@ -12,6 +12,10 @@ --snpfrac="None" --keepends="None" #if $refGenomeSource.genomeSource == "history": + #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ): + --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}" + --do_not_build_index + #else: --ref=$refGenomeSource.ownFile --indexSettings=$refGenomeSource.indexParams.indexSettings #if $refGenomeSource.indexParams.indexSettings == "indexFull": @@ -50,6 +54,7 @@ --iseed="None" --icutoff="None" #end if + #end if #else: --ref=$refGenomeSource.index --indexSettings="None" @@ -203,9 +208,9 @@ </param></when><when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + <param name="ownFile" type="data" format="bowtie_base_index,fasta" metadata_name="dbkey" label="Select the reference genome" /><conditional name="indexParams"> - <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own"> + <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own" help="These settings are ignored when using a prebuilt index"><option value="indexPreSet">Default</option><option value="indexFull">Set your own</option></param> --- /dev/null +++ b/lib/galaxy/datatypes/converters/fasta_to_bowtie_base_index_converter.xml @@ -0,0 +1,18 @@ +<tool id="CONVERTER_fasta_to_bowtie_base_index" name="Convert FASTA to Bowtie base space Index" version="1.0.0"> + <!-- 
<description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <!-- Used on the metadata edit page. --> + <command> + mkdir ${output.files_path} + && bowtie-build --quiet + -f + $input ${output.files_path}/${output.metadata.base_name} + </command> + <inputs> + <param name="input" type="data" format="fasta" label="Fasta file"/> + </inputs> + <outputs> + <data name="output" format="bowtie_base_index"/> + </outputs> + <help> + </help> +</tool> --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -3,7 +3,7 @@ Provides mapping between extensions and """ import os, tempfile import logging -import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly +import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex import galaxy.util from galaxy.util.odict import odict from display_applications.application import DisplayApplication --- a/tools/sr_mapping/bowtie_wrapper.py +++ b/tools/sr_mapping/bowtie_wrapper.py @@ -56,6 +56,7 @@ usage: bowtie_wrapper.py [options] -c, --icutoff=c: Number of first bases of the reference sequence to index -x, --indexSettings=x: Whether or not indexing options are to be set -H, --suppressHeader=H: Suppress header + --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' """ import optparse, os, shutil, subprocess, sys, tempfile @@ -118,6 +119,7 @@ def __main__(): parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' ) parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) + parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that 
provided file is already indexed, use as is' ) (options, args) = parser.parse_args() stdout = '' @@ -129,7 +131,7 @@ def __main__(): else: colorspace = '' # index if necessary - if options.genomeSource == 'history': + if options.genomeSource == 'history' and not options.do_not_build_index: # set up commands if options.index_settings =='indexPreSet': indexing_cmds = '%s' % colorspace --- a/tools/sr_mapping/bowtie_color_wrapper.xml +++ b/tools/sr_mapping/bowtie_color_wrapper.xml @@ -9,6 +9,10 @@ --suppressHeader=$suppressHeader --genomeSource=$refGenomeSource.genomeSource #if $refGenomeSource.genomeSource == "history": + #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ): + --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}" + --do_not_build_index + #else: --ref=$refGenomeSource.ownFile --indexSettings=$refGenomeSource.indexParams.indexSettings #if $refGenomeSource.indexParams.indexSettings == "indexFull": @@ -47,6 +51,7 @@ --iseed="None" --icutoff="None" #end if + #end if #else: --ref=$refGenomeSource.index --indexSettings="None" @@ -206,9 +211,9 @@ </param></when><when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + <param name="ownFile" type="data" format="bowtie_color_index,fasta" metadata_name="dbkey" label="Select the reference genome" /><conditional name="indexParams"> - <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own"> + <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own" help="These settings are ignored when using a prebuilt index"><option value="indexPreSet">Default</option><option value="indexFull">Set your own</option></param> --- /dev/null +++ b/lib/galaxy/datatypes/ngsindex.py @@ -0,0 +1,75 @@ +""" +NGS indexes +""" +import logging +from metadata import 
MetadataElement
+from images import Html
+import os # regenerate_primary_file needs os.listdir / os.path.split
+
+log = logging.getLogger(__name__)
+
+class BowtieIndex( Html ):
+    """
+    Base class for bowtie index datatypes: a composite dataset with an auto-generated html primary file.
+    Is subclassed by BowtieColorIndex and BowtieBaseIndex.
+    """
+    MetadataElement( name="base_name", desc="base name for this index set", default='galaxy_generated_bowtie_index', set_in_upload=True, readonly=True )
+    MetadataElement( name="sequence_space", desc="sequence_space for this index set", default='unknown', set_in_upload=True, readonly=True )
+
+    file_ext = 'bowtie_index'
+    is_binary = True
+    composite_type = 'auto_primary_file'
+    allow_datatype_change = False
+
+    def generate_primary_file( self, dataset = None ):
+        """
+        Return the placeholder html for the primary file; this is called only at upload.
+        Cannot rename the datasets here - they come with the default unfortunately.
+        """
+        return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'
+
+    def regenerate_primary_file(self,dataset):
+        """
+        Rewrite the primary html file as a list of the files found in extra_files_path; cannot do this until we are setting metadata.
+        """
+        bn = dataset.metadata.base_name
+        flist = os.listdir(dataset.extra_files_path)
+        rval = ['<html><head><title>Files for Composite Dataset %s</title></head><p/>Comprises the following files:<p/><ul>' % (bn)]
+        for fname in flist:
+            sfname = os.path.split(fname)[-1]
+            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
+        rval.append( '</ul></html>' )
+        f = open(dataset.file_name,'w')
+        f.write("\n".join( rval ) + '\n')
+        f.close()
+
+    def set_peek( self, dataset, is_multi_byte=False ): # peek/blurb describe the index by its sequence_space metadata
+        if not dataset.dataset.purged:
+            dataset.peek = "Bowtie index file (%s)" % ( dataset.metadata.sequence_space )
+            dataset.blurb = "%s space" % ( dataset.metadata.sequence_space )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ): # returns the stored peek, or a generic label if it cannot be read
+        try:
+            return dataset.peek
+        except Exception: # not a bare except: let SystemExit/KeyboardInterrupt propagate (Python 2.5+)
+            return "Bowtie index file"
+    def sniff( self, filename ): # always False, so this datatype is never selected by sniffing
+        return False
+
+class BowtieColorIndex( BowtieIndex ):
+    """
+    Bowtie color space index
+    """
+    MetadataElement( name="sequence_space", desc="sequence_space for this index set", default='color', set_in_upload=True, readonly=True )
+
+    file_ext = 'bowtie_color_index'
+
+class BowtieBaseIndex( BowtieIndex ):
+    """
+    Bowtie base space index
+    """
+    MetadataElement( name="sequence_space", desc="sequence_space for this index set", default='base', set_in_upload=True, readonly=True )
+
+    file_ext = 'bowtie_base_index'
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/fasta_to_bowtie_color_index_converter.xml
@@ -0,0 +1,19 @@
+<tool id="CONVERTER_fasta_to_bowtie_color_index" name="Convert FASTA to Bowtie color space Index" version="1.0.0">
+  <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+  <!-- Used on the metadata edit page. -->
+  <command>
+    mkdir ${output.files_path}
+    && bowtie-build --quiet
+    --color
+    -f
+    $input ${output.files_path}/${output.metadata.base_name}
+  </command>
+  <inputs>
+    <param name="input" type="data" format="fasta" label="Fasta file"/>
+  </inputs>
+  <outputs>
+    <data name="output" format="bowtie_color_index"/>
+  </outputs>
+  <help>
+  </help>
+</tool>