commit/galaxy-central: jgoecks: Skeleton wrapper for Bowtie2.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/51fccbc2e9bc/ changeset: 51fccbc2e9bc user: jgoecks date: 2012-04-26 21:28:17 summary: Skeleton wrapper for Bowtie2. affected #: 5 files diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tool-data/bowtie2_indices.loc.sample --- /dev/null +++ b/tool-data/bowtie2_indices.loc.sample @@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie2 indexed sequences data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie2_indices.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id><dbkey><display_name><file_base_path> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/bowtie2/hg18/, +#then the bowtie2_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie2/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie2/hg18/ directory +#would contain hg18.*.ebwt files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... +# +#Your bowtie2_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie2/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie2/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie2/hg19/hg19 +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -336,6 +336,7 @@ <tool file="sr_mapping/lastz_wrapper.xml" /><tool file="sr_mapping/lastz_paired_reads_wrapper.xml" /><tool file="sr_mapping/bowtie_wrapper.xml" /> + <tool file="sr_mapping/bowtie2_wrapper.xml" /><tool file="sr_mapping/bowtie_color_wrapper.xml" /><tool file="sr_mapping/bwa_wrapper.xml" /><tool file="sr_mapping/bwa_color_wrapper.xml" /> diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample +++ b/tool_data_table_conf.xml.sample @@ -25,6 +25,11 @@ <columns>value, dbkey, name, path</columns><file path="tool-data/bowtie_indices.loc" /></table> + <!-- Locations of indexes in the Bowtie2 mapper format --> + <table name="bowtie2_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bowtie2_indices.loc" /> + </table><!-- Locations of indexes in the Bowtie color-space mapper format --><table name="bowtie_indexes_color" comment_char="#"><columns>value, dbkey, name, path</columns> diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tools/sr_mapping/bowtie2_wrapper.py --- /dev/null +++ b/tools/sr_mapping/bowtie2_wrapper.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +import optparse, os, shutil, subprocess, sys, tempfile, fileinput + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' ) + parser.add_option( '', '--own-file', dest='own_file', help='' ) + parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' ) + + # Wrapper options. + parser.add_option( '-O', '--output', dest='output' ) + parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) + parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) + parser.add_option( '', '--single-paired', dest='single_paired', help='' ) + parser.add_option( '', '--settings', dest='settings', help='' ) + + (options, args) = parser.parse_args() + + # Creat bowtie index if necessary. + tmp_index_dir = tempfile.mkdtemp() + if options.own_file: + index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' )[:-1] ) ) + try: + os.link( options.own_file, index_path + '.fa' ) + except: + # Bowtie prefers (but doesn't require) fasta file to be in same directory, with .fa extension + pass + cmd_index = 'bowtie2-build -f %s %s' % ( options.own_file, index_path ) + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error indexing reference sequence\n' + str( e ) ) + else: + index_path = options.index_path + + # Build bowtie command. + cmd = 'bowtie2 %s -x %s %s -S %s' + + # Set up reads. + if options.single_paired == 'paired': + reads = " -1 %s -2 %s" % ( options.input1, options.input2 ) + else: + reads = " -U %s" % ( options.input1 ) + + # Set up options. + opts = '-p %s' % ( options.num_threads ) + if options.settings == 'preSet': + pass + else: + pass + + # Final command: + cmd = cmd % ( opts, index_path, reads, options.output ) + print cmd + + # Run + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + + # TODO: look for errors in program output. + except Exception, e: + stop_err( 'Error in bowtie2:\n' + str( e ) ) + + # Clean up temp dirs + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + +if __name__=="__main__": __main__() diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tools/sr_mapping/bowtie2_wrapper.xml --- /dev/null +++ b/tools/sr_mapping/bowtie2_wrapper.xml @@ -0,0 +1,109 @@ +<tool id="bowtie2" name="Bowtie2" version="0.1"> + <!-- Wrapper compatible with Bowtie version 2.0.0 --> + <description>is a short-read mapper</description> + <version_command>bowtie2 --version</version_command> + <requirements> + <requirement type="package">bowtie2</requirement> + </requirements> + <command interpreter="python"> + bowtie2_wrapper.py + + ## Change this to accommodate the number of threads you have available. + --num-threads="4" + + ## Outputs. + --output=$output + + ## Handle reference file. + #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile + #else: + --indexes-path="${refGenomeSource.index.fields.path}" + #end if + + ## Are reads single-end or paired? + --single-paired=$singlePaired.sPaired + + ## First input file always required. + --input1=$input1 + + ## Second input only if input is paired-end. + #if $singlePaired.sPaired == "paired" + --input2=$singlePaired.input2 + #end if + + ## Set params. + --settings=$params.settingsType + </command> + <inputs> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> + </when> + <when value="paired"> + <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <param format="fastqsanger" name="input2" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <!-- TODO: paired-end specific parameters. --> + </when> + </conditional> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> + <options from_data_table="bowtie2_indexes"> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when><!-- history --> + </conditional><!-- refGenomeSource --> + <conditional name="params"> + <param name="settingsType" type="select" label="Bowtie settings to use" help="You can use the default settings or set custom values for any of Bowtie's parameters."> + <option value="preSet">Use Defaults</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> + <!-- Full/advanced params. --> + <when value="full"> + </when><!-- full --> + </conditional><!-- params --> + </inputs> + + <outputs> + <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + </outputs> + + <tests> + </tests> + + <help> + </help> +</tool> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket