commit/galaxy-central: jgoecks: Skeleton wrapper for Bowtie2.

26 Apr 2012

1 new commit in galaxy-central:


https://bitbucket.org/galaxy/galaxy-central/changeset/51fccbc2e9bc/
changeset:   51fccbc2e9bc
user:        jgoecks
date:        2012-04-26 21:28:17
summary:     Skeleton wrapper for Bowtie2.
affected #:  5 files

diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tool-data/bowtie2_indices.loc.sample

--- /dev/null
+++ b/tool-data/bowtie2_indices.loc.sample
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie2 indexed sequences data files. You will
+#need to create these data files and then create a bowtie2_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie2_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie2/hg18/,
+#then the bowtie2_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie2/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie2/hg18/ directory
+#would contain hg18.*.bt2 files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.bt2
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.bt2
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.bt2
+#...etc...
+#
+#Your bowtie2_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie2/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie2/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie2/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#


diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tool_conf.xml.sample
--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -336,6 +336,7 @@
     <tool file="sr_mapping/lastz_wrapper.xml" />
     <tool file="sr_mapping/lastz_paired_reads_wrapper.xml" />
     <tool file="sr_mapping/bowtie_wrapper.xml" />
+    <tool file="sr_mapping/bowtie2_wrapper.xml" />
     <tool file="sr_mapping/bowtie_color_wrapper.xml" />
     <tool file="sr_mapping/bwa_wrapper.xml" />
     <tool file="sr_mapping/bwa_color_wrapper.xml" />


diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample
+++ b/tool_data_table_conf.xml.sample
@@ -25,6 +25,11 @@
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/bowtie_indices.loc" />
     </table>
+    <!-- Locations of indexes in the Bowtie2 mapper format -->
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
     <!-- Locations of indexes in the Bowtie color-space mapper format -->
     <table name="bowtie_indexes_color" comment_char="#">
         <columns>value, dbkey, name, path</columns>


diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tools/sr_mapping/bowtie2_wrapper.py
--- /dev/null
+++ b/tools/sr_mapping/bowtie2_wrapper.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+import optparse, os, shutil, subprocess, sys, tempfile, fileinput
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
+    parser.add_option( '', '--own-file', dest='own_file', help='' )
+    parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )
+
+    # Wrapper options.
+    parser.add_option( '-O', '--output', dest='output' )
+    parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
+    parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
+    parser.add_option( '', '--single-paired', dest='single_paired', help='' )
+    parser.add_option( '', '--settings', dest='settings', help='' )
+
+    (options, args) = parser.parse_args()
+
+    # Create bowtie index if necessary.
+    tmp_index_dir = tempfile.mkdtemp()
+    if options.own_file:
+        index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' )[:-1] ) )
+        try:
+            os.link( options.own_file, index_path + '.fa' )
+        except:
+            # Bowtie prefers (but doesn't require) fasta file to be in same directory, with .fa extension
+            pass
+        cmd_index = 'bowtie2-build -f %s %s' % ( options.own_file, index_path )
+        try:
+            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
+            tmp_stderr = open( tmp, 'wb' )
+            proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
+            returncode = proc.wait()
+            tmp_stderr.close()
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+        except Exception, e:
+            if os.path.exists( tmp_index_dir ):
+                shutil.rmtree( tmp_index_dir )
+            stop_err( 'Error indexing reference sequence\n' + str( e ) )
+    else:
+        index_path = options.index_path
+
+    # Build bowtie command.
+    cmd = 'bowtie2 %s -x %s %s -S %s'
+    
+    # Set up reads.
+    if options.single_paired == 'paired':
+        reads = " -1 %s -2 %s" % ( options.input1, options.input2 )
+    else:
+        reads = " -U %s" % ( options.input1 )
+    
+    # Set up options.
+    opts = '-p %s' % ( options.num_threads )
+    if options.settings == 'preSet':
+        pass
+    else:
+        pass
+        
+    # Final command:
+    cmd = cmd % ( opts, index_path, reads, options.output )
+    print cmd
+
+    # Run
+    try:
+        tmp_out = tempfile.NamedTemporaryFile().name
+        tmp_stdout = open( tmp_out, 'wb' )
+        tmp_err = tempfile.NamedTemporaryFile().name
+        tmp_stderr = open( tmp_err, 'wb' )
+        proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
+        returncode = proc.wait()
+        tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+        tmp_stderr = open( tmp_err, 'rb' )
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        tmp_stdout.close()
+        tmp_stderr.close()
+        if returncode != 0:
+            raise Exception, stderr
+            
+        # TODO: look for errors in program output.
+    except Exception, e:
+        stop_err( 'Error in bowtie2:\n' + str( e ) ) 
+
+    # Clean up temp dirs
+    if os.path.exists( tmp_index_dir ):
+        shutil.rmtree( tmp_index_dir )
+
+if __name__=="__main__": __main__()


diff -r 1c8eb226af94b29ef8484c4fbed557282170259e -r 51fccbc2e9bc401c83b52447b513e201fe776ba2 tools/sr_mapping/bowtie2_wrapper.xml
--- /dev/null
+++ b/tools/sr_mapping/bowtie2_wrapper.xml
@@ -0,0 +1,109 @@
+<tool id="bowtie2" name="Bowtie2" version="0.1">
+    <!-- Wrapper compatible with Bowtie version 2.0.0 -->
+    <description>is a short-read mapper</description>
+    <version_command>bowtie2 --version</version_command>
+    <requirements>
+        <requirement type="package">bowtie2</requirement>
+    </requirements>
+    <command interpreter="python">
+        bowtie2_wrapper.py
+        
+        ## Change this to accommodate the number of threads you have available.
+        --num-threads="4"
+
+        ## Outputs.
+        --output=$output
+
+        ## Handle reference file.
+        #if $refGenomeSource.genomeSource == "history":
+            --own-file=$refGenomeSource.ownFile
+        #else:
+            --indexes-path="${refGenomeSource.index.fields.path}"
+        #end if
+
+        ## Are reads single-end or paired?
+        --single-paired=$singlePaired.sPaired
+
+        ## First input file always required.
+        --input1=$input1
+        
+        ## Second input only if input is paired-end.
+        #if $singlePaired.sPaired == "paired"
+            --input2=$singlePaired.input2
+        #end if
+
+        ## Set params.
+        --settings=$params.settingsType
+    </command>
+    <inputs>
+        <conditional name="singlePaired">
+            <param name="sPaired" type="select" label="Is this library mate-paired?">
+              <option value="single">Single-end</option>
+              <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+            </when>
+            <when value="paired">
+                <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <param format="fastqsanger" name="input2" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <!-- TODO: paired-end specific parameters. -->
+            </when>
+        </conditional>
+        <conditional name="refGenomeSource">
+          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie2_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when><!-- history -->
+        </conditional><!-- refGenomeSource -->
+        <conditional name="params">
+            <param name="settingsType" type="select" label="Bowtie settings to use" help="You can use the default settings or set custom values for any of Bowtie's parameters.">
+              <option value="preSet">Use Defaults</option>
+              <option value="full">Full parameter list</option>
+            </param>
+            <when value="preSet" />
+            <!-- Full/advanced params. -->
+            <when value="full">
+            </when><!-- full -->
+      </conditional><!-- params -->
+    </inputs>
+
+    <outputs>
+        <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
+          <actions>
+            <conditional name="refGenomeSource.genomeSource">
+              <when value="indexed">
+                <action type="metadata" name="dbkey">
+                  <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0">
+                    <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                    <filter type="param_value" ref="refGenomeSource.index" column="0"/>
+                  </option>
+                </action>
+              </when>
+              <when value="history">
+                <action type="metadata" name="dbkey">
+                  <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
+                </action>
+              </when>
+            </conditional>
+          </actions>
+        </data>
+    </outputs>
+
+    <tests>
+    </tests>
+
+    <help>
+    </help>
+</tool>

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

    

Bitbucket

tags

participants (1)