galaxy-dist commit ad83deba6458: Adding blastx and tblastx wrappers, tidying

20 Nov 2010

# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User peterjc <p.j.a.cock@googlemail.com>
# Date 1285688573 -3600
# Node ID ad83deba645811d8b9509a6e3c58e986ffa89e3b
# Parent  92bb0d0d7e9058bc4b6b43eaa015349241bc13e0
Adding blastx and tblastx wrappers, tidying

--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
@@ -12,8 +12,8 @@
        -num_threads 8
     </command><inputs>
-        <param name="query" type="data" format="fasta" label="Query sequence(s)"/> 
-        <param name="database" type="select" display="radio" label="Nucelotide BLAST database">
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <param name="database" type="select" display="radio" label="Nucleotide BLAST database"><options from_file="blastdb.loc"><column name="name" index="0"/><column name="value" index="1"/>
@@ -26,13 +26,6 @@
             <option value="dc-megablast">dc-megablast</option><option value="vecscreen">vecscreen</option></param>
-        <!-- TODO - integer, min 4, what default?
-        <param name="word_size" type="integer" label="using word size" help="Size of best perfect match">
-            <option value="28">28</option>
-            <option value="16">16</option>
-        </param>
-        <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity" help="no cutoff if 0" />
-        --><param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" /><param name="out_format" type="select" label="Output format"><option value="6">Tabular</option>
@@ -89,19 +82,22 @@
     
 .. class:: warningmark
 
-**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing.  
+**Note**. Database searches may take substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
 
 -----
 
 **What it does**
 
-This tool runs NCBI BLAST+ blastn tool (which include *megablast*).
+Search a *nucleotide database* using a *nucleotide query*,
+using the NCBI BLAST+ blastn command line tool.
+Algorithms include blastn, megablast, and discontiguous megablast.
 
 -----
 
 **Output format**
 
-The default Output of this tool is tabular, containing 12 columns:
+The default output of this tool is tabular, containing 12 columns:
 
 1. Id of your sequence 
 2. GI of the database hit 

--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
@@ -11,7 +11,7 @@
        -num_threads 8
     </command><inputs>
-        <param name="query" type="data" format="fasta" label="Query sequence(s)"/> 
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/><param name="database" type="select" display="radio" label="Nucelotide BLAST database"><options from_file="blastdb.loc"><column name="name" index="0"/>
@@ -60,19 +60,21 @@
     
 .. class:: warningmark
 
-**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing.  
+**Note**. Database searches may take substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
 
 -----
 
 **What it does**
 
-This tool runs NCBI BLAST+ tblastn tool.
+Search a *translated nucleotide database* using a *protein query*,
+using the NCBI BLAST+ tblastn command line tool.
 
 -----
 
 **Output format**
 
-The default Output of this tool is tabular, containing 12 columns:
+The default output of this tool is tabular, containing 12 columns:
 
 1. Id of your sequence 
 2. GI of the database hit 

--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -264,8 +264,10 @@
   --><section name="NCBI BLAST+" id="ncbi_blast_plus_tools"><tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />
+   <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" />
+   <tool file="ncbi_blast_plus/ncbi_blastx_wrapper.xml" /><tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" />
-   <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" />
+   <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" /><tool file="ncbi_blast_plus/blast_filter_fasta.xml" /></section><section name="NGS: Mapping" id="solexa_tools">

--- /dev/null
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
@@ -0,0 +1,112 @@
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.1">
+    <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
+    <command>
+      tblastx
+       -query "$query"
+       -db "$database"
+       -evalue $evalue_cutoff
+       -seg $adv_opts.filter_query
+       -out $output1
+       -outfmt $out_format
+       -num_threads 8
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
+        <param name="database" type="select" display="radio" label="Nucleotide BLAST database">
+            <options from_file="blastdb.loc">
+              <column name="name" index="0"/>
+              <column name="value" index="1"/>
+            </options>
+        </param>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6">Tabular</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <!--
+            <option value="11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic">
+                <param name="filter_query" type="hidden" value="yes" />
+            </when>
+            <when value="advanced">
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="yes" falsevalue="no" checked="true" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- TODO, can I get the caption rather than the value? e.g. 'NT' rather than a long path? -->
+        <data name="output1" format="tabular" label="tblastx on ${database.value_label}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+            </change_format>
+            <change_format>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">tblastx</requirement>
+    </requirements>
+    <tests>
+        <test>
+            <param name="query" value="megablast_wrapper_test1.fa" ftype="fasta"/>
+            <!-- database needs to match the entry in the blastdb.loc file (first column), which includes the last update date if appropriate -->
+            <param name="database" value="phiX" />
+            <!-- NOTE(review): the expected output file below is inherited from the megablast wrapper test; regenerate it with tblastx -->
+            <!--
+            <param name="word_size" value="28" />
+            <param name="iden_cutoff" value="99.0" />
+            -->
+            <param name="evalue_cutoff" value="10.0" />
+            <param name="filter_query" value="yes" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="megablast_wrapper_test1.out"/>
+        </test>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *translated nucleotide query*,
+using the NCBI BLAST+ tblastx command line tool.
+
+-----
+
+**Output format**
+
+The default output of this tool is tabular, containing 12 columns:
+
+1. Id of your sequence 
+2. GI of the database hit 
+3. % identity
+4. Alignment length
+5. # mismatches
+6. # gaps
+7. Start position in your sequence
+8. End position in your sequence
+9. Start position in database hit
+10. End position in database hit
+11. E-value
+12. Bit score
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>

--- /dev/null
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
@@ -0,0 +1,99 @@
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.1">
+    <description>Search protein database with translated nucleotide query sequence(s)</description>
+    <command>
+      blastx
+       -query "$query"
+       -db "$database"
+       -evalue $evalue_cutoff
+       -seg $adv_opts.filter_query
+       -out $output1
+       -outfmt $out_format
+       -num_threads 8
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
+        <param name="database" type="select" display="radio" label="Protein BLAST database">
+            <options from_file="blastdb_p.loc">
+              <column name="name" index="0"/>
+              <column name="value" index="1"/>
+            </options>
+        </param>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6">Tabular</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <!--
+            <option value="11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic">
+                <param name="filter_query" type="hidden" value="yes" />
+            </when>
+            <when value="advanced">
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="yes" falsevalue="no" checked="true" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- TODO, can I get the caption rather than the value? e.g. 'NR' rather than a long path? -->
+        <data name="output1" format="tabular" label="blastx on ${database.value_label}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+            </change_format>
+            <change_format>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastx</requirement>
+    </requirements>
+    <tests>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *translated nucleotide query*,
+using the NCBI BLAST+ blastx command line tool.
+
+-----
+
+**Output format**
+
+The default output of this tool is tabular, containing 12 columns:
+
+1. Id of your sequence 
+2. GI of the database hit 
+3. % identity
+4. Alignment length
+5. # mismatches
+6. # gaps
+7. Start position in your sequence
+8. End position in your sequence
+9. Start position in database hit
+10. End position in database hit
+11. E-value
+12. Bit score
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>

--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
@@ -12,7 +12,7 @@
        -num_threads 8
     </command><inputs>
-        <param name="query" type="data" format="fasta" label="Query sequence(s)"/> 
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/><param name="database" type="select" display="radio" label="Protein BLAST database"><options from_file="blastdb_p.loc"><column name="name" index="0"/>
@@ -23,13 +23,6 @@
             <option value="blastp">blastp</option><option value="blastp-short">blastp-short</option></param>
-        <!-- TODO - integer, min 4, what default?
-        <param name="word_size" type="integer" label="using word size" help="Size of best perfect match">
-            <option value="28">28</option>
-            <option value="16">16</option>
-        </param>
-        <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity" help="no cutoff if 0" />
-        --><param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" /><param name="out_format" type="select" label="Output format"><option value="6">Tabular</option>
@@ -72,19 +65,21 @@
     
 .. class:: warningmark
 
-**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing.  
+**Note**. Database searches may take substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
 
 -----
 
 **What it does**
 
-This runs the command line NCBI BLAST+ blastp tool.
+Search a *protein database* using a *protein query*,
+using the NCBI BLAST+ blastp command line tool.
 
 -----
 
 **Output format**
 
-The default Output of this tool is tabular, containing 12 columns:
+The default output of this tool is tabular, containing 12 columns:
 
 1. Id of your sequence 
 2. GI of the database hit

    

commits-noreply＠bitbucket.org

tags

participants (1)