# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User peterjc <p.j.a.cock@googlemail.com> # Date 1285688573 -3600 # Node ID ad83deba645811d8b9509a6e3c58e986ffa89e3b # Parent 92bb0d0d7e9058bc4b6b43eaa015349241bc13e0 Adding blastx and tblastx wrappers, tidying --- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml @@ -12,8 +12,8 @@ -num_threads 8 </command><inputs> - <param name="query" type="data" format="fasta" label="Query sequence(s)"/> - <param name="database" type="select" display="radio" label="Nucelotide BLAST database"> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + <param name="database" type="select" display="radio" label="Nucleotide BLAST database"><options from_file="blastdb.loc"><column name="name" index="0"/><column name="value" index="1"/> @@ -26,13 +26,6 @@ <option value="dc-megablast">dc-megablast</option><option value="vecscreen">vecscreen</option></param> - <!-- TODO - integer, min 4, what default? - <param name="word_size" type="integer" label="using word size" help="Size of best perfect match"> - <option value="28">28</option> - <option value="16">16</option> - </param> - <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity" help="no cutoff if 0" /> - --><param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" /><param name="out_format" type="select" label="Output format"><option value="6">Tabular</option> @@ -89,19 +82,22 @@ .. class:: warningmark -**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing. +**Note**. Database searches may take substantial amount of time. +For large input datasets it is advisable to allow overnight processing. 
----- **What it does** -This tool runs NCBI BLAST+ blastn tool (which include *megablast*). +Search a *nucleotide database* using a *nucleotide query*, +using the NCBI BLAST+ blastn command line tool. +Algorithms include blastn, megablast, and discontiguous megablast. ----- **Output format** -The default Output of this tool is tabular, containing 12 columns: +The default output of this tool is tabular, containing 12 columns: 1. Id of your sequence 2. GI of the database hit --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml @@ -11,7 +11,7 @@ -num_threads 8 </command><inputs> - <param name="query" type="data" format="fasta" label="Query sequence(s)"/> - <param name="database" type="select" display="radio" label="Nucelotide BLAST database"> + <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> + <param name="database" type="select" display="radio" label="Nucleotide BLAST database"><options from_file="blastdb.loc"><column name="name" index="0"/> @@ -60,19 +60,21 @@ .. class:: warningmark -**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing. +**Note**. Database searches may take substantial amount of time. +For large input datasets it is advisable to allow overnight processing. ----- **What it does** -This tool runs NCBI BLAST+ tblastn tool. +Search a *translated nucleotide database* using a *protein query*, +using the NCBI BLAST+ tblastn command line tool. ----- **Output format** -The default Output of this tool is tabular, containing 12 columns: +The default output of this tool is tabular, containing 12 columns: 1. Id of your sequence 2. 
GI of the database hit --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -264,8 +264,10 @@ --><section name="NCBI BLAST+" id="ncbi_blast_plus_tools"><tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" /> + <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" /> + <tool file="ncbi_blast_plus/ncbi_blastx_wrapper.xml" /><tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" /> - <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" /> + <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" /><tool file="ncbi_blast_plus/blast_filter_fasta.xml" /></section><section name="NGS: Mapping" id="solexa_tools"> --- /dev/null +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml @@ -0,0 +1,112 @@ +<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.1"> + <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> + <command> + tblastx + -query "$query" + -db "$database" + -evalue $evalue_cutoff + -seg $adv_opts.filter_query + -out $output1 + -outfmt $out_format + -num_threads 8 + </command> + <inputs> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + <param name="database" type="select" display="radio" label="Nucleotide BLAST database"> + <options from_file="blastdb.loc"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" /> + <param name="out_format" type="select" label="Output format"> + <option value="6">Tabular</option> + <option value="5">BLAST XML</option> + <option value="0">Pairwise text</option> + <!-- + <option value="11">BLAST archive format (ASN.1)</option> + --> + </param> + <conditional name="adv_opts"> + <param name="adv_opts_selector" type="select" label="Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + 
</param> + <when value="basic"> + <param name="filter_query" type="hidden" value="yes" /> + </when> + <when value="advanced"> + <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="yes" falsevalue="no" checked="true" /> + </when> + </conditional> + </inputs> + <outputs> + <!-- TODO, can I get the caption rather than the value? e.g. 'NT' rather than a long path? --> + <data name="output1" format="tabular" label="tblastx on ${database.value_label}"> + <change_format> + <when input="out_format" value="0" format="txt"/> + </change_format> + <change_format> + <when input="out_format" value="5" format="blastxml"/> + </change_format> + </data> + </outputs> + <requirements> + <requirement type="binary">tblastx</requirement> <!-- the BLAST+ program this tool is a wrapper for --> + </requirements> + <tests> + <test> + <param name="query" value="megablast_wrapper_test1.fa" ftype="fasta"/> + <!-- database needs to match the entry in the blastdb.loc file (first column), which includes the last update date if appropriate --> + <param name="database" value="phiX" /> + <!-- no blast_type param here: this tool defines no blast_type input --> + <!-- + <param name="word_size" value="28" /> + <param name="iden_cutoff" value="99.0" /> + --> + <param name="evalue_cutoff" value="10.0" /> + <param name="filter_query" value="yes" /> + <param name="out_format" value="6" /> + <output name="output1" file="megablast_wrapper_test1.out"/> + </test> + </tests> + <help> + +.. class:: warningmark + +**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing. + +----- + +**What it does** + +Search a *translated nucleotide database* using a *translated nucleotide query*, +using the NCBI BLAST+ tblastx command line tool. + +----- + +**Output format** + +The default output of this tool is tabular, containing 12 columns: + +1. Id of your sequence +2. GI of the database hit +3. % identity +4. Alignment length +5. # mismatches +6. # gaps +7. 
Start position in your sequence +8. End position in your sequence +9. Start position in database hit +10. End position in database hit +11. E-value +12. Bit score + +------- + +**References** + +Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. + + </help> +</tool> --- /dev/null +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml @@ -0,0 +1,99 @@ +<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.1"> + <description>Search protein database with translated nucleotide query sequence(s)</description> + <command> + blastx + -query "$query" + -db "$database" + -evalue $evalue_cutoff + -seg $adv_opts.filter_query + -out $output1 + -outfmt $out_format + -num_threads 8 + </command> + <inputs> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + <param name="database" type="select" display="radio" label="Protein BLAST database"> + <options from_file="blastdb_p.loc"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" /> + <param name="out_format" type="select" label="Output format"> + <option value="6">Tabular</option> + <option value="5">BLAST XML</option> + <option value="0">Pairwise text</option> + <!-- + <option value="11">BLAST archive format (ASN.1)</option> + --> + </param> + <conditional name="adv_opts"> + <param name="adv_opts_selector" type="select" label="Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic"> + <param name="filter_query" type="hidden" value="yes" /> + </when> + <when value="advanced"> + <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="yes" falsevalue="no" checked="true" /> + </when> 
+ </conditional> + </inputs> + <outputs> + <!-- TODO, can I get the caption rather than the value? e.g. 'NR' rather than a long path? --> + <data name="output1" format="tabular" label="blastx on ${database.value_label}"> + <change_format> + <when input="out_format" value="0" format="txt"/> + </change_format> + <change_format> + <when input="out_format" value="5" format="blastxml"/> + </change_format> + </data> + </outputs> + <requirements> + <requirement type="binary">blastx</requirement> <!-- the BLAST+ program this tool is a wrapper for --> + </requirements> + <tests> + </tests> + <help> + +.. class:: warningmark + +**Note**. Database searches may take substantial amount of time. +For large input datasets it is advisable to allow overnight processing. + +----- + +**What it does** + +Search a *protein database* using a *translated nucleotide query*, +using the NCBI BLAST+ blastx command line tool. + +----- + +**Output format** + +The default output of this tool is tabular, containing 12 columns: + +1. Id of your sequence +2. GI of the database hit +3. % identity +4. Alignment length +5. # mismatches +6. # gaps +7. Start position in your sequence +8. End position in your sequence +9. Start position in database hit +10. End position in database hit +11. E-value +12. Bit score + +------- + +**References** + +Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. 
+ + </help> +</tool> --- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml @@ -12,7 +12,7 @@ -num_threads 8 </command><inputs> - <param name="query" type="data" format="fasta" label="Query sequence(s)"/> + <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/><param name="database" type="select" display="radio" label="Protein BLAST database"><options from_file="blastdb_p.loc"><column name="name" index="0"/> @@ -23,13 +23,6 @@ <option value="blastp">blastp</option><option value="blastp-short">blastp-short</option></param> - <!-- TODO - integer, min 4, what default? - <param name="word_size" type="integer" label="using word size" help="Size of best perfect match"> - <option value="28">28</option> - <option value="16">16</option> - </param> - <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity" help="no cutoff if 0" /> - --><param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" /><param name="out_format" type="select" label="Output format"><option value="6">Tabular</option> @@ -72,19 +65,21 @@ .. class:: warningmark -**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing. +**Note**. Database searches may take substantial amount of time. +For large input datasets it is advisable to allow overnight processing. ----- **What it does** -This runs the command line NCBI BLAST+ blastp tool. +Search a *protein database* using a *protein query*, +using the NCBI BLAST+ blastp command line tool. ----- **Output format** -The default Output of this tool is tabular, containing 12 columns: +The default output of this tool is tabular, containing 12 columns: 1. Id of your sequence 2. GI of the database hit