# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User peterjc <p.j.a.cock@googlemail.com>
# Date 1285670500 -3600
# Node ID 6dd6224f571deb34c01c7fd0c3c4daccf948eda1
# Parent fbc4d5d45649043ac57270cac5741ec153f6de66
Add basic blastp wrapper

--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -264,6 +264,7 @@
   -->
   <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">
     <tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />
+    <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" />
   </section>
   <section name="NGS: Mapping" id="solexa_tools">
     <tool file="sr_mapping/lastz_wrapper.xml" />
--- /dev/null
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
@@ -0,0 +1,119 @@
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.1">
+    <description>Use NCBI BLAST+ tool to search a protein database with protein query sequence(s)</description>
+    <!--
+        Command line template: each $name is substituted from the input
+        parameter or output dataset of the same name. The thread count is
+        fixed at 8. NOTE(review): confirm that suits the deployment host.
+    -->
+    <command>
+        blastp
+        -query "$query"
+        -db "$database"
+        -task $blast_type
+        -evalue $evalue_cutoff
+        -seg $adv_opts.filter_query
+        -out "$output1"
+        -outfmt $out_format
+        -num_threads 8
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Query sequence(s)"/>
+        <!-- Databases are read from blastdb_p.loc: column 0 supplies the name, column 1 the value substituted into -db. -->
+        <param name="database" type="select" display="radio" label="Protein BLAST database">
+            <options from_file="blastdb_p.loc">
+                <column name="name" index="0"/>
+                <column name="value" index="1"/>
+            </options>
+        </param>
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="blastp">blastp</option>
+            <option value="blastp-short">blastp-short</option>
+        </param>
+        <!-- TODO - integer, min 4, what default?
+        <param name="word_size" type="integer" label="using word size" help="Size of best perfect match">
+            <option value="28">28</option>
+            <option value="16">16</option>
+        </param>
+        <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity" help="no cutoff if 0" />
+        -->
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" />
+        <!-- Option values are the numeric codes passed to -outfmt. -->
+        <param name="out_format" type="select" label="Output format">
+            <option value="6">Tabular</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <!--
+            <option value="11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <!-- Both branches define filter_query so the -seg argument always has a value; the basic branch pins it to "yes". -->
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+                <option value="basic" selected="True">Hide Advanced Options</option>
+                <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic">
+                <param name="filter_query" type="hidden" value="yes" />
+            </when>
+            <when value="advanced">
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="yes" falsevalue="no" checked="true" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- TODO, can I get the caption rather than the value? e.g. 'NR' rather than a long path? -->
+        <!-- Output datatype follows out_format: tabular by default, txt for pairwise text (0), blastxml for BLAST XML (5). -->
+        <data name="output1" format="tabular" label="${blast_type.value_label} on ${database.value_label}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <!-- Names the executable that the command template above runs. -->
+    <requirements>
+        <requirement type="binary">blastp</requirement>
+    </requirements>
+    <tests>
+    </tests>
+    <help>
+
+.. class:: warningmark
+
+**Note**. Database searches may take substantial amount of time. For large input datasets it is advisable to allow overnight processing.
+
+-----
+
+**What it does**
+
+This runs the command line NCBI BLAST+ blastp tool.
+
+-----
+
+**Output format**
+
+The default Output of this tool is tabular, containing 12 columns:
+
+1. Id of your sequence
+2. GI of the database hit
+3. % identity
+4. Alignment length
+5. # mismatches
+6. # gaps
+7. Start position in your sequence
+8. End position in your sequence
+9. Start position in database hit
+10. End position in database hit
+11. E-value
+12. Bit score
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+    </help>
+</tool>