galaxy-dist commit d8184d91928c: Remove FASTA filter script from BLAST+ tools (I want to generalise it)

20 Nov 2010

# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User peterjc <p.j.a.cock@googlemail.com>
# Date 1287998919 -3600
# Node ID d8184d91928c83f0269bd0267be949bc5d676e8b
# Parent  5c212dfc6189bb41d334b0519411ca4f04fde9ec
Remove FASTA filter script from BLAST+ tools (I want to generalise it)

--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -269,7 +269,6 @@
    <tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" />
    <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" />
    <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" />
-   <tool file="ncbi_blast_plus/blast_filter_fasta.xml" />
  </section>
  <section name="NGS: Mapping" id="solexa_tools">
    <tool file="sr_mapping/lastz_wrapper.xml" />

--- a/tools/ncbi_blast_plus/blast_filter_fasta.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-"""Filter a FASTA file using tabular output, e.g. from BLAST.
-
-Takes five command line options, tabular BLAST filename, ID column number
-(using one based counting), input FASTA filename, and two output FASTA
-filenames (for records with and without any BLAST hits).
-
-In the default NCBI BLAST+ tabular output, the query sequence ID is in column
-one, and the ID of the match from the database is in column two.
-"""
-import sys
-from galaxy_utils.sequence.fasta import fastaReader, fastaWriter
-
-#Parse Command Line
-blast_file, blast_col, in_file, out_positive_file, out_negative_file = sys.argv[1:]
-blast_col = int(blast_col)-1
-assert blast_col >= 0
-
-#Read tabular BLAST file and record all queries with hit(s)
-ids = set()
-blast_handle = open(blast_file, "rU")  
-for line in blast_handle:
-    ids.add(line.split("\t")[blast_col])
-blast_handle.close()
-
-#Write filtered FASTA file based on IDs from BLAST file
-reader = fastaReader(open(in_file, "rU"))
-positive_writer = fastaWriter(open(out_positive_file, "w"))
-negative_writer = fastaWriter(open(out_negative_file, "w"))
-for record in reader:
-    #The [1:] is because the fastaReader leaves the > on the identifer.
-    if record.identifier and record.identifier.split()[0][1:] in ids:
-        positive_writer.write(record)
-    else:
-        negative_writer.write(record)
-positive_writer.close()
-negative_writer.close()
-reader.close()

--- a/tools/ncbi_blast_plus/blast_filter_fasta.xml
+++ /dev/null
@@ -1,37 +0,0 @@
-<tool id="blast_filter_fasta" name="Filter FASTA using BLAST output" version="0.0.1">
-    <description>Divide a FASTA file based on BLAST hits</description>
-    <command interpreter="python">
-      blast_filter_fasta.py $blast_file $blast_col $in_file $out_positive_file $out_negative_file
-    </command>
-    <inputs>
-        <param name="in_file" type="data" format="fasta" label="FASTA file to filter"/> 
-        <param name="blast_file" type="data" format="tabular" label="Tabular BLAST output"/> 
-        <param name="blast_col" type="select" label="Column containing FASTA identifiers">
-            <option value="1">Column 1 - BLAST query ID</option>
-            <option value="2">Column 2 - BLAST match ID</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="out_positive_file" format="fasta" label="Sequences with BLAST hits" />
-        <data name="out_negative_file" format="fasta" label="Sequences without BLAST hits" />
-    </outputs>
-    <requirements>
-    </requirements>
-    <tests>
-    </tests>
-    <help>
-    
-**What it does**
-
-Typical use would be to take a multi-sequence FASTA and the tabular output of
-running BLAST on it, and divide the FASTA file in two: those sequence with a
-BLAST hit, and those without.
-
-In the default NCBI BLAST+ tabular output, the query sequence ID is in column
-one, and the ID of the match from the database is in column two.
-
-This allows you to filter the FASTA file for the subjects in the BLAST search,
-rather than filtering the FASTA file for the queries in the BLAST search.
-
-    </help>
-</tool>

    

commits-noreply＠bitbucket.org

tags

participants (1)