1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/605eb591503a/ changeset: 605eb591503a user: dan date: 2012-04-27 21:33:10 summary: Have implicit SAM to BAM converter sort the output BAM file so that indexing will not fail. affected #: 2 files diff -r 5293f1bf1dba762cae71983e40304d1d8e4ae5b5 -r 605eb591503a73ed9fdc939f71c14e6c74762673 lib/galaxy/datatypes/converters/sam_to_bam.py --- /dev/null +++ b/lib/galaxy/datatypes/converters/sam_to_bam.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +#Dan Blankenberg + +""" +A wrapper script for converting SAM to BAM, with sorting. +%prog input_filename.sam output_filename.bam +""" + +import sys, optparse, os, tempfile, subprocess, shutil + +CHUNK_SIZE = 2**20 #1mb + + +def cleanup_before_exit( tmp_dir ): + if tmp_dir and os.path.exists( tmp_dir ): + shutil.rmtree( tmp_dir ) + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + (options, args) = parser.parse_args() + + assert len( args ) == 2, 'You must specify the input and output filenames' + input_filename, output_filename = args + + tmp_dir = tempfile.mkdtemp( prefix='tmp-sam_to_bam_converter-' ) + + #convert to SAM + unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' ) + unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' ) + cmd = 'samtools view -bS "%s" > "%s"' % ( input_filename, unsorted_bam_filename ) + proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) + return_code = proc.wait() + if return_code: + stderr_target = sys.stderr + else: + stderr_target = sys.stdout + stderr = open( unsorted_stderr_filename ) + while True: + chunk = stderr.read( CHUNK_SIZE ) + if chunk: + stderr_target.write( chunk ) + else: + break + stderr.close() + + #sort sam, so indexing will not fail + sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' ) + sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' ) + cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_filename ) + proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) + return_code = proc.wait() + + if return_code: + stderr_target = sys.stderr + else: + stderr_target = sys.stdout + stderr = open( sorted_stderr_filename ) + while True: + chunk = stderr.read( CHUNK_SIZE ) + if chunk: + stderr_target.write( chunk ) + else: + break + stderr.close() + + cleanup_before_exit( tmp_dir ) + +if __name__=="__main__": __main__() diff -r 5293f1bf1dba762cae71983e40304d1d8e4ae5b5 -r 605eb591503a73ed9fdc939f71c14e6c74762673 lib/galaxy/datatypes/converters/sam_to_bam.xml --- a/lib/galaxy/datatypes/converters/sam_to_bam.xml +++ b/lib/galaxy/datatypes/converters/sam_to_bam.xml @@ -1,11 +1,11 @@ -<tool id="CONVERTER_sam_to_bam" name="Convert SAM to BAM" version="1.0.0"> +<tool id="CONVERTER_sam_to_bam" name="Convert SAM to BAM" version="2.0.0"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --><!-- Used on the metadata edit page. --><!-- FIXME: conversion will only work if headers for reference sequences are in input file. To fix this: (a) merge sam_to_bam tool in tools with this conversion (like fasta_to_len conversion); and (b) define a datatype-specific way to set converter parameters. --> - <command>samtools view -bS $input1 > $output 2> /dev/null </command> + <command interpreter="python">sam_to_bam.py $input1 $output</command><inputs><param name="input1" type="data" format="sam" label="SAM file"/></inputs> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.