commit/galaxy-central: dan: Enhance Picard SamToFastq to allow read group aware processing.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/82797507cbc9/ changeset: 82797507cbc9 user: dan date: 2012-01-24 21:03:35 summary: Enhance Picard SamToFastq to allow read group aware processing. affected #: 2 files diff -r 226c3216eaf8565c215032a065018789f8f78bd8 -r 82797507cbc95c5a5123bdb0a77bce7bb0e81727 tools/picard/picard_SamToFastq.xml --- a/tools/picard/picard_SamToFastq.xml +++ b/tools/picard/picard_SamToFastq.xml @@ -1,10 +1,13 @@ -<tool id="picard_SamToFastq" name="SAM to FASTQ" version="1.56.0"> +<tool id="picard_SamToFastq" name="SAM to FASTQ" version="1.56.1" force_history_refresh="True"><description>creates a FASTQ file</description><requirements><requirement type="package" version="1.56.0">picard</requirement></requirements><!-- Dan Blankenberg --> - <command>java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC + <command interpreter="python">picard_SamToFastq_wrapper.py + -p ' + java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/SamToFastq.jar" INPUT="${input_sam}" + VALIDATION_STRINGENCY="LENIENT" RE_REVERSE=${re_reverse} INCLUDE_NON_PF_READS=${include_non_pf_reads} #if str( $clipping_attribute ): @@ -34,13 +37,26 @@ READ2_MAX_BASES_TO_WRITE="${single_paired_end_type.read2_max_bases_to_write}" #end if #end if + ' #else: - #raise Exception( 'Per Read Group not yet supported.' 
) OUTPUT_PER_RG=true - OUTPUT_DIR="./picard_sam_to_fastq_tmp_dir/" + #if str( $single_paired_end_type.single_paired_end_type_selector ) == 'paired': + ' + --read_group_file_2 "${output_fastq2}" + --file_id_2 "${output_fastq2.id}" + -p ' + #if str( $single_paired_end_type.read2_trim ): + READ2_TRIM="${single_paired_end_type.read2_trim}" + #end if + #if str( $single_paired_end_type.read2_max_bases_to_write ): + READ2_MAX_BASES_TO_WRITE="${single_paired_end_type.read2_max_bases_to_write}" + #end if + #end if + ' + --read_group_file_1 "${output_fastq1}" + --new_files_path "${$__new_file_path__}" + --file_id_1 "${output_fastq1.id}" #end if - 2>&1 - || echo "Error running SamToFastq" >&2 </command><inputs><param name="input_sam" type="data" format="sam,bam" label="BAM/SAM file" /> @@ -48,8 +64,7 @@ <param name="read1_max_bases_to_write" type="integer" optional="True" value="" label="The maximum number of bases to write from read 1 after trimming." /><param name="output_per_read_group_selector" type="select" label="Output per read group"><option value="per_sam_file" selected="True">Per BAM/SAM file</option> - <!-- <option value="per_read_group">Per Read Group</option> --> - <validator type="expression" message="Per Read Group selection is not yet implemented">value == 'per_sam_file'</validator> + <option value="per_read_group">Per Read Group</option></param><conditional name="single_paired_end_type"><param name="single_paired_end_type_selector" type="select" label="Single or Paired end"> @@ -107,6 +122,22 @@ <output name="output_fastq1" file="bwa_wrapper_in2.fastqsanger" lines_diff="64"/><!-- 16 unaligned fastq blocks not present in original sam file --><output name="output_fastq2" file="bwa_wrapper_in3.fastqsanger" lines_diff="64"/><!-- 16 unaligned fastq blocks not present in original sam file --></test> + <test> + <param name="input_sam" value="bwa_wrapper_out3.sam" ftype="sam" /> + <param name="output_per_read_group_selector" value="per_read_group" /> + <param 
name="single_paired_end_type_selector" value="paired" /> + <param name="read1_trim" value="" /> + <param name="read1_max_bases_to_write" value="" /> + <param name="read2_trim" value="" /> + <param name="read2_max_bases_to_write" value="" /> + <param name="re_reverse" value="True" /> + <param name="include_non_pf_reads" value="False" /> + <param name="clipping_action" value="" /> + <param name="clipping_attribute" value="" /> + <param name="include_non_primary_alignments" value="False" /> + <output name="output_fastq1" file="bwa_wrapper_in2.fastqsanger" lines_diff="64"/><!-- 16 unaligned fastq blocks not present in original sam file --> + <output name="output_fastq2" file="bwa_wrapper_in3.fastqsanger" lines_diff="64"/><!-- 16 unaligned fastq blocks not present in original sam file --> + </test></tests><help> **What it does** diff -r 226c3216eaf8565c215032a065018789f8f78bd8 -r 82797507cbc95c5a5123bdb0a77bce7bb0e81727 tools/picard/picard_SamToFastq_wrapper.py --- /dev/null +++ b/tools/picard/picard_SamToFastq_wrapper.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +#Dan Blankenberg + +""" +A wrapper script for running the Picard SamToFastq command. Allows parsing read groups into separate files. +""" + +import sys, optparse, os, tempfile, subprocess, shutil + +CHUNK_SIZE = 2**20 #1mb + + +def cleanup_before_exit( tmp_dir ): + if tmp_dir and os.path.exists( tmp_dir ): + shutil.rmtree( tmp_dir ) + +def open_file_from_option( filename, mode = 'rb' ): + if filename: + return open( filename, mode = mode ) + return None + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to PICARD, without any modification.' 
) + parser.add_option( '-1', '--read_group_file_1', dest='read_group_file_1', action='store', type="string", default=None, help='Read Group 1 output file, when using multiple readgroups' ) + parser.add_option( '-2', '--read_group_file_2', dest='read_group_file_2', action='store', type="string", default=None, help='Read Group 2 output file, when using multiple readgroups and paired end' ) + parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' ) + parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' ) + parser.add_option( '-n', '--new_files_path', dest='new_files_path', action='store', type="string", default=None, help='new_files_path') + parser.add_option( '-i', '--file_id_1', dest='file_id_1', action='store', type="string", default=None, help='file_id_1') + parser.add_option( '-f', '--file_id_2', dest='file_id_2', action='store', type="string", default=None, help='file_id_2') + (options, args) = parser.parse_args() + + tmp_dir = tempfile.mkdtemp( prefix='tmp-picard-' ) + if options.pass_through_options: + cmd = ' '.join( options.pass_through_options ) + else: + cmd = '' + if options.new_files_path is not None: + print 'Creating FASTQ files by Read Group' + assert None not in [ options.read_group_file_1, options.new_files_path, options.file_id_1 ], 'When using read group aware, you need to specify --read_group_file_1, --read_group_file_2 (when paired end), --new_files_path, and --file_id' + cmd = '%s OUTPUT_DIR="%s"' % ( cmd, tmp_dir) + #set up stdout and stderr output options + stdout = open_file_from_option( options.stdout, mode = 'wb' ) + if stdout is None: + stdout = sys.stdout + stderr = open_file_from_option( options.stderr, mode = 'wb' ) + #if no stderr file is specified, we'll use our own + if stderr is None: + stderr = 
tempfile.NamedTemporaryFile( prefix="picard-stderr-", dir=tmp_dir ) + + proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) + return_code = proc.wait() + + if return_code: + stderr_target = sys.stderr + else: + stderr_target = sys.stdout + stderr.flush() + stderr.seek(0) + while True: + chunk = stderr.read( CHUNK_SIZE ) + if chunk: + stderr_target.write( chunk ) + else: + break + stderr.close() + #if rg aware, put files where they belong + if options.new_files_path is not None: + fastq_1_name = options.read_group_file_1 + fastq_2_name = options.read_group_file_2 + file_id_1 = options.file_id_1 + file_id_2 = options.file_id_2 + if file_id_2 is None: + file_id_2 = file_id_1 + for filename in sorted( os.listdir( tmp_dir ) ): + if filename.endswith( '_1.fastq' ): + if fastq_1_name: + shutil.move( os.path.join( tmp_dir, filename ), fastq_1_name ) + fastq_1_name = None + else: + shutil.move( os.path.join( tmp_dir, filename ), os.path.join( options.new_files_path, 'primary_%s_%s - 1_visible_fastq' % ( file_id_1, filename[:-len( '_1.fastq' )] ) ) ) + elif filename.endswith( '_2.fastq' ): + if fastq_2_name: + shutil.move( os.path.join( tmp_dir, filename ), fastq_2_name ) + fastq_2_name = None + else: + shutil.move( os.path.join( tmp_dir, filename ), os.path.join( options.new_files_path, 'primary_%s_%s - 2_visible_fastq' % ( file_id_2, filename[:-len( '_2.fastq' )] ) ) ) + + cleanup_before_exit( tmp_dir ) + +if __name__=="__main__": __main__() Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
- Bitbucket