commit/galaxy-central: dan: Remove bam index metadata validator from GATK tools. The GATK wrapper will now build missing bam indexes on the fly as needed.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/167fddae9646/ changeset: 167fddae9646 user: dan date: 2012-02-27 18:49:30 summary: Remove bam index metadata validator from GATK tools. The GATK wrapper will now build missing bam indexes on the fly as needed. affected #: 10 files diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/analyze_covariates.xml --- a/tools/gatk/analyze_covariates.xml +++ b/tools/gatk/analyze_covariates.xml @@ -74,7 +74,7 @@ **Outputs** -The output is in and HTML file with links to PDF graphs and a data files. +The output is in CSV and HTML files with links to PDF graphs and a data files. Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/count_covariates.xml --- a/tools/gatk/count_covariates.xml +++ b/tools/gatk/count_covariates.xml @@ -2,12 +2,15 @@ <description>on BAM files</description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" - -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "CountCovariates" @@ -143,7 +146,6 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"> @@ -155,7 +157,6 @@ </when><when value="history"><!-- FIX ME!!!! --><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/depth_of_coverage.xml --- a/tools/gatk/depth_of_coverage.xml +++ b/tools/gatk/depth_of_coverage.xml @@ -2,13 +2,16 @@ <description>on BAM files</description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" #for $i, $input_bam in enumerate( $reference_source.input_bams ): -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" - -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if #end for -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" @@ -190,7 +193,6 @@ <repeat name="input_bams" title="BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param></repeat> @@ -204,7 +206,6 @@ <when value="history"><!-- FIX ME!!!! --><repeat name="input_bams" title="BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param></repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/gatk_wrapper.py --- a/tools/gatk/gatk_wrapper.py +++ b/tools/gatk/gatk_wrapper.py @@ -43,6 +43,23 @@ html_out.write( '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) ) html_out.write( '</ul>\n</body>\n</html>\n' ) +def index_bam_files( bam_filenames, tmp_dir ): + for bam_filename in bam_filenames: + bam_index_filename = "%s.bai" % bam_filename + if not os.path.exists( bam_index_filename ): + #need to index this bam file + stderr_name = tempfile.NamedTemporaryFile( prefix = "bam_index_stderr" ).name + command = 'samtools index %s %s' % ( bam_filename, bam_index_filename ) + proc = subprocess.Popen( args=command, shell=True, stderr=open( stderr_name, 'wb' ) ) + return_code = proc.wait() + if return_code: + for line in open( stderr_name ): + print >> sys.stderr, line + os.unlink( stderr_name ) #clean up + cleanup_before_exit( tmp_dir ) + raise Exception( "Error indexing BAM file" ) + os.unlink( stderr_name ) #clean up + def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -67,11 +84,15 @@ cmd = cmd.replace( 'java ', 'java -Xmx%s ' % ( options.max_jvm_heap ), 1 ) elif options.max_jvm_heap_fraction is not None: cmd = cmd.replace( 'java ', 'java -XX:DefaultMaxRAMFraction=%s -XX:+UseParallelGC ' % ( options.max_jvm_heap_fraction ), 1 ) + bam_filenames = [] if options.datasets: for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: gatk_filename = gatk_filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix ) if dataset_arg: cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) + if galaxy_ext == "bam": + bam_filenames.append( gatk_filename ) + index_bam_files( bam_filenames, tmp_dir ) #set up stdout and stderr output options stdout = open_file_from_option( options.stdout, mode = 'wb' ) stderr = open_file_from_option( options.stderr, mode = 'wb' ) diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/indel_realigner.xml --- a/tools/gatk/indel_realigner.xml +++ b/tools/gatk/indel_realigner.xml @@ -2,12 +2,15 @@ <description>- perform local realignment</description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" - -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "IndelRealigner" @@ -121,7 +124,6 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"> @@ -133,7 +135,6 @@ </when><when value="history"><!-- FIX ME!!!! --><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param><param name="ref_file" type="data" format="fasta" label="Using reference file"><options> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/print_reads.xml --- a/tools/gatk/print_reads.xml +++ b/tools/gatk/print_reads.xml @@ -2,13 +2,16 @@ <description>from BAM files</description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" #for $i, $input_bam in enumerate( $reference_source.input_bams ): -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" - -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if #end for -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" @@ -106,7 +109,6 @@ <repeat name="input_bams" title="Sample BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param></repeat> @@ -120,7 +122,6 @@ <when value="history"><!-- FIX ME!!!! --><repeat name="input_bams" title="Sample BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param></repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/realigner_target_creator.xml --- a/tools/gatk/realigner_target_creator.xml +++ b/tools/gatk/realigner_target_creator.xml @@ -2,12 +2,15 @@ <description>for use in local realignment</description><requirements><requirement type="package" version="1.3">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" - -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "RealignerTargetCreator" @@ -109,7 +112,6 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"> @@ -121,7 +123,6 @@ </when><when value="history"><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param><param name="ref_file" type="data" format="fasta" label="Using reference file"><options> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/table_recalibration.xml --- a/tools/gatk/table_recalibration.xml +++ b/tools/gatk/table_recalibration.xml @@ -2,12 +2,15 @@ <description>on BAM files</description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" - -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "TableRecalibration" @@ -126,7 +129,6 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"> @@ -138,7 +140,6 @@ </when><when value="history"><!-- FIX ME!!!! --><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/unified_genotyper.xml --- a/tools/gatk/unified_genotyper.xml +++ b/tools/gatk/unified_genotyper.xml @@ -2,13 +2,16 @@ <description>SNP and indel caller</description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" #for $i, $input_bam in enumerate( $reference_source.input_bams ): -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" - -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if #end for -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" @@ -155,7 +158,6 @@ <repeat name="input_bams" title="Sample BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param></repeat> @@ -169,7 +171,6 @@ <when value="history"><!-- FIX ME!!!! --><repeat name="input_bams" title="Sample BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param></repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /> diff -r 17caba3f5b808bd62a96ab3076934779dd01fb77 -r 167fddae9646f761a87c99a15984f52a605cc498 tools/gatk/variant_annotator.xml --- a/tools/gatk/variant_annotator.xml +++ b/tools/gatk/variant_annotator.xml @@ -2,13 +2,16 @@ <description></description><requirements><requirement type="package" version="1.4">gatk</requirement> + <requirement type="package">samtools</requirement></requirements><command interpreter="python">gatk_wrapper.py --max_jvm_heap_fraction "1" --stdout "${output_log}" #if str( $reference_source.input_bam ) != "None": -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" - -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if #end if -d "--variant" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" -p 'java @@ -151,7 +154,6 @@ <param name="input_variant_bti" type="boolean" truevalue="-BTI variant" falsevalue="" label="Increase efficiency for small variant files." /><param name="input_bam" type="data" format="bam" label="BAM file" optional="True" help="Not needed for all annotations." ><validator type="unspecified_build" /> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/><validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"> @@ -165,7 +167,6 @@ <param name="input_variant" type="data" format="vcf" label="Variant file to annotate" /><param name="input_variant_bti" type="boolean" truevalue="-BTI variant" falsevalue="" label="Increase efficiency for small variant files." /><param name="input_bam" type="data" format="bam" label="BAM file" optional="True" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/></param><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket