1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/070513d25b63/ changeset: 070513d25b63 user: dan date: 2011-10-20 16:24:19 summary: First pass at updating GATK tools to version 1.2. Changes are not backwards compatible with workflows. Still considered BETA, and so future changes are likely to also not be backwards compatible. affected #: 20 files (-1 bytes) --- a/test-data/gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains Thu Oct 20 10:03:28 2011 -0400 +++ b/test-data/gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains Thu Oct 20 10:24:19 2011 -0400 @@ -10,4 +10,4 @@ UnifiedGenotyper - % confidently called bases of callable loci 100.000 UnifiedGenotyper - Actual calls made 1 TraversalEngine - Total runtime -TraversalEngine - 0 reads were filtered out during traversal out of 10 total (0.00%) \ No newline at end of file +TraversalEngine - 0 reads were filtered out during traversal out of 20 total (0.00%) --- a/test-data/gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf Thu Oct 20 10:03:28 2011 -0400 +++ b/test-data/gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf Thu Oct 20 10:24:19 2011 -0400 @@ -21,7 +21,8 @@ ##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> ##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> ##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> -##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias"> -##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/gatk_input_0.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/gatk_input.fasta rodBind=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/input_dbsnp_0.bed] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_ALL_CONFIDENT_SITES standard_min_confidence_threshold_for_calling=4.0 standard_min_confidence_threshold_for_emitting=4.0 noSLOD=false assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=-1.0 min_indel_count_for_genotyping=2 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=3.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false 
out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[]" +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input_0.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input.fasta rodBind=[] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=4 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_ALL_CONFIDENT_SITES standard_min_confidence_threshold_for_calling=0.0 standard_min_confidence_threshold_for_emitting=4.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=-1.0 min_indel_count_for_genotyping=2 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=3.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false 
exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false dbsnp=(RodBinding name=dbsnp source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/input_dbsnp_0.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null annotation=[]" +##contig=<ID=phiX174,length=5386> +##reference=file:///var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input.fasta #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A Fake phiX Sample -phiX174 1443 . AC . 37.27 . AC=0;AF=0.00;AN=2;DP=10;MQ=37.74;MQ0=0 GT:DP:GQ:PL 0/0:7:0:0,0,0 +phiX174 1443 . AC . 0 . DB;DP=10;MQ=37.74;MQ0=0;QD=0.00 GT:DP:PL ./.:10:0,0,0 --- a/test-data/gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf Thu Oct 20 10:03:28 2011 -0400 +++ b/test-data/gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf Thu Oct 20 10:24:19 2011 -0400 @@ -22,8 +22,10 @@ ##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> ##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> ##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> -##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias"> -##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/gatk_input_0.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/gatk_input.fasta rodBind=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/input_dbsnp_0.bed] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_ALL_CONFIDENT_SITES standard_min_confidence_threshold_for_calling=4.0 standard_min_confidence_threshold_for_emitting=4.0 noSLOD=false assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=-1.0 min_indel_count_for_genotyping=2 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=3.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false 
out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[]" -##VariantAnnotator="analysis_type=VariantAnnotator input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/gatk_input.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/gatk_input.fasta rodBind=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/input_variant.vcf, /var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/input_dbsnp_0.bed] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sampleName=null annotation=[AlleleBalance, BaseQualityRankSumTest, DepthOfCoverage, HomopolymerRun, MappingQualityRankSumTest, MappingQualityZero, QualByDepth, RMSMappingQuality, SpanningDeletions, HaplotypeScore] group=[] expression=[] useAllAnnotations=false list=false assume_single_sample_reads=null vcfContainsOnlyIndels=false" +##UnifiedGenotyper="analysis_type=UnifiedGenotyper 
input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input_0.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input.fasta rodBind=[] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=4 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_ALL_CONFIDENT_SITES standard_min_confidence_threshold_for_calling=0.0 standard_min_confidence_threshold_for_emitting=4.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=-1.0 min_indel_count_for_genotyping=2 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=3.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false dbsnp=(RodBinding name=dbsnp source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/input_dbsnp_0.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub 
NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null annotation=[]" +##VariantAnnotator="analysis_type=VariantAnnotator input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/gatk_input.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/gatk_input.fasta rodBind=[] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/input_variant.vcf) snpEffFile=(RodBinding name= source=UNBOUND) dbsnp=(RodBinding name=dbsnp source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/input_dbsnp_dbsnp.vcf) comp=[] resource=[] out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub annotation=[SpanningDeletions, MappingQualityZero, AlleleBalance, RMSMappingQuality, HaplotypeScore, HomopolymerRun, DepthOfCoverage, MappingQualityRankSumTest, BaseQualityRankSumTest, QualByDepth] group=[] expression=[] useAllAnnotations=false list=false assume_single_sample_reads=null vcfContainsOnlyIndels=false" +##contig=<ID=phiX174,length=5386> 
+##reference=file:///var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input.fasta +##reference=file:///var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/gatk_input.fasta #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A Fake phiX Sample -phiX174 1443 . AC . 37.27 . AC=0;AF=0.00;AN=2;DP=10;MQ=37.74;MQ0=0 GT:DP:GQ:PL 0/0:7:0:0,0,0 +phiX174 1443 . AC . 0 . DB;DP=10;MQ=37.74;MQ0=0;QD=0.00 GT:DP:PL ./.:10:0,0,0 --- a/test-data/gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf Thu Oct 20 10:03:28 2011 -0400 +++ b/test-data/gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf Thu Oct 20 10:24:19 2011 -0400 @@ -1,5 +1,5 @@ ##fileformat=VCFv4.1 -##CombineVariants="analysis_type=CombineVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpNWtPhs/gatk_input.fasta rodBind=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpNWtPhs/input_variant_from_variant_annotator.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub genotypemergeoption=PRIORITIZE filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED rod_priority_list=from_variant_annotator 
printComplexMerges=false filteredAreUncalled=false minimalVCF=false setKey=set assumeIdenticalSamples=false minimumN=1 masterMerge=false mergeInfoWithMaxAC=false" +##CombineVariants="analysis_type=CombineVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-bFIpbp/gatk_input.fasta rodBind=[] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=[(RodBinding name=from_variant_annotator source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-bFIpbp/input_variant_from_variant_annotator.vcf)] out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub genotypemergeoption=PRIORITIZE filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED rod_priority_list=from_variant_annotator printComplexMerges=false filteredAreUncalled=false minimalVCF=false setKey=set assumeIdenticalSamples=false minimumN=1 mergeInfoWithMaxAC=false" ##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)"> ##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> @@ -23,9 +23,11 @@ 
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> ##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> ##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> -##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias"> ##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants"> -##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/gatk_input_0.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/gatk_input.fasta rodBind=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpXqIddV/input_dbsnp_0.bed] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_ALL_CONFIDENT_SITES standard_min_confidence_threshold_for_calling=4.0 standard_min_confidence_threshold_for_emitting=4.0 noSLOD=false assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=-1.0 
min_indel_count_for_genotyping=2 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=3.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[]" -##VariantAnnotator="analysis_type=VariantAnnotator input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/gatk_input.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/gatk_input.fasta rodBind=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/input_variant.vcf, /var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmpYvqW3G/input_dbsnp_0.bed] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sampleName=null 
annotation=[AlleleBalance, BaseQualityRankSumTest, DepthOfCoverage, HomopolymerRun, MappingQualityRankSumTest, MappingQualityZero, QualByDepth, RMSMappingQuality, SpanningDeletions, HaplotypeScore] group=[] expression=[] useAllAnnotations=false list=false assume_single_sample_reads=null vcfContainsOnlyIndels=false" +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input_0.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input.fasta rodBind=[] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=4 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_ALL_CONFIDENT_SITES standard_min_confidence_threshold_for_calling=0.0 standard_min_confidence_threshold_for_emitting=4.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=-1.0 min_indel_count_for_genotyping=2 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=3.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false 
indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false dbsnp=(RodBinding name=dbsnp source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/input_dbsnp_0.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null annotation=[]" +##VariantAnnotator="analysis_type=VariantAnnotator input_file=[/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/gatk_input.bam] sample_metadata=[] read_buffer_size=null phone_home=NO_ET read_filter=[] intervals=null excludeIntervals=null reference_sequence=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/gatk_input.fasta rodBind=[] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/input_variant.vcf) snpEffFile=(RodBinding name= source=UNBOUND) dbsnp=(RodBinding name=dbsnp source=/var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-gaJtgB/input_dbsnp_dbsnp.vcf) comp=[] resource=[] out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub 
sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub annotation=[SpanningDeletions, MappingQualityZero, AlleleBalance, RMSMappingQuality, HaplotypeScore, HomopolymerRun, DepthOfCoverage, MappingQualityRankSumTest, BaseQualityRankSumTest, QualByDepth] group=[] expression=[] useAllAnnotations=false list=false assume_single_sample_reads=null vcfContainsOnlyIndels=false" +##contig=<ID=phiX174,length=5386> +##reference=file:///var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-aSMuO5/gatk_input.fasta +##reference=file:///var/folders/78/786YaG3QH58XnzrWynoDBk+++TI/-Tmp-/tmp-gatk-bFIpbp/gatk_input.fasta #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A Fake phiX Sample -phiX174 1443 . AC . . PASS AC=0;AF=0.00;AN=2;DP=10;MQ=37.74;MQ0=0;set=ReferenceInAll GT:DP:GQ:PL 0/0:7:0:0,0,0 +phiX174 1443 . AC . . PASS AC=0;AF=0.00;AN=0;DB;DP=10;MQ=37.74;MQ0=0;QD=0.00;set=ReferenceInAll GT:DP:PL ./.:10:0,0,0 --- a/test-data/gatk/gatk_variant_eval/gatk_variant_eval_out_1.tabular Thu Oct 20 10:03:28 2011 -0400 +++ b/test-data/gatk/gatk_variant_eval/gatk_variant_eval_out_1.tabular Thu Oct 20 10:24:19 2011 -0400 @@ -1,36 +1,36 @@ -##:GATKReport.v0.1 CompOverlap : The overlap between eval and comp sites -CompOverlap CompRod EvalRod JexlExpression Novelty nEvalVariants nCompVariants novelSites nVariantsAtComp compRate nConcordant concordantRate -CompOverlap dbsnp eval none all 0 0 0 0 0.00000000 0 0.00000000 -CompOverlap dbsnp eval none known 0 0 0 0 0.00000000 0 0.00000000 -CompOverlap dbsnp eval none novel 0 0 0 0 0.00000000 0 0.00000000 +##:GATKReport.v0.2 CompOverlap : The overlap between eval and comp sites +CompOverlap CompRod EvalRod JexlExpression Novelty nEvalVariants novelSites nVariantsAtComp compRate nConcordant concordantRate +CompOverlap dbsnp input_0 none all 0 0 0 0.00000000 0 0.00000000 +CompOverlap dbsnp input_0 none known 0 0 0 0.00000000 0 0.00000000 +CompOverlap dbsnp input_0 none novel 0 0 0 0.00000000 0 0.00000000 -##:GATKReport.v0.1 CountVariants : 
Counts different classes of variants in the sample -CountVariants CompRod EvalRod JexlExpression Novelty nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate variantRatePerBp nSNPs nMNPs nInsertions nDeletions nComplex nNoCalls nHets nHomRef nHomVar nSingletons nHomDerived heterozygosity heterozygosityPerBp hetHomRatio indelRate indelRatePerBp deletionInsertionRatio -CountVariants dbsnp eval none all 5386 1 1 0 0.00000000 0.00000000 0 0 0 0 0 0 0 1 0 0 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 -CountVariants dbsnp eval none known 5386 0 0 0 0.00000000 0.00000000 0 0 0 0 0 0 0 0 0 0 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 -CountVariants dbsnp eval none novel 5386 1 1 0 0.00000000 0.00000000 0 0 0 0 0 0 0 1 0 0 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 +##:GATKReport.v0.2 CountVariants : Counts different classes of variants in the sample +CountVariants CompRod EvalRod JexlExpression Novelty nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate variantRatePerBp nSNPs nMNPs nInsertions nDeletions nComplex nMixed nNoCalls nHets nHomRef nHomVar nSingletons nHomDerived heterozygosity heterozygosityPerBp hetHomRatio indelRate indelRatePerBp deletionInsertionRatio +CountVariants dbsnp input_0 none all 5386 1 1 0 0.00000000 0.00000000 0 0 0 0 0 0 1 0 0 0 0 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 +CountVariants dbsnp input_0 none known 5386 1 1 0 0.00000000 0.00000000 0 0 0 0 0 0 1 0 0 0 0 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 +CountVariants dbsnp input_0 none novel 5386 0 0 0 0.00000000 0.00000000 0 0 0 0 0 0 0 0 0 0 0 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 -##:GATKReport.v0.1 SimpleMetricsByAC.metrics : TiTv by allele count +##:GATKReport.v0.2 SimpleMetricsByAC.metrics : TiTv by allele count SimpleMetricsByAC.metrics CompRod EvalRod JexlExpression Novelty row AC nTi nTv n TiTv 
-SimpleMetricsByAC.metrics dbsnp eval none all ac0 0 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none all ac1 1 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none all ac2 2 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none known ac0 0 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none known ac1 1 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none known ac2 2 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none novel ac0 0 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none novel ac1 1 0 0 0 0.0 -SimpleMetricsByAC.metrics dbsnp eval none novel ac2 2 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none all ac0 0 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none all ac1 1 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none all ac2 2 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none known ac0 0 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none known ac1 1 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none known ac2 2 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none novel ac0 0 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none novel ac1 1 0 0 0 0.0 +SimpleMetricsByAC.metrics dbsnp input_0 none novel ac2 2 0 0 0 0.0 -##:GATKReport.v0.1 TiTvVariantEvaluator : Ti/Tv Variant Evaluator -TiTvVariantEvaluator CompRod EvalRod JexlExpression Novelty nTi nTv tiTvRatio nTiInComp nTvInComp TiTvRatioStandard nTiDerived nTvDerived tiTvDerivedRatio -TiTvVariantEvaluator dbsnp eval none all 0 0 0.00000000 0 0 0.00000000 0 0 0.00000000 -TiTvVariantEvaluator dbsnp eval none known 0 0 0.00000000 0 0 0.00000000 0 0 0.00000000 -TiTvVariantEvaluator dbsnp eval none novel 0 0 0.00000000 0 0 0.00000000 0 0 0.00000000 +##:GATKReport.v0.2 TiTvVariantEvaluator : Ti/Tv Variant Evaluator +TiTvVariantEvaluator CompRod EvalRod JexlExpression Novelty nTi nTv tiTvRatio nTiInComp nTvInComp TiTvRatioStandard nTiDerived nTvDerived tiTvDerivedRatio +TiTvVariantEvaluator dbsnp input_0 none all 0 0 0.00000000 0 0 0.00000000 0 0 0.00000000 
+TiTvVariantEvaluator dbsnp input_0 none known 0 0 0.00000000 0 0 0.00000000 0 0 0.00000000 +TiTvVariantEvaluator dbsnp input_0 none novel 0 0 0.00000000 0 0 0.00000000 0 0 0.00000000 -##:GATKReport.v0.1 ValidationReport : Assess site accuracy and sensitivity of callset against follow-up validation assay -ValidationReport CompRod EvalRod JexlExpression Novelty nComp TP FP FN TN sensitivity specificity PPV FDR CompMonoEvalNoCall CompMonoEvalFiltered CompMonoEvalMono CompMonoEvalPoly CompPolyEvalNoCall CompPolyEvalFiltered CompPolyEvalMono CompPolyEvalPoly CompFiltered nDifferentAlleleSites -ValidationReport dbsnp eval none all 0 0 0 0 0 NaN 100.00000000 NaN NaN 0 0 0 0 0 0 0 0 0 0 -ValidationReport dbsnp eval none known 0 0 0 0 0 NaN 100.00000000 NaN NaN 0 0 0 0 0 0 0 0 0 0 -ValidationReport dbsnp eval none novel 0 0 0 0 0 NaN 100.00000000 NaN NaN 0 0 0 0 0 0 0 0 0 0 +##:GATKReport.v0.2 ValidationReport : Assess site accuracy and sensitivity of callset against follow-up validation assay +ValidationReport CompRod EvalRod JexlExpression Novelty nComp TP FP FN TN sensitivity specificity PPV FDR CompMonoEvalNoCall CompMonoEvalFiltered CompMonoEvalMono CompMonoEvalPoly CompPolyEvalNoCall CompPolyEvalFiltered CompPolyEvalMono CompPolyEvalPoly CompFiltered nDifferentAlleleSites +ValidationReport dbsnp input_0 none all 43 0 0 0 43 NaN 100.00000000 NaN NaN 42 0 1 0 0 0 0 0 0 0 +ValidationReport dbsnp input_0 none known 1 0 0 0 1 NaN 100.00000000 NaN NaN 0 0 1 0 0 0 0 0 0 0 +ValidationReport dbsnp input_0 none novel 42 0 0 0 42 NaN 100.00000000 NaN NaN 42 0 0 0 0 0 0 0 0 0 --- a/tools/gatk/analyze_covariates.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/analyze_covariates.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,7 +1,10 @@ -<tool id="gatk_analyze_covariates" name="Analyze Covariates" version="0.0.2"> +<tool id="gatk_analyze_covariates" name="Analyze Covariates" version="0.0.3"><description>- draw plots</description> -<command interpreter="python">gatk_wrapper.py - 
--max_jvm_heap_fraction "2" + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements> + <command interpreter="python">gatk_wrapper.py + --max_jvm_heap_fraction "1" --stdout "${output_log}" --html_report_from_directory "${output_html}" "${output_html.files_path}" -p 'java --- a/tools/gatk/count_covariates.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/count_covariates.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,7 +1,10 @@ -<tool id="gatk_count_covariates" name="Count Covariates" version="0.0.2"> +<tool id="gatk_count_covariates" name="Count Covariates" version="0.0.3"><description>on BAM files</description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index @@ -35,7 +38,7 @@ #set $snp_dataset_provided = True #end if #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + -d "--knownSites:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): -p '--rodToIntervalTrackName "${rod_bind_name}"' #end if --- a/tools/gatk/indel_realigner.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/indel_realigner.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,7 +1,10 @@ -<tool id="gatk_indel_realigner" name="Indel Realigner" version="0.0.2"> +<tool 
id="gatk_indel_realigner" name="Indel Realigner" version="0.0.3"><description>- perform local realignment</description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index @@ -26,7 +29,7 @@ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector #end if #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): -p '--rodToIntervalTrackName "${rod_bind_name}"' #end if @@ -104,7 +107,7 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! 
this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="picard_indexes"> @@ -114,7 +117,11 @@ </when><when value="history"><!-- FIX ME!!!! --><param name="input_bam" type="data" format="bam" label="BAM file" /> - <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + <param name="ref_file" type="data" format="fasta" label="Using reference file"> + <options> + <filter type="data_meta" key="dbkey" ref="input_bam" /><!-- FIX ME!!!! --> + </options> + </param></when></conditional><param name="target_intervals" type="data" format="gatk_interval,bed,picard_interval_list" label="Restrict realignment to provided intervals" /> @@ -277,7 +284,7 @@ <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" /><param name="rod_bind_type_selector" value="snps" /><param name="rodToIntervalTrackName" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /><param name="lod_threshold" value="5.0" /><param name="knowns_only" /><param name="gatk_param_type_selector" value="basic" /> --- a/tools/gatk/realigner_target_creator.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/realigner_target_creator.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,7 +1,10 @@ -<tool id="gatk_realigner_target_creator" name="Realigner Target Creator" version="0.0.2"> +<tool id="gatk_realigner_target_creator" name="Realigner Target Creator" version="0.0.3"><description>for use in local realignment</description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" -d "" "${reference_source.input_bam.metadata.bam_index}" 
"bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index @@ -23,7 +26,7 @@ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector #end if #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): -p '--rodToIntervalTrackName "${rod_bind_name}"' #end if @@ -92,7 +95,7 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="picard_indexes"> @@ -100,9 +103,13 @@ </options></param></when> - <when value="history"><!-- FIX ME!!!! --> + <when value="history"><param name="input_bam" type="data" format="bam" label="BAM file" /> - <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + <param name="ref_file" type="data" format="fasta" label="Using reference file"> + <options> + <filter type="data_meta" key="dbkey" ref="input_bam" /><!-- FIX ME!!!! 
--> + </options> + </param></when></conditional> @@ -115,20 +122,20 @@ <option value="custom">Custom</option></param><when value="dbsnp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when><when value="snps"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when><when value="indels"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when><when value="custom"><param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when></conditional> @@ -254,9 +261,13 @@ <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" /><param name="rod_bind_type_selector" value="dbsnp" /><param 
name="rodToIntervalTrackName" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /><param name="gatk_param_type_selector" value="basic" /> - <param name="analysis_param_type_selector" value="basic" /> + <param name="analysis_param_type_selector" value="advanced" /> + <param name="windowSize" value="10" /> + <param name="mismatchFraction" value="0.15" /> + <param name="minReadsAtLocus" value="4" /> + <param name="maxIntervalSize" value="500" /><output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" /><output name="output_log" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains" compare="contains"/></test> --- a/tools/gatk/table_recalibration.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/table_recalibration.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,7 +1,10 @@ -<tool id="gatk_table_recalibration" name="Table Recalibration" version="0.0.2"> +<tool id="gatk_table_recalibration" name="Table Recalibration" version="0.0.3"><description>on BAM files</description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index @@ -120,7 +123,7 @@ <when value="cached"><param name="input_bam" type="data" format="bam" label="BAM file"><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! 
this needs to be a select --> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="picard_indexes"> --- a/tools/gatk/unified_genotyper.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/unified_genotyper.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,9 +1,14 @@ -<tool id="gatk_unified_genotyper" name="Unified Genotyper" version="0.0.2"> +<tool id="gatk_unified_genotyper" name="Unified Genotyper" version="0.0.3"><description>SNP and indel caller</description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" #for $i, $input_bam in enumerate( $reference_source.input_bams ): + ${dir( $input_bam.input_bam )} + ${dir( $input_bam.input_bam.dataset )} -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index #end for @@ -11,12 +16,14 @@ -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "UnifiedGenotyper" --num_threads 4 ##hard coded, for now - -o "${output_vcf}" + --out "${output_vcf}" + --metrics_file "${output_metrics}" -et "NO_ET" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" #end if + --genotype_likelihoods_model "${genotype_likelihoods_model}" --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}" 
--standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}" ' @@ -28,7 +35,7 @@ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector #end if #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): -p '--rodToIntervalTrackName "${rod_bind_name}"' #end if @@ -81,13 +88,15 @@ ##start analysis specific options #if $analysis_param_type.analysis_param_type_selector == "advanced": -p ' - --genotype_likelihoods_model "${analysis_param_type.genotype_likelihoods_model}" --p_nonref_model "${analysis_param_type.p_nonref_model}" --heterozygosity "${analysis_param_type.heterozygosity}" --pcr_error_rate "${analysis_param_type.pcr_error_rate}" - --genotyping_mode "${analysis_param_type.genotyping_mode}" + --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}" + #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES': + --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}" + #end if --output_mode "${analysis_param_type.output_mode}" - ${analysis_param_type.noSLOD} + ${analysis_param_type.compute_SLOD} --min_base_quality_score "${analysis_param_type.min_base_quality_score}" --min_mapping_quality_score "${analysis_param_type.min_mapping_quality_score}" --max_deletion_fraction "${analysis_param_type.max_deletion_fraction}" @@ -120,7 +129,7 @@ <repeat name="input_bams" title="Sample BAM file" min="1"><param name="input_bam" type="data" format="bam" label="BAM 
file"><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --></param></repeat><param name="ref_file" type="select" label="Using reference genome"> @@ -146,25 +155,31 @@ <option value="custom">Custom</option></param><when value="dbsnp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when><when value="snps"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when><when value="indels"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when><when value="custom"><param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - 
<param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> + <param name="input_rod" type="data" format="vcf" label="ROD file" /><param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /></when></conditional></repeat> + <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ"> + <option value="BOTH" selected="True">BOTH</option> + <option value="SNP">SNP</option> + <option value="INDEL">INDEL</option> + </param> + <param name="standard_min_confidence_threshold_for_calling" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called" /><param name="standard_min_confidence_threshold_for_emitting" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)" /> @@ -271,27 +286,30 @@ <!-- Do nothing here --></when><when value="advanced"> - <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ"> - <option value="BOTH" selected="True">BOTH</option> - <option value="SNP">SNP</option> - <option value="INDEL">INDEL</option> - </param><param name="p_nonref_model" type="select" label="Non-reference probability calculation model to employ"><option value="EXACT" selected="True">EXACT</option><option value="GRID_SEARCH">GRID_SEARCH</option></param><param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus" /><param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods" /> - <param name="genotyping_mode" type="select" label="How to 
determine the alternate allele to use for genotyping"> - <option value="DISCOVERY" selected="True">DISCOVERY</option> - <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option> - </param> + <conditional name="genotyping_mode_type"> + <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping"> + <option value="DISCOVERY" selected="True">DISCOVERY</option> + <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option> + </param> + <when value="DISCOVERY"> + <!-- Do nothing here --> + </when> + <when value="GENOTYPE_GIVEN_ALLELES"> + <param name="input_alleles_rod" type="data" format="vcf" label="Alleles ROD file" /> + </when> + </conditional><param name="output_mode" type="select" label="Should we output confident genotypes (i.e. including ref calls) or just the variants?"><option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option><option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option><option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option></param> - <param name="noSLOD" type="boolean" truevalue="--noSLOD" falsevalue="" label="Do not calculate the SLOD" /> + <param name="compute_SLOD" type="boolean" truevalue="--computeSLOD" falsevalue="" label="Compute the SLOD" /><param name="min_base_quality_score" type="integer" value="17" label="Minimum base quality required to consider a base for calling" /><param name="min_mapping_quality_score" type="integer" value="20" label="Minimum read mapping quality required to consider a read for calling" /><param name="max_deletion_fraction" type="float" value="0.05" label="Maximum fraction of reads with deletions spanning this locus for it to be callable" help="to disable, set to < 0 or > 1" /> @@ -302,16 +320,35 @@ <param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" /><param name="doContextDependentGapPenalties" type="boolean" truevalue="--doContextDependentGapPenalties" falsevalue="" 
label="Vary gap penalties by context" /><param name="annotation" type="select" multiple="True" display="checkboxes" label="Annotation Types"> - <option value="AlleleBalance">AlleleBalance</option> - <option value="BaseQualityRankSumTest">BaseQualityRankSumTest</option> - <option value="DepthOfCoverage">DepthOfCoverage</option> - <option value="HomopolymerRun">HomopolymerRun</option> - <option value="MappingQualityRankSumTest">MappingQualityRankSumTest</option> - <option value="MappingQualityZero">MappingQualityZero</option> - <option value="QualByDepth">QualByDepth</option> - <option value="RMSMappingQuality">RMSMappingQuality</option> - <option value="SpanningDeletions">SpanningDeletions</option> - <option value="HaplotypeScore">HaplotypeScore</option> + <option value="ChromosomeCounts"/> + <option value="IndelType"/> + <option value="SpanningDeletions"/> + <option value="HardyWeinberg"/> + <option value="NBaseCount"/> + <option value="MappingQualityZero"/> + <option value="AlleleBalance"/> + <option value="BaseCounts"/> + <option value="LowMQ"/> + <option value="InbreedingCoeff"/> + <option value="RMSMappingQuality"/> + <option value="HaplotypeScore"/> + <option value="TechnologyComposition"/> + <option value="SampleList"/> + <option value="FisherStrand"/> + <option value="HomopolymerRun"/> + <option value="DepthOfCoverage"/> + <option value="SnpEff"/> + <option value="MappingQualityZeroFraction"/> + <option value="GCContent"/> + <option value="MappingQualityRankSumTest"/> + <option value="ReadPosRankSumTest"/> + <option value="BaseQualityRankSumTest"/> + <option value="QualByDepth"/> + <option value="SBByDepth"/> + <option value="ReadDepthAndAllelicFractionBySample"/> + <option value="AlleleBalanceBySample"/> + <option value="DepthPerAlleleBySample"/> + <option value="MappingQualityZeroBySample"/></param><param name="group" type="select" multiple="True" display="checkboxes" label="Annotation Interfaces/Groups"><option value="Standard">Standard</option> @@ -324,6 
+361,7 @@ </inputs><outputs><data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" /> + <data format="txt" name="output_metrics" label="${tool.name} on ${on_string} (metrics)" /><data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /></outputs><!-- FIXME! <trackster_conf/> --> @@ -333,9 +371,9 @@ <param name="ref_file" value="phiX.fasta" ftype="fasta" /><param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /><param name="rod_bind_type_selector" value="dbsnp" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /><param name="rodToIntervalTrackName" /> - <param name="standard_min_confidence_threshold_for_calling" value="4" /> + <param name="standard_min_confidence_threshold_for_calling" value="0" /><param name="standard_min_confidence_threshold_for_emitting" value="4" /><param name="gatk_param_type_selector" value="basic" /><param name="analysis_param_type_selector" value="advanced" /> @@ -345,7 +383,7 @@ <param name="pcr_error_rate" value="0.0001" /><param name="genotyping_mode" value="DISCOVERY" /><param name="output_mode" value="EMIT_ALL_CONFIDENT_SITES" /> - <param name="noSLOD" /> + <param name="compute_SLOD" /><param name="min_base_quality_score" value="17" /><param name="min_mapping_quality_score" value="20" /><param name="max_deletion_fraction" value="-1" /> @@ -357,8 +395,9 @@ <param name="doContextDependentGapPenalties" /><!-- <param name="annotation" value="" /><param name="group" value="" /> --> - <output name="output_vcf" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf" lines_diff="2"/> - <output name="output_log" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains" compare="contains"/> + <output name="output_vcf" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf" 
lines_diff="4" /> + <output name="output_metrics" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.metrics" /> + <output name="output_log" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains" compare="contains" /></test></tests><help> --- a/tools/gatk/variant_annotator.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variant_annotator.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,13 +1,16 @@ -<tool id="gatk_variant_annotator" name="Variant Annotator" version="0.0.1"> +<tool id="gatk_variant_annotator" name="Variant Annotator" version="0.0.2"><description></description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" #if str( $reference_source.input_bam ) != "None": -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index #end if - -d "-B:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -d "--variant" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "VariantAnnotator" @@ -35,26 +38,40 @@ ## #end if ## #end for ## #end for - #if str( $reference_source.input_variant_name ): - --sampleName "${reference_source.input_variant_name}" - #end if ${reference_source.input_variant_bti} ' - #set $rod_binding_names = dict() - #for $rod_binding in $rod_bind: - #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': - #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name - #elif str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'comp': - #set $rod_bind_name = "comp" + str( 
$rod_binding.rod_bind_type.custom_rod_name ) - #else - #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #for $rod_binding in $comp_rod_bind: + -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}" + #if str( $rod_binding.comp_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${rod_binding.comp_rod_name}"' #end if - #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): - -p '--rodToIntervalTrackName "${rod_bind_name}"' + #end for + + #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #if str( $dbsnp_rod_bind_type.dbsnp_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${dbsnp_rod_bind_type.dbsnp_rod_name}"' #end if + #end if + + + #for $rod_binding in $resource_rod_bind: + -d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}" + #if str( $rod_binding.resource_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${rod_binding.resource_rod_name}"' + #end if + #end for + + #if str( $snpEff_rod_bind_type.snpEff_rod_bind_type_selector ) == 'set_snpEff': + -d "--snpEff:${snpEff_rod_bind_type.snpEff_rod_name},%(file_type)s" "${snpEff_rod_bind_type.snpEff_input_rod}" "${snpEff_rod_bind_type.snpEff_input_rod.ext}" "input_snpEff_${snpEff_rod_bind_type.snpEff_rod_name}" + #if 
str( $snpEff_rod_bind_type.snpEff_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${snpEff_rod_bind_type.snpEff_rod_name}"' + #end if + #end if + + #for $expression in $expressions: + -p '--expression "${expression.expression}"' #end for ##start standard gatk options @@ -101,16 +118,13 @@ #end if ##end standard gatk options - ##start analysis specific options - #if $analysis_param_type.analysis_param_type_selector == "advanced": - -p ' - #if $analysis_param_type.group.value: - #for $annotation_group in str( $analysis_param_type.group ).split( ',' ): - --group "${annotation_group}" - #end for - #end if + -p ' + #if $annotation_group.value: + #for $group in str( $annotation_group ).split( ',' ): + --group "${group}" + #end for + #end if ' - #end if </command><inputs><conditional name="reference_source"> @@ -119,12 +133,11 @@ <option value="history">History</option></param><when value="cached"> - <param name="input_variant" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> - <param name="input_variant_name" type="text" value="" label="Variant Name" help="Not needed for VCF inputs."/> + <param name="input_variant" type="data" format="vcf" label="Variant file to annotate" /><param name="input_variant_bti" type="boolean" truevalue="-BTI variant" falsevalue="" label="Increase efficiency for small variant files." /><param name="input_bam" type="data" format="bam" label="BAM file" optional="True" help="Not needed for all annotations." ><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /><!-- fixme!!! 
this needs to be a select --></param><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="picard_indexes"> @@ -133,8 +146,7 @@ </param></when><when value="history"><!-- FIX ME!!!! --> - <param name="input_variant" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> - <param name="input_variant_name" type="text" value="" label="Variant Name" help="Not needed for VCF inputs."/> + <param name="input_variant" type="data" format="vcf" label="Variant file to annotate" /><param name="input_variant_bti" type="boolean" truevalue="-BTI variant" falsevalue="" label="Increase efficiency for small variant files." /><param name="input_bam" type="data" format="bam" label="BAM file" optional="True" /><param name="ref_file" type="data" format="fasta" label="Using reference file" /> @@ -151,90 +163,83 @@ <when value="choose"><param name="annotations" type="select" multiple="True" display="checkboxes" label="Annotations to apply" ><!-- might we want to load the available annotations from an external configuration file, since additional ones can be added to local installs? 
--> - <option value="AlleleBalance" /> - <option value="BaseQualityRankSumTest" /> - <option value="DepthOfCoverage" /> - <option value="HomopolymerRun" /> - <option value="MappingQualityRankSumTest" /> - <option value="MappingQualityZero" /> - <option value="QualByDepth" /> - <option value="RMSMappingQuality" /> - <option value="SpanningDeletions" /> - <option value="HaplotypeScore" /> - <!-- annotations below were pulled from list option --> - <option value="ChromosomeCounts" /> - <option value="IndelType" /> - <option value="HardyWeinberg" /> - <option value="BaseCounts" /> - <option value="LowMQ" /> - <option value="FisherStrand" /> - <option value="GenomicAnnotation" /> - <option value="GCContent" /> - <option value="ReadPosRankSumTest" /> - <option value="SBByDepth" /> - <option value="ReadDepthAndAllelicFractionBySample" /> - <option value="AlleleBalanceBySample" /> - <option value="DepthPerAlleleBySample" /> - <option value="MappingQualityZeroBySample" /> - <option value="NBaseCount" /> - <option value="GLstats" /> - <option value="SampleList" /> - <option value="MappingQualityZeroFraction" /> + <option value="ChromosomeCounts"/> + <option value="IndelType"/> + <option value="SpanningDeletions"/> + <option value="HardyWeinberg"/> + <option value="NBaseCount"/> + <option value="MappingQualityZero"/> + <option value="AlleleBalance"/> + <option value="BaseCounts"/> + <option value="LowMQ"/> + <option value="InbreedingCoeff"/> + <option value="RMSMappingQuality"/> + <option value="HaplotypeScore"/> + <option value="TechnologyComposition"/> + <option value="SampleList"/> + <option value="FisherStrand"/> + <option value="HomopolymerRun"/> + <option value="DepthOfCoverage"/> + <option value="SnpEff"/> + <option value="MappingQualityZeroFraction"/> + <option value="GCContent"/> + <option value="MappingQualityRankSumTest"/> + <option value="ReadPosRankSumTest"/> + <option value="BaseQualityRankSumTest"/> + <option value="QualByDepth"/> + <option value="SBByDepth"/> 
+ <option value="ReadDepthAndAllelicFractionBySample"/> + <option value="AlleleBalanceBySample"/> + <option value="DepthPerAlleleBySample"/> + <option value="MappingQualityZeroBySample"/></param></when></conditional> - <!-- <repeat name="additional_annotations" title="Additional annotation"> - <conditional name="additional_annotation_type"> - <param name="additional_annotation_type_selector" type="select" label="Choose annotation type"> - <option value="SnpEff">snpEff</option> - </param> - <when value="SnpEff"> - <param name="snpEffFile" type="data" format="snpEff" label="snpEff output file" /> - </when> - </conditional> - </repeat> --> - <repeat name="rod_bind" title="Binding for reference-ordered data"> - <conditional name="rod_bind_type"> - <param name="rod_bind_type_selector" type="select" label="Binding Type"> - <option value="variant">Variants</option> - <option value="dbsnp">dbSNP</option> - <option value="snps">SNPs</option> - <option value="comp" selected="True">Comps</option> - <option value="indels">INDELs</option> - <option value="mask">Mask</option> - <option value="custom">Custom</option> - </param> - <when value="variant"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="Variant ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="comp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp" label="ROD file" /> - <param name="custom_rod_name" type="text" value="Unnamed" label="ROD Name"/> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="dbsnp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" 
label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="snps"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="indels"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="mask"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="custom"> - <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - </conditional> + + <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data"> + <param name="comp_input_rod" type="data" format="vcf" label="ROD file" /> + <param name="comp_rod_name" type="text" value="Unnamed" label="ROD Name"/> + <param 
name="comp_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + </repeat> + + <conditional name="dbsnp_rod_bind_type"> + <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file"> + <option value="set_dbsnp" selected="True">Set dbSNP</option> + <option value="exclude_dbsnp">Don't set dbSNP</option> + </param> + <when value="exclude_dbsnp"> + <!-- Do nothing here --> + </when> + <when value="set_dbsnp"> + <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" /> + <param name="dbsnp_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/> + </when> + </conditional> + + <repeat name="resource_rod_bind" title="Binding for reference-ordered resource data"> + <param name="resource_input_rod" type="data" format="vcf" label="ROD file" /> + <param name="resource_rod_name" type="text" value="Unnamed" label="ROD Name"/> + <param name="resource_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + </repeat> + + <conditional name="snpEff_rod_bind_type"> + <param name="snpEff_rod_bind_type_selector" type="select" label="Provide a snpEff reference-ordered data file"> + <option value="set_snpEff">Set snpEff</option> + <option value="exclude_snpEff" selected="True">Don't set snpEff</option> + </param> + <when value="exclude_snpEff"> + <!-- Do nothing here --> + </when> + <when value="set_snpEff"> + <param name="snpEff_input_rod" type="data" format="vcf" label="ROD file" /> + 
<param name="snpEff_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + <param name="snpEff_rod_name" type="hidden" value="snpEff" label="ROD Name"/> + </when> + </conditional> + + <repeat name="expressions" title="Expression"> + <param name="expression" type="text" value="" label="Expression"/></repeat><conditional name="gatk_param_type"> @@ -328,23 +333,14 @@ </when></conditional> - <conditional name="analysis_param_type"> - <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options"> - <option value="basic" selected="True">Basic</option> - <option value="advanced">Advanced</option> - </param> - <when value="basic"> - <!-- Do nothing here --> - </when> - <when value="advanced"> - <param name="group" type="select" multiple="True" display="checkboxes" label="annotation interfaces/groups to apply to variant calls"> - <option value="Standard">Standard</option> - <option value="Experimental">Experimental</option> - <option value="WorkInProgress">WorkInProgress</option> - </param> - - </when> - </conditional> + <param name="annotation_group" type="select" multiple="True" display="checkboxes" label="annotation interfaces/groups to apply to variant calls"> + <option value="RodRequiringAnnotation">RodRequiringAnnotation</option> + <option value="Standard">Standard</option> + <option value="Experimental">Experimental</option> + <option value="WorkInProgress">WorkInProgress</option> + <option value="RankSumTest">RankSumTest</option> + </param> + </inputs><outputs><data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" /> @@ -356,17 +352,20 @@ <param name="ref_file" value="phiX.fasta" ftype="fasta" /><param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /><param name="input_variant" 
value="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf" ftype="vcf" /> + <param name="input_variant_bti" /><param name="annotations_type_selector" value="choose" /><param name="annotations" value="AlleleBalance,BaseQualityRankSumTest,DepthOfCoverage,HomopolymerRun,MappingQualityRankSumTest,MappingQualityZero,QualByDepth,RMSMappingQuality,SpanningDeletions,HaplotypeScore" /> - <!-- <param name="additional_annotation_type_selector" value="snpEff" /> - <param name="snpEffFile" value="single_comment.dat" ftype="snpEff" /> --> - <param name="rod_bind_type_selector" value="dbsnp" /> - <param name="rodToIntervalTrackName" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> + <param name="dbsnp_rodToIntervalTrackName" /> + <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="snpEff_rod_bind_type_selector" value="exclude_snpEff" /><param name="gatk_param_type_selector" value="basic" /> - <param name="analysis_param_type_selector" value="basic" /> - <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="2" /> + <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" /><output name="output_log" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.log.contains" compare="contains" /> + <param name="comp_rod_bind" value="0" /> + <param name="resource_rod_bind" value="0" /> + <param name="expressions" value="0" /> + <!-- <param name="annotation_group" /> --></test></tests><help> --- a/tools/gatk/variant_apply_recalibration.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variant_apply_recalibration.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,9 +1,14 @@ -<tool id="gatk_variant_apply_recalibration" name="Apply Variant Recalibration" version="0.0.1"> +<tool id="gatk_variant_apply_recalibration" name="Apply 
Variant Recalibration" version="0.0.2"><description></description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" - -d "-B:input,%(file_type)s" "${reference_source.input_variants}" "${reference_source.input_variants.ext}" "input_variants" + #for $var_count, $variant in enumerate( $reference_source.variants ): + -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}" + #end for -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "ApplyRecalibration" @@ -62,21 +67,18 @@ ##end standard gatk options ##start analysis specific options + -p ' + --mode "${mode}" - #if $analysis_param_type.analysis_param_type_selector == "advanced": - -p ' - --mode "${analysis_param_type.mode}" - - #for $ignore_filter in $analysis_param_type.ignore_filters: - #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector ) - #if $ignore_filter_name == "custom": - #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name ) - #end if - --ignore_filter "${ignore_filter_name}" - #end for - ' - #end if - -p '--ts_filter_level "${ts_filter_level}"' + #for $ignore_filter in $ignore_filters: + #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector ) + #if $ignore_filter_name == "custom": + #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name ) + #end if + --ignore_filter "${ignore_filter_name}" + #end for + --ts_filter_level "${ts_filter_level}" + ' </command><inputs><conditional name="reference_source"> @@ -85,17 +87,21 @@ <option value="history">History</option></param><when value="cached"> - <param name="input_variants" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> + 
<repeat name="variants" title="Variant" min="1"> + <param name="input_variants" type="data" format="vcf" label="Variant file to annotate" /> + </repeat><param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" /><param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" /><param name="ref_file" type="select" label="Using reference genome"> - <options from_data_table="picard_indexes"> - <filter type="data_meta" key="dbkey" ref="input_variants" column="dbkey"/> - </options> + <!-- <options from_data_table="picard_indexes"> + <filter type="data_meta" key="dbkey" ref="variants[0].input_variants" column="dbkey"/> + </options> --></param></when><when value="history"><!-- FIX ME!!!! --> - <param name="input_variants" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> + <repeat name="variants" title="Variant" min="1"> + <param name="input_variants" type="data" format="vcf" label="Variant file to annotate" /> + </repeat><param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" /><param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" /><param name="ref_file" type="data" format="fasta" label="Using reference file" /> @@ -193,15 +199,6 @@ </when></conditional> - <conditional name="analysis_param_type"> - <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options"> - <option value="basic" selected="True">Basic</option> - <option value="advanced">Advanced</option> - </param> - <when value="basic"> - <!-- Do nothing here --> - </when> - <when value="advanced"><param name="mode" type="select" label="Recalibration mode"><option value="SNP" selected="True">SNP</option><option value="INDEL">INDEL</option> @@ -219,8 +216,6 @@ </when></conditional></repeat> - </when> - </conditional><param name="ts_filter_level" type="float" label="truth sensitivity level at which to start 
filtering, used here to indicate filtered variants in plots" value="99.0"/></inputs><outputs> --- a/tools/gatk/variant_combine.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variant_combine.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,12 +1,15 @@ -<tool id="gatk_variant_combine" name="Combine Variants" version="0.0.1"> +<tool id="gatk_variant_combine" name="Combine Variants" version="0.0.2"><description></description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" #set $priority_order = [] #for $input_variant in $reference_source.input_variants: - -d "-B:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}" + -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}" #set $input_variant_name = str( $input_variant.input_variant_name ) #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator #silent $priority_order.append( $input_variant_name ) @@ -21,6 +24,7 @@ #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" #end if + --genotypemergeoption "${genotype_merge_option}" --rod_priority_list "${ ','.join( $priority_order ) }" ' @@ -72,7 +76,6 @@ ##start analysis specific options #if $analysis_param_type.analysis_param_type_selector == "advanced": -p ' - --genotypemergeoption "${analysis_param_type.genotype_merge_option}" --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}" ${analysis_param_type.print_complex_merges} ${analysis_param_type.filtered_are_uncalled} @@ -95,7 +98,7 @@ <option 
value="history">History</option></param><when value="cached"> - <repeat min="1" name="input_variants" title="Variants to Merge"> + <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here."><param name="input_variant" type="data" format="vcf" label="Input variant file" /><param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique"><validator type="length" min="1" message="You must provide a unique name for this set of variants" /> @@ -108,7 +111,7 @@ </param></when><when value="history"><!-- FIX ME!!!! --> - <repeat min="1" name="input_variants" title="Variants to Merge"> + <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here."><param name="input_variant" type="data" format="vcf" label="Input variant file" /><param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique"><validator type="length" min="1" message="You must provide a unique name for this set of variants" /> @@ -117,7 +120,14 @@ <param name="ref_file" type="data" format="fasta" label="Using reference file" /></when></conditional> - + + <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" > + <option value="UNIQUIFY" /> + <option value="PRIORITIZE" selected="true"/> + <option value="UNSORTED" /> + <option value="REQUIRE_UNIQUE" /> + </param> + <conditional name="gatk_param_type"><param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options"><option value="basic" selected="True">Basic</option> @@ -219,16 +229,10 @@ <!-- Do nothing here --></when><when value="advanced"> - <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" > - <option 
value="UNIQUIFY" /> - <option value="PRIORITIZE" selected="true"/> - <option value="UNSORTED" /> - <option value="REQUIRE_UNIQUE" /> - </param><param name="filtered_records_merge_type" type="select" label="How should we deal with records seen at the same site in the VCF, but with different FILTER fields? " ><option value="KEEP_IF_ANY_UNFILTERED" selected="true"/><option value="KEEP_IF_ALL_UNFILTERED" /> - </param> + </param><param name="print_complex_merges" checked="false" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" /><param name="filtered_are_uncalled" checked="false" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF" /> @@ -253,9 +257,10 @@ <param name="ref_file" value="phiX.fasta" ftype="fasta" /><param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" /><param name="input_variant_name" value="from_variant_annotator" /> + <param name="genotype_merge_option" value="PRIORITIZE" /><param name="gatk_param_type_selector" value="basic" /><param name="analysis_param_type_selector" value="basic" /> - <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="2" /> + <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="4" /><output name="output_log" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.log.contains" compare="contains" /></test></tests> --- a/tools/gatk/variant_eval.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variant_eval.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,9 +1,14 @@ -<tool id="gatk_variant_eval" name="Eval Variants" version="0.0.1"> +<tool id="gatk_variant_eval" name="Eval Variants" version="0.0.2"><description></description> + <requirements> + 
<requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" - -d "-B:eval,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + #for $var_count, $variant in enumerate( $reference_source.variants ): + -d "--eval:input_${var_count},%(file_type)s" "${variant.input_variant}" "${variant.input_variant.ext}" "input_variants_${var_count}" + #end for -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "VariantEval" @@ -16,23 +21,25 @@ #end if ' - #set $rod_binding_names = dict() - #for $rod_binding in $rod_bind: - #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': - #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name - #else - #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #for $rod_binding in $comp_rod_bind: + -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}" + #if str( $rod_binding.comp_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${rod_binding.comp_rod_name}"' #end if - #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): - -p '--rodToIntervalTrackName "${rod_bind_name}"' + #if str( $rod_binding.comp_known_names ): + -p '--known_names "${rod_binding.comp_rod_name}"' #end if - #if str( $rod_binding.rod_bind_type.known_names ): - -p '--known_names "${rod_bind_name}"' + #end for + + #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + -d 
"--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #if str( $dbsnp_rod_bind_type.dbsnp_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${dbsnp_rod_bind_type.dbsnp_rod_name}"' + #if str( $dbsnp_rod_bind_type.dbsnp_known_names ): + -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"' + #end if #end if - - #end for + #end if ##start standard gatk options #if $gatk_param_type.gatk_param_type_selector == "advanced": @@ -138,51 +145,46 @@ <option value="history">History</option></param><when value="cached"> - <param name="input_variant" type="data" format="vcf" label="Input variant file" /> + <repeat name="variants" title="Variant" min="1"> + <param name="input_variant" type="data" format="vcf" label="Input variant file" /> + </repeat><param name="ref_file" type="select" label="Using reference genome"> - <options from_data_table="picard_indexes"> + <!--<options from_data_table="picard_indexes"><filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> - </options> + </options>--></param></when><when value="history"><!-- FIX ME!!!! 
--> - <param name="input_variant" type="data" format="vcf" label="Input variant file" /> + <repeat name="variants" title="Variant" min="1"> + <param name="input_variant" type="data" format="vcf" label="Input variant file" /> + </repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when></conditional> - <repeat name="rod_bind" title="Binding for reference-ordered data"> - <conditional name="rod_bind_type"> - <param name="rod_bind_type_selector" type="select" label="Binding Type"> - <option value="dbsnp" selected="True">dbSNP</option> - <option value="snps">SNPs</option> - <option value="indels">INDELs</option> - <option value="custom">Custom</option> - </param> - <when value="dbsnp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - <param name="known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Treat ROD as known when splitting eval rods into known and novel subsets" /> - </when> - <when value="snps"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - <param name="known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Treat ROD as known when splitting eval rods into known and novel subsets" /> - </when> - <when value="indels"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, 
--rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - <param name="known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Treat ROD as known when splitting eval rods into known and novel subsets" /> - </when> - <when value="custom"> - <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - <param name="known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Treat ROD as known when splitting eval rods into known and novel subsets" /> - </when> - </conditional> + <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data"> + <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" /> + <param name="comp_rod_name" type="text" value="Unnamed" label="Comparison ROD Name"/> + <param name="comp_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use Comparison ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + <param name="comp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use Comparison ROD as known_names" /></repeat> + <conditional name="dbsnp_rod_bind_type"> + <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file"> + <option value="set_dbsnp" selected="True">Set dbSNP</option> + <option value="exclude_dbsnp">Don't set dbSNP</option> + </param> + <when value="exclude_dbsnp"> + <!-- Do nothing here --> + </when> + <when value="set_dbsnp"> + <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" /> + <param 
name="dbsnp_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use dbSNP ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="dbSNP ROD Name"/> + <param name="dbsnp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use dbSNP ROD as known_names" /> + </when> + </conditional> + <conditional name="gatk_param_type"><param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options"><option value="basic" selected="True">Basic</option> @@ -293,7 +295,7 @@ <param name="sample" value="" type="text" label="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context"/></repeat> - <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" > + <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" ><!-- do these need individual options also?
gatk wiki has little info --><option value="AlleleFrequency" /><option value="AlleleCount" /> @@ -356,10 +358,11 @@ <param name="reference_source_selector" value="history" /><param name="ref_file" value="phiX.fasta" ftype="fasta" /><param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" /> - <param name="rod_bind_type_selector" value="dbsnp" /> - <param name="rodToIntervalTrackName" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> - <param name="known_names" value="True"/> + <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> + <param name="dbsnp_rodToIntervalTrackName" /> + <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="dbsnp_known_names" value="True"/> + <param name="comp_rod_bind" value="0" /><param name="gatk_param_type_selector" value="basic" /><param name="analysis_param_type_selector" value="basic" /><output name="output_table" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.tabular" /> --- a/tools/gatk/variant_filtration.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variant_filtration.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,9 +1,12 @@ -<tool id="gatk_variant_filtration" name="Variant Filtration" version="0.0.1"> +<tool id="gatk_variant_filtration" name="Variant Filtration" version="0.0.2"><description>on VCF files</description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" - -d "-B:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T 
"VariantFiltration" @@ -20,22 +23,17 @@ #end for ' - #set $rod_binding_names = dict() - #for $rod_binding in $rod_bind: - #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': - #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name - #else - #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + + #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask': + -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}" + -p ' + --maskExtension "${mask_rod_bind_type.mask_extension}" + --maskName "${mask_rod_bind_type.mask_rod_name}" + ' + #if str( $mask_rod_bind_type.mask_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${mask_rod_bind_type.mask_rod_name}"' #end if - #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #if $rod_binding.rod_bind_type.rod_bind_type_selector == "mask": - --maskName "${rod_binding.rod_bind_type.mask_rod_name}" - #end if - #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): - -p '--rodToIntervalTrackName "${rod_bind_name}"' - #end if - #end for + #end if ##start standard gatk options #if $gatk_param_type.gatk_param_type_selector == "advanced": @@ -97,7 +95,7 @@ <option value="history">History</option></param><when value="cached"> - <param name="input_variant" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> + <param name="input_variant" type="data" format="vcf" label="Variant file to annotate" /><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="picard_indexes"><filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> 
@@ -105,7 +103,7 @@ </param></when><when value="history"><!-- FIX ME!!!! --> - <param name="input_variant" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> + <param name="input_variant" type="data" format="vcf" label="Variant file to annotate" /><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when></conditional> @@ -125,39 +123,24 @@ <param name="is_genotype_filter" type="boolean" truevalue="genotypeFilter" falsevalue="filter" label="Use filter at the individual sample level" /></repeat> - <repeat name="rod_bind" title="Binding for reference-ordered data"> - <conditional name="rod_bind_type"> - <param name="rod_bind_type_selector" type="select" label="Binding Type"> - <option value="dbsnp">dbSNP</option> - <option value="snps">SNPs</option> - <option value="indels">INDELs</option> - <option value="mask" selected="True">Mask</option> - <option value="custom">Custom</option> - </param> - <when value="dbsnp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="snps"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="indels"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when 
value="custom"> - <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="mask"> - <param name="mask_rod_name" type="text" value="Unknown" label="Mask Name"/> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - </conditional> - </repeat> + + + <conditional name="mask_rod_bind_type"> + <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file"> + <option value="set_mask" selected="True">Set mask</option> + <option value="exclude_mask">Don't set mask</option> + </param> + <when value="exclude_mask"> + <!-- Do nothing here --> + </when> + <when value="set_mask"> + <param name="input_mask_rod" type="data" format="bed,gatk_dbsnp,vcf" label="Mask ROD file" /> + <param name="mask_rod_name" type="text" value="Mask" label="Mask Name"/> + <param name="mask_extension" type="integer" value="0" label="Mask Extension"/> + <param name="mask_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use Mask ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + </when> + </conditional> + <conditional name="gatk_param_type"><param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options"> @@ -279,9 +262,11 @@ <param name="filter_expression" value="MQ < 37.74 || MQ0 > 50" /><param name="filter_name" 
value="Galaxy_filter" /><param name="is_genotype_filter" /> - <param name="rod_bind_type_selector" value="dbsnp" /> - <param name="rodToIntervalTrackName" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="mask_rod_bind_type_selector" value="set_mask" /> + <param name="mask_rodToIntervalTrackName" /> + <param name="input_mask_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="mask_rod_name" value="." /> + <param name="mask_extension" value="0" /><param name="gatk_param_type_selector" value="basic" /><param name="cluster_snp_type_selector" value="do_not_cluster_snp" /><param name="missing_values_in_expressions_should_evaluate_as_failing" /> --- a/tools/gatk/variant_recalibrator.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variant_recalibrator.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,9 +1,14 @@ -<tool id="gatk_variant_recalibrator" name="Variant Recalibrator" version="0.0.1"> +<tool id="gatk_variant_recalibrator" name="Variant Recalibrator" version="0.0.2"><description></description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" - -d "-B:input,%(file_type)s" "${reference_source.input_variants}" "${reference_source.input_variants.ext}" "input_variants" + #for $var_count, $variant in enumerate( $reference_source.variants ): + -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}" + #end for -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "VariantRecalibrator" @@ -29,9 +34,9 @@ #end if #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 #if $rod_binding.rod_bind_type.rod_training_type.rod_training_type_selector == "not_training_truth_known": - -d 
"-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + -d "--resource:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" #else: - -d "-B:${rod_bind_name},%(file_type)s,known=${rod_binding.rod_bind_type.rod_training_type.known},training=${rod_binding.rod_bind_type.rod_training_type.training},truth=${rod_binding.rod_bind_type.rod_training_type.truth},bad=${rod_binding.rod_bind_type.rod_training_type.bad},prior=${rod_binding.rod_bind_type.rod_training_type.prior}" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + -d "--resource:${rod_bind_name},%(file_type)s,known=${rod_binding.rod_bind_type.rod_training_type.known},training=${rod_binding.rod_bind_type.rod_training_type.training},truth=${rod_binding.rod_bind_type.rod_training_type.truth},bad=${rod_binding.rod_bind_type.rod_training_type.bad},prior=${rod_binding.rod_bind_type.rod_training_type.prior}" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" #end if #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): -p '--rodToIntervalTrackName "${rod_bind_name}"' @@ -89,11 +94,11 @@ --use_annotation "${annotation}" #end for #end if + --mode "${mode}" ' #if $analysis_param_type.analysis_param_type_selector == "advanced": -p ' - --mode "${analysis_param_type.mode}" --maxGaussians "${analysis_param_type.max_gaussians}" --maxIterations "${analysis_param_type.max_iterations}" --numKMeans "${analysis_param_type.num_k_means}" @@ -102,8 +107,11 @@ --shrinkage "${analysis_param_type.shrinkage}" --dirichlet "${analysis_param_type.dirichlet}" --priorCounts 
"${analysis_param_type.prior_counts}" - --percentBadVariants "${analysis_param_type.percent_bad_variants}" - --minNumBadVariants "${analysis_param_type.min_num_bad_variants}" + #if str( $analysis_param_type.bad_variant_selector.bad_variant_selector_type ) == 'percent': + --percentBadVariants "${analysis_param_type.bad_variant_selector.percent_bad_variants}" + #else: + --minNumBadVariants "${analysis_param_type.bad_variant_selector.min_num_bad_variants}" + #end if --target_titv "${analysis_param_type.target_titv}" #for $tranche in [ $tranche.strip() for $tranche in str( $analysis_param_type.ts_tranche ).split( ',' ) if $tranche.strip() ] --TStranche "${tranche}" @@ -131,15 +139,19 @@ <option value="history">History</option></param><when value="cached"> - <param name="input_variants" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> + <repeat name="variants" title="Variant" min="1"> + <param name="input_variants" type="data" format="vcf" label="Variant file to recalibrate" /> + </repeat><param name="ref_file" type="select" label="Using reference genome"> - <options from_data_table="picard_indexes"> - <filter type="data_meta" key="dbkey" ref="input_variants" column="dbkey"/> - </options> + <!-- <options from_data_table="picard_indexes"> + <filter type="data_meta" key="dbkey" ref="variants[0].input_variants" column="dbkey"/> + </options> --></param></when><when value="history"><!-- FIX ME!!!! 
--> - <param name="input_variants" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file to annotate" /> + <repeat name="variants" title="Variant" min="1"> + <param name="input_variants" type="data" format="vcf" label="Variant file to recalibrate" /> + </repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when></conditional> @@ -323,37 +335,42 @@ <param name="annotations" type="select" multiple="True" display="checkboxes" label="annotations which should used for calculations"><!-- might we want to load the available annotations from an external configuration file, since additional ones can be added to local installs? --> - <option value="AlleleBalance" /> - <option value="BaseQualityRankSumTest" /> - <option value="DepthOfCoverage" /> - <option value="HomopolymerRun" /> - <option value="MappingQualityRankSumTest" /> - <option value="MappingQualityZero" /> - <option value="QualByDepth" /> - <option value="RMSMappingQuality" /> - <option value="SpanningDeletions" /> - <option value="HaplotypeScore" /> - <!-- annotations below were pulled from list option --> - <option value="ChromosomeCounts" /> - <option value="IndelType" /> - <option value="HardyWeinberg" /> - <option value="BaseCounts" /> - <option value="LowMQ" /> - <option value="FisherStrand" /> - <option value="GenomicAnnotation" /> - <option value="GCContent" /> - <option value="ReadPosRankSumTest" /> - <option value="SBByDepth" /> - <option value="ReadDepthAndAllelicFractionBySample" /> - <option value="AlleleBalanceBySample" /> - <option value="DepthPerAlleleBySample" /> - <option value="MappingQualityZeroBySample" /> - <option value="NBaseCount" /> - <option value="GLstats" /> - <option value="SampleList" /> - <option value="MappingQualityZeroFraction" /> + <option value="ChromosomeCounts"/> + <option value="IndelType"/> + <option value="SpanningDeletions"/> + <option value="HardyWeinberg"/> + <option value="NBaseCount"/> + <option value="MappingQualityZero"/> + 
<option value="AlleleBalance"/> + <option value="BaseCounts"/> + <option value="LowMQ"/> + <option value="InbreedingCoeff"/> + <option value="RMSMappingQuality"/> + <option value="HaplotypeScore"/> + <option value="TechnologyComposition"/> + <option value="SampleList"/> + <option value="FisherStrand"/> + <option value="HomopolymerRun"/> + <option value="DepthOfCoverage"/> + <option value="SnpEff"/> + <option value="MappingQualityZeroFraction"/> + <option value="GCContent"/> + <option value="MappingQualityRankSumTest"/> + <option value="ReadPosRankSumTest"/> + <option value="BaseQualityRankSumTest"/> + <option value="QualByDepth"/> + <option value="SBByDepth"/> + <option value="ReadDepthAndAllelicFractionBySample"/> + <option value="AlleleBalanceBySample"/> + <option value="DepthPerAlleleBySample"/> + <option value="MappingQualityZeroBySample"/></param> + <param name="mode" type="select" label="Recalibration mode"> + <option value="SNP" selected="True">SNP</option> + <option value="INDEL">INDEL</option> + <option value="BOTH">BOTH</option> + </param><conditional name="gatk_param_type"><param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options"> @@ -455,11 +472,6 @@ <!-- Do nothing here --></when><when value="advanced"> - <param name="mode" type="select" label="Recalibration mode"> - <option value="SNP" selected="True">SNP</option> - <option value="INDEL">INDEL</option> - <option value="BOTH">BOTH</option> - </param><param name="max_gaussians" type="integer" label="maximum number of Gaussians to try during variational Bayes Algorithm" value="10"/><param name="max_iterations" type="integer" label="maximum number of maximum number of VBEM iterations to be performed in variational Bayes Algorithm" value="100"/><param name="num_k_means" type="integer" label="number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model" value="30"/> @@ -468,8 +480,18 @@ <param name="shrinkage" 
type="float" label="shrinkage parameter in variational Bayes algorithm" value="1.0"/><param name="dirichlet" type="float" label="dirichlet parameter in variational Bayes algorithm" value="0.001"/><param name="prior_counts" type="float" label="number of prior counts to use in variational Bayes algorithm" value="20.0"/> - <param name="percent_bad_variants" type="float" label="percentage of the worst scoring variants to use when building the Gaussian mixture model of bad variants. 0.07 means bottom 7 percent." value="0.03"/> - <param name="min_num_bad_variants" type="integer" label="minimum amount of worst scoring variants to use when building the Gaussian mixture model of bad variants. Will override -percentBad arugment if necessary" value="2000"/> + <conditional name="bad_variant_selector"> + <param name="bad_variant_selector_type" type="select" label="Basic or Advanced Analysis options"> + <option value="percent" selected="True">Percent</option> + <option value="min_num">Number</option> + </param> + <when value="percent"> + <param name="percent_bad_variants" type="float" label="percentage of the worst scoring variants to use when building the Gaussian mixture model of bad variants. 0.07 means bottom 7 percent." value="0.03"/> + </when> + <when value="min_num"> + <param name="min_num_bad_variants" type="integer" label="minimum amount of worst scoring variants to use when building the Gaussian mixture model of bad variants. Will override -percentBad argument if necessary" value="2000"/> + </when> + </conditional><param name="target_titv" type="float" label="expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!" value="2.15"/><param name="ts_tranche" type="text" label="levels of novel false discovery rate (FDR, implied by ti/tv) at which to slice the data. 
(in percent, that is 1.0 for 1 percent)" value="100.0, 99.9, 99.0, 90.0"/><repeat name="ignore_filters" title="Ignore Filter"> --- a/tools/gatk/variants_validate.xml Thu Oct 20 10:03:28 2011 -0400 +++ b/tools/gatk/variants_validate.xml Thu Oct 20 10:24:19 2011 -0400 @@ -1,9 +1,12 @@ -<tool id="gatk_validate_variants" name="Validate Variants" version="0.0.1"> +<tool id="gatk_validate_variants" name="Validate Variants" version="0.0.2"><description></description> + <requirements> + <requirement type="package" version="1.2">gatk</requirement> + </requirements><command interpreter="python">gatk_wrapper.py - --max_jvm_heap_fraction "2" + --max_jvm_heap_fraction "1" --stdout "${output_log}" - -d "-B:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" -p 'java -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar" -T "ValidateVariants" @@ -17,19 +20,12 @@ ${do_not_validate_filtered_records} ' - #set $rod_binding_names = dict() - #for $rod_binding in $rod_bind: - #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': - #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name - #else - #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #if str( $dbsnp_rod_bind_type.dbsnp_rodToIntervalTrackName ): + -p '--rodToIntervalTrackName "${dbsnp_rod_bind_type.dbsnp_rod_name}"' #end if - #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "-B:${rod_bind_name},%(file_type)s" 
"${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ): - -p '--rodToIntervalTrackName "${rod_bind_name}"' - #end if - #end for + #end if ##start standard gatk options #if $gatk_param_type.gatk_param_type_selector == "advanced": @@ -96,38 +92,21 @@ </when></conditional> - <repeat name="rod_bind" title="Binding for reference-ordered data"> - <conditional name="rod_bind_type"> - <param name="rod_bind_type_selector" type="select" label="Binding Type"> - <!-- <option value="variant" selected="True">Variant</option> --> - <option value="dbsnp" selected="True">dbSNP</option> - <option value="snps">SNPs</option> - <!-- <option value="indels">INDELs</option> - <option value="custom">Custom</option> --> - </param> - <when value="variant"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /><!-- this should be restricted to genome type, or genome type should be limited as a result of this selection --> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="dbsnp"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="snps"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option 
specified" /> - </when> - <when value="indels"> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - <when value="custom"> - <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" /> - <param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> - </when> - </conditional> - </repeat> + <conditional name="dbsnp_rod_bind_type"> + <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file"> + <option value="set_dbsnp" selected="True">Set dbSNP</option> + <option value="exclude_dbsnp">Don't set dbSNP</option> + </param> + <when value="exclude_dbsnp"> + <!-- Do nothing here --> + </when> + <when value="set_dbsnp"> + <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" /> + <param name="dbsnp_rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" /> + <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/> + </when> + </conditional> + <param name="warn_on_errors" type="boolean" checked="False" truevalue="-warnOnErrors" falsevalue="" label="instead of terminating the run at the first error, print warning messages for each error seen. 
"/><param name="do_not_validate_filtered_records" type="boolean" checked="False" truevalue="-doNotValidateFilteredRecords" falsevalue="" label="do not try to validate records that are FILTERed. "/> @@ -231,9 +210,9 @@ <param name="reference_source_selector" value="history" /><param name="ref_file" value="phiX.fasta" ftype="fasta" /><param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" /> - <param name="rod_bind_type_selector" value="dbsnp" /> - <param name="rodToIntervalTrackName" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> + <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> + <param name="dbsnp_rodToIntervalTrackName" /> + <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /><param name="warn_on_errors" value="True"/><param name="do_not_validate_filtered_records" /><param name="gatk_param_type_selector" value="basic" /> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.