4 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/ff8ac1441d90/ Changeset: ff8ac1441d90 User: nsoranzo Date: 2013-06-24 14:15:46 Summary: Use sam_fa_indexes tool data table instead of searching the dbkey in sam_fa_indices.loc Also remove some unused imports. Affected #: 5 files diff -r 4f7b7e4ca213498824d5fba7526676ddf976b823 -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 tools/samtools/sam_pileup.py --- a/tools/samtools/sam_pileup.py +++ b/tools/samtools/sam_pileup.py @@ -8,8 +8,7 @@ -o, --output1=o: Output pileup -R, --ref=R: Reference file type -n, --ownFile=n: User-supplied fasta reference file - -d, --dbkey=d: dbkey of user-supplied file - -x, --indexDir=x: Index directory + -g, --index=g: Path of the indexed reference genome -b, --bamIndex=b: BAM index file -s, --lastCol=s: Print the mapping quality as the last column -i, --indels=i: Only output lines containing indels @@ -31,24 +30,9 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): - seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR - seqPath = '' - for line in open( seqFile ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seqPath = fields[2].strip() - break - return seqPath - def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) - seqPath = check_seq_file( options.dbkey, options.indexDir ) # output version # of tool try: tmp = tempfile.NamedTemporaryFile().name @@ -77,7 +61,6 @@ tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir ) tmpf1_name = tmpf1.name tmpf1.close() - tmpf1fai_name = '%s.fai' % tmpf1_name #link bam and bam index to working directory (can't move because need to leave original) os.symlink( options.input1, tmpf0bam_name ) os.symlink( options.bamIndex, tmpf0bambai_name ) @@ -100,9 +83,9 @@ try: #index reference if necessary and prepare pileup command if options.ref == 'indexed': - if not os.path.exists( "%s.fai" % seqPath ): - raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey - cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 ) + if not os.path.exists( "%s.fai" % options.index ): + raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index + cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 ) elif options.ref == 'history': os.symlink( options.ownFile, tmpf1_name ) cmdIndex = 'samtools faidx %s' % ( tmpf1_name ) diff -r 4f7b7e4ca213498824d5fba7526676ddf976b823 -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 tools/samtools/sam_pileup.xml --- a/tools/samtools/sam_pileup.xml +++ b/tools/samtools/sam_pileup.xml @@ -1,4 +1,4 @@ -<tool id="sam_pileup" name="Generate pileup" version="1.1.1"> +<tool id="sam_pileup" name="Generate pileup" version="1.1.2"><description>from BAM dataset</description><requirements><requirement type="package" version="0.1.16">samtools</requirement> @@ -11,10 +11,8 @@ #if $refOrHistory.reference == "history": --ownFile=$refOrHistory.ownFile #else: - --ownFile="None" + --index=${refOrHistory.index.fields.path} #end if - --dbkey=${input1.metadata.dbkey} - --indexDir=${GALAXY_DATA_INDEX_DIR} --bamIndex=${input1.metadata.bam_index} --lastCol=$lastCol --indels=$indels @@ -41,7 +39,13 @@ <when value="indexed"><param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for"><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + </param> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="input1" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options></param></when><when value="history"> @@ -100,6 +104,7 @@ --><param name="reference" value="indexed" /><param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" /> + <param name="index" value="chr_m" /><param name="lastCol" value="no" /><param name="indels" value="no" /><param name="mapCap" value="60" /> diff -r 4f7b7e4ca213498824d5fba7526676ddf976b823 -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 tools/samtools/sam_to_bam.py --- a/tools/samtools/sam_to_bam.py +++ b/tools/samtools/sam_to_bam.py @@ -3,43 +3,24 @@ Converts SAM data to sorted BAM data. usage: sam_to_bam.py [options] --input1: SAM file to be converted - --dbkey: dbkey value + --index: path of the indexed reference genome --ref_file: Reference file if choosing from history --output1: output dataset in bam format - --index_dir: GALAXY_DATA_INDEX_DIR """ -import optparse, os, sys, subprocess, tempfile, shutil, gzip -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse -from galaxy import util +import optparse, os, sys, subprocess, tempfile, shutil def stop_err( msg ): sys.stderr.write( '%s\n' % msg ) sys.exit() -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) + parser.add_option( '', '--index', dest='index', help='The path of the indexed reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) ( options, args ) = parser.parse_args() # output version # of tool @@ -61,24 +42,17 @@ except: sys.stdout.write( 'Could not determine Samtools version\n' ) - cached_seqs_pointer_file = '%s/sam_fa_indices.loc' % options.index_dir - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) tmp_dir = tempfile.mkdtemp() if not options.ref_file or options.ref_file == 'None': # We're using locally cached reference sequences( e.g., /galaxy/data/equCab2/sam_index/equCab2.fa ). # The indexes for /galaxy/data/equCab2/sam_index/equCab2.fa will be contained in # a file named /galaxy/data/equCab2/sam_index/equCab2.fa.fai - fai_index_file_base = seq_path - fai_index_file_path = '%s.fai' % seq_path + fai_index_file_path = '%s.fai' % options.index if not os.path.exists( fai_index_file_path ): #clean up temp files if os.path.exists( tmp_dir ): shutil.rmtree( tmp_dir ) - stop_err( 'No sequences are available for build (%s), request them by reporting this error.' % options.dbkey ) + stop_err( 'Indexed genome %s not present, request it by reporting this error.' % options.index ) else: try: # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will: diff -r 4f7b7e4ca213498824d5fba7526676ddf976b823 -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 tools/samtools/sam_to_bam.xml --- a/tools/samtools/sam_to_bam.xml +++ b/tools/samtools/sam_to_bam.xml @@ -1,4 +1,4 @@ -<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.2"> +<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.3"><description>converts SAM format to BAM format</description><requirements><requirement type="package">samtools</requirement> @@ -7,13 +7,11 @@ sam_to_bam.py --input1=$source.input1 #if $source.index_source == "history": - --dbkey=${ref_file.metadata.dbkey} --ref_file=$source.ref_file #else - --dbkey=${input1.metadata.dbkey} + --index=${source.index.fields.path} #end if --output1=$output1 - --index_dir=${GALAXY_DATA_INDEX_DIR} </command><inputs><conditional name="source"> @@ -22,13 +20,19 @@ <option value="history">History</option></param><when value="cached"> - <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM File to Convert"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM file to convert"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + </param> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="input1" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options></param></when><when value="history"> - <param name="input1" type="data" format="sam" label="Convert SAM file" /> + <param name="input1" type="data" format="sam" label="SAM file to convert" /><param name="ref_file" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" /></when></conditional> @@ -76,6 +80,7 @@ --><param name="index_source" value="cached" /><param name="input1" value="sam_to_bam_in1.sam" ftype="sam" dbkey="chrM" /> + <param name="index" value="chr_m" /><output name="output1" file="sam_to_bam_out2.bam" ftype="bam" /></test></tests> diff -r 4f7b7e4ca213498824d5fba7526676ddf976b823 -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 tools/samtools/samtools_mpileup.xml --- a/tools/samtools/samtools_mpileup.xml +++ b/tools/samtools/samtools_mpileup.xml @@ -1,4 +1,4 @@ -<tool id="samtools_mpileup" name="MPileup" version="0.0.1"> +<tool id="samtools_mpileup" name="MPileup" version="0.0.2"><description>SNP and indel caller</description><requirements><requirement type="package">samtools</requirement> @@ -59,22 +59,22 @@ </param><when value="cached"><repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> - </param> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> + </param></repeat><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="sam_fa_indexes"> - <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...--> + <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--></options></param></when><when value="history"><!-- FIX ME!!!! --><repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/> - </param> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> + </param></repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> https://bitbucket.org/galaxy/galaxy-central/commits/6769c6ef2ae1/ Changeset: 6769c6ef2ae1 User: nsoranzo Date: 2013-06-24 14:17:42 Summary: Use sam_fa_indexes tool data table instead of searching the dbkey in sam_fa_indices.loc also in Cufflinks wrappers Affected #: 8 files diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cuffcompare_wrapper.py --- a/tools/ngs_rna/cuffcompare_wrapper.py +++ b/tools/ngs_rna/cuffcompare_wrapper.py @@ -8,20 +8,6 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -30,8 +16,7 @@ parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.') # Wrapper / Galaxy options. - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -60,21 +45,16 @@ # Set/link to sequence file. if options.use_seq_data: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cuffcompare_wrapper.xml --- a/tools/ngs_rna/cuffcompare_wrapper.xml +++ b/tools/ngs_rna/cuffcompare_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cuffcompare" name="Cuffcompare" version="0.0.5"> +<tool id="cuffcompare" name="Cuffcompare" version="0.0.6"><!-- Wrapper supports Cuffcompare versions v1.3.0 and newer --><description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description><requirements> @@ -17,14 +17,12 @@ ## Use sequence data? #if $seq_data.use_seq_data == "Yes": - -s + -s #if $seq_data.seq_source.index_source == "history": --ref_file=$seq_data.seq_source.ref_file #else: - --ref_file="None" + --index=${seq_data.seq_source.index.fields.path} #end if - --dbkey=${first_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Outputs. @@ -66,7 +64,14 @@ <option value="cached">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="first_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cuffdiff_wrapper.py --- a/tools/ngs_rna/cuffdiff_wrapper.py +++ b/tools/ngs_rna/cuffdiff_wrapper.py @@ -35,20 +35,6 @@ sys.stderr.write( "%s\n" % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -83,8 +69,7 @@ # Bias correction options. parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -123,21 +108,16 @@ # If doing bias correction, set/link to sequence file. if options.do_bias_correction: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cuffdiff_wrapper.xml --- a/tools/ngs_rna/cuffdiff_wrapper.xml +++ b/tools/ngs_rna/cuffdiff_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cuffdiff" name="Cuffdiff" version="0.0.5"> +<tool id="cuffdiff" name="Cuffdiff" version="0.0.6"><!-- Wrapper supports Cuffdiff versions 2.1.0-2.1.1 --><description>find significant changes in transcript expression, splicing, and promoter use</description><requirements> @@ -42,14 +42,12 @@ ## Bias correction? #if $bias_correction.do_bias_correction == "Yes": - -b + -b #if $bias_correction.seq_source.index_source == "history": --ref_file=$bias_correction.seq_source.ref_file #else: - --ref_file="None" + --index=${bias_correction.seq_source.index.fields.path} #end if - --dbkey=${gtf_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Inputs. @@ -131,7 +129,14 @@ <option value="cached">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="gtf_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cufflinks_wrapper.py --- a/tools/ngs_rna/cufflinks_wrapper.py +++ b/tools/ngs_rna/cufflinks_wrapper.py @@ -10,20 +10,6 @@ sys.stderr.write( "%s\n" % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -52,8 +38,7 @@ # Bias correction options. parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Global model. @@ -82,21 +67,16 @@ # If doing bias correction, set/link to sequence file. if options.do_bias_correction: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cufflinks_wrapper.xml --- a/tools/ngs_rna/cufflinks_wrapper.xml +++ b/tools/ngs_rna/cufflinks_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cufflinks" name="Cufflinks" version="0.0.5"> +<tool id="cufflinks" name="Cufflinks" version="0.0.6"><!-- Wrapper supports Cufflinks versions v1.3.0 and newer --><description>transcript assembly and FPKM (RPKM) estimates for RNA-Seq data</description><requirements> @@ -28,14 +28,12 @@ ## Bias correction? #if $bias_correction.do_bias_correction == "Yes": - -b + -b #if $bias_correction.seq_source.index_source == "history": --ref_file=$bias_correction.seq_source.ref_file #else: - --ref_file="None" + --index=${bias_correction.seq_source.index.fields.path} #end if - --dbkey=${input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Multi-read correct? @@ -66,15 +64,15 @@ <when value="No"></when><when value="Use reference annotation"><param format="gff3,gtf" name="reference_annotation_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/> - </when> - <when value="Use reference annotation guide"> + </when> + <when value="Use reference annotation guide"><param format="gff3,gtf" name="reference_annotation_guide_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/> - </when> + </when></conditional><conditional name="bias_correction"><param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates."><option value="No" selected="true">No</option> - <option value="Yes">Yes</option> + <option value="Yes">Yes</option></param><when value="Yes"><conditional name="seq_source"> @@ -82,7 +80,14 @@ <option value="cached" selected="true">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cuffmerge_wrapper.py --- a/tools/ngs_rna/cuffmerge_wrapper.py +++ b/tools/ngs_rna/cuffmerge_wrapper.py @@ -8,20 +8,6 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -31,8 +17,7 @@ # Wrapper / Galaxy options. - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -61,21 +46,16 @@ # Set/link to sequence file. if options.use_seq_data: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r ff8ac1441d909237bcac94f18aa7db3df61e6be9 -r 6769c6ef2ae18b120b208c63474f8877865128c6 tools/ngs_rna/cuffmerge_wrapper.xml --- a/tools/ngs_rna/cuffmerge_wrapper.xml +++ b/tools/ngs_rna/cuffmerge_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cuffmerge" name="Cuffmerge" version="0.0.5"> +<tool id="cuffmerge" name="Cuffmerge" version="0.0.6"><!-- Wrapper supports Cuffmerge versions 1.3 and newer --><description>merge together several Cufflinks assemblies</description><requirements> @@ -16,14 +16,12 @@ ## Use sequence data? #if $seq_data.use_seq_data == "Yes": - -s + -s #if $seq_data.seq_source.index_source == "history": --ref_file=$seq_data.seq_source.ref_file #else: - --ref_file="None" + --index=${seq_data.seq_source.index.fields.path} #end if - --dbkey=${first_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Outputs. @@ -64,7 +62,14 @@ <option value="cached">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="first_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> https://bitbucket.org/galaxy/galaxy-central/commits/af20b15f7eda/ Changeset: af20b15f7eda User: nsoranzo Date: 2013-04-18 19:25:55 Summary: Add new loc file for SAMtools indexes to support genome variants Affected #: 2 files diff -r 6769c6ef2ae18b120b208c63474f8877865128c6 -r af20b15f7eda2332313933055c87056ef9af252a tool-data/sam_fa_new_indices.loc.sample --- /dev/null +++ b/tool-data/sam_fa_new_indices.loc.sample @@ -0,0 +1,30 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a sam_fa_new_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sam_fa_new_indices.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id><dbkey><display_name><file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the sam_fa_new_indices.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your sam_fa_new_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa + diff -r 6769c6ef2ae18b120b208c63474f8877865128c6 -r af20b15f7eda2332313933055c87056ef9af252a tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample +++ b/tool_data_table_conf.xml.sample @@ -55,10 +55,26 @@ <columns>value, name, path</columns><file path="tool-data/perm_color_index.loc" /></table> - <!-- Location of SAMTools indexes and other files --> - <table name="sam_fa_indexes" comment_char="#"> + <!-- Location of SAMTools indexes and other files (new version) + Warning: until Galaxy release_2013.06.03 the format of this + table was: + <columns>line_type, value, path</columns><file path="tool-data/sam_fa_indices.loc" /> + + If you are updating your tool_data_table_conf.xml to the current + version you should first migrate your + tool-data/sam_fa_indices.loc file to a new + tool-data/sam_fa_new_indices.loc file with the format specified + below, which is explained in the relative sample file + tool-data/sam_fa_new_indices.loc.sample . + By using the new format it is possible to let the user choose + among multiple indexed genome variants having the same dbkey, + e.g. hg19canon vs. hg19full variants for hg19 dbkey. + --> + <table name="sam_fa_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/sam_fa_new_indices.loc" /></table><!-- Location of Picard dict file and other files --><table name="picard_indexes" comment_char="#"> https://bitbucket.org/galaxy/galaxy-central/commits/7538c4cb86dc/ Changeset: 7538c4cb86dc User: jgoecks Date: 2013-06-24 16:37:12 Summary: Merged in nsoranzo/galaxy-central (pull request #188) New loc file for SAMtools indexes to support genome variants (backward compatible) Affected #: 15 files diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tool-data/sam_fa_new_indices.loc.sample --- /dev/null +++ b/tool-data/sam_fa_new_indices.loc.sample @@ -0,0 +1,30 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a sam_fa_new_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sam_fa_new_indices.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id><dbkey><display_name><file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the sam_fa_new_indices.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your sam_fa_new_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa + diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample +++ b/tool_data_table_conf.xml.sample @@ -55,10 +55,26 @@ <columns>value, name, path</columns><file path="tool-data/perm_color_index.loc" /></table> - <!-- Location of SAMTools indexes and other files --> - <table name="sam_fa_indexes" comment_char="#"> + <!-- Location of SAMTools indexes and other files (new version) + Warning: until Galaxy release_2013.06.03 the format of this + table was: + <columns>line_type, value, path</columns><file path="tool-data/sam_fa_indices.loc" /> + + If you are updating your tool_data_table_conf.xml to the current + version you should first migrate your + tool-data/sam_fa_indices.loc file to a new + tool-data/sam_fa_new_indices.loc file with the format specified + below, which is explained in the relative sample file + tool-data/sam_fa_new_indices.loc.sample . + By using the new format it is possible to let the user choose + among multiple indexed genome variants having the same dbkey, + e.g. hg19canon vs. hg19full variants for hg19 dbkey. + --> + <table name="sam_fa_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/sam_fa_new_indices.loc" /></table><!-- Location of Picard dict file and other files --><table name="picard_indexes" comment_char="#"> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cuffcompare_wrapper.py --- a/tools/ngs_rna/cuffcompare_wrapper.py +++ b/tools/ngs_rna/cuffcompare_wrapper.py @@ -8,20 +8,6 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -30,8 +16,7 @@ parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.') # Wrapper / Galaxy options. - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -60,21 +45,16 @@ # Set/link to sequence file. if options.use_seq_data: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cuffcompare_wrapper.xml --- a/tools/ngs_rna/cuffcompare_wrapper.xml +++ b/tools/ngs_rna/cuffcompare_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cuffcompare" name="Cuffcompare" version="0.0.5"> +<tool id="cuffcompare" name="Cuffcompare" version="0.0.6"><!-- Wrapper supports Cuffcompare versions v1.3.0 and newer --><description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description><requirements> @@ -17,14 +17,12 @@ ## Use sequence data? #if $seq_data.use_seq_data == "Yes": - -s + -s #if $seq_data.seq_source.index_source == "history": --ref_file=$seq_data.seq_source.ref_file #else: - --ref_file="None" + --index=${seq_data.seq_source.index.fields.path} #end if - --dbkey=${first_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Outputs. @@ -66,7 +64,14 @@ <option value="cached">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="first_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cuffdiff_wrapper.py --- a/tools/ngs_rna/cuffdiff_wrapper.py +++ b/tools/ngs_rna/cuffdiff_wrapper.py @@ -35,20 +35,6 @@ sys.stderr.write( "%s\n" % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -83,8 +69,7 @@ # Bias correction options. parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -123,21 +108,16 @@ # If doing bias correction, set/link to sequence file. if options.do_bias_correction: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cuffdiff_wrapper.xml --- a/tools/ngs_rna/cuffdiff_wrapper.xml +++ b/tools/ngs_rna/cuffdiff_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cuffdiff" name="Cuffdiff" version="0.0.5"> +<tool id="cuffdiff" name="Cuffdiff" version="0.0.6"><!-- Wrapper supports Cuffdiff versions 2.1.0-2.1.1 --><description>find significant changes in transcript expression, splicing, and promoter use</description><requirements> @@ -42,14 +42,12 @@ ## Bias correction? #if $bias_correction.do_bias_correction == "Yes": - -b + -b #if $bias_correction.seq_source.index_source == "history": --ref_file=$bias_correction.seq_source.ref_file #else: - --ref_file="None" + --index=${bias_correction.seq_source.index.fields.path} #end if - --dbkey=${gtf_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Inputs. @@ -131,7 +129,14 @@ <option value="cached">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="gtf_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cufflinks_wrapper.py --- a/tools/ngs_rna/cufflinks_wrapper.py +++ b/tools/ngs_rna/cufflinks_wrapper.py @@ -10,20 +10,6 @@ sys.stderr.write( "%s\n" % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -52,8 +38,7 @@ # Bias correction options. parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Global model. @@ -82,21 +67,16 @@ # If doing bias correction, set/link to sequence file. if options.do_bias_correction: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cufflinks_wrapper.xml --- a/tools/ngs_rna/cufflinks_wrapper.xml +++ b/tools/ngs_rna/cufflinks_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cufflinks" name="Cufflinks" version="0.0.5"> +<tool id="cufflinks" name="Cufflinks" version="0.0.6"><!-- Wrapper supports Cufflinks versions v1.3.0 and newer --><description>transcript assembly and FPKM (RPKM) estimates for RNA-Seq data</description><requirements> @@ -28,14 +28,12 @@ ## Bias correction? #if $bias_correction.do_bias_correction == "Yes": - -b + -b #if $bias_correction.seq_source.index_source == "history": --ref_file=$bias_correction.seq_source.ref_file #else: - --ref_file="None" + --index=${bias_correction.seq_source.index.fields.path} #end if - --dbkey=${input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Multi-read correct? @@ -66,15 +64,15 @@ <when value="No"></when><when value="Use reference annotation"><param format="gff3,gtf" name="reference_annotation_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/> - </when> - <when value="Use reference annotation guide"> + </when> + <when value="Use reference annotation guide"><param format="gff3,gtf" name="reference_annotation_guide_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/> - </when> + </when></conditional><conditional name="bias_correction"><param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates."><option value="No" selected="true">No</option> - <option value="Yes">Yes</option> + <option value="Yes">Yes</option></param><when value="Yes"><conditional name="seq_source"> @@ -82,7 +80,14 @@ <option value="cached" selected="true">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cuffmerge_wrapper.py --- a/tools/ngs_rna/cuffmerge_wrapper.py +++ b/tools/ngs_rna/cuffmerge_wrapper.py @@ -8,20 +8,6 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -31,8 +17,7 @@ # Wrapper / Galaxy options. - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -61,21 +46,16 @@ # Set/link to sequence file. if options.use_seq_data: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/ngs_rna/cuffmerge_wrapper.xml --- a/tools/ngs_rna/cuffmerge_wrapper.xml +++ b/tools/ngs_rna/cuffmerge_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="cuffmerge" name="Cuffmerge" version="0.0.5"> +<tool id="cuffmerge" name="Cuffmerge" version="0.0.6"><!-- Wrapper supports Cuffmerge versions 1.3 and newer --><description>merge together several Cufflinks assemblies</description><requirements> @@ -16,14 +16,12 @@ ## Use sequence data? #if $seq_data.use_seq_data == "Yes": - -s + -s #if $seq_data.seq_source.index_source == "history": --ref_file=$seq_data.seq_source.ref_file #else: - --ref_file="None" + --index=${seq_data.seq_source.index.fields.path} #end if - --dbkey=${first_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Outputs. @@ -64,7 +62,14 @@ <option value="cached">Locally cached</option><option value="history">History</option></param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="first_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when><when value="history"><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/samtools/sam_pileup.py --- a/tools/samtools/sam_pileup.py +++ b/tools/samtools/sam_pileup.py @@ -8,8 +8,7 @@ -o, --output1=o: Output pileup -R, --ref=R: Reference file type -n, --ownFile=n: User-supplied fasta reference file - -d, --dbkey=d: dbkey of user-supplied file - -x, --indexDir=x: Index directory + -g, --index=g: Path of the indexed reference genome -b, --bamIndex=b: BAM index file -s, --lastCol=s: Print the mapping quality as the last column -i, --indels=i: Only output lines containing indels @@ -31,24 +30,9 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): - seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR - seqPath = '' - for line in open( seqFile ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seqPath = fields[2].strip() - break - return seqPath - def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) - seqPath = check_seq_file( options.dbkey, options.indexDir ) # output version # of tool try: tmp = tempfile.NamedTemporaryFile().name @@ -77,7 +61,6 @@ tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir ) tmpf1_name = tmpf1.name tmpf1.close() - tmpf1fai_name = '%s.fai' % tmpf1_name #link bam and bam index to working directory (can't move because need to leave original) os.symlink( options.input1, tmpf0bam_name ) os.symlink( options.bamIndex, tmpf0bambai_name ) @@ -100,9 +83,9 @@ try: #index reference if necessary and prepare pileup command if options.ref == 'indexed': - if not os.path.exists( "%s.fai" % seqPath ): - raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey - cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 ) + if not os.path.exists( "%s.fai" % options.index ): + raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index + cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 ) elif options.ref == 'history': os.symlink( options.ownFile, tmpf1_name ) cmdIndex = 'samtools faidx %s' % ( tmpf1_name ) diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/samtools/sam_pileup.xml --- a/tools/samtools/sam_pileup.xml +++ b/tools/samtools/sam_pileup.xml @@ -1,4 +1,4 @@ -<tool id="sam_pileup" name="Generate pileup" version="1.1.1"> +<tool id="sam_pileup" name="Generate pileup" version="1.1.2"><description>from BAM dataset</description><requirements><requirement type="package" version="0.1.16">samtools</requirement> @@ -11,10 +11,8 @@ #if $refOrHistory.reference == "history": --ownFile=$refOrHistory.ownFile #else: - --ownFile="None" + --index=${refOrHistory.index.fields.path} #end if - --dbkey=${input1.metadata.dbkey} - --indexDir=${GALAXY_DATA_INDEX_DIR} --bamIndex=${input1.metadata.bam_index} --lastCol=$lastCol --indels=$indels @@ -41,7 +39,13 @@ <when value="indexed"><param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for"><validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + </param> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="input1" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options></param></when><when value="history"> @@ -100,6 +104,7 @@ --><param name="reference" value="indexed" /><param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" /> + <param name="index" value="chr_m" /><param name="lastCol" value="no" /><param name="indels" value="no" /><param name="mapCap" value="60" /> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/samtools/sam_to_bam.py --- a/tools/samtools/sam_to_bam.py +++ b/tools/samtools/sam_to_bam.py @@ -3,43 +3,24 @@ Converts SAM data to sorted BAM data. usage: sam_to_bam.py [options] --input1: SAM file to be converted - --dbkey: dbkey value + --index: path of the indexed reference genome --ref_file: Reference file if choosing from history --output1: output dataset in bam format - --index_dir: GALAXY_DATA_INDEX_DIR """ -import optparse, os, sys, subprocess, tempfile, shutil, gzip -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse -from galaxy import util +import optparse, os, sys, subprocess, tempfile, shutil def stop_err( msg ): sys.stderr.write( '%s\n' % msg ) sys.exit() -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) + parser.add_option( '', '--index', dest='index', help='The path of the indexed reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) ( options, args ) = parser.parse_args() # output version # of tool @@ -61,24 +42,17 @@ except: sys.stdout.write( 'Could not determine Samtools version\n' ) - cached_seqs_pointer_file = '%s/sam_fa_indices.loc' % options.index_dir - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) tmp_dir = tempfile.mkdtemp() if not options.ref_file or options.ref_file == 'None': # We're using locally cached reference sequences( e.g., /galaxy/data/equCab2/sam_index/equCab2.fa ). # The indexes for /galaxy/data/equCab2/sam_index/equCab2.fa will be contained in # a file named /galaxy/data/equCab2/sam_index/equCab2.fa.fai - fai_index_file_base = seq_path - fai_index_file_path = '%s.fai' % seq_path + fai_index_file_path = '%s.fai' % options.index if not os.path.exists( fai_index_file_path ): #clean up temp files if os.path.exists( tmp_dir ): shutil.rmtree( tmp_dir ) - stop_err( 'No sequences are available for build (%s), request them by reporting this error.' % options.dbkey ) + stop_err( 'Indexed genome %s not present, request it by reporting this error.' % options.index ) else: try: # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will: diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/samtools/sam_to_bam.xml --- a/tools/samtools/sam_to_bam.xml +++ b/tools/samtools/sam_to_bam.xml @@ -1,4 +1,4 @@ -<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.2"> +<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.3"><description>converts SAM format to BAM format</description><requirements><requirement type="package">samtools</requirement> @@ -7,13 +7,11 @@ sam_to_bam.py --input1=$source.input1 #if $source.index_source == "history": - --dbkey=${ref_file.metadata.dbkey} --ref_file=$source.ref_file #else - --dbkey=${input1.metadata.dbkey} + --index=${source.index.fields.path} #end if --output1=$output1 - --index_dir=${GALAXY_DATA_INDEX_DIR} </command><inputs><conditional name="source"> @@ -22,13 +20,19 @@ <option value="history">History</option></param><when value="cached"> - <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM File to Convert"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM file to convert"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + </param> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <filter type="data_meta" ref="input1" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options></param></when><when value="history"> - <param name="input1" type="data" format="sam" label="Convert SAM file" /> + <param name="input1" type="data" format="sam" label="SAM file to convert" /><param name="ref_file" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" /></when></conditional> @@ -76,6 +80,7 @@ --><param name="index_source" value="cached" /><param name="input1" value="sam_to_bam_in1.sam" ftype="sam" dbkey="chrM" /> + <param name="index" value="chr_m" /><output name="output1" file="sam_to_bam_out2.bam" ftype="bam" /></test></tests> diff -r fc9b51ce979861604abaa9da6b15977f1238957d -r 7538c4cb86dcb9383ece63152f623d3c5e517256 tools/samtools/samtools_mpileup.xml --- a/tools/samtools/samtools_mpileup.xml +++ b/tools/samtools/samtools_mpileup.xml @@ -1,4 +1,4 @@ -<tool id="samtools_mpileup" name="MPileup" version="0.0.1"> +<tool id="samtools_mpileup" name="MPileup" version="0.0.2"><description>SNP and indel caller</description><requirements><requirement type="package">samtools</requirement> @@ -59,22 +59,22 @@ </param><when value="cached"><repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> - </param> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /><!-- fixme!!! this needs to be a select --> + </param></repeat><param name="ref_file" type="select" label="Using reference genome"><options from_data_table="sam_fa_indexes"> - <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...--> + <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--></options></param></when><when value="history"><!-- FIX ME!!!! --><repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file" > - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/> - </param> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> + </param></repeat><param name="ref_file" type="data" format="fasta" label="Using reference file" /></when> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.