details:   http://www.bx.psu.edu/hg/galaxy/rev/8ab38aa72998
changeset: 1536:8ab38aa72998
user:      Dan Blankenberg dan@bx.psu.edu
date:      Tue Sep 30 16:17:12 2008 -0400
description:
Add a change_format tag to output datasets in tools. This allows for dynamic switching of output datatype based upon input values.
Several tools have been updated to take advantage of this, eliminating their need for code_files.
12 file(s) affected in this change:
lib/galaxy/tools/__init__.py
lib/galaxy/tools/actions/__init__.py
tools/annotation_profiler/annotation_profiler.xml
tools/annotation_profiler/annotation_profiler_code.py
tools/extract/extract_genomic_dna.xml
tools/extract/extract_genomic_dna_code.py
tools/filters/pasteWrapper.xml
tools/filters/pasteWrapper_code.py
tools/maf/maf_stats.xml
tools/maf/maf_stats_code.py
tools/sr_mapping/lastz_code.py
tools/sr_mapping/lastz_wrapper.xml
diffs (222 lines):
diff -r 931d6ca549d3 -r 8ab38aa72998 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Tue Sep 30 15:30:57 2008 -0400 +++ b/lib/galaxy/tools/__init__.py Tue Sep 30 16:17:12 2008 -0400 @@ -299,6 +299,7 @@ for data_elem in out_elem.findall("data"): output = ToolOutput( data_elem.get("name") ) output.format = data_elem.get("format", "data") + output.change_format = data_elem.findall("change_format") output.metadata_source = data_elem.get("metadata_source", "") output.parent = data_elem.get("parent", None) output.label = util.xml_text( data_elem, "label" ) diff -r 931d6ca549d3 -r 8ab38aa72998 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py Tue Sep 30 15:30:57 2008 -0400 +++ b/lib/galaxy/tools/actions/__init__.py Tue Sep 30 16:17:12 2008 -0400 @@ -115,6 +115,21 @@ ext = output.format if ext == "input": ext = input_ext + #process change_format tags + if output.change_format: + for change_elem in output.change_format: + for when_elem in change_elem.findall( 'when' ): + check = incoming.get( when_elem.get( 'input' ), None ) + if check is not None: + if check == when_elem.get( 'value', None ): + ext = when_elem.get( 'format', ext ) + else: + check = when_elem.get( 'input_dataset', None ) + if check is not None: + check = inp_data.get( check, None ) + if check is not None: + if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ): + ext = when_elem.get( 'format', ext ) data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True ) # Commit the dataset immediately so it gets database assigned unique id data.flush() diff -r 931d6ca549d3 -r 8ab38aa72998 tools/annotation_profiler/annotation_profiler.xml --- a/tools/annotation_profiler/annotation_profiler.xml Tue Sep 30 15:30:57 2008 -0400 +++ b/tools/annotation_profiler/annotation_profiler.xml Tue Sep 30 16:17:12 2008 -0400 @@ -16,9 +16,12 @@ <param name="table_names" type="drill_down" display="checkbox" hierarchy="recurse" 
multiple="true" label="Choose Tables to Use" help="Selecting no tables will result in using all tables." from_file="annotation_profiler_options.xml"/> </inputs> <outputs> - <data format="input" name="out_file1"/> + <data format="input" name="out_file1"> + <change_format> + <when input="summary" value="-S" format="tabular" /> + </change_format> + </data> </outputs> - <code file="annotation_profiler_code.py" /> <tests> <test> <param name="input1" value="4.bed" dbkey="hg18"/> diff -r 931d6ca549d3 -r 8ab38aa72998 tools/annotation_profiler/annotation_profiler_code.py --- a/tools/annotation_profiler/annotation_profiler_code.py Tue Sep 30 15:30:57 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -#Change format from Interval to tabular if needed -def exec_before_job(app, inp_data, out_data, param_dict, tool): - if param_dict['summary']: - out_data['out_file1'].change_datatype('tabular') \ No newline at end of file diff -r 931d6ca549d3 -r 8ab38aa72998 tools/extract/extract_genomic_dna.xml --- a/tools/extract/extract_genomic_dna.xml Tue Sep 30 15:30:57 2008 -0400 +++ b/tools/extract/extract_genomic_dna.xml Tue Sep 30 16:17:12 2008 -0400 @@ -12,9 +12,12 @@ </param> </inputs> <outputs> - <data format="fasta" name="out_file1" /> + <data format="fasta" name="out_file1"> + <change_format> + <when input="out_format" value="interval" format="interval" /> + </change_format> + </data> </outputs> - <code file="extract_genomic_dna_code.py" /> <tests> <test> <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> diff -r 931d6ca549d3 -r 8ab38aa72998 tools/extract/extract_genomic_dna_code.py --- a/tools/extract/extract_genomic_dna_code.py Tue Sep 30 15:30:57 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -# by dan -#Change format from FASTA to Interval if needed; use metadata from input file -def exec_before_job(app, inp_data, out_data, param_dict, tool): - if param_dict['out_format'] == "interval": - 
out_data['out_file1'].change_datatype('interval') - out_data['out_file1'].init_meta( copy_from=inp_data['input'] ) diff -r 931d6ca549d3 -r 8ab38aa72998 tools/filters/pasteWrapper.xml --- a/tools/filters/pasteWrapper.xml Tue Sep 30 15:30:57 2008 -0400 +++ b/tools/filters/pasteWrapper.xml Tue Sep 30 16:17:12 2008 -0400 @@ -15,7 +15,11 @@ </param> </inputs> <outputs> - <data format="input" name="out_file1" metadata_source="input1" /> + <data format="input" name="out_file1" metadata_source="input1"> + <change_format> + <when input_dataset="input1" attribute="ext" value="bed" format="interval"/> + </change_format> + </data> </outputs> <tests> <test> @@ -60,5 +64,4 @@ a 3 40
</help> -<code file="pasteWrapper_code.py"/> </tool> diff -r 931d6ca549d3 -r 8ab38aa72998 tools/filters/pasteWrapper_code.py --- a/tools/filters/pasteWrapper_code.py Tue Sep 30 15:30:57 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#post processing, if bed file, change to interval file -from galaxy import datatypes -def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr): - for name, data in out_data.items(): - if data.ext == "bed": - data = app.datatypes_registry.change_datatype(data, "interval") - data.flush() \ No newline at end of file diff -r 931d6ca549d3 -r 8ab38aa72998 tools/maf/maf_stats.xml --- a/tools/maf/maf_stats.xml Tue Sep 30 15:30:57 2008 -0400 +++ b/tools/maf/maf_stats.xml Tue Sep 30 16:17:12 2008 -0400 @@ -44,7 +44,11 @@ </param> </inputs> <outputs> - <data format="interval" name="out_file1" metadata_source="input1"/> + <data format="interval" name="out_file1" metadata_source="input1"> + <change_format> + <when input="summary" value="true" format="tabular" /> + </change_format> + </data> </outputs> <requirements> <requirement type="python-module">numpy</requirement> @@ -95,5 +99,4 @@
where **coverage** is the number of nucleotides divided by the total length of the provided intervals. </help> - <code file="maf_stats_code.py"/> </tool> diff -r 931d6ca549d3 -r 8ab38aa72998 tools/maf/maf_stats_code.py --- a/tools/maf/maf_stats_code.py Tue Sep 30 15:30:57 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -import os - -def load_maf_data( GALAXY_DATA_INDEX_DIR, sep='\t' ): - # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to - # set data.name in exec_before_job(). - maf_sets = {} - filename = "%s/maf_index.loc" % GALAXY_DATA_INDEX_DIR - for i, line in enumerate( file( filename ) ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): - fields = line.split( sep ) - #read each line, if not enough fields, go to next line - try: - maf_desc = fields[0] - maf_uid = fields[1] - builds = fields[2] - build_list =[] - split_builds = builds.split( "," ) - for build in split_builds: - this_build = build.split( "=" )[0] - build_list.append( this_build ) - paths = fields[3] - maf_sets[ maf_uid ] = {} - maf_sets[ maf_uid ][ 'description' ] = maf_desc - maf_sets[ maf_uid ][ 'builds' ] = build_list - except: - continue - return maf_sets -def exec_before_job(app, inp_data, out_data, param_dict, tool): - maf_sets = load_maf_data( app.config.tool_data_path, sep='\t' ) - if param_dict[ 'maf_source_type' ][ 'maf_source' ] == "cached": - for name, data in out_data.items(): - try: - data.name = data.name + " [" + maf_sets[ str( param_dict[ 'maf_source_type' ][ 'mafType' ] ) ][ 'description' ] + "]" - except KeyError: - data.name = data.name + " [unknown MAF source specified]" - if param_dict[ 'summary' ].lower() == "true": - for name, data in out_data.items(): - data.change_datatype( 'tabular' ) diff -r 931d6ca549d3 -r 8ab38aa72998 tools/sr_mapping/lastz_code.py --- a/tools/sr_mapping/lastz_code.py Tue Sep 30 15:30:57 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -# 
by dan -#Change format from tabular to maf if needed; use metadata from input file -def exec_before_job(app, inp_data, out_data, param_dict, tool): - if param_dict['out_format'] == "maf": - out_data['output1'].change_datatype('maf') -# out_data['output1'].init_meta( copy_from=inp_data['input1'] ) diff -r 931d6ca549d3 -r 8ab38aa72998 tools/sr_mapping/lastz_wrapper.xml --- a/tools/sr_mapping/lastz_wrapper.xml Tue Sep 30 15:30:57 2008 -0400 +++ b/tools/sr_mapping/lastz_wrapper.xml Tue Sep 30 16:17:12 2008 -0400 @@ -77,13 +77,16 @@ <param name="min_cvrg" type="integer" size="3" value="0" label="Do not report matches that cover less than this fraction (%) of each read"/> </inputs> <outputs> - <data format="tabular" name="output1" /> + <data format="tabular" name="output1"> + <change_format> + <when input="out_format" value="maf" format="maf" /> + </change_format> + </data> <data format="tabular" name="output2" /> </outputs> <requirements> <requirement type="binary">lastz</requirement> </requirements> - <code file="lastz_code.py" /> <tests> <test> <param name="input1" value="phiX.fa" ftype="fasta" />
galaxy-dev@lists.galaxyproject.org