[hg] galaxy 1536: Add a change_format tag to output datasets in tools.

1 Oct 2008

details:   http://www.bx.psu.edu/hg/galaxy/rev/8ab38aa72998
changeset: 1536:8ab38aa72998
user:      Dan Blankenberg <dan@bx.psu.edu>
date:      Tue Sep 30 16:17:12 2008 -0400
description:
Add a change_format tag to output datasets in tools. This allows for dynamic switching of output datatype based upon input values.

Several tools have been updated to take advantage of this, eliminating their need for code_files.

12 file(s) affected in this change:

lib/galaxy/tools/__init__.py
lib/galaxy/tools/actions/__init__.py
tools/annotation_profiler/annotation_profiler.xml
tools/annotation_profiler/annotation_profiler_code.py
tools/extract/extract_genomic_dna.xml
tools/extract/extract_genomic_dna_code.py
tools/filters/pasteWrapper.xml
tools/filters/pasteWrapper_code.py
tools/maf/maf_stats.xml
tools/maf/maf_stats_code.py
tools/sr_mapping/lastz_code.py
tools/sr_mapping/lastz_wrapper.xml

diffs (222 lines):

diff -r 931d6ca549d3 -r 8ab38aa72998 lib/galaxy/tools/__init__.py

--- a/lib/galaxy/tools/__init__.py	Tue Sep 30 15:30:57 2008 -0400
+++ b/lib/galaxy/tools/__init__.py	Tue Sep 30 16:17:12 2008 -0400
@@ -299,6 +299,7 @@
             for data_elem in out_elem.findall("data"):
                 output = ToolOutput( data_elem.get("name") )
                 output.format = data_elem.get("format", "data")
+                output.change_format = data_elem.findall("change_format")
                 output.metadata_source = data_elem.get("metadata_source", "")
                 output.parent = data_elem.get("parent", None)
                 output.label = util.xml_text( data_elem, "label" )
diff -r 931d6ca549d3 -r 8ab38aa72998 lib/galaxy/tools/actions/__init__.py
--- a/lib/galaxy/tools/actions/__init__.py	Tue Sep 30 15:30:57 2008 -0400
+++ b/lib/galaxy/tools/actions/__init__.py	Tue Sep 30 16:17:12 2008 -0400
@@ -115,6 +115,21 @@
                 ext = output.format
                 if ext == "input":
                     ext = input_ext
+                #process change_format tags
+                if output.change_format:
+                    for change_elem in output.change_format:
+                        for when_elem in change_elem.findall( 'when' ):
+                            check = incoming.get( when_elem.get( 'input' ), None )
+                            if check is not None:
+                                if check == when_elem.get( 'value', None ):
+                                    ext = when_elem.get( 'format', ext )
+                            else:
+                                check = when_elem.get( 'input_dataset', None )
+                                if check is not None:
+                                    check = inp_data.get( check, None )
+                                    if check is not None:
+                                        if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ):
+                                            ext = when_elem.get( 'format', ext )
                 data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True )
                 # Commit the dataset immediately so it gets database assigned unique id
                 data.flush()
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/annotation_profiler/annotation_profiler.xml
--- a/tools/annotation_profiler/annotation_profiler.xml	Tue Sep 30 15:30:57 2008 -0400
+++ b/tools/annotation_profiler/annotation_profiler.xml	Tue Sep 30 16:17:12 2008 -0400
@@ -16,9 +16,12 @@
     <param name="table_names" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true" label="Choose Tables to Use" help="Selecting no tables will result in using all tables." from_file="annotation_profiler_options.xml"/>
    </inputs>
    <outputs>
-     <data format="input" name="out_file1"/>
+     <data format="input" name="out_file1">
+       <change_format>
+         <when input="summary" value="-S" format="tabular" />
+       </change_format>
+     </data>
    </outputs>
-   <code file="annotation_profiler_code.py" />  
    <tests>
      <test>
        <param name="input1" value="4.bed" dbkey="hg18"/>
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/annotation_profiler/annotation_profiler_code.py
--- a/tools/annotation_profiler/annotation_profiler_code.py	Tue Sep 30 15:30:57 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-#Change format from Interval to tabular if needed
-def exec_before_job(app, inp_data, out_data, param_dict, tool):
-    if param_dict['summary']:
-        out_data['out_file1'].change_datatype('tabular')
\ No newline at end of file
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/extract/extract_genomic_dna.xml
--- a/tools/extract/extract_genomic_dna.xml	Tue Sep 30 15:30:57 2008 -0400
+++ b/tools/extract/extract_genomic_dna.xml	Tue Sep 30 16:17:12 2008 -0400
@@ -12,9 +12,12 @@
 	</param>
   </inputs>
   <outputs>
-    <data format="fasta" name="out_file1" />
+    <data format="fasta" name="out_file1">
+      <change_format>
+        <when input="out_format" value="interval" format="interval" />
+      </change_format>
+    </data>
   </outputs>
-  <code file="extract_genomic_dna_code.py" />  
   <tests>
     <test>
       <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/extract/extract_genomic_dna_code.py
--- a/tools/extract/extract_genomic_dna_code.py	Tue Sep 30 15:30:57 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-# by dan
-#Change format from FASTA to Interval if needed; use metadata from input file
-def exec_before_job(app, inp_data, out_data, param_dict, tool):
-    if param_dict['out_format'] == "interval":
-        out_data['out_file1'].change_datatype('interval')
-        out_data['out_file1'].init_meta( copy_from=inp_data['input'] )
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/filters/pasteWrapper.xml
--- a/tools/filters/pasteWrapper.xml	Tue Sep 30 15:30:57 2008 -0400
+++ b/tools/filters/pasteWrapper.xml	Tue Sep 30 16:17:12 2008 -0400
@@ -15,7 +15,11 @@
     </param>
   </inputs>
   <outputs>
-    <data format="input" name="out_file1" metadata_source="input1" />
+    <data format="input" name="out_file1" metadata_source="input1">
+      <change_format>
+        <when input_dataset="input1" attribute="ext" value="bed" format="interval"/>
+      </change_format>
+    </data>
   </outputs>
   <tests>
     <test>
@@ -60,5 +64,4 @@
     a 3 40
 
 </help>
-<code file="pasteWrapper_code.py"/>
 </tool>
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/filters/pasteWrapper_code.py
--- a/tools/filters/pasteWrapper_code.py	Tue Sep 30 15:30:57 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#post processing, if bed file, change to interval file
-from galaxy import datatypes
-def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
-    for name, data in out_data.items():
-        if data.ext == "bed":
-            data = app.datatypes_registry.change_datatype(data, "interval")
-            data.flush()
\ No newline at end of file
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/maf/maf_stats.xml
--- a/tools/maf/maf_stats.xml	Tue Sep 30 15:30:57 2008 -0400
+++ b/tools/maf/maf_stats.xml	Tue Sep 30 16:17:12 2008 -0400
@@ -44,7 +44,11 @@
     </param>
   </inputs>
   <outputs>
-    <data format="interval" name="out_file1" metadata_source="input1"/>
+    <data format="interval" name="out_file1" metadata_source="input1">
+      <change_format>
+        <when input="summary" value="true" format="tabular" />
+      </change_format>
+    </data>
   </outputs>
   <requirements>
     <requirement type="python-module">numpy</requirement>
@@ -95,5 +99,4 @@
 
   where **coverage** is the number of nucleotides divided by the total length of the provided intervals.
   </help>
-  <code file="maf_stats_code.py"/>
 </tool>
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/maf/maf_stats_code.py
--- a/tools/maf/maf_stats_code.py	Tue Sep 30 15:30:57 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-import os
-
-def load_maf_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
-    # FIXME: this function is duplicated in the DynamicOptions class.  It is used here only to
-    # set data.name in exec_before_job(). 
-    maf_sets = {}
-    filename = "%s/maf_index.loc" % GALAXY_DATA_INDEX_DIR
-    for i, line in enumerate( file( filename ) ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( sep )
-            #read each line, if not enough fields, go to next line
-            try:
-                maf_desc = fields[0]
-                maf_uid = fields[1]
-                builds = fields[2]
-                build_list =[]
-                split_builds = builds.split( "," )
-                for build in split_builds:
-                    this_build = build.split( "=" )[0]
-                    build_list.append( this_build )
-                paths = fields[3]
-                maf_sets[ maf_uid ] = {}
-                maf_sets[ maf_uid ][ 'description' ] = maf_desc
-                maf_sets[ maf_uid ][ 'builds' ] = build_list
-            except:
-                continue
-    return maf_sets
-def exec_before_job(app, inp_data, out_data, param_dict, tool):
-    maf_sets = load_maf_data( app.config.tool_data_path, sep='\t'  )
-    if param_dict[ 'maf_source_type' ][ 'maf_source' ] == "cached":
-        for name, data in out_data.items():
-            try:
-                data.name = data.name + " [" + maf_sets[ str( param_dict[ 'maf_source_type' ][ 'mafType' ] ) ][ 'description' ] + "]"
-            except KeyError:
-                data.name = data.name + " [unknown MAF source specified]"
-    if param_dict[ 'summary' ].lower() == "true":
-        for name, data in out_data.items():
-            data.change_datatype( 'tabular' )
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/sr_mapping/lastz_code.py
--- a/tools/sr_mapping/lastz_code.py	Tue Sep 30 15:30:57 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-# by dan
-#Change format from tabular to maf if needed; use metadata from input file
-def exec_before_job(app, inp_data, out_data, param_dict, tool):
-    if param_dict['out_format'] == "maf":
-        out_data['output1'].change_datatype('maf')
-#        out_data['output1'].init_meta( copy_from=inp_data['input1'] )
diff -r 931d6ca549d3 -r 8ab38aa72998 tools/sr_mapping/lastz_wrapper.xml
--- a/tools/sr_mapping/lastz_wrapper.xml	Tue Sep 30 15:30:57 2008 -0400
+++ b/tools/sr_mapping/lastz_wrapper.xml	Tue Sep 30 16:17:12 2008 -0400
@@ -77,13 +77,16 @@
     <param name="min_cvrg"  type="integer" size="3" value="0"   label="Do not report matches that cover less than this fraction (%) of each read"/>
   </inputs>
     <outputs>
-    <data format="tabular" name="output1" />
+    <data format="tabular" name="output1">
+      <change_format>
+        <when input="out_format" value="maf" format="maf" />
+      </change_format>
+    </data>
     <data format="tabular" name="output2" />
   </outputs>
   	<requirements>
 	  <requirement type="binary">lastz</requirement>
 	</requirements>
-	<code file="lastz_code.py" />  
   <tests>
     <test>
       <param name="input1" value="phiX.fa" ftype="fasta"  />

    

Greg Von Kuster

tags

participants (1)