[hg] galaxy 1714: Fixes for Biomart and UCSC to better ensure the data type is correctly set.
details: http://www.bx.psu.edu/hg/galaxy/rev/dbb3210e4549 changeset: 1714:dbb3210e4549 user: Greg Von Kuster <greg@bx.psu.edu> date: Wed Jan 21 17:03:39 2009 -0500 description: Fixes for Biomart and UCSC to better ensure the data type is correctly set. A data type of "tabular" is now the default, and data will be sniffed if this is the data type. 6 file(s) affected in this change: lib/galaxy/tools/__init__.py tools/data_source/biomart.xml tools/data_source/biomart_test.xml tools/data_source/ucsc_tablebrowser.xml tools/data_source/ucsc_tablebrowser_archaea.xml tools/data_source/ucsc_tablebrowser_test.xml diffs (182 lines): diff -r 6d849785ff86 -r dbb3210e4549 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Mon Jan 19 15:47:02 2009 -0500 +++ b/lib/galaxy/tools/__init__.py Wed Jan 21 17:03:39 2009 -0500 @@ -1240,8 +1240,6 @@ def exec_before_job( self, app, inp_data, out_data, param_dict={} ): if self.tool_type == 'data_source': - # List for converting UCSC to Galaxy exts, if not in following dictionary, use provided datatype - data_type_to_ext = { 'wigdata':'wig', 'tab':'interval', 'hyperlinks':'html', 'sequence':'fasta' } dbkey = param_dict.get( 'dbkey' ) organism = param_dict.get( 'organism' ) table = param_dict.get( 'table' ) @@ -1263,12 +1261,9 @@ data.name = '%s on %s' % ( data.name, gb_landmark_region ) data.info = info data.dbkey = dbkey - try: - data_type = data_type_to_ext[ data_type ] - except: - pass - if data_type not in app.datatypes_registry.datatypes_by_extension: - data_type = 'interval' + if data_type not in app.datatypes_registry.datatypes_by_extension: + # Setting data_type to tabular will force the data to be sniffed in exec_after_process() + data_type = 'tabular' data = app.datatypes_registry.change_datatype( data, data_type ) # Store external data source's request parameters temporarily in output file. 
# In case the config setting for "outputs_to_working_directory" is True, we must write to @@ -1284,8 +1279,6 @@ return out_data def exec_after_process( self, app, inp_data, out_data, param_dict ): - # TODO: for data_source tools at least, this code can probably be handled more optimally by adding a new - # tag set in the tool config. if self.tool_type == 'data_source': name, data = out_data.items()[0] data.set_size() @@ -1294,9 +1287,10 @@ data.info = param_dict.get( 'info', data.name ) data.dbkey = param_dict.get( 'dbkey', data.dbkey ) data.extension = param_dict.get( 'data_type', data.extension ) - if data.extension == 'txt': + if data.extension in [ 'txt', 'tabular' ]: data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) - data = app.datatypes_registry.change_datatype( data, data_type ) + if data.extension != data_type: + data = app.datatypes_registry.change_datatype( data, data_type ) elif not isinstance( data.datatype, datatypes.interval.Bed ) and isinstance( data.datatype, datatypes.interval.Interval ): data.set_meta() if data.missing_meta(): diff -r 6d849785ff86 -r dbb3210e4549 tools/data_source/biomart.xml --- a/tools/data_source/biomart.xml Mon Jan 19 15:47:02 2009 -0500 +++ b/tools/data_source/biomart.xml Wed Jan 21 17:03:39 2009 -0500 @@ -21,17 +21,21 @@ <param_from_source name="GALAXY_URL" missing="0" /> </add_to_url> </request_param> + <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > + <data_type_translation> + <format galaxy_format="tabular" remote_format="TSV" /> + </data_type_translation> + </request_param> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" 
/> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> <request_param galaxy_name="description" remote_name="description" missing="" /> <request_param galaxy_name="name" remote_name="name" missing="Biomart query" /> <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="type" missing="txt" /> </request_param_translation> <uihints minwidth="800"/> <outputs> - <data name="output" format="txt" /> + <data name="output" format="tabular" /> </outputs> <options sanitize="False" refresh="True"/> </tool> diff -r 6d849785ff86 -r dbb3210e4549 tools/data_source/biomart_test.xml --- a/tools/data_source/biomart_test.xml Mon Jan 19 15:47:02 2009 -0500 +++ b/tools/data_source/biomart_test.xml Wed Jan 21 17:03:39 2009 -0500 @@ -21,17 +21,21 @@ <param_from_source name="GALAXY_URL" missing="0" /> </add_to_url> </request_param> + <request_param galaxy_name="data_type" remote_name="exportView_outputformat" missing="tabular" > + <data_type_translation> + <format galaxy_format="tabular" remote_format="TSV" /> + </data_type_translation> + </request_param> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" 
/> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> <request_param galaxy_name="description" remote_name="description" missing="" /> <request_param galaxy_name="name" remote_name="name" missing="Biomart test query" /> <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="type" missing="txt" /> </request_param_translation> <uihints minwidth="800"/> <outputs> - <data name="output" format="txt" /> + <data name="output" format="tabular" /> </outputs> <options sanitize="False" refresh="True"/> </tool> diff -r 6d849785ff86 -r dbb3210e4549 tools/data_source/ucsc_tablebrowser.xml --- a/tools/data_source/ucsc_tablebrowser.xml Mon Jan 19 15:47:02 2009 -0500 +++ b/tools/data_source/ucsc_tablebrowser.xml Wed Jan 21 17:03:39 2009 -0500 @@ -21,16 +21,20 @@ <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> <request_param galaxy_name="table" remote_name="hgta_track" missing="unknown table" /> <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="bed" > + <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > <data_type_translation> <format galaxy_format="tabular" remote_format="primaryTable" /> <format galaxy_format="tabular" remote_format="selectedFields" /> + <format galaxy_format="wig" remote_format="wigdata" /> + <format galaxy_format="interval" remote_format="tab" /> + <format galaxy_format="html" remote_format="hyperlinks" /> + <format galaxy_format="fasta" remote_format="sequence" /> </data_type_translation> </request_param> </request_param_translation> <uihints minwidth="800"/> <outputs> - <data name="output" format="bed" /> + <data name="output" format="tabular" /> </outputs> <options sanitize="False" refresh="True"/> 
</tool> diff -r 6d849785ff86 -r dbb3210e4549 tools/data_source/ucsc_tablebrowser_archaea.xml --- a/tools/data_source/ucsc_tablebrowser_archaea.xml Mon Jan 19 15:47:02 2009 -0500 +++ b/tools/data_source/ucsc_tablebrowser_archaea.xml Wed Jan 21 17:03:39 2009 -0500 @@ -21,16 +21,20 @@ <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> <request_param galaxy_name="table" remote_name="hgta_track" missing="" /> <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="bed" > + <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > <data_type_translation> <format galaxy_format="tabular" remote_format="primaryTable" /> <format galaxy_format="tabular" remote_format="selectedFields" /> + <format galaxy_format="wig" remote_format="wigdata" /> + <format galaxy_format="interval" remote_format="tab" /> + <format galaxy_format="html" remote_format="hyperlinks" /> + <format galaxy_format="fasta" remote_format="sequence" /> </data_type_translation> </request_param> </request_param_translation> <uihints minwidth="800"/> <outputs> - <data name="output" format="bed" /> + <data name="output" format="tabular" /> </outputs> <options sanitize="False" refresh="True"/> </tool> diff -r 6d849785ff86 -r dbb3210e4549 tools/data_source/ucsc_tablebrowser_test.xml --- a/tools/data_source/ucsc_tablebrowser_test.xml Mon Jan 19 15:47:02 2009 -0500 +++ b/tools/data_source/ucsc_tablebrowser_test.xml Wed Jan 21 17:03:39 2009 -0500 @@ -21,16 +21,20 @@ <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> <request_param galaxy_name="table" remote_name="hgta_track" missing="" /> <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="bed" > + <request_param galaxy_name="data_type" 
remote_name="hgta_outputType" missing="tabular" > <data_type_translation> <format galaxy_format="tabular" remote_format="primaryTable" /> <format galaxy_format="tabular" remote_format="selectedFields" /> + <format galaxy_format="wig" remote_format="wigdata" /> + <format galaxy_format="interval" remote_format="tab" /> + <format galaxy_format="html" remote_format="hyperlinks" /> + <format galaxy_format="fasta" remote_format="sequence" /> </data_type_translation> </request_param> </request_param_translation> <uihints minwidth="800"/> <outputs> - <data name="output" format="bed" /> + <data name="output" format="tabular" /> </outputs> <options sanitize="False" refresh="True"/> </tool>
participants (1)
-
Greg Von Kuster