[hg] galaxy 1535: Modified the way biomart tool runs: output generation will be completed before exec_after_process hook is called.
details: http://www.bx.psu.edu/hg/galaxy/rev/931d6ca549d3 changeset: 1535:931d6ca549d3 user: guru date: Tue Sep 30 15:30:57 2008 -0400 description: Modified the way biomart tool runs: output generation will be completed before exec_after_process hook is called. 4 file(s) affected in this change: tools/data_source/biomart.py tools/data_source/biomart.xml tools/data_source/biomart_filter.py tools/data_source/biomart_test.xml diffs (148 lines): diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/biomart.py Tue Sep 30 15:30:57 2008 -0400 @@ -0,0 +1,55 @@ +#!/usr/bin/env python +#Retreives data from BIOMART and stores in a file. Biomart parameters are provided in the input/output file. +#guruprasad Ananda + +import urllib, sys, os, gzip, tempfile, shutil +from galaxy import eggs + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def __main__(): + filename = sys.argv[1] + params = {} + for line in open(filename, 'r'): + try: + line = line.strip() + fields = line.split('\t') + params[fields[0]] = fields[1] + except: + continue + + URL = params.get( 'URL', None ) + if not URL: + open( filename, 'w' ).write( "" ) + stop_err( 'Datasource has not sent back a URL parameter.' ) + URL = URL + '&_export=1&GALAXY_URL=0' + CHUNK_SIZE = 2**20 # 1Mb + MAX_SIZE = CHUNK_SIZE * 100 + try: + page = urllib.urlopen(URL) + except Exception, exc: + stop_err('Problems connecting to %s (%s)' % (URL, exc) ) + + fp = open(filename, 'w') + size = 0 + max_size_exceeded = False + while 1: + chunk = page.read(CHUNK_SIZE) + if not chunk: + break + size += len(chunk) + if size > MAX_SIZE: + max_size_exceeded = True + break + fp.write(chunk) + fp.close() + + if max_size_exceeded: + print 'Maximum data size of 100 MB exceeded, incomplete data retrieval.' 
+ +if __name__ == "__main__": + __main__() diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart.xml --- a/tools/data_source/biomart.xml Mon Sep 29 10:37:54 2008 -0400 +++ b/tools/data_source/biomart.xml Tue Sep 30 15:30:57 2008 -0400 @@ -1,14 +1,14 @@ <?xml version="1.0"?> <tool name="BioMart" id="biomart"> - <description>Central server</description> - - <command/> - + <command interpreter="python"> + biomart.py + $output + </command> <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to BioMart Central $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> - </inputs> + </inputs> <uihints minwidth="800"/> diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart_filter.py --- a/tools/data_source/biomart_filter.py Mon Sep 29 10:37:54 2008 -0400 +++ b/tools/data_source/biomart_filter.py Tue Sep 30 15:30:57 2008 -0400 @@ -14,41 +14,16 @@ data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) data = app.datatypes_registry.change_datatype(data, data_type) data.name = data_name + #store BIOMART parameters temporarily in output file + out = open(data.file_name,'w') + for key, value in param_dict.items(): + print >> out, "%s\t%s" % (key,value) + out.close() out_data[name] = data + def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None): - """Verifies the data after the run""" - URL = param_dict.get( 'URL', None ) - if not URL: - raise Exception('Datasource has not sent back a URL parameter') - URL = URL + '&_export=1&GALAXY_URL=0' - CHUNK_SIZE = 2**20 # 1Mb - MAX_SIZE = CHUNK_SIZE * 100 - try: - page = urllib.urlopen(URL) - except Exception, exc: - raise Exception('Problems connecting to %s (%s)' % (URL, exc) ) name, data = out_data.items()[0] - fp = open(data.file_name, 'wb') - size = 0 - max_size_exceeded = False - - while 1: - chunk = page.read(CHUNK_SIZE) - if not chunk: 
- break - size += len(chunk) - if size > MAX_SIZE: - max_size_exceeded = True - break - fp.write(chunk) - fp.close() - - if max_size_exceeded: - data.info = 'Maximum data size of 100 MB exceeded, incomplete data retrieval.' - else: - data.info = data.name - if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval): #Set meta data, format file to be valid interval type data.set_meta(first_line_is_header=True) diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart_test.xml --- a/tools/data_source/biomart_test.xml Mon Sep 29 10:37:54 2008 -0400 +++ b/tools/data_source/biomart_test.xml Tue Sep 30 15:30:57 2008 -0400 @@ -3,7 +3,10 @@ <description>Test server</description> - <command/> + <command interpreter="python"> + biomart.py + $output + </command> <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to BioMart Central $GALAXY_URL</display>
participants (1)
-
Greg Von Kuster