details:
http://www.bx.psu.edu/hg/galaxy/rev/931d6ca549d3
changeset: 1535:931d6ca549d3
user: guru
date: Tue Sep 30 15:30:57 2008 -0400
description:
Modified the way biomart tool runs: output generation will be completed before
exec_after_process hook is called.
4 file(s) affected in this change:
tools/data_source/biomart.py
tools/data_source/biomart.xml
tools/data_source/biomart_filter.py
tools/data_source/biomart_test.xml
diffs (148 lines):
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/biomart.py Tue Sep 30 15:30:57 2008 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#Retreives data from BIOMART and stores in a file. Biomart parameters are provided in the input/output file.
+#guruprasad Ananda
+
+import urllib, sys, os, gzip, tempfile, shutil
+from galaxy import eggs
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+ sys.stderr.write( msg )
+ sys.exit()
+
+def __main__():
+ filename = sys.argv[1]
+ params = {}
+ for line in open(filename, 'r'):
+ try:
+ line = line.strip()
+ fields = line.split('\t')
+ params[fields[0]] = fields[1]
+ except:
+ continue
+
+ URL = params.get( 'URL', None )
+ if not URL:
+ open( filename, 'w' ).write( "" )
+ stop_err( 'Datasource has not sent back a URL parameter.' )
+ URL = URL + '&_export=1&GALAXY_URL=0'
+ CHUNK_SIZE = 2**20 # 1Mb
+ MAX_SIZE = CHUNK_SIZE * 100
+ try:
+ page = urllib.urlopen(URL)
+ except Exception, exc:
+ stop_err('Problems connecting to %s (%s)' % (URL, exc) )
+
+ fp = open(filename, 'w')
+ size = 0
+ max_size_exceeded = False
+ while 1:
+ chunk = page.read(CHUNK_SIZE)
+ if not chunk:
+ break
+ size += len(chunk)
+ if size > MAX_SIZE:
+ max_size_exceeded = True
+ break
+ fp.write(chunk)
+ fp.close()
+
+ if max_size_exceeded:
+ print 'Maximum data size of 100 MB exceeded, incomplete data retrieval.'
+
+if __name__ == "__main__":
+ __main__()
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart.xml
--- a/tools/data_source/biomart.xml Mon Sep 29 10:37:54 2008 -0400
+++ b/tools/data_source/biomart.xml Tue Sep 30 15:30:57 2008 -0400
@@ -1,14 +1,14 @@
<?xml version="1.0"?>
<tool name="BioMart" id="biomart">
-
<description>Central server</description>
-
- <command/>
-
+ <command interpreter="python">
+ biomart.py
+ $output
+ </command>
<inputs
action="http://www.biomart.org/biomart/martview"
check_values="false" method="get" target="_top">
<display>go to BioMart Central $GALAXY_URL</display>
<param name="GALAXY_URL" type="baseurl"
value="/tool_runner/biomart" />
- </inputs>
+ </inputs>
<uihints minwidth="800"/>
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart_filter.py
--- a/tools/data_source/biomart_filter.py Mon Sep 29 10:37:54 2008 -0400
+++ b/tools/data_source/biomart_filter.py Tue Sep 30 15:30:57 2008 -0400
@@ -14,41 +14,16 @@
data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
data = app.datatypes_registry.change_datatype(data, data_type)
data.name = data_name
+ #store BIOMART parameters temporarily in output file
+ out = open(data.file_name,'w')
+ for key, value in param_dict.items():
+ print >> out, "%s\t%s" % (key,value)
+ out.close()
out_data[name] = data
+
def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
- """Verifies the data after the run"""
- URL = param_dict.get( 'URL', None )
- if not URL:
- raise Exception('Datasource has not sent back a URL parameter')
- URL = URL + '&_export=1&GALAXY_URL=0'
- CHUNK_SIZE = 2**20 # 1Mb
- MAX_SIZE = CHUNK_SIZE * 100
- try:
- page = urllib.urlopen(URL)
- except Exception, exc:
- raise Exception('Problems connecting to %s (%s)' % (URL, exc) )
name, data = out_data.items()[0]
- fp = open(data.file_name, 'wb')
- size = 0
- max_size_exceeded = False
-
- while 1:
- chunk = page.read(CHUNK_SIZE)
- if not chunk:
- break
- size += len(chunk)
- if size > MAX_SIZE:
- max_size_exceeded = True
- break
- fp.write(chunk)
- fp.close()
-
- if max_size_exceeded:
- data.info = 'Maximum data size of 100 MB exceeded, incomplete data retrieval.'
- else:
- data.info = data.name
-
if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval):
#Set meta data, format file to be valid interval type
data.set_meta(first_line_is_header=True)
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart_test.xml
--- a/tools/data_source/biomart_test.xml Mon Sep 29 10:37:54 2008 -0400
+++ b/tools/data_source/biomart_test.xml Tue Sep 30 15:30:57 2008 -0400
@@ -3,7 +3,10 @@
<description>Test server</description>
- <command/>
+ <command interpreter="python">
+ biomart.py
+ $output
+ </command>
<inputs
action="http://test.biomart.org/biomart/martview"
check_values="false" method="get" target="_top">
<display>go to BioMart Central $GALAXY_URL</display>