[galaxy-dev] [hg] galaxy 2421: Add a defaulttimeout setting of 10 minutes to ...

29 May 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/186fcb977e75
changeset: 2421:186fcb977e75
user:      Greg Von Kuster <greg@bx.psu.edu>
date:      Thu May 28 11:10:44 2009 -0400
description:
Add a defaulttimeout setting of 10 minutes to the urlopen() call for requests to remote data sources.

1 file(s) affected in this change:

tools/data_source/data_source.py

diffs (43 lines):

diff -r 5c2d30d3fb04 -r 186fcb977e75 tools/data_source/data_source.py

--- a/tools/data_source/data_source.py	Wed May 27 14:19:59 2009 -0400
+++ b/tools/data_source/data_source.py	Thu May 28 11:10:44 2009 -0400
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
-#Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file.
-import urllib, sys, os, gzip, tempfile, shutil
+# Retrieves data from external data source applications and stores in a dataset file.
+# Data source application parameters are temporarily stored in the dataset file.
+import socket, urllib, sys, os, gzip, tempfile, shutil
 from galaxy import eggs
-#from galaxy.datatypes import data
 from galaxy.util import gzip_magic
 
 assert sys.version_info[:2] >= ( 2, 4 )
@@ -34,15 +34,23 @@
         open( filename, 'w' ).write( "" )
         stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
     URL_method = params.get( 'URL_method', None )
-    out = open( filename, 'w' )
-    CHUNK_SIZE = 2**20 # 1Mb 
+    CHUNK_SIZE = 2**20 # 1Mb
+    # The Python support for fetching resources from the web is layered. urllib uses the httplib
+    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
+    # a socket should wait for a response before timing out. By default the socket module has no
+    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib
+    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
+    # doing the following.
+    socket.setdefaulttimeout( 600 )
+    # The following calls to urllib.urlopen() will use the above default timeout
     try:
         if not URL_method or URL_method == 'get':
             page = urllib.urlopen( URL )
         elif URL_method == 'post':
             page = urllib.urlopen( URL, urllib.urlencode( params ) )
-    except:
-        stop_err( 'It appears that the remote data source application is currently off line. Please try again later.' )
+    except Exception, e:
+        stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
+    out = open( filename, 'w' )
     while 1:
         chunk = page.read( CHUNK_SIZE )
         if not chunk: