details: http://www.bx.psu.edu/hg/galaxy/rev/a7ba71cade35 changeset: 2519:a7ba71cade35 user: Dan Blankenberg <dan@bx.psu.edu> date: Mon Aug 03 12:02:54 2009 -0400 description: Enhance data_source.py to take advantage of the content length when known. If content length provided by external application is greater than config.output_size_limit, no data will be retrieved and an error message will be provided to the user. Tools using this script have been updated to provide the max file size on the command line. Resolves ticket #93. 15 file(s) affected in this change: lib/galaxy/jobs/__init__.py tools/data_source/biomart.xml tools/data_source/biomart_test.xml tools/data_source/data_source.py tools/data_source/epigraph_import.xml tools/data_source/epigraph_import_test.xml tools/data_source/eupathdb.xml tools/data_source/flymine.xml tools/data_source/flymine_test.xml tools/data_source/gramene_mart.xml tools/data_source/ucsc_tablebrowser.xml tools/data_source/ucsc_tablebrowser_archaea.xml tools/data_source/ucsc_tablebrowser_test.xml tools/data_source/wormbase.xml tools/data_source/wormbase_test.xml diffs (193 lines): diff -r 07dffb2735dd -r a7ba71cade35 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Fri Jul 31 12:01:02 2009 -0400 +++ b/lib/galaxy/jobs/__init__.py Mon Aug 03 12:02:54 2009 -0400 @@ -595,7 +595,7 @@ self.real_path = real_path self.false_path = false_path def __str__( self ): - if false_path is None: + if self.false_path is None: return self.real_path else: return self.false_path diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/biomart.xml --- a/tools/data_source/biomart.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/biomart.xml Mon Aug 03 12:02:54 2009 -0400 @@ -9,7 +9,7 @@ --> <tool name="BioMart" id="biomart" tool_type="data_source" URL_method="get" version="1.0.1"> <description>Central server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to BioMart Central $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/biomart_test.xml --- a/tools/data_source/biomart_test.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/biomart_test.xml Mon Aug 03 12:02:54 2009 -0400 @@ -9,7 +9,7 @@ --> <tool name="BioMart" id="biomart_test" tool_type="data_source" URL_method="get" version="1.0.1"> <description>Test server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to BioMart Central $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/data_source.py --- a/tools/data_source/data_source.py Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/data_source.py Mon Aug 03 12:02:54 2009 -0400 @@ -21,6 +21,10 @@ def __main__(): filename = sys.argv[1] + try: + max_file_size = int( sys.argv[2] ) + except: + max_file_size = 0 params = {} for line in open( filename, 'r' ): try: @@ -50,6 +54,10 @@ page = urllib.urlopen( URL, urllib.urlencode( params ) ) except Exception, e: stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) ) + if max_file_size: + file_size = int( page.info().get( 'Content-Length', 0 ) ) + if file_size > max_file_size: + stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) ) out = open( filename, 'w' ) while 1: chunk = page.read( CHUNK_SIZE ) diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/epigraph_import.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source" URL_method="get"> <description> server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> <display>go to EpiGRAPH server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/epigraph_import_test.xml --- a/tools/data_source/epigraph_import_test.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/epigraph_import_test.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source" URL_method="get"> <description> test server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> <display>go to EpiGRAPH server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/eupathdb.xml --- a/tools/data_source/eupathdb.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/eupathdb.xml Mon Aug 03 12:02:54 2009 -0400 @@ -5,7 +5,7 @@ --> <tool name="EuPathDB" id="eupathdb" tool_type="data_source" url_method="post"> <description>server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://galaxy.eupathdb.org/eupathdb.galaxy/queries_tools.jsp" check_values="false" method="post"> <display>go to EuPathDB server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=eupathdb" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/flymine.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="Flymine" id="flymine" tool_type="data_source" URL_method="post"> <description>server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.flymine.org" check_values="false" method="get" target="_top"> <display>go to Flymine server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/flymine_test.xml --- a/tools/data_source/flymine_test.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/flymine_test.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="Flymine test" id="flymine_test" tool_type="data_source" URL_method="post"> <description>server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top"> <display>go to Flymine server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/gramene_mart.xml --- a/tools/data_source/gramene_mart.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/gramene_mart.xml Mon Aug 03 12:02:54 2009 -0400 @@ -9,7 +9,7 @@ --> <tool name="GrameneMart" id="gramenemart" tool_type="data_source" URL_method="get" version="1.0.1"> <description> Central server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.gramene.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to GrameneMart Central $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/ucsc_tablebrowser.xml --- a/tools/data_source/ucsc_tablebrowser.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/ucsc_tablebrowser.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source" URL_method="post"> <description>table browser</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> <display>go to UCSC Table Browser $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/ucsc_tablebrowser_archaea.xml --- a/tools/data_source/ucsc_tablebrowser_archaea.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/ucsc_tablebrowser_archaea.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source" URL_method="post"> <description>table browser</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> <display>go to UCSC Table Browser $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/ucsc_tablebrowser_test.xml --- a/tools/data_source/ucsc_tablebrowser_test.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/ucsc_tablebrowser_test.xml Mon Aug 03 12:02:54 2009 -0400 @@ -6,7 +6,7 @@ --> <tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source" URL_method="post"> <description>table browser</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> <display>go to UCSC Table Browser $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/wormbase.xml --- a/tools/data_source/wormbase.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/wormbase.xml Mon Aug 03 12:02:54 2009 -0400 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <tool name="Wormbase" id="wormbase" tool_type="data_source" URL_method="post"> <description>server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://www.wormbase.org/db/seq/gbgff/c_elegans/" check_values="false" target="_top"> <display>go to Wormbase server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase" /> diff -r 07dffb2735dd -r a7ba71cade35 tools/data_source/wormbase_test.xml --- a/tools/data_source/wormbase_test.xml Fri Jul 31 12:01:02 2009 -0400 +++ b/tools/data_source/wormbase_test.xml Mon Aug 03 12:02:54 2009 -0400 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <tool name="Wormbase" id="wormbase_test" tool_type="data_source" URL_method="post"> <description>test server</description> - <command interpreter="python">data_source.py $output</command> + <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> <inputs action="http://dev.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" target="_top"> <display>go to Wormbase test server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=wormbase_test" />