[hg] galaxy 2672: Get rid of the hacky "alternate path" stuff used by the upload tool and fix setting metadata when using autodetect and set_metadata_externally
details:   http://www.bx.psu.edu/hg/galaxy/rev/d3fe789e3931
changeset: 2672:d3fe789e3931
user:      Nate Coraor <nate@bx.psu.edu>
date:      Thu Sep 10 14:52:38 2009 -0400
description:
Get rid of the hacky "alternate path" stuff used by the upload tool and fix
setting metadata when using autodetect and set_metadata_externally

4 file(s) affected in this change:

lib/galaxy/jobs/__init__.py
lib/galaxy/tools/actions/upload.py
tools/data_source/upload.py
tools/data_source/upload.xml

diffs (151 lines):

diff -r dbbc63c0630a -r d3fe789e3931 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py   Thu Sep 10 10:42:50 2009 -0400
+++ b/lib/galaxy/jobs/__init__.py   Thu Sep 10 14:52:38 2009 -0400
@@ -502,13 +502,6 @@
                 context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
                 #should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur
                 for dataset in dataset_assoc.dataset.dataset.history_associations: #need to update all associated output hdas, i.e. history was shared with job running
-                    if context.get( 'path', None ):
-                        # The tool can set an alternate output path for the dataset.
-                        try:
-                            shutil.move( context['path'], dataset.file_name )
-                        except ( IOError, OSError ):
-                            if not context['stderr']:
-                                context['stderr'] = 'This dataset could not be processed'
                     dataset.blurb = 'done'
                     dataset.peek = 'no peek'
                     dataset.info = context['stdout'] + context['stderr']
@@ -707,6 +700,13 @@
                 sizes.append( ( outfile, os.stat( outfile ).st_size ) )
         return sizes
     def setup_external_metadata( self, exec_dir = None, tmp_dir = None, dataset_files_path = None, config_root = None, datatypes_config = None, **kwds ):
+        # extension could still be 'auto' if this is the upload tool.
+        job = model.Job.get( self.job_id )
+        for output_dataset_assoc in job.output_datasets:
+            if output_dataset_assoc.dataset.ext == 'auto':
+                context = self.get_dataset_finish_context( dict(), output_dataset_assoc.dataset.dataset )
+                output_dataset_assoc.dataset.extension = context.get( 'ext', 'data' )
+        mapping.context.current.flush()
         if tmp_dir is None:
             #this dir should should relative to the exec_dir
             tmp_dir = self.app.config.new_file_path
@@ -716,7 +716,6 @@
             config_root = self.app.config.root
         if datatypes_config is None:
             datatypes_config = self.app.config.datatypes_config
-        job = model.Job.get( self.job_id )
         return self.external_output_metadata.setup_external_metadata( [ output_dataset_assoc.dataset for output_dataset_assoc in job.output_datasets ], exec_dir = exec_dir, tmp_dir = tmp_dir, dataset_files_path = dataset_files_path, config_root = config_root, datatypes_config = datatypes_config, **kwds )

 class DefaultJobDispatcher( object ):
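A minimal standalone sketch of the idea in the setup_external_metadata hunk above: before metadata is set externally, any output whose extension is still 'auto' picks up the real extension the upload tool recorded in its per-dataset finish context, falling back to the generic 'data' type. The Dataset class and resolve_auto_extensions function below are hypothetical stand-ins for illustration, not Galaxy's actual model classes:

    # Hypothetical stand-ins, for illustration only -- not Galaxy's real classes.
    class Dataset( object ):
        def __init__( self, ext ):
            self.ext = ext

    def resolve_auto_extensions( outputs, finish_contexts ):
        # finish_contexts are per-dataset dicts parsed from the tool's
        # galaxy.json, e.g. { 'ext': 'fastq', ... }.
        for dataset, context in zip( outputs, finish_contexts ):
            if dataset.ext == 'auto':
                # Fall back to the generic 'data' type if detection wrote no 'ext'.
                dataset.ext = context.get( 'ext', 'data' )

    outputs = [ Dataset( 'auto' ), Dataset( 'bed' ) ]
    resolve_auto_extensions( outputs, [ { 'ext': 'fastq' }, {} ] )
    print( [ d.ext for d in outputs ] )   # ['fastq', 'bed']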
diff -r dbbc63c0630a -r d3fe789e3931 lib/galaxy/tools/actions/upload.py
--- a/lib/galaxy/tools/actions/upload.py   Thu Sep 10 10:42:50 2009 -0400
+++ b/lib/galaxy/tools/actions/upload.py   Thu Sep 10 14:52:38 2009 -0400
@@ -144,7 +144,7 @@
             job.add_parameter( name, value )
         job.add_parameter( 'paramfile', to_json_string( json_file_path ) )
         for i, dataset in enumerate( data_list ):
-            job.add_output_dataset( i, dataset )
+            job.add_output_dataset( 'output%i' % i, dataset )
         job.state = trans.app.model.Job.states.NEW
         trans.app.model.flush()

diff -r dbbc63c0630a -r d3fe789e3931 tools/data_source/upload.py
--- a/tools/data_source/upload.py   Thu Sep 10 10:42:50 2009 -0400
+++ b/tools/data_source/upload.py   Thu Sep 10 14:52:38 2009 -0400
@@ -115,7 +115,14 @@
         return ( True, False, test_ext )
     return ( True, True, test_ext )

-def add_file( dataset, json_file ):
+def parse_outputs( args ):
+    rval = {}
+    for arg in args:
+        id, path = arg.split( ':', 1 )
+        rval[int( id )] = path
+    return rval
+
+def add_file( dataset, json_file, output_path ):
     data_type = None
     line_count = None

@@ -229,16 +236,18 @@
         ext = dataset.ext
         if ext == 'auto':
             ext = 'data'
+    # Move the dataset to its "real" path
+    shutil.move( dataset.path, output_path )
+    # Write the job info
     info = dict( type = 'dataset',
                  dataset_id = dataset.dataset_id,
-                 path = dataset.path,
                  ext = ext,
                  stdout = 'uploaded %s file' % data_type,
                  name = dataset.name,
                  line_count = line_count )
     json_file.write( to_json_string( info ) + "\n" )

-def add_composite_file( dataset, json_file ):
+def add_composite_file( dataset, json_file, output_path ):
     if dataset.composite_files:
         os.mkdir( dataset.extra_files_path )
         for name, value in dataset.composite_files.iteritems():
@@ -253,17 +262,21 @@
             else:
                 sniff.convert_newlines( dataset.composite_file_paths[ value.name ][ 'path' ] )
             shutil.move( dataset.composite_file_paths[ value.name ][ 'path' ], os.path.join( dataset.extra_files_path, name ) )
+    # Move the dataset to its "real" path
+    shutil.move( dataset.primary_file, output_path )
+    # Write the job info
     info = dict( type = 'dataset',
                  dataset_id = dataset.dataset_id,
-                 path = dataset.primary_file,
                  stdout = 'uploaded %s file' % dataset.file_type )
     json_file.write( to_json_string( info ) + "\n" )

 def __main__():

-    if len( sys.argv ) != 2:
-        print >>sys.stderr, 'usage: upload.py <json paramfile>'
+    if len( sys.argv ) < 2:
+        print >>sys.stderr, 'usage: upload.py <json paramfile> <output spec> ...'
         sys.exit( 1 )
+
+    output_paths = parse_outputs( sys.argv[2:] )

     json_file = open( 'galaxy.json', 'w' )

@@ -271,10 +284,16 @@
         dataset = from_json_string( line )
         dataset = util.bunch.Bunch( **safe_dict( dataset ) )

+        try:
+            output_path = output_paths[int( dataset.dataset_id )]
+        except:
+            print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id
+            sys.exit( 1 )
+
         if dataset.type == 'composite':
-            add_composite_file( dataset, json_file )
+            add_composite_file( dataset, json_file, output_path )
         else:
-            add_file( dataset, json_file )
+            add_file( dataset, json_file, output_path )

     # clean up paramfile
     try:
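To make the new command-line contract concrete: each argument after the JSON paramfile is a '<dataset id>:<output path>' spec, split on the first ':' only so the path itself may contain colons. A small self-contained demo of the parse_outputs function added above (ids and paths are hypothetical):

    def parse_outputs( args ):
        rval = {}
        for arg in args:
            id, path = arg.split( ':', 1 )
            rval[int( id )] = path
        return rval

    specs = [ '12:/galaxy/database/files/000/dataset_12.dat',
              '13:/galaxy/database/files/000/dataset_13.dat' ]
    print( parse_outputs( specs ) )
    # -> {12: '/galaxy/database/files/000/dataset_12.dat',
    #     13: '/galaxy/database/files/000/dataset_13.dat'}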
diff -r dbbc63c0630a -r d3fe789e3931 tools/data_source/upload.xml
--- a/tools/data_source/upload.xml   Thu Sep 10 10:42:50 2009 -0400
+++ b/tools/data_source/upload.xml   Thu Sep 10 14:52:38 2009 -0400
@@ -7,6 +7,12 @@
   <action module="galaxy.tools.actions.upload" class="UploadToolAction"/>
   <command interpreter="python">
     upload.py $paramfile
+    #set $outnum = 0
+    #while $varExists('output%i' % $outnum):
+        #set $output = $getVar('output%i' % $outnum)
+        #set $outnum += 1
+        ${output.dataset.dataset.id}:${output}
+    #end while
   </command>
   <inputs>
     <param name="file_type" type="select" label="File Format" help="Which format? See help below">
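For illustration, the Cheetah loop above renders one spec per job output (which is why the tool action now names outputs 'output0', 'output1', ...), so a two-output upload job would be invoked along these lines (ids and paths are hypothetical):

    upload.py /galaxy/database/tmp/upload_params_1.json \
        12:/galaxy/database/files/000/dataset_12.dat \
        13:/galaxy/database/files/000/dataset_13.dat

Since upload.py now moves each finished upload to its spec'd path itself, the job finish code no longer needs the 'alternate path' move that the first hunk removes.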