details:   http://www.bx.psu.edu/hg/galaxy/rev/d3fe789e3931
changeset: 2672:d3fe789e3931
user:      Nate Coraor <nate@bx.psu.edu>
date:      Thu Sep 10 14:52:38 2009 -0400
description:
Get rid of the hacky "alternate path" stuff used by the upload tool and fix setting metadata when using autodetect and set_metadata_externally

4 file(s) affected in this change:

lib/galaxy/jobs/__init__.py
lib/galaxy/tools/actions/upload.py
tools/data_source/upload.py
tools/data_source/upload.xml

diffs (151 lines):

diff -r dbbc63c0630a -r d3fe789e3931 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py	Thu Sep 10 10:42:50 2009 -0400
+++ b/lib/galaxy/jobs/__init__.py	Thu Sep 10 14:52:38 2009 -0400
@@ -502,13 +502,6 @@
             context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
             #should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur
             for dataset in dataset_assoc.dataset.dataset.history_associations: #need to update all associated output hdas, i.e. history was shared with job running
-                if context.get( 'path', None ):
-                    # The tool can set an alternate output path for the dataset.
-                    try:
-                        shutil.move( context['path'], dataset.file_name )
-                    except ( IOError, OSError ):
-                        if not context['stderr']:
-                            context['stderr'] = 'This dataset could not be processed'
                 dataset.blurb = 'done'
                 dataset.peek = 'no peek'
                 dataset.info = context['stdout'] + context['stderr']
@@ -707,6 +700,13 @@
         sizes.append( ( outfile, os.stat( outfile ).st_size ) )
         return sizes
     def setup_external_metadata( self, exec_dir = None, tmp_dir = None, dataset_files_path = None, config_root = None, datatypes_config = None, **kwds ):
+        # extension could still be 'auto' if this is the upload tool.
+        job = model.Job.get( self.job_id )
+        for output_dataset_assoc in job.output_datasets:
+            if output_dataset_assoc.dataset.ext == 'auto':
+                context = self.get_dataset_finish_context( dict(), output_dataset_assoc.dataset.dataset )
+                output_dataset_assoc.dataset.extension = context.get( 'ext', 'data' )
+        mapping.context.current.flush()
         if tmp_dir is None:
             #this dir should should relative to the exec_dir
             tmp_dir = self.app.config.new_file_path
@@ -716,7 +716,6 @@
             config_root = self.app.config.root
         if datatypes_config is None:
             datatypes_config = self.app.config.datatypes_config
-        job = model.Job.get( self.job_id )
         return self.external_output_metadata.setup_external_metadata( [ output_dataset_assoc.dataset for output_dataset_assoc in job.output_datasets ], exec_dir = exec_dir, tmp_dir = tmp_dir, dataset_files_path = dataset_files_path, config_root = config_root, datatypes_config = datatypes_config, **kwds )

 class DefaultJobDispatcher( object ):
diff -r dbbc63c0630a -r d3fe789e3931 lib/galaxy/tools/actions/upload.py
--- a/lib/galaxy/tools/actions/upload.py	Thu Sep 10 10:42:50 2009 -0400
+++ b/lib/galaxy/tools/actions/upload.py	Thu Sep 10 14:52:38 2009 -0400
@@ -144,7 +144,7 @@
                 job.add_parameter( name, value )
         job.add_parameter( 'paramfile', to_json_string( json_file_path ) )
         for i, dataset in enumerate( data_list ):
-            job.add_output_dataset( i, dataset )
+            job.add_output_dataset( 'output%i' % i, dataset )
         job.state = trans.app.model.Job.states.NEW
         trans.app.model.flush()
diff -r dbbc63c0630a -r d3fe789e3931 tools/data_source/upload.py
--- a/tools/data_source/upload.py	Thu Sep 10 10:42:50 2009 -0400
+++ b/tools/data_source/upload.py	Thu Sep 10 14:52:38 2009 -0400
@@ -115,7 +115,14 @@
             return ( True, False, test_ext )
     return ( True, True, test_ext )

-def add_file( dataset, json_file ):
+def parse_outputs( args ):
+    rval = {}
+    for arg in args:
+        id, path = arg.split( ':', 1 )
+        rval[int( id )] = path
+    return rval
+
+def add_file( dataset, json_file, output_path ):
     data_type = None
     line_count = None
@@ -229,16 +236,18 @@
         ext = dataset.ext
         if ext == 'auto':
             ext = 'data'
+    # Move the dataset to its "real" path
+    shutil.move( dataset.path, output_path )
+    # Write the job info
     info = dict( type = 'dataset',
                  dataset_id = dataset.dataset_id,
-                 path = dataset.path,
                  ext = ext,
                  stdout = 'uploaded %s file' % data_type,
                  name = dataset.name,
                  line_count = line_count )
     json_file.write( to_json_string( info ) + "\n" )

-def add_composite_file( dataset, json_file ):
+def add_composite_file( dataset, json_file, output_path ):
     if dataset.composite_files:
         os.mkdir( dataset.extra_files_path )
         for name, value in dataset.composite_files.iteritems():
@@ -253,17 +262,21 @@
             else:
                 sniff.convert_newlines( dataset.composite_file_paths[ value.name ][ 'path' ] )
             shutil.move( dataset.composite_file_paths[ value.name ][ 'path' ], os.path.join( dataset.extra_files_path, name ) )
+    # Move the dataset to its "real" path
+    shutil.move( dataset.primary_file, output_path )
+    # Write the job info
     info = dict( type = 'dataset',
                  dataset_id = dataset.dataset_id,
-                 path = dataset.primary_file,
                  stdout = 'uploaded %s file' % dataset.file_type )
     json_file.write( to_json_string( info ) + "\n" )

 def __main__():
-    if len( sys.argv ) != 2:
-        print >>sys.stderr, 'usage: upload.py <json paramfile>'
+    if len( sys.argv ) < 2:
+        print >>sys.stderr, 'usage: upload.py <json paramfile> <output spec> ...'
         sys.exit( 1 )
+
+    output_paths = parse_outputs( sys.argv[2:] )

     json_file = open( 'galaxy.json', 'w' )
@@ -271,10 +284,16 @@
         dataset = from_json_string( line )
         dataset = util.bunch.Bunch( **safe_dict( dataset ) )

+        try:
+            output_path = output_paths[int( dataset.dataset_id )]
+        except:
+            print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id
+            sys.exit( 1 )
+
         if dataset.type == 'composite':
-            add_composite_file( dataset, json_file )
+            add_composite_file( dataset, json_file, output_path )
         else:
-            add_file( dataset, json_file )
+            add_file( dataset, json_file, output_path )

     # clean up paramfile
     try:
diff -r dbbc63c0630a -r d3fe789e3931 tools/data_source/upload.xml
--- a/tools/data_source/upload.xml	Thu Sep 10 10:42:50 2009 -0400
+++ b/tools/data_source/upload.xml	Thu Sep 10 14:52:38 2009 -0400
@@ -7,6 +7,12 @@
     <action module="galaxy.tools.actions.upload" class="UploadToolAction"/>
     <command interpreter="python">
         upload.py $paramfile
+        #set $outnum = 0
+        #while $varExists('output%i' % $outnum):
+            #set $output = $getVar('output%i' % $outnum)
+            #set $outnum += 1
+            ${output.dataset.dataset.id}:${output}
+        #end while
     </command>
     <inputs>
         <param name="file_type" type="select" label="File Format" help="Which format? See help below">
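
A note on the new command-line contract (not part of the changeset itself): the Cheetah block added to upload.xml appends one "<dataset id>:<output path>" argument per output dataset to the upload.py invocation, and the new parse_outputs() helper turns those arguments back into a dict keyed by dataset id, so each upload can be moved to its real destination. A minimal sketch of that round trip, with made-up ids and paths:

    # upload.xml now renders a command line shaped roughly like:
    #   upload.py /tmp/paramfile.json 12:/galaxy/files/dataset_12.dat 13:/galaxy/files/dataset_13.dat

    def parse_outputs( args ):
        # Same splitting rule as the helper added to tools/data_source/upload.py:
        # split on the first ':' only, so output paths containing ':' still parse.
        rval = {}
        for arg in args:
            id, path = arg.split( ':', 1 )
            rval[int( id )] = path
        return rval

    output_paths = parse_outputs( [ '12:/galaxy/files/dataset_12.dat',
                                    '13:/galaxy/files/dataset_13.dat' ] )
    assert output_paths[12] == '/galaxy/files/dataset_12.dat'

Since every dataset record in the JSON paramfile carries its dataset_id, add_file() and add_composite_file() can look up the matching output path and shutil.move() the finished upload there directly, which is what makes the old alternate-path reporting through galaxy.json unnecessary.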
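
A note on the metadata ordering fix: setup_external_metadata() has to resolve any 'auto' extensions before building the external metadata command, because the upload tool only reports the sniffed datatype (the 'ext' key it writes to galaxy.json) after it has run. A rough sketch of that ordering, using stand-in objects rather than Galaxy's real model and mapping classes:

    # Stand-in sketch of the ordering enforced in lib/galaxy/jobs/__init__.py;
    # 'job', 'get_finish_context', and 'external_metadata' are hypothetical
    # parameters approximating the real Galaxy objects.
    def setup_external_metadata( job, get_finish_context, external_metadata ):
        # Upload outputs keep the placeholder extension 'auto' until the tool
        # has sniffed the real type; pull it from the dataset's finish context.
        for assoc in job.output_datasets:
            if assoc.dataset.ext == 'auto':
                context = get_finish_context( dict(), assoc.dataset.dataset )
                assoc.dataset.extension = context.get( 'ext', 'data' )
        # Only then are the outputs handed off, so the external metadata
        # process sees a real datatype instead of 'auto'.
        return external_metadata.setup_external_metadata(
            [ assoc.dataset for assoc in job.output_datasets ] )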