1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/2604f7623fb4/
Changeset:   2604f7623fb4
User:        dannon
Date:        2014-09-05 21:43:36
Summary:     Merged in kellrott/galaxy-farm/update-hist-export (pull request #472)

             Updating history import export
Affected #:  8 files

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/tools/imp_exp/__init__.py
--- a/lib/galaxy/tools/imp_exp/__init__.py
+++ b/lib/galaxy/tools/imp_exp/__init__.py
@@ -3,6 +3,7 @@
 import logging
 import tempfile
 import json
+import datetime
 from galaxy import model
 from galaxy.tools.parameters.basic import UnvalidatedValue
 from galaxy.web.framework.helpers import to_unicode
@@ -134,6 +135,11 @@
             datasets_attrs_file_name = os.path.join( archive_dir, 'datasets_attrs.txt')
             datasets_attr_str = read_file_contents( datasets_attrs_file_name )
             datasets_attrs = from_json_string( datasets_attr_str )
+
+            if os.path.exists( datasets_attrs_file_name + ".provenance" ):
+                provenance_attr_str = read_file_contents( datasets_attrs_file_name + ".provenance" )
+                provenance_attrs = from_json_string( provenance_attr_str )
+                datasets_attrs += provenance_attrs

             # Get counts of how often each dataset file is used; a file can
             # be linked to multiple dataset objects (HDAs).
@@ -162,7 +168,14 @@
                                                        history=new_history,
                                                        create_dataset=True,
                                                        sa_session=self.sa_session )
-                hda.state = hda.states.OK
+                if 'uuid' in dataset_attrs:
+                    hda.dataset.uuid = dataset_attrs["uuid"]
+                if dataset_attrs.get('exported', True) == False:
+                    hda.state = hda.states.DISCARDED
+                    hda.deleted = True
+                    hda.purged = True
+                else:
+                    hda.state = hda.states.OK
                 self.sa_session.add( hda )
                 self.sa_session.flush()
                 new_history.add_dataset( hda, genome_build=None )
@@ -171,17 +184,18 @@
                 #permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                 #trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                 self.sa_session.flush()
-
-                # Do security check and move/copy dataset data.
-                temp_dataset_file_name = \
-                    os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
-                if not file_in_dir( temp_dataset_file_name, os.path.join( archive_dir, "datasets" ) ):
-                    raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
-                if datasets_usage_counts[ temp_dataset_file_name ] == 1:
-                    shutil.move( temp_dataset_file_name, hda.file_name )
-                else:
-                    datasets_usage_counts[ temp_dataset_file_name ] -= 1
-                    shutil.copyfile( temp_dataset_file_name, hda.file_name )
+                if dataset_attrs.get('exported', True) == True:
+                    # Do security check and move/copy dataset data.
+                    temp_dataset_file_name = \
+                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
+                    if not file_in_dir( temp_dataset_file_name, os.path.join( archive_dir, "datasets" ) ):
+                        raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
+                    if datasets_usage_counts[ temp_dataset_file_name ] == 1:
+                        shutil.move( temp_dataset_file_name, hda.file_name )
+                    else:
+                        datasets_usage_counts[ temp_dataset_file_name ] -= 1
+                        shutil.copyfile( temp_dataset_file_name, hda.file_name )
+                    hda.dataset.set_total_size() #update the filesize record in the database

                 # Set tags, annotations.
                 if user:
@@ -225,10 +239,21 @@
                 # TODO: set session?
                 # imported_job.session = trans.get_galaxy_session().id
                 imported_job.history = new_history
+                imported_job.imported = True
                 imported_job.tool_id = job_attrs[ 'tool_id' ]
                 imported_job.tool_version = job_attrs[ 'tool_version' ]
                 imported_job.set_state( job_attrs[ 'state' ] )
-                imported_job.imported = True
+                imported_job.info = job_attrs.get('info', None)
+                imported_job.exit_code = job_attrs.get('exit_code', None)
+                imported_job.traceback = job_attrs.get('traceback', None)
+                imported_job.stdout = job_attrs.get('stdout', None)
+                imported_job.stderr = job_attrs.get('stderr', None)
+                imported_job.command_line = job_attrs.get('command_line', None)
+                try:
+                    imported_job.create_time = datetime.datetime.strptime(job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
+                    imported_job.update_time = datetime.datetime.strptime(job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
+                except:
+                    pass
                 self.sa_session.add( imported_job )
                 self.sa_session.flush()

@@ -266,6 +291,16 @@
                     if output_hda:
                         imported_job.add_output_dataset( output_hda.name, output_hda )

+                # Connect jobs to input datasets.
+                if 'input_mapping' in job_attrs:
+                    for input_name, input_hid in job_attrs[ 'input_mapping' ].items():
+                        #print "%s job has input dataset %i" % (imported_job.id, input_hid)
+                        input_hda = self.sa_session.query( model.HistoryDatasetAssociation ) \
+                            .filter_by( history=new_history, hid=input_hid ).first()
+                        if input_hda:
+                            imported_job.add_input_dataset( input_name, input_hda )
+
+
             self.sa_session.flush()

             # Done importing.
@@ -323,7 +358,7 @@
             def default( self, obj ):
                 """ Encode an HDA, default encoding for everything else. """
                 if isinstance( obj, trans.app.model.HistoryDatasetAssociation ):
-                    return {
+                    rval = {
                         "__HistoryDatasetAssociation__": True,
                         "create_time": obj.create_time.__str__(),
                         "update_time": obj.update_time.__str__(),
@@ -339,9 +374,17 @@
                         "deleted": obj.deleted,
                         "visible": obj.visible,
                         "file_name": obj.file_name,
+                        "uuid" : ( lambda uuid: str( uuid ) if uuid else None )( obj.dataset.uuid ),
                         "annotation": to_unicode( getattr( obj, 'annotation', '' ) ),
                         "tags": get_item_tag_dict( obj ),
                     }
+                    if not obj.visible and not include_hidden:
+                        rval['exported'] = False
+                    elif obj.deleted and not include_deleted:
+                        rval['exported'] = False
+                    else:
+                        rval['exported'] = True
+                    return rval
                 if isinstance( obj, UnvalidatedValue ):
                     return obj.__str__()
                 return json.JSONEncoder.default( self, obj )
@@ -374,19 +417,23 @@
         datasets = self.get_history_datasets( trans, history )
         included_datasets = []
         datasets_attrs = []
+        provenance_attrs = []
         for dataset in datasets:
-            if not dataset.visible and not include_hidden:
-                continue
-            if dataset.deleted and not include_deleted:
-                continue
             dataset.annotation = self.get_item_annotation_str( trans.sa_session, history.user, dataset )
-            datasets_attrs.append( dataset )
-            included_datasets.append( dataset )
+            if (not dataset.visible and not include_hidden) or (dataset.deleted and not include_deleted):
+                provenance_attrs.append( dataset )
+            else:
+                datasets_attrs.append( dataset )
+                included_datasets.append( dataset )
         datasets_attrs_filename = tempfile.NamedTemporaryFile( dir=temp_output_dir ).name
         datasets_attrs_out = open( datasets_attrs_filename, 'w' )
         datasets_attrs_out.write( to_json_string( datasets_attrs, cls=HistoryDatasetAssociationEncoder ) )
         datasets_attrs_out.close()
         jeha.datasets_attrs_filename = datasets_attrs_filename
+
+        provenance_attrs_out = open( datasets_attrs_filename + ".provenance", 'w' )
+        provenance_attrs_out.write( to_json_string( provenance_attrs, cls=HistoryDatasetAssociationEncoder ) )
+        provenance_attrs_out.close()

         #
         # Write jobs attributes file.
@@ -422,6 +469,15 @@
             job_attrs[ 'tool_id' ] = job.tool_id
             job_attrs[ 'tool_version' ] = job.tool_version
             job_attrs[ 'state' ] = job.state
+            job_attrs[ 'info' ] = job.info
+            job_attrs[ 'traceback' ] = job.traceback
+            job_attrs[ 'command_line' ] = job.command_line
+            job_attrs[ 'stderr' ] = job.stderr
+            job_attrs[ 'stdout' ] = job.stdout
+            job_attrs[ 'exit_code' ] = job.exit_code
+            job_attrs[ 'create_time' ] = job.create_time.isoformat()
+            job_attrs[ 'update_time' ] = job.update_time.isoformat()
+

             # Get the job's parameters
             try:
@@ -438,11 +494,14 @@

             # -- Get input, output datasets. --

             input_datasets = []
+            input_mapping = {}
             for assoc in job.input_datasets:
                 # Optional data inputs will not have a dataset.
                 if assoc.dataset:
                     input_datasets.append( assoc.dataset.hid )
+                    input_mapping[assoc.name] = assoc.dataset.hid
             job_attrs[ 'input_datasets' ] = input_datasets
+            job_attrs[ 'input_mapping'] = input_mapping
             output_datasets = [ assoc.dataset.hid for assoc in job.output_datasets ]
             job_attrs[ 'output_datasets' ] = output_datasets

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/tools/imp_exp/export_history.py
--- a/lib/galaxy/tools/imp_exp/export_history.py
+++ b/lib/galaxy/tools/imp_exp/export_history.py
@@ -44,12 +44,13 @@
     # Add datasets to archive and update dataset attributes.
    # TODO: security check to ensure that files added are in Galaxy dataset directory?
     for dataset_attrs in datasets_attrs:
-        dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
-        dataset_archive_name = os.path.join( 'datasets',
-                                             get_dataset_filename( dataset_attrs[ 'name' ], dataset_attrs[ 'extension' ] ) )
-        history_archive.add( dataset_file_name, arcname=dataset_archive_name )
-        # Update dataset filename to be archive name.
-        dataset_attrs[ 'file_name' ] = dataset_archive_name
+        if dataset_attrs['exported']:
+            dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
+            dataset_archive_name = os.path.join( 'datasets',
+                                                 get_dataset_filename( dataset_attrs[ 'name' ], dataset_attrs[ 'extension' ] ) )
+            history_archive.add( dataset_file_name, arcname=dataset_archive_name )
+            # Update dataset filename to be archive name.
+            dataset_attrs[ 'file_name' ] = dataset_archive_name

     # Rewrite dataset attributes file.
     datasets_attrs_out = open( datasets_attrs_file, 'w' )
@@ -59,6 +60,8 @@

     # Finish archive.
     history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
     history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
+    if os.path.exists( datasets_attrs_file + ".provenance" ):
+        history_archive.add( datasets_attrs_file + ".provenance", arcname="datasets_attrs.txt.provenance" )
     history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
     history_archive.close()

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/webapps/galaxy/api/histories.py
--- a/lib/galaxy/webapps/galaxy/api/histories.py
+++ b/lib/galaxy/webapps/galaxy/api/histories.py
@@ -312,6 +312,8 @@
                                           check_ownership=False, check_accessible=True )
         jeha = history.latest_export
         up_to_date = jeha and jeha.up_to_date
+        if 'force' in kwds:
+            up_to_date = False #Temp hack to force rebuild everytime during dev
         if not up_to_date:
             # Need to create new JEHA + job.
             gzip = kwds.get( "gzip", True )

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/webapps/galaxy/controllers/history.py
--- a/lib/galaxy/webapps/galaxy/controllers/history.py
+++ b/lib/galaxy/webapps/galaxy/controllers/history.py
@@ -1118,7 +1118,7 @@

     #TODO: used in this file and index.mako
     @web.expose
-    def export_archive( self, trans, id=None, gzip=True, include_hidden=False, include_deleted=False ):
+    def export_archive( self, trans, id=None, gzip=True, include_hidden=False, include_deleted=False, preview=False ):
         """ Export a history to an archive. """
         #
         # Get history to export.
@@ -1139,7 +1139,13 @@
         jeha = history.latest_export
         if jeha and jeha.up_to_date:
             if jeha.ready:
-                return self.serve_ready_history_export( trans, jeha )
+                if preview:
+                    url = url_for( controller='history', action="export_archive", id=id, qualified=True )
+                    return trans.show_message( "History Ready: '%(n)s'. Use this link to download \
+                        the archive or import it to another Galaxy server: \
+                        <a href='%(u)s'>%(u)s</a>" % ( { 'n' : history.name, 'u' : url } ) )
+                else:
+                    return self.serve_ready_history_export( trans, jeha )
             elif jeha.preparing:
                 return trans.show_message( "Still exporting history %(n)s; please check back soon. Link: <a href='%(s)s'>%(s)s</a>" \
                     % ( { 'n' : history.name, 's' : url_for( controller='history', action="export_archive", id=id, qualified=True ) } ) )

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 templates/show_params.mako
--- a/templates/show_params.mako
+++ b/templates/show_params.mako
@@ -107,6 +107,7 @@
         <tbody>
             <%
                 encoded_hda_id = trans.security.encode_id( hda.id )
+                encoded_history_id = trans.security.encode_id( hda.history_id )
             %>
             <tr><td>Name:</td><td>${hda.name | h}</td></tr>
             <tr><td>Created:</td><td>${hda.create_time.strftime(trans.app.config.pretty_datetime_format)}</td></tr>
@@ -120,6 +121,7 @@
             <tr><td>Tool Standard Error:</td><td><a href="${h.url_for( controller='dataset', action='stderr', dataset_id=encoded_hda_id )}">stderr</a></td></tr>
             <tr><td>Tool Exit Code:</td><td>${job.exit_code | h}</td></tr>
             <tr><td>API ID:</td><td>${encoded_hda_id}</td></tr>
+            <tr><td>History ID:</td><td>${encoded_history_id}</td></tr>
             %if hda.dataset.uuid:
             <tr><td>UUID:</td><td>${hda.dataset.uuid}</td></tr>
             %endif

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 templates/webapps/galaxy/root/index.mako
--- a/templates/webapps/galaxy/root/index.mako
+++ b/templates/webapps/galaxy/root/index.mako
@@ -105,7 +105,7 @@
                         galaxy_main.location = "${h.url_for( controller='history', action='citations' )}";
                     },
                     "${_("Export to File")}": function() {
-                        galaxy_main.location = "${h.url_for( controller='history', action='export_archive' )}";
+                        galaxy_main.location = "${h.url_for( controller='history', action='export_archive', preview=True )}";
                     },
                     "${_("Delete")}": function() {
                         if ( confirm( "Really delete the current history?" ) ) {

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this email.