1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/2604f7623fb4/
Changeset:   2604f7623fb4
User:        dannon
Date:        2014-09-05 21:43:36
Summary:     Merged in kellrott/galaxy-farm/update-hist-export (pull request #472)

             Updating history import export
Affected #:  8 files

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/tools/imp_exp/__init__.py
--- a/lib/galaxy/tools/imp_exp/__init__.py
+++ b/lib/galaxy/tools/imp_exp/__init__.py
@@ -3,6 +3,7 @@
 import logging
 import tempfile
 import json
+import datetime
 from galaxy import model
 from galaxy.tools.parameters.basic import UnvalidatedValue
 from galaxy.web.framework.helpers import to_unicode
@@ -134,6 +135,11 @@
             datasets_attrs_file_name = os.path.join( archive_dir, 'datasets_attrs.txt')
             datasets_attr_str = read_file_contents( datasets_attrs_file_name )
             datasets_attrs = from_json_string( datasets_attr_str )
+
+            if os.path.exists( datasets_attrs_file_name + ".provenance" ):
+                provenance_attr_str = read_file_contents( datasets_attrs_file_name + ".provenance" )
+                provenance_attrs = from_json_string( provenance_attr_str )
+                datasets_attrs += provenance_attrs

             # Get counts of how often each dataset file is used; a file can
             # be linked to multiple dataset objects (HDAs).
@@ -162,7 +168,14 @@
                                                        history=new_history,
                                                        create_dataset=True,
                                                        sa_session=self.sa_session )
-                hda.state = hda.states.OK
+                if 'uuid' in dataset_attrs:
+                    hda.dataset.uuid = dataset_attrs["uuid"]
+                if dataset_attrs.get('exported', True) == False:
+                    hda.state = hda.states.DISCARDED
+                    hda.deleted = True
+                    hda.purged = True
+                else:
+                    hda.state = hda.states.OK
                 self.sa_session.add( hda )
                 self.sa_session.flush()
                 new_history.add_dataset( hda, genome_build=None )
@@ -171,17 +184,18 @@
                 #permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                 #trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                 self.sa_session.flush()
-
-                # Do security check and move/copy dataset data.
-                temp_dataset_file_name = \
-                    os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
-                if not file_in_dir( temp_dataset_file_name, os.path.join( archive_dir, "datasets" ) ):
-                    raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
-                if datasets_usage_counts[ temp_dataset_file_name ] == 1:
-                    shutil.move( temp_dataset_file_name, hda.file_name )
-                else:
-                    datasets_usage_counts[ temp_dataset_file_name ] -= 1
-                    shutil.copyfile( temp_dataset_file_name, hda.file_name )
+                if dataset_attrs.get('exported', True) == True:
+                    # Do security check and move/copy dataset data.
+                    temp_dataset_file_name = \
+                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
+                    if not file_in_dir( temp_dataset_file_name, os.path.join( archive_dir, "datasets" ) ):
+                        raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
+                    if datasets_usage_counts[ temp_dataset_file_name ] == 1:
+                        shutil.move( temp_dataset_file_name, hda.file_name )
+                    else:
+                        datasets_usage_counts[ temp_dataset_file_name ] -= 1
+                        shutil.copyfile( temp_dataset_file_name, hda.file_name )
+                    hda.dataset.set_total_size() #update the filesize record in the database

                 # Set tags, annotations.
                 if user:
@@ -225,10 +239,21 @@
                 # TODO: set session?
                 # imported_job.session = trans.get_galaxy_session().id
                 imported_job.history = new_history
+                imported_job.imported = True
                 imported_job.tool_id = job_attrs[ 'tool_id' ]
                 imported_job.tool_version = job_attrs[ 'tool_version' ]
                 imported_job.set_state( job_attrs[ 'state' ] )
-                imported_job.imported = True
+                imported_job.info = job_attrs.get('info', None)
+                imported_job.exit_code = job_attrs.get('exit_code', None)
+                imported_job.traceback = job_attrs.get('traceback', None)
+                imported_job.stdout = job_attrs.get('stdout', None)
+                imported_job.stderr = job_attrs.get('stderr', None)
+                imported_job.command_line = job_attrs.get('command_line', None)
+                try:
+                    imported_job.create_time = datetime.datetime.strptime(job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
+                    imported_job.update_time = datetime.datetime.strptime(job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
+                except:
+                    pass
                 self.sa_session.add( imported_job )
                 self.sa_session.flush()

@@ -266,6 +291,16 @@
                     if output_hda:
                         imported_job.add_output_dataset( output_hda.name, output_hda )

+                # Connect jobs to input datasets.
+                if 'input_mapping' in job_attrs:
+                    for input_name, input_hid in job_attrs[ 'input_mapping' ].items():
+                        #print "%s job has input dataset %i" % (imported_job.id, input_hid)
+                        input_hda = self.sa_session.query( model.HistoryDatasetAssociation ) \
+                            .filter_by( history=new_history, hid=input_hid ).first()
+                        if input_hda:
+                            imported_job.add_input_dataset( input_name, input_hda )
+
+
             self.sa_session.flush()

             # Done importing.
@@ -323,7 +358,7 @@
             def default( self, obj ):
                 """ Encode an HDA, default encoding for everything else. """
                 if isinstance( obj, trans.app.model.HistoryDatasetAssociation ):
-                    return {
+                    rval = {
                         "__HistoryDatasetAssociation__": True,
                         "create_time": obj.create_time.__str__(),
                         "update_time": obj.update_time.__str__(),
@@ -339,9 +374,17 @@
                         "deleted": obj.deleted,
                         "visible": obj.visible,
                         "file_name": obj.file_name,
+                        "uuid" : ( lambda uuid: str( uuid ) if uuid else None )( obj.dataset.uuid ),
                         "annotation": to_unicode( getattr( obj, 'annotation', '' ) ),
                         "tags": get_item_tag_dict( obj ),
                     }
+                    if not obj.visible and not include_hidden:
+                        rval['exported'] = False
+                    elif obj.deleted and not include_deleted:
+                        rval['exported'] = False
+                    else:
+                        rval['exported'] = True
+                    return rval
                 if isinstance( obj, UnvalidatedValue ):
                     return obj.__str__()
                 return json.JSONEncoder.default( self, obj )
@@ -374,19 +417,23 @@
         datasets = self.get_history_datasets( trans, history )
         included_datasets = []
         datasets_attrs = []
+        provenance_attrs = []
         for dataset in datasets:
-            if not dataset.visible and not include_hidden:
-                continue
-            if dataset.deleted and not include_deleted:
-                continue
             dataset.annotation = self.get_item_annotation_str( trans.sa_session, history.user, dataset )
-            datasets_attrs.append( dataset )
-            included_datasets.append( dataset )
+            if (not dataset.visible and not include_hidden) or (dataset.deleted and not include_deleted):
+                provenance_attrs.append( dataset )
+            else:
+                datasets_attrs.append( dataset )
+                included_datasets.append( dataset )
         datasets_attrs_filename = tempfile.NamedTemporaryFile( dir=temp_output_dir ).name
         datasets_attrs_out = open( datasets_attrs_filename, 'w' )
         datasets_attrs_out.write( to_json_string( datasets_attrs, cls=HistoryDatasetAssociationEncoder ) )
         datasets_attrs_out.close()
         jeha.datasets_attrs_filename = datasets_attrs_filename
+
+        provenance_attrs_out = open( datasets_attrs_filename + ".provenance", 'w' )
+        provenance_attrs_out.write( to_json_string( provenance_attrs, cls=HistoryDatasetAssociationEncoder ) )
+        provenance_attrs_out.close()

         #
         # Write jobs attributes file.
@@ -422,6 +469,15 @@
             job_attrs[ 'tool_id' ] = job.tool_id
             job_attrs[ 'tool_version' ] = job.tool_version
             job_attrs[ 'state' ] = job.state
+            job_attrs[ 'info' ] = job.info
+            job_attrs[ 'traceback' ] = job.traceback
+            job_attrs[ 'command_line' ] = job.command_line
+            job_attrs[ 'stderr' ] = job.stderr
+            job_attrs[ 'stdout' ] = job.stdout
+            job_attrs[ 'exit_code' ] = job.exit_code
+            job_attrs[ 'create_time' ] = job.create_time.isoformat()
+            job_attrs[ 'update_time' ] = job.update_time.isoformat()
+

             # Get the job's parameters
             try:
@@ -438,11 +494,14 @@

             # -- Get input, output datasets. --

             input_datasets = []
+            input_mapping = {}
             for assoc in job.input_datasets:
                 # Optional data inputs will not have a dataset.
                 if assoc.dataset:
                     input_datasets.append( assoc.dataset.hid )
+                    input_mapping[assoc.name] = assoc.dataset.hid
             job_attrs[ 'input_datasets' ] = input_datasets
+            job_attrs[ 'input_mapping'] = input_mapping
             output_datasets = [ assoc.dataset.hid for assoc in job.output_datasets ]
             job_attrs[ 'output_datasets' ] = output_datasets

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/tools/imp_exp/export_history.py
--- a/lib/galaxy/tools/imp_exp/export_history.py
+++ b/lib/galaxy/tools/imp_exp/export_history.py
@@ -44,12 +44,13 @@
     # Add datasets to archive and update dataset attributes.
    # TODO: security check to ensure that files added are in Galaxy dataset directory?
     for dataset_attrs in datasets_attrs:
-        dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
-        dataset_archive_name = os.path.join( 'datasets',
-                                             get_dataset_filename( dataset_attrs[ 'name' ], dataset_attrs[ 'extension' ] ) )
-        history_archive.add( dataset_file_name, arcname=dataset_archive_name )
-        # Update dataset filename to be archive name.
-        dataset_attrs[ 'file_name' ] = dataset_archive_name
+        if dataset_attrs['exported']:
+            dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
+            dataset_archive_name = os.path.join( 'datasets',
+                                                 get_dataset_filename( dataset_attrs[ 'name' ], dataset_attrs[ 'extension' ] ) )
+            history_archive.add( dataset_file_name, arcname=dataset_archive_name )
+            # Update dataset filename to be archive name.
+            dataset_attrs[ 'file_name' ] = dataset_archive_name

     # Rewrite dataset attributes file.
     datasets_attrs_out = open( datasets_attrs_file, 'w' )
@@ -59,6 +60,8 @@

     # Finish archive.
     history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
     history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
+    if os.path.exists( datasets_attrs_file + ".provenance" ):
+        history_archive.add( datasets_attrs_file + ".provenance", arcname="datasets_attrs.txt.provenance" )
     history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
     history_archive.close()

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/webapps/galaxy/api/histories.py
--- a/lib/galaxy/webapps/galaxy/api/histories.py
+++ b/lib/galaxy/webapps/galaxy/api/histories.py
@@ -312,6 +312,8 @@
                                           check_ownership=False, check_accessible=True )
         jeha = history.latest_export
         up_to_date = jeha and jeha.up_to_date
+        if 'force' in kwds:
+            up_to_date = False #Temp hack to force rebuild everytime during dev
         if not up_to_date:
             # Need to create new JEHA + job.
             gzip = kwds.get( "gzip", True )

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 lib/galaxy/webapps/galaxy/controllers/history.py
--- a/lib/galaxy/webapps/galaxy/controllers/history.py
+++ b/lib/galaxy/webapps/galaxy/controllers/history.py
@@ -1118,7 +1118,7 @@

     #TODO: used in this file and index.mako
     @web.expose
-    def export_archive( self, trans, id=None, gzip=True, include_hidden=False, include_deleted=False ):
+    def export_archive( self, trans, id=None, gzip=True, include_hidden=False, include_deleted=False, preview=False ):
         """ Export a history to an archive. """
         #
         # Get history to export.
@@ -1139,7 +1139,13 @@
         jeha = history.latest_export
         if jeha and jeha.up_to_date:
             if jeha.ready:
-                return self.serve_ready_history_export( trans, jeha )
+                if preview:
+                    url = url_for( controller='history', action="export_archive", id=id, qualified=True )
+                    return trans.show_message( "History Ready: '%(n)s'. Use this link to download \
+                        the archive or import it to another Galaxy server: \
+                        <a href='%(u)s'>%(u)s</a>" % ( { 'n' : history.name, 'u' : url } ) )
+                else:
+                    return self.serve_ready_history_export( trans, jeha )
             elif jeha.preparing:
                 return trans.show_message( "Still exporting history %(n)s; please check back soon. Link: <a href='%(s)s'>%(s)s</a>" \
                     % ( { 'n' : history.name, 's' : url_for( controller='history', action="export_archive", id=id, qualified=True ) } ) )

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 templates/show_params.mako
--- a/templates/show_params.mako
+++ b/templates/show_params.mako
@@ -107,6 +107,7 @@
         <tbody>
             <%
                 encoded_hda_id = trans.security.encode_id( hda.id )
+                encoded_history_id = trans.security.encode_id( hda.history_id )
             %>
             <tr><td>Name:</td><td>${hda.name | h}</td></tr>
             <tr><td>Created:</td><td>${hda.create_time.strftime(trans.app.config.pretty_datetime_format)}</td></tr>
@@ -120,6 +121,7 @@
             <tr><td>Tool Standard Error:</td><td><a href="${h.url_for( controller='dataset', action='stderr', dataset_id=encoded_hda_id )}">stderr</a></td></tr>
             <tr><td>Tool Exit Code:</td><td>${job.exit_code | h}</td></tr>
             <tr><td>API ID:</td><td>${encoded_hda_id}</td></tr>
+            <tr><td>History ID:</td><td>${encoded_history_id}</td></tr>
             %if hda.dataset.uuid:
             <tr><td>UUID:</td><td>${hda.dataset.uuid}</td></tr>
             %endif

diff -r 6866bba8b7e28e25820ae0ca73c9159b91bd922e -r 2604f7623fb459f3cbc6b3e30551e1a8ce0ea1b2 templates/webapps/galaxy/root/index.mako
--- a/templates/webapps/galaxy/root/index.mako
+++ b/templates/webapps/galaxy/root/index.mako
@@ -105,7 +105,7 @@
                         galaxy_main.location = "${h.url_for( controller='history', action='citations' )}";
                     },
                     "${_("Export to File")}": function() {
-                        galaxy_main.location = "${h.url_for( controller='history', action='export_archive' )}";
+                        galaxy_main.location = "${h.url_for( controller='history', action='export_archive', preview=True )}";
                     },
                     "${_("Delete")}": function() {
                         if ( confirm( "Really delete the current history?" ) ) {

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this email.