# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks@emory.edu>
# Date 1285793202 14400
# Node ID 516a02525898381be3a9af8f3ffec205133dbda1
# Parent a9694088cf69e31fbd3f940cca497e1d2e366f75
Move export_history.py script into lib/galaxy/tools/imp_exp so that it's simpler to execute.

--- /dev/null
+++ b/lib/galaxy/tools/imp_exp/export_history.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+"""
+Export a history to an archive file using attribute files.
+
+usage: %prog history_attrs dataset_attrs job_attrs out_file
+    -G, --gzip: gzip archive file
+"""
+
+from galaxy import eggs
+from galaxy.util.json import *
+import optparse, sys, os, tempfile, tarfile
+
+def create_archive( history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False ):
+    """ Create archive from the given attribute/metadata files and save it to out_file. """
+    tarfile_mode = "w"
+    if gzip:
+        tarfile_mode += ":gz"
+    try:
+
+        history_archive = tarfile.open( out_file, tarfile_mode )
+
+        # Read datasets attributes from file.
+        datasets_attr_in = open( datasets_attrs_file, 'rb' )
+        datasets_attr_str = ''
+        buffsize = 1048576
+        try:
+            while True:
+                datasets_attr_str += datasets_attr_in.read( buffsize )
+                if not datasets_attr_str or len( datasets_attr_str ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        datasets_attr_in.close()
+        datasets_attrs = from_json_string( datasets_attr_str )
+
+        # Add datasets to archive and update dataset attributes.
+        # TODO: security check to ensure that files added are in Galaxy dataset directory?
+        for dataset_attrs in datasets_attrs:
+            dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
+            dataset_archive_name = os.path.join( "datasets", os.path.split( dataset_file_name )[-1] )
+            history_archive.add( dataset_file_name, arcname=dataset_archive_name )
+            # Update dataset filename to be archive name.
+            dataset_attrs[ 'file_name' ] = dataset_archive_name
+
+        # Rewrite dataset attributes file.
+        datasets_attrs_out = open( datasets_attrs_file, 'w' )
+        datasets_attrs_out.write( to_json_string( datasets_attrs ) )
+        datasets_attrs_out.close()
+
+        # Finish archive.
+        history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
+        history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
+        history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
+        history_archive.close()
+
+        # Status.
+        return 'Created history archive.'
+    except Exception, e:
+        return 'Error creating history archive: %s' % str( e ), sys.stderr
+
+if __name__ == "__main__":
+    # Parse command line.
+    parser = optparse.OptionParser()
+    parser.add_option( '-G', '--gzip', dest='gzip', action="store_true", help='Compress archive using gzip.' )
+    (options, args) = parser.parse_args()
+    gzip = bool( options.gzip )
+    history_attrs, dataset_attrs, job_attrs, out_file = args
+
+    # Create archive.
+    status = create_archive( history_attrs, dataset_attrs, job_attrs, out_file, gzip )
+    print status
--- a/lib/galaxy/tools/imp_exp/__init__.py
+++ b/lib/galaxy/tools/imp_exp/__init__.py
@@ -199,9 +199,10 @@ class JobExportHistoryArchiveWrapper( ob
         options = ""
         if jeha.compressed:
             options = "-G"
-        return "%s %s %s %s %s %s" % ( os.path.join( os.path.abspath( os.getcwd() ), "export_history.sh" ), \
-                                       options, history_attrs_filename, datasets_attrs_filename, \
-                                       jobs_attrs_filename, jeha.dataset.file_name )
+        return "python %s %s %s %s %s %s" % (
+            os.path.join( os.path.abspath( os.getcwd() ), "lib/galaxy/tools/imp_exp/export_history.py" ), \
+            options, history_attrs_filename, datasets_attrs_filename, jobs_attrs_filename, \
+            jeha.dataset.file_name )

     def cleanup_after_job( self, db_session ):
         """ Remove temporary directory and attribute files generated during setup for this job. """
--- a/scripts/export_history.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-"""
-Export a history to an archive file using attribute files.
-
-NOTE: This should not be called directly. Use the set_metadata.sh script in Galaxy's
-top level directly.
-
-usage: %prog history_attrs dataset_attrs job_attrs out_file
-    -G, --gzip: gzip archive file
-    -D, --delete_dir: delete attribute files' directory if operation completed successfully
-"""
-from galaxy import eggs
-import os, pkg_resources, tempfile, tarfile
-pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from galaxy.util.json import *
-from warnings import warn
-from bx.cookbook import doc_optparse
-
-def create_archive( history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False ):
-    """ Create archive from the given attribute/metadata files and save it to out_file. """
-    tarfile_mode = "w"
-    if gzip:
-        tarfile_mode += ":gz"
-    try:
-
-        history_archive = tarfile.open( out_file, tarfile_mode )
-
-        # Read datasets attributes from file.
-        datasets_attr_in = open( datasets_attrs_file, 'rb' )
-        datasets_attr_str = ''
-        buffsize = 1048576
-        try:
-            while True:
-                datasets_attr_str += datasets_attr_in.read( buffsize )
-                if not datasets_attr_str or len( datasets_attr_str ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        datasets_attr_in.close()
-        datasets_attrs = from_json_string( datasets_attr_str )
-
-        # Add datasets to archive and update dataset attributes.
-        # TODO: security check to ensure that files added are in Galaxy dataset directory?
-        for dataset_attrs in datasets_attrs:
-            dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
-            dataset_archive_name = os.path.join( "datasets", os.path.split( dataset_file_name )[-1] )
-            history_archive.add( dataset_file_name, arcname=dataset_archive_name )
-            # Update dataset filename to be archive name.
-            dataset_attrs[ 'file_name' ] = dataset_archive_name
-
-        # Rewrite dataset attributes file.
-        datasets_attrs_out = open( datasets_attrs_file, 'w' )
-        datasets_attrs_out.write( to_json_string( datasets_attrs ) )
-        datasets_attrs_out.close()
-
-        # Finish archive.
-        history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
-        history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
-        history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
-        history_archive.close()
-
-        # Status.
-        return 'Created history archive.'
-    except Exception, e:
-        return 'Error creating history archive: %s' + str( e ), sys.stderr
-
-if __name__ == "__main__":
-    # Parse command line.
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        gzip = bool( options.gzip )
-        delete_dir = bool( options.delete_dir )
-        history_attrs, dataset_attrs, job_attrs, out_file = args
-    except:
-        doc_optparse.exception()
-
-    # Create archive.
-    status = create_archive( history_attrs, dataset_attrs, job_attrs, out_file, gzip )
-    print status
--- a/lib/galaxy/tools/actions/history_imp_exp.py
+++ b/lib/galaxy/tools/actions/history_imp_exp.py
@@ -54,9 +54,6 @@ class ExportHistoryToolAction( ToolActio
         job_wrapper = JobExportHistoryArchiveWrapper( job )
         cmd_line = job_wrapper.setup_job( trans, jeha, include_hidden=incoming[ 'include_hidden' ], \
                                           include_deleted=incoming[ 'include_deleted' ] )
-
-        # Save jeha.
-        trans.sa_session.flush()

         #
         # Add parameters to job_parameter table.
--- a/export_history.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-cd `dirname $0`
-python ./scripts/export_history.py $@
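
For reference, the command string assembled by JobExportHistoryArchiveWrapper above amounts to an invocation like the one below, run from Galaxy's root directory (the wrapper joins os.getcwd() with the script's relative path). The attribute-file and archive names here are placeholder examples, not real paths; in practice they are the job's temporary files, and -G is passed only when jeha.compressed is set:

    python lib/galaxy/tools/imp_exp/export_history.py -G \
        history_attrs.txt datasets_attrs.txt jobs_attrs.txt history_archive.tar.gz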