[galaxy-commits] commit/galaxy-central: dan: Refactor installation of sample files from the Tool Shed into Galaxy. Data Tables and location files are now namespaced when installed from a ToolShed. Data Managers will write only to the location files that were installed along with them. Modify behavior of 'from_shed_config' in Data Tables so that it only applies within the toolshed application.

3 Jun 2013

1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/6ae2d6a466b8/
Changeset:   6ae2d6a466b8
User:        dan
Date:        2013-06-03 22:31:11
Summary:     Refactor installation of sample files from the Tool Shed into Galaxy. Data Tables and location files are now namespaced when installed from a ToolShed. Data Managers will write only to the location files that were installed along with them. Modify behavior of 'from_shed_config' in Data Tables so that it only applies within the toolshed application.

TODO: The directory structure inside of the namespacing is still flat (matching previous behavior). We should allow keeping and utilizing the directory hierarchy, especially when considering two same-named files that exist under different directories in the repository.
Affected #:  9 files

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/galaxy/app.py

--- a/lib/galaxy/app.py
+++ b/lib/galaxy/app.py
@@ -92,7 +92,7 @@
         # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
         self.tool_data_tables.load_from_config_file( config_filename=self.config.shed_tool_data_table_config,
                                                      tool_data_path=self.tool_data_tables.tool_data_path,
-                                                     from_shed_config=True )
+                                                     from_shed_config=False )
         # Initialize the job management configuration
         self.job_config = jobs.JobConfiguration(self)
         # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file.

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/galaxy/config.py
--- a/lib/galaxy/config.py
+++ b/lib/galaxy/config.py
@@ -65,6 +65,11 @@
         else:
             tcf = 'tool_conf.xml'
         self.tool_configs = [ resolve_path( p, self.root ) for p in listify( tcf ) ]
+        self.shed_tool_data_path = kwargs.get( "shed_tool_data_path", None )
+        if self.shed_tool_data_path:
+            self.shed_tool_data_path = resolve_path( self.shed_tool_data_path, self.root )
+        else:
+            self.shed_tool_data_path = self.tool_data_path
         self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root )
         self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root )
         self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) )

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/galaxy/tools/data/__init__.py
--- a/lib/galaxy/tools/data/__init__.py
+++ b/lib/galaxy/tools/data/__init__.py
@@ -13,9 +13,11 @@
 import tempfile
 
 from galaxy import util
+from galaxy.util.odict import odict
 
 log = logging.getLogger( __name__ )
 
+DEFAULT_TABLE_TYPE = 'tabular'
 
 class ToolDataTableManager( object ):
     """Manages a collection of tool data tables"""
@@ -26,9 +28,6 @@
         # at server startup. If tool shed repositories are installed that contain a valid file named tool_data_table_conf.xml.sample, entries
         # from that file are inserted into this dict at the time of installation.
         self.data_tables = {}
-        # Store config elements for on-the-fly persistence to the defined shed_tool_data_table_config file name.
-        self.shed_data_table_elems = []
-        self.data_table_elem_names = []
         if config_filename:
             self.load_from_config_file( config_filename, self.tool_data_path, from_shed_config=False )
 
@@ -58,23 +57,15 @@
         root = tree.getroot()
         table_elems = []
         for table_elem in root.findall( 'table' ):
-            type = table_elem.get( 'type', 'tabular' )
-            assert type in tool_data_table_types, "Unknown data table type '%s'" % type
+            table = ToolDataTable.from_elem( table_elem, tool_data_path, from_shed_config )
             table_elems.append( table_elem )
-            table_elem_name = table_elem.get( 'name', None )
-            if table_elem_name and table_elem_name not in self.data_table_elem_names:
-                self.data_table_elem_names.append( table_elem_name )
-                if from_shed_config:
-                    self.shed_data_table_elems.append( table_elem )
-            table = tool_data_table_types[ type ]( table_elem, tool_data_path, from_shed_config)
             if table.name not in self.data_tables:
                 self.data_tables[ table.name ] = table
                 log.debug( "Loaded tool data table '%s'", table.name )
             else:
-                for table_row in table.data:
-                    # FIXME: This does not account for an entry with the same unique build ID, but a different path.
-                    if table_row not in self.data_tables[ table.name ].data:
-                        self.data_tables[ table.name ].data.append( table_row )
+                log.debug( "Loading another instance of data table '%s', attempting to merge content.", table.name )
+                self.data_tables[ table.name ].merge_tool_data_table( table, allow_duplicates=False ) #only merge content, do not persist to disk, do not allow duplicate rows when merging
+                # FIXME: This does not account for an entry with the same unique build ID, but a different path.
         return table_elems
 
     def add_new_entries_from_config_file( self, config_filename, tool_data_path, shed_tool_data_table_config, persist=False ):
@@ -100,84 +91,98 @@
 
         """
         error_message = ''
-        table_elems = []
         try:
-            tree = util.parse_xml( config_filename )
-            root = tree.getroot()
+            table_elems = self.load_from_config_file( config_filename=config_filename,
+                                                      tool_data_path=tool_data_path,
+                                                      from_shed_config=True )
         except Exception, e:
             error_message = 'Error attempting to parse file %s: %s' % ( str( os.path.split( config_filename )[ 1 ] ), str( e ) )
             log.debug( error_message )
-            return table_elems, error_message
-        # Make a copy of the current list of data_table_elem_names so we can persist later if changes to the config file are necessary.
-        original_data_table_elem_names = [ name for name in self.data_table_elem_names ]
-        if root.tag == 'tables':
-            table_elems = self.load_from_config_file( config_filename=config_filename,
-                                                      tool_data_path=tool_data_path,
-                                                      from_shed_config=True )
-        else:
-            type = root.get( 'type', 'tabular' )
-            assert type in tool_data_table_types, "Unknown data table type '%s'" % type
-            table_elems.append( root )
-            table_elem_name = root.get( 'name', None )
-            if table_elem_name and table_elem_name not in self.data_table_elem_names:
-                self.data_table_elem_names.append( table_elem_name )
-                self.shed_data_table_elems.append( root )
-            table = tool_data_table_types[ type ]( root, tool_data_path )
-            if table.name not in self.data_tables:
-                self.data_tables[ table.name ] = table
-                log.debug( "Added new tool data table '%s'", table.name )
-        if persist and self.data_table_elem_names != original_data_table_elem_names:
+            table_elems = []
+        if persist:
             # Persist Galaxy's version of the changed tool_data_table_conf.xml file.
-            self.to_xml_file( shed_tool_data_table_config )
+            self.to_xml_file( shed_tool_data_table_config, table_elems )
         return table_elems, error_message
 
-    def to_xml_file( self, shed_tool_data_table_config ):
-        """Write the current in-memory version of the shed_tool_data_table_conf.xml file to disk."""
+    def to_xml_file( self, shed_tool_data_table_config, new_elems=None, remove_elems=None ):
+        """
+        Re-read shed_tool_data_table_config from disk, drop the elements in remove_elems, append new_elems and rewrite the file.
+        remove_elems are removed before new_elems are added. Returns without touching the file when neither list has entries.
+        """
+        if not ( new_elems or remove_elems ):
+            log.debug( 'ToolDataTableManager.to_xml_file called without any elements to add or remove.' )
+            return #no changes provided, no need to persist any changes
+        if not new_elems:
+            new_elems = []
+        if not remove_elems:
+            remove_elems = []
         full_path = os.path.abspath( shed_tool_data_table_config )
-        fd, filename = tempfile.mkstemp()
-        os.write( fd, '<?xml version="1.0"?>\n' )
-        os.write( fd, '<tables>\n' )
-        for elem in self.shed_data_table_elems:
-            os.write( fd, '%s' % util.xml_to_string( elem ) )
-        os.write( fd, '</tables>\n' )
-        os.close( fd )
-        shutil.move( filename, full_path )
+        #FIXME: we should lock changing this file by other threads / head nodes
+        try:
+            tree = util.parse_xml( full_path )
+            root = tree.getroot()
+            out_elems = [ elem for elem in root ]
+        except Exception, e:
+            out_elems = []
+            log.debug( 'Could not parse existing tool data table config, assume no existing elements: %s', e )
+        for elem in remove_elems:
+            #drop every occurrence from the elems to be written; remove_elems itself must not be mutated while it is iterated
+            while elem in out_elems:
+                out_elems.remove( elem )
+        #add new elems
+        out_elems.extend( new_elems )
+        with open( full_path, 'wb' ) as out:
+            out.write( '<?xml version="1.0"?>\n<tables>\n' )
+            for elem in out_elems:
+                out.write( util.xml_to_string( elem ) )
+            out.write( '</tables>\n' )
         os.chmod( full_path, 0644 )
 
+class ToolDataTable( object ):
 
-class ToolDataTable( object ):
+    @classmethod
+    def from_elem( cls, table_elem, tool_data_path, from_shed_config ): #factory: instantiate the class registered in tool_data_table_types for the elem's 'type' attribute
+        table_type = table_elem.get( 'type', 'tabular' )
+        assert table_type in tool_data_table_types, "Unknown data table type '%s'" % table_type #report the offending value, not the builtin 'type'
+        return tool_data_table_types[ table_type ]( table_elem, tool_data_path, from_shed_config=from_shed_config )
 
     def __init__( self, config_element, tool_data_path, from_shed_config = False):
         self.name = config_element.get( 'name' )
         self.comment_char = config_element.get( 'comment_char' )
         self.empty_field_value = config_element.get( 'empty_field_value', '' )
         self.empty_field_values = {}
-        for file_elem in config_element.findall( 'file' ):
-            # There should only be one file_elem.
-            if 'path' in file_elem.attrib:
-                tool_data_file_path = file_elem.get( 'path' )
-                self.tool_data_file = os.path.split( tool_data_file_path )[1]
-            else:
-                self.tool_data_file = None
+        self.filenames = odict()
         self.tool_data_path = tool_data_path
         self.missing_index_file = None
         # increment this variable any time a new entry is added, or when the table is totally reloaded
         # This value has no external meaning, and does not represent an abstract version of the underlying data
         self._loaded_content_version = 1
-
+    
+    def _update_version( self ):
+        self._loaded_content_version += 1
+        return self._loaded_content_version
+    
     def get_empty_field_by_name( self, name ):
         return self.empty_field_values.get( name, self.empty_field_value )
     
-    def _add_entry( self, entry, persist=False, persist_on_error=False, **kwd ):
+    def _add_entry( self, entry, allow_duplicates=True, persist=False, persist_on_error=False, entry_source=None, **kwd ):
         raise NotImplementedError( "Abstract method" )
     
-    def add_entry( self, entry, persist=False, persist_on_error=False, **kwd ):
-        self._add_entry( entry, persist=persist, persist_on_error=persist_on_error, **kwd )
-        self._loaded_content_version += 1
+    def add_entry( self, entry, allow_duplicates=True, persist=False, persist_on_error=False, entry_source=None, **kwd ):
+        self._add_entry( entry, allow_duplicates=allow_duplicates, persist=persist, persist_on_error=persist_on_error, entry_source=entry_source, **kwd )
+        return self._update_version()
+    
+    def add_entries( self, entries, allow_duplicates=True, persist=False, persist_on_error=False, entry_source=None, **kwd ):
+        if entries:
+            for entry in entries:
+                self.add_entry( entry, allow_duplicates=allow_duplicates, persist=persist, persist_on_error=persist_on_error, entry_source=entry_source, **kwd )
         return self._loaded_content_version
     
     def is_current_version( self, other_version ):
         return self._loaded_content_version == other_version
+    
+    def merge_tool_data_table( self, other_table, allow_duplicates=True, persist=False, persist_on_error=False, entry_source=None, **kwd ):
+        raise NotImplementedError( "Abstract method" )
 
 class TabularToolDataTable( ToolDataTable ):
     """
@@ -196,6 +201,7 @@
 
     def __init__( self, config_element, tool_data_path, from_shed_config = False):
         super( TabularToolDataTable, self ).__init__( config_element, tool_data_path, from_shed_config)
+        self.data = []
         self.configure_and_load( config_element, tool_data_path, from_shed_config)
 
     def configure_and_load( self, config_element, tool_data_path, from_shed_config = False):
@@ -206,24 +212,37 @@
         self.comment_char = config_element.get( 'comment_char', '#' )
         # Configure columns
         self.parse_column_spec( config_element )
+        
+        #store repo info if available:
+        repo_elem = config_element.find( 'tool_shed_repository' )
+        if repo_elem is not None:
+            repo_info = dict( tool_shed=repo_elem.find( 'tool_shed' ).text, name=repo_elem.find( 'repository_name' ).text, 
+                              owner=repo_elem.find( 'repository_owner' ).text, installed_changeset_revision=repo_elem.find( 'installed_changeset_revision' ).text )
+        else:
+            repo_info = None
         # Read every file
-        all_rows = []
         for file_element in config_element.findall( 'file' ):
+            filename = file_path = file_element.get( 'path', None )
             found = False
+            if file_path is None:
+                log.debug( "Encountered a file element (%s) that does not contain a path value when loading tool data table '%s'.", util.xml_to_string( file_element ), self.name )
+                continue
+            
+            #FIXME: splitting on and merging paths from a configuration file when loading is wonky
+            # Data should exist on disk in the state needed, i.e. the xml configuration should
+            # point directly to the desired file to load. Munging of the tool_data_tables_conf.xml.sample
+            # can be done during installing / testing / metadata resetting with the creation of a proper
+            # tool_data_tables_conf.xml file, containing correct <file path=> attributes. Allowing a 
+            # path.join with a different root should be allowed, but splitting should not be necessary.
             if tool_data_path and from_shed_config:
                 # Must identify with from_shed_config as well, because the
                 # regular galaxy app has and uses tool_data_path.
                 # We're loading a tool in the tool shed, so we cannot use the Galaxy tool-data
                 # directory which is hard-coded into the tool_data_table_conf.xml entries.
-                filepath = file_element.get( 'path' )
-                filename = os.path.split( filepath )[ 1 ]
+                filename = os.path.split( file_path )[ 1 ]
                 filename = os.path.join( tool_data_path, filename )
-            else:
-               filename = file_element.get( 'path' )
             if os.path.exists( filename ):
                 found = True
-                all_rows.extend( self.parse_file_fields( open( filename ) ) )
-                self.filename = filename
             else:
                 # Since the path attribute can include a hard-coded path to a specific directory
                 # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value
@@ -233,14 +252,32 @@
                 if file_path and file_path != self.tool_data_path:
                     corrected_filename = os.path.join( self.tool_data_path, file_name )
                     if os.path.exists( corrected_filename ):
+                        filename = corrected_filename
                         found = True
-                        all_rows.extend( self.parse_file_fields( open( corrected_filename ) ) )
-                        self.filename = corrected_filename
-            if not found:
+            
+            if found:
+                self.data.extend( self.parse_file_fields( open( filename ) ) )
+                self._update_version()
+            else:
                 self.missing_index_file = filename
                 log.warn( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) )
-        self.data = all_rows
-
+            
+            if filename not in self.filenames or not self.filenames[ filename ][ 'found' ]:
+                self.filenames[ filename ] = dict( found=found, filename=filename, from_shed_config=from_shed_config, tool_data_path=tool_data_path, 
+                                                   config_element=config_element, tool_shed_repository=repo_info )
+            else:
+                log.debug( "Filename '%s' already exists in filenames (%s), not adding", filename, self.filenames.keys() )
+            
+    
+    def merge_tool_data_table( self, other_table, allow_duplicates=True, persist=False, persist_on_error=False, entry_source=None, **kwd ):
+        assert self.columns == other_table.columns, "Merging tabular data tables with non matching columns is not allowed: %s:%s != %s:%s" % ( self.name, self.columns, other_table.name, other_table.columns )
+        #merge filename info
+        for filename, info in other_table.filenames.iteritems():
+            if filename not in self.filenames:
+                self.filenames[ filename ] = info
+        #add data entries and return current data table version
+        return self.add_entries( other_table.data, allow_duplicates=allow_duplicates, persist=persist, persist_on_error=persist_on_error, entry_source=entry_source, **kwd )
+    
     def handle_found_index_file( self, filename ):
         self.missing_index_file = None
         self.data.extend( self.parse_file_fields( open( filename ) ) )
@@ -341,7 +378,7 @@
                 break
         return rval
     
-    def _add_entry( self, entry, persist=False, persist_on_error=False, **kwd ):
+    def _add_entry( self, entry, allow_duplicates=True, persist=False, persist_on_error=False, entry_source=None, **kwd ):
         #accepts dict or list of columns
         if isinstance( entry, dict ):
             fields = []
@@ -354,28 +391,53 @@
                 fields.append( field_value )
         else:
             fields = entry
+        is_error = False
         if self.largest_index < len( fields ):
             fields = self._replace_field_separators( fields )
-            self.data.append( fields )
-            field_len_error = False
+            if fields not in self.data or allow_duplicates:
+                self.data.append( fields )
+            else:
+                log.error( "Attempted to add fields (%s) to data table '%s', but this entry already exists and allow_duplicates is False.", fields, self.name )
+                is_error = True
         else:
             log.error( "Attempted to add fields (%s) to data table '%s', but there were not enough fields specified ( %i < %i ).", fields, self.name, len( fields ), self.largest_index + 1 )
-            field_len_error = True
-        if persist and ( not field_len_error or persist_on_error ):
-            #FIXME: Need to lock these files for editing
-            try:
-                data_table_fh = open( self.filename, 'r+b' )
-            except IOError, e:
-                log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s', self.filename, e )
-                data_table_fh = open( self.filename, 'wb' )
-            if os.stat( self.filename )[6] != 0:
-                # ensure last existing line ends with new line
-                data_table_fh.seek( -1, 2 ) #last char in file
-                last_char = data_table_fh.read( 1 )
-                if last_char not in [ '\n', '\r' ]:
-                    data_table_fh.write( '\n' )
-            data_table_fh.write( "%s\n" % ( self.separator.join( fields ) ) )
-        return not field_len_error
+            is_error = True
+        filename = None
+                    
+        if persist and ( not is_error or persist_on_error ):
+            if entry_source:
+                #if dict, assume is compatible info dict, otherwise call method
+                if isinstance( entry_source, dict ):
+                    source_repo_info = entry_source
+                else:
+                    source_repo_info = entry_source.get_tool_shed_repository_info_dict()
+            else:
+                source_repo_info = None
+            for name, value in self.filenames.iteritems():
+                repo_info = value.get( 'tool_shed_repository', None )
+                if ( not source_repo_info and not repo_info ) or ( source_repo_info and repo_info and source_repo_info == repo_info ):
+                    filename = name
+                    break
+            if filename is None:
+                #should we default to using any filename here instead?
+                log.error( "Unable to determine filename for persisting data table '%s' values: '%s'.", self.name, fields )
+                is_error = True
+            else:
+                #FIXME: Need to lock these files for editing
+                log.debug( "Persisting changes to file: %s", filename )
+                try:
+                    data_table_fh = open( filename, 'r+b' )
+                except IOError, e:
+                    log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s', self.filename, e )
+                    data_table_fh = open( filename, 'wb' )
+                if os.stat( filename )[6] != 0:
+                    # ensure last existing line ends with new line
+                    data_table_fh.seek( -1, 2 ) #last char in file
+                    last_char = data_table_fh.read( 1 )
+                    if last_char not in [ '\n', '\r' ]:
+                        data_table_fh.write( '\n' )
+                data_table_fh.write( "%s\n" % ( self.separator.join( fields ) ) )
+        return not is_error
     
     def _replace_field_separators( self, fields, separator=None, replace=None, comment_char=None ):
         #make sure none of the fields contain separator

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/galaxy/tools/data_manager/manager.py
--- a/lib/galaxy/tools/data_manager/manager.py
+++ b/lib/galaxy/tools/data_manager/manager.py
@@ -106,6 +106,7 @@
         self.output_ref_by_data_table = {}
         self.move_by_data_table_column = {}
         self.value_translation_by_data_table_column = {}
+        self.tool_shed_repository_info_dict = None
         if elem is not None:
             self.load_from_element( elem, tool_path or self.data_managers.tool_path )
     def load_from_element( self, elem, tool_path ):
@@ -126,6 +127,9 @@
             repository_name = tool_elem.find( 'repository_name' ).text
             repository_owner = tool_elem.find( 'repository_owner' ).text
             installed_changeset_revision = tool_elem.find( 'installed_changeset_revision' ).text
+            #save repository info here
+            self.tool_shed_repository_info_dict = dict( tool_shed=tool_shed, name=repository_name, owner=repository_owner, installed_changeset_revision=installed_changeset_revision )
+            #get tool_shed repo id
             tool_shed_repository = suc.get_tool_shed_repository_by_shed_name_owner_installed_changeset_revision( self.data_managers.app, tool_shed, repository_name, repository_owner, installed_changeset_revision )
             tool_shed_repository_id = self.data_managers.app.security.encode_id( tool_shed_repository.id )
             #use shed_conf_file to determine tool_path
@@ -241,7 +245,7 @@
                     if name in output_ref_values:
                         moved = self.process_move( data_table_name, name, output_ref_values[ name ].extra_files_path, **data_table_value )
                         data_table_value[ name ] = self.process_value_translation( data_table_name, name, **data_table_value )
-                data_table.add_entry( data_table_value, persist=True )
+                data_table.add_entry( data_table_value, persist=True, entry_source=self )
         
         for data_table_name, data_table_values in data_tables_dict.iteritems():
             #tool returned extra data table entries, but data table was not declared in data manager
@@ -289,3 +293,6 @@
             value_translation = self.value_translation_by_data_table_column[ data_table_name ][ column_name ]
             value = fill_template( value_translation, GALAXY_DATA_MANAGER_DATA_PATH=self.data_managers.app.config.galaxy_data_manager_data_path, **kwd  )
         return value
+    
+    def get_tool_shed_repository_info_dict( self ):
+        return self.tool_shed_repository_info_dict

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -137,8 +137,60 @@
 def xml_to_string( elem, pretty=False ):
     """Returns a string from an xml tree"""
     if pretty:
-        return ElementTree.tostring( pretty_print_xml( elem ) )
-    return ElementTree.tostring( elem )
+        elem = pretty_print_xml( elem )
+    try:
+        return ElementTree.tostring( elem )
+    except TypeError, e:
+        #assume this is a comment
+        if hasattr( elem, 'text' ):
+            return "<!-- %s -->\n" % ( elem.text )
+        else:
+            raise e
+
+def xml_element_compare( elem1, elem2 ):
+    if not isinstance( elem1, dict ):
+        elem1 = xml_element_to_dict( elem1 )
+    if not isinstance( elem2, dict ):
+        elem2 = xml_element_to_dict( elem2 )
+    return elem1 == elem2
+
+def xml_element_list_compare( elem_list1, elem_list2 ):
+    return [ xml_element_to_dict( elem ) for elem in elem_list1  ] == [ xml_element_to_dict( elem ) for elem in elem_list2  ]
+
+def xml_element_to_dict( elem ):
+    """Return elem as { tag: value }; value is a dict ('@attr', child tag and '#text' keys) when elem has attributes or children, else its stripped text or None."""
+    rval = {}
+    sub_elems = list( elem )
+    # A dict is required whenever attributes or children will be stored under the tag.
+    if elem.attrib or sub_elems:
+        rval[ elem.tag ] = {}
+    else:
+        rval[ elem.tag ] = None
+    
+    if sub_elems:
+        sub_elem_dict = dict()
+        for sub_sub_elem_dict in map( xml_element_to_dict, sub_elems ):
+            for key, value in sub_sub_elem_dict.iteritems():
+                if key not in sub_elem_dict:
+                    sub_elem_dict[ key ] = []
+                sub_elem_dict[ key ].append( value )
+        for key, value in sub_elem_dict.iteritems():
+            # A child tag seen once maps to its value; a repeated tag maps to the list of values.
+            rval[ elem.tag ][ key ] = value[0] if len( value ) == 1 else value
+    if elem.attrib:
+        for key, value in elem.attrib.iteritems():
+            rval[ elem.tag ][ "@%s" % key ] = value
+    
+    if elem.text:
+        text = elem.text.strip()
+        if text and ( sub_elems or elem.attrib ):
+            rval[ elem.tag ][ '#text' ] = text
+        elif not ( sub_elems or elem.attrib ):
+            rval[ elem.tag ] = text
+    
+    return rval
+
+
 
 def pretty_print_xml( elem, level=0 ):
     pad = '    '
@@ -287,7 +339,6 @@
     elif isinstance( value, list ):
         return map(sanitize_text, value)
     else:
-        print value
         raise Exception, 'Unknown parameter type (%s)' % ( type( value ) )
 
 valid_filename_chars = set( string.ascii_letters + string.digits + '_.' )

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/tool_shed/galaxy_install/repository_util.py
--- a/lib/tool_shed/galaxy_install/repository_util.py
+++ b/lib/tool_shed/galaxy_install/repository_util.py
@@ -283,6 +283,12 @@
     trans.sa_session.flush()
     if 'tool_dependencies' in metadata_dict and not reinstalling:
         tool_dependencies = tool_dependency_util.create_tool_dependency_objects( trans.app, tool_shed_repository, relative_install_dir, set_status=True )
+    if 'sample_files' in metadata_dict:
+        sample_files = metadata_dict.get( 'sample_files', [] )
+        tool_index_sample_files = tool_util.get_tool_index_sample_files( sample_files )
+        tool_data_table_conf_filename, tool_data_table_elems = tool_util.install_tool_data_tables( trans.app, tool_shed_repository, tool_index_sample_files )
+        if tool_data_table_elems:
+            trans.app.tool_data_tables.add_new_entries_from_config_file( tool_data_table_conf_filename, None, trans.app.config.shed_tool_data_table_config, persist=True )
     if 'tools' in metadata_dict:
         tool_panel_dict = tool_util.generate_tool_panel_dict_for_new_install( metadata_dict[ 'tools' ], tool_section )
         sample_files = metadata_dict.get( 'sample_files', [] )
@@ -483,7 +489,7 @@
                 message += "from the installed repository's <b>Repository Actions</b> menu.  "
                 status = 'error'
         if install_tool_dependencies and tool_shed_repository.tool_dependencies and 'tool_dependencies' in metadata:
-            work_dir = tempfile.mkdtemp()
+            work_dir = tempfile.mkdtemp( prefix="tmp-toolshed-itsr" )
             # Install tool dependencies.
             suc.update_tool_shed_repository_status( trans.app,
                                                     tool_shed_repository,

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/tool_shed/util/metadata_util.py
--- a/lib/tool_shed/util/metadata_util.py
+++ b/lib/tool_shed/util/metadata_util.py
@@ -571,12 +571,12 @@
         app.config.tool_data_table_config_path = repository_files_dir
     else:
         # Use a temporary working directory to copy all sample files.
-        work_dir = tempfile.mkdtemp()
+        work_dir = tempfile.mkdtemp( prefix="tmp-toolshed-gmfcr" )
         # All other files are on disk in the repository's repo_path, which is the value of relative_install_dir.
         files_dir = relative_install_dir
         if shed_config_dict.get( 'tool_path' ):
             files_dir = os.path.join( shed_config_dict[ 'tool_path' ], files_dir )
-        app.config.tool_data_path = work_dir
+        app.config.tool_data_path = work_dir #FIXME: Thread safe?
         app.config.tool_data_table_config_path = work_dir
     # Handle proprietary datatypes, if any.
     datatypes_config = suc.get_config_from_disk( 'datatypes_conf.xml', files_dir )
@@ -598,7 +598,7 @@
             new_table_elems, error_message = app.tool_data_tables.add_new_entries_from_config_file( config_filename=sample_file,
                                                                                                     tool_data_path=app.config.tool_data_path,
                                                                                                     shed_tool_data_table_config=app.config.shed_tool_data_table_config,
-                                                                                                    persist=persist )
+                                                                                                    persist=False )
             if error_message:
                 invalid_file_tups.append( ( filename, error_message ) )
     for root, dirs, files in os.walk( files_dir ):
@@ -1584,7 +1584,7 @@
     invalid_file_tups = []
     home_dir = os.getcwd()
     for changeset in repo.changelog:
-        work_dir = tempfile.mkdtemp()
+        work_dir = tempfile.mkdtemp( prefix="tmp-toolshed-ramorits" )
         current_changeset_revision = str( repo.changectx( changeset ) )
         ctx = repo.changectx( changeset )
         log.debug( "Cloning repository changeset revision: %s", str( ctx.rev() ) )

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/tool_shed/util/shed_util_common.py
--- a/lib/tool_shed/util/shed_util_common.py
+++ b/lib/tool_shed/util/shed_util_common.py
@@ -189,7 +189,7 @@
 
 def config_elems_to_xml_file( app, config_elems, config_filename, tool_path ):
     """Persist the current in-memory list of config_elems to a file named by the value of config_filename."""
-    fd, filename = tempfile.mkstemp()
+    fd, filename = tempfile.mkstemp( prefix="tmp-toolshed-cetxf"  )
     os.write( fd, '<?xml version="1.0"?>\n' )
     os.write( fd, '<toolbox tool_path="%s">\n' % str( tool_path ) )
     for elem in config_elems:
@@ -294,6 +294,32 @@
     # Don't include the changeset_revision in clone urls.
     return url_join( toolshed, 'repos', owner, name )
 
+def generate_repository_info_elem( tool_shed, repository_name, changeset_revision, owner, parent_elem=None, **kwd ):
+    """Create and return a 'tool_shed_repository' ElementTree Element, attached to parent_elem when one is given.
+    It always holds the tool_shed, repository_name, repository_owner and installed_changeset_revision children;
+    every entry in kwd adds one more child, using the keyword as the tag and its value as the text."""
+    root_tag = 'tool_shed_repository'
+    if parent_elem is not None:
+        elem = XmlET.SubElement( parent_elem, root_tag )
+    else:
+        elem = XmlET.Element( root_tag )
+    # The four fixed children, listed in the order in which they are written.
+    fixed_children = ( ( 'tool_shed', tool_shed ),
+                       ( 'repository_name', repository_name ),
+                       ( 'repository_owner', owner ),
+                       ( 'installed_changeset_revision', changeset_revision ) )
+    for child_tag, child_text in fixed_children:
+        XmlET.SubElement( elem, child_tag ).text = child_text
+    #add additional values
+    #TODO: enhance additional values to allow e.g. use of dict values that will recurse
+    for key, value in kwd.iteritems():
+        XmlET.SubElement( elem, key ).text = value
+    return elem
+    
+def generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=None, **kwd ):
+    """Create a repository info Element from tool_shed_repository's tool_shed, name, installed_changeset_revision and owner attributes."""
+    return generate_repository_info_elem( tool_shed_repository.tool_shed, tool_shed_repository.name, tool_shed_repository.installed_changeset_revision, tool_shed_repository.owner, parent_elem=parent_elem, **kwd )
+
 def generate_sharable_link_for_repository_in_tool_shed( trans, repository, changeset_revision=None ):
     """Generate the URL for sharing a repository that is in the tool shed."""
     base_url = url_for( '/', qualified=True ).rstrip( '/' )
@@ -546,7 +572,7 @@
                 fctx = None
                 continue
             if fctx:
-                fh = tempfile.NamedTemporaryFile( 'wb', dir=dir )
+                fh = tempfile.NamedTemporaryFile( 'wb', prefix="tmp-toolshed-gntfc", dir=dir )
                 tmp_filename = fh.name
                 fh.close()
                 fh = open( tmp_filename, 'wb' )

diff -r 1c7ca04c86393c3f45df48514ac153cb64b4695e -r 6ae2d6a466b8f721aa39d3e183b36594d7b4e235 lib/tool_shed/util/tool_util.py
--- a/lib/tool_shed/util/tool_util.py
+++ b/lib/tool_shed/util/tool_util.py
@@ -416,7 +416,7 @@
                     # The ctx_file may have been moved in the change set.  For example, 'ncbi_blastp_wrapper.xml' was moved to
                     # 'tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml', so keep looking for the file until we find the new location.
                     continue
-                fh = tempfile.NamedTemporaryFile( 'wb' )
+                fh = tempfile.NamedTemporaryFile( 'wb', prefix="tmp-toolshed-gltcrfrm" )
                 tmp_filename = fh.name
                 fh.close()
                 fh = open( tmp_filename, 'wb' )
@@ -538,8 +538,7 @@
         # The repository must contain a tool_data_table_conf.xml.sample file that includes all required entries for all tools in the repository.
         sample_tool_data_table_conf = suc.get_config_from_disk( 'tool_data_table_conf.xml.sample', relative_install_dir )
         if sample_tool_data_table_conf:
-            # Add entries to the ToolDataTableManager's in-memory data_tables dictionary as well as the list of data_table_elems and the list of
-            # data_table_elem_names.
+            # Add entries to the ToolDataTableManager's in-memory data_tables dictionary.
             error, message = handle_sample_tool_data_table_conf_file( app, sample_tool_data_table_conf, persist=True )
             if error:
                 # TODO: Do more here than logging an exception.
@@ -706,6 +705,60 @@
                 sa_session.add( tool_version_association )
                 sa_session.flush()
 
+def install_tool_data_tables( app, tool_shed_repository, tool_index_sample_files ):
+    """Copy a repository's sample index files into its own directory under app.config.shed_tool_data_path and rewrite
+    the copied tool_data_table_conf.xml so that each table's file paths point into that directory.  Only ever called
+    from Galaxy end when installing.  Returns the tuple ( tool_data_table_conf_filename, elems )."""
+    TOOL_DATA_TABLE_FILE_NAME = 'tool_data_table_conf.xml'
+    TOOL_DATA_TABLE_FILE_SAMPLE_NAME = '%s.sample' % ( TOOL_DATA_TABLE_FILE_NAME )
+    SAMPLE_SUFFIX = '.sample'
+    tool_path, relative_target_dir = tool_shed_repository.get_tool_relative_path( app )
+    target_dir = os.path.join( app.config.shed_tool_data_path, relative_target_dir ) #this is where index files will reside on a per repo/installed version
+    if not os.path.exists( target_dir ):
+        os.makedirs( target_dir )
+    for sample_file in tool_index_sample_files:
+        filename = os.path.basename( sample_file )
+        target_filename = filename
+        if target_filename.endswith( SAMPLE_SUFFIX ):
+            target_filename = target_filename[ : -len( SAMPLE_SUFFIX ) ]
+        source_file = os.path.join( tool_path, sample_file )
+        #we're not currently uninstalling index files, do not overwrite existing files
+        target_path_filename = os.path.join( target_dir, target_filename )
+        if not os.path.exists( target_path_filename ) or target_filename == TOOL_DATA_TABLE_FILE_NAME:
+            shutil.copy2( source_file, target_path_filename )
+        else:
+            log.debug( "Did not copy sample file '%s' to install directory '%s' because file already exists.", filename, target_dir )
+        #for provenance and to simplify introspection, let's keep the original data table sample file around
+        if filename == TOOL_DATA_TABLE_FILE_SAMPLE_NAME:
+            shutil.copy2( source_file, os.path.join( target_dir, filename ) )
+    tool_data_table_conf_filename = os.path.join( target_dir, TOOL_DATA_TABLE_FILE_NAME )
+    elems = []
+    if os.path.exists( tool_data_table_conf_filename ):
+        tree, error_message = xml_util.parse_xml( tool_data_table_conf_filename )
+        if tree:
+            for elem in tree.getroot():
+                #append individual table elems or other elems, but not tables elems: a nested tables wrapper is flattened into its children
+                if elem.tag == 'tables':
+                    for table_elem in elem:
+                        elems.append( table_elem )
+                else:
+                    elems.append( elem )
+    else:
+        log.debug( "The '%s' data table file was not found, but was expected to be copied from '%s' during repository installation.", tool_data_table_conf_filename, TOOL_DATA_TABLE_FILE_SAMPLE_NAME )
+    for elem in elems:
+        if elem.tag == 'table':
+            for file_elem in elem.findall( 'file' ):
+                path = file_elem.get( 'path', None )
+                if path:
+                    file_elem.set( 'path', os.path.normpath( os.path.join( target_dir, os.path.split( path )[1] ) ) )
+            #store repository info in the table tagset for traceability (the helper attaches the new element to elem itself)
+            suc.generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=elem )
+    if elems:
+        os.unlink( tool_data_table_conf_filename ) #remove old data_table
+        app.tool_data_tables.to_xml_file( tool_data_table_conf_filename, elems ) #persist new data_table content
+    return tool_data_table_conf_filename, elems
+    
+
 def is_column_based( fname, sep='\t', skip=0, is_multi_byte=False ):
     """See if the file is column based with respect to a separator."""
     headers = get_headers( fname, sep, is_multi_byte=is_multi_byte )
@@ -763,7 +816,7 @@
     tool = None
     can_use_disk_file = False
     tool_config_filepath = suc.get_absolute_path_to_file_in_repository( repo_files_dir, tool_config_filename )
-    work_dir = tempfile.mkdtemp()
+    work_dir = tempfile.mkdtemp( prefix="tmp-toolshed-ltfcr" )
     can_use_disk_file = can_use_tool_config_disk_file( trans, repository, repo, tool_config_filepath, changeset_revision )
     if can_use_disk_file:
         trans.app.config.tool_data_path = work_dir

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

    

commits-noreply＠bitbucket.org