2 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/c691188b11ad/
changeset: c691188b11ad
user: kellrott
date: 2012-10-28 08:28:42
summary: Adding the initial components of an extended metadata system for datasets. The idea behind 'extended metadata' for datasets is to provide a mechanism to encode and index user-generated metadata and keep it attached to files. Users can provide a JSON-style data structure, which is then indexed and stored. This index can later be used to search for and discover datasets via a pseudo-formal schema. This patch introduces the tables and makes it possible to inject user metadata into a file load using the 'extended_metadata' field in the query.
affected #: 5 files
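For illustration, a user-supplied metadata document such as the following (a hypothetical sample; the flattening rules are those of the _scan_json_block helper added by this patch):

    { "sample" : { "id" : "A1", "tags" : [ "tumor", "paired" ] } }

is stored whole in the new extended_metadata table and indexed in extended_metadata_index as one row per leaf value:

    /sample/id      == "A1"
    /sample/tags[0] == "tumor"
    /sample/tags[1] == "paired"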
diff -r ecd131b136d00d5753992061d12b12156be8b277 -r c691188b11ad81ae21d1135501a5ede81135bffa lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -977,7 +977,7 @@
     permitted_actions = Dataset.permitted_actions
     def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, tool_version=None, extension=None,
                   dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
-                  parent_id=None, validation_errors=None, visible=True, create_dataset=False, sa_session=None ):
+                  parent_id=None, validation_errors=None, visible=True, create_dataset=False, sa_session=None, extended_metadata=None ):
         self.name = name or "Unnamed dataset"
         self.id = id
         self.info = info
@@ -987,6 +987,7 @@
         self.extension = extension
         self.designation = designation
         self.metadata = metadata or dict()
+        self.extended_metadata = extended_metadata
         if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
             self.dbkey = dbkey
         self.deleted = deleted
@@ -1891,6 +1892,18 @@
             ldda_name = unicode( ldda_name, 'utf-8' )
         return ldda_name

+class ExtendedMetadata( object ):
+    def __init__( self, data ):
+        self.data = data
+
+
+class ExtendedMetadataIndex( object ):
+    def __init__( self, extended_metadata, path, value ):
+        self.extended_metadata = extended_metadata
+        self.path = path
+        self.value = value
+
+
 class LibraryInfoAssociation( object ):
     def __init__( self, library, form_definition, info, inheritable=False ):
         self.library = library

diff -r ecd131b136d00d5753992061d12b12156be8b277 -r c691188b11ad81ae21d1135501a5ede81135bffa lib/galaxy/model/mapping.py
--- a/lib/galaxy/model/mapping.py
+++ b/lib/galaxy/model/mapping.py
@@ -323,7 +323,24 @@
     Column( "deleted", Boolean, index=True, default=False ),
     Column( "visible", Boolean ),
     Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ),
-    Column( "message", TrimmedString( 255 ) ) )
+    Column( "message", TrimmedString( 255 ) ),
+    Column( "extended_metadata_id", Integer,
+            ForeignKey( "extended_metadata.id" ), index=True )
+    )
+
+
+ExtendedMetadata.table = Table( "extended_metadata", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "data", JSONType ) )
+
+ExtendedMetadataIndex.table = Table( "extended_metadata_index", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "extended_metadata_id", Integer,
+            ForeignKey( "extended_metadata.id", onupdate="CASCADE", ondelete="CASCADE" ),
+            index=True ),
+    Column( "path", String( 255 ) ),
+    Column( "value", TEXT ) )

 Library.table = Table( "library", metadata,
     Column( "id", Integer, primary_key=True ),
@@ -1436,6 +1453,28 @@
     ) )

+assign_mapper( context, ExtendedMetadata, ExtendedMetadata.table,
+    properties=dict(
+        children=relation(
+            ExtendedMetadataIndex,
+            primaryjoin=( ExtendedMetadataIndex.table.c.extended_metadata_id == ExtendedMetadata.table.c.id ),
+            backref=backref( "parent",
+                primaryjoin=( ExtendedMetadataIndex.table.c.extended_metadata_id == ExtendedMetadata.table.c.id ) )
+        )
+    )
+)
+
+assign_mapper( context, ExtendedMetadataIndex, ExtendedMetadataIndex.table,
+    properties=dict(
+        extended_metadata=relation(
+            ExtendedMetadata,
+            primaryjoin=( ( ExtendedMetadataIndex.table.c.extended_metadata_id == ExtendedMetadata.table.c.id ) )
+        )
+    )
+)
+
+
 assign_mapper( context, LibraryInfoAssociation, LibraryInfoAssociation.table,
     properties=dict(
         library=relation( Library, primaryjoin=( ( LibraryInfoAssociation.table.c.library_id == Library.table.c.id ) & ( not_( LibraryInfoAssociation.table.c.deleted ) ) ), backref="info_association" ),
@@ -1515,8 +1554,12 @@
         backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
     visible_children=relation( LibraryDatasetDatasetAssociation,
-        primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
-    ) )
+        primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) ),
+    extended_metadata=relation(
+        ExtendedMetadata,
+        primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.extended_metadata_id == ExtendedMetadata.table.c.id ) )
+    )
+    ) )

 assign_mapper( context, LibraryDatasetDatasetInfoAssociation, LibraryDatasetDatasetInfoAssociation.table,
     properties=dict( library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation,
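With these mappings in place, path/value lookups reduce to ordinary joins. A minimal query sketch (not part of this commit; the session name, metadata path, and value are illustrative, and the exact join style depends on the SQLAlchemy version in use):

    # Find library datasets whose extended metadata contains /sample/type == "tumor".
    # Index values are stored as strings (see the str() cast in _scan_json_block),
    # so the comparison value must be a string as well.
    matches = sa_session.query( LibraryDatasetDatasetAssociation ) \
        .join( ( ExtendedMetadata, LibraryDatasetDatasetAssociation.table.c.extended_metadata_id == ExtendedMetadata.table.c.id ) ) \
        .join( ( ExtendedMetadataIndex, ExtendedMetadataIndex.table.c.extended_metadata_id == ExtendedMetadata.table.c.id ) ) \
        .filter( ExtendedMetadataIndex.table.c.path == "/sample/type" ) \
        .filter( ExtendedMetadataIndex.table.c.value == "tumor" ) \
        .all()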
diff -r ecd131b136d00d5753992061d12b12156be8b277 -r c691188b11ad81ae21d1135501a5ede81135bffa lib/galaxy/model/migrate/versions/0108_add_extended_metadata.py
--- /dev/null
+++ b/lib/galaxy/model/migrate/versions/0108_add_extended_metadata.py
@@ -0,0 +1,75 @@
+"""
+Add the ExtendedMetadata and ExtendedMetadataIndex tables
+"""
+
+from sqlalchemy import *
+from sqlalchemy.orm import *
+from migrate import *
+from migrate.changeset import *
+from galaxy.model.custom_types import JSONType
+
+import logging
+log = logging.getLogger( __name__ )
+
+metadata = MetaData( migrate_engine )
+#db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, autocommit=True ) )
+
+
+ExtendedMetadata_table = Table( "extended_metadata", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "data", JSONType ) )
+
+ExtendedMetadataIndex_table = Table( "extended_metadata_index", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "extended_metadata_id", Integer,
+            ForeignKey( "extended_metadata.id", onupdate="CASCADE", ondelete="CASCADE" ),
+            index=True ),
+    Column( "path", String( 255 ) ),
+    Column( "value", TEXT ) )
+
+extended_metadata_ldda_col = Column( "extended_metadata_id", Integer, ForeignKey( "extended_metadata.id" ), nullable=True )
+
+
+def display_migration_details():
+    print ""
+    print "This migration script adds the extended_metadata and extended_metadata_index tables and an extended_metadata_id column to the library_dataset_dataset_association table."
+
+def upgrade():
+    print __doc__
+    metadata.reflect()
+    try:
+        ExtendedMetadata_table.create()
+    except:
+        log.debug( "Could not create ExtendedMetadata Table." )
+    try:
+        ExtendedMetadataIndex_table.create()
+    except:
+        log.debug( "Could not create ExtendedMetadataIndex Table." )
+
+    # Add the extended_metadata_id column to the ldda table
+    try:
+        ldda_table = Table( "library_dataset_dataset_association", metadata, autoload=True )
+        extended_metadata_ldda_col.create( ldda_table )
+        assert extended_metadata_ldda_col is ldda_table.c.extended_metadata_id
+    except Exception, e:
+        print str(e)
+        log.error( "Adding column 'extended_metadata_id' to library_dataset_dataset_association table failed: %s" % str( e ) )
+        return
+
+
+def downgrade():
+    metadata.reflect()
+    # Drop the extended_metadata_id column from the ldda table first,
+    # since it carries a foreign key into extended_metadata
+    try:
+        ldda_table = Table( "library_dataset_dataset_association", metadata, autoload=True )
+        ldda_table.c.extended_metadata_id.drop()
+    except Exception, e:
+        log.debug( "Dropping 'extended_metadata_id' column from library_dataset_dataset_association table failed: %s" % ( str( e ) ) )
+    ExtendedMetadataIndex_table.drop()
+    ExtendedMetadata_table.drop()

diff -r ecd131b136d00d5753992061d12b12156be8b277 -r c691188b11ad81ae21d1135501a5ede81135bffa lib/galaxy/webapps/galaxy/api/library_contents.py
--- a/lib/galaxy/webapps/galaxy/api/library_contents.py
+++ b/lib/galaxy/webapps/galaxy/api/library_contents.py
@@ -7,6 +7,7 @@
 from galaxy.web.base.controller import *
 from galaxy.util.sanitize_html import sanitize_html
 from galaxy.model.orm import *
+from galaxy.model import ExtendedMetadata, ExtendedMetadataIndex

 log = logging.getLogger( __name__ )
@@ -114,6 +115,13 @@
             return str( e )
         # The rest of the security happens in the library_common controller.
         real_folder_id = trans.security.encode_id( parent.id )
+
+        # Check for extended metadata and pop it out of the payload;
+        # otherwise sanitize_param will have a fit
+        ex_meta_payload = None
+        if 'extended_metadata' in payload:
+            ex_meta_payload = payload.pop('extended_metadata')
+
         # Now create the desired content object, either file or folder.
         if create_type == 'file':
             status, output = trans.webapp.controllers['library_common'].upload_library_dataset( trans, 'api', library_id, real_folder_id, **payload )
@@ -125,6 +133,19 @@
         else:
             rval = []
             for k, v in output.items():
+                if ex_meta_payload is not None:
+                    # If there is extended metadata, store it, attach it to the dataset, and index it
+                    ex_meta = ExtendedMetadata( ex_meta_payload )
+                    trans.sa_session.add( ex_meta )
+                    v.extended_metadata = ex_meta
+                    trans.sa_session.add( v )
+                    trans.sa_session.flush()
+                    for path, value in self._scan_json_block( ex_meta_payload ):
+                        meta_i = ExtendedMetadataIndex( ex_meta, path, value )
+                        trans.sa_session.add( meta_i )
+                        trans.sa_session.flush()
                 if type( v ) == trans.app.model.LibraryDatasetDatasetAssociation:
                     v = v.library_dataset
                 encoded_id = trans.security.encode_id( v.id )
@@ -135,6 +156,33 @@
                     url = url_for( 'library_content', library_id=library_id, id=encoded_id ) ) )
         return rval

+    def _scan_json_block( self, meta, prefix="" ):
+        """
+        Scan a JSON-style data structure and emit all leaf fields and their values.
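Once merged, the new migration runs with Galaxy's normal schema upgrade step from a galaxy-central checkout (the wrapper script name is assumed from contemporary Galaxy releases):

    sh manage_db.sh upgrade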
+
+        Example: for the data
+
+            { "data" : [ 1, 2, 3 ] }
+
+        the emitted (path, value) pairs are
+
+            /data[0] == "1"
+            /data[1] == "2"
+            /data[2] == "3"
+        """
+        if isinstance( meta, dict ):
+            for a in meta:
+                for path, value in self._scan_json_block( meta[a], prefix + "/" + a ):
+                    yield path, value
+        elif isinstance( meta, list ):
+            for i, a in enumerate( meta ):
+                for path, value in self._scan_json_block( a, prefix + "[%d]" % (i) ):
+                    yield path, value
+        else:
+            # BUG: Everything is cast to string, which can lead to false positives
+            # for cross-type comparisons, i.e. "True" == True
+            yield prefix, str( meta )
+
     @web.expose_api
     def update( self, trans, id, library_id, payload, **kwd ):
         """
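The flattening behaviour is easy to exercise on its own. A standalone sketch of the same logic as _scan_json_block (a module-level function rather than a controller method; the name and sample data are illustrative):

    def scan_json_block( meta, prefix="" ):
        # Walk dicts and lists recursively, yielding (path, value) pairs for leaves
        if isinstance( meta, dict ):
            for key in meta:
                for path, value in scan_json_block( meta[key], prefix + "/" + key ):
                    yield path, value
        elif isinstance( meta, list ):
            for i, item in enumerate( meta ):
                for path, value in scan_json_block( item, prefix + "[%d]" % i ):
                    yield path, value
        else:
            # Leaf values are cast to strings, as in the controller code
            yield prefix, str( meta )

    # Dict iteration order is arbitrary in Python 2, so sort for a stable display
    print sorted( scan_json_block( { "sample" : { "id" : "A1", "tags" : [ "tumor", "paired" ] } } ) )
    # -> [('/sample/id', 'A1'), ('/sample/tags[0]', 'tumor'), ('/sample/tags[1]', 'paired')]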
diff -r ecd131b136d00d5753992061d12b12156be8b277 -r c691188b11ad81ae21d1135501a5ede81135bffa scripts/api/load_data_with_metadata.py
--- /dev/null
+++ b/scripts/api/load_data_with_metadata.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+"""
+This script scans a directory for files with companion '.json' files, loads
+the data from each file, and attaches the .json contents to the dataset using
+the library's 'extended_metadata' system.
+
+Sample call:
+python load_data_with_metadata.py <api_key> <api_url> /tmp/g_inbox/ /tmp/g_inbox/done/ "API Imports"
+
+NOTE: The upload method used requires that the data library filesystem upload
+option (allow_library_path_paste) be enabled.
+"""
+import os
+import shutil
+import sys
+import json
+import time
+sys.path.insert( 0, os.path.dirname( __file__ ) )
+from common import submit, display
+
+def main(api_key, api_url, in_folder, out_folder, data_library):
+    # Find/create the data library with the above name. Assume we're putting datasets in the root folder '/'
+    libs = display(api_key, api_url + 'libraries', return_formatted=False)
+    library_id = None
+    for library in libs:
+        if library['name'] == data_library:
+            library_id = library['id']
+    if not library_id:
+        lib_create_data = {'name':data_library}
+        library = submit(api_key, api_url + 'libraries', lib_create_data, return_formatted=False)
+        library_id = library[0]['id']
+    library_folder_id = None
+    folders = display(api_key, api_url + "libraries/%s/contents" % library_id, return_formatted=False)
+    for f in folders:
+        if f['name'] == "/":
+            library_folder_id = f['id']
+    if not library_id or not library_folder_id:
+        print "Failure to configure library destination."
+        sys.exit(1)
+    # Upload anything in in_folder that has a companion '.json' file to the
+    # data library, attaching the .json contents as extended metadata
+    for fname in os.listdir(in_folder):
+        fullpath = os.path.join(in_folder, fname)
+        if os.path.isfile(fullpath) and os.path.exists(fullpath + ".json"):
+            data = {}
+            data['folder_id'] = library_folder_id
+            data['file_type'] = 'auto'
+            data['dbkey'] = ''
+            data['upload_option'] = 'upload_paths'
+            data['filesystem_paths'] = fullpath
+            data['create_type'] = 'file'
+
+            handle = open( fullpath + ".json" )
+            smeta = handle.read()
+            handle.close()
+            data['extended_metadata'] = json.loads(smeta)
+
+            libset = submit(api_key, api_url + "libraries/%s/contents" % library_id, data, return_formatted=True)
+            print libset
+
+if __name__ == '__main__':
+    try:
+        api_key = sys.argv[1]
+        api_url = sys.argv[2]
+        in_folder = sys.argv[3]
+        out_folder = sys.argv[4]
+        data_library = sys.argv[5]
+    except IndexError:
+        print 'usage: %s key url in_folder out_folder data_library' % os.path.basename( sys.argv[0] )
+        sys.exit( 1 )
+    main(api_key, api_url, in_folder, out_folder, data_library )
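A hypothetical invocation (API key, URL, and paths are placeholders), given a companion pair such as sample1.bed and sample1.bed.json sitting in the inbox directory:

    python scripts/api/load_data_with_metadata.py <api_key> http://localhost:8080/api/ /tmp/g_inbox/ /tmp/g_inbox/done/ "API Imports"

Each dataset is uploaded through the library_contents API with the parsed .json document passed as the 'extended_metadata' field of the payload. Note that in this revision the script accepts out_folder but does not yet move processed files into it.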
https://bitbucket.org/galaxy/galaxy-central/changeset/38ce114e06f9/
changeset: 38ce114e06f9
user: dannon
date: 2012-11-12 17:54:45
summary: Merged in kellrott/galaxy-central (pull request #81)
affected #: 5 files
Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--
This is a commit notification from bitbucket.org.
You are receiving this because you have the service enabled and are the recipient of this email.