details: http://www.bx.psu.edu/hg/galaxy/rev/1d93eb88c93f changeset: 3131:1d93eb88c93f user: Enis Afgan <afgane@gmail.com> date: Mon Nov 30 16:36:38 2009 -0500 description: Added ability for a user to add additional storage volume(s) to a running EC2 instance. In this context, a new EBS volume is created, attached to a running instance, and then added to remote zpool. When an instance is shut down and then started back up, all of the associated storage volumes are automatically attached to it and the zpool is imported. diffstat: lib/galaxy/cloud/__init__.py | 100 +++++++++- lib/galaxy/cloud/providers/ec2.py | 300 +++++++++++++++++++++++++++----- lib/galaxy/cloud/providers/eucalyptus.py | 20 +- lib/galaxy/web/controllers/cloud.py | 93 +++++++++- templates/cloud/configure_cloud.mako | 19 +- 5 files changed, 445 insertions(+), 87 deletions(-) diffs (922 lines): diff -r f4654abcec1e -r 1d93eb88c93f lib/galaxy/cloud/__init__.py --- a/lib/galaxy/cloud/__init__.py Fri Nov 20 11:03:41 2009 -0500 +++ b/lib/galaxy/cloud/__init__.py Mon Nov 30 16:36:38 2009 -0500 @@ -29,6 +29,8 @@ SUBMITTED = "submitted", SHUTTING_DOWN_UCI = "shutting-downUCI", SHUTTING_DOWN = "shutting-down", + ADD_STORAGE_UCI = "add-storageUCI", + ADD_STORAGE = "add-storage", AVAILABLE = "available", RUNNING = "running", PENDING = "pending", @@ -40,6 +42,7 @@ TERMINATED = "terminated", SUBMITTED = "submitted", RUNNING = "running", + ADDING = "adding-storage", PENDING = "pending", SHUTTING_DOWN = "shutting-down", ERROR = "error" @@ -48,6 +51,7 @@ store_status = Bunch( WAITING = "waiting", IN_USE = "in-use", + ADDING = "adding", CREATING = "creating", DELETED = 'deleted', ERROR = "error" @@ -161,7 +165,8 @@ model.UCI.table.c.state==uci_states.SUBMITTED_UCI, model.UCI.table.c.state==uci_states.SHUTTING_DOWN_UCI, model.UCI.table.c.state==uci_states.DELETING_UCI, - model.UCI.table.c.state==uci_states.SNAPSHOT_UCI ) ) \ + model.UCI.table.c.state==uci_states.SNAPSHOT_UCI, + model.UCI.table.c.state==uci_states.ADD_STORAGE_UCI ) ) \ .all(): uci_wrapper = UCIwrapper( r, self.app ) new_requests.append( uci_wrapper ) @@ -195,6 +200,12 @@ self.uci_id = uci.id self.app = app self.sa_session = self.app.model.context + base_directory = os.path.join( self.app.config.job_working_directory, "cloud" ) + self.working_directory = os.path.join( base_directory, str( self.uci_id ) ) +# log.debug( "Cloud controller working directory for UCI DB ID '%s': '%s'" % ( self.uci_id, self.working_directory ) ) + if not os.path.exists( base_directory ): + os.mkdir( base_directory ) + # --------- Setter methods ----------------- @@ -364,11 +375,18 @@ self.sa_session.add( uci ) self.sa_session.flush() - def set_store_device( self, store_id, device ): +# def set_store_device( self, store_id, device ): +# uci = self.sa_session.query( model.UCI ).get( self.uci_id ) +# self.sa_session.refresh( uci ) +# uci.store[store_id].device = device +# uci.store[store_id].flush() + + def set_uci_total_size( self, total_size ): uci = self.sa_session.query( model.UCI ).get( self.uci_id ) self.sa_session.refresh( uci ) - uci.store[store_id].device = device - uci.store[store_id].flush() + uci.total_size = total_size + self.sa_session.add( uci ) + self.sa_session.flush() def set_store_error( self, error, store_index=None, store_id=None ): if store_index != None: @@ -405,15 +423,24 @@ def set_store_volume_id( self, store_index, volume_id ): """ - Given store index associated with this UCI in local database, set volume ID as it is registered + Given store index as it is stored in local database, set volume ID as it is registered on the cloud provider (e.g., vol-39890501) """ - uci = self.sa_session.query( model.UCI ).get( self.uci_id ) - self.sa_session.refresh( uci ) - uci.store[store_index].volume_id = volume_id - #uci.store[store_index].flush() - self.sa_session.add( uci ) - self.sa_session.flush() + + if store_index != None: + store = self.sa_session.query( model.CloudStore ).get( store_index ) + store.volume_id = volume_id + self.sa_session.add( store ) + self.sa_session.flush() + else: + return None + +# uci = self.sa_session.query( model.UCI ).get( self.uci_id ) +# self.sa_session.refresh( uci ) +# uci.store[store_index].volume_id = volume_id +# #uci.store[store_index].flush() +# self.sa_session.add( uci ) +# self.sa_session.flush() def set_store_instance( self, vol_id, instance_id ): """ @@ -421,7 +448,29 @@ be given in following format: 'vol-78943248' """ vol = self.sa_session.query( model.CloudStore ).filter( model.CloudStore.table.c.volume_id == vol_id ).first() - vol.inst.instance_id = instance_id + inst = self.sa_session.query( model.CloudInstance ).filter_by( instance_id=instance_id ).first() + vol.inst = inst + self.sa_session.add( vol ) + self.sa_session.flush() + + def set_store_device( self, vol_id, device ): + """ + Stores instance ID that given store volume is attached to. Store volume ID should + be given in following format: 'vol-78943248' + """ + vol = self.sa_session.query( model.CloudStore ).filter( model.CloudStore.table.c.volume_id == vol_id ).first() + vol.device = str( device ) + self.sa_session.add( vol ) + self.sa_session.flush() + + def set_store_deleted( self, vol_id, status=None ): + """ + Set storage volume as deleted in local database. Optionally, set the volume status too. + """ + vol = self.sa_session.query( model.CloudStore ).filter( model.CloudStore.table.c.volume_id == vol_id ).first() + vol.deleted = True + if status != None: + vol.status = status self.sa_session.add( vol ) self.sa_session.flush() @@ -503,6 +552,12 @@ self.sa_session.refresh( uci ) return uci.instance[instance_id].state + def get_instaces_in_state( self, state ): + """ Get database objects of all instances associated with this UCI in given state. """ + return self.sa_session.query( model.CloudInstance ) \ + .filter_by( uci_id=self.uci_id, state = state ) \ + .all() + def get_instances_ids( self ): """ Returns list IDs of all instances' associated with this UCI that are not in 'terminated' or @@ -590,6 +645,11 @@ self.sa_session.refresh( uci ) return uci.store[0].availability_zone + def get_uci_total_size( self ): + uci = self.sa_session.query( model.UCI ).get( self.uci_id ) + self.sa_session.refresh( uci ) + return uci.total_size + def get_store_size( self, store_id=0 ): uci = self.sa_session.query( model.UCI ).get( self.uci_id ) self.sa_session.refresh( uci ) @@ -604,9 +664,18 @@ self.sa_session.refresh( uci ) return uci.store[store_id].volume_id + def get_all_stores_in_status( self, status ): + """ + Return database objects of all stores associated with this UCI that have their + status set to value passed as parameter. + """ + return self.sa_session.query( model.CloudStore ).filter_by( deleted=False, uci_id=self.uci_id, status=status ).all() + def get_all_stores( self ): - """ Returns all storage volumes' database objects associated with this UCI. """ - return self.sa_session.query( model.CloudStore ).filter( model.CloudStore.table.c.uci_id == self.uci_id ).all() + """ Returns all storage volumes' database objects associated with this UCI that have not been marked as 'deleted'. """ + return self.sa_session.query( model.CloudStore ) \ + .filter_by( deleted=False, uci_id=self.uci_id ) \ + .all() def get_snapshots( self, status=None ): """ Returns database objects for all snapshots associated with this UCI and in given status.""" @@ -618,6 +687,9 @@ self.sa_session.refresh( uci ) return uci + def get_uci_working_directory( self ): + return self.working_directory + def uci_launch_time_set( self ): uci = self.sa_session.query( model.UCI ).get( self.uci_id ) self.sa_session.refresh( uci ) diff -r f4654abcec1e -r 1d93eb88c93f lib/galaxy/cloud/providers/ec2.py --- a/lib/galaxy/cloud/providers/ec2.py Fri Nov 20 11:03:41 2009 -0500 +++ b/lib/galaxy/cloud/providers/ec2.py Mon Nov 30 16:36:38 2009 -0500 @@ -1,4 +1,4 @@ -import subprocess, threading, os, errno, time, datetime +import subprocess, threading, os, errno, time, datetime, stat from Queue import Queue, Empty from datetime import datetime @@ -30,6 +30,8 @@ SUBMITTED = "submitted", SHUTTING_DOWN_UCI = "shutting-downUCI", SHUTTING_DOWN = "shutting-down", + ADD_STORAGE_UCI = "add-storageUCI", + ADD_STORAGE = "add-storage", AVAILABLE = "available", RUNNING = "running", PENDING = "pending", @@ -43,6 +45,7 @@ TERMINATED = "terminated", SUBMITTED = "submitted", RUNNING = "running", + ADDING = "adding-storage", PENDING = "pending", SHUTTING_DOWN = "shutting-down", ERROR = "error" @@ -51,6 +54,7 @@ store_status = Bunch( WAITING = "waiting", IN_USE = "in-use", + ADDING = "adding", CREATING = "creating", DELETED = 'deleted', ERROR = "error" @@ -122,6 +126,9 @@ self.stop_uci( uci_wrapper ) elif uci_state==uci_states.SNAPSHOT: self.snapshot_uci( uci_wrapper ) + elif uci_state==uci_states.ADD_STORAGE: + self.add_storage_to_uci( uci_wrapper ) + #self.dummy_start_uci( uci_wrapper ) except: log.exception( "Uncaught exception executing cloud request." ) cnt += 1 @@ -241,25 +248,14 @@ log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone ) uci_wrapper.set_store_availability_zone( self.zone ) - log.info( "Creating volume in zone '%s'..." % uci_wrapper.get_uci_availability_zone() ) + store = uci_wrapper.get_all_stores_in_status( store_status.ADDING )[0] # Because at UCI creation time only 1 storage volume can be created, reference it directly + + log.info( "Creating storage volume in zone '%s' of size '%s'..." % ( uci_wrapper.get_uci_availability_zone(), store.size ) ) # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t # current UCI is 0, so reference it in following methods - vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None ) - uci_wrapper.set_store_volume_id( 0, vol.id ) - - # Wait for a while to ensure volume was created -# vol_status = vol.status -# for i in range( 30 ): -# if vol_status is not "available": -# log.debug( 'Updating volume status; current status: %s' % vol_status ) -# vol_status = vol.status -# time.sleep(3) -# if i is 29: -# log.debug( "Error while creating volume '%s'; stuck in state '%s'; deleting volume." % ( vol.id, vol_status ) ) -# conn.delete_volume( vol.id ) -# uci_wrapper.change_state( uci_state='error' ) -# return - + vol = conn.create_volume( store.size, uci_wrapper.get_uci_availability_zone(), snapshot=None ) + uci_wrapper.set_store_volume_id( store.id, vol.id ) + # Retrieve created volume again to get updated status try: vl = conn.get_all_volumes( [vol.id] ) @@ -306,6 +302,7 @@ if conn.delete_volume( v.volume_id ): deletedList.append( v.volume_id ) v.deleted = True + v.status = store_status.DELETED self.sa_session.add( v ) self.sa_session.flush() count += 1 @@ -358,15 +355,180 @@ uci_wrapper.change_state( uci_state=uci_states.AVAILABLE ) - def add_storage_to_uci( self, name ): - """ Adds more storage to specified UCI - TODO""" - + def add_storage_to_uci( self, uci_wrapper ): + """ + Add an additional storage volume to specified UCI by creating the storage volume + on cloud provider, attaching it to currently running instance and adding it to + 'galaxyData' zpool on remote instance. + """ + conn = self.get_connection( uci_wrapper ) + + stores = uci_wrapper.get_all_stores_in_status( store_status.ADDING ) + for store in stores: + vol_size = store.size + availability_zone = uci_wrapper.get_uci_availability_zone() + log.info( "Adding storage volume to UCI '%s' in zone '%s' of size '%s'..." % ( uci_wrapper.get_name(), availability_zone, vol_size ) ) + + try: + vol = conn.create_volume( vol_size, availability_zone, snapshot=None ) + uci_wrapper.set_store_volume_id( store.id, vol.id ) + uci_wrapper.set_store_availability_zone( availability_zone, vol.id ) + log.debug( "New storage volume created: '%s'" % vol.id ) + except boto.exception.EC2ResponseError, e: + err = "EC2 response error while creating storage volume: " + str( e ) + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, True ) + return + except Exception, ex: + err = "Error while creating storage volume: " + str( ex ) + log.error( err ) + uci_wrapper.set_error( err, True ) + return + + # Retrieve created volume again to get updated status + try: + vl = conn.get_all_volumes( [vol.id] ) + except boto.exception.EC2ResponseError, e: + err = "EC2 response error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( e ) + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, True ) + return + except Exception, ex: + err = "Error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( ex ) + log.error( err ) + uci_wrapper.set_error( err, True ) + return + + # Wait for a while to ensure volume was created + if len( vl ) > 0: + vol_status = vl[0].status # Bc. only single vol is queried, reference it as 0th list element + for i in range( 30 ): + if vol_status != "available": + log.debug( "(%s) Updating volume status; current status: '%s'" % (i, vol_status ) ) + uci_wrapper.change_state( uci_state=vol_status ) + time.sleep(5) + vol_status = vl[0].status + if vol_status == "available": + log.debug( "(%s) New volume status '%s', continuing with file system adjustment." % (i, vol_status ) ) + uci_wrapper.set_store_status( vl[0].id, vol_status ) + break + if i is 29: + err = "Error while creating volume '"+vl[0].id+"'; stuck in state '"+vol_status+"'; deleting volume." + conn.delete_volume( vl[0].id ) + log.error( err ) + uci_wrapper.set_error( err, True ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + conn.delete_volume( vl[0].id ) + uci_wrapper.set_store_deleted( vl[0].id ) + return + else: + err = "Volume '" + vol.id +"' not found by EC2 after being created." + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, True ) + return + + # Get private key for given instance + pk = uci_wrapper.get_key_pair_material() + if pk == None: #If pk does not exist, create it + self.check_key_pair( uci_wrapper, conn ) + pk = uci_wrapper.get_key_pair_material() + + # Get working directory for this UCI and store pk into a file + wd = uci_wrapper.get_uci_working_directory() + if not os.path.exists( wd ): + os.mkdir( wd ) + pk_file_path = os.path.join( wd, "pk" ) + + if pk != None: + # Save private key to a file + pk_file = open( pk_file_path, "w" ) + pk_file.write( pk ) + pk_file.close() + else: + err = "ERROR: Private key not available for this UCI." + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, True ) + return + + if os.path.exists( pk_file_path ): + # Change permissions of the file - this is required by later used ssh + os.chmod( pk_file_path, stat.S_IRUSR | stat.S_IWUSR ) + + # Get # of storage volumes associated with this UCI to know as which device to connect new volume to the instance + device_num = len( uci_wrapper.get_all_stores_in_status( store_status.IN_USE ) ) + 5 # First device num is 5, so all subsequent ones should follow + + # Get instance that the new storage volume is to be attached to. Although a list is returned, + # only 1 instance can be in 'adding-storage' state (because, for now, only 1 instance is assoc. with + # each UCI) and volume can be attached to only to it + il = uci_wrapper.get_instaces_in_state( instance_states.ADDING ) + if len( il ) > 0: + # Attach new volume to the instance + log.debug( "Attaching new storage volume '%s' to UCI '%s' as device '%s'" % + ( vol.id, uci_wrapper.get_name(), device_num ) ) + try: + vol_status = conn.attach_volume( vol.id, il[0].instance_id, device_num ) + except boto.exception.EC2ResponseError, e: + err = "Attaching just created storage volume '" + vol.id + "'to instance '" + \ + il[0].instance_id + "' as device '" + str( device_num ) + "' failed: " + str( e ) + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, True ) + return + # For a while, keep checking attachment status of the new volume + for i in range(30): + log.debug( "Checking attachment status of new volume '%s': '%s'" % ( vol.id, vol_status ) ) + if vol_status == 'attached': + uci_wrapper.set_store_status( vol.id, vol_status ) + uci_wrapper.set_store_device( vol.id, device_num ) + break + if i == 29: + err = "Storage volume '" + vol.id + "' failed to attach to instance '" + il[0].instance_id + \ + "'. Manual check needed." + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, False ) + return + + time.sleep(4) + vol_list = conn.get_all_volumes( [vol.id] ) + for v in vol_list: + vol_status = v.attachment_state() + + # Once storage volume is attached, add it to the zpool by issuing system level command + cmd = 'ssh -o StrictHostKeyChecking=no -i '+ pk_file_path +' root@'+il[0].public_dns+' "zpool add galaxyData c7d' + str( device_num )+'"' + log.debug( "Adding new storage volume to zpool cmd: %s" % cmd ) + stdout = os.system( cmd ) + if stdout != 0: + err = "Adding newly created storage volume to zpool on instance '" + il[0].instance_id + \ + "' failed. Error code: " + str( stdout ) + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, False ) + return + else: + err = "No instance(s) found in 'adding-storage' state. New disk not added to UCI's zpool." + log.error( err ) + uci_wrapper.set_store_error( err, store_id=vol.id ) + uci_wrapper.set_error( err, True ) + return + + # Update UCI's total storage size + uci_wrapper.set_uci_total_size( uci_wrapper.get_uci_total_size() + vol.size ) + # Reset UCI's and instance's state + uci_wrapper.change_state( uci_state=uci_states.RUNNING, instance_id=il[0].instance_id, i_state=instance_states.RUNNING ) + log.debug( "Successfully added storage volume '%s' to UCI '%s'." % ( vol.id, uci_wrapper.get_name() ) ) + def dummy_start_uci( self, uci_wrapper ): uci = uci_wrapper.get_uci() - log.debug( "Would be starting instance '%s'" % uci.name ) - uci_wrapper.change_state( uci_state.PENDING ) + log.debug( "Dummy start UCI '%s'" % uci.name ) + + +# uci_wrapper.change_state( uci_state.PENDING ) # log.debug( "Sleeping a bit... (%s)" % uci.name ) # time.sleep(20) # log.debug( "Woke up! (%s)" % uci.name ) @@ -418,10 +580,25 @@ if uci_wrapper.get_uci_state() != uci_states.ERROR: # Start an instance log.debug( "Starting instance for UCI '%s'" % uci_wrapper.get_name() ) - #TODO: Once multiple volumes can be attached to a single instance, update 'userdata' composition - userdata = uci_wrapper.get_store_volume_id()+"|"+uci_wrapper.get_access_key()+"|"+uci_wrapper.get_secret_key() + #TODO: Once multiple volumes can be attached to a single instance, update 'userdata' composition + # Compose user data; for storage volumes, separate multiple volumes with a colon (:) ensuring that + # the last volume in the list is not followed by a colon. + stores = uci_wrapper.get_all_stores() + volume_ids = "" + if len( stores ) > 0: + for i, store in enumerate( stores ): + volume_ids += store.volume_id + if i < len( stores )-1: + volume_ids += ":" + else: + err = "No storage volumes found that are associated with UCI '%s'" + uci_wrapper.get_name() + log.error( err ) + uci_wrapper.set_error( err, True ) + return + userdata = volume_ids+"|"+uci_wrapper.get_access_key()+"|"+uci_wrapper.get_secret_key() log.debug( "Using following command: conn.run_instances( image_id='%s', key_name='%s', security_groups=['%s'], user_data=[OMITTED], instance_type='%s', placement='%s' )" % ( mi_id, uci_wrapper.get_key_pair_name(), self.security_group, uci_wrapper.get_instance_type( i_index ), uci_wrapper.get_uci_availability_zone() ) ) + # Start an instance reservation = None try: reservation = conn.run_instances( image_id=mi_id, @@ -454,7 +631,10 @@ uci_wrapper.change_state( s, i_id, s ) uci_wrapper.set_security_group_name( self.security_group, i_id=i_id ) vol_id = uci_wrapper.get_store_volume_id( store_id=0 ) # TODO: Once more that one vol/UCI is allowed, update this! - uci_wrapper.set_store_status( vol_id, store_status.WAITING ) + # Following line is pointless bc. general update updates status of volume to 'available' + # before it actually connects to starting instance... This has been dealt w/ in general update method + #uci_wrapper.set_store_status( vol_id, store_status.WAITING ) + uci_wrapper.set_store_instance( vol_id, i_id ) log.debug( "Instance of UCI '%s' started, current state: '%s'" % ( uci_wrapper.get_name(), uci_wrapper.get_uci_state() ) ) except boto.exception.EC2ResponseError, e: err = "EC2 response error when retrieving instance information for UCI '" + uci_wrapper.get_name() + "': " + str( e ) @@ -571,19 +751,30 @@ .all() for inst in instances: if self.type == inst.uci.credentials.provider.type: - log.debug( "[%s] Running general status update on instance '%s'" % ( inst.uci.credentials.provider.type, inst.instance_id ) ) + log.debug( "[%s] Running general status update on instance '%s'" + % ( inst.uci.credentials.provider.type, inst.instance_id ) ) self.update_instance( inst ) + # Update storage volume(s) associated with current instance + stores = self.sa_session.query( model.CloudStore ) \ + .filter_by( uci_id=inst.uci_id, deleted=False ) \ + .all() + for store in stores: + if self.type == store.uci.credentials.provider.type: # and store.volume_id != None: + log.debug( "[%s] Running general status update on store with local database ID: '%s'" + % ( store.uci.credentials.provider.type, store.id ) ) + self.update_store( store ) # Update storage volume(s) stores = self.sa_session.query( model.CloudStore ) \ - .filter( or_( model.CloudStore.table.c.status==store_status.IN_USE, - model.CloudStore.table.c.status==store_status.CREATING, - model.CloudStore.table.c.status==store_status.WAITING, + .filter( or_( model.CloudStore.table.c.status==store_status.CREATING, +# model.CloudStore.table.c.status==store_status.IN_USE, +# model.CloudStore.table.c.status==store_status.WAITING, model.CloudStore.table.c.status==None ) ) \ .all() for store in stores: if self.type == store.uci.credentials.provider.type: # and store.volume_id != None: - log.debug( "[%s] Running general status update on store with local database ID: '%s'" % ( store.uci.credentials.provider.type, store.id ) ) + log.debug( "[%s] Running general status update on store with local database ID: '%s'" + % ( store.uci.credentials.provider.type, store.id ) ) self.update_store( store ) # else: # log.error( "[%s] There exists an entry for UCI (%s) storage volume without an ID. Storage volume might have been created with " @@ -668,7 +859,8 @@ for i, cInst in enumerate( r.instances ): try: s = cInst.update() - log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) ) + log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' " \ + "and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) ) if s != inst.state: inst.state = s self.sa_session.add( inst ) @@ -693,7 +885,8 @@ self.sa_session.add( inst ) self.sa_session.flush() except boto.exception.EC2ResponseError, e: - err = "Updating instance status from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e ) + err = "Updating instance status from cloud failed for UCI '"+ uci.name + \ + "' during general status update: " + str( e ) log.error( err ) uci.error = err uci.state = uci_states.ERROR @@ -715,7 +908,7 @@ # Get reservations handle for given store try: - log.debug( "Updating storage volume command: vl = conn.get_all_volumes( [%s] )" % store.volume_id ) + log.debug( "Retrieving reference to storage volume '%s' during update..." % store.volume_id ) vl = conn.get_all_volumes( [store.volume_id] ) except boto.exception.EC2ResponseError, e: err = "Retrieving volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e ) @@ -729,7 +922,7 @@ # Update store status in local DB with info from cloud provider if len(vl) > 0: try: - log.debug( "Storage volume '%s' current status: '%s'" % (store.volume_id, vl[0].status ) ) + log.debug( "General status update for storage volume '%s'; current status: '%s'" % (store.volume_id, vl[0].status ) ) if store.status != vl[0].status: # In case something failed during creation of UCI but actual storage volume was created and yet # UCI state remained as 'new', try to remedy this by updating UCI state here @@ -746,21 +939,26 @@ store.status = vl[0].status self.sa_session.add( store ) self.sa_session.flush() - if store.inst != None: - if store.inst.instance_id != vl[0].instance_id: - store.inst.instance_id = vl[0].instance_id - self.sa_session.add( store ) - self.sa_session.flush() - if store.attach_time != vl[0].attach_time: - store.attach_time = vl[0].attach_time - self.sa_session.add( store ) - self.sa_session.flush() - if store.device != vl[0].device: - store.device = vl[0].device - self.sa_session.add( store ) - self.sa_session.flush() + # Boto does not seem to be reporting these values although fields exist so comment them out... +# log.debug( "vl[0].instance_id: '%s'" % vl[0].instance_id ) +# if store.inst != None: +# if store.inst.instance_id != vl[0].instance_id: +# store.inst.instance_id = vl[0].instance_id +# self.sa_session.add( store ) +# self.sa_session.flush() +# log.debug( "vl[0].attach_time: '%s'" % vl[0].attach_time ) +# if store.attach_time != vl[0].attach_time: +# store.attach_time = vl[0].attach_time +# self.sa_session.add( store ) +# self.sa_session.flush() +## log.debug( "vl[0].device: '%s'" % vl[0].device ) +# if store.device != vl[0].device: +# store.device = vl[0].device +# self.sa_session.add( store ) +# self.sa_session.flush() except boto.exception.EC2ResponseError, e: - err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e ) + err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + \ + "' during general status update: " + str( e ) log.error( err ) uci.error = err uci.state = uci_states.ERROR @@ -768,7 +966,7 @@ self.sa_session.flush() return None else: - err = "No storage volumes returned by cloud provider on general update" + err = "No storage volumes returned by cloud provider on general update for volume with id: " + store.volume_id log.error( "%s for UCI '%s'" % ( err, uci.name ) ) store.status = store_status.ERROR store.error = err diff -r f4654abcec1e -r 1d93eb88c93f lib/galaxy/cloud/providers/eucalyptus.py --- a/lib/galaxy/cloud/providers/eucalyptus.py Fri Nov 20 11:03:41 2009 -0500 +++ b/lib/galaxy/cloud/providers/eucalyptus.py Mon Nov 30 16:36:38 2009 -0500 @@ -30,6 +30,8 @@ SUBMITTED = "submitted", SHUTTING_DOWN_UCI = "shutting-downUCI", SHUTTING_DOWN = "shutting-down", + ADD_STORAGE_UCI = "add-storageUCI", + ADD_STORAGE = "add-storage", AVAILABLE = "available", RUNNING = "running", PENDING = "pending", @@ -43,6 +45,7 @@ TERMINATED = "terminated", SUBMITTED = "submitted", RUNNING = "running", + ADDING = "adding-storage", PENDING = "pending", SHUTTING_DOWN = "shutting-down", ERROR = "error" @@ -51,6 +54,7 @@ store_status = Bunch( WAITING = "waiting", IN_USE = "in-use", + ADDING = "adding", CREATING = "creating", DELETED = 'deleted', ERROR = "error" @@ -121,6 +125,8 @@ self.stop_uci( uci_wrapper ) elif uci_state==uci_states.SNAPSHOT: self.snapshot_uci( uci_wrapper ) + elif uci_state==uci_states.ADD_STORAGE: + self.add_storage_to_uci( uci_wrapper ) except: log.exception( "Uncaught exception executing cloud request." ) cnt += 1 @@ -245,9 +251,16 @@ log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone ) uci_wrapper.set_store_availability_zone( self.zone ) - log.debug( "Creating volume; using command: conn.create_volume( %s, '%s', snapshot=None )" % ( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone() )) - vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None ) - uci_wrapper.set_store_volume_id( 0, vol.id ) +# log.debug( "Creating volume; using command: conn.create_volume( %s, '%s', snapshot=None )" % ( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone() )) +# vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None ) +# uci_wrapper.set_store_volume_id( 0, vol.id ) + store = uci_wrapper.get_all_stores_in_status( store_status.ADDING )[0] # Because at UCI creation time only 1 storage volume can be created, reference it directly + + log.info( "Creating storage volume in zone '%s' of size '%s'..." % ( uci_wrapper.get_uci_availability_zone(), store.size ) ) + # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t + # current UCI is 0, so reference it in following methods + vol = conn.create_volume( store.size, uci_wrapper.get_uci_availability_zone(), snapshot=None ) + uci_wrapper.set_store_volume_id( store.id, vol.id ) # Retrieve created volume again to get updated status try: @@ -364,6 +377,7 @@ def add_storage_to_uci( self, uci_wrapper ): """ Adds more storage to specified UCI """ + uci_wrapper.set_error( "Adding storage to eucalyptus-based clouds is not yet supported.", True ) def dummy_start_uci( self, uci_wrapper ): diff -r f4654abcec1e -r 1d93eb88c93f lib/galaxy/web/controllers/cloud.py --- a/lib/galaxy/web/controllers/cloud.py Fri Nov 20 11:03:41 2009 -0500 +++ b/lib/galaxy/web/controllers/cloud.py Mon Nov 30 16:36:38 2009 -0500 @@ -41,6 +41,8 @@ SUBMITTED = "submitted", SHUTTING_DOWN_UCI = "shutting-downUCI", SHUTTING_DOWN = "shutting-down", + ADD_STORAGE_UCI = "add-storageUCI", + ADD_STORAGE = "add-storage", AVAILABLE = "available", RUNNING = "running", PENDING = "pending", @@ -54,13 +56,16 @@ TERMINATED = "terminated", SUBMITTED = "submitted", RUNNING = "running", + ADDING = "adding-storage", PENDING = "pending", SHUTTING_DOWN = "shutting-down", ERROR = "error" ) store_status = Bunch( + WAITING = "waiting", IN_USE = "in-use", + ADDING = "adding", CREATING = "creating", DELETED = 'deleted', ERROR = "error" @@ -108,7 +113,9 @@ model.UCI.table.c.state==uci_states.SUBMITTED, model.UCI.table.c.state==uci_states.SUBMITTED_UCI, model.UCI.table.c.state==uci_states.SHUTTING_DOWN, - model.UCI.table.c.state==uci_states.SHUTTING_DOWN_UCI ) ) \ + model.UCI.table.c.state==uci_states.SHUTTING_DOWN_UCI, + model.UCI.table.c.state==uci_states.ADD_STORAGE, + model.UCI.table.c.state==uci_states.ADD_STORAGE_UCI ) ) \ .order_by( desc( model.UCI.table.c.update_time ) ) \ .all() @@ -201,7 +208,8 @@ storage.user = user storage.uci = uci storage.size = volSize - storage.availability_zone = zone + storage.availability_zone = zone + storage.status = store_status.ADDING # Persist session = trans.sa_session session.add( uci ) @@ -306,6 +314,8 @@ ( uci.state != uci_states.ERROR ) and \ ( uci.state != uci_states.SHUTTING_DOWN_UCI ) and \ ( uci.state != uci_states.SHUTTING_DOWN ) and \ + ( uci.state != uci_states.ADD_STORAGE_UCI ) and \ + ( uci.state != uci_states.ADD_STORAGE ) and \ ( uci.state != uci_states.AVAILABLE ): uci.state = uci_states.SHUTTING_DOWN_UCI session = trans.sa_session @@ -496,13 +506,53 @@ @web.expose @web.require_login( "add instance storage" ) - def add_storage( self, trans, id ): - instance = get_uci( trans, id ) + def add_storage( self, trans, id, vol_size=None ): + error = None + uci = get_uci( trans, id ) + stores = get_stores_in_status( trans, uci, store_status.IN_USE ) + # Start adding of storage making sure given UCI is running and that at least one + # storage volume is attached to it (this is needed to by cloud controller to know + # as which device to attach the new storage volume) + if uci.state == uci_states.RUNNING and len( stores ) > 0: + if vol_size is not None: + try: + vol_size = int( vol_size ) + except ValueError: + error = "Volume size must be integer value between 1 and 1000." + + if not error: + user = trans.get_user() + + storage = model.CloudStore() + storage.user = user + storage.uci = uci + storage.size = vol_size + storage.status = store_status.ADDING + + # Set state of instance - NOTE that this code will only work (with code in cloud controller) + # for scenario where a UCI is associated with *1* compute instance!!! + instances = get_instances( trans, uci ) + instances.state = instance_states.ADDING + + uci.state = uci_states.ADD_STORAGE_UCI + # Persist + session = trans.sa_session + session.add( instances ) + session.add( storage ) + session.add( uci ) + session.flush() + # Log and display the management page + trans.log_event( "User added storage volume to UCI: '%s'" % uci.name ) + trans.set_message( "Adding of storage to instance '%s' initiated." % uci.name ) + return self.list( trans ) + else: + error( "Storage can only be added to instances that are in state 'RUNNING' with existing " \ + "storage volume(s) already attached." ) - error( "Adding storage to instance '%s' is not supported yet." % instance.name ) - - return self.list( trans ) + return trans.show_form( + web.FormBuilder( url_for( id=trans.security.encode_id(uci.id) ), "Add storage to an instance", submit_text="Add" ) + .add_text( "vol_size", "Storage size (1-1000 GB)", value='', error=error ) ) # ----- Image methods ----- @web.expose @@ -1034,6 +1084,7 @@ dict = {} dict['id'] = uci.id dict['state'] = uci.state + dict['total_size'] = uci.total_size if uci.error != None: dict['error'] = str( uci.error ) else: @@ -1136,19 +1187,30 @@ def get_stores( trans, uci ): """ - Get stores objects that are connected to uci object + Get stores objects that are associated with given uci and are not in 'error' status """ user = trans.get_user() stores = trans.sa_session.query( model.CloudStore ) \ - .filter_by( user=user, uci_id=uci.id ) \ + .filter_by( user=user, uci_id=uci.id, deleted=False ) \ .filter( model.CloudStore.table.c.status != store_status.ERROR ) \ .all() return stores +def get_stores_in_status( trans, uci, status ): + """ + Get stores objects that are associated with given uci and are not have given status + """ + user = trans.get_user() + stores = trans.sa_session.query( model.CloudStore ) \ + .filter_by( user=user, uci_id=uci.id, status=status ) \ + .all() + + return stores + def get_instances( trans, uci ): """ - Get objects of instances that are pending or running and are connected to uci object + Get objects of instances that are pending or running and are connected to the given uci object """ user = trans.get_user() instances = trans.sa_session.query( model.CloudInstance ) \ @@ -1159,6 +1221,17 @@ return instances +def get_instances_in_state( trans, uci, state ): + """ + Get objects of instances that are in specified state and are connected to the given uci object + """ + user = trans.get_user() + instances = trans.sa_session.query( model.CloudInstance ) \ + .filter_by( user=user, uci_id=uci.id, state=state ) \ + .all() + + return instances + def get_connection( trans, creds ): """ Establishes cloud connection using user's credentials diff -r f4654abcec1e -r 1d93eb88c93f templates/cloud/configure_cloud.mako --- a/templates/cloud/configure_cloud.mako Fri Nov 20 11:03:41 2009 -0500 +++ b/templates/cloud/configure_cloud.mako Mon Nov 30 16:36:38 2009 -0500 @@ -79,7 +79,8 @@ }); } - // Update 'state' and 'time alive' fields + // Update 'size', 'state' and 'time alive' fields + $(elem + "-size").text( data[i].total_size ); $(elem + "-state").text( data[i].state ); if ( new_state != 'error' ) { // Because 'error' state is handled as a JS link, don't include it in update $(elem + "-state-p").text( data[i].state ); @@ -219,7 +220,7 @@ ${liveInstance.name} (${liveInstance.credentials.name}) <a id="li-${i}-popup" class="popup-arrow" style="display: none;">▼</a> </td> - <td>${str(liveInstance.total_size)}</td> + <td id="${ liveInstance.id }-size">${str(liveInstance.total_size)}</td> <td id="${ liveInstance.id }-state">${str(liveInstance.state)}</td> <td id="${ liveInstance.id }-launch_time"> ##${str(liveInstance.launch_time)[:16]} @@ -251,7 +252,8 @@ <a class="action-button" confirm="Are you sure you want to stop instance '${liveInstance.name}'?" href="${h.url_for( action='stop', id=trans.security.encode_id(liveInstance.id) )}">Stop</a> <a class="action-button" href="${h.url_for( action='rename_uci', id=trans.security.encode_id(liveInstance.id) )}">Rename</a> <a class="action-button" href="${h.url_for( action='view_uci_details', id=trans.security.encode_id(liveInstance.id) )}">View details</a> - <a class="action-button" href="${h.url_for( action='uci_usage_report', id=trans.security.encode_id(liveInstance.id) )}">Usage report</a> + <a class="action-button" href="${h.url_for( action='add_storage', id=trans.security.encode_id(liveInstance.id) )}">Add storage</a> + <a class="action-button" href="${h.url_for( action='uci_usage_report', id=trans.security.encode_id(liveInstance.id) )}">Usage report</a> </div> </td> </tr> @@ -315,14 +317,13 @@ </td> <td> <div popupmenu="pi-${i}-popup"> - <a class="action-button" href="${h.url_for( action='start', id=trans.security.encode_id(prevInstance.id), type='m1.small' )}"> Start m1.small</a> + <a class="action-button" href="${h.url_for( action='uci_usage_report', id=trans.security.encode_id(prevInstance.id) )}">Usage report</a> + <a class="action-button" href="${h.url_for( action='rename_uci', id=trans.security.encode_id(prevInstance.id) )}">Rename</a> + <a class="action-button" href="${h.url_for( action='start', id=trans.security.encode_id(prevInstance.id), type='m1.small' )}"> Start m1.small</a> <a class="action-button" href="${h.url_for( action='start', id=trans.security.encode_id(prevInstance.id), type='c1.medium' )}"> Start c1.medium</a> - <a class="action-button" href="${h.url_for( action='rename_uci', id=trans.security.encode_id(prevInstance.id) )}">Rename</a> - <a class="action-button" href="${h.url_for( action='uci_usage_report', id=trans.security.encode_id(prevInstance.id) )}">Usage report</a> - <a class="action-button" href="${h.url_for( action='create_snapshot', id=trans.security.encode_id(prevInstance.id) )}">Create snapshot</a> + <a class="action-button" href="${h.url_for( action='create_snapshot', id=trans.security.encode_id(prevInstance.id) )}">Create snapshot</a> <a class="action-button" href="${h.url_for( action='view_snapshots', id=trans.security.encode_id(prevInstance.id) )}">View snapshots</a> - <a class="action-button" href="${h.url_for( action='add_storage', id=trans.security.encode_id(prevInstance.id) )}" target="_parent">Add storage</a> - <a class="action-button" confirm="Are you sure you want to delete instance '${prevInstance.name}'? This will delete all of your data assocaiated with this instance!" href="${h.url_for( action='delete_uci', id=trans.security.encode_id(prevInstance.id) )}">Delete</a> + <a class="action-button" confirm="Are you sure you want to delete instance '${prevInstance.name}'? This will delete all of your data assocaiated with this instance!" href="${h.url_for( action='delete_uci', id=trans.security.encode_id(prevInstance.id) )}">Delete</a> </div> </td> </tr>