details: http://www.bx.psu.edu/hg/galaxy/rev/74e392ea6ae8 changeset: 1643:74e392ea6ae8 user: Greg Von Kuster <greg@bx.psu.edu> date: Tue Dec 02 10:50:54 2008 -0500 description: Call set_size() before set_peek() on datasets, changes to set_peek() to not read entire files for some datatypes. 14 file(s) affected in this change: lib/galaxy/datatypes/images.py lib/galaxy/datatypes/qualityscore.py lib/galaxy/datatypes/registry.py lib/galaxy/datatypes/sequence.py lib/galaxy/model/__init__.py lib/galaxy/tools/__init__.py lib/galaxy/tools/actions/upload.py lib/galaxy/web/controllers/root.py tools/data_source/encode_import_code.py tools/data_source/gbrowse_filter_code.py tools/data_source/hbvar_filter.py tools/data_source/microbial_import_code.py tools/data_source/ucsc_filter.py tools/maf/maf_to_bed_code.py diffs (385 lines): diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/datatypes/images.py Tue Dec 02 10:50:54 2008 -0500 @@ -15,8 +15,8 @@ file_ext = "ab1" def set_peek( self, dataset ): export_url = "/history_add_to?"+urlencode({'history_id':dataset.history_id,'ext':'ab1','name':'ab1 sequence','info':'Sequence file','dbkey':dataset.dbkey}) - dataset.peek = "Binary ab1 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) - dataset.blurb = "Binary ab1 sequence file" + dataset.peek = "Binary ab1 sequence file" + dataset.blurb = data.nice_size( dataset.get_size() ) def display_peek(self, dataset): try: return dataset.peek @@ -28,8 +28,8 @@ file_ext = "scf" def set_peek( self, dataset ): export_url = "/history_add_to?"+urlencode({'history_id':dataset.history_id,'ext':'scf','name':'scf sequence','info':'Sequence file','dbkey':dataset.dbkey}) - dataset.peek = "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) - dataset.blurb = "Binary scf sequence file" + dataset.peek = "Binary scf sequence file" + dataset.blurb = data.nice_size( dataset.get_size() ) def display_peek(self, dataset): try: return dataset.peek @@ -42,8 +42,8 @@ def set_peek( self, dataset ): zip_file = zipfile.ZipFile( dataset.file_name, "r" ) num_files = len( zip_file.namelist() ) - dataset.peek = "Binary sequence file archive (%s)" % ( data.nice_size( dataset.get_size() ) ) - dataset.blurb = 'Zip archive of %s binary sequence files' % ( str( num_files ) ) + dataset.peek = "Archive of %s binary sequence files" % ( str( num_files ) ) + dataset.blurb = data.nice_size( dataset.get_size() ) def display_peek(self, dataset): try: return dataset.peek @@ -59,8 +59,8 @@ def set_peek( self, dataset ): zip_file = zipfile.ZipFile( dataset.file_name, "r" ) num_files = len( zip_file.namelist() ) - dataset.peek = "Text sequence file archive (%s)" % ( data.nice_size( dataset.get_size() ) ) - dataset.blurb = 'Zip archive of %s text sequence files' % ( str( num_files ) ) + dataset.peek = "Archive of %s text sequence files" % ( str( num_files ) ) + dataset.blurb = data.nice_size( dataset.get_size() ) def display_peek(self, dataset): try: return dataset.peek @@ -73,8 +73,8 @@ class Image( data.Data ): """Class describing an image""" def set_peek( self, dataset ): - dataset.peek = 'Image in %s format (%s)' % ( dataset.extension, data.nice_size( dataset.get_size() ) ) - dataset.blurb = 'image' + dataset.peek = 'Image in %s format' % dataset.extension + dataset.blurb = data.nice_size( dataset.get_size() ) def create_applet_tag_peek( class_name, archive, params ): text = """ @@ -147,7 +147,7 @@ """Class describing an html file""" file_ext = "html" def set_peek( self, dataset ): - dataset.peek = "HTML file (%s)" % ( data.nice_size( dataset.get_size() ) ) + dataset.peek = "HTML file" dataset.blurb = data.nice_size( dataset.get_size() ) def get_mime(self): """Returns the mime type of the datatype""" diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/datatypes/qualityscore.py --- a/lib/galaxy/datatypes/qualityscore.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/datatypes/qualityscore.py Tue Dec 02 10:50:54 2008 -0500 @@ -18,7 +18,7 @@ def set_peek( self, dataset, line_count=None ): dataset.peek = data.get_file_peek( dataset.file_name ) if line_count is None: - dataset.blurb = "%s lines, Quality score file" % util.commaify( str( data.get_line_count( dataset.file_name ) ) ) + dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.blurb = "%s lines, Quality score file" % util.commaify( str( line_count ) ) diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/datatypes/registry.py Tue Dec 02 10:50:54 2008 -0500 @@ -191,6 +191,7 @@ # being converted *to* will handle any metadata copying and # initialization. if data.has_data(): + data.set_size() data.init_meta( copy_from=data ) data.set_meta( overwrite = False ) data.set_peek() diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/datatypes/sequence.py Tue Dec 02 10:50:54 2008 -0500 @@ -32,17 +32,7 @@ def set_peek( self, dataset ): dataset.peek = data.get_file_peek( dataset.file_name ) - count = size = 0 - for line in file( dataset.file_name ): - if line and line[0] == ">": - count += 1 - else: - line = line.strip() - size += len(line) - if count == 1: - dataset.blurb = '%d bases' % size - else: - dataset.blurb = '%d sequences' % count + dataset.blurb = data.nice_size( dataset.get_size() ) def sniff(self, filename): """ @@ -96,17 +86,7 @@ def set_peek( self, dataset ): dataset.peek = data.get_file_peek( dataset.file_name ) - count = size = 0 - for line in file( dataset.file_name ): - if line and line[0] == ">": - count += 1 - else: - line = line.strip() - size += len(line) - if count == 1: - dataset.blurb = '%d bases' % size - else: - dataset.blurb = '%d sequences' % count + dataset.blurb = data.nice_size( dataset.get_size() ) def sniff( self, filename ): """ @@ -128,18 +108,7 @@ def set_peek( self, dataset ): dataset.peek = data.get_file_peek( dataset.file_name ) - count = size = 0 - bases_regexp = re.compile("^[NGTAC]*$") - for i, line in enumerate(file( dataset.file_name )): - if line and line[0] == "@" and i % 4 == 0: - count += 1 - elif bases_regexp.match(line): - line = line.strip() - size += len(line) - if count == 1: - dataset.blurb = '%d bases' % size - else: - dataset.blurb = '%d sequences' % count + dataset.blurb = data.nice_size( dataset.get_size() ) def sniff( self, filename ): """ diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/model/__init__.py Tue Dec 02 10:50:54 2008 -0500 @@ -289,6 +289,7 @@ def copy( self, copy_children = False, parent_id = None ): des = HistoryDatasetAssociation( hid=self.hid, name=self.name, info=self.info, blurb=self.blurb, peek=self.peek, extension=self.extension, dbkey=self.dbkey, dataset = self.dataset, visible=self.visible, deleted=self.deleted, parent_id=parent_id, copied_from_history_dataset_association = self ) des.flush() + des.set_size() des.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id if copy_children: for child in self.children: @@ -467,7 +468,8 @@ def set_size( self ): """Returns the size of the data on disk""" try: - self.file_size = os.path.getsize( self.file_name ) + if not self.file_size: + self.file_size = os.path.getsize( self.file_name ) except OSError: self.file_size = 0 def has_data( self ): diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/tools/__init__.py Tue Dec 02 10:50:54 2008 -0500 @@ -1189,6 +1189,7 @@ # tag set in the tool config. if self.tool_type == 'data_source': name, data = out_data.items()[0] + data.set_size() if data.state == data.states.OK: data.name = param_dict.get( 'name', data.name ) data.info = param_dict.get( 'info', data.name ) @@ -1202,7 +1203,6 @@ if data.missing_meta(): data = app.datatypes_registry.change_datatype( data, 'tabular' ) data.set_peek() - data.set_size() data.flush() def collect_associated_files( self, output ): @@ -1234,12 +1234,12 @@ # Move data from temp location to dataset location shutil.move( filename, child_dataset.file_name ) child_dataset.flush() + child_dataset.set_size() child_dataset.name = "Secondary Dataset (%s)" % ( designation ) child_dataset.state = child_dataset.states.OK child_dataset.init_meta() child_dataset.set_meta() child_dataset.set_peek() - child_dataset.set_size() child_dataset.flush() # Add child to return dict children[name][designation] = child_dataset @@ -1270,13 +1270,13 @@ primary_data.flush() # Move data from temp location to dataset location shutil.move( filename, primary_data.file_name ) + primary_data.set_size() primary_data.name = dataset.name primary_data.info = dataset.info primary_data.state = primary_data.states.OK primary_data.init_meta( copy_from=dataset ) primary_data.set_meta() primary_data.set_peek() - primary_data.set_size() primary_data.flush() outdata.history.add_dataset( primary_data ) # Add dataset to return dict diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/tools/actions/upload.py --- a/lib/galaxy/tools/actions/upload.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/tools/actions/upload.py Tue Dec 02 10:50:54 2008 -0500 @@ -210,6 +210,7 @@ data.flush() shutil.move( temp_name, data.file_name ) data.state = data.states.OK + data.set_size() data.init_meta() if self.line_count is not None: try: @@ -218,7 +219,6 @@ data.set_peek() else: data.set_peek() - data.set_size() # validate incomming data """ diff -r 499eed864ab3 -r 74e392ea6ae8 lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Mon Dec 01 14:07:36 2008 -0500 +++ b/lib/galaxy/web/controllers/root.py Tue Dec 02 10:50:54 2008 -0500 @@ -647,13 +647,13 @@ data_file.write( file_data.file.read() ) data_file.close() data.state = data.states.OK + data.set_size() data.init_meta() data.set_meta() data.flush() history.add_dataset( data ) history.flush() data.set_peek() - data.set_size() data.flush() trans.log_event("Added dataset %d to history %d" %(data.id, trans.history.id)) if self.app.memory_usage: diff -r 499eed864ab3 -r 74e392ea6ae8 tools/data_source/encode_import_code.py --- a/tools/data_source/encode_import_code.py Mon Dec 01 14:07:36 2008 -0500 +++ b/tools/data_source/encode_import_code.py Tue Dec 02 10:50:54 2008 -0500 @@ -19,6 +19,7 @@ dbkey = fields[2] file_type = fields[3] name, data = out_data.items()[0] + data.set_size() basic_name = data.name data.name = data.name + " (" + description + ")" data.dbkey = dbkey @@ -26,7 +27,6 @@ data = app.datatypes_registry.change_datatype( data, file_type ) data.init_meta() data.set_peek() - data.set_size() app.model.flush() elif fields[0] == "#NewFile": description = fields[1] @@ -34,6 +34,7 @@ filepath = fields[3] file_type = fields[4] newdata = app.model.HistoryDatasetAssociation( create_dataset = True ) #This import should become a library + newdata.set_size() newdata.extension = file_type newdata.name = basic_name + " (" + description + ")" history.add_dataset( newdata ) @@ -48,5 +49,4 @@ newdata.dbkey = dbkey newdata.set_meta() newdata.set_peek() - newdata.set_size() app.model.flush() diff -r 499eed864ab3 -r 74e392ea6ae8 tools/data_source/gbrowse_filter_code.py --- a/tools/data_source/gbrowse_filter_code.py Mon Dec 01 14:07:36 2008 -0500 +++ b/tools/data_source/gbrowse_filter_code.py Tue Dec 02 10:50:54 2008 -0500 @@ -24,11 +24,11 @@ def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ): """Verifies the data after the run""" name, data = out_data.items()[0] + data.set_size() if data.state == data.states.OK: data.info = data.name if data.extension == 'txt': data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) data = app.datatypes_registry.change_datatype( data, data_type ) data.set_peek() - data.set_size() data.flush() diff -r 499eed864ab3 -r 74e392ea6ae8 tools/data_source/hbvar_filter.py --- a/tools/data_source/hbvar_filter.py Mon Dec 01 14:07:36 2008 -0500 +++ b/tools/data_source/hbvar_filter.py Tue Dec 02 10:50:54 2008 -0500 @@ -71,6 +71,6 @@ else: data = app.datatypes_registry.change_datatype(data, 'tabular') + data.set_size() data.set_peek() - data.set_size() data.flush() diff -r 499eed864ab3 -r 74e392ea6ae8 tools/data_source/microbial_import_code.py --- a/tools/data_source/microbial_import_code.py Mon Dec 01 14:07:36 2008 -0500 +++ b/tools/data_source/microbial_import_code.py Tue Dec 02 10:50:54 2008 -0500 @@ -114,6 +114,7 @@ dbkey = fields[3] file_type = fields[4] name, data = out_data.items()[0] + data.set_size() basic_name = data.name data.name = data.name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for " + microbe_info[kingdom][org]['name'] + ":" + chr + ")" data.dbkey = dbkey @@ -121,7 +122,6 @@ data = app.datatypes_registry.change_datatype( data, file_type ) data.init_meta() data.set_peek() - data.set_size() app.model.flush() elif fields[0] == "#NewFile": description = fields[1] @@ -130,6 +130,7 @@ filepath = fields[4] file_type = fields[5] newdata = app.model.HistoryDatasetAssociation( create_dataset = True ) #This import should become a library + newdata.set_size() newdata.extension = file_type newdata.name = basic_name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for "+microbe_info[kingdom][org]['name']+":"+chr + ")" newdata.flush() @@ -145,5 +146,4 @@ newdata.dbkey = dbkey newdata.init_meta() newdata.set_peek() - newdata.set_size() app.model.flush() diff -r 499eed864ab3 -r 74e392ea6ae8 tools/data_source/ucsc_filter.py --- a/tools/data_source/ucsc_filter.py Mon Dec 01 14:07:36 2008 -0500 +++ b/tools/data_source/ucsc_filter.py Tue Dec 02 10:50:54 2008 -0500 @@ -43,6 +43,7 @@ """Verifies the data after the run""" items = out_data.items() for name, data in items: + data.set_size() try: err_msg, err_flag = 'Errors:', False line_count = 0 @@ -65,4 +66,3 @@ except Exception, exc: data.info = data.info + "\n" + str(exc) data.blurb = "error" - data.set_size() diff -r 499eed864ab3 -r 74e392ea6ae8 tools/maf/maf_to_bed_code.py --- a/tools/maf/maf_to_bed_code.py Mon Dec 01 14:07:36 2008 -0500 +++ b/tools/maf/maf_to_bed_code.py Tue Dec 02 10:50:54 2008 -0500 @@ -29,6 +29,7 @@ filepath = fields[2] newdata = app.model.HistoryDatasetAssociation( create_dataset = True ) + newdata.set_size() newdata.extension = "bed" newdata.name = basic_name + " (" + dbkey + ")" newdata.flush() @@ -46,7 +47,6 @@ newdata.dbkey = dbkey newdata.init_meta() newdata.set_peek() - newdata.set_size() app.model.flush() output_data_list.append(newdata) else: