commit/galaxy-central: dannon: Strip trailing whitespace in data_providers
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/f33687b0e590/
Changeset:   f33687b0e590
User:        dannon
Date:        2013-08-30 05:32:37
Summary:     Strip trailing whitespace in data_providers
Affected #:  1 file

diff -r c05e0e9714f5bd2ecfa96cfd6b6f7152b815e82f -r f33687b0e59013a924a696f1c9630cae960b74a2 lib/galaxy/visualization/data_providers/basic.py
--- a/lib/galaxy/visualization/data_providers/basic.py
+++ b/lib/galaxy/visualization/data_providers/basic.py
@@ -15,37 +15,37 @@
         self.original_dataset = original_dataset
         self.dependencies = dependencies
         self.error_max_vals = error_max_vals
-    
+
     def has_data( self, **kwargs ):
         """
         Returns true if dataset has data in the specified genome window, false
         otherwise.
         """
         raise Exception( "Unimplemented Function" )
-    
+
     def get_iterator( self, **kwargs ):
         """
         Returns an iterator that provides data in the region chrom:start-end
         """
         raise Exception( "Unimplemented Function" )
-    
+
     def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
         """
         Process data from an iterator to a format that can be provided to
         client.
         """
-        raise Exception( "Unimplemented Function" ) 
-    
+        raise Exception( "Unimplemented Function" )
+
     def get_data( self, chrom, start, end, start_val=0, max_vals=sys.maxint, **kwargs ):
-        """ 
-        Returns data as specified by kwargs. start_val is the first element to 
+        """
+        Returns data as specified by kwargs. start_val is the first element to
         return and max_vals indicates the number of values to return.
-        
+
         Return value must be a dictionary with the following attributes:
             dataset_type, data
         """
         iterator = self.get_iterator( chrom, start, end )
         return self.process_data( iterator, start_val, max_vals, **kwargs )
-    
+
     def write_data_to_file( self, filename, **kwargs ):
         """
         Write data in region defined by chrom, start, and end to a file.
@@ -56,20 +56,20 @@
 class ColumnDataProvider( BaseDataProvider ):
     """ Data provider for columnar data """
     MAX_LINES_RETURNED = 30000
-    
+
     def __init__( self, original_dataset, max_lines_returned=MAX_LINES_RETURNED ):
         # Compatibility check.
         if not isinstance( original_dataset.datatype, Tabular ):
             raise Exception( "Data provider can only be used with tabular data" )
-        
+
         # Attribute init.
         self.original_dataset = original_dataset
         # allow throttling
         self.max_lines_returned = max_lines_returned
-    
+
     def get_data( self, columns=None, start_val=0, max_vals=None, skip_comments=True, **kwargs ):
         """
-        Returns data from specified columns in dataset. Format is list of lists 
+        Returns data from specified columns in dataset. Format is list of lists
         where each list is a line of data.
         """
         if not columns:
@@ -81,20 +81,20 @@
             max_vals = min([ max_vals, self.max_lines_returned ])
         except ( ValueError, TypeError ):
             max_vals = self.max_lines_returned
-        
+
         try:
             start_val = int( start_val )
             start_val = max([ start_val, 0 ])
         except ( ValueError, TypeError ):
             start_val = 0
-        
+
         # skip comment lines (if any/avail)
        # pre: should have original_dataset and
         if( skip_comments
         and self.original_dataset.metadata.comment_lines
         and start_val < self.original_dataset.metadata.comment_lines ):
             start_val = int( self.original_dataset.metadata.comment_lines )
-        
+
         # columns is an array of ints for now (should handle column names later)
         columns = from_json_string( columns )
         for column in columns:
@@ -103,7 +103,7 @@
                 "column index (%d) must be positive and less" % ( column )
                 + " than the number of columns: %d" % ( self.original_dataset.metadata.columns ) )
         #print columns, start_val, max_vals, skip_comments, kwargs
-        
+
         # set up the response, column lists
         response = {}
         response[ 'data' ] = data = [ [] for column in columns ]
@@ -113,9 +113,9 @@
             'count' : 0,
             'sum'   : 0
         } for column in columns ]
-        
+
         column_types = [ self.original_dataset.metadata.column_types[ column ] for column in columns ]
-        
+
         # function for casting by column_types
         def cast_val( val, type ):
             """ Cast value based on type. Return None if can't be cast """
@@ -126,12 +126,12 @@
                 try: val = float( val )
                 except: return None
             return val
-        
+
         returning_data = False
         f = open( self.original_dataset.file_name )
         #TODO: add f.seek if given fptr in kwargs
         for count, line in enumerate( f ):
-            
+
             # check line v. desired start, end
             if count < start_val:
                 continue
@@ -139,7 +139,7 @@
                 break
             returning_data = True
-            
+
             fields = line.split()
             fields_len = len( fields )
             #NOTE: this will return None/null for abberrant column values (including bad indeces)
@@ -149,39 +149,39 @@
                 if column < fields_len:
                     column_val = cast_val( fields[ column ], column_type )
                     if column_val != None:
-                        
+
                         # if numeric, maintain min, max, sum
                         if( column_type == 'float' or column_type == 'int' ):
                             if( ( meta[ index ][ 'min' ] == None ) or ( column_val < meta[ index ][ 'min' ] ) ):
                                 meta[ index ][ 'min' ] = column_val
-                            
+
                             if( ( meta[ index ][ 'max' ] == None ) or ( column_val > meta[ index ][ 'max' ] ) ):
                                 meta[ index ][ 'max' ] = column_val
-                            
+
                             meta[ index ][ 'sum' ] += column_val
-                        
+
                         # maintain a count - for other stats
                         meta[ index ][ 'count' ] += 1
                         data[ index ].append( column_val )
-        
+
         response[ 'endpoint' ] = dict( last_line=( count - 1 ), file_ptr=f.tell() )
         f.close()
         if not returning_data:
             return None
-        
+
         for index, meta in enumerate( response[ 'meta' ] ):
             column_type = column_types[ index ]
             count = meta[ 'count' ]
-            
+
             if( ( column_type == 'float' or column_type == 'int' ) and count ):
                 meta[ 'mean' ] = float( meta[ 'sum' ] ) / count
-                
+
                 sorted_data = sorted( response[ 'data' ][ index ] )
                 middle_index = ( count / 2 ) - 1
                 if count % 2 == 0:
                     meta[ 'median' ] = ( ( sorted_data[ middle_index ]
                                        + sorted_data[( middle_index + 1 )] ) / 2.0 )
-                
+
                 else:
                     meta[ 'median' ] = sorted_data[ middle_index ]
Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this email.