commit/galaxy-central: richard_burhans: allow limiting the number of data lines used to determine column data types while still counting all data lines

1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/29f860135720/
changeset:   29f860135720
user:        richard_burhans
date:        2012-03-26 19:55:05
summary:     allow limiting the number of data lines used to determine column data types while still counting all data lines
affected #:  1 file

diff -r 6b8535e5b03001cc25e55f05b391fafacbf56b8e -r 29f8601357203ad282f3848b2fa2c144ef6980f2 lib/galaxy/datatypes/tabular.py
--- a/lib/galaxy/datatypes/tabular.py
+++ b/lib/galaxy/datatypes/tabular.py
@@ -26,7 +26,7 @@
     def init_meta( self, dataset, copy_from=None ):
         data.Text.init_meta( self, dataset, copy_from=copy_from )
 
-    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = 100000, **kwd ):
+    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = 100000, max_guess_type_data_lines = None, **kwd ):
         """
         Tries to determine the number of columns as well as those columns
         that contain numerical values in the dataset. A skip parameter is
@@ -116,13 +116,14 @@
                     comment_lines += 1
                 else:
                     data_lines += 1
-                    fields = line.split( '\t' )
-                    for field_count, field in enumerate( fields ):
-                        if field_count >= len( column_types ): #found a previously unknown column, we append None
-                            column_types.append( None )
-                        column_type = guess_column_type( field )
-                        if type_overrules_type( column_type, column_types[field_count] ):
-                            column_types[field_count] = column_type
+                    if max_guess_type_data_lines is None or data_lines <= max_guess_type_data_lines:
+                        fields = line.split( '\t' )
+                        for field_count, field in enumerate( fields ):
+                            if field_count >= len( column_types ): #found a previously unknown column, we append None
+                                column_types.append( None )
+                            column_type = guess_column_type( field )
+                            if type_overrules_type( column_type, column_types[field_count] ):
+                                column_types[field_count] = column_type
                     if i == 0 and requested_skip is None:
                         # This is our first line, people seem to like to upload files that have a header line, but do not
                         # start with '#' (i.e. all column types would then most likely be detected as str). We will assume

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
participants (1)
-
Bitbucket