commit/galaxy-central: jgoecks: Enable GFF filter attributes tool to accept arbitrary conditions.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/aab333e661a2/ changeset: r5501:aab333e661a2 user: jgoecks date: 2011-05-05 22:51:56 summary: Enable GFF filter attributes tool to accept arbitrary conditions. affected #: 5 files (1.3 KB) --- a/lib/galaxy/datatypes/interval.py Wed May 04 16:45:10 2011 -0400 +++ b/lib/galaxy/datatypes/interval.py Thu May 05 16:51:56 2011 -0400 @@ -889,7 +889,7 @@ MetadataElement( name="column_types", default=['str','str','str','int','int','float','str','int','list'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False ) MetadataElement( name="attributes", default=0, desc="Number of attributes", readonly=True, visible=False, no_value=0 ) - MetadataElement( name="attribute_types", default={}, desc="Attribute types", param=metadata.ColumnTypesParameter, readonly=True, visible=False, no_value=[] ) + MetadataElement( name="attribute_types", default={}, desc="Attribute types", param=metadata.DictParameter, readonly=True, visible=False, no_value=[] ) def sniff( self, filename ): """ --- a/lib/galaxy/datatypes/metadata.py Wed May 04 16:45:10 2011 -0400 +++ b/lib/galaxy/datatypes/metadata.py Thu May 05 16:51:56 2011 -0400 @@ -358,6 +358,11 @@ def to_string( self, value ): return ",".join( map( str, value ) ) + +class DictParameter( MetadataParameter ): + + def to_string( self, value ): + return simplejson.dumps( value ) class PythonObjectParameter( MetadataParameter ): --- a/tools/filters/gff/gff_filter_by_attribute.py Wed May 04 16:45:10 2011 -0400 +++ b/tools/filters/gff/gff_filter_by_attribute.py Thu May 05 16:51:56 2011 -0400 @@ -5,7 +5,9 @@ # abstracted and leveraged in each filtering tool. from __future__ import division -import sys, re, os.path +import sys +from galaxy import eggs +from galaxy.util.json import to_json_string, from_json_string # Older py compatibility try: @@ -15,6 +17,10 @@ assert sys.version_info[:2] >= ( 2, 4 ) +# +# Helper functions. 
+# + def get_operands( filter_condition ): # Note that the order of all_operators is important items_to_strip = ['+', '-', '**', '*', '//', '/', '%', '<<', '>>', '&', '|', '^', '~', '<=', '<', '>=', '>', '==', '!=', '<>', ' and ', ' or ', ' not ', ' is ', ' is not ', ' in ', ' not in '] @@ -28,12 +34,31 @@ sys.stderr.write( msg ) sys.exit() +def check_for_executable( text, description='' ): + # Attempt to determine if the condition includes executable stuff and, if so, exit. + secured = dir() + operands = get_operands( text ) + for operand in operands: + try: + check = int( operand ) + except: + if operand in secured: + stop_err( "Illegal value '%s' in %s '%s'" % ( operand, description, text ) ) + +# +# Process inputs. +# + in_fname = sys.argv[1] out_fname = sys.argv[2] -attribute_type = sys.argv[3] -attribute_name = sys.argv[4] -cond_text = sys.argv[5] +cond_text = sys.argv[3] +attribute_types = from_json_string( sys.argv[4] ) +# Convert types from str to type objects. +for name, a_type in attribute_types.items(): + check_for_executable(a_type) + attribute_types[ name ] = eval( a_type ) + # Unescape if input has been escaped mapped_str = { '__lt__': '<', @@ -47,26 +72,22 @@ } for key, value in mapped_str.items(): cond_text = cond_text.replace( key, value ) + +# Attempt to determine if the condition includes executable stuff and, if so, exit. +check_for_executable( cond_text, 'condition') + +# Prepare the column variable names and wrappers for column data types. Only +# prepare columns up to largest column in condition. +attrs, type_casts = [], [] +for name, attr_type in attribute_types.items(): + attrs.append( name ) + type_cast = "get_value('%(name)s', attribute_types['%(name)s'], attribute_values)" % ( {'name': name} ) + type_casts.append( type_cast ) -# Condition text is 'attribute meets condition.' 
-cond_text = attribute_name + cond_text +attr_str = ', '.join( attrs ) # 'c1, c2, c3, c4' +type_cast_str = ', '.join( type_casts ) # 'str(c1), int(c2), int(c3), str(c4)' +wrap = "%s = %s" % ( attr_str, type_cast_str ) -# Attempt to determine if the condition includes executable stuff and, if so, exit -secured = dir() -operands = get_operands(cond_text) -for operand in operands: - try: - check = int( operand ) - except: - if operand in secured: - stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) ) - -# Set up assignment. -assignment = "%s = attributes.get('%s', None)" % ( attribute_name, attribute_name ) - -# Set up type casting based on attribute type. -type_cast = "%s = %s(%s)" % ( attribute_name, attribute_type, attribute_name) - # Stats skipped_lines = 0 first_invalid_line = 0 @@ -74,6 +95,13 @@ lines_kept = 0 total_lines = 0 out = open( out_fname, 'wt' ) + +# Helper function to safely get and type cast a value in a dict. +def get_value(name, a_type, values_dict): + if name in values_dict: + return (a_type)(values_dict[ name ]) + else: + return None # Read and filter input file, skipping invalid lines code = ''' @@ -87,34 +115,30 @@ invalid_line = line continue try: - # GTF format: chrom source, name, chromStart, chromEnd, score, strand, frame, attributes. - # Attributes format: name1 "value1" ; name2 "value2" ; ... + # Place attribute values into variables with attribute + # name; type casting is done as well. elems = line.split( '\t' ) - attributes_list = elems[8].split(";") - attributes = {} - for name_value_pair in attributes_list: + attribute_values = {} + for name_value_pair in elems[8].split(";"): pair = name_value_pair.strip().split(" ") if pair == '': continue name = pair[0].strip() if name == '': continue - # Need to strip double quote from values - value = pair[1].strip(" \\"") - attributes[name] = value + # Need to strip double quote from value and typecast. 
+ attribute_values[name] = pair[1].strip(" \\"") %s if %s: - %s - if %s: - lines_kept += 1 - print >> out, line + lines_kept += 1 + print >> out, line except Exception, e: + print e skipped_lines += 1 if not invalid_line: first_invalid_line = i + 1 invalid_line = line -''' % ( assignment, attribute_name, type_cast, cond_text ) - +''' % ( wrap, cond_text ) valid_filter = True try: --- a/tools/filters/gff/gff_filter_by_attribute.xml Wed May 04 16:45:10 2011 -0400 +++ b/tools/filters/gff/gff_filter_by_attribute.xml Thu May 05 16:51:56 2011 -0400 @@ -1,23 +1,11 @@ <tool id="gff_filter_by_attribute" name="Filter GFF data by attribute" version="0.1"><description>using simple expressions</description><command interpreter="python"> - gff_filter_by_attribute.py $input $out_file1 "$attribute_type" "$attribute_name" "$cond" + gff_filter_by_attribute.py $input $out_file1 "$cond" '${input.metadata.attribute_types}' </command><inputs><param format="gff" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/> - <param name="attribute_name" type="select" label="Attribute name" help=""> - <options from_dataset="input"> - <column name="name" index="8"/> - <column name="value" index="8"/> - <filter type="attribute_value_splitter" pair_separator=";" column="8"/> - </options> - </param> - <param name="attribute_type" type="select" label="Attribute type"> - <option value="float">Float</option> - <option value="int">Integer</option> - <option value="str">String</option> - </param> - <param name="cond" size="40" type="text" value=">0" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool."> + <param name="cond" size="40" type="text" value="gene_id=='uc002loc.1'" label="With following condition" help="Double equal signs, ==, must be used as shown above. 
To filter for an arbitrary string, use the Select tool."><validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/></param></inputs> @@ -27,11 +15,14 @@ <tests><test><param name="input" value="gff_filter_attr_in1.gff"/> - <param name="attribute_name" value="conf_lo"/> - <param name="attribute_type" value="float"/> - <param name="cond" value=">0"/> + <param name="cond" value="conf_lo>0"/><output name="out_file1" file="gff_filter_by_attribute_out1.gff"/></test> + <test> + <param name="input" value="gff_filter_attr_in1.gff"/> + <param name="cond" value="conf_lo==0 or conf_hi>125"/> + <output name="out_file1" file="gff_filter_by_attribute_out2.gff"/> + </test></tests><help> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
- Bitbucket