commit/galaxy-central: jgoecks: Subtract query tool: make ignoring empty end columns and whitespace optional.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/ce9789a35356/
changeset: ce9789a35356
user: jgoecks
date: 2013-02-07 20:46:49
summary: Subtract query tool: make ignoring empty end columns and whitespace optional.
affected #: 3 files

diff -r fd82f2ff9533fb5e4ef37c0404ede7c58d9234c5 -r ce9789a35356da2b2ee4ae723506d5af57a0ce69 test-data/eq-showbeginning_e.dat
--- /dev/null
+++ b/test-data/eq-showbeginning_e.dat
@@ -0,0 +1,10 @@
+chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 -
+chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 +
+chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 -
+chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 +
+chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 -
+chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 -
+chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 +
+chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 -
+chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 +
+chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 -

diff -r fd82f2ff9533fb5e4ef37c0404ede7c58d9234c5 -r ce9789a35356da2b2ee4ae723506d5af57a0ce69 tools/new_operations/subtract_query.py
--- a/tools/new_operations/subtract_query.py
+++ b/tools/new_operations/subtract_query.py
@@ -3,7 +3,8 @@
 """
 Subtract an entire query from another query
-usage: %prog in_file_1 in_file_2 begin_col end_col output
+usage: %prog in_file_1 in_file_2 begin_col end_col output
+    --ignore-empty-end-cols: ignore empty end columns when subtracting
 """
 import sys, re
 from galaxy import eggs
@@ -18,7 +19,7 @@
 assert sys.version_info[:2] >= ( 2, 4 )
-def get_lines(fname, begin_col='', end_col=''):
+def get_lines(fname, begin_col='', end_col='', ignore_empty_end_cols=False):
     lines = set([])
     i = 0
     for i, line in enumerate(file(fname)):
@@ -29,12 +30,15 @@
                 try:
                     line = line.split('\t')
                     line = '\t'.join([line[j] for j in range(begin_col-1, end_col)])
-                    # removing empty fields, we do not compare empty fields at the end of a line.
-                    line = line.rstrip()
+                    if ignore_empty_end_cols:
+                        # removing empty fields, we do not compare empty fields at the end of a line.
+                        line = line.rstrip()
                     lines.add( line )
                 except: pass
             else:
-                line = line.rstrip()
+                if ignore_empty_end_cols:
+                    # removing empty fields, we do not compare empty fields at the end of a line.
+                    line = line.rstrip()
                 lines.add( line )
     if i: return (i+1, lines)
     else: return (i, lines)
@@ -83,9 +87,9 @@
     lines1 is the set of unique lines in inp1_file
     diff1 is the number of duplicate lines removed from inp1_file
     """
-    len1, lines1 = get_lines(inp1_file, begin_col, end_col)
+    len1, lines1 = get_lines(inp1_file, begin_col, end_col, options.ignore_empty_end_cols)
     diff1 = len1 - len(lines1)
-    len2, lines2 = get_lines(inp2_file, begin_col, end_col)
+    len2, lines2 = get_lines(inp2_file, begin_col, end_col, options.ignore_empty_end_cols)
     lines1.difference_update(lines2)
     """lines1 is now the set of unique lines in inp1_file - the set of unique lines in inp2_file"""

diff -r fd82f2ff9533fb5e4ef37c0404ede7c58d9234c5 -r ce9789a35356da2b2ee4ae723506d5af57a0ce69 tools/new_operations/subtract_query.xml
--- a/tools/new_operations/subtract_query.xml
+++ b/tools/new_operations/subtract_query.xml
@@ -1,11 +1,18 @@
-<tool id="subtract_query1" name="Subtract Whole Dataset">
+<tool id="subtract_query1" name="Subtract Whole Dataset" version="0.1">
   <description>from another dataset</description>
-  <command interpreter="python">subtract_query.py $input1 $input2 $begin_col $end_col $output</command>
+  <command interpreter="python">
+    subtract_query.py $input1 $input2 $begin_col $end_col $output
+    #if str($ignore_empty_end_cols) == 'true':
+        --ignore-empty-end-cols
+    #end if
+
+  </command>
   <inputs>
     <param format="txt" name="input2" type="data" label="Subtract" help="Second dataset" />
     <param format="txt" name="input1" type="data" label="from" help="First dataset" />
     <param name="begin_col" type="data_column" data_ref="input1" force_select="False" label="Restrict subtraction between 'begin column'" />
     <param name="end_col" type="data_column" data_ref="input1" force_select="False" label="and 'end column'" help="Specifying columns for restricting subtraction is available only for tabular formatted datasets" />
+    <param name="ignore_empty_end_cols" type="boolean" label="Ignore empty columns and whitespace at end of line when subtracting"/>
   </inputs>
   <outputs>
     <data format="input" name="output" metadata_source="input1" />
@@ -45,6 +52,15 @@
       <param name="end_col" value="None" />
       <output name="output" file="subtract-query-4.dat" />
     </test>
+    <!-- Subtract 2 tabular files with no column restrictions, ignoring empty end columns. -->
+    <test>
+      <param name="input1" value="eq-showbeginning_e.dat" />
+      <param name="input2" value="eq-showtail.dat" />
+      <param name="begin_col" value="None" />
+      <param name="end_col" value="None" />
+      <param name="ignore_empty_end_cols" value="true" />
+      <output name="output" file="subtract-query-2.dat" />
+    </test>
   </tests>
   <help>

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
participants (1)
-
Bitbucket