[hg] galaxy 3745: Enhanced column join to allow user to specify fill values for empty columns
details: http://www.bx.psu.edu/hg/galaxy/rev/ab83e2ef8a99 changeset: 3745:ab83e2ef8a99 user: Kelly Vincent <kpvincent@bx.psu.edu> date: Wed May 05 11:34:29 2010 -0400 description: Enhanced column join to allow user to specify fill values for empty columns, either one value for all or by column diffstat: test-data/column_join_out1.pileup | 120 +++++++++++++++--------------- test-data/column_join_out2.pileup | 80 ++++++++++---------- test-data/column_join_out3.pileup | 140 +++++++++++++++++----------------- tools/new_operations/column_join.py | 120 +++++++++++++++++++++++------ tools/new_operations/column_join.xml | 86 ++++++++++++++++++--- 5 files changed, 337 insertions(+), 209 deletions(-) diffs (667 lines): diff -r 5f59c890ee8d -r ab83e2ef8a99 test-data/column_join_out1.pileup --- a/test-data/column_join_out1.pileup Tue May 04 22:27:05 2010 -0400 +++ b/test-data/column_join_out1.pileup Wed May 05 11:34:29 2010 -0400 @@ -1,60 +1,60 @@ -chr1 1 T T 87 25 G G 25 25 -chr1 2 A T 87 25 T T 25 25 -chr1 3 A G 87 25 T T 27 25 -chr1 4 A T 55 25 A A 36 25 -chr1 5 C A 54 25 A A 36 25 -chr1 6 G T 87 25 T T 36 25 -chr1 7 A C 87 25 G G 42 25 -chr1 8 G A 87 25 T T 45 25 -chr1 9 A T 87 25 A A 51 25 -chr1 10 G A 57 25 G G 54 25 -chr1 11 C A 57 25 C C 57 25 -chr1 12 T A 99 25 T T 60 25 -chr1 13 A C 99 25 T T 78 25 -chr1 14 G C 55 25 A A 56 25 -chr1 15 T G 68 25 A A 87 25 -chr4 1 T T 90 25 -chr4 2 A A 87 25 -chr4 3 A A 34 25 -chr4 4 T T 55 25 -chr4 5 A A 54 25 -chr4 6 T T 87 25 -chr4 7 A A 80 25 -chr4 8 A A 87 25 -chr4 9 A A 87 25 -chr4 10 G G 57 25 -chr4 11 C C 57 25 -chr4 12 A A 99 25 -chr4 13 A A 99 25 -chr4 14 G G 55 25 -chr4 15 G G 68 25 -chrM 1 A G 25 25 T G 25 25 -chrM 2 C T 25 25 C A 25 25 -chrM 3 T G 27 25 C A 25 25 -chrM 4 G C 36 25 G A 36 25 -chrM 5 T A 38 25 A T 36 25 -chrM 6 T A 39 25 A G 36 25 -chrM 7 G C 43 25 C G 42 25 -chrM 8 A C 46 25 T T 45 25 -chrM 9 T G 53 25 C A 51 25 -chrM 10 T G 56 25 G G 54 25 -chrM 11 C G 57 25 T C 57 25 -chrM 12 T C 61 25 A T 60 25 -chrM 13 
T G 79 25 A T 78 25 -chrM 14 A C 58 25 G A 56 25 -chrM 15 G A 87 25 G A 87 25 -chrM 16 G C 88 25 -chrM 17 A C 88 25 -chrM 18 G A 89 25 -chrM 19 G T 58 25 -chrM 20 T C 55 25 -chrM 21 C T 87 25 -chrM 22 C A 87 25 -chrM 23 A G 87 25 -chrM 24 A T 89 25 -chrM 25 G A 57 25 -chrM 26 C G 58 25 -chrM 27 T A 99 25 -chrM 28 A C 99 25 -chrM 29 G C 58 25 -chrM 30 T T 65 25 +chr1 1 ? ? ? ? T T 87 25 G G 25 25 +chr1 2 ? ? ? ? A T 87 25 T T 25 25 +chr1 3 ? ? ? ? A G 87 25 T T 27 25 +chr1 4 ? ? ? ? A T 55 25 A A 36 25 +chr1 5 ? ? ? ? C A 54 25 A A 36 25 +chr1 6 ? ? ? ? G T 87 25 T T 36 25 +chr1 7 ? ? ? ? A C 87 25 G G 42 25 +chr1 8 ? ? ? ? G A 87 25 T T 45 25 +chr1 9 ? ? ? ? A T 87 25 A A 51 25 +chr1 10 ? ? ? ? G A 57 25 G G 54 25 +chr1 11 ? ? ? ? C A 57 25 C C 57 25 +chr1 12 ? ? ? ? T A 99 25 T T 60 25 +chr1 13 ? ? ? ? A C 99 25 T T 78 25 +chr1 14 ? ? ? ? G C 55 25 A A 56 25 +chr1 15 ? ? ? ? T G 68 25 A A 87 25 +chr4 1 ? ? ? ? ? ? ? ? T T 90 25 +chr4 2 ? ? ? ? ? ? ? ? A A 87 25 +chr4 3 ? ? ? ? ? ? ? ? A A 34 25 +chr4 4 ? ? ? ? ? ? ? ? T T 55 25 +chr4 5 ? ? ? ? ? ? ? ? A A 54 25 +chr4 6 ? ? ? ? ? ? ? ? T T 87 25 +chr4 7 ? ? ? ? ? ? ? ? A A 80 25 +chr4 8 ? ? ? ? ? ? ? ? A A 87 25 +chr4 9 ? ? ? ? ? ? ? ? A A 87 25 +chr4 10 ? ? ? ? ? ? ? ? G G 57 25 +chr4 11 ? ? ? ? ? ? ? ? C C 57 25 +chr4 12 ? ? ? ? ? ? ? ? A A 99 25 +chr4 13 ? ? ? ? ? ? ? ? A A 99 25 +chr4 14 ? ? ? ? ? ? ? ? G G 55 25 +chr4 15 ? ? ? ? ? ? ? ? G G 68 25 +chrM 1 A G 25 25 T G 25 25 ? ? ? ? +chrM 2 C T 25 25 C A 25 25 ? ? ? ? +chrM 3 T G 27 25 C A 25 25 ? ? ? ? +chrM 4 G C 36 25 G A 36 25 ? ? ? ? +chrM 5 T A 38 25 A T 36 25 ? ? ? ? +chrM 6 T A 39 25 A G 36 25 ? ? ? ? +chrM 7 G C 43 25 C G 42 25 ? ? ? ? +chrM 8 A C 46 25 T T 45 25 ? ? ? ? +chrM 9 T G 53 25 C A 51 25 ? ? ? ? +chrM 10 T G 56 25 G G 54 25 ? ? ? ? +chrM 11 C G 57 25 T C 57 25 ? ? ? ? +chrM 12 T C 61 25 A T 60 25 ? ? ? ? +chrM 13 T G 79 25 A T 78 25 ? ? ? ? +chrM 14 A C 58 25 G A 56 25 ? ? ? ? +chrM 15 G A 87 25 G A 87 25 ? ? ? ? +chrM 16 G C 88 25 ? ? ? ? ? 
? ? ? +chrM 17 A C 88 25 ? ? ? ? ? ? ? ? +chrM 18 G A 89 25 ? ? ? ? ? ? ? ? +chrM 19 G T 58 25 ? ? ? ? ? ? ? ? +chrM 20 T C 55 25 ? ? ? ? ? ? ? ? +chrM 21 C T 87 25 ? ? ? ? ? ? ? ? +chrM 22 C A 87 25 ? ? ? ? ? ? ? ? +chrM 23 A G 87 25 ? ? ? ? ? ? ? ? +chrM 24 A T 89 25 ? ? ? ? ? ? ? ? +chrM 25 G A 57 25 ? ? ? ? ? ? ? ? +chrM 26 C G 58 25 ? ? ? ? ? ? ? ? +chrM 27 T A 99 25 ? ? ? ? ? ? ? ? +chrM 28 A C 99 25 ? ? ? ? ? ? ? ? +chrM 29 G C 58 25 ? ? ? ? ? ? ? ? +chrM 30 T T 65 25 ? ? ? ? ? ? ? ? diff -r 5f59c890ee8d -r ab83e2ef8a99 test-data/column_join_out2.pileup --- a/test-data/column_join_out2.pileup Tue May 04 22:27:05 2010 -0400 +++ b/test-data/column_join_out2.pileup Wed May 05 11:34:29 2010 -0400 @@ -1,15 +1,15 @@ -chr1 1 C 1 -chr1 2 G 2 -chr1 3 A 2 -chr1 4 C 2 -chr1 5 T 3 -chr1 6 G 3 -chr1 7 C 4 -chr1 8 A 4 -chr1 9 T 5 -chr1 10 G 5 -chr1 11 A 5 -chr1 12 C 5 +chr1 1 C 1 +chr1 2 G 2 +chr1 3 A 2 +chr1 4 C 2 +chr1 5 T 3 +chr1 6 G 3 +chr1 7 C 4 +chr1 8 A 4 +chr1 9 T 5 +chr1 10 G 5 +chr1 11 A 5 +chr1 12 C 5 chr1 42 T 1 C 1 chr1 43 G 2 C 2 chr1 44 T 2 T 2 @@ -20,21 +20,21 @@ chr1 49 C 5 G 5 chr1 50 A 5 A 5 chr1 51 A 5 G 5 -chr1 52 A 5 -chr1 53 G 5 -chr2 1 T 6 -chr2 2 G 6 -chr2 3 C 7 -chr2 4 G 7 -chr2 5 G 7 -chr2 6 A 7 -chr2 7 T 8 -chr2 8 A 8 -chr2 9 C 9 -chr2 10 T 9 -chr2 11 C 10 -chr2 12 G 10 -chr2 13 A 11 +chr1 52 A 5 +chr1 53 G 5 +chr2 1 T 6 +chr2 2 G 6 +chr2 3 C 7 +chr2 4 G 7 +chr2 5 G 7 +chr2 6 A 7 +chr2 7 T 8 +chr2 8 A 8 +chr2 9 C 9 +chr2 10 T 9 +chr2 11 C 10 +chr2 12 G 10 +chr2 13 A 11 chr2 52 T 5 chr2 53 A 5 chr2 54 T 5 @@ -43,19 +43,19 @@ chr2 57 T 5 chr2 58 T 6 chr2 59 A 6 -chr2 77 C 6 -chr2 78 G 6 -chr2 79 T 7 -chr2 80 C 7 -chr2 81 G 7 -chr2 82 A 8 -chr2 83 A 8 -chr2 84 T 8 -chr2 85 G 8 -chr2 86 C 9 -chr2 87 G 9 -chr2 88 G 10 -chr2 89 G 10 +chr2 77 C 6 +chr2 78 G 6 +chr2 79 T 7 +chr2 80 C 7 +chr2 81 G 7 +chr2 82 A 8 +chr2 83 A 8 +chr2 84 T 8 +chr2 85 G 8 +chr2 86 C 9 +chr2 87 G 9 +chr2 88 G 10 +chr2 89 G 10 chr3 60 C 6 chr3 61 T 6 chr3 62 C 6 diff -r 
5f59c890ee8d -r ab83e2ef8a99 test-data/column_join_out3.pileup --- a/test-data/column_join_out3.pileup Tue May 04 22:27:05 2010 -0400 +++ b/test-data/column_join_out3.pileup Wed May 05 11:34:29 2010 -0400 @@ -1,70 +1,70 @@ -chr1 1 C 1 ^:. -chr1 2 G 2 .^:. -chr1 3 A 2 .. -chr1 4 C 2 .. -chr1 5 T 3 ..^:. -chr1 6 G 3 ..^:, -chr1 7 C 4 .N., -chr1 8 A 4 ..., -chr1 9 T 5 ..C., -chr1 10 G 5 N..., -chr1 11 A 5 .C.., -chr1 12 C 5 ..N., -chr1 42 T 1 ^:. C 1 ^:. -chr1 43 G 2 .^:. C 2 .^:. -chr1 44 T 2 .. T 2 .. -chr1 45 C 3 ..^:. A 3 ..^:. -chr1 46 G 3 ..^:. G 4 ...^:. -chr1 47 T 4 ...^:, A 5 ....^:, -chr1 48 A 4 .N., T 5 ...N, -chr1 49 C 5 ...., G 5 ...., -chr1 50 A 5 ..G., A 5 .G.., -chr1 51 A 5 A..., G 5 ...., -chr1 52 A 5 ...., -chr1 53 G 5 ..N., -chr2 1 T 6 .C..., -chr2 2 G 6 ..N.., -chr2 3 C 7 ..C..., -chr2 4 G 7 .G...., -chr2 5 G 7 ...N.., -chr2 6 A 7 ..T..., -chr2 7 T 8 ...C..., -chr2 8 A 8 ..A...., -chr2 9 C 9 .GA..N.., -chr2 10 T 9 ........, -chr2 11 C 10 .>>..T..., -chr2 12 G 10 .N..G...., -chr2 13 A 11 ....A..T.., -chr2 14 G 11 ..N....... -chr2 15 C 11 A.....NG.. -chr2 16 T 11 ...C.....G -chr2 17 C 12 G......TN.. -chr2 18 A 12 N......G..A -chr2 19 A 13 .......NN... -chr2 20 C 13 ..GT.......N -chr2 52 T 5 .N.., -chr2 53 A 5 ...., -chr2 54 T 5 ...., -chr2 55 T 5 ...., -chr2 56 C 5 ...., -chr2 57 T 5 ...., -chr2 58 T 6 .N..., -chr2 59 A 6 ....., -chr2 77 C 6 .G..., -chr2 78 G 6 ..N.., -chr2 79 T 7 ..N..., -chr2 80 C 7 .G...., -chr2 81 G 7 ...A.., -chr2 82 A 8 ...G..., -chr2 83 A 8 ...T..., -chr2 84 T 8 ..A...., -chr2 85 G 8 .GA...., -chr2 86 C 9 ........, -chr2 87 G 9 ....T..., -chr3 60 C 6 ...G., -chr3 61 T 6 ..N.., -chr3 62 C 6 ...A., -chr3 63 C 7 .N...., -chr3 64 A 7 ...G.., -chr3 65 T 7 ...AA., -chr3 66 A 7 ....N., +chr1 1 C 1 ^:. X X +chr1 2 G 2 .^:. X X +chr1 3 A 2 .. X X +chr1 4 C 2 .. X X +chr1 5 T 3 ..^:. 
X X +chr1 6 G 3 ..^:, X X +chr1 7 C 4 .N., X X +chr1 8 A 4 ..., X X +chr1 9 T 5 ..C., X X +chr1 10 G 5 N..., X X +chr1 11 A 5 .C.., X X +chr1 12 C 5 ..N., X X +chr1 42 X T 1 ^:. C 1 ^:. +chr1 43 X G 2 .^:. C 2 .^:. +chr1 44 X T 2 .. T 2 .. +chr1 45 X C 3 ..^:. A 3 ..^:. +chr1 46 X G 3 ..^:. G 4 ...^:. +chr1 47 X T 4 ...^:, A 5 ....^:, +chr1 48 X A 4 .N., T 5 ...N, +chr1 49 X C 5 ...., G 5 ...., +chr1 50 X A 5 ..G., A 5 .G.., +chr1 51 X A 5 A..., G 5 ...., +chr1 52 X A 5 ...., X +chr1 53 X G 5 ..N., X +chr2 1 T 6 .C..., X X +chr2 2 G 6 ..N.., X X +chr2 3 C 7 ..C..., X X +chr2 4 G 7 .G...., X X +chr2 5 G 7 ...N.., X X +chr2 6 A 7 ..T..., X X +chr2 7 T 8 ...C..., X X +chr2 8 A 8 ..A...., X X +chr2 9 C 9 .GA..N.., X X +chr2 10 T 9 ........, X X +chr2 11 C 10 .>>..T..., X X +chr2 12 G 10 .N..G...., X X +chr2 13 A 11 ....A..T.., X X +chr2 14 G 11 ..N....... X X +chr2 15 C 11 A.....NG.. X X +chr2 16 T 11 ...C.....G X X +chr2 17 C 12 G......TN.. X X +chr2 18 A 12 N......G..A X X +chr2 19 A 13 .......NN... 
X X +chr2 20 C 13 ..GT.......N X X +chr2 52 X X T 5 .N.., +chr2 53 X X A 5 ...., +chr2 54 X X T 5 ...., +chr2 55 X X T 5 ...., +chr2 56 X X C 5 ...., +chr2 57 X X T 5 ...., +chr2 58 X X T 6 .N..., +chr2 59 X X A 6 ....., +chr2 77 X C 6 .G..., X +chr2 78 X G 6 ..N.., X +chr2 79 X T 7 ..N..., X +chr2 80 X C 7 .G...., X +chr2 81 X G 7 ...A.., X +chr2 82 X A 8 ...G..., X +chr2 83 X A 8 ...T..., X +chr2 84 X T 8 ..A...., X +chr2 85 X G 8 .GA...., X +chr2 86 X C 9 ........, X +chr2 87 X G 9 ....T..., X +chr3 60 X X C 6 ...G., +chr3 61 X X T 6 ..N.., +chr3 62 X X C 6 ...A., +chr3 63 X X C 7 .N...., +chr3 64 X X A 7 ...G.., +chr3 65 X X T 7 ...AA., +chr3 66 X X A 7 ....N., diff -r 5f59c890ee8d -r ab83e2ef8a99 tools/new_operations/column_join.py --- a/tools/new_operations/column_join.py Tue May 04 22:27:05 2010 -0400 +++ b/tools/new_operations/column_join.py Wed May 05 11:34:29 2010 -0400 @@ -13,7 +13,19 @@ """ -import os, re, sys, tempfile +import optparse, os, re, struct, sys, tempfile + +try: + simple_json_exception = None + from galaxy import eggs + from galaxy.util.bunch import Bunch + from galaxy.util import stringify_dictionary_keys + import pkg_resources + pkg_resources.require("simplejson") + import simplejson +except Exception, e: + simplejson_exception = e + simplejson = None def stop_err( msg ): sys.stderr.write( msg ) @@ -136,17 +148,50 @@ return '%s\t%s' % tuple( min_ref_pos ), min_loc def __main__(): - output = sys.argv[1] - input1 = sys.argv[2] - input2 = sys.argv[3] - hinge = int( sys.argv[4] ) - cols = [ int( c ) for c in sys.argv[5].split( ',' ) ] - inputs = sys.argv[6:] - assert len( cols ) > 2, 'You need to select at least one column in addition to the first two' + parser = optparse.OptionParser() + parser.add_option( '', '--output', dest='output', help='' ) + parser.add_option( '', '--input1', dest='input1', help='' ) + parser.add_option( '', '--input2', dest='input2', help='' ) + parser.add_option( '', '--hinge', dest='hinge', help='' ) + 
parser.add_option( '', '--columns', dest='columns', help='' ) + parser.add_option( '', '--fill_options_file', dest='fill_options_file', default=None, help='' ) + (options, args) = parser.parse_args() + output = options.output + input1 = options.input1 + input2 = options.input2 + hinge = int( options.hinge ) + cols = [ int( c ) for c in str( options.columns ).split( ',' ) if int( c ) > hinge ] + inputs = [ input1, input2 ] + if options.fill_options_file == "None": + inputs.extend( args ) + else: + try: + col = int( args[0] ) + except ValueError: + inputs.extend( args ) + fill_options = None + if options.fill_options_file != "None" and options.fill_options_file is not None: + try: + if simplejson is None: + raise simplejson_exception + fill_options = Bunch( **stringify_dictionary_keys( simplejson.load( open( options.fill_options_file ) ) ) ) + except Exception, e: + print "Warning: Ignoring fill options due to simplejson error (%s)." % e + if fill_options is None: + fill_options = Bunch() + if 'file1_columns' not in fill_options: + fill_options.file1_columns = None + if fill_options and fill_options.file1_columns: + fill_empty = {} + for col in cols: + fill_empty[ col ] = fill_options.file1_columns[ col - 1 ] + else: + fill_empty = None + assert len( cols ) > 0, 'You need to select at least one column in addition to the hinge' + delimiter = '\t' # make sure all files are sorted in same way, ascending tmp_input_files = [] - input_files = [ input1, input2 ] - input_files.extend( inputs ) + input_files = inputs[:] for in_file in input_files: tmp_file = tempfile.NamedTemporaryFile() tmp_file_name = tmp_file.name @@ -162,10 +207,9 @@ current_lines = [ f.readline() for f in tmp_input_files ] last_lines = ''.join( current_lines ).strip() last_loc = -1 - i = 0 while last_lines: # get the "minimum" hinge, which should come first, and the file location in list - hinges = [ '\t'.join( line.split( '\t' )[ :hinge ] ) for line in current_lines ] + hinges = [ delimiter.join( 
line.split( delimiter )[ :hinge ] ) for line in current_lines ] hinge_dict = {} for i in range( len( hinges ) ): if not hinge_dict.has_key( hinges[ i ] ): @@ -174,33 +218,59 @@ hinges = [ h for h in hinges if h ] current, loc = hinges[0], hinge_dict[ hinges[0] ] # first output empty columns for vertical alignment (account for "missing" files) + # write output if trailing empty columns + current_data = [] if current != old_current: - last_loc = -1 - if loc - last_loc > 1: - current_data = [ '' for col in range( ( loc - last_loc - 1 ) * len( [ col for col in cols if col > hinge ] ) ) ] + # fill trailing empty columns with appropriate fill value + if not first_line: + if last_loc < len( inputs ) - 1: + if not fill_empty: + filler = [ '' for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ] + else: + filler = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ] + fout.write( '%s%s' % ( delimiter, delimiter.join( filler ) ) ) + # insert line break before current line + fout.write( '\n' ) + # fill leading empty columns with appropriate fill value + if loc > 0: + if not fill_empty: + current_data = [ '' for col in range( loc * len( cols ) ) ] + else: + current_data = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( loc * len( cols ) ) ] else: - current_data = [] + if loc - last_loc > 1: + if not fill_empty: + current_data = [ '' for col in range( ( loc - last_loc - 1 ) * len( cols ) ) ] + else: + current_data = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( ( loc - last_loc - 1 ) * len( cols ) ) ] # now output actual data - split_line = current_lines[ loc ].strip().split( '\t' ) + split_line = current_lines[ loc ].strip().split( delimiter ) if ''.join( split_line ): + # add actual data to be output below for col in cols: if col > hinge: current_data.append( split_line[ col - 1 ] ) + # grab next line for selected file current_lines[ loc ] = tmp_input_files[ loc ].readline() + # 
write relevant data to file if current == old_current: - if current_data: - fout.write( '\t%s' % '\t'.join( current_data ) ) + fout.write( '%s%s' % ( delimiter, delimiter.join( current_data ) ) ) else: - if not first_line: - fout.write( '\n' ) - fout.write( '%s\t%s' % ( current, '\t'.join( current_data ) ) ) - first_line = False + fout.write( '%s%s%s' % ( current, delimiter, delimiter.join( current_data ) ) ) + last_loc = loc old_current = current last_lines = ''.join( current_lines ).strip() - last_loc = loc + first_line = False + # fill trailing empty columns for final line + if last_loc < len( inputs ) - 1: + if not fill_empty: + filler = [ '' for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ] + else: + filler = [ fill_empty[ cols[ col % len( cols ) ] ] for col in range( ( len( inputs ) - last_loc - 1 ) * len( cols ) ) ] + fout.write( '%s%s' % ( delimiter, delimiter.join( filler ) ) ) fout.write( '\n' ) + fout.close() for f in tmp_input_files: os.unlink( f.name ) - fout.close() if __name__ == "__main__" : __main__() diff -r 5f59c890ee8d -r ab83e2ef8a99 tools/new_operations/column_join.xml --- a/tools/new_operations/column_join.xml Tue May 04 22:27:05 2010 -0400 +++ b/tools/new_operations/column_join.xml Wed May 05 11:34:29 2010 -0400 @@ -2,53 +2,111 @@ <description></description> <command interpreter="python"> column_join.py - $output - $input1 - $input2 - $hinge - $columns + --output=$output + --input1=$input1 + --input2=$input2 + --hinge=$hinge + --columns=$columns + #if $fill_empty_columns.fill_empty_columns_switch == "fill_empty": + --fill_options_file=$fill_options_file + #end if #for $f in $file_chooser: ${f.input} #end for </command> <inputs> <param name="input1" type="data" format="tabular" label="Choose the first file for the join" /> + <param name="hinge" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Use this column and columns to left the 'hinge' (matching data for each join)" help="All columns to 
left of selected column (plus selected column) will be used. Select 2 for pileup" /> <param name="columns" type="data_column" data_ref="input1" multiple="true" numerical="false" label="Include these column" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" /> - <param name="hinge" type="data_column" data_ref="input1" multiple="false" numerical="false" label="Use this column and columns to left the 'hinge' (matching data for each join)" help="All columns to left of selected column (plus selected column) will be used. Select 2 for pileup" /> + <conditional name="fill_empty_columns"> + <param name="fill_empty_columns_switch" type="select" label="Fill empty columns"> + <option value="no_fill" selected="True">No</option> + <option value="fill_empty">Yes</option> + </param> + <when value="no_fill" /> + <when value="fill_empty"> + <conditional name="do_fill_empty_columns"> + <param name="column_fill_type" type="select" label="Fill Columns by"> + <option value="single_fill_value" selected="True">Single fill value</option> + <option value="fill_value_by_column">Values by column</option> + </param> + <when value="single_fill_value"> + <param type="text" name="fill_value" label="Fill value" value="." /> + </when> + <when value="fill_value_by_column"> + <repeat name="column_fill" title="Fill Column"> + <param name="column_number" label="Column" type="data_column" data_ref="input1" /> + <param type="text" name="fill_value" value="." 
/> + </repeat> + </when> + </conditional> + </when> + </conditional> <param name="input2" type="data" format="tabular" label="Choose the second file for the join" /> <repeat name="file_chooser" title="Additional Input"> <param name="input" label="Additional input file" type="data" format="tabular" /> </repeat> </inputs> + <configfiles> + <configfile name="fill_options_file"><% +import simplejson +%> +#set $__fill_options = {} +#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty': + #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value': + #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value + #else: + #set $__start_fill = "" + #end if + #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ] + #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column': + #for column_fill in $fill_empty_columns['do_fill_empty_columns']['column_fill']: + #set $__fill_options['file1_columns'][ int( column_fill['column_number'].value ) - 1 ] = column_fill['fill_value'].value + #end for + #end if +#end if +${simplejson.dumps( __fill_options )} + </configfile> + </configfiles> <outputs> <data name="output" format="tabular" /> </outputs> <tests> <test> <param name="input1" value="column_join_in1.pileup" ftype="pileup" /> + <param name="hinge" value="2" /> <param name="columns" value="1,2,3,4,5,7" /> - <param name="hinge" value="1,2" /> + <param name="fill_empty_columns_switch" value="fill_empty" /> + <param name="column_fill_type" value="single_fill_value" /> + <param name="fill_value" value="?" 
/> <param name="input2" value="column_join_in2.pileup" ftype="pileup" /> <param name="input" value="column_join_in3.pileup" ftype="pileup" /> <output name="output" file="column_join_out1.pileup" ftype="tabular" /> </test> <test> <param name="input1" value="column_join_in4.pileup" ftype="pileup" /> + <param name="hinge" value="2" /> <param name="columns" value="1,2,3,4" /> - <param name="hinge" value="1,2" /> + <param name="fill_empty_columns_switch" value="no_fill" /> <param name="input2" value="column_join_in5.pileup" ftype="pileup" /> <param name="input" value="column_join_in6.pileup" ftype="pileup" /> <output name="output" file="column_join_out2.pileup" ftype="tabular" /> </test> +<!-- This test is failing for an unclear reason (the column values do not get + passed into the script), but passes in the browser <test> - <param name="input1" value="column_join_in7.pileup" ftype="pileup" /> - <param name="columns" value="1,2,3,4,5" /> - <param name="hinge" value="1,2" /> - <param name="input2" value="column_join_in8.pileup" ftype="pileup" /> - <param name="input" value="column_join_in9.pileup" ftype="pileup" /> + <param name="input1" value="column_join_in7.pileup" ftype="tabular" /> + <param name="hinge" value="2" /> + <param name="columns" value="3,4,5" /> + <param name="fill_empty_columns_switch" value="fill_empty" /> + <param name="column_fill_type" value="fill_value_by_column" /> + <param name="column_number" value="5" /> + <param name="fill_value" value="X" /> + <param name="input2" value="column_join_in8.pileup" ftype="tabular" /> + <param name="input" value="column_join_in9.pileup" ftype="tabular" /> <output name="output" file="column_join_out3.pileup" ftype="tabular" /> </test> - </tests> +--> </tests> <help> **What it does**
Participants (1):
- Nate Coraor