details: http://www.bx.psu.edu/hg/galaxy/rev/8bc85721cbce changeset: 3021:8bc85721cbce user: rc date: Thu Nov 12 13:20:19 2009 -0500 description: Applied patch provided by Brad Chapman in the liftOver tool to support BED files that contain track or browser lines. Resolves bitbucket issue 201. diffstat: tools/extract/liftOver_wrapper.py | 30 ++++++++++++++++++++++++++++-- tools/extract/liftOver_wrapper.xml | 5 ++++- 2 files changed, 32 insertions(+), 3 deletions(-) diffs (72 lines): diff -r 2e8b8b0bc366 -r 8bc85721cbce tools/extract/liftOver_wrapper.py --- a/tools/extract/liftOver_wrapper.py Thu Nov 12 13:12:12 2009 -0500 +++ b/tools/extract/liftOver_wrapper.py Thu Nov 12 13:20:19 2009 -0500 @@ -5,12 +5,34 @@ """ import sys, os, string +import tempfile +import re assert sys.version_info[:2] >= ( 2, 4 ) def stop_err(msg): sys.stderr.write(msg) sys.exit() + +def safe_bed_file(infile): + """Make a BED file with track and browser lines ready for liftOver. + + liftOver will fail with track or browser lines. We can make it happy + by converting these to comments. See: + + https://lists.soe.ucsc.edu/pipermail/genome/2007-May/013561.html + """ + fix_pat = re.compile("^(track|browser)") + (fd, fname) = tempfile.mkstemp() + in_handle = open(infile) + out_handle = open(fname, "w") + for line in in_handle: + if fix_pat.match(line): + line = "#" + line + out_handle.write(line) + in_handle.close() + out_handle.close() + return fname if len( sys.argv ) != 7: stop_err( "USAGE: prog input out_file1 out_file2 input_dbkey output_dbkey minMatch" ) @@ -29,11 +51,15 @@ if in_dbkey == "?": stop_err( "Input dataset genome build unspecified, click the pencil icon in the history item to specify it." ) -cmd_line = "liftOver -minMatch=" + str(minMatch) + " " + infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + " > /dev/null 2>&1" if not os.path.isfile( mapfilepath ): stop_err( "%s mapping is not currently available." % ( mapfilepath.split('/')[-1].split('.')[0] ) ) + +safe_infile = safe_bed_file(infile) +cmd_line = "liftOver -minMatch=" + str(minMatch) + " " + safe_infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + " > /dev/null 2>&1" try: os.system( cmd_line ) except Exception, exc: - stop_err( "Exception caught attempting conversion: %s" % str( exc ) ) \ No newline at end of file + stop_err( "Exception caught attempting conversion: %s" % str( exc ) ) +finally: + os.remove(safe_infile) diff -r 2e8b8b0bc366 -r 8bc85721cbce tools/extract/liftOver_wrapper.xml --- a/tools/extract/liftOver_wrapper.xml Thu Nov 12 13:12:12 2009 -0500 +++ b/tools/extract/liftOver_wrapper.xml Thu Nov 12 13:20:19 2009 -0500 @@ -42,7 +42,10 @@ .. class:: warningmark -This tool will only work on interval datasets with chromosome in column 1, start co-ordinate in column 2 and end co-ordinate in column 3. If this is not the case with any line of the input dataset, the tool will return empty output datasets. +This tool will only work on interval datasets with chromosome in column 1, +start co-ordinate in column 2 and end co-ordinate in column 3. BED comments +and track and browser lines will be ignored, but if other non-interval lines +are present the tool will return empty output datasets. -----