[hg] galaxy 2457: Extract genomic DNA tool now uses only alignseq.loc

26 Jun 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/8dfe971fcc27
changeset: 2457:8dfe971fcc27
user:      guru
date:      Thu Jun 25 15:29:24 2009 -0400
description:
Extract genomic DNA tool now uses only alignseq.loc to access sequence files. The twobit.loc file will henceforth be unnecessary, and all sequences from it should be moved to alignseq.loc.

1 file(s) affected in this change:

tools/extract/extract_genomic_dna.py

diffs (92 lines):

diff -r d3abf05d9272 -r 8dfe971fcc27 tools/extract/extract_genomic_dna.py

--- a/tools/extract/extract_genomic_dna.py	Fri Jun 19 11:21:13 2009 -0400
+++ b/tools/extract/extract_genomic_dna.py	Thu Jun 25 15:29:24 2009 -0400
@@ -4,7 +4,7 @@
     -1, --cols=N,N,N,N: Columns for start, end, strand in input file
     -d, --dbkey=N: Genome build of input file
     -o, --output_format=N: the data type of the output file
-    -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc and twobit.loc
+    -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc
 """
 from galaxy import eggs
 import pkg_resources
@@ -29,33 +29,20 @@
     reversed_s.reverse()
     return "".join( reversed_s )
 
-def check_nib_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    nib_file = "%s/alignseq.loc" % GALAXY_DATA_INDEX_DIR
-    nib_path = ''
-    for line in open( nib_file ):
+def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
+    seq_file = "%s/alignseq.loc" % GALAXY_DATA_INDEX_DIR
+    seq_path = ''
+    for line in open( seq_file ):
         line = line.rstrip( '\r\n' )
         if line and not line.startswith( "#" ) and line.startswith( 'seq' ):
             fields = line.split( '\t' )
             if len( fields ) < 3:
                 continue
             if fields[1] == dbkey:
-                nib_path = fields[2].strip()
+                seq_path = fields[2].strip()
                 break
-    return nib_path
+    return seq_path
 
-def check_twobit_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    twobit_file = "%s/twobit.loc" % GALAXY_DATA_INDEX_DIR
-    twobit_path = ''
-    for line in open( twobit_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( "#" ): 
-            fields = line.split( '\t' )
-            if len( fields ) < 2:
-                continue
-            if fields[0] == dbkey:
-                twobit_path = fields[1].strip()
-                break
-    return twobit_path
         
 def __main__():
     options, args = doc_optparse.parse( __doc__ )
@@ -72,9 +59,8 @@
     strand = None
     nibs = {}
     twobits = {}
-    nib_path = check_nib_file( dbkey, GALAXY_DATA_INDEX_DIR )
-    twobit_path = check_twobit_file( dbkey, GALAXY_DATA_INDEX_DIR )
-    if not os.path.exists( nib_path ) and not os.path.exists( twobit_path ):
+    seq_path = check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR )
+    if not os.path.exists( seq_path ):
         # If this occurs, we need to fix the metadata validator.
         stop_err( "No sequences are available for '%s', request them by reporting this error." % dbkey )
 
@@ -116,11 +102,11 @@
                 strand = '+'
             sequence = ''
 
-            if nib_path and os.path.exists( "%s/%s.nib" % ( nib_path, chrom ) ):
+            if seq_path and os.path.exists( "%s/%s.nib" % ( seq_path, chrom ) ):
                 if chrom in nibs:
                     nib = nibs[chrom]
                 else:
-                    nibs[chrom] = nib = bx.seq.nib.NibFile( file( "%s/%s.nib" % ( nib_path, chrom ) ) )
+                    nibs[chrom] = nib = bx.seq.nib.NibFile( file( "%s/%s.nib" % ( seq_path, chrom ) ) )
                 try:
                     sequence = nib.get( start, end-start )
                 except:
@@ -131,11 +117,11 @@
                         first_invalid_line = i + 1
                         invalid_line = line
                     continue
-            elif twobit_path and os.path.exists( twobit_path ):
+            elif seq_path and os.path.exists( seq_path ):
                 if chrom in twobits:
                     t = twobits[chrom]
                 else:
-                    twobits[chrom] = t = bx.seq.twobit.TwoBitFile( file( twobit_path ) )
+                    twobits[chrom] = t = bx.seq.twobit.TwoBitFile( file( seq_path ) )
                 try:
                     sequence = t[chrom][start:end]
                 except:

    

Nate Coraor

tags

participants (1)