details:
http://www.bx.psu.edu/hg/galaxy/rev/8dfe971fcc27
changeset: 2457:8dfe971fcc27
user: guru
date: Thu Jun 25 15:29:24 2009 -0400
description:
Extract genomic DNA tool now uses only alignseq.loc to access sequence files. The
twobit.loc file will henceforth be unnecessary, and all sequences from it should be moved
to alignseq.loc.
1 file(s) affected in this change:
tools/extract/extract_genomic_dna.py
diffs (92 lines):
diff -r d3abf05d9272 -r 8dfe971fcc27 tools/extract/extract_genomic_dna.py
--- a/tools/extract/extract_genomic_dna.py Fri Jun 19 11:21:13 2009 -0400
+++ b/tools/extract/extract_genomic_dna.py Thu Jun 25 15:29:24 2009 -0400
@@ -4,7 +4,7 @@
-1, --cols=N,N,N,N: Columns for start, end, strand in input file
-d, --dbkey=N: Genome build of input file
-o, --output_format=N: the data type of the output file
- -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc and twobit.loc
+ -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc
"""
from galaxy import eggs
import pkg_resources
@@ -29,33 +29,20 @@
reversed_s.reverse()
return "".join( reversed_s )
-def check_nib_file( dbkey, GALAXY_DATA_INDEX_DIR ):
- nib_file = "%s/alignseq.loc" % GALAXY_DATA_INDEX_DIR
- nib_path = ''
- for line in open( nib_file ):
+def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
+ seq_file = "%s/alignseq.loc" % GALAXY_DATA_INDEX_DIR
+ seq_path = ''
+ for line in open( seq_file ):
line = line.rstrip( '\r\n' )
if line and not line.startswith( "#" ) and line.startswith( 'seq' ):
fields = line.split( '\t' )
if len( fields ) < 3:
continue
if fields[1] == dbkey:
- nib_path = fields[2].strip()
+ seq_path = fields[2].strip()
break
- return nib_path
+ return seq_path
-def check_twobit_file( dbkey, GALAXY_DATA_INDEX_DIR ):
- twobit_file = "%s/twobit.loc" % GALAXY_DATA_INDEX_DIR
- twobit_path = ''
- for line in open( twobit_file ):
- line = line.rstrip( '\r\n' )
- if line and not line.startswith( "#" ):
- fields = line.split( '\t' )
- if len( fields ) < 2:
- continue
- if fields[0] == dbkey:
- twobit_path = fields[1].strip()
- break
- return twobit_path
def __main__():
options, args = doc_optparse.parse( __doc__ )
@@ -72,9 +59,8 @@
strand = None
nibs = {}
twobits = {}
- nib_path = check_nib_file( dbkey, GALAXY_DATA_INDEX_DIR )
- twobit_path = check_twobit_file( dbkey, GALAXY_DATA_INDEX_DIR )
- if not os.path.exists( nib_path ) and not os.path.exists( twobit_path ):
+ seq_path = check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR )
+ if not os.path.exists( seq_path ):
# If this occurs, we need to fix the metadata validator.
stop_err( "No sequences are available for '%s', request them by reporting this error." % dbkey )
@@ -116,11 +102,11 @@
strand = '+'
sequence = ''
- if nib_path and os.path.exists( "%s/%s.nib" % ( nib_path, chrom ) ):
+ if seq_path and os.path.exists( "%s/%s.nib" % ( seq_path, chrom ) ):
if chrom in nibs:
nib = nibs[chrom]
else:
- nibs[chrom] = nib = bx.seq.nib.NibFile( file( "%s/%s.nib" % ( nib_path, chrom ) ) )
+ nibs[chrom] = nib = bx.seq.nib.NibFile( file( "%s/%s.nib" % ( seq_path, chrom ) ) )
try:
sequence = nib.get( start, end-start )
except:
@@ -131,11 +117,11 @@
first_invalid_line = i + 1
invalid_line = line
continue
- elif twobit_path and os.path.exists( twobit_path ):
+ elif seq_path and os.path.exists( seq_path ):
if chrom in twobits:
t = twobits[chrom]
else:
- twobits[chrom] = t = bx.seq.twobit.TwoBitFile( file( twobit_path ) )
+ twobits[chrom] = t = bx.seq.twobit.TwoBitFile( file( seq_path ) )
try:
sequence = t[chrom][start:end]
except: