commit/galaxy-central: richard_burhans: python version of "phyloP interspecies conservation scores" tool
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/665853dfd9e1/ changeset: 665853dfd9e1 user: richard_burhans date: 2011-11-30 18:05:59 summary: python version of "phyloP interspecies conservation scores" tool affected #: 3 files diff -r 36998f5a7ddb12ee93068b8001a09b84a47944f4 -r 665853dfd9e12e137f26777d1a2d2167df4b4275 tool-data/add_scores.loc.sample --- a/tool-data/add_scores.loc.sample +++ b/tool-data/add_scores.loc.sample @@ -1,21 +1,20 @@ -#This is a sample file distributed with Galaxy that enables tools to use a -#directory of gzipped genome files for use with add_scores. You will need to -#supply these files and then create a add_scores.loc file similar to this one -#(store it in this directory) that points to the directories in which those -#files are stored. The add_scores.loc file has this format (white space -#characters are TAB characters): +#This is a sample file distributed with Galaxy that lists the BigWig files +#available for use with the add_scores (phyloP interspecies conservation +#scores) tool. You will need to supply these BigWig files and then create +#an add_scores.loc file similar to this one (store it in this directory) +#that lists their locations. 
The add_scores.loc file has the following +#format (white space characters are TAB characters): # -#<build><file_path> +#<build><BigWig_file_path> # #So, for example, if your add_scores.loc began like this: # -#hg18 /galaxy/data/hg18/misc/phyloP +#hg18 /galaxy/data/hg18/misc/phyloP44way.primate.bw # -#then your /galaxy/data/hg18/misc/phyloP directory would need to contain -#the following gzipped files, among others: +#then your /galaxy/data/hg18/misc/ directory would need to contain a +#BigWig file named phyloP44way.primate.bw, among others: # -#-rw-r--r-- 1 g2data g2data 161981190 2010-03-19 12:48 chr10.phyloP44way.primate.wigFix.gz -#-rw-r--r-- 1 g2data g2data 54091 2010-03-19 12:56 chr10_random.phyloP44way.primate.wigFix.gz -#-rw-r--r-- 1 g2data g2data 158621990 2010-03-19 12:46 chr11.phyloP44way.primate.wigFix.gz +#-rw-r--r-- 1 g2data g2data 6057387572 Nov 23 10:11 phyloP44way.primate.bw # -#hg18 /galaxy/data/hg18/misc/phyloP +#hg18 /galaxy/data/hg18/misc/phyloP44way.primate.bw +#hg19 /galaxy/data/hg19/misc/phyloP46way.primate.bw diff -r 36998f5a7ddb12ee93068b8001a09b84a47944f4 -r 665853dfd9e12e137f26777d1a2d2167df4b4275 tools/evolution/add_scores.py --- /dev/null +++ b/tools/evolution/add_scores.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +import sys +from galaxy import eggs +import pkg_resources +pkg_resources.require( "bx-python" ) +pkg_resources.require( "numpy" ) +from bx.bbi.bigwig_file import BigWigFile +import os + +################################################################################ + +def die( message ): + print >> sys.stderr, message + sys.exit(1) + +def open_or_die( filename, mode='r', message=None ): + if message is None: + message = 'Error opening {0}'.format( filename ) + try: + fh = open( filename, mode ) + except IOError, err: + die( '{0}: {1}'.format( message, err.strerror ) ) + return fh + +################################################################################ + +class LocationFile( object ): + def __init__( self, 
filename, comment_chars=None, delimiter='\t', key_column=0 ): + self.filename = filename + if comment_chars is None: + self.comment_chars = ( '#' ) + else: + self.comment_chars = tuple( comment_chars ) + self.delimiter = delimiter + self.key_column = key_column + self._map = {} + self._populate_map() + + def _populate_map( self ): + try: + with open( self.filename ) as fh: + line_number = 0 + for line in fh: + line_number += 1 + line = line.rstrip( '\r\n' ) + if not line.startswith( self.comment_chars ): + elems = line.split( self.delimiter ) + if len( elems ) <= self.key_column: + die( 'Location file {0} line {1}: less than {2} columns'.format( self.filename, line_number, self.key_column + 1 ) ) + else: + key = elems.pop( self.key_column ) + if key in self._map: + if self._map[key] != elems: + die( 'Location file {0} line {1}: duplicate key "{2}"'.format( self.filename, line_number, key ) ) + else: + self._map[key] = elems + except IOError, err: + die( 'Error opening location file {0}: {1}'.format( self.filename, err.strerror ) ) + + def get_values( self, key ): + if key in self._map: + rval = self._map[key] + if len( rval ) == 1: + return rval[0] + else: + return rval + else: + die( 'key "{0}" not found in location file {1}'.format( key, self.filename ) ) + +################################################################################ + +def main(): + input_filename, output_filename, loc_filename, loc_key, chrom_col, start_col = sys.argv[1:] + + # open input, output, and bigwig files + location_file = LocationFile( loc_filename ) + bigwig_filename = location_file.get_values( loc_key ) + bwfh = open_or_die( bigwig_filename, message='Error opening BigWig file {0}'.format( bigwig_filename ) ) + bw = BigWigFile( file=bwfh ) + ifh = open_or_die( input_filename, message='Error opening input file {0}'.format( input_filename ) ) + ofh = open_or_die( output_filename, mode='w', message='Error opening output file {0}'.format( output_filename ) ) + + # make column numbers 
0-based + chrom_col = int( chrom_col ) - 1 + start_col = int( start_col ) - 1 + min_cols = max( chrom_col, start_col ) + + # add score column to input file + line_number = 0 + for line in ifh: + line_number += 1 + line = line.rstrip( '\r\n' ) + elems = line.split( '\t' ) + if len( elems ) > min_cols: + chrom = elems[chrom_col].strip() + # base-0 position in chrom + start = int( elems[start_col] ) + score_list = bw.get( chrom, start, start + 1 ) + score_list_len = len( score_list ) + if score_list_len == 1: + beg, end, score = score_list[0] + score_val = '{0:1.3f}'.format( score ) + elif score_list_len == 0: + score_val = 'NA' + else: + die( '{0} line {1}: chrom={2}, start={3}, score_list_len = {4}'.format( input_filename, line_number, chrom, start, score_list_len ) ) + print >> ofh, '\t'.join( [line, score_val] ) + else: + print >> ofh, line + + bwfh.close() + ifh.close() + ofh.close() + +################################################################################ + +if __name__ == "__main__": + main() + diff -r 36998f5a7ddb12ee93068b8001a09b84a47944f4 -r 665853dfd9e12e137f26777d1a2d2167df4b4275 tools/evolution/add_scores.xml --- a/tools/evolution/add_scores.xml +++ b/tools/evolution/add_scores.xml @@ -1,8 +1,8 @@ <tool id="hgv_add_scores" name="phyloP" version="1.0.0"><description>interspecies conservation scores</description> - <command> - add_scores $input1 ${input1.metadata.dbkey} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${GALAXY_DATA_INDEX_DIR}/add_scores.loc $out_file1 + <command interpreter="python"> + add_scores.py "$input1" "$out_file1" "${GALAXY_DATA_INDEX_DIR}/add_scores.loc" "${input1.metadata.dbkey}" "${input1.metadata.chromCol}" "${input1.metadata.startCol}" </command><inputs> @@ -34,7 +34,7 @@ <help> .. class:: warningmark -This currently works only for build hg18. +This currently works only for builds hg18 and hg19. 
----- Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket