1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/2a1b619a3fe6/
changeset: 2a1b619a3fe6
user: dan
date: 2012-10-16 21:25:01
summary: Performance enhancements for Build custom track for UCSC genome browser tool.
affected #: 1 file

diff -r d5f1c28102befb1032b277b4c2fb75aa2d8cd88c -r 2a1b619a3fe6f94c764d1e2154f1b1a1195f03cc tools/visualization/build_ucsc_custom_track.py
--- a/tools/visualization/build_ucsc_custom_track.py
+++ b/tools/visualization/build_ucsc_custom_track.py
@@ -11,6 +11,22 @@
     sys.stderr.write( msg )
     sys.exit()
 
+FILE_TYPE_TO_TRACK_TYPE = { 'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph':'bedGraph', 'wig':'wiggle_0' }
+CHUNK_SIZE = 2**20 #1mb
+
+def get_track_line_is_interval( file_type, name, description, color, visibility ):
+    if file_type in FILE_TYPE_TO_TRACK_TYPE:
+        track_type = FILE_TYPE_TO_TRACK_TYPE[ file_type ]
+        is_interval = False
+    else:
+        track_type = None
+        is_interval = True
+    track_line = 'track '
+    if track_type:
+        track_line += 'type=%s ' % ( track_type )
+    track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility )
+    return track_line, is_interval
+
 args = sys.argv[1:]
 
 out_fname = args.pop(0)
@@ -22,25 +38,23 @@
 while args:
     # Suck in one dataset worth of arguments
     in_fname = args.pop(0)
-    type = args.pop(0)
+    file_type = args.pop(0)
     colspec = args.pop(0)
     name = args.pop(0)
     description = args.pop(0)
     color = args.pop(0).replace( '-', ',' )
     visibility = args.pop(0)
+    track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility )
     # Do the work
-    if type == "wig":
-        print >> out, '''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s''' \
-            % ( name, description, color, visibility )
-        for i, line in enumerate( file( in_fname ) ):
-            print >> out, line,
-        print >> out
-    elif type == "bed":
-        print >> out, '''track name="%s" description="%s" color=%s visibility=%s''' \
-            % ( name, description, color, visibility )
-        for i, line in enumerate( file( in_fname ) ):
-            print >> out, line,
-        print >> out
+    in_file = open( in_fname )
+    out.write( track_line )
+    if not is_interval:
+        while True:
+            chunk = in_file.read( CHUNK_SIZE )
+            if chunk:
+                out.write( chunk )
+            else:
+                break
     else:
         # Assume type is interval (don't pass this script anything else!)
         try:
@@ -52,29 +66,27 @@
             except:
                 stop_err( "Columns in interval file invalid for UCSC custom track." )
 
-        print >> out, '''track name="%s" description="%s" color=%s visibility=%s''' \
-            % ( name, description, color, visibility )
         i = 0
-        for i, line in enumerate( file( in_fname ) ):
+        for i, line in enumerate( in_file ):
             line = line.rstrip( '\r\n' )
             if line and not line.startswith( '#' ):
                 fields = line.split( "\t" )
                 if st > 0:
                     #strand column is present
                     try:
-                        print >> out, "%s\t%s\t%s\t%d\t0\t%s" % ( fields[c], fields[s], fields[e], i, fields[st] )
+                        out.write( "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] ) )
                     except:
                         skipped_lines += 1
                         if not first_invalid_line:
                             first_invalid_line = i+1
                 else:
                     try:
-                        print >> out, "%s\t%s\t%s" % ( fields[c], fields[s], fields[e] )
+                        out.write( "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] ) )
                     except:
                         skipped_lines += 1
                         if not first_invalid_line:
                             first_invalid_line = i+1
-        print >> out
+        out.write( "\n" ) #separating newline
     num_tracks += 1
 
 out.close()

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.