details: http://www.bx.psu.edu/hg/galaxy/rev/e73efc9387ee changeset: 2844:e73efc9387ee user: Greg Von Kuster <greg@bx.psu.edu> date: Wed Oct 07 16:37:48 2009 -0400 description: Incorporate code to provide UCSC and Gbrowse integration for wiggle files contributed by Brad Chapman - handles ticket # 134. 3 file(s) affected in this change: lib/galaxy/datatypes/genetics.py lib/galaxy/datatypes/interval.py lib/galaxy/datatypes/tabular.py diffs (263 lines): diff -r ebe3e881ac25 -r e73efc9387ee lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Wed Oct 07 15:31:18 2009 -0400 +++ b/lib/galaxy/datatypes/genetics.py Wed Oct 07 16:37:48 2009 -0400 @@ -56,10 +56,6 @@ def get_estimated_display_viewport( self, dataset ): """Return a chrom, start, stop tuple for viewing a file.""" raise notImplemented - - def as_ucsc_display_file( self, dataset, **kwd ): - """Returns file""" - return file(dataset.file_name,'r') def ucsc_links( self, dataset, type, app, base_url ): """ from the ever-helpful angie hinrichs angie@soe.ucsc.edu diff -r ebe3e881ac25 -r e73efc9387ee lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py Wed Oct 07 15:31:18 2009 -0400 +++ b/lib/galaxy/datatypes/interval.py Wed Oct 07 16:37:48 2009 -0400 @@ -493,7 +493,6 @@ """Initialize datatype, by adding GBrowse display app""" Tabular.__init__(self, **kwd) self.add_display_app ( 'c_elegans', 'display in Wormbase', 'as_gbrowse_display_file', 'gbrowse_links' ) - def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -508,7 +507,6 @@ except: pass Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) - def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -524,11 +522,6 @@ except Exception, exc: out = "Can't create peek %s" % exc return out - - def as_gbrowse_display_file( self, dataset, **kwd ): - """Returns file contents that can be displayed in GBrowse apps.""" - return open( dataset.file_name ) - def get_estimated_display_viewport( self, dataset ): """ Return a chrom, start, stop tuple for viewing a file. There are slight differences between gff 2 and gff 3 @@ -568,7 +561,6 @@ return ( seqid, str( start ), str( stop ) ) else: return ( '', '', '' ) - def gbrowse_links( self, dataset, type, app, base_url ): ret_val = [] if dataset.has_data: @@ -582,7 +574,6 @@ link = "%s?start=%s&stop=%s&ref=%s&dbkey=%s" % ( site_url, start, stop, seqid, dataset.dbkey ) ret_val.append( ( site_name, link ) ) return ret_val - def sniff( self, filename ): """ Determines whether the file is in gff format @@ -639,7 +630,6 @@ def __init__(self, **kwd): """Initialize datatype, by adding GBrowse display app""" Gff.__init__(self, **kwd) - def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -666,7 +656,6 @@ if valid_start and valid_end and start < end and strand in self.valid_gff3_strand and phase in self.valid_gff3_phase: break Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) - def sniff( self, filename ): """ Determines whether the file is in gff version 3 format @@ -740,9 +729,70 @@ MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) + def __init__( self, **kwd ): + Tabular.__init__( self, **kwd ) + self.add_display_app( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ) + self.add_display_app( 'gbrowse', 'display in Gbrowse', 'as_gbrowse_display_file', 'gbrowse_links' ) + def get_estimated_display_viewport( self, dataset ): + value = ( "", "", "" ) + num_check_lines = 100 # only check up to this many non empty lines + for i, line in enumerate( file( dataset.file_name ) ): + line = line.rstrip( '\r\n' ) + if line and line.startswith( "browser" ): + chr_info = line.split()[-1] + wig_chr, coords = chr_info.split( ":" ) + start, end = coords.split( "-" ) + value = ( wig_chr, start, end ) + break + if i > num_check_lines: + break + return value + def _get_remote_call_url( self, redirect_url, site_name, dataset, type, app, base_url ): + """Retrieve the URL to call out to an external site and retrieve data. + This routes our external URL through a local galaxy instance which makes + the data available, followed by redirecting to the remote site with a + link back to the available information. + """ + internal_url = "%s" % url_for( controller='dataset', dataset_id=dataset.id, action='display_at', filename='%s_%s' % ( type, site_name ) ) + base_url = app.config.get( "display_at_callback", base_url ) + if base_url.startswith( 'https://' ): + base_url = base_url.replace( 'https', 'http', 1 ) + display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" % \ + ( base_url, url_for( controller='root' ), dataset.id, type ) ) + link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) + return link + def _get_viewer_range( self, dataset ): + """Retrieve the chromosome, start, end for an external viewer.""" + if dataset.has_data: + viewport_tuple = self.get_estimated_display_viewport( dataset ) + if viewport_tuple: + chrom = viewport_tuple[0] + start = viewport_tuple[1] + stop = viewport_tuple[2] + return ( chrom, start, stop ) + return ( None, None, None ) + def gbrowse_links( self, dataset, type, app, base_url ): + ret_val = [] + chrom, start, stop = self._get_viewer_range( dataset ) + if chrom is not None: + for site_name, site_url in util.get_gbrowse_sites_by_build( dataset.dbkey ): + if site_name in app.config.gbrowse_display_sites: + redirect_url = urllib.quote_plus( "%s%s/?ref=%s&start=%s&stop=%s&eurl=%%s" % ( site_url, dataset.dbkey, chrom, start, stop ) ) + link = self._get_remote_call_url( redirect_url, site_name, dataset, type, app, base_url ) + ret_val.append( ( site_name, link ) ) + return ret_val + def ucsc_links( self, dataset, type, app, base_url ): + ret_val = [] + chrom, start, stop = self._get_viewer_range( dataset ) + if chrom is not None: + for site_name, site_url in util.get_ucsc_by_build( dataset.dbkey ): + if site_name in app.config.ucsc_display_sites: + redirect_url = urllib.quote_plus( "%sdb=%s&position=%s:%s-%s&hgt.customText=%%s" % ( site_url, dataset.dbkey, chrom, start, stop ) ) + link = self._get_remote_call_url( redirect_url, site_name, dataset, type, app, base_url ) + ret_val.append( ( site_name, link ) ) + return ret_val def make_html_table( self, dataset ): return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) - def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -761,7 +811,6 @@ if do_break: break Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) - def sniff( self, filename ): """ Determines wether the file is in wiggle format @@ -792,7 +841,6 @@ return False except: return False - def get_track_window(self, dataset, data, start, end): """ Assumes we have a numpy file. @@ -817,7 +865,6 @@ y = data[ t_start : t_end ] return zip(x.tolist(), y.tolist()) - def get_track_resolution( self, dataset, start, end): range = end - start # Determine appropriate resolution to plot ~1000 points @@ -826,7 +873,6 @@ resolution = min( resolution, 100000 ) resolution = max( resolution, 1 ) return resolution - def get_track_type( self ): return "LineTrack" @@ -882,8 +928,6 @@ except: #return "." return ('', '', '') - def as_ucsc_display_file( self, dataset ): - return open(dataset.file_name) def ucsc_links( self, dataset, type, app, base_url ): ret_val = [] if dataset.has_data: @@ -948,58 +992,6 @@ return False return True -class GBrowseTrack ( Tabular ): - """GMOD GBrowseTrack""" - file_ext = "gbrowsetrack" - - def __init__(self, **kwd): - """Initialize datatype, by adding GBrowse display app""" - Tabular.__init__(self, **kwd) - self.add_display_app ('c_elegans', 'display in Wormbase', 'as_gbrowse_display_file', 'gbrowse_links' ) - - def set_readonly_meta( self, dataset, skip=1, **kwd ): - """Resets the values of readonly metadata elements.""" - Tabular.set_readonly_meta( self, dataset, skip = skip, **kwd ) - - def set_meta( self, dataset, overwrite = True, **kwd ): - Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 1 ) - - def make_html_table( self, dataset ): - return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) - - def get_estimated_display_viewport( self, dataset ): - #TODO: fix me... - return ('', '', '') - - def gbrowse_links( self, dataset, type, app, base_url ): - ret_val = [] - if dataset.has_data: - viewport_tuple = self.get_estimated_display_viewport(dataset) - if viewport_tuple: - chrom = viewport_tuple[0] - start = viewport_tuple[1] - stop = viewport_tuple[2] - for site_name, site_url in util.get_gbrowse_sites_by_build(dataset.dbkey): - if site_name in app.config.gbrowse_display_sites: - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s" % (base_url, url_for( controller='root' ), dataset.id, type) ) - link = "%sname=%s&ref=%s:%s..%s&eurl=%s" % (site_url, dataset.dbkey, chrom, start, stop, display_url ) - ret_val.append( (site_name, link) ) - return ret_val - - def as_gbrowse_display_file( self, dataset, **kwd ): - """Returns file contents that can be displayed in GBrowse apps.""" - #TODO: fix me... - return open(dataset.file_name) - - def sniff( self, filename ): - """ - Determines whether the file is in gbrowsetrack format. - - GBrowseTrack files are built within Galaxy. - TODO: Not yet sure what this file will look like. Fix this sniffer and add some unit tests here as soon as we know. - """ - return False - if __name__ == '__main__': import doctest, sys doctest.testmod(sys.modules[__name__]) diff -r ebe3e881ac25 -r e73efc9387ee lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py Wed Oct 07 15:31:18 2009 -0400 +++ b/lib/galaxy/datatypes/tabular.py Wed Oct 07 16:37:48 2009 -0400 @@ -205,6 +205,10 @@ def display_peek( self, dataset ): """Returns formatted html of peek""" return self.make_html_table( dataset ) + def as_gbrowse_display_file( self, dataset, **kwd ): + return open( dataset.file_name ) + def as_ucsc_display_file( self, dataset, **kwd ): + return open( dataset.file_name ) class Taxonomy( Tabular ): def __init__(self, **kwd):