[hg] galaxy 3801: First pass at adding Ensembl browsers as an external display application.
details: http://www.bx.psu.edu/hg/galaxy/rev/37586a11c13a changeset: 3801:37586a11c13a user: Dan Blankenberg <dan@bx.psu.edu> date: Fri May 21 15:25:56 2010 -0400 description: First pass at adding Ensembl browsers as an external display application. Two different URL generation and data attachment methods are used; one for 'old' Ensembl archives older than ~November 2008 and another for Ensembl sites using the current method. The tool-data/shared/ensembl/ensembl_sites.txt file contains the site and build information for using the current method; the tool-data/shared/ensembl/ensembl_sites_data_URL.txt file has the site and build information for when the older method is to be used. The new method follows: http://www.ensembl.org/info/docs/webcode/linking.html The old method follows: http://aug2007.archive.ensembl.org/Homo_sapiens/helpview?se=1;kw=urlsource diffstat: datatypes_conf.xml.sample | 2 + display_applications/ensembl/ensembl_gff.xml | 127 +++++++++++++++ display_applications/ensembl/ensembl_interval_as_bed.xml | 127 +++++++++++++++ tool-data/shared/ensembl/ensembl_sites.txt | 4 + tool-data/shared/ensembl/ensembl_sites_data_URL.txt | 8 + 5 files changed, 268 insertions(+), 0 deletions(-) diffs (301 lines): diff -r 0539d58e383a -r 37586a11c13a datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample Fri May 21 14:41:20 2010 -0400 +++ b/datatypes_conf.xml.sample Fri May 21 15:25:56 2010 -0400 @@ -49,6 +49,7 @@ </datatype> <datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"> <converter file="gff_to_bed_converter.xml" target_datatype="bed"/> + <display file="ensembl/ensembl_gff.xml" inherit="True"/> </datatype> <datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/> <datatype extension="gif" type="galaxy.datatypes.images:Image" mimetype="image/gif"/> @@ -63,6 +64,7 @@ <indexer file="interval_awk.xml" /> <!-- <display file="ucsc/interval_as_bed.xml" inherit="True" /> --> <display 
file="genetrack.xml" inherit="True"/> + <display file="ensembl/ensembl_interval_as_bed.xml" inherit="True"/> </datatype> <datatype extension="jpg" type="galaxy.datatypes.images:Image" mimetype="image/jpeg"/> <datatype extension="laj" type="galaxy.datatypes.images:Laj"/> diff -r 0539d58e383a -r 37586a11c13a display_applications/ensembl/ensembl_gff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/ensembl/ensembl_gff.xml Fri May 21 15:25:56 2010 -0400 @@ -0,0 +1,127 @@ +<display id="ensembl_gff" version="1.0.0" name="display at Ensembl"> + <!-- Current Ensembl method of attaching user data via URL; archives older than ~November 2008 will use a different method --> + <!-- Load links from file: one line to one link --> + <dynamic_links from_file="tool-data/shared/ensembl/ensembl_sites.txt" skip_startswith="#" id="0" name="1"> + + <!-- Define parameters by column from file, allow splitting on builds --> + <dynamic_param name="site_id" value="0"/> + <dynamic_param name="site_name" value="1"/> + <dynamic_param name="site_link" value="2"/> + <dynamic_param name="site_dbkeys" value="3" split="True" separator="," /> + <dynamic_param name="site_organisms" value="4" split="True" separator="," /> + + <!-- Filter out some of the links based upon matching site_dbkeys to dataset dbkey --> + <filter>${dataset.dbkey in $site_dbkeys}</filter> + + <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name --> + <url>${site_link}${site_organism}/Location/View?r=${position};contigviewbottom=url:${gff_file.qp}=normal</url> + + <param type="data" name="gff_file" url="galaxy_${DATASET_HASH}.gff" /> + <param type="template" name="site_organism" strip="True" > + #set index = $site_dbkeys.index( $gff_file.dbkey ) + $site_organisms[ $index ] + </param> + <param type="template" name="position" strip="True" > +#set line_count = 0 +#set chrom = None +#set start = float( 'inf' ) +#set end = 0 +#for $line in open( 
$gff_file.file_name ): + #if $line_count > 10: ##10 max lines to check for view port + #break + #end if + #if not $line.startswith( "#" ): + #set $fields = $line.split( "\t" ) + #try: + #if len( $fields ) >= 5: + #if $chrom is None or $fields[ 0 ] == $chrom: + #set chrom = $fields[ 0 ] + #set start = min( $start, int( $fields[ 3 ] ) ) + #set end = max( $end, int( $fields[ 4 ] ) ) + #end if + #end if + #except: + #pass + #end try + #end if + #set line_count += 1 +#end for +#if $chrom is not None: +##The difference between chr1 and 1 is handled by Ensembl, except for the viewport, we need to provide e.g. '1' instead of 'chr1' here +##This is rather naive, it would be more ideal to have actual mappings +#if $chrom == 'chrM': + #set $chrom = 'MT' +#end if +#if $chrom.startswith( 'chr' ): + #set $chrom = $chrom[3:] +#end if +${chrom}:${start}-${end} +#else: +##default view is of '1' +1 +#end if + </param> + </dynamic_links> + + <!-- Old Ensembl method of attaching user data via URL --> + <!-- Load links from file: one line to one link --> + <dynamic_links from_file="tool-data/shared/ensembl/ensembl_sites_data_URL.txt" skip_startswith="#" id="0" name="1"> + + <!-- Define parameters by column from file, allow splitting on builds --> + <dynamic_param name="site_id" value="0"/> + <dynamic_param name="site_name" value="1"/> + <dynamic_param name="site_link" value="2"/> + <dynamic_param name="site_dbkeys" value="3" split="True" separator="," /> + <dynamic_param name="site_organisms" value="4" split="True" separator="," /> + + <!-- Filter out some of the links based upon matching site_dbkeys to dataset dbkey --> + <filter>${dataset.dbkey in $site_dbkeys}</filter> + + <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name --> + <url>${site_link}${site_organism}/contigview?data_URL=${gff_file.qp}${position}</url> + + <param type="data" name="gff_file" url="galaxy_${DATASET_HASH}.gff" /> + <param type="template" 
name="site_organism" strip="True" > + #set index = $site_dbkeys.index( $gff_file.dbkey ) + $site_organisms[ $index ] + </param> + <param type="template" name="position" strip="True" > + #set line_count = 0 + #set chrom = None + #set start = float( 'inf' ) + #set end = 0 + #for $line in open( $gff_file.file_name ): + #if $line_count > 10: ##10 max lines to check for view port + #break + #end if + #if not $line.startswith( "#" ): + #set $fields = $line.split( "\t" ) + #try: + #if len( $fields ) >= 5: + #if $chrom is None or $fields[ 0 ] == $chrom: + #set chrom = $fields[ 0 ] + #set start = min( $start, int( $fields[ 3 ] ) ) + #set end = max( $end, int( $fields[ 4 ] ) ) + #end if + #end if + #except: + #pass + #end try + #end if + #set line_count += 1 + #end for + #if $chrom is not None: + ##The difference between chr1 and 1 is handled by Ensembl, except for the viewport, we need to provide e.g. '1' instead of 'chr1' here + ##This is rather naive, it would be more ideal to have actual mappings + #if $chrom == 'chrM': + #set $chrom = 'MT' + #end if + #if $chrom.startswith( 'chr' ): + #set $chrom = $chrom[3:] + #end if + &chr=${chrom}&start=${start}&end=${end} + #end if + </param> + </dynamic_links> + +</display> diff -r 0539d58e383a -r 37586a11c13a display_applications/ensembl/ensembl_interval_as_bed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/ensembl/ensembl_interval_as_bed.xml Fri May 21 15:25:56 2010 -0400 @@ -0,0 +1,127 @@ +<display id="ensembl_interval" version="1.0.0" name="display at Ensembl"> + <!-- Current Ensembl method of attaching user data via URL; archives older than ~November 2008 will use a different method --> + <!-- Load links from file: one line to one link --> + <dynamic_links from_file="tool-data/shared/ensembl/ensembl_sites.txt" skip_startswith="#" id="0" name="1"> + + <!-- Define parameters by column from file, allow splitting on builds --> + <dynamic_param name="site_id" value="0"/> + <dynamic_param 
name="site_name" value="1"/> + <dynamic_param name="site_link" value="2"/> + <dynamic_param name="site_dbkeys" value="3" split="True" separator="," /> + <dynamic_param name="site_organisms" value="4" split="True" separator="," /> + + <!-- Filter out some of the links based upon matching site_dbkeys to dataset dbkey --> + <filter>${dataset.dbkey in $site_dbkeys}</filter> + + <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name --> + <url>${site_link}${site_organism}/Location/View?r=${position};contigviewbottom=url:${bed_file.qp}=normal</url> + + <param type="data" name="bed_file" url="galaxy_${DATASET_HASH}.bed" format="bedstrict"/> + <param type="template" name="site_organism" strip="True" > + #set index = $site_dbkeys.index( $bed_file.dbkey ) + $site_organisms[ $index ] + </param> + <param type="template" name="position" strip="True" > +#set line_count = 0 +#set chrom = None +#set start = float( 'inf' ) +#set end = 0 +#for $line in open( $bed_file.file_name ): + #if $line_count > 10: ##10 max lines to check for view port + #break + #end if + #if not $line.startswith( "#" ): + #set $fields = $line.split( "\t" ) + #try: + #if len( $fields ) >= max( $bed_file.metadata.startCol, $bed_file.metadata.endCol, $bed_file.metadata.chromCol ): + #if $chrom is None or $fields[ $bed_file.metadata.chromCol - 1 ] == $chrom: + #set chrom = $fields[ $bed_file.metadata.chromCol - 1 ] + #set start = min( $start, int( $fields[ $bed_file.metadata.startCol - 1 ] ) ) + #set end = max( $end, int( $fields[ $bed_file.metadata.endCol - 1 ] ) ) + #end if + #end if + #except: + #pass + #end try + #end if + #set line_count += 1 +#end for +#if $chrom is not None: +##The difference between chr1 and 1 is handled by Ensembl, except for the viewport, we need to provide e.g. 
'1' instead of 'chr1' here +##This is rather naive, it would be more ideal to have actual mappings +#if $chrom == 'chrM': + #set $chrom = 'MT' +#end if +#if $chrom.startswith( 'chr' ): + #set $chrom = $chrom[3:] +#end if +${chrom}:${start + 1}-${end} +#else: +##default view is of '1' +1 +#end if + </param> + </dynamic_links> + + <!-- Old Ensembl method of attaching user data via URL --> + <!-- Load links from file: one line to one link --> + <dynamic_links from_file="tool-data/shared/ensembl/ensembl_sites_data_URL.txt" skip_startswith="#" id="0" name="1"> + + <!-- Define parameters by column from file, allow splitting on builds --> + <dynamic_param name="site_id" value="0"/> + <dynamic_param name="site_name" value="1"/> + <dynamic_param name="site_link" value="2"/> + <dynamic_param name="site_dbkeys" value="3" split="True" separator="," /> + <dynamic_param name="site_organisms" value="4" split="True" separator="," /> + + <!-- Filter out some of the links based upon matching site_dbkeys to dataset dbkey --> + <filter>${dataset.dbkey in $site_dbkeys}</filter> + + <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name --> + <url>${site_link}${site_organism}/contigview?data_URL=${bed_file.qp}${position}</url> + + <param type="data" name="bed_file" url="galaxy_${DATASET_HASH}.bed" format="bedstrict"/> + <param type="template" name="site_organism" strip="True" > + #set index = $site_dbkeys.index( $bed_file.dbkey ) + $site_organisms[ $index ] + </param> + <param type="template" name="position" strip="True" > + #set line_count = 0 + #set chrom = None + #set start = float( 'inf' ) + #set end = 0 + #for $line in open( $bed_file.file_name ): + #if $line_count > 10: ##10 max lines to check for view port + #break + #end if + #if not $line.startswith( "#" ): + #set $fields = $line.split( "\t" ) + #try: + #if len( $fields ) >= max( $bed_file.metadata.startCol, $bed_file.metadata.endCol, $bed_file.metadata.chromCol ): + #if 
$chrom is None or $fields[ $bed_file.metadata.chromCol - 1 ] == $chrom: + #set chrom = $fields[ $bed_file.metadata.chromCol - 1 ] + #set start = min( $start, int( $fields[ $bed_file.metadata.startCol - 1 ] ) ) + #set end = max( $end, int( $fields[ $bed_file.metadata.endCol - 1 ] ) ) + #end if + #end if + #except: + #pass + #end try + #end if + #set line_count += 1 + #end for + #if $chrom is not None: + ##The difference between chr1 and 1 is handled by Ensembl, except for the viewport, we need to provide e.g. '1' instead of 'chr1' here + ##This is rather naive, it would be more ideal to have actual mappings + #if $chrom == 'chrM': + #set $chrom = 'MT' + #end if + #if $chrom.startswith( 'chr' ): + #set $chrom = $chrom[3:] + #end if + &chr=${chrom}&start=${start + 1}&end=${end} + #end if + </param> + </dynamic_links> + +</display> diff -r 0539d58e383a -r 37586a11c13a tool-data/shared/ensembl/ensembl_sites.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/shared/ensembl/ensembl_sites.txt Fri May 21 15:25:56 2010 -0400 @@ -0,0 +1,4 @@ +#These builds are displayed using the method described in: +#http://www.ensembl.org/info/docs/webcode/linking.html +ensembl_Current Current http://www.ensembl.org/ hg19,felCat3,galGal3,bosTau4,canFam2,loxAfr3,cavPor3,equCab2,anoCar1,oryLat2,mm9,monDom5,ponAbe2,susScr2,ornAna1,oryCun2,rn4,rheMac2,gasAcu1,tetNig2,xenTro2,taeGut1,danRer5,ci2,dm3,ce6,sacCer2 Homo_sapiens,Felis_catus,Gallus_gallus,Bos_taurus,Canis_familiaris,Loxodonta_africana,Cavia_porcellus,Equus_caballus,Anolis_carolinensis,Oryzias_latipes,Mus_musculus,Monodelphis_domestica,Pongo_pygmaeus,Sus_scrofa,Ornithorhynchus_anatinus,Oryctolagus_cuniculus,Rattus_norvegicus,Macaca_mulatta,Gasterosteus_aculeatus,Tetraodon_nigroviridis,Xenopus_tropicalis,Taeniopygia_guttata,Danio_rerio,Ciona_intestinalis,Drosophila_melanogaster,Caenorhabditis_elegans,Saccharomyces_cerevisiae +ensembl_May_2009 May 2009 http://may2009.archive.ensembl.org/ hg18 Homo_sapiens diff -r 
0539d58e383a -r 37586a11c13a tool-data/shared/ensembl/ensembl_sites_data_URL.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/shared/ensembl/ensembl_sites_data_URL.txt Fri May 21 15:25:56 2010 -0400 @@ -0,0 +1,8 @@ +#These builds are displayed using the method described in: +#http://aug2007.archive.ensembl.org/Homo_sapiens/helpview?se=1;kw=urlsource +ensembl_March_2008 March 2008 http://mar2008.archive.ensembl.org/ bosTau3,tetNig1 Bos_taurus,Tetraodon_nigroviridis +ensembl_February_2007 February 2007 http://feb2007.archive.ensembl.org/ monDom4,danRer4 Monodelphis_domestica,Danio_rerio +ensembl_July_2008 July 2008 http://jul2008.archive.ensembl.org/ panTro2 Pan_troglodytes +ensembl_April_2006 April 2006 http://apr2006.archive.ensembl.org/ galGal2,bosTau2,canFam1,mm7,rheMac1,danRer3,apiMel2,sacCer1 Gallus_gallus,Bos_taurus,Canis_familiaris,Mus_musculus,Macaca_mulatta,Danio_rerio,Apis_mellifera,Saccharomyces_cerevisiae +ensembl_November_2005 November 2005 http://nov2005.archive.ensembl.org/ hg17,panTro1,bosTau1,mm6,xenTro1,anoGam1,dm2 Homo_sapiens,Pan_troglodytes,Bos_taurus,Mus_musculus,Xenopus_tropicalis,Anopheles_gambiae,Drosophila_melanogaster +ensembl_August_2007 August 2007 http://aug2007.archive.ensembl.org/ mm8,ce4 Mus_musculus,Caenorhabditis_elegans
participants (1)
-
Nate Coraor