1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/2dd7594c6ada/
changeset:   2dd7594c6ada
user:        jgoecks
date:        2012-02-23 18:23:36
summary:     Enable visualization of ENCODE peak tracks. Create 'encodepeak' datatype and add data providers for new datatype.
affected #:  6 files

diff -r 854e160efc7bd2b42eeb847057214bc92cec809f -r 2dd7594c6adaabbc872c287cfa1c1eb617026a03 datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample
+++ b/datatypes_conf.xml.sample
@@ -130,6 +130,11 @@
     <datatype extension="mafcustomtrack" type="galaxy.datatypes.sequence:MafCustomTrack">
         <display file="ucsc/maf_customtrack.xml" />
     </datatype>
+    <datatype extension="encodepeak" type="galaxy.datatypes.interval:ENCODEPeak" display_in_upload="True">
+        <converter file="encodepeak_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/>
+        <converter file="encodepeak_to_bgzip_converter.xml" target_datatype="bgzip"/>
+        <converter file="encodepeak_to_summary_tree_converter.xml" target_datatype="summary_tree"/>
+    </datatype>
     <datatype extension="pdf" type="galaxy.datatypes.images:Pdf" mimetype="application/pdf"/>
     <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true" />
     <datatype extension="png" type="galaxy.datatypes.images:Png" mimetype="image/png"/>

diff -r 854e160efc7bd2b42eeb847057214bc92cec809f -r 2dd7594c6adaabbc872c287cfa1c1eb617026a03 lib/galaxy/datatypes/converters/encodepeak_to_bgzip_converter.xml
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/encodepeak_to_bgzip_converter.xml
@@ -0,0 +1,19 @@
+<tool id="CONVERTER_encodepeak_to_bgzip_0" name="Convert ENCODEPeak to BGZIP" version="1.0.0" hidden="true">
+<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+    <command interpreter="python">bgzip.py
+        -c ${input1.metadata.chromCol}
+        -s ${input1.metadata.startCol}
+        -e ${input1.metadata.endCol}
+        $input1 $output1
+    </command>
+    <inputs>
+        <page>
+            <param format="ENCODEPeak" name="input1" type="data" label="Choose ENCODEPeak file"/>
+        </page>
+    </inputs>
+    <outputs>
+        <data format="bgzip" name="output1"/>
+    </outputs>
+    <help>
+    </help>
+</tool>

diff -r 854e160efc7bd2b42eeb847057214bc92cec809f -r 2dd7594c6adaabbc872c287cfa1c1eb617026a03 lib/galaxy/datatypes/converters/encodepeak_to_summary_tree_converter.xml
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/encodepeak_to_summary_tree_converter.xml
@@ -0,0 +1,20 @@
+<tool id="CONVERTER_encodepeak_to_summary_tree_0" name="Convert ENCODEPeak to Summary Tree" version="1.0.0" hidden="true">
+<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+    <command interpreter="python">interval_to_summary_tree_converter.py
+        -c ${input1.metadata.chromCol}
+        -s ${input1.metadata.startCol}
+        -e ${input1.metadata.endCol}
+        $input1 $output1
+    </command>
+
+    <inputs>
+        <page>
+            <param format="ENCODEPeak" name="input1" type="data" label="Choose ENCODEPeak file"/>
+        </page>
+    </inputs>
+    <outputs>
+        <data format="summary_tree" name="output1"/>
+    </outputs>
+    <help>
+    </help>
+</tool>

diff -r 854e160efc7bd2b42eeb847057214bc92cec809f -r 2dd7594c6adaabbc872c287cfa1c1eb617026a03 lib/galaxy/datatypes/converters/encodepeak_to_tabix_converter.xml
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/encodepeak_to_tabix_converter.xml
@@ -0,0 +1,20 @@
+<tool id="CONVERTER_encodepeak_to_tabix_0" name="Convert ENCODEPeak to tabix" version="1.0.0" hidden="true">
+<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+    <command interpreter="python">interval_to_tabix_converter.py
+        -c ${input1.metadata.chromCol}
+        -s ${input1.metadata.startCol}
+        -e ${input1.metadata.endCol}
+        $input1 $bgzip $output1
+    </command>
+    <inputs>
+        <page>
+            <param format="encodepeak" name="input1" type="data" label="Choose ENCODEPeak file"/>
+            <param format="bgzip" name="bgzip" type="data" label="BGZIP file"/>
+        </page>
+    </inputs>
+    <outputs>
+        <data format="tabix" name="output1"/>
+    </outputs>
+    <help>
+    </help>
+</tool>

diff -r 854e160efc7bd2b42eeb847057214bc92cec809f -r 2dd7594c6adaabbc872c287cfa1c1eb617026a03 lib/galaxy/datatypes/interval.py
--- a/lib/galaxy/datatypes/interval.py
+++ b/lib/galaxy/datatypes/interval.py
@@ -1263,6 +1263,36 @@
         except:
             return False
         return True
+
+class ENCODEPeak( Interval ):
+    '''
+    Human ENCODE peak format. There are both broad and narrow peak formats.
+    Formats are very similar; narrow peak has an additional column, though.
+
+    Broad peak ( http://genome.ucsc.edu/FAQ/FAQformat#format13 ):
+    This format is used to provide called regions of signal enrichment based
+    on pooled, normalized (interpreted) data. It is a BED 6+3 format.
+
+    Narrow peak http://genome.ucsc.edu/FAQ/FAQformat#format12 and :
+    This format is used to provide called peaks of signal enrichment based on
+    pooled, normalized (interpreted) data. It is a BED6+4 format.
+    '''
+
+    file_ext = "encodepeak"
+    column_names = [ 'Chrom', 'Start', 'End', 'Name', 'Score', 'Strand', 'SignalValue', 'pValue', 'qValue', 'Peak' ]
+
+    """Add metadata elements"""
+    MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter )
+    MetadataElement( name="startCol", default=2, desc="Start column", param=metadata.ColumnParameter )
+    MetadataElement( name="endCol", default=3, desc="End column", param=metadata.ColumnParameter )
+    MetadataElement( name="strandCol", desc="Strand column (click box & select)", param=metadata.ColumnParameter, optional=True, no_value=0 )
+    MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False )
+
+    def sniff( self, filename ):
+        return Exception( "Unimplemented Function" )
+
+    def get_track_type( self ):
+        return "FeatureTrack", {"data": "tabix", "index": "summary_tree"}

 if __name__ == '__main__':
     import doctest, sys

diff -r 854e160efc7bd2b42eeb847057214bc92cec809f -r 2dd7594c6adaabbc872c287cfa1c1eb617026a03 lib/galaxy/visualization/tracks/data_providers.py
--- a/lib/galaxy/visualization/tracks/data_providers.py
+++ b/lib/galaxy/visualization/tracks/data_providers.py
@@ -19,7 +19,7 @@
 from galaxy.visualization.tracks.summary import *
 import galaxy_utils.sequence.vcf
 from galaxy.datatypes.tabular import Vcf
-from galaxy.datatypes.interval import Bed, Gff, Gtf
+from galaxy.datatypes.interval import Bed, Gff, Gtf, ENCODEPeak
 from pysam import csamtools, ctabix

@@ -288,7 +288,7 @@
         Provides
         """
         # Build data to return. Payload format is:
-        # [ <guid/offset>, <start>, <end>, <name>, <score>, <strand>, <thick_start>,
+        # [ <guid/offset>, <start>, <end>, <name>, <strand>, <thick_start>,
         #   <thick_end>, <blocks> ]
         #
         # First three entries are mandatory, others are optional.
@@ -965,6 +965,126 @@
         return { 'data': results, 'message': message }
+
+#
+# -- ENCODE Peak data providers.
+#
+
+class ENCODEPeakDataProvider( TracksDataProvider ):
+    """
+    Abstract class that processes ENCODEPeak data from native format to payload format.
+
+    Payload format: [ uid (offset), start, end, name, strand, thick_start, thick_end, blocks ]
+    """
+
+    def get_iterator( self, chrom, start, end ):
+        raise "Unimplemented Method"
+
+    def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
+        """
+        Provides
+        """
+
+        ## FIXMEs:
+        # (1) should be able to unify some of this code with BedDataProvider.process_data
+        # (2) are optional number of parameters supported?
+
+        # Build data to return. Payload format is:
+        # [ <guid/offset>, <start>, <end>, <name>, <strand>, <thick_start>,
+        #   <thick_end>, <blocks> ]
+        #
+        # First three entries are mandatory, others are optional.
+        #
+        no_detail = ( "no_detail" in kwargs )
+        rval = []
+        message = None
+        for count, line in enumerate( iterator ):
+            if count < start_val:
+                continue
+            if max_vals and count-start_val >= max_vals:
+                message = ERROR_MAX_VALS % ( max_vals, "features" )
+                break
+
+            feature = line.split()
+            length = len( feature )
+
+            # Feature initialization.
+            payload = [
+                # GUID is just a hash of the line
+                hash( line ),
+                # Add start, end.
+                int( feature[1] ),
+                int( feature[2] )
+            ]
+
+            if no_detail:
+                rval.append( payload )
+                continue
+
+            # Extend with additional data.
+            payload.extend( [
+                # Add name, strand.
+                feature[3],
+                feature[5],
+                # Thick start, end are feature start, end for now.
+                int( feature[1] ),
+                int( feature[2] ),
+                # No blocks.
+                None,
+                # Filtering data: Score, signalValue, pValue, qValue.
+                float( feature[4] ),
+                float( feature[6] ),
+                float( feature[7] ),
+                float( feature[8] )
+            ] )
+
+            rval.append( payload )
+
+        return { 'data': rval, 'message': message }
+
+    def write_data_to_file( self, chrom, start, end, filename ):
+        iterator = self.get_iterator( chrom, start, end )
+        out = open( filename, "w" )
+        for line in iterator:
+            out.write( "%s\n" % line )
+        out.close()
+
+class ENCODEPeakTabixDataProvider( TabixDataProvider, ENCODEPeakDataProvider ):
+    """
+    Provides data from an ENCODEPeak dataset indexed via tabix.
+    """
+
+    def get_filters( self ):
+        """
+        Returns filters for dataset.
+        """
+        # HACK: first 8 fields are for drawing, so start filter column index at 9.
+        filter_col = 8
+        filters = []
+        filters.append( { 'name': 'Score',
+                          'type': 'number',
+                          'index': filter_col,
+                          'tool_id': 'Filter1',
+                          'tool_exp_name': 'c6' } )
+        filter_col += 1
+        filters.append( { 'name': 'Signal Value',
+                          'type': 'number',
+                          'index': filter_col,
+                          'tool_id': 'Filter1',
+                          'tool_exp_name': 'c7' } )
+        filter_col += 1
+        filters.append( { 'name': 'pValue',
+                          'type': 'number',
+                          'index': filter_col,
+                          'tool_id': 'Filter1',
+                          'tool_exp_name': 'c8' } )
+        filter_col += 1
+        filters.append( { 'name': 'qValue',
+                          'type': 'number',
+                          'index': filter_col,
+                          'tool_id': 'Filter1',
+                          'tool_exp_name': 'c9' } )
+        return filters

 #
 # -- Helper methods. --

@@ -974,7 +1094,7 @@
 # type. First key is converted dataset type; if result is another dict, second key
 # is original dataset type. TODO: This needs to be more flexible.
 dataset_type_name_to_data_provider = {
-    "tabix": { Vcf: VcfTabixDataProvider, Bed: BedTabixDataProvider, "default" : TabixDataProvider },
+    "tabix": { Vcf: VcfTabixDataProvider, Bed: BedTabixDataProvider, ENCODEPeak: ENCODEPeakTabixDataProvider, "default" : TabixDataProvider },
     "interval_index": IntervalIndexDataProvider,
     "bai": BamDataProvider,
     "bam": SamDataProvider,
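As a quick orientation for readers of the diff: the following minimal sketch is not part of the commit. It mirrors the per-record logic of the new ENCODEPeakDataProvider.process_data and shows the payload built from one narrowPeak line; the function name, sample record, and values are invented for illustration only.

    # Standalone sketch: mirrors the per-record logic of ENCODEPeakDataProvider.process_data.
    # The sample line below is invented; in Galaxy the records come from a tabix iterator.
    def encodepeak_line_to_payload( line ):
        # narrowPeak columns: chrom, start, end, name, score, strand,
        # signalValue, pValue, qValue, peak
        feature = line.split()
        return [
            hash( line ),           # guid: hash of the raw line
            int( feature[1] ),      # start
            int( feature[2] ),      # end
            feature[3],             # name
            feature[5],             # strand
            int( feature[1] ),      # thick_start (same as start for now)
            int( feature[2] ),      # thick_end (same as end for now)
            None,                   # blocks (peaks have none)
            float( feature[4] ),    # score       -> payload index 8, first filterable column
            float( feature[6] ),    # signalValue -> payload index 9
            float( feature[7] ),    # pValue      -> payload index 10
            float( feature[8] )     # qValue      -> payload index 11
        ]

    if __name__ == '__main__':
        sample = "chr1\t9356548\t9356648\tpeak_1\t182\t.\t6.05\t-1\t-1\t50"
        print( encodepeak_line_to_payload( sample ) )

The four trailing floats are the fields that ENCODEPeakTabixDataProvider.get_filters exposes as the Score, Signal Value, pValue, and qValue filters (payload indices 8 through 11).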
Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this email.