Ira, Thanks for getting a sqlite datatype into galaxy. I was wanting to subclass a sqlite datatype for another application: cistrome CEAS I had made a sqlite datatype that captured a little metadata that I thought could be useful for the display peek, and perhaps for filtering. Is this worth considering? The peek for a CEAS DB would include table names, column names, and row count: SQLite Database GeneTable (chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2) [24892] GenomeBGP (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7] GenomeBGS (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7] GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7] GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7] $ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py diff -r f002131cb905 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py Fri Jul 25 12:01:34 2014 -0400 +++ b/lib/galaxy/datatypes/binary.py Fri Jul 25 12:13:05 2014 -0500 @@ -20,7 +20,7 @@ from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE -from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.metadata import MetadataElement,ListParameter,DictParameter from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * import dataproviders @@ -550,8 +550,36 @@ @dataproviders.decorators.has_dataproviders class SQlite ( Binary ): + """Class describing a Sqlite database """ + MetadataElement( name="tables", default=[], param=ListParameter, desc="Database Tables", readonly=True, visible=True, no_value=[] ) + MetadataElement( name="table_columns", default={}, param=DictParameter, desc="Database Table Columns", readonly=True, visible=True, no_value={} ) + MetadataElement( name="table_row_count", default={}, param=DictParameter, desc="Database Table Row Count", readonly=True, visible=True, no_value={} ) file_ext = "sqlite" + def init_meta( self, dataset, copy_from=None ): + Binary.init_meta( self, dataset, copy_from=copy_from ) + + def set_meta( self, dataset, overwrite = True, **kwd ): + try: + tables = [] + columns = dict() + rowcounts = dict() + conn = sqlite3.connect(dataset.file_name) + c = conn.cursor() + tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name" + rslt = c.execute(tables_query).fetchall(); + for table,sql in rslt: + tables.append(table) + columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',') + for table in tables: + row_query = "SELECT count(*) FROM %s" % table + rowcounts[table] = c.execute(row_query).fetchone()[0]; + dataset.metadata.tables = tables + dataset.metadata.table_columns = columns + dataset.metadata.table_row_count = rowcounts + except Exception, exc: + pass + # Connects and runs a query that should work on any real database # If the file is not sqlite, an exception will be thrown and the sniffer will return false def sniff( self, filename ): @@ -567,7 +595,14 @@ def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: - dataset.peek = "SQLite Database" + lines = ['SQLite Database'] + if dataset.metadata.tables: + for table in dataset.metadata.tables: + try: + lines.append('%s (%s) [%s]' % (table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table])) + except: + continue + dataset.peek = '\n'.join(lines) dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' Thanks, JJ -- James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota
Hi JJ, I think this is a great idea. Do you want to make the pull request for this? Or I can merge it into my fork if you like. Cheers Ira On 26 Jul 2014, at 3:22 am, Jim Johnson <johns198@umn.edu> wrote:
Ira,
Thanks for getting a sqlite datatype into galaxy. I was wanting to subclass a sqlite datatype for another application: cistrome CEAS
I had made a sqlite datatype that captured a little metadata that I thought could be useful for the display peek, and perhaps for filtering. Is this worth considering?
The peek for a CEAS DB would include table names, column names, and row count: SQLite Database GeneTable (chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2) [24892] GenomeBGP (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7] GenomeBGS (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7] GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7] GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]
$ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py diff -r f002131cb905 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py Fri Jul 25 12:01:34 2014 -0400 +++ b/lib/galaxy/datatypes/binary.py Fri Jul 25 12:13:05 2014 -0500 @@ -20,7 +20,7 @@
from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE
-from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.metadata import MetadataElement,ListParameter,DictParameter from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * import dataproviders @@ -550,8 +550,36 @@
@dataproviders.decorators.has_dataproviders class SQlite ( Binary ): + """Class describing a Sqlite database """ + MetadataElement( name="tables", default=[], param=ListParameter, desc="Database Tables", readonly=True, visible=True, no_value=[] ) + MetadataElement( name="table_columns", default={}, param=DictParameter, desc="Database Table Columns", readonly=True, visible=True, no_value={} ) + MetadataElement( name="table_row_count", default={}, param=DictParameter, desc="Database Table Row Count", readonly=True, visible=True, no_value={} ) file_ext = "sqlite"
+ def init_meta( self, dataset, copy_from=None ): + Binary.init_meta( self, dataset, copy_from=copy_from ) + + def set_meta( self, dataset, overwrite = True, **kwd ): + try: + tables = [] + columns = dict() + rowcounts = dict() + conn = sqlite3.connect(dataset.file_name) + c = conn.cursor() + tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name" + rslt = c.execute(tables_query).fetchall(); + for table,sql in rslt: + tables.append(table) + columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',') + for table in tables: + row_query = "SELECT count(*) FROM %s" % table + rowcounts[table] = c.execute(row_query).fetchone()[0]; + dataset.metadata.tables = tables + dataset.metadata.table_columns = columns + dataset.metadata.table_row_count = rowcounts + except Exception, exc: + pass + # Connects and runs a query that should work on any real database # If the file is not sqlite, an exception will be thrown and the sniffer will return false def sniff( self, filename ): @@ -567,7 +595,14 @@
def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: - dataset.peek = "SQLite Database" + lines = ['SQLite Database'] + if dataset.metadata.tables: + for table in dataset.metadata.tables: + try: + lines.append('%s (%s) [%s]' % (table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table])) + except: + continue + dataset.peek = '\n'.join(lines) dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist'
Thanks,
JJ -- James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota
participants (2)
-
Ira Cooke
-
Jim Johnson