Ira,
Thanks for getting a sqlite datatype into galaxy. I was wanting
to subclass a sqlite datatype for another application: cistrome CEAS
I had made a sqlite datatype that captured a little metadata that
I thought could be useful for the display peek, and perhaps for
filtering.
Is this worth considering?
The peek for a CEAS DB would include table names, column names,
and row count:
SQLite Database
GeneTable
(chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2)
[24892]
GenomeBGP
(chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms)
[7]
GenomeBGS
(chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns)
[7]
GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms)
[7]
GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]
$ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py
diff -r f002131cb905 lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.py Fri Jul 25 12:01:34 2014
-0400
+++ b/lib/galaxy/datatypes/binary.py Fri Jul 25 12:13:05 2014
-0500
@@ -20,7 +20,7 @@
from bx.seq.twobit import TWOBIT_MAGIC_NUMBER,
TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE
-from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.metadata import
MetadataElement,ListParameter,DictParameter
from galaxy.datatypes import metadata
from galaxy.datatypes.sniff import *
import dataproviders
@@ -550,8 +550,36 @@
@dataproviders.decorators.has_dataproviders
class SQlite ( Binary ):
+ """Class describing a Sqlite database """
+ MetadataElement( name="tables", default=[],
param=ListParameter, desc="Database Tables", readonly=True,
visible=True, no_value=[] )
+ MetadataElement( name="table_columns", default={},
param=DictParameter, desc="Database Table Columns", readonly=True,
visible=True, no_value={} )
+ MetadataElement( name="table_row_count", default={},
param=DictParameter, desc="Database Table Row Count",
readonly=True, visible=True, no_value={} )
file_ext = "sqlite"
+ def init_meta( self, dataset, copy_from=None ):
+ Binary.init_meta( self, dataset, copy_from=copy_from )
+
+ def set_meta( self, dataset, overwrite = True, **kwd ):
+ try:
+ tables = []
+ columns = dict()
+ rowcounts = dict()
+ conn = sqlite3.connect(dataset.file_name)
+ c = conn.cursor()
+ tables_query = "SELECT name,sql FROM sqlite_master
WHERE type='table' ORDER BY name"
+ rslt = c.execute(tables_query).fetchall();
+ for table,sql in rslt:
+ tables.append(table)
+ columns[table] =
re.sub('^.*\((.*)\)$','\\1',sql).split(',')
+ for table in tables:
+ row_query = "SELECT count(*) FROM %s" % table
+ rowcounts[table] =
c.execute(row_query).fetchone()[0];
+ dataset.metadata.tables = tables
+ dataset.metadata.table_columns = columns
+ dataset.metadata.table_row_count = rowcounts
+ except Exception, exc:
+ pass
+
# Connects and runs a query that should work on any real
database
# If the file is not sqlite, an exception will be thrown and
the sniffer will return false
def sniff( self, filename ):
@@ -567,7 +595,14 @@
def set_peek( self, dataset, is_multi_byte=False ):
if not dataset.dataset.purged:
- dataset.peek = "SQLite Database"
+ lines = ['SQLite Database']
+ if dataset.metadata.tables:
+ for table in dataset.metadata.tables:
+ try:
+ lines.append('%s (%s) [%s]' %
(table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table]))
+ except:
+ continue
+ dataset.peek = '\n'.join(lines)
dataset.blurb = data.nice_size( dataset.get_size() )
else:
dataset.peek = 'file does not exist'
Thanks,
JJ
--
James E. Johnson, Minnesota Supercomputing Institute, University
of Minnesota