Hi JJ,
I think this is a great idea.
Do you want to make the pull request for this? Or I can merge it into my fork if you like.
Cheers Ira
On 26 Jul 2014, at 3:22 am, Jim Johnson johns198@umn.edu wrote:
Ira,
Thanks for getting a sqlite datatype into galaxy. I was wanting to subclass a sqlite datatype for another application: cistrome CEAS
I had made a sqlite datatype that captured a little metadata that I thought could be useful for the display peek, and perhaps for filtering. Is this worth considering?
The peek for a CEAS DB would include table names, column names, and row count: SQLite Database GeneTable (chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2) [24892] GenomeBGP (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7] GenomeBGS (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7] GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7] GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]
$ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py diff -r f002131cb905 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py Fri Jul 25 12:01:34 2014 -0400 +++ b/lib/galaxy/datatypes/binary.py Fri Jul 25 12:13:05 2014 -0500 @@ -20,7 +20,7 @@
from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE
-from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.metadata import MetadataElement,ListParameter,DictParameter from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * import dataproviders @@ -550,8 +550,36 @@
@dataproviders.decorators.has_dataproviders class SQlite ( Binary ):
"""Class describing a Sqlite database """
MetadataElement( name="tables", default=[], param=ListParameter, desc="Database Tables", readonly=True, visible=True, no_value=[] )
MetadataElement( name="table_columns", default={}, param=DictParameter, desc="Database Table Columns", readonly=True, visible=True, no_value={} )
MetadataElement( name="table_row_count", default={}, param=DictParameter, desc="Database Table Row Count", readonly=True, visible=True, no_value={} ) file_ext = "sqlite"
def init_meta( self, dataset, copy_from=None ):
Binary.init_meta( self, dataset, copy_from=copy_from )
def set_meta( self, dataset, overwrite = True, **kwd ):
try:
tables = []
columns = dict()
rowcounts = dict()
conn = sqlite3.connect(dataset.file_name)
c = conn.cursor()
tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name"
rslt = c.execute(tables_query).fetchall();
for table,sql in rslt:
tables.append(table)
columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',')
for table in tables:
row_query = "SELECT count(*) FROM %s" % table
rowcounts[table] = c.execute(row_query).fetchone()[0];
dataset.metadata.tables = tables
dataset.metadata.table_columns = columns
dataset.metadata.table_row_count = rowcounts
except Exception, exc:
pass
# Connects and runs a query that should work on any real database # If the file is not sqlite, an exception will be thrown and the sniffer will return false def sniff( self, filename ):
@@ -567,7 +595,14 @@
def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged:
dataset.peek = "SQLite Database"
lines = ['SQLite Database']
if dataset.metadata.tables:
for table in dataset.metadata.tables:
try:
lines.append('%s (%s) [%s]' % (table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table]))
except:
continue
dataset.peek = '\n'.join(lines) dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist'
Thanks,
JJ
James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota