details: http://www.bx.psu.edu/hg/galaxy/rev/cf461b1d6659
changeset: 1659:cf461b1d6659
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Fri Dec 12 09:43:42 2008 -0500
description:
Add a new report showing the last login for users, along with a few bug fixes and some code cleanup for the reports.
9 file(s) affected in this change:
lib/galaxy/model/mapping.py
lib/galaxy/webapps/reports/app.py
lib/galaxy/webapps/reports/base/controller.py
lib/galaxy/webapps/reports/controllers/system.py
lib/galaxy/webapps/reports/controllers/users.py
lib/galaxy/webapps/reports/templates/index.mako
lib/galaxy/webapps/reports/templates/jobs_specified_date_in_error.mako
lib/galaxy/webapps/reports/templates/users.mako
lib/galaxy/webapps/reports/templates/users_last_access_date.mako
diffs (352 lines):
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/model/mapping.py
--- a/lib/galaxy/model/mapping.py Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/model/mapping.py Fri Dec 12 09:43:42 2008 -0500
@@ -299,6 +299,7 @@
assign_mapper( context, User, User.table,
properties=dict( histories=relation( History, backref="user",
order_by=desc(History.table.c.update_time) ),
+ galaxy_sessions=relation( GalaxySession, order_by=desc( GalaxySession.table.c.update_time ) ),
stored_workflow_menu_entries=relation( StoredWorkflowMenuEntry, backref="user",
cascade="all, delete-orphan",
collection_class=ordering_list( 'order_index' ) )
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/app.py
--- a/lib/galaxy/webapps/reports/app.py Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/webapps/reports/app.py Fri Dec 12 09:43:42 2008 -0500
@@ -1,6 +1,5 @@
-import sys, os, atexit
+import sys, config
import galaxy.model
-import config
class UniverseApplication( object ):
"""Encapsulates the state of a Universe application"""
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/base/controller.py
--- a/lib/galaxy/webapps/reports/base/controller.py Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/webapps/reports/base/controller.py Fri Dec 12 09:43:42 2008 -0500
@@ -2,7 +2,6 @@
import os, time, logging
# Pieces of Galaxy to make global in every controller
from galaxy import web, util
-from Cheetah.Template import Template
log = logging.getLogger( __name__ )
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/controllers/system.py
--- a/lib/galaxy/webapps/reports/controllers/system.py Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/webapps/reports/controllers/system.py Fri Dec 12 09:43:42 2008 -0500
@@ -37,7 +37,15 @@
deleted_histories_days, msg = self.deleted_histories( **kwd )
elif kwd['action'] == "deleted_datasets":
deleted_datasets_days, msg = self.deleted_datasets( **kwd )
- return trans.fill_template( 'system.mako', file_path=file_path, disk_usage=disk_usage, datasets=datasets, file_size_str=file_size_str, userless_histories_days=userless_histories_days, deleted_histories_days=deleted_histories_days, deleted_datasets_days=deleted_datasets_days, msg=msg )
+ return trans.fill_template( 'system.mako',
+ file_path=file_path,
+ disk_usage=disk_usage,
+ datasets=datasets,
+ file_size_str=file_size_str,
+ userless_histories_days=userless_histories_days,
+ deleted_histories_days=deleted_histories_days,
+ deleted_datasets_days=deleted_datasets_days,
+ msg=msg )
def userless_histories( self, **kwd ):
"""The number of userless histories and associated datasets that have not been updated for the specified number of days."""
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/controllers/users.py
--- a/lib/galaxy/webapps/reports/controllers/users.py Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/webapps/reports/controllers/users.py Fri Dec 12 09:43:42 2008 -0500
@@ -1,7 +1,9 @@
-from datetime import *
+from datetime import datetime, timedelta
+from time import strftime
import calendar
from galaxy.webapps.reports.base.controller import *
import galaxy.model
+from galaxy.model.orm import *
import pkg_resources
pkg_resources.require( "SQLAlchemy >= 0.4" )
import sqlalchemy as sa
@@ -10,23 +12,20 @@
class Users( BaseController ):
@web.expose
+ def index( self, trans, **kwd ):
+ params = util.Params( kwd )
+ msg = params.get( 'msg', '' )
+ return trans.fill_template( 'users.mako', msg=msg )
+ @web.expose
def registered_users( self, trans, **kwd ):
params = util.Params( kwd )
- msg = ''
- engine = galaxy.model.mapping.metadata.engine
- s = """
- SELECT
- count(id) AS num_users
- FROM
- galaxy_user
- """
- rows = engine.text( s ).execute().fetchall()
- num_users = rows[0].num_users
+ msg = params.get( 'msg', '' )
+ num_users = galaxy.model.User.query().count()
return trans.fill_template( 'registered_users.mako', num_users=num_users, msg=msg )
@web.expose
def registered_users_per_month( self, trans, **kwd ):
params = util.Params( kwd )
- msg = ''
+ msg = params.get( 'msg', '' )
q = sa.select( ( sa.func.date_trunc( 'month', sa.func.date( galaxy.model.User.table.c.create_time ) ).label( 'date' ),
sa.func.count( galaxy.model.User.table.c.id ).label( 'num_users' ) ),
from_obj = [ galaxy.model.User.table ],
@@ -42,7 +41,7 @@
@web.expose
def specified_month( self, trans, **kwd ):
params = util.Params( kwd )
- msg = ''
+ msg = params.get( 'msg', '' )
year, month = map( int, params.get( 'month', datetime.utcnow().strftime( "%Y-%m" ) ).split( "-" ) )
start_date = date( year, month, 1 )
end_date = start_date + timedelta( days=calendar.monthrange( year, month )[1] )
@@ -70,7 +69,7 @@
@web.expose
def specified_date( self, trans, **kwd ):
params = util.Params( kwd )
- msg = ''
+ msg = params.get( 'msg', '' )
year, month, day = map( int, params.get( 'specified_date', datetime.utcnow().strftime( "%Y-%m-%d" ) ).split( "-" ) )
start_date = date( year, month, day )
end_date = start_date + timedelta( days=1 )
@@ -95,3 +94,21 @@
day_of_month=day_of_month,
users=users,
msg=msg )
+ @web.expose
+ def last_access_date( self, trans, **kwd ):
+ params = util.Params( kwd )
+ msg = params.get( 'msg', '' )
+ not_logged_in_for_days = params.get( 'not_logged_in_for_days', 0 )
+ if not not_logged_in_for_days:
+ not_logged_in_for_days = 0
+ cutoff_time = datetime.utcnow() - timedelta( days=int( not_logged_in_for_days ) )
+ now = strftime( "%Y-%m-%d %H:%M:%S" )
+ users = []
+ for user in galaxy.model.User.query().order_by( galaxy.model.User.table.c.email ).all():
+ last_galaxy_session = user.galaxy_sessions[ 0 ]
+ if last_galaxy_session.update_time < cutoff_time:
+ users.append( ( user.email, last_galaxy_session.update_time.strftime( "%Y-%m-%d" ) ) )
+ return trans.fill_template( 'users_last_access_date.mako',
+ users=users,
+ not_logged_in_for_days=not_logged_in_for_days,
+ msg=msg )
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/templates/index.mako
--- a/lib/galaxy/webapps/reports/templates/index.mako Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/webapps/reports/templates/index.mako Fri Dec 12 09:43:42 2008 -0500
@@ -1,29 +1,88 @@
<%inherit file="/base_panels.mako"/>
<%def name="main_body()">
- ## Display the available reports
- <div class="body">
- <h3 align="center">Galaxy Reports</h3>
- <table align="center" width="40%" class="colored">
- <tr><td><div class="reportTitle">Job Information</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='today_all' )}">Number of jobs today</a> - displays the total number of jobs for today</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='specified_month_all' )}">Number of jobs per day for current month</a> - displays days of the current month with the total number of jobs for each day</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='specified_month_in_error' )}">Number of jobs in error per day for current month</a> - displays days of the current month with the total number of jobs in error for each day</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='all_unfinished' )}">All unfinished jobs</a> - displays jobs that are currently in a "new" state, a "queued" state or a "running" state</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='per_month_all' )}">Number of jobs per month</a> - displays a list of months with the total number of jobs for each month</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='per_month_in_error' )}">Number of jobs in error per month</a> - displays a list of months with the total number of jobs in error for each month</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='per_user' )}">Number of jobs per user</a> - displays users sorted in descending order by the number of jobs they have submitted</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='per_tool' )}">Number of jobs per tool</a> - displays tools sorted in alphabetical order by tool id and the number of jobs created by the tool</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='jobs', action='per_domain' )}">Number of jobs per Internet domain</a> - displays the number of jobs that have been submitted per Internet domain</div></td></tr>
- </table>
- <br clear="left"/><br/><br/><br/>
- <table align="center" width="40%" class="colored">
- <tr><td><div class="reportTitle">User Information</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='users', action='registered_users' )}">Number of registered users</a> - displays the total number of registered users</div></td></tr>
- </table>
- <br clear="left"/><br/><br/><br/>
- <table align="center" width="40%" class="colored">
- <tr><td><div class="reportTitle">System Information</div></td></tr>
- <tr><td><div class="reportBody"><a href="${h.url_for( controller='system', action='index' )}">Disk space, old histories and datasets</a> - displays history and dataset information including disk space allocation where datasets are stored</div></td></tr>
- </table>
- </div>
+ ## Display the available reports
+ <div class="body">
+ <h3 align="center">Galaxy Reports</h3>
+ <table align="center" width="40%" class="colored">
+ <tr><td><div class="reportTitle">Job Information</div></td></tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='today_all' )}">Number of jobs today</a> - displays the total number of jobs for today
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='specified_month_all' )}">Number of jobs per day for current month</a> - displays days of the current month with the total number of jobs for each day
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='specified_month_in_error' )}">Number of jobs in error per day for current month</a> - displays days of the current month with the total number of jobs in error for each day
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='all_unfinished' )}">All unfinished jobs</a> - displays jobs that are currently in a "new" state, a "queued" state or a "running" state
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='per_month_all' )}">Number of jobs per month</a> - displays a list of months with the total number of jobs for each month
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='per_month_in_error' )}">Number of jobs in error per month</a> - displays a list of months with the total number of jobs in error for each month
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='per_user' )}">Number of jobs per user</a> - displays users sorted in descending order by the number of jobs they have submitted
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='jobs', action='per_tool' )}">Number of jobs per tool</a> - displays tools sorted in alphabetical order by tool id and the number of jobs created by the tool
+ </div>
+ </td>
+ </tr>
+ </table>
+ <br clear="left"/><br/><br/><br/>
+ <table align="center" width="40%" class="colored">
+ <tr><td><div class="reportTitle">User Information</div></td></tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='users', action='index' )}">Users</a> - displays information about registered users
+ </div>
+ </td>
+ </tr>
+ </table>
+ <br clear="left"/><br/><br/><br/>
+ <table align="center" width="40%" class="colored">
+ <tr><td><div class="reportTitle">System Information</div></td></tr>
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='system', action='index' )}">Disk space, old histories and datasets</a> - displays history and dataset information including disk space allocation where datasets are stored
+ </div>
+ </td>
+ </tr>
+ </table>
+ </div>
</%def>
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/templates/jobs_specified_date_in_error.mako
--- a/lib/galaxy/webapps/reports/templates/jobs_specified_date_in_error.mako Thu Dec 11 16:00:06 2008 -0500
+++ b/lib/galaxy/webapps/reports/templates/jobs_specified_date_in_error.mako Fri Dec 12 09:43:42 2008 -0500
@@ -2,7 +2,7 @@
<%def name="main_body()">
<div class="reportBody">
- <h3 align="center">All Jobs in Error for ${day_label}, ${month_label} ${day_of_month}, ${year_label} ( includes jobs deleted before finishing )</h3>
+ <h3 align="center">All Jobs in Error for ${day_label}, ${month_label} ${day_of_month}, ${year_label}</h3>
%if msg:
<table align="center" width="70%" class="border" cellpadding="5" cellspacing="5">
<tr><td class="ok_bgr">${msg}</td></tr>
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/templates/users.mako
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/webapps/reports/templates/users.mako Fri Dec 12 09:43:42 2008 -0500
@@ -0,0 +1,29 @@
+<%inherit file="/base_panels.mako"/>
+
+<%def name="main_body()">
+ <h3 align="center">Registered Users</h3>
+ %if msg:
+ <table align="center" width="70%" class="border" cellpadding="5" cellspacing="5">
+ <tr><td class="ok_bgr">${msg}</td></tr>
+ </table>
+ %endif
+ <table align="center" width="40%" class="colored">
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='users', action='registered_users' )}">Registered Users</a> - displays the number of registered users
+ </div>
+ </td>
+ </tr>
+ </table>
+ <br clear="left" /><br />
+ <table align="center" width="40%" class="colored">
+ <tr>
+ <td>
+ <div class="reportBody">
+ <a href="${h.url_for( controller='users', action='last_access_date' )}">Date of Last Login</a> - displays users sorted by date of last login
+ </div>
+ </td>
+ </tr>
+ </table>
+</%def>
diff -r 64506c9397e4 -r cf461b1d6659 lib/galaxy/webapps/reports/templates/users_last_access_date.mako
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/webapps/reports/templates/users_last_access_date.mako Fri Dec 12 09:43:42 2008 -0500
@@ -0,0 +1,46 @@
+<%inherit file="/base_panels.mako"/>
+
+<%def name="main_body()">
+ <h3 align="center">Date of Last Galaxy Login</h3>
+ <h4 align="center">Listed in descending order by access date ( oldest date first )</h4>
+ %if msg:
+ <table align="center" width="70%" class="border" cellpadding="5" cellspacing="5">
+ <tr><td class="ok_bgr">${msg}</td></tr>
+ </table>
+ %endif
+ <table align="center" width="70%" class="colored" cellpadding="5" cellspacing="5">
+ <tr>
+ <td>
+ <form method="post" controller="users" action="last_access_date">
+ <p>
+ Users that have not logged in to Galaxy for
+ <input type="textfield" value="${not_logged_in_for_days}" size="3" name="not_logged_in_for_days"> days.
+ <button name="action" value="not_logged_in_for_days">Go</button>
+ </p>
+ </form>
+ </td>
+ </tr>
+ </table>
+ <table align="center" width="70%" class="colored" cellpadding="5" cellspacing="5">
+ %if users:
+ <tr class="header">
+ <td>Email</td>
+ <td>Date of last Login</td>
+ </tr>
+ <% ctr = 0 %>
+ %for user in users:
+ %if ctr % 2 == 1:
+ <tr class="odd_row">
+ %else:
+ <tr class="tr">
+ %endif
+ <td>${user[0]}</td>
+ <td>${user[1]}</td>
+ </tr>
+ <% ctr += 1 %>
+ %endfor
+ %else:
+ <tr><td>All users have logged in to Galaxy within the past ${not_logged_in_for_days} days</td></tr>
+ %endif
+ </table>
+</%def>
details: http://www.bx.psu.edu/hg/galaxy/rev/64506c9397e4
changeset: 1658:64506c9397e4
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Thu Dec 11 16:00:06 2008 -0500
description:
Update Fasta.sniff()
Rules for sniffing as True:
We don't care about line length (other than empty lines).
The first non-empty line must start with '>' and the Very Next line.strip() must have sequence data and not be a header.
'sequence data' here is loosely defined as non-empty lines which do not start with '>'
This will cause Color Space FASTA (csfasta) to be detected as True (they are, after all, still FASTA files - they have a header line followed by sequence data)
Previously this method did some checking to determine if the sequence data had integers (presumably to differentiate between fasta and csfasta)
This should be done through sniff order, where csfasta (which currently has a null sniff function) is checked first (stricter definition), followed sometime later by fasta.
We will only check that the first purported sequence is correctly formatted.
1 file(s) affected in this change:
lib/galaxy/datatypes/sequence.py
diffs (83 lines):
diff -r 5c3736861e37 -r 64506c9397e4 lib/galaxy/datatypes/sequence.py
--- a/lib/galaxy/datatypes/sequence.py Thu Dec 11 15:09:37 2008 -0500
+++ b/lib/galaxy/datatypes/sequence.py Thu Dec 11 16:00:06 2008 -0500
@@ -34,7 +34,7 @@
dataset.peek = data.get_file_peek( dataset.file_name )
dataset.blurb = data.nice_size( dataset.get_size() )
- def sniff(self, filename):
+ def sniff( self, filename ):
"""
Determines whether the file is in fasta format
@@ -42,7 +42,16 @@
The first character of the description line is a greater-than (">") symbol in the first column.
All lines should be shorter than 80 charcters
- For complete details see http://www.g2l.bio.uni-goettingen.de/blast/fastades.html
+ For complete details see http://www.ncbi.nlm.nih.gov/blast/fasta.shtml
+
+ Rules for sniffing as True:
+ We don't care about line length (other than empty lines).
+ The first non-empty line must start with '>' and the Very Next line.strip() must have sequence data and not be a header.
+ 'sequence data' here is loosely defined as non-empty lines which do not start with '>'
+ This will cause Color Space FASTA (csfasta) to be detected as True (they are, after all, still FASTA files - they have a header line followed by sequence data)
+ Previously this method did some checking to determine if the sequence data had integers (presumably to differentiate between fasta and csfasta)
+ This should be done through sniff order, where csfasta (currently has a null sniff function) is detected for first (stricter definition) followed sometime after by fasta
+ We will only check that the first purported sequence is correctly formatted.
>>> fname = get_test_fname( 'sequence.maf' )
>>> Fasta().sniff( fname )
@@ -51,34 +60,26 @@
>>> Fasta().sniff( fname )
True
"""
- headers = get_headers( filename, None )
- data_found = False
+
try:
- if len(headers) > 1 and headers[0][0] and headers[0][0][0] == ">":
- for i, l in enumerate( headers ):
- line = l[0]
- if i < 1:
- continue
- if line:
- data_found = True
- try:
- int( line[0] )
- return False
- except:
- try:
- elems = line.split()
- int( elems[0] )
- return False
- except:
- return True
- else:
- return False
- if data_found:
- return True
- else:
- return False
+ fh = open( filename )
+ while True:
+ line = fh.readline()
+ if not line:
+ break #EOF
+ line = line.strip()
+ if line: #first non-empty line
+ if line.startswith( '>' ):
+ #The next line.strip() must not be '', nor startwith '>'
+ line = fh.readline().strip()
+ if line == '' or line.startswith( '>' ):
+ break
+ return True
+ else:
+ break #we found a non-empty line, but its not a fasta header
except:
- return False
+ pass
+ return False
class csFasta( Sequence ):
""" Class representing the SOLID Color-Space sequence ( csfasta ) """
details: http://www.bx.psu.edu/hg/galaxy/rev/9a5bfe13e47b
changeset: 1656:9a5bfe13e47b
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Thu Dec 11 12:18:00 2008 -0500
description:
Fix a development problem that I inadvertently left in the code.
1 file(s) affected in this change:
lib/galaxy/web/controllers/dataset.py
diffs (13 lines):
diff -r b1514bd5244e -r 9a5bfe13e47b lib/galaxy/web/controllers/dataset.py
--- a/lib/galaxy/web/controllers/dataset.py Tue Dec 09 15:51:12 2008 -0500
+++ b/lib/galaxy/web/controllers/dataset.py Thu Dec 11 12:18:00 2008 -0500
@@ -60,8 +60,7 @@
smtp_server = trans.app.config.smtp_server
if smtp_server is None:
return trans.show_error_message( "Sorry, mail is not configured for this galaxy instance" )
- #to_address = trans.app.config.error_email_to
- to_address = 'greg(a)bx.psu.edu'
+ to_address = trans.app.config.error_email_to
if to_address is None:
return trans.show_error_message( "Sorry, error reporting has been disabled for this galaxy instance" )
# Get the dataset and associated job
details: http://www.bx.psu.edu/hg/galaxy/rev/f77ec6315c7c
changeset: 1654:f77ec6315c7c
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Tue Dec 09 14:53:10 2008 -0500
description:
Update Tabular.set_meta() to use column types guessed across rows when column data is missing. Also handle the presumed header line better: the first line is still treated as a header, but its information is retained and used if needed, whereas previously this line was tossed out.
2 file(s) affected in this change:
lib/galaxy/datatypes/interval.py
lib/galaxy/datatypes/tabular.py
diffs (212 lines):
diff -r 06c63a161985 -r f77ec6315c7c lib/galaxy/datatypes/interval.py
--- a/lib/galaxy/datatypes/interval.py Fri Dec 05 11:39:38 2008 -0500
+++ b/lib/galaxy/datatypes/interval.py Tue Dec 09 14:53:10 2008 -0500
@@ -758,6 +758,9 @@
"""Initialize interval datatype, by adding UCSC display app"""
Tabular.__init__(self, **kwd)
self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' )
+ def set_readonly_meta( self, dataset, skip=1, **kwd ):
+ """Resets the values of readonly metadata elements."""
+ Tabular.set_readonly_meta( self, dataset, skip = skip, **kwd )
def set_meta( self, dataset, overwrite = True, **kwd ):
Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 1 )
def display_peek( self, dataset ):
@@ -869,7 +872,11 @@
"""Initialize datatype, by adding GBrowse display app"""
Tabular.__init__(self, **kwd)
self.add_display_app ('elegans', 'display in GBrowse', 'as_gbrowse_display_file', 'gbrowse_links' )
-
+
+ def set_readonly_meta( self, dataset, skip=1, **kwd ):
+ """Resets the values of readonly metadata elements."""
+ Tabular.set_readonly_meta( self, dataset, skip = skip, **kwd )
+
def set_meta( self, dataset, overwrite = True, **kwd ):
Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 1 )
diff -r 06c63a161985 -r f77ec6315c7c lib/galaxy/datatypes/tabular.py
--- a/lib/galaxy/datatypes/tabular.py Fri Dec 05 11:39:38 2008 -0500
+++ b/lib/galaxy/datatypes/tabular.py Tue Dec 09 14:53:10 2008 -0500
@@ -23,75 +23,125 @@
def init_meta( self, dataset, copy_from=None ):
data.Text.init_meta( self, dataset, copy_from=copy_from )
- def set_readonly_meta( self, dataset, skip=1, **kwd ):
+ def set_readonly_meta( self, dataset, skip=None, **kwd ):
"""Resets the values of readonly metadata elements."""
Tabular.set_meta( self, dataset, overwrite = True, skip = skip )
- def set_meta( self, dataset, overwrite = True, skip = 1, **kwd ):
+ def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
"""
Tries to determine the number of columns as well as those columns
that contain numerical values in the dataset. A skip parameter is
used because various tabular data types reuse this function, and
their data type classes are responsible to determine how many invalid
- comment lines should be skipped.
+ comment lines should be skipped. Using None for skip will cause skip
+ to be zero, but the first line will be processed as a header.
"""
#we treat 'overwrite' as always True (we always want to set tabular metadata when called)
+ #if a tabular file has no data, it will have one column of type str
+
+ num_check_lines = 100 #we will only check up to this many lines into the file
+ requested_skip = skip #store original skip value to check with later
+ if skip is None:
+ skip = 0
+
+ column_type_set_order = [ 'int', 'float', 'list', 'str' ] #Order to set column types in
+ default_column_type = column_type_set_order[-1] # Default column type is lowest in list
+ column_type_compare_order = list( column_type_set_order ) #Order to compare column types
+ column_type_compare_order.reverse()
+ def type_overrules_type( column_type1, column_type2 ):
+ if column_type1 is None or column_type1 == column_type2:
+ return False
+ if column_type2 is None:
+ return True
+ for column_type in column_type_compare_order:
+ if column_type1 == column_type:
+ return True
+ if column_type2 == column_type:
+ return False
+ #neither column type was found in our ordered list, this cannot happen
+ raise "Tried to compare unknown column types"
+ def is_int( column_text ):
+ try:
+ int( column_text )
+ return True
+ except:
+ return False
+ def is_float( column_text ):
+ try:
+ float( column_text )
+ return True
+ except:
+ if column_text.strip().lower() == 'na':
+ return True #na is special cased to be a float
+ return False
+ def is_list( column_text ):
+ return "," in column_text
+ def is_str( column_text ):
+ #anything, except an empty string, is True
+ if column_text == "":
+ return False
+ return True
+ is_column_type = {} #Dict to store column type string to checking function
+ for column_type in column_type_set_order:
+ is_column_type[column_type] = locals()[ "is_%s" % ( column_type ) ]
+ def guess_column_type( column_text ):
+ for column_type in column_type_set_order:
+ if is_column_type[column_type]( column_text ):
+ return column_type
+ return None
+
+ column_types = []
+ first_line_column_types = [default_column_type] # default value is one column of type str
if dataset.has_data():
- column_types = []
-
+ #NOTE: if skip > num_check_lines, we won't detect any metadata, and will use default
for i, line in enumerate( file ( dataset.file_name ) ):
- if i < skip:
+ line = line.rstrip('\r\n')
+ if i < skip or not line or line.startswith( '#' ):
continue
- line = line.rstrip('\r\n')
- if line and not line.startswith( '#' ):
- elems = line.split( '\t' )
- elems_len = len( elems )
- if elems_len > 0:
- # Set the columns metadata attribute
- if elems_len != dataset.metadata.columns:
- dataset.metadata.columns = elems_len
- # Set the column_types metadata attribute
- for col in range( 0, elems_len ):
- col_type = None
- val = elems[ col ]
- if not val:
- if i == 100:
- # We're about to end our loop, so default col_type to 'str'
- col_type = 'str'
- else:
- # Missing a column value, so go to the next line
- column_types = []
- break
- if not col_type and val.find( '.' ) < 0:
- try:
- int( val )
- col_type = 'int'
- except:
- pass
- if not col_type:
- try:
- float( val )
- col_type = 'float'
- except:
- if val.strip().lower() == 'na':
- col_type = 'float'
- if not col_type:
- val_elems = val.split( ',' )
- if len( val_elems ) > 1:
- col_type = 'list'
- if not col_type:
- # All parameters are strings, so this will be the default
- col_type = 'str'
- if col_type:
- column_types.append( col_type )
- else:
- # Couldn't determine column type, so go to the next line
- column_types = []
- break
- if column_types:
- break
- if i > 100:
- break # Hopefully we never get here...
- dataset.metadata.column_types = column_types
+
+ fields = line.split( '\t' )
+ for field_count, field in enumerate( fields ):
+ if field_count >= len( column_types ): #found a previously unknown column, we append None
+ column_types.append( None )
+ column_type = guess_column_type( field )
+ if type_overrules_type( column_type, column_types[field_count] ):
+ column_types[field_count] = column_type
+
+ if i == 0 and requested_skip is None:
+ #this is our first line, people seem to like to upload files that have a header line, but do not start with '#' (i.e. all column types would then most likely be detected as str)
+ #we will assume that the first line is always a header (this was previous behavior - it was always skipped) when the requested skip is None
+ #we only use the data from the first line if we have no other data for a column
+ #this is far from perfect, as:
+ #1,2,3 1.1 2.2 qwerty
+ #0 0 1,2,3
+ #will detect as
+ #"column_types": ["int", "int", "float", "list"]
+ #instead of:
+ #"column_types": ["list", "float", "float", "str"] *** would seem to be the 'Truth' by manual observation that the first line should be included as data
+ #old method would have detected as:
+ #"column_types": ["int", "int", "str", "list"]
+ first_line_column_types = column_types
+ column_types = [ None for col in first_line_column_types ]
+ elif ( column_types and None not in column_types ) or i > num_check_lines:
+ #found and set all known columns, or we exceeded our max check lines
+ break
+
+ #we error on the larger number of columns
+ #first we pad our column_types by using data from first line
+ if len( first_line_column_types ) > len( column_types ):
+ for column_type in first_line_column_types[len( column_types ):]:
+ column_types.append( column_type )
+
+ #Now we fill any unknown (None) column_types with data from first line
+ for i in range( len( column_types ) ):
+ if column_types[i] is None:
+ if first_line_column_types[i] is None:
+ column_types[i] = default_column_type
+ else:
+ column_types[i] = first_line_column_types[i]
+
+ dataset.metadata.column_types = column_types
+ dataset.metadata.columns = len( column_types )
+
def make_html_table( self, dataset, skipchars=[] ):
"""Create HTML table, used for displaying peek"""
out = ['<table cellspacing="0" cellpadding="3">']