galaxy-dev
Threads by month
- ----- 2026 -----
- January
- ----- 2025 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- 10009 discussions
details: http://www.bx.psu.edu/hg/galaxy/rev/9d67ae5ecda7
changeset: 2872:9d67ae5ecda7
user: Ross Lazarus <ross.lazarus(a)gmail.com>
date: Mon Oct 12 13:19:31 2009 -0400
description:
real replacement for stoopid symlink
1 file(s) affected in this change:
lib/galaxy/datatypes/genetics.py
diffs (671 lines):
diff -r 75a488a0cbc9 -r 9d67ae5ecda7 lib/galaxy/datatypes/genetics.py
--- a/lib/galaxy/datatypes/genetics.py Mon Oct 12 12:48:37 2009 -0400
+++ b/lib/galaxy/datatypes/genetics.py Mon Oct 12 13:19:31 2009 -0400
@@ -0,0 +1,667 @@
+"""
+rgenetics datatypes
+Use at your peril
+Ross Lazarus
+for the rgenetics and galaxy projects
+
+genome graphs datatypes derived from Interval datatypes
+genome graphs datasets have a header row with appropriate columnames
+The first column is always the marker - eg columname = rs, first row= rs12345 if the rows are snps
+subsequent row values are all numeric ! Will fail if any non numeric (eg '+' or 'NA') values
+ross lazarus for rgenetics
+august 20 2007
+"""
+
+import logging, os, sys, time, tempfile, shutil, string, glob
+import data
+from galaxy import util
+from cgi import escape
+import urllib
+from galaxy.web import url_for
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.images import Html
+
+gal_Log = logging.getLogger(__name__)
+
+class GenomeGraphs( Tabular ):
+ """Tab delimited data containing a marker id and any number of numeric values"""
+
+ """Add metadata elements"""
+ MetadataElement( name="markerCol", default=1, desc="Marker ID column", param=metadata.ColumnParameter )
+ MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True )
+ MetadataElement( name="column_types", default=[], desc="Column types", readonly=True, visible=False )
+ file_ext = 'gg'
+
+ def __init__(self, **kwd):
+ """Initialize gg datatype, by adding UCSC display apps"""
+ Tabular.__init__(self, **kwd)
+ self.add_display_app ( 'ucsc', 'Genome Graph', 'as_ucsc_display_file', 'ucsc_links' )
+
+ def set_peek( self, dataset ):
+ """Set the peek and blurb text"""
+ if not dataset.dataset.purged:
+ dataset.peek = data.get_file_peek( dataset.file_name )
+ dataset.blurb = util.commaify( str( data.get_line_count( dataset.file_name ) ) ) + " rows"
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+ def get_mime(self):
+ """Returns the mime type of the datatype"""
+ return 'application/vnd.msexcel'
+
+ def get_estimated_display_viewport( self, dataset ):
+ """Return a chrom, start, stop tuple for viewing a file."""
+ raise notImplemented
+
+ def as_ucsc_display_file( self, dataset, **kwd ):
+ """Returns file"""
+ return file(dataset.file_name,'r')
+
+ def ucsc_links( self, dataset, type, app, base_url ):
+ """ from the ever-helpful angie hinrichs angie(a)soe.ucsc.edu
+ a genome graphs call looks like this
+ http://genome.ucsc.edu/cgi-bin/hgGenome?clade=mammal&org=Human&db=hg18&hgGe…
+ &hgGenome_dataSetDescription=test&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess
+ &hgGenome_columnLabels=best%20guess&hgGenome_maxVal=&hgGenome_labelVals=
+ &hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=http://galaxy.esphealth.org/datasets/333/display/index
+ &hgGenome_doSubmitUpload=submit
+ Galaxy gives this for an interval file
+ http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg18&position=chr1:1-1000&hgt.cu…
+ http%3A%2F%2Fgalaxy.esphealth.org%2Fdisplay_as%3Fid%3D339%26display_app%3Ducsc
+ """
+ ret_val = []
+ ggtail = '&hgGenome_doSubmitUpload=submit'
+ if not dataset.dbkey:
+ dataset.dbkey = 'hg18' # punt!
+ ret_val = []
+ ggtail = '&hgGenome_doSubmitUpload=submit'
+ if not dataset.dbkey:
+ dataset.dbkey = 'hg18' # punt!
+ if dataset.has_data:
+ for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey):
+ if site_name in app.config.ucsc_display_sites:
+ site_url = site_url.replace('/hgTracks?','/hgGenome?') # for genome graphs
+ display_url = urllib.quote_plus( "%s/display_as?id=%i&display_app=%s" % (base_url, dataset.id, type) )
+ sl = ["%sdb=%s" % (site_url,dataset.dbkey ),]
+ sl.append("&hgGenome_dataSetName=%s&hgGenome_dataSetDescription=%s" % (dataset.name, 'GalaxyGG_data'))
+ sl.append("&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess")
+ sl.append("&hgGenome_columnLabels=first%20row&hgGenome_maxVal=&hgGenome_labelVals=")
+ sl.append("&hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=")
+ s = urllib.quote_plus( ''.join(sl) )
+ link = "%s%s%s" % (s, display_url, ggtail )
+ ret_val.append( (site_name, link) )
+ return ret_val
+
+
+ def validate( self, dataset ):
+ """Validate a gg file - all numeric after header row"""
+ errors = list()
+ infile = open(dataset.file_name, "r")
+ for i,row in enumerate(infile): #drop header
+ badvals = []
+ if i > 0:
+ ll = row.strip().split('\t')
+ for j,x in enumerate(ll):
+ try:
+ x = float(x)
+ except:
+ badval.append('col%d:%s' % (j+1,x))
+ if len(badvals) > 0:
+ errors.append('row %d, %s' % (' '.join(badvals)))
+ return errors
+
+ def repair_methods( self, dataset ):
+ """Return options for removing errors along with a description"""
+ return [("lines","Remove erroneous lines")]
+
+ def sniff(self,filename):
+ """
+ """
+ infile = open(dataset.file_name, "r")
+ header= infile.next() # header
+ badvals = []
+ for i,row in enumerate(infile[:10]): # sample first 10 rows
+ ll = row.strip().split('\t')
+ for j,x in enumerate(ll[1:]): # ignore first identifier col
+ try:
+ x = float(x)
+ except:
+ badval.append('col%d:%s' % (j+1,x))
+ if len(badvals) > 0:
+ return False
+ else:
+ return True
+
+class rgTabList(Tabular):
+ """ for sampleid and for featureid lists of exclusions or inclusions in the clean tool
+ featureid subsets on statistical criteria -> specialized display such as gg
+ """
+ file_ext = "rgTList"
+
+
+ def __init__(self, **kwd):
+ """Initialize featurelistt datatype"""
+ Tabular.__init__( self, **kwd )
+ self.column_names = []
+
+ def make_html_table( self, dataset, skipchars=[] ):
+ """Create HTML table, used for displaying peek"""
+ out = ['<table cellspacing="0" cellpadding="3">']
+ comments = []
+ try:
+ # Generate column header
+ out.append( '<tr>' )
+ for i, name in enumerate( self.column_names ):
+ out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
+ if dataset.metadata.columns - len( self.column_names ) > 0:
+ for i in range( len( self.column_names ), dataset.metadata.columns ):
+ out.append( '<th>%s</th>' % str( i+1 ) )
+ out.append( '</tr>' )
+ out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
+ out.append( '</table>' )
+ out = "".join( out )
+ except Exception, exc:
+ out = "Can't create peek %s" % exc
+ return out
+
+
+class rgSampleList(rgTabList):
+ """ for sampleid exclusions or inclusions in the clean tool
+ output from QC eg excess het, gender error, ibd pair member,eigen outlier,excess mendel errors,...
+ since they can be uploaded, should be flexible
+ but they are persistent at least
+ same infrastructure for expression?
+ """
+ file_ext = "rgSList"
+
+ def __init__(self, **kwd):
+ """Initialize samplelist datatype"""
+ rgTabList.__init__( self, **kwd )
+ self.column_names[0] = 'FID'
+ self.column_names[1] = 'IID'
+ # this is what Plink wants as at 2009
+
+ def sniff(self,filename):
+ """
+ """
+ infile = open(dataset.file_name, "r")
+ header= infile.next() # header
+ if header[0] == 'FID' and header[1] == 'IID':
+ return True
+ else:
+ return False
+
+class rgFeatureList( rgTabList ):
+ """ for featureid lists of exclusions or inclusions in the clean tool
+ output from QC eg low maf, high missingness, bad hwe in controls, excess mendel errors,...
+ featureid subsets on statistical criteria -> specialized display such as gg
+ same infrastructure for expression?
+ """
+ file_ext = "rgFList"
+
+ def __init__(self, **kwd):
+ """Initialize featurelist datatype"""
+ rgTabList.__init__( self, **kwd )
+ for i,s in enumerate(['#FeatureId', 'Chr', 'Genpos', 'Mappos']):
+ self.column_names[i] = s
+
+
+class Rgenetics(Html):
+ """class to use for rgenetics"""
+
+ MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="rgenetics",
+ readonly=True, set_in_upload=True)
+
+ composite_type = 'auto_primary_file'
+ allow_datatype_change = False
+ file_ext = 'rgenetics'
+
+
+ def missing_meta( self, dataset=None, **kwargs):
+ """Checks for empty meta values"""
+ for key, value in dataset.metadata.items():
+ if not value:
+ return True
+ return False
+
+ def generate_primary_file( self, dataset = None ):
+ rval = ['<html><head><title>Rgenetics Galaxy Composite Dataset </title></head><p/>']
+ rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
+ for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
+ opt_text = ''
+ if composite_file.optional:
+ opt_text = ' (optional)'
+ rval.append( '<li><a href="%s" type="application/binary">%s</a>%s' % ( composite_name, composite_name, opt_text ) )
+ rval.append( '</ul></div></html>' )
+ return "\n".join( rval )
+
+class SNPMatrix(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="snpmatrix"
+
+ def set_peek( self, dataset ):
+ if not dataset.dataset.purged:
+ dataset.peek = "Binary RGenetics file"
+ dataset.blurb = data.nice_size( dataset.get_size() )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+ def sniff(self,filename):
+ """ need to check the file header hex code
+ """
+ infile = open(dataset.file_name, "b")
+ head = infile.read(16)
+ head = [hex(x) for x in head]
+ if head <> '':
+ return False
+ else:
+ return True
+
+
+class Lped(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="lped"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True )
+ self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True )
+
+
+class Pphe(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="pphe"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.pphe', description = 'Plink Phenotype File', substitute_name_with_metadata = 'base_name' )
+
+
+class Lmap(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="lmap"
+
+class Fphe(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="fphe"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.fphe', description = 'FBAT Phenotype File', substitute_name_with_metadata = 'base_name' )
+
+class Phe(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="phe"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.phe', description = 'Phenotype File', substitute_name_with_metadata = 'base_name' )
+
+
+
+class Fped(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="fped"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.fped', description = 'FBAT format pedfile', substitute_name_with_metadata = 'base_name' )
+
+
+class Pbed(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="pbed"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.bim', substitute_name_with_metadata = 'base_name', is_binary = True )
+ self.add_composite_file( '%s.bed', substitute_name_with_metadata = 'base_name', is_binary = True )
+ self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True )
+
+class Eigenstratgeno(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="eigenstratgeno"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True )
+ self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True )
+ self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True )
+
+
+
+class Eigenstratpca(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="eigenstratpca"
+
+ def __init__( self, **kwd ):
+ Rgenetics.__init__(self, **kwd)
+ self.add_composite_file( '%s.eigenstratpca', description = 'Eigenstrat PCA file', substitute_name_with_metadata = 'base_name' )
+
+
+class Snptest(Rgenetics):
+ """fake class to distinguish different species of Rgenetics data collections
+ """
+ file_ext="snptest"
+
+
+class Pheno(Tabular):
+ """
+ base class for pheno files
+ """
+ file_ext = 'pheno'
+
+
+class RexpBase( Html ):
+ """base class for BioC data structures in Galaxy
+ must be constructed with the pheno data in place since that
+ goes into the metadata for each instance
+ """
+ MetadataElement( name="columns", default=0, desc="Number of columns", visible=True )
+ MetadataElement( name="column_names", default=[], desc="Column names", visible=True )
+ MetadataElement(name="pheCols",default=[],desc="Select list for potentially interesting variables",visible=True)
+ MetadataElement( name="base_name",
+ desc="base name for all transformed versions of this expression dataset", default='rexpression', set_in_upload=True)
+ MetadataElement( name="pheno_path", desc="Path to phenotype data for this experiment", default="rexpression.pheno", visible=True)
+ file_ext = 'rexpbase'
+ html_table = None
+ is_binary = True
+ composite_type = 'auto_primary_file'
+ allow_datatype_change = False
+
+
+ def __init__( self, **kwd ):
+ Html.__init__(self,**kwd)
+ self.add_composite_file( '%s.pheno', description = 'Phenodata tab text file',
+ substitute_name_with_metadata = 'base_name', is_binary=True)
+
+ def generate_primary_file( self, dataset = None ):
+ """ This is called only at upload to write the html file
+ cannot rename the datasets here - they come with the default unfortunately
+ """
+ return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'
+
+ def get_phecols(self, phenolist=[], maxConc=20):
+ """
+ sept 2009: cannot use whitespace to split - make a more complex structure here
+ and adjust the methods that rely on this structure
+ return interesting phenotype column names for an rexpression eset or affybatch
+ to use in array subsetting and so on. Returns a data structure for a
+ dynamic Galaxy select parameter.
+ A column with only 1 value doesn't change, so is not interesting for
+ analysis. A column with a different value in every row is equivalent to a unique
+ identifier so is also not interesting for anova or limma analysis - both these
+ are removed after the concordance (count of unique terms) is constructed for each
+ column. Then a complication - each remaining pair of columns is tested for
+ redundancy - if two columns are always paired, then only one is needed :)
+ """
+ for nrows,row in enumerate(phenolist): # construct concordance
+ if len(row.strip()) == 0:
+ break
+ row = row.strip().split('\t')
+ if nrows == 0: # set up from header
+ head = row
+ totcols = len(row)
+ concordance = [{} for x in head] # list of dicts
+ else:
+ for col,code in enumerate(row): # keep column order correct
+ if col >= totcols:
+ gal_Log.warning('### get_phecols error in pheno file - row %d col %d (%s) longer than header %s' % (nrows, col, row, head))
+ else:
+ concordance[col].setdefault(code,0) # first one is zero
+ concordance[col][code] += 1
+ useCols = []
+ useConc = [] # columns of interest to keep
+ nrows = len(phenolist)
+ nrows -= 1 # drop head from count
+ for c,conc in enumerate(concordance): # c is column number
+ if (len(conc) > 1) and (len(conc) < min(nrows,maxConc)): # not all same and not all different!!
+ useConc.append(conc) # keep concordance
+ useCols.append(c) # keep column
+ nuse = len(useCols)
+ # now to check for pairs of concordant columns - drop one of these.
+ delme = []
+ p = phenolist[1:] # drop header
+ plist = [x.strip().split('\t') for x in p] # list of lists
+ phe = [[x[i] for i in useCols] for x in plist if len(x) >= totcols] # strip unused data
+ for i in range(0,(nuse-1)): # for each interesting column
+ for j in range(i+1,nuse):
+ kdict = {}
+ for row in phe: # row is a list of lists
+ k = '%s%s' % (row[i],row[j]) # composite key
+ kdict[k] = k
+ if (len(kdict.keys()) == len(concordance[useCols[j]])): # i and j are always matched
+ delme.append(j)
+ delme = list(set(delme)) # remove dupes
+ listCol = []
+ delme.sort()
+ delme.reverse() # must delete from far end!
+ for i in delme:
+ del useConc[i] # get rid of concordance
+ del useCols[i] # and usecols entry
+ for i,conc in enumerate(useConc): # these are all unique columns for the design matrix
+ ccounts = [(conc.get(code,0),code) for code in conc.keys()] # decorate
+ ccounts.sort()
+ cc = [(x[1],x[0]) for x in ccounts] # list of code count tuples
+ codeDetails = (head[useCols[i]],cc) # ('foo',[('a',3),('b',11),..])
+ listCol.append(codeDetails)
+ if len(listCol) > 0:
+ res = listCol
+ # metadata.pheCols becomes [('bar;22,zot;113','foo'), ...]
+ else:
+ res = [('no usable phenotype columns found',[('?',0),]),]
+ return res
+
+
+
+ def get_pheno(self,dataset):
+ """expects a .pheno file in the extra_files_dir - ugh
+ note that R is wierd and adds the row.name in
+ the header so the columns are all wrong - unless you tell it not to.
+ A file can be written as
+ write.table(file='foo.pheno',pData(foo),sep='\t',quote=F,row.names=F)
+ """
+ p = file(dataset.metadata.pheno_path,'r').readlines()
+ if len(p) > 0: # should only need to fix an R pheno file once
+ head = p[0].strip().split('\t')
+ line1 = p[1].strip().split('\t')
+ if len(head) < len(line1):
+ head.insert(0,'ChipFileName') # fix R write.table b0rken-ness
+ p[0] = '\t'.join(head)
+ else:
+ p = []
+ return '\n'.join(p)
+
+ def set_peek( self, dataset ):
+ """expects a .pheno file in the extra_files_dir - ugh
+ note that R is wierd and does not include the row.name in
+ the header. why?"""
+ if not dataset.dataset.purged:
+ pp = os.path.join(dataset.extra_files_path,'%s.pheno' % dataset.metadata.base_name)
+ try:
+ p = file(pp,'r').readlines()
+ except:
+ p = ['##failed to find %s' % pp,]
+ gal_Log.debug('@@@rexpression set_peek, dataset.name=%s,\npp=%s,\np=%s' % (dataset.name,pp,p[:3]))
+ dataset.peek = ''.join(p[:5])
+ dataset.blurb = 'Galaxy Rexpression composite file'
+ else:
+ dataset.peek = 'file does not exist\n'
+ dataset.blurb = 'file purged from disk'
+
+ def get_peek( self, dataset ):
+ """expects a .pheno file in the extra_files_dir - ugh
+ """
+ pp = os.path.join(dataset.extra_files_path,'%s.pheno' % dataset.metadata.base_name)
+ try:
+ p = file(pp,'r').readlines()
+ except:
+ p = ['##failed to find %s' % pp]
+ gal_Log.debug('@@@rexpression get_peek, dataset.file_name=%s,\npp=%s,\np=%s' % (dataset.file_name,pp,p[:3]))
+ return ''.join(p[:5])
+
+ def get_file_peek(self,filename):
+ """
+ can't really peek at a filename - need the extra_files_path and such?
+ """
+ h = '## rexpression get_file_peek: no file found'
+ try:
+ h = file(filename,'r').readlines()
+ except:
+ pass
+ return ''.join(h[:5])
+
+ def regenerate_primary_file(self,dataset):
+ """cannot do this until we are setting metadata
+ """
+ bn = dataset.metadata.base_name
+ flist = os.listdir(dataset.extra_files_path)
+ rval = ['<html><head><title>Files for Composite Dataset %s</title></head><p/>Comprises the following files:<p/><ul>' % (bn)]
+ for i,fname in enumerate(flist):
+ sfname = os.path.split(fname)[-1]
+ rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
+ rval.append( '</ul></html>' )
+ gal_Log.debug('rexpression regenerate primary file, writing %s' % rval)
+ f = file(dataset.file_name,'w')
+ f.write("\n".join( rval ))
+ f.write('\n')
+ f.close()
+
+ """Add metadata elements"""
+ def init_meta( self, dataset, copy_from=None ):
+ if copy_from:
+ dataset.metadata = copy_from.metadata
+
+ def set_meta( self, dataset, **kwd ):
+
+ """
+ NOTE we apply the tabular machinary to the phenodata extracted
+ from a BioC eSet or affybatch.
+
+ """
+ try:
+ flist = os.listdir(dataset.extra_files_path)
+ except:
+ gal_Log.debug('@@@rexpression set_meta failed - no dataset?')
+ return
+ bn = None
+ for f in flist:
+ n = os.path.splitext(f)[0]
+ if not bn:
+ bn = n
+ dataset.metadata.base_name = bn
+ if not bn:
+ bn = '?'
+ pn = '%s.pheno' % (bn)
+ pp = os.path.join(dataset.extra_files_path,pn)
+ dataset.metadata.pheno_path=pp
+ try:
+ pf = file(pp,'r').readlines() # read the basename.phenodata in the extra_files_path
+ except:
+ pf = None
+ if pf:
+ h = pf[0].strip()
+ h = h.split('\t') # hope is header
+ h = [escape(x) for x in h]
+ dataset.metadata.column_names = h
+ dataset.metadata.columns = len(h)
+ dataset.peek = ''.join(pf[:5])
+ else:
+ dataset.metadata.column_names = []
+ dataset.metadata.columns = 0
+ dataset.peek = 'No pheno file found'
+ if len(pf) > 1:
+ dataset.metadata.pheCols = self.get_phecols(phenolist=pf)
+ else:
+ dataset.metadata.pheCols = [('','No useable phenotypes found',False),]
+ #self.regenerate_primary_file(dataset)
+ if not dataset.info:
+ dataset.info = 'Galaxy Expression datatype object'
+ if not dataset.blurb:
+ dataset.blurb = 'R loadable BioC expression object for the Rexpression Galaxy toolkit'
+ gal_Log.debug('@@@rexpression set_meta on dsn=%s, pf=%s, peek=%s' % (dataset.file_name,''.join(pf[:5]),dataset.peek))
+ return True
+
+ def make_html_table( self, pp='nothing supplied from peek\n'):
+ """Create HTML table, used for displaying peek"""
+ out = ['<table cellspacing="0" cellpadding="3">',]
+ p = pp.split('\n')
+ try:
+ # Generate column header
+ for i,row in enumerate(p):
+ lrow = row.strip().split('\t')
+ if i == 0:
+ orow = ['<th>%s</th>' % escape(x) for x in lrow]
+ orow.insert(0,'<tr>')
+ orow.append('</tr>')
+ else:
+ orow = ['<td>%s</td>' % escape(x) for x in lrow]
+ orow.insert(0,'<tr>')
+ orow.append('</tr>')
+ out.append(''.join(orow))
+ out.append( '</table>' )
+ out = "\n".join( out )
+ except Exception, exc:
+ out = "Can't create html table %s" % str( exc )
+ return out
+
+ def display_peek( self, dataset ):
+ """Returns formatted html of peek"""
+ out=self.make_html_table(dataset.peek)
+ return out
+
+ def get_mime(self):
+ """Returns the mime type of the datatype"""
+ return 'text/html'
+
+
+class Affybatch( RexpBase ):
+ """derived class for BioC data structures in Galaxy """
+
+ file_ext = "affybatch"
+
+ def __init__( self, **kwd ):
+ RexpBase.__init__(self, **kwd)
+ self.add_composite_file( '%s.affybatch', description = 'AffyBatch R object saved to file',
+ substitute_name_with_metadata = 'base_name', is_binary=True )
+
+class Eset( RexpBase ):
+ """derived class for BioC data structures in Galaxy """
+ file_ext = "eset"
+
+ def __init__( self, **kwd ):
+ RexpBase.__init__(self, **kwd)
+ self.add_composite_file( '%s.eset', description = 'ESet R object saved to file',
+ substitute_name_with_metadata = 'base_name', is_binary = True )
+
+
+class MAlist( RexpBase ):
+ """derived class for BioC data structures in Galaxy """
+ file_ext = "malist"
+
+ def __init__( self, **kwd ):
+ RexpBase.__init__(self, **kwd)
+ self.add_composite_file( '%s.malist', description = 'MAlist R object saved to file',
+ substitute_name_with_metadata = 'base_name', is_binary = True )
+
+
+if __name__ == '__main__':
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__])
+
+
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/1a0bb7d6897c
changeset: 2870:1a0bb7d6897c
user: Ross Lazarus <ross.lazarus(a)gmail.com>
date: Sun Oct 11 15:07:11 2009 -0400
description:
symlinks are not very useful
6 file(s) affected in this change:
lib/galaxy/datatypes/converters/lped_to_fped_converter.py
lib/galaxy/datatypes/converters/lped_to_fped_converter.xml
lib/galaxy/datatypes/converters/lped_to_pbed_converter.py
lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml
lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml
diffs (383 lines):
diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_fped_converter.py
--- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15:00:40 2009 -0400
+++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15:07:11 2009 -0400
@@ -1,1 +1,110 @@
-/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.py
\ No newline at end of file
+# for rgenetics - lped to fbat
+# recode to numeric fbat version
+# much slower so best to always
+# use numeric alleles internally
+
+import sys,os,time
+
+
+prog = os.path.split(sys.argv[0])[-1]
+myversion = 'Oct 10 2009'
+
+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
+<title></title>
+<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+</head>
+<body>
+<div class="document">
+"""
+
+def timenow():
+ """return current time as a string
+ """
+ return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+
+def rgConv(inpedfilepath,outhtmlname,outfilepath):
+ """convert linkage ped/map to fbat"""
+ recode={'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'}
+ basename = os.path.split(inpedfilepath)[-1] # get basename
+ inmap = '%s.map' % inpedfilepath
+ inped = '%s.ped' % inpedfilepath
+ outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data
+ outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to
+ try:
+ mf = file(inmap,'r')
+ except:
+ sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap))
+ sys.exit(1)
+ try:
+ rsl = [x.split()[1] for x in mf]
+ except:
+ sys.stderr.write('## cannot parse %s' % inmap)
+ sys.exit(1)
+ try:
+ os.makedirs(outfilepath)
+ except:
+ pass # already exists
+ head = ' '.join(rsl) # list of rs numbers
+ # TODO add anno to rs but fbat will prolly barf?
+ pedf = file(inped,'r')
+ o = file(outfpath,'w',2**20)
+ o.write(head)
+ o.write('\n')
+ for i,row in enumerate(pedf):
+ if i == 0:
+ lrow = row.split()
+ try:
+ x = [int(x) for x in lrow[10:50]] # look for non numeric codes
+ except:
+ dorecode = 1
+ if dorecode:
+ lrow = row.strip().split()
+ p = lrow[:6]
+ g = lrow[6:]
+ gc = [recode.get(x,'0') for x in g]
+ lrow = p+gc
+ row = '%s\n' % ' '.join(lrow)
+ o.write(row)
+ o.close()
+
+
+def main():
+ """call fbater
+ need to work with rgenetics composite datatypes
+ so in and out are html files with data in extrafiles path
+ <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name'
+ '$output1' '$output1.extra_files_path'
+ </command>
+ """
+ nparm = 3
+ if len(sys.argv) < nparm:
+ sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm))
+ sys.exit(1)
+ inpedfilepath = sys.argv[1]
+ outhtmlname = sys.argv[2]
+ outfilepath = sys.argv[3]
+ try:
+ os.makedirs(outfilepath)
+ except:
+ pass
+ rgConv(inpedfilepath,outhtmlname,outfilepath)
+ f = file(outhtmlname,'w')
+ f.write(galhtmlprefix % prog)
+ flist = os.listdir(outfilepath)
+ print '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info
+ f.write('<div>## Rgenetics: http://rgenetics.org Galaxy Tools %s %s\n<ol>' % (prog,timenow()))
+ for i, data in enumerate( flist ):
+ f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
+ f.write("</div></body></html>")
+ f.close()
+
+
+
+if __name__ == "__main__":
+ main()
diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_fped_converter.xml
--- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 15:00:40 2009 -0400
+++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 15:07:11 2009 -0400
@@ -1,1 +1,15 @@
-/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.xml
\ No newline at end of file
+<tool id="lped2fpedconvert" name="Convert lped to fped" version="0.01">
+ <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+ <!-- Used on the metadata edit page. -->
+ <command interpreter="python">
+ lped_to_fped_converter.py '$input1.extra_files_path/$input1.metadata.base_name' '$output1' '$output1.extra_files_path'
+ </command>
+ <inputs>
+ <param format="lped" name="input1" type="data" label="Choose linkage pedigree file"/>
+ </inputs>
+ <outputs>
+ <data format="fped" name="output1" metadata_source="input1"/>
+ </outputs>
+ <help>
+ </help>
+</tool>
diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_pbed_converter.py
--- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15:00:40 2009 -0400
+++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15:07:11 2009 -0400
@@ -1,1 +1,110 @@
-/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.py
\ No newline at end of file
+# for rgenetics - lped to pbed
+# where to stop with converters
+# pbed might be central
+# eg lped/eigen/fbat/snpmatrix all to pbed
+# and pbed to lped/eigen/fbat/snpmatrix ?
+# that's a lot of converters
+import sys,os,time,subprocess
+
+
+prog = os.path.split(sys.argv[0])[-1]
+myversion = 'Oct 10 2009'
+
+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
+<title></title>
+<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+</head>
+<body>
+<div class="document">
+"""
+
+def timenow():
+ """return current time as a string
+ """
+ return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+def getMissval(inped=''):
+ """
+ read some lines...ugly hack - try to guess missing value
+ should be N or 0 but might be . or -
+ """
+ commonmissvals = {'N':'N','0':'0','n':'n','9':'9','-':'-','.':'.'}
+ try:
+ f = file(inped,'r')
+ except:
+ return None # signal no in file
+ missval = None
+ while missval == None: # doggedly continue until we solve the mystery
+ try:
+ l = f.readline()
+ except:
+ break
+ ll = l.split()[6:] # ignore pedigree stuff
+ for c in ll:
+ if commonmissvals.get(c,None):
+ missval = c
+ f.close()
+ return missval
+ if not missval:
+ missval = 'N' # punt
+ close(f)
+ return missval
+
+def rgConv(inpedfilepath,outhtmlname,outfilepath,plink):
+ """
+ """
+ pedf = '%s.ped' % inpedfilepath
+ basename = os.path.split(inpedfilepath)[-1] # get basename
+ outroot = os.path.join(outfilepath,basename)
+ missval = getMissval(inped = pedf)
+ if not missval:
+ print '### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf
+ missval = '0'
+ cl = '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' % (plink,inpedfilepath,outroot,missval)
+ p = subprocess.Popen(cl,shell=True,cwd=outfilepath)
+ retval = p.wait() # run plink
+
+
+
+
+def main():
+ """
+ need to work with rgenetics composite datatypes
+ so in and out are html files with data in extrafiles path
+ <command interpreter="python">lped_to_pbed_converter.py '$input1/$input1.metadata.base_name'
+ '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
+ </command>
+ """
+ nparm = 4
+ if len(sys.argv) < nparm:
+ sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm))
+ sys.exit(1)
+ inpedfilepath = sys.argv[1]
+ outhtmlname = sys.argv[2]
+ outfilepath = sys.argv[3]
+ try:
+ os.makedirs(outfilepath)
+ except:
+ pass
+ plink = sys.argv[4]
+ rgConv(inpedfilepath,outhtmlname,outfilepath,plink)
+ f = file(outhtmlname,'w')
+ f.write(galhtmlprefix % prog)
+ flist = os.listdir(outfilepath)
+ s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info
+ print s
+ f.write('<div>%s\n<ol>' % (s))
+ for i, data in enumerate( flist ):
+ f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
+ f.write("</div></body></html>")
+ f.close()
+
+
+
+if __name__ == "__main__":
+ main()
diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml
--- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 15:00:40 2009 -0400
+++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 15:07:11 2009 -0400
@@ -1,1 +1,16 @@
-/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.xml
\ No newline at end of file
+<tool id="lped2pbedconvert" name="Convert lped to plink pbed" version="0.01">
+ <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+ <!-- Used on the metadata edit page. -->
+ <command interpreter="python">
+ lped_to_pbed_converter.py '$input1.extra_files_path/$input1.metadata.base_name'
+ '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
+ </command>
+ <inputs>
+ <param format="lped" name="input1" type="data" label="Choose linkage pedigree file"/>
+ </inputs>
+ <outputs>
+ <data format="pbed" name="output1" metadata_source="input1"/>
+ </outputs>
+ <help>
+ </help>
+</tool>
diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
--- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15:00:40 2009 -0400
+++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15:07:11 2009 -0400
@@ -1,1 +1,80 @@
-/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.py
\ No newline at end of file
+# for rgenetics - lped to pbed
+# where to stop with converters
+# pbed might be central
+# eg lped/eigen/fbat/snpmatrix all to pbed
+# and pbed to lped/eigen/fbat/snpmatrix ?
+# that's a lot of converters
+import sys,os,time,subprocess
+
+
+prog = os.path.split(sys.argv[0])[-1]
+myversion = 'Oct 10 2009'
+
+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
+<title></title>
+<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+</head>
+<body>
+<div class="document">
+"""
+
+def timenow():
+ """return current time as a string
+ """
+ return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+
+def rgConv(inpedfilepath,outhtmlname,outfilepath,plink):
+ """
+ """
+
+ basename = os.path.split(inpedfilepath)[-1] # get basename
+ outroot = os.path.join(outfilepath,basename)
+ cl = '%s --noweb --bfile %s --recode --out %s ' % (plink,inpedfilepath,outroot)
+ p = subprocess.Popen(cl,shell=True,cwd=outfilepath)
+ retval = p.wait() # run plink
+
+
+
+
+def main():
+ """
+ need to work with rgenetics composite datatypes
+ so in and out are html files with data in extrafiles path
+ <command interpreter="python">pbed_to_lped_converter.py '$input1/$input1.metadata.base_name'
+ '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
+ </command>
+ """
+ nparm = 4
+ if len(sys.argv) < nparm:
+ sys.stderr.write('## %s called with %s - needs %d parameters \n' % (myname,sys.argv,nparm))
+ sys.exit(1)
+ inpedfilepath = sys.argv[1]
+ outhtmlname = sys.argv[2]
+ outfilepath = sys.argv[3]
+ try:
+ os.makedirs(outfilepath)
+ except:
+ pass
+ plink = sys.argv[4]
+ rgConv(inpedfilepath,outhtmlname,outfilepath,plink)
+ f = file(outhtmlname,'w')
+ f.write(galhtmlprefix % prog)
+ flist = os.listdir(outfilepath)
+ s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info
+ print s
+ f.write('<div>%s\n<ol>' % (s))
+ for i, data in enumerate( flist ):
+ f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
+ f.write("</div></body></html>")
+ f.close()
+
+
+
+if __name__ == "__main__":
+ main()
diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml
--- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 15:00:40 2009 -0400
+++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 15:07:11 2009 -0400
@@ -1,1 +1,16 @@
-/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.xml
\ No newline at end of file
+<tool id="pbed2lpedconvert" name="Convert plink pbed to linkage lped" version="0.01">
+ <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+ <!-- Used on the metadata edit page. -->
+ <command interpreter="python">
+ pbed_to_lped_converter.py '$input1.extra_files_path/$input1.metadata.base_name'
+ '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
+ </command>
+ <inputs>
+ <param format="pbed" name="input1" type="data" label="Choose compressed Plink binary format genotype file"/>
+ </inputs>
+ <outputs>
+ <data format="lped" name="output1" metadata_source="input1"/>
+ </outputs>
+ <help>
+ </help>
+</tool>
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/639c6b5e0073
changeset: 2869:639c6b5e0073
user: Ross Lazarus <ross.lazarus(a)gmail.com>
date: Sun Oct 11 15:00:40 2009 -0400
description:
adding rgenetics converters
6 file(s) affected in this change:
lib/galaxy/datatypes/converters/lped_to_fped_converter.py
lib/galaxy/datatypes/converters/lped_to_fped_converter.xml
lib/galaxy/datatypes/converters/lped_to_pbed_converter.py
lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml
lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml
diffs (36 lines):
diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_fped_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15:00:40 2009 -0400
@@ -0,0 +1,1 @@
+/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.py
\ No newline at end of file
diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_fped_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 15:00:40 2009 -0400
@@ -0,0 +1,1 @@
+/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.xml
\ No newline at end of file
diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_pbed_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15:00:40 2009 -0400
@@ -0,0 +1,1 @@
+/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.py
\ No newline at end of file
diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 15:00:40 2009 -0400
@@ -0,0 +1,1 @@
+/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.xml
\ No newline at end of file
diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15:00:40 2009 -0400
@@ -0,0 +1,1 @@
+/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.py
\ No newline at end of file
diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 15:00:40 2009 -0400
@@ -0,0 +1,1 @@
+/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.xml
\ No newline at end of file
1
0
13 Oct '09
details: http://www.bx.psu.edu/hg/galaxy/rev/109e94372abb
changeset: 2868:109e94372abb
user: Ross Lazarus <ross.lazarus(a)gmail.com>
date: Sun Oct 11 09:45:38 2009 -0400
description:
added 2 valid chars ;| and patch for empty url_paste in composite upload
2 file(s) affected in this change:
lib/galaxy/tools/parameters/grouping.py
lib/galaxy/util/__init__.py
diffs (24 lines):
diff -r 5391edcf618d -r 109e94372abb lib/galaxy/tools/parameters/grouping.py
--- a/lib/galaxy/tools/parameters/grouping.py Sun Oct 11 09:37:43 2009 -0400
+++ b/lib/galaxy/tools/parameters/grouping.py Sun Oct 11 09:45:38 2009 -0400
@@ -226,7 +226,7 @@
if context.get( 'space_to_tab', None ) not in ["None", None]:
space_to_tab = True
file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
- if file_bunch.path:
+ if file_bunch.path and url_paste:
if url_paste.strip():
warnings.append( "All file contents specified in the paste box were ignored." )
else: #we need to use url_paste
diff -r 5391edcf618d -r 109e94372abb lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py Sun Oct 11 09:37:43 2009 -0400
+++ b/lib/galaxy/util/__init__.py Sun Oct 11 09:45:38 2009 -0400
@@ -109,7 +109,7 @@
return text
# characters that are valid
-valid_chars = set(string.letters + string.digits + " -=_.()/+*^,:?!")
+valid_chars = set(string.letters + string.digits + " -=_.()/+*^,:?!;|")
# characters that are allowed but need to be escaped
mapped_chars = { '>' :'__gt__',
1
0
13 Oct '09
details: http://www.bx.psu.edu/hg/galaxy/rev/5391edcf618d
changeset: 2867:5391edcf618d
user: Ross Lazarus <ross.lazarus(a)gmail.com>
date: Sun Oct 11 09:37:43 2009 -0400
description:
rexpression datatypes and fixes for rgenetics datatypes
1 file(s) affected in this change:
lib/galaxy/datatypes/genetics.py
diffs (376 lines):
diff -r 080b3c44963e -r 5391edcf618d lib/galaxy/datatypes/genetics.py
--- a/lib/galaxy/datatypes/genetics.py Sat Oct 10 16:18:08 2009 -0400
+++ b/lib/galaxy/datatypes/genetics.py Sun Oct 11 09:37:43 2009 -0400
@@ -1,370 +1,1 @@
-"""
-rgenetics datatypes
-Use at your peril
-Ross Lazarus
-for the rgenetics and galaxy projects
-
-genome graphs datatypes derived from Interval datatypes
-genome graphs datasets have a header row with appropriate columnames
-The first column is always the marker - eg columname = rs, first row= rs12345 if the rows are snps
-subsequent row values are all numeric ! Will fail if any non numeric (eg '+' or 'NA') values
-ross lazarus for rgenetics
-august 20 2007
-"""
-
-import logging, os, sys, time, tempfile, shutil
-import data
-from galaxy import util
-from cgi import escape
-import urllib
-from galaxy.web import url_for
-from galaxy.datatypes import metadata
-from galaxy.datatypes.metadata import MetadataElement
-#from galaxy.datatypes.data import Text
-from galaxy.datatypes.tabular import Tabular
-from galaxy.datatypes.images import Html
-
-log = logging.getLogger(__name__)
-
-
-
-class GenomeGraphs( Tabular ):
- """Tab delimited data containing a marker id and any number of numeric values"""
-
- """Add metadata elements"""
- MetadataElement( name="markerCol", default=1, desc="Marker ID column", param=metadata.ColumnParameter )
- MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True )
- MetadataElement( name="column_types", default=[], desc="Column types", readonly=True, visible=False )
- file_ext = 'gg'
-
- def __init__(self, **kwd):
- """Initialize gg datatype, by adding UCSC display apps"""
- Tabular.__init__(self, **kwd)
- self.add_display_app ( 'ucsc', 'Genome Graph', 'as_ucsc_display_file', 'ucsc_links' )
-
- def set_peek( self, dataset ):
- """Set the peek and blurb text"""
- if not dataset.dataset.purged:
- dataset.peek = data.get_file_peek( dataset.file_name )
- dataset.blurb = util.commaify( str( data.get_line_count( dataset.file_name ) ) ) + " rows"
- #i don't think set_meta should not be called here, it should be called separately
- self.set_meta( dataset )
- else:
- dataset.peek = 'file does not exist'
- dataset.blurb = 'file purged from disk'
-
- def get_estimated_display_viewport( self, dataset ):
- """Return a chrom, start, stop tuple for viewing a file."""
- raise notImplemented
-
- def ucsc_links( self, dataset, type, app, base_url ):
- """ from the ever-helpful angie hinrichs angie(a)soe.ucsc.edu
- a genome graphs call looks like this
- http://genome.ucsc.edu/cgi-bin/hgGenome?clade=mammal&org=Human&db=hg18&hgGe…
- &hgGenome_dataSetDescription=test&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess
- &hgGenome_columnLabels=best%20guess&hgGenome_maxVal=&hgGenome_labelVals=
- &hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=http://galaxy.esphealth.org/datasets/333/display/index
- &hgGenome_doSubmitUpload=submit
- Galaxy gives this for an interval file
- http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg18&position=chr1:1-1000&hgt.cu…
- http%3A%2F%2Fgalaxy.esphealth.org%2Fdisplay_as%3Fid%3D339%26display_app%3Ducsc
- """
- ret_val = []
- ggtail = '&hgGenome_doSubmitUpload=submit'
- if not dataset.dbkey:
- dataset.dbkey = 'hg18' # punt!
- if dataset.has_data:
- for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey):
- if site_name in app.config.ucsc_display_sites:
- site_url = site_url.replace('/hgTracks?','/hgGenome?') # for genome graphs
- display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s" % (base_url, url_for( controller='root' ), dataset.id, type))
- sl = ["%sdb=%s" % (site_url,dataset.dbkey ),]
- sl.append("&hgGenome_dataSetName=%s&hgGenome_dataSetDescription=%s" % (dataset.name, 'GalaxyGG_data'))
- sl.append("&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess")
- sl.append("&hgGenome_columnLabels=first%20row&hgGenome_maxVal=&hgGenome_labelVals=")
- sl.append("&hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=%%s")
- sl.append(ggtail)
- s = urllib.quote_plus( ''.join(sl) )
- link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, s, display_url )
- ret_val.append( (site_name, link) )
- return ret_val
-
- def validate( self, dataset ):
- """Validate a gg file - all numeric after header row"""
- errors = list()
- infile = open(dataset.file_name, "r")
- header= infile.next() # header
- for i,row in enumerate(infile):
- ll = row.strip().split('\t')
- badvals = []
- for j,x in enumerate(ll):
- try:
- x = float(x)
- except:
- badval.append('col%d:%s' % (j+1,x))
- if len(badvals) > 0:
- errors.append('row %d, %s' % (' '.join(badvals)))
- return errors
-
- def repair_methods( self, dataset ):
- """Return options for removing errors along with a description"""
- return [("lines","Remove erroneous lines")]
-
-
-class Rgenetics(Html):
- """class to use for rgenetics"""
- """Add metadata elements"""
- MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="galaxy", readonly=True, set_in_upload=True)
-
- file_ext="html"
- composite_type = 'auto_primary_file'
- allow_datatype_change = False
-
- def missing_meta( self, dataset ):
- """Checks for empty meta values"""
- for key, value in dataset.metadata.items():
- if not value:
- return True
- return False
-
- def generate_primary_file( self, dataset = None ):
- rval = ['<html><head><title>Files for Composite Dataset (%s)</title></head><p/>This composite dataset is composed of the following files:<p/><ul>' % ( self.file_ext ) ]
- for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
- opt_text = ''
- if composite_file.optional:
- opt_text = ' (optional)'
- rval.append( '<li><a href="%s">%s</a>%s' % ( composite_name, composite_name, opt_text ) )
- rval.append( '</ul></html>' )
- return "\n".join( rval )
-
-class SNPMatrix(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="snpmatrix"
-
- def set_peek( self, dataset ):
- if not dataset.dataset.purged:
- dataset.peek = "Binary RGenetics file"
- dataset.blurb = data.nice_size( dataset.get_size() )
- else:
- dataset.peek = 'file does not exist'
- dataset.blurb = 'file purged from disk'
-
-
-class Lped(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="lped"
-
- def __init__( self, **kwd ):
- Rgenetics.__init__( self, **kwd )
- self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True )
-
-
-class Pphe(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="pphe"
-
-class Lmap(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="lmap"
-
-class Fphe(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="fphe"
-
-class Phe(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="phe"
-
-
-class Fped(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="fped"
-
-
-class Pbed(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="pbed"
-
- def __init__( self, **kwd ):
- Rgenetics.__init__( self, **kwd )
- self.add_composite_file( '%s.bim', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s.bed', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True )
-
-
-class Eigenstratgeno(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="eigenstratgeno"
-
- def __init__( self, **kwd ):
- Rgenetics.__init__( self, **kwd )
- self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True )
- self.add_composite_file( '%s_fo.eigenstratgeno', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True )
- self.add_composite_file( '%s_fo.ind', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True )
- self.add_composite_file( '%s_fo.map', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True )
- self.add_composite_file( '%s_oo.eigenstratgeno', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True )
- self.add_composite_file( '%s_oo.ind', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True )
- self.add_composite_file( '%s_oo.map', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True )
-
-
-
-class Eigenstratpca(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="eigenstratpca"
-
-class Snptest(Rgenetics):
- """fake class to distinguish different species of Rgenetics data collections
- """
- file_ext="snptest"
-
-class RexpBase( Html ):
- """base class for BioC data structures in Galaxy
- must be constructed with the pheno data in place since that
- goes into the metadata for each instance"""
-
- """Add metadata elements"""
- MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=False )
- MetadataElement( name="column_names", default=[], desc="Column names", readonly=True,visible=True )
- MetadataElement( name="base_name",
- desc="base name for all transformed versions of this genetic dataset", readonly=True, default='galaxy', set_in_upload=True)
- ### Do we really need these below? can we rely on dataset.extra_files_path: os.path.join( dataset.extra_files_path, '%s.phenodata' % dataset.metadata.base_name ) ?
- ### Do these have a different purpose? Ross will need to clarify
- ### Uploading these datatypes will not work until this is sorted out (set_peek fails)...
- MetadataElement( name="pheno_path",
- desc="Path to phenotype data for this experiment", readonly=True)
- MetadataElement( name="pheno",
- desc="Phenotype data for this experiment", readonly=True)
-
- file_ext = None
-
- is_binary = True
-
- allow_datatype_change = False
-
- composite_type = 'basic'
-
- def __init__( self, **kwd ):
- Html.__init__( self, **kwd )
- self.add_composite_file( '%s.phenodata', substitute_name_with_metadata = 'base_name', is_binary = True )
-
- def set_peek( self, dataset ):
- """expects a .pheno file in the extra_files_dir - ugh
- note that R is wierd and does not include the row.name in
- the header. why?"""
- p = file(dataset.metadata.pheno_path,'r').readlines() #this fails
- head = p[0].strip().split('\t')
- head.insert(0,'ChipFileName') # fix R write.table b0rken-ness
- p[0] = '\t'.join(head)
- p = '\n'.join(p)
- dataset.peek = p
- dataset.metadata.pheno = p
- dataset.blurb = 'R loadable BioC expression object for the Rexpression Galaxy toolkit'
-
- # stolen from Tabular
- # class Tabular( data.Text ):
- """Tab delimited data"""
-
- """Add metadata elements"""
- def init_meta( self, dataset, copy_from=None ):
- if copy_from:
- dataset.metadata = copy_from.metadata
-
-
- #def set_readonly_meta( self, dataset, skip=0, **kwd ):
- # """Resets the values of readonly metadata elements."""
- # RexpBase.set_meta( self, dataset, skip=skip )
-
- def set_readonly_meta( self, dataset, **kwd ):
- """Resets the values of readonly metadata elements."""
- RexpBase.set_meta( self, dataset )
-
- #def set_meta( self, dataset, skip=0, **kwd ):
- def set_meta( self, dataset, **kwd ):
-
- """
- NOTE we apply the tabular machinary to the phenodata extracted
- from a BioC eSet or affybatch.
-
- """
- if not dataset.peek:
- dataset.set_peek()
- pk = dataset.peek # use the peek which is the pheno data insead of dataset (!)
- ###this is probably not the best source, can we just access the raw data directly?
- if pk:
- p = pk.split('\n')
- h = p[0].strip().split('\t') # hope is header
- h = [escape(x) for x in h]
- dataset.metadata.column_names = h
- dataset.metadata.columns = len(h)
- else:
- dataset.metadata.column_names = []
- dataset.metadata.columns = 0
-
- def make_html_table( self, dataset):
- """Create HTML table, used for displaying peek"""
- out = ['<table cellspacing="0" cellpadding="3">',]
- try:
- # Generate column header
- pk = dataset.peek
- p = pk.split('\n')
- for i,row in enumerate(p):
- lrow = row.strip().split('\t')
- if i == 0:
- orow = ['<th>%s</th>' % escape(x) for x in lrow]
- orow.insert(0,'<tr>')
- orow.append('</tr>')
- else:
- orow = ['<td>%s</td>' % escape(x) for x in lrow]
- orow.insert(0,'<tr>')
- orow.append('</tr>')
- out.append(''.join(orow))
- out.append( '</table>' )
- out = "\n".join( out )
- except Exception, exc:
- out = "Can't create peek %s" % str( exc )
- return out
-
- def display_peek( self, dataset ):
- """Returns formatted html of peek"""
- if not dataset.peek:
- dataset.set_peek()
- return self.make_html_table( dataset )
-
- def get_mime(self):
- """Returns the mime type of the datatype"""
- return 'application/gzip'
-
-
-class AffyBatch( RexpBase ):
- """derived class for BioC data structures in Galaxy """
- file_ext = "affybatch"
-
-
-class ESet( RexpBase ):
- """derived class for BioC data structures in Galaxy """
- file_ext = "eset"
-
-
-class MAList( RexpBase ):
- """derived class for BioC data structures in Galaxy """
- file_ext = "malist"
-
-
-if __name__ == '__main__':
- import doctest, sys
- doctest.testmod(sys.modules[__name__])
-
+/opt/galaxy/tools/rgenetics/genetics.py
\ No newline at end of file
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/316b413c933b
changeset: 2865:316b413c933b
user: James Taylor <james(a)jamestaylor.org>
date: Sat Oct 10 14:03:08 2009 -0400
description:
Backed out changeset 037374950cc9
5 file(s) affected in this change:
lib/galaxy/web/controllers/dataset.py
lib/galaxy/web/controllers/root.py
templates/dataset/large_file.mako
templates/root/history_common.mako
test/base/twilltestcase.py
diffs (187 lines):
diff -r 037374950cc9 -r 316b413c933b lib/galaxy/web/controllers/dataset.py
--- a/lib/galaxy/web/controllers/dataset.py Fri Oct 09 16:59:35 2009 -0400
+++ b/lib/galaxy/web/controllers/dataset.py Sat Oct 10 14:03:08 2009 -0400
@@ -199,9 +199,8 @@
return 'This link may not be followed from within Galaxy.'
@web.expose
- def display(self, trans, encoded_id=None, show_all=False, to_ext=False, **kwd):
+ def display(self, trans, dataset_id=None, filename=None, show_all=False, **kwd):
"""Catches the dataset id and displays file contents as directed"""
- dataset_id = trans.security.decode_id( encoded_id )
data = trans.app.model.HistoryDatasetAssociation.get( dataset_id )
if not data:
raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) )
@@ -209,29 +208,27 @@
if trans.app.security_agent.can_access_dataset( roles, data.dataset ):
if data.state == trans.model.Dataset.states.UPLOAD:
return trans.show_error_message( "Please wait until this dataset finishes uploading before attempting to view it." )
-
- mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() )
- trans.response.set_content_type(mime)
- trans.log_event( "Display dataset id: %s" % str( dataset_id ) )
-
- if to_ext: # Saving the file
- trans.response.headers['Content-Length'] = int( os.stat( data.file_name ).st_size )
- if to_ext[0] != ".":
- to_ext = "." + to_ext
- valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- fname = data.name
- fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150]
- trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, to_ext)
- return open( data.file_name )
+ if filename is None or filename.lower() == "index":
+ file_path = data.file_name
+ mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() )
+ trans.response.set_content_type(mime)
+ trans.log_event( "Display dataset id: %s" % str( dataset_id ) )
+
+ else:
+ file_path = os.path.join( data.extra_files_path, filename )
+ mime, encoding = mimetypes.guess_type( file_path )
+ if mime is None:
+ mime = trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( file_path )[-1] )
+ trans.response.set_content_type( mime )
- if os.path.exists( data.file_name ):
+ if os.path.exists( file_path ):
max_peek_size = 1000000 # 1 MB
- if show_all or os.stat( data.file_name ).st_size < max_peek_size:
- return open( data.file_name )
+ if show_all or os.stat( file_path ).st_size < max_peek_size:
+ return open( file_path )
else:
trans.response.set_content_type( "text/html" )
- return trans.stream_template_mako( "/dataset/large_file.mako",
- truncated_data = open( data.file_name ).read(max_peek_size),
+ return trans.fill_template( "/dataset/large_file.mako",
+ truncated_data = open( file_path ).read(max_peek_size),
data = data )
else:
raise paste.httpexceptions.HTTPNotFound( "File Not Found (%s)." % ( filename ) )
diff -r 037374950cc9 -r 316b413c933b lib/galaxy/web/controllers/root.py
--- a/lib/galaxy/web/controllers/root.py Fri Oct 09 16:59:35 2009 -0400
+++ b/lib/galaxy/web/controllers/root.py Sat Oct 10 14:03:08 2009 -0400
@@ -134,6 +134,72 @@
return rval
## ---- Dataset display / editing ----------------------------------------
+
+ @web.expose
+ def display( self, trans, id=None, hid=None, tofile=None, toext=".txt", **kwd ):
+ """
+ Returns data directly into the browser.
+ Sets the mime-type according to the extension
+ """
+ if hid is not None:
+ try:
+ hid = int( hid )
+ except:
+ return "hid '%s' is invalid" %str( hid )
+ history = trans.get_history()
+ for dataset in history.datasets:
+ if dataset.hid == hid:
+ data = dataset
+ break
+ else:
+ raise Exception( "No dataset with hid '%d'" % hid )
+ else:
+ try:
+ data = self.app.model.HistoryDatasetAssociation.get( id )
+ except:
+ return "Dataset id '%s' is invalid" %str( id )
+ if data:
+ user, roles = trans.get_user_and_roles()
+ if trans.app.security_agent.can_access_dataset( roles, data.dataset ):
+ mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() )
+ trans.response.set_content_type(mime)
+ if tofile:
+ fStat = os.stat(data.file_name)
+ trans.response.headers['Content-Length'] = int(fStat.st_size)
+ if toext[0:1] != ".":
+ toext = "." + toext
+ valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ fname = data.name
+ fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150]
+ trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, toext)
+ trans.log_event( "Display dataset id: %s" % str(id) )
+ try:
+ return open( data.file_name )
+ except:
+ return "This dataset contains no content"
+ else:
+ return "You are not allowed to access this dataset"
+ else:
+ return "No dataset with id '%s'" % str( id )
+
+ @web.expose
+ def display_child(self, trans, parent_id=None, designation=None, tofile=None, toext=".txt"):
+ """
+ Returns child data directly into the browser, based upon parent_id and designation.
+ """
+ try:
+ data = self.app.model.HistoryDatasetAssociation.get( parent_id )
+ if data:
+ child = data.get_child_by_designation( designation )
+ if child:
+ user, roles = trans.get_user_and_roles()
+ if trans.app.security_agent.can_access_dataset( roles, child ):
+ return self.display( trans, id=child.id, tofile=tofile, toext=toext )
+ else:
+ return "You are not privileged to access this dataset."
+ except Exception:
+ pass
+ return "A child named %s could not be found for data %s" % ( designation, parent_id )
@web.expose
def display_as( self, trans, id=None, display_app=None, **kwd ):
diff -r 037374950cc9 -r 316b413c933b templates/dataset/large_file.mako
--- a/templates/dataset/large_file.mako Fri Oct 09 16:59:35 2009 -0400
+++ b/templates/dataset/large_file.mako Sat Oct 10 14:03:08 2009 -0400
@@ -2,8 +2,8 @@
<div class="warningmessagelarge">
This dataset is large and only the first megabyte is shown below.<br />
- <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), show_all=True )}">Show all</a> |
- <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">Save</a>
+ <a href="${h.url_for( controller='dataset', action='display', dataset_id=data.id, show_all=True )}">Show all</a> |
+ <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}">Save</a>
</div>
<pre>
diff -r 037374950cc9 -r 316b413c933b templates/root/history_common.mako
--- a/templates/root/history_common.mako Fri Oct 09 16:59:35 2009 -0400
+++ b/templates/root/history_common.mako Sat Oct 10 14:03:08 2009 -0400
@@ -32,7 +32,7 @@
<img src="${h.url_for('/static/images/pencil_icon_grey.png')}" width='16' height='16' alt='edit attributes' title='edit attributes' class='button edit' border='0'>
%endif
%else:
- <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ))}" target="galaxy_main"></a>
+ <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', dataset_id=data.id, action='display', filename='index')}" target="galaxy_main"></a>
%if user_owns_dataset:
<a class="icon-button edit" title="edit attributes" href="${h.url_for( controller='root', action='edit', id=data.id )}" target="galaxy_main"></a>
%endif
@@ -86,7 +86,7 @@
<div class="info">${_('Info: ')}${data.display_info()}</div>
<div>
%if data.has_data:
- <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">save</a>
+ <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}" target="_blank">save</a>
%if user_owns_dataset:
| <a href="${h.url_for( controller='tool_runner', action='rerun', id=data.id )}" target="galaxy_main">rerun</a>
%endif
diff -r 037374950cc9 -r 316b413c933b test/base/twilltestcase.py
--- a/test/base/twilltestcase.py Fri Oct 09 16:59:35 2009 -0400
+++ b/test/base/twilltestcase.py Sat Oct 10 14:03:08 2009 -0400
@@ -390,9 +390,9 @@
self.visit_url( "%s/dataset/undelete?id=%s" % ( self.url, hda_id ) )
if check_str:
self.check_page_for_string( check_str )
- def display_history_item( self, hda_id, check_str='' ):
+ def display_history_item( self, id, check_str='' ):
"""Displays a history item - simulates eye icon click"""
- self.visit_url( '%s/dataset/display?encoded_id=%s' % ( self.url, self.security.encode_id( hda_id ) ) )
+ self.visit_url( '%s/datasets/%s/display/index' % ( self.url, id ) )
if check_str:
self.check_page_for_string( check_str )
self.home()
1
0
13 Oct '09
details: http://www.bx.psu.edu/hg/galaxy/rev/037374950cc9
changeset: 2864:037374950cc9
user: Kanwei Li <kanwei(a)gmail.com>
date: Fri Oct 09 16:59:35 2009 -0400
description:
Encode HDA ids when peeking and saving. Fixes #110
5 file(s) affected in this change:
lib/galaxy/web/controllers/dataset.py
lib/galaxy/web/controllers/root.py
templates/dataset/large_file.mako
templates/root/history_common.mako
test/base/twilltestcase.py
diffs (187 lines):
diff -r 5d63e4eee122 -r 037374950cc9 lib/galaxy/web/controllers/dataset.py
--- a/lib/galaxy/web/controllers/dataset.py Fri Oct 09 16:02:31 2009 -0400
+++ b/lib/galaxy/web/controllers/dataset.py Fri Oct 09 16:59:35 2009 -0400
@@ -199,8 +199,9 @@
return 'This link may not be followed from within Galaxy.'
@web.expose
- def display(self, trans, dataset_id=None, filename=None, show_all=False, **kwd):
+ def display(self, trans, encoded_id=None, show_all=False, to_ext=False, **kwd):
"""Catches the dataset id and displays file contents as directed"""
+ dataset_id = trans.security.decode_id( encoded_id )
data = trans.app.model.HistoryDatasetAssociation.get( dataset_id )
if not data:
raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) )
@@ -208,27 +209,29 @@
if trans.app.security_agent.can_access_dataset( roles, data.dataset ):
if data.state == trans.model.Dataset.states.UPLOAD:
return trans.show_error_message( "Please wait until this dataset finishes uploading before attempting to view it." )
- if filename is None or filename.lower() == "index":
- file_path = data.file_name
- mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() )
- trans.response.set_content_type(mime)
- trans.log_event( "Display dataset id: %s" % str( dataset_id ) )
-
- else:
- file_path = os.path.join( data.extra_files_path, filename )
- mime, encoding = mimetypes.guess_type( file_path )
- if mime is None:
- mime = trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( file_path )[-1] )
- trans.response.set_content_type( mime )
+
+ mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() )
+ trans.response.set_content_type(mime)
+ trans.log_event( "Display dataset id: %s" % str( dataset_id ) )
+
+ if to_ext: # Saving the file
+ trans.response.headers['Content-Length'] = int( os.stat( data.file_name ).st_size )
+ if to_ext[0] != ".":
+ to_ext = "." + to_ext
+ valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ fname = data.name
+ fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150]
+ trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, to_ext)
+ return open( data.file_name )
- if os.path.exists( file_path ):
+ if os.path.exists( data.file_name ):
max_peek_size = 1000000 # 1 MB
- if show_all or os.stat( file_path ).st_size < max_peek_size:
- return open( file_path )
+ if show_all or os.stat( data.file_name ).st_size < max_peek_size:
+ return open( data.file_name )
else:
trans.response.set_content_type( "text/html" )
- return trans.fill_template( "/dataset/large_file.mako",
- truncated_data = open( file_path ).read(max_peek_size),
+ return trans.stream_template_mako( "/dataset/large_file.mako",
+ truncated_data = open( data.file_name ).read(max_peek_size),
data = data )
else:
raise paste.httpexceptions.HTTPNotFound( "File Not Found (%s)." % ( filename ) )
diff -r 5d63e4eee122 -r 037374950cc9 lib/galaxy/web/controllers/root.py
--- a/lib/galaxy/web/controllers/root.py Fri Oct 09 16:02:31 2009 -0400
+++ b/lib/galaxy/web/controllers/root.py Fri Oct 09 16:59:35 2009 -0400
@@ -134,72 +134,6 @@
return rval
## ---- Dataset display / editing ----------------------------------------
-
- @web.expose
- def display( self, trans, id=None, hid=None, tofile=None, toext=".txt", **kwd ):
- """
- Returns data directly into the browser.
- Sets the mime-type according to the extension
- """
- if hid is not None:
- try:
- hid = int( hid )
- except:
- return "hid '%s' is invalid" %str( hid )
- history = trans.get_history()
- for dataset in history.datasets:
- if dataset.hid == hid:
- data = dataset
- break
- else:
- raise Exception( "No dataset with hid '%d'" % hid )
- else:
- try:
- data = self.app.model.HistoryDatasetAssociation.get( id )
- except:
- return "Dataset id '%s' is invalid" %str( id )
- if data:
- user, roles = trans.get_user_and_roles()
- if trans.app.security_agent.can_access_dataset( roles, data.dataset ):
- mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() )
- trans.response.set_content_type(mime)
- if tofile:
- fStat = os.stat(data.file_name)
- trans.response.headers['Content-Length'] = int(fStat.st_size)
- if toext[0:1] != ".":
- toext = "." + toext
- valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- fname = data.name
- fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150]
- trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, toext)
- trans.log_event( "Display dataset id: %s" % str(id) )
- try:
- return open( data.file_name )
- except:
- return "This dataset contains no content"
- else:
- return "You are not allowed to access this dataset"
- else:
- return "No dataset with id '%s'" % str( id )
-
- @web.expose
- def display_child(self, trans, parent_id=None, designation=None, tofile=None, toext=".txt"):
- """
- Returns child data directly into the browser, based upon parent_id and designation.
- """
- try:
- data = self.app.model.HistoryDatasetAssociation.get( parent_id )
- if data:
- child = data.get_child_by_designation( designation )
- if child:
- user, roles = trans.get_user_and_roles()
- if trans.app.security_agent.can_access_dataset( roles, child ):
- return self.display( trans, id=child.id, tofile=tofile, toext=toext )
- else:
- return "You are not privileged to access this dataset."
- except Exception:
- pass
- return "A child named %s could not be found for data %s" % ( designation, parent_id )
@web.expose
def display_as( self, trans, id=None, display_app=None, **kwd ):
diff -r 5d63e4eee122 -r 037374950cc9 templates/dataset/large_file.mako
--- a/templates/dataset/large_file.mako Fri Oct 09 16:02:31 2009 -0400
+++ b/templates/dataset/large_file.mako Fri Oct 09 16:59:35 2009 -0400
@@ -2,8 +2,8 @@
<div class="warningmessagelarge">
This dataset is large and only the first megabyte is shown below.<br />
- <a href="${h.url_for( controller='dataset', action='display', dataset_id=data.id, show_all=True )}">Show all</a> |
- <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}">Save</a>
+ <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), show_all=True )}">Show all</a> |
+ <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">Save</a>
</div>
<pre>
diff -r 5d63e4eee122 -r 037374950cc9 templates/root/history_common.mako
--- a/templates/root/history_common.mako Fri Oct 09 16:02:31 2009 -0400
+++ b/templates/root/history_common.mako Fri Oct 09 16:59:35 2009 -0400
@@ -32,7 +32,7 @@
<img src="${h.url_for('/static/images/pencil_icon_grey.png')}" width='16' height='16' alt='edit attributes' title='edit attributes' class='button edit' border='0'>
%endif
%else:
- <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', dataset_id=data.id, action='display', filename='index')}" target="galaxy_main"></a>
+ <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ))}" target="galaxy_main"></a>
%if user_owns_dataset:
<a class="icon-button edit" title="edit attributes" href="${h.url_for( controller='root', action='edit', id=data.id )}" target="galaxy_main"></a>
%endif
@@ -86,7 +86,7 @@
<div class="info">${_('Info: ')}${data.display_info()}</div>
<div>
%if data.has_data:
- <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}" target="_blank">save</a>
+ <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">save</a>
%if user_owns_dataset:
| <a href="${h.url_for( controller='tool_runner', action='rerun', id=data.id )}" target="galaxy_main">rerun</a>
%endif
diff -r 5d63e4eee122 -r 037374950cc9 test/base/twilltestcase.py
--- a/test/base/twilltestcase.py Fri Oct 09 16:02:31 2009 -0400
+++ b/test/base/twilltestcase.py Fri Oct 09 16:59:35 2009 -0400
@@ -390,9 +390,9 @@
self.visit_url( "%s/dataset/undelete?id=%s" % ( self.url, hda_id ) )
if check_str:
self.check_page_for_string( check_str )
- def display_history_item( self, id, check_str='' ):
+ def display_history_item( self, hda_id, check_str='' ):
"""Displays a history item - simulates eye icon click"""
- self.visit_url( '%s/datasets/%s/display/index' % ( self.url, id ) )
+ self.visit_url( '%s/dataset/display?encoded_id=%s' % ( self.url, self.security.encode_id( hda_id ) ) )
if check_str:
self.check_page_for_string( check_str )
self.home()
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/5d63e4eee122
changeset: 2863:5d63e4eee122
user: James Taylor <james(a)jamestaylor.org>
date: Fri Oct 09 16:02:31 2009 -0400
description:
Proper title for pages
1 file(s) affected in this change:
templates/page/display.mako
diffs (19 lines):
diff -r 205a6782d9ae -r 5d63e4eee122 templates/page/display.mako
--- a/templates/page/display.mako Fri Oct 09 14:47:54 2009 -0400
+++ b/templates/page/display.mako Fri Oct 09 16:02:31 2009 -0400
@@ -1,4 +1,6 @@
<%inherit file="/base_panels.mako"/>
+
+<%def name="title()">Galaxy :: ${page.user.username} :: ${page.title}</%def>
<%def name="init()">
<%
@@ -14,7 +16,7 @@
<div class="unified-panel-header" unselectable="on">
<div class="unified-panel-header-inner">
- ${page.user.username} / ${page.title}
+ ${page.user.username} :: ${page.title}
</div>
</div>
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/080b3c44963e
changeset: 2866:080b3c44963e
user: Anton Nekrutenko <anton(a)bx.psu.edu>
date: Sat Oct 10 16:18:08 2009 -0400
description:
Trimmer fix for fastq processing
1 file(s) affected in this change:
tools/filters/trimmer.py
diffs (14 lines):
diff -r 316b413c933b -r 080b3c44963e tools/filters/trimmer.py
--- a/tools/filters/trimmer.py Sat Oct 10 14:03:08 2009 -0400
+++ b/tools/filters/trimmer.py Sat Oct 10 16:18:08 2009 -0400
@@ -83,9 +83,7 @@
print line
continue
- if options.fastq and line.startswith('@'):
- stop_err('Malformed fastq file: even numbered line starts with @')
-
+
if line[0] not in invalid_starts:
if col == 0:
if int( options.end ) > 0:
1
0
13 Oct '09
details: http://www.bx.psu.edu/hg/galaxy/rev/205a6782d9ae
changeset: 2862:205a6782d9ae
user: jeremy goecks <jeremy.goecks at emory.edu>
date: Fri Oct 09 14:47:54 2009 -0400
description:
Increase number of datasets/page on HDA grid.
1 file(s) affected in this change:
lib/galaxy/web/controllers/dataset.py
diffs (21 lines):
diff -r c7dd346c20d5 -r 205a6782d9ae lib/galaxy/web/controllers/dataset.py
--- a/lib/galaxy/web/controllers/dataset.py Thu Oct 08 22:01:36 2009 -0400
+++ b/lib/galaxy/web/controllers/dataset.py Fri Oct 09 14:47:54 2009 -0400
@@ -107,7 +107,7 @@
return accepted_filters
# Grid definition
- title = "Stored datasets"
+ title = "Stored Datasets"
model_class = model.HistoryDatasetAssociation
template='/dataset/grid.mako'
default_sort_key = "-create_time"
@@ -125,7 +125,7 @@
default_filter = dict( deleted="False", tags="All" )
preserve_state = False
use_paging = True
- num_rows_per_page = 10
+ num_rows_per_page = 50
def apply_default_filter( self, trans, query, **kwargs ):
# This is a somewhat obtuse way to join the History and HDA tables. However, it's necessary
# because the initial query in build_initial_query is specified on the HDA table (this is reasonable)
1
0