- galaxy-dev - lists.galaxyproject.org

[hg] galaxy 2872: real replacement for stoopid symlink
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/9d67ae5ecda7 changeset: 2872:9d67ae5ecda7 user: Ross Lazarus <ross.lazarus(a)gmail.com> date: Mon Oct 12 13:19:31 2009 -0400 description: real replacement for stoopid symlink 1 file(s) affected in this change: lib/galaxy/datatypes/genetics.py diffs (671 lines): diff -r 75a488a0cbc9 -r 9d67ae5ecda7 lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Mon Oct 12 12:48:37 2009 -0400 +++ b/lib/galaxy/datatypes/genetics.py Mon Oct 12 13:19:31 2009 -0400 @@ -0,0 +1,667 @@ +""" +rgenetics datatypes +Use at your peril +Ross Lazarus +for the rgenetics and galaxy projects + +genome graphs datatypes derived from Interval datatypes +genome graphs datasets have a header row with appropriate columnames +The first column is always the marker - eg columname = rs, first row= rs12345 if the rows are snps +subsequent row values are all numeric ! Will fail if any non numeric (eg '+' or 'NA') values +ross lazarus for rgenetics +august 20 2007 +""" + +import logging, os, sys, time, tempfile, shutil, string, glob +import data +from galaxy import util +from cgi import escape +import urllib +from galaxy.web import url_for +from galaxy.datatypes import metadata +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.data import Text +from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes.images import Html + +gal_Log = logging.getLogger(__name__) + +class GenomeGraphs( Tabular ): + """Tab delimited data containing a marker id and any number of numeric values""" + + """Add metadata elements""" + MetadataElement( name="markerCol", default=1, desc="Marker ID column", param=metadata.ColumnParameter ) + MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True ) + MetadataElement( name="column_types", default=[], desc="Column types", readonly=True, visible=False ) + file_ext = 'gg' + + def __init__(self, **kwd): + """Initialize gg datatype, by adding UCSC display apps""" + 
Tabular.__init__(self, **kwd) + self.add_display_app ( 'ucsc', 'Genome Graph', 'as_ucsc_display_file', 'ucsc_links' ) + + def set_peek( self, dataset ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name ) + dataset.blurb = util.commaify( str( data.get_line_count( dataset.file_name ) ) ) + " rows" + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def get_mime(self): + """Returns the mime type of the datatype""" + return 'application/vnd.msexcel' + + def get_estimated_display_viewport( self, dataset ): + """Return a chrom, start, stop tuple for viewing a file.""" + raise notImplemented + + def as_ucsc_display_file( self, dataset, **kwd ): + """Returns file""" + return file(dataset.file_name,'r') + + def ucsc_links( self, dataset, type, app, base_url ): + """ from the ever-helpful angie hinrichs angie(a)soe.ucsc.edu + a genome graphs call looks like this + http://genome.ucsc.edu/cgi-bin/hgGenome?clade=mammal&org=Human&db=hg18&hgGe… + &hgGenome_dataSetDescription=test&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess + &hgGenome_columnLabels=best%20guess&hgGenome_maxVal=&hgGenome_labelVals= + &hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=http://galaxy.esphealth.org/datasets/333/display/index + &hgGenome_doSubmitUpload=submit + Galaxy gives this for an interval file + http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg18&position=chr1:1-1000&hgt.cu… + http%3A%2F%2Fgalaxy.esphealth.org%2Fdisplay_as%3Fid%3D339%26display_app%3Ducsc + """ + ret_val = [] + ggtail = '&hgGenome_doSubmitUpload=submit' + if not dataset.dbkey: + dataset.dbkey = 'hg18' # punt! + ret_val = [] + ggtail = '&hgGenome_doSubmitUpload=submit' + if not dataset.dbkey: + dataset.dbkey = 'hg18' # punt! 
+ if dataset.has_data: + for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey): + if site_name in app.config.ucsc_display_sites: + site_url = site_url.replace('/hgTracks?','/hgGenome?') # for genome graphs + display_url = urllib.quote_plus( "%s/display_as?id=%i&display_app=%s" % (base_url, dataset.id, type) ) + sl = ["%sdb=%s" % (site_url,dataset.dbkey ),] + sl.append("&hgGenome_dataSetName=%s&hgGenome_dataSetDescription=%s" % (dataset.name, 'GalaxyGG_data')) + sl.append("&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess") + sl.append("&hgGenome_columnLabels=first%20row&hgGenome_maxVal=&hgGenome_labelVals=") + sl.append("&hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=") + s = urllib.quote_plus( ''.join(sl) ) + link = "%s%s%s" % (s, display_url, ggtail ) + ret_val.append( (site_name, link) ) + return ret_val + + + def validate( self, dataset ): + """Validate a gg file - all numeric after header row""" + errors = list() + infile = open(dataset.file_name, "r") + for i,row in enumerate(infile): #drop header + badvals = [] + if i > 0: + ll = row.strip().split('\t') + for j,x in enumerate(ll): + try: + x = float(x) + except: + badval.append('col%d:%s' % (j+1,x)) + if len(badvals) > 0: + errors.append('row %d, %s' % (' '.join(badvals))) + return errors + + def repair_methods( self, dataset ): + """Return options for removing errors along with a description""" + return [("lines","Remove erroneous lines")] + + def sniff(self,filename): + """ + """ + infile = open(dataset.file_name, "r") + header= infile.next() # header + badvals = [] + for i,row in enumerate(infile[:10]): # sample first 10 rows + ll = row.strip().split('\t') + for j,x in enumerate(ll[1:]): # ignore first identifier col + try: + x = float(x) + except: + badval.append('col%d:%s' % (j+1,x)) + if len(badvals) > 0: + return False + else: + return True + +class rgTabList(Tabular): + """ for sampleid and for featureid lists of exclusions or inclusions in the clean tool + featureid 
subsets on statistical criteria -> specialized display such as gg + """ + file_ext = "rgTList" + + + def __init__(self, **kwd): + """Initialize featurelistt datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = [] + + def make_html_table( self, dataset, skipchars=[] ): + """Create HTML table, used for displaying peek""" + out = ['<table cellspacing="0" cellpadding="3">'] + comments = [] + try: + # Generate column header + out.append( '<tr>' ) + for i, name in enumerate( self.column_names ): + out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) ) + if dataset.metadata.columns - len( self.column_names ) > 0: + for i in range( len( self.column_names ), dataset.metadata.columns ): + out.append( '<th>%s</th>' % str( i+1 ) ) + out.append( '</tr>' ) + out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) ) + out.append( '</table>' ) + out = "".join( out ) + except Exception, exc: + out = "Can't create peek %s" % exc + return out + + +class rgSampleList(rgTabList): + """ for sampleid exclusions or inclusions in the clean tool + output from QC eg excess het, gender error, ibd pair member,eigen outlier,excess mendel errors,... + since they can be uploaded, should be flexible + but they are persistent at least + same infrastructure for expression? + """ + file_ext = "rgSList" + + def __init__(self, **kwd): + """Initialize samplelist datatype""" + rgTabList.__init__( self, **kwd ) + self.column_names[0] = 'FID' + self.column_names[1] = 'IID' + # this is what Plink wants as at 2009 + + def sniff(self,filename): + """ + """ + infile = open(dataset.file_name, "r") + header= infile.next() # header + if header[0] == 'FID' and header[1] == 'IID': + return True + else: + return False + +class rgFeatureList( rgTabList ): + """ for featureid lists of exclusions or inclusions in the clean tool + output from QC eg low maf, high missingness, bad hwe in controls, excess mendel errors,... 
+ featureid subsets on statistical criteria -> specialized display such as gg + same infrastructure for expression? + """ + file_ext = "rgFList" + + def __init__(self, **kwd): + """Initialize featurelist datatype""" + rgTabList.__init__( self, **kwd ) + for i,s in enumerate(['#FeatureId', 'Chr', 'Genpos', 'Mappos']): + self.column_names[i] = s + + +class Rgenetics(Html): + """class to use for rgenetics""" + + MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="rgenetics", + readonly=True, set_in_upload=True) + + composite_type = 'auto_primary_file' + allow_datatype_change = False + file_ext = 'rgenetics' + + + def missing_meta( self, dataset=None, **kwargs): + """Checks for empty meta values""" + for key, value in dataset.metadata.items(): + if not value: + return True + return False + + def generate_primary_file( self, dataset = None ): + rval = ['<html><head><title>Rgenetics Galaxy Composite Dataset </title></head><p/>'] + rval.append('<div>This composite dataset is composed of the following files:<p/><ul>') + for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems(): + opt_text = '' + if composite_file.optional: + opt_text = ' (optional)' + rval.append( '<li><a href="%s" type="application/binary">%s</a>%s' % ( composite_name, composite_name, opt_text ) ) + rval.append( '</ul></div></html>' ) + return "\n".join( rval ) + +class SNPMatrix(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="snpmatrix" + + def set_peek( self, dataset ): + if not dataset.dataset.purged: + dataset.peek = "Binary RGenetics file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff(self,filename): + """ need to check the file header hex code + """ + infile = open(dataset.file_name, "b") + head = infile.read(16) + head = [hex(x) 
for x in head] + if head <> '': + return False + else: + return True + + +class Lped(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="lped" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True ) + self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True ) + + +class Pphe(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="pphe" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.pphe', description = 'Plink Phenotype File', substitute_name_with_metadata = 'base_name' ) + + +class Lmap(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="lmap" + +class Fphe(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="fphe" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.fphe', description = 'FBAT Phenotype File', substitute_name_with_metadata = 'base_name' ) + +class Phe(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="phe" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.phe', description = 'Phenotype File', substitute_name_with_metadata = 'base_name' ) + + + +class Fped(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="fped" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.fped', description = 'FBAT format pedfile', substitute_name_with_metadata = 'base_name' ) + + +class Pbed(Rgenetics): + """fake class to distinguish different 
species of Rgenetics data collections + """ + file_ext="pbed" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.bim', substitute_name_with_metadata = 'base_name', is_binary = True ) + self.add_composite_file( '%s.bed', substitute_name_with_metadata = 'base_name', is_binary = True ) + self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True ) + +class Eigenstratgeno(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="eigenstratgeno" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True ) + self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True ) + self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) + + + +class Eigenstratpca(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="eigenstratpca" + + def __init__( self, **kwd ): + Rgenetics.__init__(self, **kwd) + self.add_composite_file( '%s.eigenstratpca', description = 'Eigenstrat PCA file', substitute_name_with_metadata = 'base_name' ) + + +class Snptest(Rgenetics): + """fake class to distinguish different species of Rgenetics data collections + """ + file_ext="snptest" + + +class Pheno(Tabular): + """ + base class for pheno files + """ + file_ext = 'pheno' + + +class RexpBase( Html ): + """base class for BioC data structures in Galaxy + must be constructed with the pheno data in place since that + goes into the metadata for each instance + """ + MetadataElement( name="columns", default=0, desc="Number of columns", visible=True ) + MetadataElement( name="column_names", default=[], desc="Column names", visible=True ) + MetadataElement(name="pheCols",default=[],desc="Select list for potentially interesting 
variables",visible=True) + MetadataElement( name="base_name", + desc="base name for all transformed versions of this expression dataset", default='rexpression', set_in_upload=True) + MetadataElement( name="pheno_path", desc="Path to phenotype data for this experiment", default="rexpression.pheno", visible=True) + file_ext = 'rexpbase' + html_table = None + is_binary = True + composite_type = 'auto_primary_file' + allow_datatype_change = False + + + def __init__( self, **kwd ): + Html.__init__(self,**kwd) + self.add_composite_file( '%s.pheno', description = 'Phenodata tab text file', + substitute_name_with_metadata = 'base_name', is_binary=True) + + def generate_primary_file( self, dataset = None ): + """ This is called only at upload to write the html file + cannot rename the datasets here - they come with the default unfortunately + """ + return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>' + + def get_phecols(self, phenolist=[], maxConc=20): + """ + sept 2009: cannot use whitespace to split - make a more complex structure here + and adjust the methods that rely on this structure + return interesting phenotype column names for an rexpression eset or affybatch + to use in array subsetting and so on. Returns a data structure for a + dynamic Galaxy select parameter. + A column with only 1 value doesn't change, so is not interesting for + analysis. A column with a different value in every row is equivalent to a unique + identifier so is also not interesting for anova or limma analysis - both these + are removed after the concordance (count of unique terms) is constructed for each + column. 
Then a complication - each remaining pair of columns is tested for + redundancy - if two columns are always paired, then only one is needed :) + """ + for nrows,row in enumerate(phenolist): # construct concordance + if len(row.strip()) == 0: + break + row = row.strip().split('\t') + if nrows == 0: # set up from header + head = row + totcols = len(row) + concordance = [{} for x in head] # list of dicts + else: + for col,code in enumerate(row): # keep column order correct + if col >= totcols: + gal_Log.warning('### get_phecols error in pheno file - row %d col %d (%s) longer than header %s' % (nrows, col, row, head)) + else: + concordance[col].setdefault(code,0) # first one is zero + concordance[col][code] += 1 + useCols = [] + useConc = [] # columns of interest to keep + nrows = len(phenolist) + nrows -= 1 # drop head from count + for c,conc in enumerate(concordance): # c is column number + if (len(conc) > 1) and (len(conc) < min(nrows,maxConc)): # not all same and not all different!! + useConc.append(conc) # keep concordance + useCols.append(c) # keep column + nuse = len(useCols) + # now to check for pairs of concordant columns - drop one of these. + delme = [] + p = phenolist[1:] # drop header + plist = [x.strip().split('\t') for x in p] # list of lists + phe = [[x[i] for i in useCols] for x in plist if len(x) >= totcols] # strip unused data + for i in range(0,(nuse-1)): # for each interesting column + for j in range(i+1,nuse): + kdict = {} + for row in phe: # row is a list of lists + k = '%s%s' % (row[i],row[j]) # composite key + kdict[k] = k + if (len(kdict.keys()) == len(concordance[useCols[j]])): # i and j are always matched + delme.append(j) + delme = list(set(delme)) # remove dupes + listCol = [] + delme.sort() + delme.reverse() # must delete from far end! 
+ for i in delme: + del useConc[i] # get rid of concordance + del useCols[i] # and usecols entry + for i,conc in enumerate(useConc): # these are all unique columns for the design matrix + ccounts = [(conc.get(code,0),code) for code in conc.keys()] # decorate + ccounts.sort() + cc = [(x[1],x[0]) for x in ccounts] # list of code count tuples + codeDetails = (head[useCols[i]],cc) # ('foo',[('a',3),('b',11),..]) + listCol.append(codeDetails) + if len(listCol) > 0: + res = listCol + # metadata.pheCols becomes [('bar;22,zot;113','foo'), ...] + else: + res = [('no usable phenotype columns found',[('?',0),]),] + return res + + + + def get_pheno(self,dataset): + """expects a .pheno file in the extra_files_dir - ugh + note that R is wierd and adds the row.name in + the header so the columns are all wrong - unless you tell it not to. + A file can be written as + write.table(file='foo.pheno',pData(foo),sep='\t',quote=F,row.names=F) + """ + p = file(dataset.metadata.pheno_path,'r').readlines() + if len(p) > 0: # should only need to fix an R pheno file once + head = p[0].strip().split('\t') + line1 = p[1].strip().split('\t') + if len(head) < len(line1): + head.insert(0,'ChipFileName') # fix R write.table b0rken-ness + p[0] = '\t'.join(head) + else: + p = [] + return '\n'.join(p) + + def set_peek( self, dataset ): + """expects a .pheno file in the extra_files_dir - ugh + note that R is wierd and does not include the row.name in + the header. 
why?""" + if not dataset.dataset.purged: + pp = os.path.join(dataset.extra_files_path,'%s.pheno' % dataset.metadata.base_name) + try: + p = file(pp,'r').readlines() + except: + p = ['##failed to find %s' % pp,] + gal_Log.debug('@@@rexpression set_peek, dataset.name=%s,\npp=%s,\np=%s' % (dataset.name,pp,p[:3])) + dataset.peek = ''.join(p[:5]) + dataset.blurb = 'Galaxy Rexpression composite file' + else: + dataset.peek = 'file does not exist\n' + dataset.blurb = 'file purged from disk' + + def get_peek( self, dataset ): + """expects a .pheno file in the extra_files_dir - ugh + """ + pp = os.path.join(dataset.extra_files_path,'%s.pheno' % dataset.metadata.base_name) + try: + p = file(pp,'r').readlines() + except: + p = ['##failed to find %s' % pp] + gal_Log.debug('@@@rexpression get_peek, dataset.file_name=%s,\npp=%s,\np=%s' % (dataset.file_name,pp,p[:3])) + return ''.join(p[:5]) + + def get_file_peek(self,filename): + """ + can't really peek at a filename - need the extra_files_path and such? 
+ """ + h = '## rexpression get_file_peek: no file found' + try: + h = file(filename,'r').readlines() + except: + pass + return ''.join(h[:5]) + + def regenerate_primary_file(self,dataset): + """cannot do this until we are setting metadata + """ + bn = dataset.metadata.base_name + flist = os.listdir(dataset.extra_files_path) + rval = ['<html><head><title>Files for Composite Dataset %s</title></head><p/>Comprises the following files:<p/><ul>' % (bn)] + for i,fname in enumerate(flist): + sfname = os.path.split(fname)[-1] + rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) ) + rval.append( '</ul></html>' ) + gal_Log.debug('rexpression regenerate primary file, writing %s' % rval) + f = file(dataset.file_name,'w') + f.write("\n".join( rval )) + f.write('\n') + f.close() + + """Add metadata elements""" + def init_meta( self, dataset, copy_from=None ): + if copy_from: + dataset.metadata = copy_from.metadata + + def set_meta( self, dataset, **kwd ): + + """ + NOTE we apply the tabular machinary to the phenodata extracted + from a BioC eSet or affybatch. + + """ + try: + flist = os.listdir(dataset.extra_files_path) + except: + gal_Log.debug('@@@rexpression set_meta failed - no dataset?') + return + bn = None + for f in flist: + n = os.path.splitext(f)[0] + if not bn: + bn = n + dataset.metadata.base_name = bn + if not bn: + bn = '?' 
+ pn = '%s.pheno' % (bn) + pp = os.path.join(dataset.extra_files_path,pn) + dataset.metadata.pheno_path=pp + try: + pf = file(pp,'r').readlines() # read the basename.phenodata in the extra_files_path + except: + pf = None + if pf: + h = pf[0].strip() + h = h.split('\t') # hope is header + h = [escape(x) for x in h] + dataset.metadata.column_names = h + dataset.metadata.columns = len(h) + dataset.peek = ''.join(pf[:5]) + else: + dataset.metadata.column_names = [] + dataset.metadata.columns = 0 + dataset.peek = 'No pheno file found' + if len(pf) > 1: + dataset.metadata.pheCols = self.get_phecols(phenolist=pf) + else: + dataset.metadata.pheCols = [('','No useable phenotypes found',False),] + #self.regenerate_primary_file(dataset) + if not dataset.info: + dataset.info = 'Galaxy Expression datatype object' + if not dataset.blurb: + dataset.blurb = 'R loadable BioC expression object for the Rexpression Galaxy toolkit' + gal_Log.debug('@@@rexpression set_meta on dsn=%s, pf=%s, peek=%s' % (dataset.file_name,''.join(pf[:5]),dataset.peek)) + return True + + def make_html_table( self, pp='nothing supplied from peek\n'): + """Create HTML table, used for displaying peek""" + out = ['<table cellspacing="0" cellpadding="3">',] + p = pp.split('\n') + try: + # Generate column header + for i,row in enumerate(p): + lrow = row.strip().split('\t') + if i == 0: + orow = ['<th>%s</th>' % escape(x) for x in lrow] + orow.insert(0,'<tr>') + orow.append('</tr>') + else: + orow = ['<td>%s</td>' % escape(x) for x in lrow] + orow.insert(0,'<tr>') + orow.append('</tr>') + out.append(''.join(orow)) + out.append( '</table>' ) + out = "\n".join( out ) + except Exception, exc: + out = "Can't create html table %s" % str( exc ) + return out + + def display_peek( self, dataset ): + """Returns formatted html of peek""" + out=self.make_html_table(dataset.peek) + return out + + def get_mime(self): + """Returns the mime type of the datatype""" + return 'text/html' + + +class Affybatch( RexpBase ): + 
"""derived class for BioC data structures in Galaxy """ + + file_ext = "affybatch" + + def __init__( self, **kwd ): + RexpBase.__init__(self, **kwd) + self.add_composite_file( '%s.affybatch', description = 'AffyBatch R object saved to file', + substitute_name_with_metadata = 'base_name', is_binary=True ) + +class Eset( RexpBase ): + """derived class for BioC data structures in Galaxy """ + file_ext = "eset" + + def __init__( self, **kwd ): + RexpBase.__init__(self, **kwd) + self.add_composite_file( '%s.eset', description = 'ESet R object saved to file', + substitute_name_with_metadata = 'base_name', is_binary = True ) + + +class MAlist( RexpBase ): + """derived class for BioC data structures in Galaxy """ + file_ext = "malist" + + def __init__( self, **kwd ): + RexpBase.__init__(self, **kwd) + self.add_composite_file( '%s.malist', description = 'MAlist R object saved to file', + substitute_name_with_metadata = 'base_name', is_binary = True ) + + +if __name__ == '__main__': + import doctest, sys + doctest.testmod(sys.modules[__name__]) + +

1 0

[hg] galaxy 2870: symlinks are not very useful
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/1a0bb7d6897c changeset: 2870:1a0bb7d6897c user: Ross Lazarus <ross.lazarus(a)gmail.com> date: Sun Oct 11 15:07:11 2009 -0400 description: symlinks are not very useful 6 file(s) affected in this change: lib/galaxy/datatypes/converters/lped_to_fped_converter.py lib/galaxy/datatypes/converters/lped_to_fped_converter.xml lib/galaxy/datatypes/converters/lped_to_pbed_converter.py lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml lib/galaxy/datatypes/converters/pbed_to_lped_converter.py lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml diffs (383 lines): diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_fped_converter.py --- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15:07:11 2009 -0400 @@ -1,1 +1,110 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.py \ No newline at end of file +# for rgenetics - lped to fbat +# recode to numeric fbat version +# much slower so best to always +# use numeric alleles internally + +import sys,os,time + + +prog = os.path.split(sys.argv[0])[-1] +myversion = 'Oct 10 2009' + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + +def rgConv(inpedfilepath,outhtmlname,outfilepath): + 
"""convert linkage ped/map to fbat""" + recode={'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'} + basename = os.path.split(inpedfilepath)[-1] # get basename + inmap = '%s.map' % inpedfilepath + inped = '%s.ped' % inpedfilepath + outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data + outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to + try: + mf = file(inmap,'r') + except: + sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap)) + sys.exit(1) + try: + rsl = [x.split()[1] for x in mf] + except: + sys.stderr.write('## cannot parse %s' % inmap) + sys.exit(1) + try: + os.makedirs(outfilepath) + except: + pass # already exists + head = ' '.join(rsl) # list of rs numbers + # TODO add anno to rs but fbat will prolly barf? + pedf = file(inped,'r') + o = file(outfpath,'w',2**20) + o.write(head) + o.write('\n') + for i,row in enumerate(pedf): + if i == 0: + lrow = row.split() + try: + x = [int(x) for x in lrow[10:50]] # look for non numeric codes + except: + dorecode = 1 + if dorecode: + lrow = row.strip().split() + p = lrow[:6] + g = lrow[6:] + gc = [recode.get(x,'0') for x in g] + lrow = p+gc + row = '%s\n' % ' '.join(lrow) + o.write(row) + o.close() + + +def main(): + """call fbater + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' + </command> + """ + nparm = 3 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm)) + sys.exit(1) + inpedfilepath = sys.argv[1] + outhtmlname = sys.argv[2] + outfilepath = sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + rgConv(inpedfilepath,outhtmlname,outfilepath) + f = file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + 
flist = os.listdir(outfilepath) + print '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info + f.write('<div>## Rgenetics: http://rgenetics.org Galaxy Tools %s %s\n<ol>' % (prog,timenow())) + for i, data in enumerate( flist ): + f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) + f.write("</div></body></html>") + f.close() + + + +if __name__ == "__main__": + main() diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_fped_converter.xml --- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 15:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 15:07:11 2009 -0400 @@ -1,1 +1,15 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.xml \ No newline at end of file +<tool id="lped2fpedconvert" name="Convert lped to fped" version="0.01"> +  +  + <command interpreter="python"> + lped_to_fped_converter.py '$input1.extra_files_path/$input1.metadata.base_name' '$output1' '$output1.extra_files_path' + </command> + <inputs> + <param format="lped" name="input1" type="data" label="Choose linkage pedigree file"/> + </inputs> + <outputs> + <data format="fped" name="output1" metadata_source="input1"/> + </outputs> + <help> + </help> +</tool> diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_pbed_converter.py --- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15:07:11 2009 -0400 @@ -1,1 +1,110 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.py \ No newline at end of file +# for rgenetics - lped to pbed +# where to stop with converters +# pbed might be central +# eg lped/eigen/fbat/snpmatrix all to pbed +# and pbed to lped/eigen/fbat/snpmatrix ? 
+# that's a lot of converters +import sys,os,time,subprocess + + +prog = os.path.split(sys.argv[0])[-1] +myversion = 'Oct 10 2009' + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +def getMissval(inped=''): + """ + read some lines...ugly hack - try to guess missing value + should be N or 0 but might be . or - + """ + commonmissvals = {'N':'N','0':'0','n':'n','9':'9','-':'-','.':'.'} + try: + f = file(inped,'r') + except: + return None # signal no in file + missval = None + while missval == None: # doggedly continue until we solve the mystery + try: + l = f.readline() + except: + break + ll = l.split()[6:] # ignore pedigree stuff + for c in ll: + if commonmissvals.get(c,None): + missval = c + f.close() + return missval + if not missval: + missval = 'N' # punt + close(f) + return missval + +def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): + """ + """ + pedf = '%s.ped' % inpedfilepath + basename = os.path.split(inpedfilepath)[-1] # get basename + outroot = os.path.join(outfilepath,basename) + missval = getMissval(inped = pedf) + if not missval: + print '### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf + missval = '0' + cl = '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' % (plink,inpedfilepath,outroot,missval) + p = subprocess.Popen(cl,shell=True,cwd=outfilepath) + retval = p.wait() 
# run plink + + + + +def main(): + """ + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + <command interpreter="python">lped_to_pbed_converter.py '$input1/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' + </command> + """ + nparm = 4 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm)) + sys.exit(1) + inpedfilepath = sys.argv[1] + outhtmlname = sys.argv[2] + outfilepath = sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + plink = sys.argv[4] + rgConv(inpedfilepath,outhtmlname,outfilepath,plink) + f = file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist = os.listdir(outfilepath) + s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info + print s + f.write('<div>%s\n<ol>' % (s)) + for i, data in enumerate( flist ): + f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) + f.write("</div></body></html>") + f.close() + + + +if __name__ == "__main__": + main() diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml --- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 15:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 15:07:11 2009 -0400 @@ -1,1 +1,16 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.xml \ No newline at end of file +<tool id="lped2pbedconvert" name="Convert lped to plink pbed" version="0.01"> +  +  + <command interpreter="python"> + lped_to_pbed_converter.py '$input1.extra_files_path/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' + </command> + <inputs> + <param format="lped" name="input1" type="data" label="Choose linkage pedigree file"/> + </inputs> + <outputs> + <data format="pbed" 
name="output1" metadata_source="input1"/> + </outputs> + <help> + </help> +</tool> diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/pbed_to_lped_converter.py --- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15:07:11 2009 -0400 @@ -1,1 +1,80 @@ -/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.py \ No newline at end of file +# for rgenetics - lped to pbed +# where to stop with converters +# pbed might be central +# eg lped/eigen/fbat/snpmatrix all to pbed +# and pbed to lped/eigen/fbat/snpmatrix ? +# that's a lot of converters +import sys,os,time,subprocess + + +prog = os.path.split(sys.argv[0])[-1] +myversion = 'Oct 10 2009' + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + +def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): + """ + """ + + basename = os.path.split(inpedfilepath)[-1] # get basename + outroot = os.path.join(outfilepath,basename) + cl = '%s --noweb --bfile %s --recode --out %s ' % (plink,inpedfilepath,outroot) + p = subprocess.Popen(cl,shell=True,cwd=outfilepath) + retval = p.wait() # run plink + + + + +def main(): + """ + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + <command 
interpreter="python">pbed_to_lped_converter.py '$input1/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' + </command> + """ + nparm = 4 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (myname,sys.argv,nparm)) + sys.exit(1) + inpedfilepath = sys.argv[1] + outhtmlname = sys.argv[2] + outfilepath = sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + plink = sys.argv[4] + rgConv(inpedfilepath,outhtmlname,outfilepath,plink) + f = file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist = os.listdir(outfilepath) + s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info + print s + f.write('<div>%s\n<ol>' % (s)) + for i, data in enumerate( flist ): + f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) + f.write("</div></body></html>") + f.close() + + + +if __name__ == "__main__": + main() diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml --- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 15:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 15:07:11 2009 -0400 @@ -1,1 +1,16 @@ -/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.xml \ No newline at end of file +<tool id="pbed2lpedconvert" name="Convert plink pbed to linkage lped" version="0.01"> +  +  + <command interpreter="python"> + pbed_to_lped_converter.py '$input1.extra_files_path/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' + </command> + <inputs> + <param format="pbed" name="input1" type="data" label="Choose compressed Plink binary format genotype file"/> + </inputs> + <outputs> + <data format="lped" name="output1" metadata_source="input1"/> + </outputs> + <help> + </help> +</tool>

1 0

[hg] galaxy 2869: adding rgenetics converters
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/639c6b5e0073 changeset: 2869:639c6b5e0073 user: Ross Lazarus <ross.lazarus(a)gmail.com> date: Sun Oct 11 15:00:40 2009 -0400 description: adding rgenetics converters 6 file(s) affected in this change: lib/galaxy/datatypes/converters/lped_to_fped_converter.py lib/galaxy/datatypes/converters/lped_to_fped_converter.xml lib/galaxy/datatypes/converters/lped_to_pbed_converter.py lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml lib/galaxy/datatypes/converters/pbed_to_lped_converter.py lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml diffs (36 lines): diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_fped_converter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15:00:40 2009 -0400 @@ -0,0 +1,1 @@ +/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.py \ No newline at end of file diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_fped_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 15:00:40 2009 -0400 @@ -0,0 +1,1 @@ +/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.xml \ No newline at end of file diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_pbed_converter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15:00:40 2009 -0400 @@ -0,0 +1,1 @@ +/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.py \ No newline at end of file diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 15:00:40 2009 -0400 @@ -0,0 +1,1 @@ +/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.xml \ No newline at end of 
file diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/pbed_to_lped_converter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15:00:40 2009 -0400 @@ -0,0 +1,1 @@ +/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.py \ No newline at end of file diff -r 109e94372abb -r 639c6b5e0073 lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 15:00:40 2009 -0400 @@ -0,0 +1,1 @@ +/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.xml \ No newline at end of file

1 0

[hg] galaxy 2868: added 2 valid chars ; | and patch for empty url_paste in composite upload
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/109e94372abb changeset: 2868:109e94372abb user: Ross Lazarus <ross.lazarus(a)gmail.com> date: Sun Oct 11 09:45:38 2009 -0400 description: added 2 valid chars ;| and patch for empty url_paste in composite upload 2 file(s) affected in this change: lib/galaxy/tools/parameters/grouping.py lib/galaxy/util/__init__.py diffs (24 lines): diff -r 5391edcf618d -r 109e94372abb lib/galaxy/tools/parameters/grouping.py --- a/lib/galaxy/tools/parameters/grouping.py Sun Oct 11 09:37:43 2009 -0400 +++ b/lib/galaxy/tools/parameters/grouping.py Sun Oct 11 09:45:38 2009 -0400 @@ -226,7 +226,7 @@ if context.get( 'space_to_tab', None ) not in ["None", None]: space_to_tab = True file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info ) - if file_bunch.path: + if file_bunch.path and url_paste: if url_paste.strip(): warnings.append( "All file contents specified in the paste box were ignored." ) else: #we need to use url_paste diff -r 5391edcf618d -r 109e94372abb lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py Sun Oct 11 09:37:43 2009 -0400 +++ b/lib/galaxy/util/__init__.py Sun Oct 11 09:45:38 2009 -0400 @@ -109,7 +109,7 @@ return text # characters that are valid -valid_chars = set(string.letters + string.digits + " -=_.()/+*^,:?!") +valid_chars = set(string.letters + string.digits + " -=_.()/+*^,:?!;|") # characters that are allowed but need to be escaped mapped_chars = { '>' :'__gt__',

1 0

[hg] galaxy 2867: rexpression datatypes and fixes for rgenetics datatypes
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/5391edcf618d changeset: 2867:5391edcf618d user: Ross Lazarus <ross.lazarus(a)gmail.com> date: Sun Oct 11 09:37:43 2009 -0400 description: rexpression datatypes and fixes for rgenetics datatypes 1 file(s) affected in this change: lib/galaxy/datatypes/genetics.py diffs (376 lines): diff -r 080b3c44963e -r 5391edcf618d lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Sat Oct 10 16:18:08 2009 -0400 +++ b/lib/galaxy/datatypes/genetics.py Sun Oct 11 09:37:43 2009 -0400 @@ -1,370 +1,1 @@ -""" -rgenetics datatypes -Use at your peril -Ross Lazarus -for the rgenetics and galaxy projects - -genome graphs datatypes derived from Interval datatypes -genome graphs datasets have a header row with appropriate columnames -The first column is always the marker - eg columname = rs, first row= rs12345 if the rows are snps -subsequent row values are all numeric ! Will fail if any non numeric (eg '+' or 'NA') values -ross lazarus for rgenetics -august 20 2007 -""" - -import logging, os, sys, time, tempfile, shutil -import data -from galaxy import util -from cgi import escape -import urllib -from galaxy.web import url_for -from galaxy.datatypes import metadata -from galaxy.datatypes.metadata import MetadataElement -#from galaxy.datatypes.data import Text -from galaxy.datatypes.tabular import Tabular -from galaxy.datatypes.images import Html - -log = logging.getLogger(__name__) - - - -class GenomeGraphs( Tabular ): - """Tab delimited data containing a marker id and any number of numeric values""" - - """Add metadata elements""" - MetadataElement( name="markerCol", default=1, desc="Marker ID column", param=metadata.ColumnParameter ) - MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True ) - MetadataElement( name="column_types", default=[], desc="Column types", readonly=True, visible=False ) - file_ext = 'gg' - - def __init__(self, **kwd): - """Initialize gg datatype, by adding UCSC display 
apps""" - Tabular.__init__(self, **kwd) - self.add_display_app ( 'ucsc', 'Genome Graph', 'as_ucsc_display_file', 'ucsc_links' ) - - def set_peek( self, dataset ): - """Set the peek and blurb text""" - if not dataset.dataset.purged: - dataset.peek = data.get_file_peek( dataset.file_name ) - dataset.blurb = util.commaify( str( data.get_line_count( dataset.file_name ) ) ) + " rows" - #i don't think set_meta should not be called here, it should be called separately - self.set_meta( dataset ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - - def get_estimated_display_viewport( self, dataset ): - """Return a chrom, start, stop tuple for viewing a file.""" - raise notImplemented - - def ucsc_links( self, dataset, type, app, base_url ): - """ from the ever-helpful angie hinrichs angie(a)soe.ucsc.edu - a genome graphs call looks like this - http://genome.ucsc.edu/cgi-bin/hgGenome?clade=mammal&org=Human&db=hg18&hgGe… - &hgGenome_dataSetDescription=test&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess - &hgGenome_columnLabels=best%20guess&hgGenome_maxVal=&hgGenome_labelVals= - &hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=http://galaxy.esphealth.org/datasets/333/display/index - &hgGenome_doSubmitUpload=submit - Galaxy gives this for an interval file - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg18&position=chr1:1-1000&hgt.cu… - http%3A%2F%2Fgalaxy.esphealth.org%2Fdisplay_as%3Fid%3D339%26display_app%3Ducsc - """ - ret_val = [] - ggtail = '&hgGenome_doSubmitUpload=submit' - if not dataset.dbkey: - dataset.dbkey = 'hg18' # punt! 
- if dataset.has_data: - for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey): - if site_name in app.config.ucsc_display_sites: - site_url = site_url.replace('/hgTracks?','/hgGenome?') # for genome graphs - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s" % (base_url, url_for( controller='root' ), dataset.id, type)) - sl = ["%sdb=%s" % (site_url,dataset.dbkey ),] - sl.append("&hgGenome_dataSetName=%s&hgGenome_dataSetDescription=%s" % (dataset.name, 'GalaxyGG_data')) - sl.append("&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess") - sl.append("&hgGenome_columnLabels=first%20row&hgGenome_maxVal=&hgGenome_labelVals=") - sl.append("&hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=%%s") - sl.append(ggtail) - s = urllib.quote_plus( ''.join(sl) ) - link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, s, display_url ) - ret_val.append( (site_name, link) ) - return ret_val - - def validate( self, dataset ): - """Validate a gg file - all numeric after header row""" - errors = list() - infile = open(dataset.file_name, "r") - header= infile.next() # header - for i,row in enumerate(infile): - ll = row.strip().split('\t') - badvals = [] - for j,x in enumerate(ll): - try: - x = float(x) - except: - badval.append('col%d:%s' % (j+1,x)) - if len(badvals) > 0: - errors.append('row %d, %s' % (' '.join(badvals))) - return errors - - def repair_methods( self, dataset ): - """Return options for removing errors along with a description""" - return [("lines","Remove erroneous lines")] - - -class Rgenetics(Html): - """class to use for rgenetics""" - """Add metadata elements""" - MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="galaxy", readonly=True, set_in_upload=True) - - file_ext="html" - composite_type = 'auto_primary_file' - allow_datatype_change = False - - def missing_meta( self, dataset ): - """Checks for empty meta values""" - for key, value in 
dataset.metadata.items(): - if not value: - return True - return False - - def generate_primary_file( self, dataset = None ): - rval = ['<html><head><title>Files for Composite Dataset (%s)</title></head><p/>This composite dataset is composed of the following files:<p/><ul>' % ( self.file_ext ) ] - for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems(): - opt_text = '' - if composite_file.optional: - opt_text = ' (optional)' - rval.append( '<li><a href="%s">%s</a>%s' % ( composite_name, composite_name, opt_text ) ) - rval.append( '</ul></html>' ) - return "\n".join( rval ) - -class SNPMatrix(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="snpmatrix" - - def set_peek( self, dataset ): - if not dataset.dataset.purged: - dataset.peek = "Binary RGenetics file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - - -class Lped(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="lped" - - def __init__( self, **kwd ): - Rgenetics.__init__( self, **kwd ) - self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True ) - - -class Pphe(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="pphe" - -class Lmap(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="lmap" - -class Fphe(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="fphe" - -class Phe(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="phe" - - -class 
Fped(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="fped" - - -class Pbed(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="pbed" - - def __init__( self, **kwd ): - Rgenetics.__init__( self, **kwd ) - self.add_composite_file( '%s.bim', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s.bed', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) - - -class Eigenstratgeno(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="eigenstratgeno" - - def __init__( self, **kwd ): - Rgenetics.__init__( self, **kwd ) - self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) - self.add_composite_file( '%s_fo.eigenstratgeno', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True ) - self.add_composite_file( '%s_fo.ind', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True ) - self.add_composite_file( '%s_fo.map', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True ) - self.add_composite_file( '%s_oo.eigenstratgeno', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True ) - self.add_composite_file( '%s_oo.ind', substitute_name_with_metadata = 'base_name', optional = 'True', is_binary = True ) - self.add_composite_file( '%s_oo.map', substitute_name_with_metadata = 'base_name', optional = 'True', 
is_binary = True ) - - - -class Eigenstratpca(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="eigenstratpca" - -class Snptest(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections - """ - file_ext="snptest" - -class RexpBase( Html ): - """base class for BioC data structures in Galaxy - must be constructed with the pheno data in place since that - goes into the metadata for each instance""" - - """Add metadata elements""" - MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=False ) - MetadataElement( name="column_names", default=[], desc="Column names", readonly=True,visible=True ) - MetadataElement( name="base_name", - desc="base name for all transformed versions of this genetic dataset", readonly=True, default='galaxy', set_in_upload=True) - ### Do we really need these below? can we rely on dataset.extra_files_path: os.path.join( dataset.extra_files_path, '%s.phenodata' % dataset.metadata.base_name ) ? - ### Do these have a different purpose? Ross will need to clarify - ### Uploading these datatypes will not work until this is sorted out (set_peek fails)... - MetadataElement( name="pheno_path", - desc="Path to phenotype data for this experiment", readonly=True) - MetadataElement( name="pheno", - desc="Phenotype data for this experiment", readonly=True) - - file_ext = None - - is_binary = True - - allow_datatype_change = False - - composite_type = 'basic' - - def __init__( self, **kwd ): - Html.__init__( self, **kwd ) - self.add_composite_file( '%s.phenodata', substitute_name_with_metadata = 'base_name', is_binary = True ) - - def set_peek( self, dataset ): - """expects a .pheno file in the extra_files_dir - ugh - note that R is wierd and does not include the row.name in - the header. 
why?""" - p = file(dataset.metadata.pheno_path,'r').readlines() #this fails - head = p[0].strip().split('\t') - head.insert(0,'ChipFileName') # fix R write.table b0rken-ness - p[0] = '\t'.join(head) - p = '\n'.join(p) - dataset.peek = p - dataset.metadata.pheno = p - dataset.blurb = 'R loadable BioC expression object for the Rexpression Galaxy toolkit' - - # stolen from Tabular - # class Tabular( data.Text ): - """Tab delimited data""" - - """Add metadata elements""" - def init_meta( self, dataset, copy_from=None ): - if copy_from: - dataset.metadata = copy_from.metadata - - - #def set_readonly_meta( self, dataset, skip=0, **kwd ): - # """Resets the values of readonly metadata elements.""" - # RexpBase.set_meta( self, dataset, skip=skip ) - - def set_readonly_meta( self, dataset, **kwd ): - """Resets the values of readonly metadata elements.""" - RexpBase.set_meta( self, dataset ) - - #def set_meta( self, dataset, skip=0, **kwd ): - def set_meta( self, dataset, **kwd ): - - """ - NOTE we apply the tabular machinary to the phenodata extracted - from a BioC eSet or affybatch. - - """ - if not dataset.peek: - dataset.set_peek() - pk = dataset.peek # use the peek which is the pheno data insead of dataset (!) - ###this is probably not the best source, can we just access the raw data directly? 
- if pk: - p = pk.split('\n') - h = p[0].strip().split('\t') # hope is header - h = [escape(x) for x in h] - dataset.metadata.column_names = h - dataset.metadata.columns = len(h) - else: - dataset.metadata.column_names = [] - dataset.metadata.columns = 0 - - def make_html_table( self, dataset): - """Create HTML table, used for displaying peek""" - out = ['<table cellspacing="0" cellpadding="3">',] - try: - # Generate column header - pk = dataset.peek - p = pk.split('\n') - for i,row in enumerate(p): - lrow = row.strip().split('\t') - if i == 0: - orow = ['<th>%s</th>' % escape(x) for x in lrow] - orow.insert(0,'<tr>') - orow.append('</tr>') - else: - orow = ['<td>%s</td>' % escape(x) for x in lrow] - orow.insert(0,'<tr>') - orow.append('</tr>') - out.append(''.join(orow)) - out.append( '</table>' ) - out = "\n".join( out ) - except Exception, exc: - out = "Can't create peek %s" % str( exc ) - return out - - def display_peek( self, dataset ): - """Returns formatted html of peek""" - if not dataset.peek: - dataset.set_peek() - return self.make_html_table( dataset ) - - def get_mime(self): - """Returns the mime type of the datatype""" - return 'application/gzip' - - -class AffyBatch( RexpBase ): - """derived class for BioC data structures in Galaxy """ - file_ext = "affybatch" - - -class ESet( RexpBase ): - """derived class for BioC data structures in Galaxy """ - file_ext = "eset" - - -class MAList( RexpBase ): - """derived class for BioC data structures in Galaxy """ - file_ext = "malist" - - -if __name__ == '__main__': - import doctest, sys - doctest.testmod(sys.modules[__name__]) - +/opt/galaxy/tools/rgenetics/genetics.py \ No newline at end of file

1 0

[hg] galaxy 2865: Backed out changeset 037374950cc9
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/316b413c933b changeset: 2865:316b413c933b user: James Taylor <james(a)jamestaylor.org> date: Sat Oct 10 14:03:08 2009 -0400 description: Backed out changeset 037374950cc9 5 file(s) affected in this change: lib/galaxy/web/controllers/dataset.py lib/galaxy/web/controllers/root.py templates/dataset/large_file.mako templates/root/history_common.mako test/base/twilltestcase.py diffs (187 lines): diff -r 037374950cc9 -r 316b413c933b lib/galaxy/web/controllers/dataset.py --- a/lib/galaxy/web/controllers/dataset.py Fri Oct 09 16:59:35 2009 -0400 +++ b/lib/galaxy/web/controllers/dataset.py Sat Oct 10 14:03:08 2009 -0400 @@ -199,9 +199,8 @@ return 'This link may not be followed from within Galaxy.' @web.expose - def display(self, trans, encoded_id=None, show_all=False, to_ext=False, **kwd): + def display(self, trans, dataset_id=None, filename=None, show_all=False, **kwd): """Catches the dataset id and displays file contents as directed""" - dataset_id = trans.security.decode_id( encoded_id ) data = trans.app.model.HistoryDatasetAssociation.get( dataset_id ) if not data: raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) ) @@ -209,29 +208,27 @@ if trans.app.security_agent.can_access_dataset( roles, data.dataset ): if data.state == trans.model.Dataset.states.UPLOAD: return trans.show_error_message( "Please wait until this dataset finishes uploading before attempting to view it." ) - - mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() ) - trans.response.set_content_type(mime) - trans.log_event( "Display dataset id: %s" % str( dataset_id ) ) - - if to_ext: # Saving the file - trans.response.headers['Content-Length'] = int( os.stat( data.file_name ).st_size ) - if to_ext[0] != ".": - to_ext = "." 
+ to_ext - valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' - fname = data.name - fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150] - trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, to_ext) - return open( data.file_name ) + if filename is None or filename.lower() == "index": + file_path = data.file_name + mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() ) + trans.response.set_content_type(mime) + trans.log_event( "Display dataset id: %s" % str( dataset_id ) ) + + else: + file_path = os.path.join( data.extra_files_path, filename ) + mime, encoding = mimetypes.guess_type( file_path ) + if mime is None: + mime = trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( file_path )[-1] ) + trans.response.set_content_type( mime ) - if os.path.exists( data.file_name ): + if os.path.exists( file_path ): max_peek_size = 1000000 # 1 MB - if show_all or os.stat( data.file_name ).st_size < max_peek_size: - return open( data.file_name ) + if show_all or os.stat( file_path ).st_size < max_peek_size: + return open( file_path ) else: trans.response.set_content_type( "text/html" ) - return trans.stream_template_mako( "/dataset/large_file.mako", - truncated_data = open( data.file_name ).read(max_peek_size), + return trans.fill_template( "/dataset/large_file.mako", + truncated_data = open( file_path ).read(max_peek_size), data = data ) else: raise paste.httpexceptions.HTTPNotFound( "File Not Found (%s)." 
% ( filename ) ) diff -r 037374950cc9 -r 316b413c933b lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Fri Oct 09 16:59:35 2009 -0400 +++ b/lib/galaxy/web/controllers/root.py Sat Oct 10 14:03:08 2009 -0400 @@ -134,6 +134,72 @@ return rval ## ---- Dataset display / editing ---------------------------------------- + + @web.expose + def display( self, trans, id=None, hid=None, tofile=None, toext=".txt", **kwd ): + """ + Returns data directly into the browser. + Sets the mime-type according to the extension + """ + if hid is not None: + try: + hid = int( hid ) + except: + return "hid '%s' is invalid" %str( hid ) + history = trans.get_history() + for dataset in history.datasets: + if dataset.hid == hid: + data = dataset + break + else: + raise Exception( "No dataset with hid '%d'" % hid ) + else: + try: + data = self.app.model.HistoryDatasetAssociation.get( id ) + except: + return "Dataset id '%s' is invalid" %str( id ) + if data: + user, roles = trans.get_user_and_roles() + if trans.app.security_agent.can_access_dataset( roles, data.dataset ): + mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() ) + trans.response.set_content_type(mime) + if tofile: + fStat = os.stat(data.file_name) + trans.response.headers['Content-Length'] = int(fStat.st_size) + if toext[0:1] != ".": + toext = "." 
+ toext + valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + fname = data.name + fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150] + trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, toext) + trans.log_event( "Display dataset id: %s" % str(id) ) + try: + return open( data.file_name ) + except: + return "This dataset contains no content" + else: + return "You are not allowed to access this dataset" + else: + return "No dataset with id '%s'" % str( id ) + + @web.expose + def display_child(self, trans, parent_id=None, designation=None, tofile=None, toext=".txt"): + """ + Returns child data directly into the browser, based upon parent_id and designation. + """ + try: + data = self.app.model.HistoryDatasetAssociation.get( parent_id ) + if data: + child = data.get_child_by_designation( designation ) + if child: + user, roles = trans.get_user_and_roles() + if trans.app.security_agent.can_access_dataset( roles, child ): + return self.display( trans, id=child.id, tofile=tofile, toext=toext ) + else: + return "You are not privileged to access this dataset." 
+ except Exception: + pass + return "A child named %s could not be found for data %s" % ( designation, parent_id ) @web.expose def display_as( self, trans, id=None, display_app=None, **kwd ): diff -r 037374950cc9 -r 316b413c933b templates/dataset/large_file.mako --- a/templates/dataset/large_file.mako Fri Oct 09 16:59:35 2009 -0400 +++ b/templates/dataset/large_file.mako Sat Oct 10 14:03:08 2009 -0400 @@ -2,8 +2,8 @@ <div class="warningmessagelarge"> This dataset is large and only the first megabyte is shown below.<br /> - <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), show_all=True )}">Show all</a> | - <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">Save</a> + <a href="${h.url_for( controller='dataset', action='display', dataset_id=data.id, show_all=True )}">Show all</a> | + <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}">Save</a> </div> <pre> diff -r 037374950cc9 -r 316b413c933b templates/root/history_common.mako --- a/templates/root/history_common.mako Fri Oct 09 16:59:35 2009 -0400 +++ b/templates/root/history_common.mako Sat Oct 10 14:03:08 2009 -0400 @@ -32,7 +32,7 @@ <img src="${h.url_for('/static/images/pencil_icon_grey.png')}" width='16' height='16' alt='edit attributes' title='edit attributes' class='button edit' border='0'> %endif %else: - <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ))}" target="galaxy_main"></a> + <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', dataset_id=data.id, action='display', filename='index')}" target="galaxy_main"></a> %if user_owns_dataset: <a class="icon-button edit" title="edit attributes" href="${h.url_for( controller='root', action='edit', id=data.id )}" 
target="galaxy_main"></a> %endif @@ -86,7 +86,7 @@ <div class="info">${_('Info: ')}${data.display_info()}</div> <div> %if data.has_data: - <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">save</a> + <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}" target="_blank">save</a> %if user_owns_dataset: | <a href="${h.url_for( controller='tool_runner', action='rerun', id=data.id )}" target="galaxy_main">rerun</a> %endif diff -r 037374950cc9 -r 316b413c933b test/base/twilltestcase.py --- a/test/base/twilltestcase.py Fri Oct 09 16:59:35 2009 -0400 +++ b/test/base/twilltestcase.py Sat Oct 10 14:03:08 2009 -0400 @@ -390,9 +390,9 @@ self.visit_url( "%s/dataset/undelete?id=%s" % ( self.url, hda_id ) ) if check_str: self.check_page_for_string( check_str ) - def display_history_item( self, hda_id, check_str='' ): + def display_history_item( self, id, check_str='' ): """Displays a history item - simulates eye icon click""" - self.visit_url( '%s/dataset/display?encoded_id=%s' % ( self.url, self.security.encode_id( hda_id ) ) ) + self.visit_url( '%s/datasets/%s/display/index' % ( self.url, id ) ) if check_str: self.check_page_for_string( check_str ) self.home()

1 0

[hg] galaxy 2864: Encode HDA ids when peeking and saving. Fixes #110
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/037374950cc9 changeset: 2864:037374950cc9 user: Kanwei Li <kanwei(a)gmail.com> date: Fri Oct 09 16:59:35 2009 -0400 description: Encode HDA ids when peeking and saving. Fixes #110 5 file(s) affected in this change: lib/galaxy/web/controllers/dataset.py lib/galaxy/web/controllers/root.py templates/dataset/large_file.mako templates/root/history_common.mako test/base/twilltestcase.py diffs (187 lines): diff -r 5d63e4eee122 -r 037374950cc9 lib/galaxy/web/controllers/dataset.py --- a/lib/galaxy/web/controllers/dataset.py Fri Oct 09 16:02:31 2009 -0400 +++ b/lib/galaxy/web/controllers/dataset.py Fri Oct 09 16:59:35 2009 -0400 @@ -199,8 +199,9 @@ return 'This link may not be followed from within Galaxy.' @web.expose - def display(self, trans, dataset_id=None, filename=None, show_all=False, **kwd): + def display(self, trans, encoded_id=None, show_all=False, to_ext=False, **kwd): """Catches the dataset id and displays file contents as directed""" + dataset_id = trans.security.decode_id( encoded_id ) data = trans.app.model.HistoryDatasetAssociation.get( dataset_id ) if not data: raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) ) @@ -208,27 +209,29 @@ if trans.app.security_agent.can_access_dataset( roles, data.dataset ): if data.state == trans.model.Dataset.states.UPLOAD: return trans.show_error_message( "Please wait until this dataset finishes uploading before attempting to view it." 
) - if filename is None or filename.lower() == "index": - file_path = data.file_name - mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() ) - trans.response.set_content_type(mime) - trans.log_event( "Display dataset id: %s" % str( dataset_id ) ) - - else: - file_path = os.path.join( data.extra_files_path, filename ) - mime, encoding = mimetypes.guess_type( file_path ) - if mime is None: - mime = trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( file_path )[-1] ) - trans.response.set_content_type( mime ) + + mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() ) + trans.response.set_content_type(mime) + trans.log_event( "Display dataset id: %s" % str( dataset_id ) ) + + if to_ext: # Saving the file + trans.response.headers['Content-Length'] = int( os.stat( data.file_name ).st_size ) + if to_ext[0] != ".": + to_ext = "." + to_ext + valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + fname = data.name + fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150] + trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, to_ext) + return open( data.file_name ) - if os.path.exists( file_path ): + if os.path.exists( data.file_name ): max_peek_size = 1000000 # 1 MB - if show_all or os.stat( file_path ).st_size < max_peek_size: - return open( file_path ) + if show_all or os.stat( data.file_name ).st_size < max_peek_size: + return open( data.file_name ) else: trans.response.set_content_type( "text/html" ) - return trans.fill_template( "/dataset/large_file.mako", - truncated_data = open( file_path ).read(max_peek_size), + return trans.stream_template_mako( "/dataset/large_file.mako", + truncated_data = open( data.file_name ).read(max_peek_size), data = data ) else: raise paste.httpexceptions.HTTPNotFound( "File Not Found (%s)." 
% ( filename ) ) diff -r 5d63e4eee122 -r 037374950cc9 lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Fri Oct 09 16:02:31 2009 -0400 +++ b/lib/galaxy/web/controllers/root.py Fri Oct 09 16:59:35 2009 -0400 @@ -134,72 +134,6 @@ return rval ## ---- Dataset display / editing ---------------------------------------- - - @web.expose - def display( self, trans, id=None, hid=None, tofile=None, toext=".txt", **kwd ): - """ - Returns data directly into the browser. - Sets the mime-type according to the extension - """ - if hid is not None: - try: - hid = int( hid ) - except: - return "hid '%s' is invalid" %str( hid ) - history = trans.get_history() - for dataset in history.datasets: - if dataset.hid == hid: - data = dataset - break - else: - raise Exception( "No dataset with hid '%d'" % hid ) - else: - try: - data = self.app.model.HistoryDatasetAssociation.get( id ) - except: - return "Dataset id '%s' is invalid" %str( id ) - if data: - user, roles = trans.get_user_and_roles() - if trans.app.security_agent.can_access_dataset( roles, data.dataset ): - mime = trans.app.datatypes_registry.get_mimetype_by_extension( data.extension.lower() ) - trans.response.set_content_type(mime) - if tofile: - fStat = os.stat(data.file_name) - trans.response.headers['Content-Length'] = int(fStat.st_size) - if toext[0:1] != ".": - toext = "." 
+ toext - valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' - fname = data.name - fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150] - trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, toext) - trans.log_event( "Display dataset id: %s" % str(id) ) - try: - return open( data.file_name ) - except: - return "This dataset contains no content" - else: - return "You are not allowed to access this dataset" - else: - return "No dataset with id '%s'" % str( id ) - - @web.expose - def display_child(self, trans, parent_id=None, designation=None, tofile=None, toext=".txt"): - """ - Returns child data directly into the browser, based upon parent_id and designation. - """ - try: - data = self.app.model.HistoryDatasetAssociation.get( parent_id ) - if data: - child = data.get_child_by_designation( designation ) - if child: - user, roles = trans.get_user_and_roles() - if trans.app.security_agent.can_access_dataset( roles, child ): - return self.display( trans, id=child.id, tofile=tofile, toext=toext ) - else: - return "You are not privileged to access this dataset." 
- except Exception: - pass - return "A child named %s could not be found for data %s" % ( designation, parent_id ) @web.expose def display_as( self, trans, id=None, display_app=None, **kwd ): diff -r 5d63e4eee122 -r 037374950cc9 templates/dataset/large_file.mako --- a/templates/dataset/large_file.mako Fri Oct 09 16:02:31 2009 -0400 +++ b/templates/dataset/large_file.mako Fri Oct 09 16:59:35 2009 -0400 @@ -2,8 +2,8 @@ <div class="warningmessagelarge"> This dataset is large and only the first megabyte is shown below.<br /> - <a href="${h.url_for( controller='dataset', action='display', dataset_id=data.id, show_all=True )}">Show all</a> | - <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}">Save</a> + <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), show_all=True )}">Show all</a> | + <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">Save</a> </div> <pre> diff -r 5d63e4eee122 -r 037374950cc9 templates/root/history_common.mako --- a/templates/root/history_common.mako Fri Oct 09 16:02:31 2009 -0400 +++ b/templates/root/history_common.mako Fri Oct 09 16:59:35 2009 -0400 @@ -32,7 +32,7 @@ <img src="${h.url_for('/static/images/pencil_icon_grey.png')}" width='16' height='16' alt='edit attributes' title='edit attributes' class='button edit' border='0'> %endif %else: - <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', dataset_id=data.id, action='display', filename='index')}" target="galaxy_main"></a> + <a class="icon-button display" title="display data" href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ))}" target="galaxy_main"></a> %if user_owns_dataset: <a class="icon-button edit" title="edit attributes" href="${h.url_for( controller='root', action='edit', id=data.id )}" 
target="galaxy_main"></a> %endif @@ -86,7 +86,7 @@ <div class="info">${_('Info: ')}${data.display_info()}</div> <div> %if data.has_data: - <a href="${h.url_for( controller='root', action='display', id=data.id, tofile='yes', toext=data.ext )}" target="_blank">save</a> + <a href="${h.url_for( controller='dataset', action='display', encoded_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">save</a> %if user_owns_dataset: | <a href="${h.url_for( controller='tool_runner', action='rerun', id=data.id )}" target="galaxy_main">rerun</a> %endif diff -r 5d63e4eee122 -r 037374950cc9 test/base/twilltestcase.py --- a/test/base/twilltestcase.py Fri Oct 09 16:02:31 2009 -0400 +++ b/test/base/twilltestcase.py Fri Oct 09 16:59:35 2009 -0400 @@ -390,9 +390,9 @@ self.visit_url( "%s/dataset/undelete?id=%s" % ( self.url, hda_id ) ) if check_str: self.check_page_for_string( check_str ) - def display_history_item( self, id, check_str='' ): + def display_history_item( self, hda_id, check_str='' ): """Displays a history item - simulates eye icon click""" - self.visit_url( '%s/datasets/%s/display/index' % ( self.url, id ) ) + self.visit_url( '%s/dataset/display?encoded_id=%s' % ( self.url, self.security.encode_id( hda_id ) ) ) if check_str: self.check_page_for_string( check_str ) self.home()

1 0

[hg] galaxy 2863: Proper title for pages
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/5d63e4eee122 changeset: 2863:5d63e4eee122 user: James Taylor <james(a)jamestaylor.org> date: Fri Oct 09 16:02:31 2009 -0400 description: Proper title for pages 1 file(s) affected in this change: templates/page/display.mako diffs (19 lines): diff -r 205a6782d9ae -r 5d63e4eee122 templates/page/display.mako --- a/templates/page/display.mako Fri Oct 09 14:47:54 2009 -0400 +++ b/templates/page/display.mako Fri Oct 09 16:02:31 2009 -0400 @@ -1,4 +1,6 @@ <%inherit file="/base_panels.mako"/> + +<%def name="title()">Galaxy :: ${page.user.username} :: ${page.title}</%def> <%def name="init()"> <% @@ -14,7 +16,7 @@ <div class="unified-panel-header" unselectable="on"> <div class="unified-panel-header-inner"> - ${page.user.username} / ${page.title} + ${page.user.username} :: ${page.title} </div> </div>

1 0

[hg] galaxy 2866: Trimmer fix for fastq processing
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/080b3c44963e changeset: 2866:080b3c44963e user: Anton Nekrutenko <anton(a)bx.psu.edu> date: Sat Oct 10 16:18:08 2009 -0400 description: Trimmer fix for fastq processing 1 file(s) affected in this change: tools/filters/trimmer.py diffs (14 lines): diff -r 316b413c933b -r 080b3c44963e tools/filters/trimmer.py --- a/tools/filters/trimmer.py Sat Oct 10 14:03:08 2009 -0400 +++ b/tools/filters/trimmer.py Sat Oct 10 16:18:08 2009 -0400 @@ -83,9 +83,7 @@ print line continue - if options.fastq and line.startswith('@'): - stop_err('Malformed fastq file: even numbered line starts with @') - + if line[0] not in invalid_starts: if col == 0: if int( options.end ) > 0:

1 0

[hg] galaxy 2862: Increase number of datasets/page on HDA grid.
by Greg Von Kuster 13 Oct '09

13 Oct '09

details: http://www.bx.psu.edu/hg/galaxy/rev/205a6782d9ae changeset: 2862:205a6782d9ae user: jeremy goecks <jeremy.goecks at emory.edu> date: Fri Oct 09 14:47:54 2009 -0400 description: Increase number of datasets/page on HDA grid. 1 file(s) affected in this change: lib/galaxy/web/controllers/dataset.py diffs (21 lines): diff -r c7dd346c20d5 -r 205a6782d9ae lib/galaxy/web/controllers/dataset.py --- a/lib/galaxy/web/controllers/dataset.py Thu Oct 08 22:01:36 2009 -0400 +++ b/lib/galaxy/web/controllers/dataset.py Fri Oct 09 14:47:54 2009 -0400 @@ -107,7 +107,7 @@ return accepted_filters # Grid definition - title = "Stored datasets" + title = "Stored Datasets" model_class = model.HistoryDatasetAssociation template='/dataset/grid.mako' default_sort_key = "-create_time" @@ -125,7 +125,7 @@ default_filter = dict( deleted="False", tags="All" ) preserve_state = False use_paging = True - num_rows_per_page = 10 + num_rows_per_page = 50 def apply_default_filter( self, trans, query, **kwargs ): # This is a somewhat obtuse way to join the History and HDA tables. However, it's necessary # because the initial query in build_initial_query is specificied on the HDA table (this is reasonable)

1 0