From greg@bx.psu.edu Tue Oct 13 15:23:26 2009 From: Greg Von Kuster To: galaxy-dev@lists.galaxyproject.org Subject: [galaxy-dev] [hg] galaxy 2870: symlinks are not very useful Date: Tue, 13 Oct 2009 15:23:16 -0400 Message-ID: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============7969039957994300040==" --===============7969039957994300040== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable details: http://www.bx.psu.edu/hg/galaxy/rev/1a0bb7d6897c changeset: 2870:1a0bb7d6897c user: Ross Lazarus date: Sun Oct 11 15:07:11 2009 -0400 description: symlinks are not very useful 6 file(s) affected in this change: lib/galaxy/datatypes/converters/lped_to_fped_converter.py lib/galaxy/datatypes/converters/lped_to_fped_converter.xml lib/galaxy/datatypes/converters/lped_to_pbed_converter.py lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml lib/galaxy/datatypes/converters/pbed_to_lped_converter.py lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml diffs (383 lines): diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_= fped_converter.py --- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15= :00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.py Sun Oct 11 15= :07:11 2009 -0400 @@ -1,1 +1,110 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.py \ No newline at end of file +# for rgenetics - lped to fbat +# recode to numeric fbat version +# much slower so best to always +# use numeric alleles internally + +import sys,os,time + + +prog =3D os.path.split(sys.argv[0])[-1] +myversion =3D 'Oct 10 2009' + +galhtmlprefix =3D """ + + + + + + + + + +
+""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + +def rgConv(inpedfilepath,outhtmlname,outfilepath): + """convert linkage ped/map to fbat""" =20 + recode=3D{'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2',= '3':'3','4':'4'} + basename =3D os.path.split(inpedfilepath)[-1] # get basename + inmap =3D '%s.map' % inpedfilepath + inped =3D '%s.ped' % inpedfilepath + outf =3D '%s.ped' % basename # note the fbat exe insists that this is the = extension for the ped data + outfpath =3D os.path.join(outfilepath,outf) # where to write the fbat form= at file to + try: + mf =3D file(inmap,'r') + except: + sys.stderr.write('%s cannot open inmap file %s - do you have permission?= \n' % (prog,inmap)) + sys.exit(1) + try: + rsl =3D [x.split()[1] for x in mf] + except: + sys.stderr.write('## cannot parse %s' % inmap) + sys.exit(1) + try: + os.makedirs(outfilepath) + except: + pass # already exists + head =3D ' '.join(rsl) # list of rs numbers + # TODO add anno to rs but fbat will prolly barf? + pedf =3D file(inped,'r') + o =3D file(outfpath,'w',2**20) + o.write(head) + o.write('\n') + for i,row in enumerate(pedf): + if i =3D=3D 0: + lrow =3D row.split() + try: + x =3D [int(x) for x in lrow[10:50]] # look for non numeric codes + except: + dorecode =3D 1 + if dorecode: + lrow =3D row.strip().split() + p =3D lrow[:6] + g =3D lrow[6:] + gc =3D [recode.get(x,'0') for x in g] + lrow =3D p+gc + row =3D '%s\n' % ' '.join(lrow) + o.write(row) + o.close() + + +def main(): + """call fbater + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + rg_convert_lped_fped.py '$input1/$input1= .metadata.base_name' + '$output1' '$output1.extra_files_path' + + """ + nparm =3D 3 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (= prog,sys.argv,nparm)) + sys.exit(1) + inpedfilepath =3D sys.argv[1] + outhtmlname =3D sys.argv[2] + outfilepath =3D sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + rgConv(inpedfilepath,outhtmlname,outfilepath) + f =3D file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist =3D os.listdir(outfilepath) + print '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,ti= menow()) # becomes info + f.write('
## Rgenetics: http://rgenetics.org Galaxy Tools %s %s\n' % (prog,timenow())) + for i, data in enumerate( flist ): + f.write('
  • %s
  • \n' % (os.path.split(data)[-1= ],os.path.split(data)[-1])) + f.write("
    ") + f.close() + + =20 + +if __name__ =3D=3D "__main__": + main() diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_= fped_converter.xml --- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 1= 5:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.xml Sun Oct 11 1= 5:07:11 2009 -0400 @@ -1,1 +1,15 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_fped_converter.xml \ No newline at end of file + + + + + lped_to_fped_converter.py '$input1.extra_files_path/$input1.metadata.base= _name' '$output1' '$output1.extra_files_path' + + + + + + + + + + diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_= pbed_converter.py --- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15= :00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py Sun Oct 11 15= :07:11 2009 -0400 @@ -1,1 +1,110 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.py \ No newline at end of file +# for rgenetics - lped to pbed +# where to stop with converters +# pbed might be central +# eg lped/eigen/fbat/snpmatrix all to pbed +# and pbed to lped/eigen/fbat/snpmatrix ? +# that's a lot of converters +import sys,os,time,subprocess + + +prog =3D os.path.split(sys.argv[0])[-1] +myversion =3D 'Oct 10 2009' + +galhtmlprefix =3D """ + + + + + + + + + +
    +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +def getMissval(inped=3D''): + """=20 + read some lines...ugly hack - try to guess missing value + should be N or 0 but might be . or - + """ + commonmissvals =3D {'N':'N','0':'0','n':'n','9':'9','-':'-','.':'.'} + try: + f =3D file(inped,'r') + except: + return None # signal no in file + missval =3D None + while missval =3D=3D None: # doggedly continue until we solve the mystery + try: + l =3D f.readline() + except: + break=20 + ll =3D l.split()[6:] # ignore pedigree stuff + for c in ll: + if commonmissvals.get(c,None): + missval =3D c=20 + f.close() + return missval + if not missval:=20 + missval =3D 'N' # punt + close(f)=20 + return missval + =20 +def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): + """ + """ + pedf =3D '%s.ped' % inpedfilepath + basename =3D os.path.split(inpedfilepath)[-1] # get basename + outroot =3D os.path.join(outfilepath,basename) + missval =3D getMissval(inped =3D pedf) + if not missval: + print '### lped_to_pbed_converter.py cannot identify missing value i= n %s' % pedf + missval =3D '0' + cl =3D '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' = % (plink,inpedfilepath,outroot,missval) + p =3D subprocess.Popen(cl,shell=3DTrue,cwd=3Doutfilepath) + retval =3D p.wait() # run plink + + + + +def main(): + """ + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + lped_to_pbed_converter.py '$input1/$inpu= t1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/= plink' + + """ + nparm =3D 4 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (= prog,sys.argv,nparm)) + sys.exit(1) + inpedfilepath =3D sys.argv[1] + outhtmlname =3D sys.argv[2] + outfilepath =3D sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + plink =3D sys.argv[4] + rgConv(inpedfilepath,outhtmlname,outfilepath,plink) + f =3D file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist =3D os.listdir(outfilepath) + s =3D '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,ti= menow()) # becomes info + print s + f.write('
    %s\n
      ' % (s)) + for i, data in enumerate( flist ): + f.write('
    1. %s
    2. \n' % (os.path.split(data)[-1= ],os.path.split(data)[-1])) + f.write("
    ") + f.close() + + =20 + +if __name__ =3D=3D "__main__": + main() diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/lped_to_= pbed_converter.xml --- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 1= 5:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.xml Sun Oct 11 1= 5:07:11 2009 -0400 @@ -1,1 +1,16 @@ -/opt/galaxy/tools/rgenetics/converters/lped_to_pbed_converter.xml \ No newline at end of file + + + + + lped_to_pbed_converter.py '$input1.extra_files_path/$input1.metadata.base= _name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/p= link' + + + + + + + + + + diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/pbed_to_= lped_converter.py --- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15= :00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py Sun Oct 11 15= :07:11 2009 -0400 @@ -1,1 +1,80 @@ -/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.py \ No newline at end of file +# for rgenetics - lped to pbed +# where to stop with converters +# pbed might be central +# eg lped/eigen/fbat/snpmatrix all to pbed +# and pbed to lped/eigen/fbat/snpmatrix ? +# that's a lot of converters +import sys,os,time,subprocess + + +prog =3D os.path.split(sys.argv[0])[-1] +myversion =3D 'Oct 10 2009' + +galhtmlprefix =3D """ + + + + + + + + + +
    +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + =20 +def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): + """ + """ + =20 + basename =3D os.path.split(inpedfilepath)[-1] # get basename + outroot =3D os.path.join(outfilepath,basename) + cl =3D '%s --noweb --bfile %s --recode --out %s ' % (plink,inpedfilepath= ,outroot) + p =3D subprocess.Popen(cl,shell=3DTrue,cwd=3Doutfilepath) + retval =3D p.wait() # run plink + + + + +def main(): + """ + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + pbed_to_lped_converter.py '$input1/$inpu= t1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/= plink' + + """ + nparm =3D 4 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (= myname,sys.argv,nparm)) + sys.exit(1) + inpedfilepath =3D sys.argv[1] + outhtmlname =3D sys.argv[2] + outfilepath =3D sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + plink =3D sys.argv[4] + rgConv(inpedfilepath,outhtmlname,outfilepath,plink) + f =3D file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist =3D os.listdir(outfilepath) + s =3D '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,ti= menow()) # becomes info + print s + f.write('
    %s\n
      ' % (s)) + for i, data in enumerate( flist ): + f.write('
    1. %s
    2. \n' % (os.path.split(data)[-1= ],os.path.split(data)[-1])) + f.write("
    ") + f.close() + + =20 + +if __name__ =3D=3D "__main__": + main() diff -r 639c6b5e0073 -r 1a0bb7d6897c lib/galaxy/datatypes/converters/pbed_to_= lped_converter.xml --- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 1= 5:00:40 2009 -0400 +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.xml Sun Oct 11 1= 5:07:11 2009 -0400 @@ -1,1 +1,16 @@ -/opt/galaxy/tools/rgenetics/converters/pbed_to_lped_converter.xml \ No newline at end of file + + + + + pbed_to_lped_converter.py '$input1.extra_files_path/$input1.metadata.base= _name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/p= link' + + + + + + + + + + --===============7969039957994300040==--