details: http://www.bx.psu.edu/hg/galaxy/rev/d16f28f312a9 changeset: 3031:d16f28f312a9 user: jeremy goecks <jeremy.goecks@emory.edu> date: Fri Nov 13 16:35:07 2009 -0500 description: Merge diffstat: datatypes_conf.xml.sample | 431 ++++++++++++++++++++++--------------------- lib/galaxy/datatypes/genetics.py | 119 ++++++----- tools/data_source/upload.xml | 6 + 3 files changed, 294 insertions(+), 262 deletions(-) diffs (805 lines): diff -r a5bdbae15600 -r d16f28f312a9 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample Fri Nov 13 16:34:32 2009 -0500 +++ b/datatypes_conf.xml.sample Fri Nov 13 16:35:07 2009 -0500 @@ -1,213 +1,224 @@ <?xml version="1.0"?> <datatypes> - <registration converters_path="lib/galaxy/datatypes/converters"> - <datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true"/> - <datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true"/> - <datatype extension="bam" type="galaxy.datatypes.binary:Bam" mimetype="application/octet-stream"/> - <datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true"> - <converter file="bed_to_gff_converter.xml" target_datatype="gff"/> - <converter file="interval_to_coverage.xml" target_datatype="coverage"/> - <converter file="bed_to_interval_index_converter.xml" target_datatype="interval_index"/> - </datatype> - <datatype extension="binseq.zip" type="galaxy.datatypes.binary:Binseq" mimetype="application/zip" display_in_upload="true"/> - <datatype extension="len" type="galaxy.datatypes.chrominfo:ChromInfo" display_in_upload="true"> - <!-- no converters yet --> - </datatype> - <datatype extension="coverage" type="galaxy.datatypes.coverage:LastzCoverage" display_in_upload="true"> - <indexer file="coverage.xml" /> - </datatype> - <datatype extension="customtrack" type="galaxy.datatypes.interval:CustomTrack"/> - <datatype extension="csfasta" type="galaxy.datatypes.sequence:csFasta" display_in_upload="true"/> - <datatype extension="data" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream"/> - <datatype extension="fasta" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true"> - <converter file="fasta_to_tabular_converter.xml" target_datatype="tabular"/> - </datatype> - <datatype extension="fastq" type="galaxy.datatypes.sequence:Fastq" display_in_upload="true"/> - <datatype extension="fastqsanger" type="galaxy.datatypes.sequence:FastqSanger" display_in_upload="true"/> - <datatype extension="genetrack" type="galaxy.datatypes.tracks:GeneTrack"/> - <datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"> - <converter file="gff_to_bed_converter.xml" target_datatype="bed"/> - </datatype> - <datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/> - <datatype extension="gif" type="galaxy.datatypes.images:Image" mimetype="image/gif"/> - <datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/> - <datatype extension="html" type="galaxy.datatypes.images:Html" mimetype="text/html"/> - <datatype extension="interval" type="galaxy.datatypes.interval:Interval" display_in_upload="true"> - <converter file="interval_to_bed_converter.xml" target_datatype="bed"/> - <indexer file="interval_awk.xml" /> - </datatype> - <datatype extension="jpg" type="galaxy.datatypes.images:Image" mimetype="image/jpeg"/> - <datatype extension="laj" type="galaxy.datatypes.images:Laj"/> - <datatype extension="lav" type="galaxy.datatypes.sequence:Lav" display_in_upload="true"/> - <datatype extension="maf" type="galaxy.datatypes.sequence:Maf" display_in_upload="true"> - <converter file="maf_to_fasta_converter.xml" target_datatype="fasta"/> - <converter file="maf_to_interval_converter.xml" target_datatype="interval"/> - </datatype> - <datatype extension="pdf" type="galaxy.datatypes.images:Image" mimetype="application/pdf"/> - <datatype extension="png" type="galaxy.datatypes.images:Image" mimetype="image/png"/> - <datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/> - <datatype extension="qualsolid" type="galaxy.datatypes.qualityscore:QualityScoreSOLiD" display_in_upload="true"/> - <datatype extension="qual454" type="galaxy.datatypes.qualityscore:QualityScore454" display_in_upload="true"/> - <datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"/> - <datatype extension="scf" type="galaxy.datatypes.binary:Scf" mimetype="application/octet-stream" display_in_upload="true"/> - <datatype extension="sff" type="galaxy.datatypes.binary:Sff" mimetype="application/octet-stream" display_in_upload="true"/> - <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/> - <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/> - <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/> - <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" display_in_upload="true"/> - <datatype extension="txtseq.zip" type="galaxy.datatypes.data:Txtseq" mimetype="application/zip" display_in_upload="true"/> - <datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true"> - <converter file="wiggle_to_array_tree_converter.xml" target_datatype="array_tree"/> - </datatype> - <datatype extension="array_tree" type="galaxy.datatypes.data:Data" /> - <datatype extension="interval_index" type="galaxy.datatypes.data:Data" /> - <!-- EMBOSS TOOLS --> - <datatype extension="acedb" type="galaxy.datatypes.data:Text"/> - <datatype extension="asn1" type="galaxy.datatypes.data:Text"/> - <datatype extension="btwisted" type="galaxy.datatypes.data:Text"/> - <datatype extension="cai" type="galaxy.datatypes.data:Text"/> - <datatype extension="charge" type="galaxy.datatypes.data:Text"/> - <datatype extension="checktrans" type="galaxy.datatypes.data:Text"/> - <datatype extension="chips" type="galaxy.datatypes.data:Text"/> - <datatype extension="clustal" type="galaxy.datatypes.data:Text"/> - <datatype extension="codata" type="galaxy.datatypes.data:Text"/> - <datatype extension="codcmp" type="galaxy.datatypes.data:Text"/> - <datatype extension="coderet" type="galaxy.datatypes.data:Text"/> - <datatype extension="compseq" type="galaxy.datatypes.data:Text"/> - <datatype extension="cpgplot" type="galaxy.datatypes.data:Text"/> - <datatype extension="cpgreport" type="galaxy.datatypes.data:Text"/> - <datatype extension="cusp" type="galaxy.datatypes.data:Text"/> - <datatype extension="cut" type="galaxy.datatypes.data:Text"/> - <datatype extension="dan" type="galaxy.datatypes.data:Text"/> - <datatype extension="dbmotif" type="galaxy.datatypes.data:Text"/> - <datatype extension="diffseq" type="galaxy.datatypes.data:Text"/> - <datatype extension="digest" type="galaxy.datatypes.data:Text"/> - <datatype extension="dreg" type="galaxy.datatypes.data:Text"/> - <datatype extension="einverted" type="galaxy.datatypes.data:Text"/> - <datatype extension="embl" type="galaxy.datatypes.data:Text"/> - <datatype extension="epestfind" type="galaxy.datatypes.data:Text"/> - <datatype extension="equicktandem" type="galaxy.datatypes.data:Text"/> - <datatype extension="est2genome" type="galaxy.datatypes.data:Text"/> - <datatype extension="etandem" type="galaxy.datatypes.data:Text"/> - <datatype extension="excel" type="galaxy.datatypes.data:Text"/> - <datatype extension="feattable" type="galaxy.datatypes.data:Text"/> - <datatype extension="fitch" type="galaxy.datatypes.data:Text"/> - <datatype extension="freak" type="galaxy.datatypes.data:Text"/> - <datatype extension="fuzznuc" type="galaxy.datatypes.data:Text"/> - <datatype extension="fuzzpro" type="galaxy.datatypes.data:Text"/> - <datatype extension="fuzztran" type="galaxy.datatypes.data:Text"/> - <datatype extension="garnier" type="galaxy.datatypes.data:Text"/> - <datatype extension="gcg" type="galaxy.datatypes.data:Text"/> - <datatype extension="geecee" type="galaxy.datatypes.data:Text"/> - <datatype extension="genbank" type="galaxy.datatypes.data:Text"/> - <datatype extension="helixturnhelix" type="galaxy.datatypes.data:Text"/> - <datatype extension="hennig86" type="galaxy.datatypes.data:Text"/> - <datatype extension="hmoment" type="galaxy.datatypes.data:Text"/> - <datatype extension="ig" type="galaxy.datatypes.data:Text"/> - <datatype extension="isochore" type="galaxy.datatypes.data:Text"/> - <datatype extension="jackknifer" type="galaxy.datatypes.data:Text"/> - <datatype extension="jackknifernon" type="galaxy.datatypes.data:Text"/> - <datatype extension="markx10" type="galaxy.datatypes.data:Text"/> - <datatype extension="markx1" type="galaxy.datatypes.data:Text"/> - <datatype extension="markx0" type="galaxy.datatypes.data:Text"/> - <datatype extension="markx3" type="galaxy.datatypes.data:Text"/> - <datatype extension="markx2" type="galaxy.datatypes.data:Text"/> - <datatype extension="match" type="galaxy.datatypes.data:Text"/> - <datatype extension="mega" type="galaxy.datatypes.data:Text"/> - <datatype extension="meganon" type="galaxy.datatypes.data:Text"/> - <datatype extension="motif" type="galaxy.datatypes.data:Text"/> - <datatype extension="msf" type="galaxy.datatypes.data:Text"/> - <datatype extension="nametable" type="galaxy.datatypes.data:Text"/> - <datatype extension="ncbi" type="galaxy.datatypes.data:Text"/> - <datatype extension="needle" type="galaxy.datatypes.data:Text"/> - <datatype extension="newcpgreport" type="galaxy.datatypes.data:Text"/> - <datatype extension="newcpgseek" type="galaxy.datatypes.data:Text"/> - <datatype extension="nexus" type="galaxy.datatypes.data:Text"/> - <datatype extension="nexusnon" type="galaxy.datatypes.data:Text"/> - <datatype extension="noreturn" type="galaxy.datatypes.data:Text"/> - <datatype extension="pair" type="galaxy.datatypes.data:Text"/> - <datatype extension="palindrome" type="galaxy.datatypes.data:Text"/> - <datatype extension="pepcoil" type="galaxy.datatypes.data:Text"/> - <datatype extension="pepinfo" type="galaxy.datatypes.data:Text"/> - <datatype extension="pepstats" type="galaxy.datatypes.data:Text"/> - <datatype extension="phylip" type="galaxy.datatypes.data:Text"/> - <datatype extension="phylipnon" type="galaxy.datatypes.data:Text"/> - <datatype extension="pir" type="galaxy.datatypes.data:Text"/> - <datatype extension="polydot" type="galaxy.datatypes.data:Text"/> - <datatype extension="preg" type="galaxy.datatypes.data:Text"/> - <datatype extension="prettyseq" type="galaxy.datatypes.data:Text"/> - <datatype extension="primersearch" type="galaxy.datatypes.data:Text"/> - <datatype extension="regions" type="galaxy.datatypes.data:Text"/> - <datatype extension="score" type="galaxy.datatypes.data:Text"/> - <datatype extension="selex" type="galaxy.datatypes.data:Text"/> - <datatype extension="seqtable" type="galaxy.datatypes.data:Text"/> - <datatype extension="showfeat" type="galaxy.datatypes.data:Text"/> - <datatype extension="showorf" type="galaxy.datatypes.data:Text"/> - <datatype extension="simple" type="galaxy.datatypes.data:Text"/> - <datatype extension="sixpack" type="galaxy.datatypes.data:Text"/> - <datatype extension="srs" type="galaxy.datatypes.data:Text"/> - <datatype extension="srspair" type="galaxy.datatypes.data:Text"/> - <datatype extension="staden" type="galaxy.datatypes.data:Text"/> - <datatype extension="strider" type="galaxy.datatypes.data:Text"/> - <datatype extension="supermatcher" type="galaxy.datatypes.data:Text"/> - <datatype extension="swiss" type="galaxy.datatypes.data:Text"/> - <datatype extension="syco" type="galaxy.datatypes.data:Text"/> - <datatype extension="table" type="galaxy.datatypes.data:Text"/> - <datatype extension="textsearch" type="galaxy.datatypes.data:Text"/> - <datatype extension="vectorstrip" type="galaxy.datatypes.data:Text"/> - <datatype extension="wobble" type="galaxy.datatypes.data:Text"/> - <datatype extension="wordcount" type="galaxy.datatypes.data:Text"/> - <datatype extension="tagseq" type="galaxy.datatypes.data:Text"/> - <!-- Start RGenetics Datatypes --> - <!-- genome graphs ucsc file - first col is always marker then numeric values to plot --> - <datatype extension="gg" type="galaxy.datatypes.genetics:GenomeGraphs"/> - <datatype extension="rgenetics" type="galaxy.datatypes.genetics:Rgenetics"/> - <!-- linkage format pedigree (separate .map file) --> - <datatype extension="lped" type="galaxy.datatypes.genetics:Lped" display_in_upload="true"/> - <!-- plink compressed file - has bed extension unfortunately --> - <datatype extension="pbed" type="galaxy.datatypes.genetics:Pbed" display_in_upload="true"/> - <!-- eigenstrat pedigree input file --> - <datatype extension="eigenstratgeno" type="galaxy.datatypes.genetics:Eigenstratgeno"/> - <!-- eigenstrat pca output file for adjusted eigenQTL eg --> - <datatype extension="eigenstratpca" type="galaxy.datatypes.genetics:Eigenstratpca"/> - <!-- fbat/pbat format pedigree (header row of marker names) --> - <datatype extension="fped" type="galaxy.datatypes.genetics:Fped"/> - <!-- part of linkage format pedigree --> - <datatype extension="lmap" type="galaxy.datatypes.genetics:Lmap"/> - <!-- phenotype file - fbat format --> - <datatype extension="fphe" type="galaxy.datatypes.genetics:Fphe"/> - <!-- phenotype file - plink format --> - <datatype extension="pphe" type="galaxy.datatypes.genetics:Pphe"/> - <datatype extension="snptest" type="galaxy.datatypes.genetics:Snptest"/> - <datatype extension="snpmatrix" type="galaxy.datatypes.genetics:SNPMatrix"/> - <datatype extension="xls" type="galaxy.datatypes.tabular:Tabular"/> - <!-- End RGenetics Datatypes --> - </registration> - <sniffers> - <!-- - The order in which Galaxy attempts to determine data types is - important because some formats are much more loosely defined - than others. The following list should be the most rigidly - defined format first, followed by next-most rigidly defined, - and so on. - --> - <sniffer type="galaxy.datatypes.binary:Sff"/> - <sniffer type="galaxy.datatypes.xml:BlastXml"/> - <sniffer type="galaxy.datatypes.sequence:Maf"/> - <sniffer type="galaxy.datatypes.sequence:Lav"/> - <sniffer type="galaxy.datatypes.sequence:csFasta"/> - <sniffer type="galaxy.datatypes.qualityscore:QualityScoreSOLiD"/> - <sniffer type="galaxy.datatypes.qualityscore:QualityScore454"/> - <sniffer type="galaxy.datatypes.sequence:Fasta"/> - <sniffer type="galaxy.datatypes.sequence:Fastq"/> - <sniffer type="galaxy.datatypes.interval:Wiggle"/> - <sniffer type="galaxy.datatypes.images:Html"/> - <sniffer type="galaxy.datatypes.sequence:Axt"/> - <sniffer type="galaxy.datatypes.interval:Bed"/> - <sniffer type="galaxy.datatypes.interval:CustomTrack"/> - <sniffer type="galaxy.datatypes.interval:Gff"/> - <sniffer type="galaxy.datatypes.interval:Gff3"/> - <sniffer type="galaxy.datatypes.interval:Interval"/> - <sniffer type="galaxy.datatypes.tabular:Sam"/> - </sniffers> + <registration converters_path="lib/galaxy/datatypes/converters"> + <datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true"/> + <datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true"/> + <datatype extension="bam" type="galaxy.datatypes.binary:Bam" mimetype="application/octet-stream"/> + <datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true"> + <converter file="bed_to_gff_converter.xml" target_datatype="gff"/> + <converter file="interval_to_coverage.xml" target_datatype="coverage"/> + <converter file="bed_to_interval_index_converter.xml" target_datatype="interval_index"/> + </datatype> + <datatype extension="binseq.zip" type="galaxy.datatypes.binary:Binseq" mimetype="application/zip" display_in_upload="true"/> + <datatype extension="len" type="galaxy.datatypes.chrominfo:ChromInfo" display_in_upload="true"> + <!-- no converters yet --> + </datatype> + <datatype extension="coverage" type="galaxy.datatypes.coverage:LastzCoverage" display_in_upload="true"> + <indexer file="coverage.xml" /> + </datatype> + <datatype extension="customtrack" type="galaxy.datatypes.interval:CustomTrack"/> + <datatype extension="csfasta" type="galaxy.datatypes.sequence:csFasta" display_in_upload="true"/> + <datatype extension="data" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream"/> + <datatype extension="fasta" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true"> + <converter file="fasta_to_tabular_converter.xml" target_datatype="tabular"/> + </datatype> + <datatype extension="fastq" type="galaxy.datatypes.sequence:Fastq" display_in_upload="true"/> + <datatype extension="fastqsanger" type="galaxy.datatypes.sequence:FastqSanger" display_in_upload="true"/> + <datatype extension="genetrack" type="galaxy.datatypes.tracks:GeneTrack"/> + <datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"> + <converter file="gff_to_bed_converter.xml" target_datatype="bed"/> + </datatype> + <datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/> + <datatype extension="gif" type="galaxy.datatypes.images:Image" mimetype="image/gif"/> + <datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/> + <datatype extension="html" type="galaxy.datatypes.images:Html" mimetype="text/html"/> + <datatype extension="interval" type="galaxy.datatypes.interval:Interval" display_in_upload="true"> + <converter file="interval_to_bed_converter.xml" target_datatype="bed"/> + <indexer file="interval_awk.xml" /> + </datatype> + <datatype extension="jpg" type="galaxy.datatypes.images:Image" mimetype="image/jpeg"/> + <datatype extension="laj" type="galaxy.datatypes.images:Laj"/> + <datatype extension="lav" type="galaxy.datatypes.sequence:Lav" display_in_upload="true"/> + <datatype extension="maf" type="galaxy.datatypes.sequence:Maf" display_in_upload="true"> + <converter file="maf_to_fasta_converter.xml" target_datatype="fasta"/> + <converter file="maf_to_interval_converter.xml" target_datatype="interval"/> + </datatype> + <datatype extension="pdf" type="galaxy.datatypes.images:Image" mimetype="application/pdf"/> + <datatype extension="png" type="galaxy.datatypes.images:Image" mimetype="image/png"/> + <datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/> + <datatype extension="qualsolid" type="galaxy.datatypes.qualityscore:QualityScoreSOLiD" display_in_upload="true"/> + <datatype extension="qual454" type="galaxy.datatypes.qualityscore:QualityScore454" display_in_upload="true"/> + <datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"/> + <datatype extension="scf" type="galaxy.datatypes.binary:Scf" mimetype="application/octet-stream" display_in_upload="true"/> + <datatype extension="sff" type="galaxy.datatypes.binary:Sff" mimetype="application/octet-stream" display_in_upload="true"/> + <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/> + <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/> + <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/> + <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" display_in_upload="true"/> + <datatype extension="txtseq.zip" type="galaxy.datatypes.data:Txtseq" mimetype="application/zip" display_in_upload="true"/> + <datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true"> + <converter file="wiggle_to_array_tree_converter.xml" target_datatype="array_tree"/> + </datatype> + <datatype extension="array_tree" type="galaxy.datatypes.data:Data" /> + <datatype extension="interval_index" type="galaxy.datatypes.data:Data" /> + <!-- Start EMBOSS tools --> + <datatype extension="acedb" type="galaxy.datatypes.data:Text"/> + <datatype extension="asn1" type="galaxy.datatypes.data:Text"/> + <datatype extension="btwisted" type="galaxy.datatypes.data:Text"/> + <datatype extension="cai" type="galaxy.datatypes.data:Text"/> + <datatype extension="charge" type="galaxy.datatypes.data:Text"/> + <datatype extension="checktrans" type="galaxy.datatypes.data:Text"/> + <datatype extension="chips" type="galaxy.datatypes.data:Text"/> + <datatype extension="clustal" type="galaxy.datatypes.data:Text"/> + <datatype extension="codata" type="galaxy.datatypes.data:Text"/> + <datatype extension="codcmp" type="galaxy.datatypes.data:Text"/> + <datatype extension="coderet" type="galaxy.datatypes.data:Text"/> + <datatype extension="compseq" type="galaxy.datatypes.data:Text"/> + <datatype extension="cpgplot" type="galaxy.datatypes.data:Text"/> + <datatype extension="cpgreport" type="galaxy.datatypes.data:Text"/> + <datatype extension="cusp" type="galaxy.datatypes.data:Text"/> + <datatype extension="cut" type="galaxy.datatypes.data:Text"/> + <datatype extension="dan" type="galaxy.datatypes.data:Text"/> + <datatype extension="dbmotif" type="galaxy.datatypes.data:Text"/> + <datatype extension="diffseq" type="galaxy.datatypes.data:Text"/> + <datatype extension="digest" type="galaxy.datatypes.data:Text"/> + <datatype extension="dreg" type="galaxy.datatypes.data:Text"/> + <datatype extension="einverted" type="galaxy.datatypes.data:Text"/> + <datatype extension="embl" type="galaxy.datatypes.data:Text"/> + <datatype extension="epestfind" type="galaxy.datatypes.data:Text"/> + <datatype extension="equicktandem" type="galaxy.datatypes.data:Text"/> + <datatype extension="est2genome" type="galaxy.datatypes.data:Text"/> + <datatype extension="etandem" type="galaxy.datatypes.data:Text"/> + <datatype extension="excel" type="galaxy.datatypes.data:Text"/> + <datatype extension="feattable" type="galaxy.datatypes.data:Text"/> + <datatype extension="fitch" type="galaxy.datatypes.data:Text"/> + <datatype extension="freak" type="galaxy.datatypes.data:Text"/> + <datatype extension="fuzznuc" type="galaxy.datatypes.data:Text"/> + <datatype extension="fuzzpro" type="galaxy.datatypes.data:Text"/> + <datatype extension="fuzztran" type="galaxy.datatypes.data:Text"/> + <datatype extension="garnier" type="galaxy.datatypes.data:Text"/> + <datatype extension="gcg" type="galaxy.datatypes.data:Text"/> + <datatype extension="geecee" type="galaxy.datatypes.data:Text"/> + <datatype extension="genbank" type="galaxy.datatypes.data:Text"/> + <datatype extension="helixturnhelix" type="galaxy.datatypes.data:Text"/> + <datatype extension="hennig86" type="galaxy.datatypes.data:Text"/> + <datatype extension="hmoment" type="galaxy.datatypes.data:Text"/> + <datatype extension="ig" type="galaxy.datatypes.data:Text"/> + <datatype extension="isochore" type="galaxy.datatypes.data:Text"/> + <datatype extension="jackknifer" type="galaxy.datatypes.data:Text"/> + <datatype extension="jackknifernon" type="galaxy.datatypes.data:Text"/> + <datatype extension="markx10" type="galaxy.datatypes.data:Text"/> + <datatype extension="markx1" type="galaxy.datatypes.data:Text"/> + <datatype extension="markx0" type="galaxy.datatypes.data:Text"/> + <datatype extension="markx3" type="galaxy.datatypes.data:Text"/> + <datatype extension="markx2" type="galaxy.datatypes.data:Text"/> + <datatype extension="match" type="galaxy.datatypes.data:Text"/> + <datatype extension="mega" type="galaxy.datatypes.data:Text"/> + <datatype extension="meganon" type="galaxy.datatypes.data:Text"/> + <datatype extension="motif" type="galaxy.datatypes.data:Text"/> + <datatype extension="msf" type="galaxy.datatypes.data:Text"/> + <datatype extension="nametable" type="galaxy.datatypes.data:Text"/> + <datatype extension="ncbi" type="galaxy.datatypes.data:Text"/> + <datatype extension="needle" type="galaxy.datatypes.data:Text"/> + <datatype extension="newcpgreport" type="galaxy.datatypes.data:Text"/> + <datatype extension="newcpgseek" type="galaxy.datatypes.data:Text"/> + <datatype extension="nexus" type="galaxy.datatypes.data:Text"/> + <datatype extension="nexusnon" type="galaxy.datatypes.data:Text"/> + <datatype extension="noreturn" type="galaxy.datatypes.data:Text"/> + <datatype extension="pair" type="galaxy.datatypes.data:Text"/> + <datatype extension="palindrome" type="galaxy.datatypes.data:Text"/> + <datatype extension="pepcoil" type="galaxy.datatypes.data:Text"/> + <datatype extension="pepinfo" type="galaxy.datatypes.data:Text"/> + <datatype extension="pepstats" type="galaxy.datatypes.data:Text"/> + <datatype extension="phylip" type="galaxy.datatypes.data:Text"/> + <datatype extension="phylipnon" type="galaxy.datatypes.data:Text"/> + <datatype extension="pir" type="galaxy.datatypes.data:Text"/> + <datatype extension="polydot" type="galaxy.datatypes.data:Text"/> + <datatype extension="preg" type="galaxy.datatypes.data:Text"/> + <datatype extension="prettyseq" type="galaxy.datatypes.data:Text"/> + <datatype extension="primersearch" type="galaxy.datatypes.data:Text"/> + <datatype extension="regions" type="galaxy.datatypes.data:Text"/> + <datatype extension="score" type="galaxy.datatypes.data:Text"/> + <datatype extension="selex" type="galaxy.datatypes.data:Text"/> + <datatype extension="seqtable" type="galaxy.datatypes.data:Text"/> + <datatype extension="showfeat" type="galaxy.datatypes.data:Text"/> + <datatype extension="showorf" type="galaxy.datatypes.data:Text"/> + <datatype extension="simple" type="galaxy.datatypes.data:Text"/> + <datatype extension="sixpack" type="galaxy.datatypes.data:Text"/> + <datatype extension="srs" type="galaxy.datatypes.data:Text"/> + <datatype extension="srspair" type="galaxy.datatypes.data:Text"/> + <datatype extension="staden" type="galaxy.datatypes.data:Text"/> + <datatype extension="strider" type="galaxy.datatypes.data:Text"/> + <datatype extension="supermatcher" type="galaxy.datatypes.data:Text"/> + <datatype extension="swiss" type="galaxy.datatypes.data:Text"/> + <datatype extension="syco" type="galaxy.datatypes.data:Text"/> + <datatype extension="table" type="galaxy.datatypes.data:Text"/> + <datatype extension="textsearch" type="galaxy.datatypes.data:Text"/> + <datatype extension="vectorstrip" type="galaxy.datatypes.data:Text"/> + <datatype extension="wobble" type="galaxy.datatypes.data:Text"/> + <datatype extension="wordcount" type="galaxy.datatypes.data:Text"/> + <datatype extension="tagseq" type="galaxy.datatypes.data:Text"/> + <!-- End EMBOSS tools --> + <!-- Start RGenetics Datatypes --> + <datatype extension="affybatch" type="galaxy.datatypes.genetics:Affybatch" display_in_upload="true"/> + <!-- eigenstrat pedigree input file --> + <datatype extension="eigenstratgeno" type="galaxy.datatypes.genetics:Eigenstratgeno"/> + <!-- eigenstrat pca output file for adjusted eigenQTL eg --> + <datatype extension="eigenstratpca" type="galaxy.datatypes.genetics:Eigenstratpca"/> + <datatype extension="eset" type="galaxy.datatypes.genetics:Eset" display_in_upload="true" /> + <!-- fbat/pbat format pedigree (header row of marker names) --> + <datatype extension="fped" type="galaxy.datatypes.genetics:Fped" display_in_upload="true"/> + <!-- phenotype file - fbat format --> + <datatype extension="fphe" type="galaxy.datatypes.genetics:Fphe" display_in_upload="true" mimetype="text/html"/> + <!-- genome graphs ucsc file - first col is always marker then numeric values to plot --> + <datatype extension="gg" type="galaxy.datatypes.genetics:GenomeGraphs"/> + <!-- part of linkage format pedigree --> + <datatype extension="lmap" type="galaxy.datatypes.genetics:Lmap" display_in_upload="true"/> + <datatype extension="malist" type="galaxy.datatypes.genetics:MAlist" display_in_upload="true"/> + <!-- linkage format pedigree (separate .map file) --> + <datatype extension="lped" type="galaxy.datatypes.genetics:Lped" display_in_upload="true"> + <converter file="lped_to_fped_converter.xml" target_datatype="fped"/> + <converter file="lped_to_pbed_converter.xml" target_datatype="pbed"/> + </datatype> + <!-- plink compressed file - has bed extension unfortunately --> + <datatype extension="pbed" type="galaxy.datatypes.genetics:Pbed" display_in_upload="true"> + <converter file="pbed_to_lped_converter.xml" target_datatype="lped"/> + </datatype> + <datatype extension="pheno" type="galaxy.datatypes.genetics:Pheno"/> + <!-- phenotype file - plink format --> + <datatype extension="pphe" type="galaxy.datatypes.genetics:Pphe" display_in_upload="true" mimetype="text/html"/> + <datatype extension="rexpbase" type="galaxy.datatypes.genetics:RexpBase"/> + <datatype extension="rgenetics" type="galaxy.datatypes.genetics:Rgenetics"/> + <datatype extension="snptest" type="galaxy.datatypes.genetics:Snptest" display_in_upload="true"/> + <datatype extension="snpmatrix" type="galaxy.datatypes.genetics:SNPMatrix" display_in_upload="true"/> + <datatype extension="xls" type="galaxy.datatypes.tabular:Tabular"/> + <!-- End RGenetics Datatypes --> + </registration> + <sniffers> + <!-- + The order in which Galaxy attempts to determine data types is + important because some formats are much more loosely defined + than others. The following list should be the most rigidly + defined format first, followed by next-most rigidly defined, + and so on. + --> + <sniffer type="galaxy.datatypes.binary:Sff"/> + <sniffer type="galaxy.datatypes.xml:BlastXml"/> + <sniffer type="galaxy.datatypes.sequence:Maf"/> + <sniffer type="galaxy.datatypes.sequence:Lav"/> + <sniffer type="galaxy.datatypes.sequence:csFasta"/> + <sniffer type="galaxy.datatypes.qualityscore:QualityScoreSOLiD"/> + <sniffer type="galaxy.datatypes.qualityscore:QualityScore454"/> + <sniffer type="galaxy.datatypes.sequence:Fasta"/> + <sniffer type="galaxy.datatypes.sequence:Fastq"/> + <sniffer type="galaxy.datatypes.interval:Wiggle"/> + <sniffer type="galaxy.datatypes.images:Html"/> + <sniffer type="galaxy.datatypes.sequence:Axt"/> + <sniffer type="galaxy.datatypes.interval:Bed"/> + <sniffer type="galaxy.datatypes.interval:CustomTrack"/> + <sniffer type="galaxy.datatypes.interval:Gff"/> + <sniffer type="galaxy.datatypes.interval:Gff3"/> + <sniffer type="galaxy.datatypes.interval:Interval"/> + <sniffer type="galaxy.datatypes.tabular:Sam"/> + </sniffers> </datatypes> diff -r a5bdbae15600 -r d16f28f312a9 lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Fri Nov 13 16:34:32 2009 -0500 +++ b/lib/galaxy/datatypes/genetics.py Fri Nov 13 16:35:07 2009 -0500 @@ -1,6 +1,5 @@ """ rgenetics datatypes -Use at your peril Ross Lazarus for the rgenetics and galaxy projects @@ -11,7 +10,6 @@ ross lazarus for rgenetics august 20 2007 """ - import logging, os, sys, time, tempfile, shutil, string, glob import data from galaxy import util @@ -26,8 +24,7 @@ from galaxy.datatypes.interval import Interval from galaxy.util.hash_util import * -gal_Log = logging.getLogger(__name__) -verbose = False +log = logging.getLogger(__name__) class GenomeGraphs(Interval): @@ -154,7 +151,7 @@ link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) ret_val.append( (site_name, link) ) else: - gal_Log.debug('@@@ gg ucsc_links - no viewport_tuple') + log.debug('@@@ gg ucsc_links - no viewport_tuple') return ret_val def sniff( self, filename ): """ @@ -195,7 +192,8 @@ return False class rgTabList(Tabular): - """ for sampleid and for featureid lists of exclusions or inclusions in the clean tool + """ + for sampleid and for featureid lists of exclusions or inclusions in the clean tool featureid subsets on statistical criteria -> specialized display such as gg """ file_ext = "rgTList" @@ -225,16 +223,19 @@ return out class rgSampleList(rgTabList): - """ for sampleid exclusions or inclusions in the clean tool - output from QC eg excess het, gender error, ibd pair member,eigen outlier,excess mendel errors,... - since they can be uploaded, should be flexible - but they are persistent at least - same infrastructure for expression? + """ + for sampleid exclusions or inclusions in the clean tool + output from QC eg excess het, gender error, ibd pair member,eigen outlier,excess mendel errors,... + since they can be uploaded, should be flexible + but they are persistent at least + same infrastructure for expression? """ file_ext = "rgSList" def __init__(self, **kwd): - """Initialize samplelist datatype""" + """ + Initialize samplelist datatype + """ rgTabList.__init__( self, **kwd ) self.column_names[0] = 'FID' self.column_names[1] = 'IID' @@ -250,10 +251,11 @@ return False class rgFeatureList( rgTabList ): - """ for featureid lists of exclusions or inclusions in the clean tool - output from QC eg low maf, high missingness, bad hwe in controls, excess mendel errors,... - featureid subsets on statistical criteria -> specialized display such as gg - same infrastructure for expression? + """ + for featureid lists of exclusions or inclusions in the clean tool + output from QC eg low maf, high missingness, bad hwe in controls, excess mendel errors,... + featureid subsets on statistical criteria -> specialized display such as gg + same infrastructure for expression? """ file_ext = "rgFList" @@ -264,8 +266,9 @@ self.column_names[i] = s class Rgenetics(Html): - """class to use for rgenetics""" - + """ + class to use for rgenetics + """ MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="rgenetics", readonly=True, set_in_upload=True) composite_type = 'auto_primary_file' @@ -289,7 +292,8 @@ rval.append( '</ul></div></html>' ) return "\n".join( rval ) def regenerate_primary_file(self,dataset): - """cannot do this until we are setting metadata + """ + cannot do this until we are setting metadata """ def fix(oldpath,newbase): old,e = os.path.splitext(oldpath) @@ -314,26 +318,24 @@ f.write('\n') f.close() def set_meta( self, dataset, **kwd ): - """for lped/pbed eg""" + """ + for lped/pbed eg + """ if kwd.get('overwrite') == False: - if verbose: - gal_Log.debug('@@@ rgenetics set_meta called with overwrite = False') + #log.debug('@@@ rgenetics set_meta called with overwrite = False') return True try: efp = dataset.extra_files_path except: - if verbose: - gal_Log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0], dataset.name)) + #log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0], dataset.name)) return False try: flist = os.listdir(efp) except: - if verbose: - gal_Log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0],dataset.name)) + #log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0],dataset.name)) return False if len(flist) == 0: - if verbose: - gal_Log.debug('@@@rgenetics set_meta failed - %s efp %s is empty?' % (dataset.name,efp)) + #log.debug('@@@rgenetics set_meta failed - %s efp %s is empty?' % (dataset.name,efp)) return False bn = None for f in flist: @@ -351,7 +353,8 @@ return True class SNPMatrix(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="snpmatrix" @@ -363,7 +366,8 @@ dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff(self,filename): - """ need to check the file header hex code + """ + need to check the file header hex code """ infile = open(dataset.file_name, "b") head = infile.read(16) @@ -374,7 +378,8 @@ return True class Lped(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="lped" @@ -384,7 +389,8 @@ self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True ) class Pphe(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="pphe" @@ -393,12 +399,14 @@ self.add_composite_file( '%s.pphe', description = 'Plink Phenotype File', substitute_name_with_metadata = 'base_name' ) class Lmap(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="lmap" class Fphe(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="fphe" @@ -407,7 +415,8 @@ self.add_composite_file( '%s.fphe', description = 'FBAT Phenotype File', substitute_name_with_metadata = 'base_name' ) class Phe(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="phe" @@ -416,7 +425,8 @@ self.add_composite_file( '%s.phe', description = 'Phenotype File', substitute_name_with_metadata = 'base_name' ) class Fped(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="fped" @@ -425,7 +435,8 @@ self.add_composite_file( '%s.fped', description = 'FBAT format pedfile', substitute_name_with_metadata = 'base_name' ) class Pbed(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="pbed" @@ -436,7 +447,8 @@ self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True ) class Eigenstratgeno(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="eigenstratgeno" @@ -447,7 +459,8 @@ self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) class Eigenstratpca(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="eigenstratpca" @@ -456,7 +469,8 @@ self.add_composite_file( '%s.eigenstratpca', description = 'Eigenstrat PCA file', substitute_name_with_metadata = 'base_name' ) class Snptest(Rgenetics): - """fake class to distinguish different species of Rgenetics data collections + """ + fake class to distinguish different species of Rgenetics data collections """ file_ext="snptest" @@ -467,7 +481,8 @@ file_ext = 'pheno' class RexpBase( Html ): - """base class for BioC data structures in Galaxy + """ + base class for BioC data structures in Galaxy must be constructed with the pheno data in place since that goes into the metadata for each instance """ @@ -488,7 +503,8 @@ self.add_composite_file( '%s.pheno', description = 'Phenodata tab text file', substitute_name_with_metadata = 'base_name', is_binary=True) def generate_primary_file( self, dataset = None ): - """ This is called only at upload to write the html file + """ + This is called only at upload to write the html file cannot rename the datasets here - they come with the default unfortunately """ return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>' @@ -517,7 +533,7 @@ else: for col,code in enumerate(row): # keep column order correct if col >= totcols: - gal_Log.warning('### get_phecols error in pheno file - row %d col %d (%s) longer than header %s' % (nrows, col, row, head)) + log.warning('### get_phecols error in pheno file - row %d col %d (%s) longer than header %s' % (nrows, col, row, head)) else: concordance[col].setdefault(code,0) # first one is zero concordance[col][code] += 1 @@ -564,7 +580,8 @@ return res def get_pheno(self,dataset): - """expects a .pheno file in the extra_files_dir - ugh + """ + expects a .pheno file in the extra_files_dir - ugh note that R is wierd and adds the row.name in the header so the columns are all wrong - unless you tell it not to. A file can be written as @@ -581,9 +598,11 @@ p = [] return '\n'.join(p) def set_peek( self, dataset ): - """expects a .pheno file in the extra_files_dir - ugh + """ + expects a .pheno file in the extra_files_dir - ugh note that R is wierd and does not include the row.name in - the header. why?""" + the header. why? + """ if not dataset.dataset.purged: pp = os.path.join(dataset.extra_files_path,'%s.pheno' % dataset.metadata.base_name) try: @@ -596,8 +615,7 @@ dataset.peek = 'file does not exist\n' dataset.blurb = 'file purged from disk' def get_peek( self, dataset ): - """expects a .pheno file in the extra_files_dir - ugh - """ + """expects a .pheno file in the extra_files_dir - ugh""" pp = os.path.join(dataset.extra_files_path,'%s.pheno' % dataset.metadata.base_name) try: p = file(pp,'r').readlines() @@ -640,8 +658,7 @@ try: flist = os.listdir(dataset.extra_files_path) except: - if verbose: - gal_Log.debug('@@@rexpression set_meta failed - no dataset?') + #log.debug('@@@rexpression set_meta failed - no dataset?') return False bn = None for f in flist: @@ -711,7 +728,6 @@ class Affybatch( RexpBase ): """derived class for BioC data structures in Galaxy """ - file_ext = "affybatch" def __init__( self, **kwd ): @@ -728,7 +744,6 @@ self.add_composite_file( '%s.eset', description = 'ESet R object saved to file', substitute_name_with_metadata = 'base_name', is_binary = True ) - class MAlist( RexpBase ): """derived class for BioC data structures in Galaxy """ file_ext = "malist" diff -r a5bdbae15600 -r d16f28f312a9 tools/data_source/upload.xml --- a/tools/data_source/upload.xml Fri Nov 13 16:34:32 2009 -0500 +++ b/tools/data_source/upload.xml Fri Nov 13 16:35:07 2009 -0500 @@ -181,6 +181,12 @@ ----- +**Sff** + +A binary file in 'Standard Flowgram Format' with a '.sff' file extension. + +----- + **Tabular (tab delimited)** Any data in tab delimited format (tabular)