# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Kanwei Li <kanwei@gmail.com>
# Date 1278108645 14400
# Node ID 67f5a7f16c59d3e3bb68b62d278e037975f1c1af
# Parent e5c40bd9c179059344645a9171b7172d74c64a25
Detect pdf files on upload [Brad Chapman]. Resolves #357

--- a/tools/data_source/upload.py
+++ b/tools/data_source/upload.py
@@ -10,6 +10,7 @@ from galaxy import eggs
 import galaxy.model
 from galaxy.datatypes import sniff
 from galaxy.datatypes.binary import *
+from galaxy.datatypes.images import Pdf
 from galaxy.datatypes.registry import Registry
 from galaxy import util
 from galaxy.util.json import *
@@ -85,6 +86,8 @@ def check_bam( temp_name ):
     return Bam().sniff( temp_name )
 def check_sff( temp_name ):
     return Sff().sniff( temp_name )
+def check_pdf( temp_name ):
+    return Pdf().sniff( temp_name )
 def check_gzip( temp_name ):
     # This method returns a tuple of booleans representing ( is_gzipped, is_valid )
     # Make sure we have a gzipped file
@@ -160,6 +163,9 @@ def add_file( dataset, json_file, output
     elif check_sff( dataset.path ):
         ext = 'sff'
         data_type = 'sff'
+    elif check_pdf( dataset.path ):
+        ext = 'pdf'
+        data_type = 'pdf'
     else:
         # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
         is_gzipped, is_valid = check_gzip( dataset.path )
@@ -215,7 +221,7 @@ def add_file( dataset, json_file, output
                     return
         if not data_type:
             if check_binary( dataset.path ):
-                # We have a binary dataset, but it is not Bam or Sff
+                # We have a binary dataset, but it is not Bam, Sff or Pdf
                 data_type = 'binary'
                 #binary_ok = False
                 parts = dataset.name.split( "." )
--- a/datatypes_conf.xml.sample
+++ b/datatypes_conf.xml.sample
@@ -79,7 +79,7 @@
         <datatype extension="mafcustomtrack" type="galaxy.datatypes.sequence:MafCustomTrack">
             <display file="ucsc/maf_customtrack.xml" />
         </datatype>
-        <datatype extension="pdf" type="galaxy.datatypes.images:Image" mimetype="application/pdf"/>
+        <datatype extension="pdf" type="galaxy.datatypes.images:Pdf" mimetype="application/pdf"/>
         <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true" />
         <datatype extension="png" type="galaxy.datatypes.images:Image" mimetype="image/png"/>
         <datatype extension="qual" type="galaxy.datatypes.qualityscore:QualityScore" />
@@ -260,6 +260,7 @@
         <sniffer type="galaxy.datatypes.sequence:Fastq"/>
         <sniffer type="galaxy.datatypes.interval:Wiggle"/>
         <sniffer type="galaxy.datatypes.images:Html"/>
+        <sniffer type="galaxy.datatypes.images:Pdf"/>
         <sniffer type="galaxy.datatypes.sequence:Axt"/>
         <sniffer type="galaxy.datatypes.interval:Bed"/>
         <sniffer type="galaxy.datatypes.interval:CustomTrack"/>
--- a/lib/galaxy/datatypes/images.py
+++ b/lib/galaxy/datatypes/images.py
@@ -23,6 +23,19 @@ class Image( data.Data ):
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
 
+class Pdf( Image ):
+    def sniff(self, filename):
+        """Determine if the file is in pdf format.
+        """
+        headers = get_headers(filename, None, 1)
+        try:
+            if headers[0][0].startswith("%PDF"):
+                return True
+            else:
+                return False
+        except IndexError:
+            return False
+
 def create_applet_tag_peek( class_name, archive, params ):
     text = """
 <!--[if !IE]>-->