diff --git a/.gitignore b/.gitignore
index daba77c..e1ef93d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,13 @@ Gemfile.lock
.yardoc
doc/
+# mac junk
+._*
+
+# annoying emacs backups
+.\#*
+\#*\#
+
vendor
# don't include generated files
diff --git a/lib/mimemagic.rb b/lib/mimemagic.rb
index 8fe8372..26a8d2b 100644
--- a/lib/mimemagic.rb
+++ b/lib/mimemagic.rb
@@ -9,107 +9,405 @@
# Mime type detection
class MimeMagic
- attr_reader :type, :mediatype, :subtype
+ attr_reader :type, :mediatype, :subtype, :params
- # Mime type by type string
+ # Initialize a new MIME type by its string representation.
+ #
+ # @param type [#to_s] the type to parse.
+ #
def initialize(type)
- @type = type
- @mediatype, @subtype = type.split('/', 2)
+ @type, *params = type.to_s.strip.split(/(?:\s*;\s*)+/) # chop off params
+ @type = @type.to_s.downcase # normalize the case; '' when the input was blank
+ # split parameter-value pairs if present
+ @params = params.map { |x| x.split(/\s*=\s*/, 2) } unless params.empty?
+ @mediatype, @subtype = @type.split ?/, 2 # split major and minor
+ end
+
+ # Syntactic sugar alias for constructor. No-op if `type` is already
+ # a {MimeMagic} object.
+ #
+ # @param type [#to_s] a string-like object representing a MIME type
+ # or file extension.
+ #
+ # @return [MimeMagic, nil] the instantiated object (nil for an unknown extension).
+ #
+ def self.[] type
+ # try noop first
+ return type if type.is_a? self
+
+ # now we handle the string
+ type = type.to_s.strip
+ return by_extension type unless type.include? ?/
+
+ # otherwise pass to constructor
+ new type
end
- # Add custom mime type. Arguments:
- # * type: Mime type
- # * options: Options hash
+ # Add a custom MIME type to the internal dictionary.
+ #
+ # @param type [#to_s] the type
+ # @param extensions [Array<#to_s>] file extensions
+ # @param parents [Array<#to_s>] parent types
+ # @param magic [Array] MIME "magic" specification
+ # @param aliases [Array<#to_s>] alternative names for the type
+ # @param comment [#to_s] a comment
#
- # Option keys:
- # * :extensions: String list or single string of file extensions
- # * :parents: String list or single string of parent mime types
- # * :magic: Mime magic specification
- # * :comment: Comment string
- def self.add(type, options)
- extensions = [options[:extensions]].flatten.compact
- TYPES[type] = [extensions,
- [options[:parents]].flatten.compact,
- options[:comment]]
- extensions.each {|ext| EXTENSIONS[ext] = type }
- MAGIC.unshift [type, options[:magic]] if options[:magic]
+ def self.add type,
+ extensions: [], parents: [], magic: [], comment: nil, aliases: []
+ type = type.to_s.strip.downcase
+ extensions = [extensions].flatten.compact
+ aliases = [aliases].flatten.compact # accepts a single alias, a list, or nil
+ t = TYPES[type] = [extensions, [parents].flatten.compact,
+ comment, type, aliases]
+ aliases.each { |a| TYPES[a] = t }
+ extensions.each {|ext| EXTENSIONS[ext] ||= type } # NOTE(review): ||= keeps an existing mapping; confirm add should not override
+
+ MAGIC.unshift [type, magic] if magic && !Array(magic).empty? # skip empty specs
+
+ true # output is ignored
end
- # Removes a mime type from the dictionary. You might want to do this if
+ # Removes a MIME type from the dictionary. You might want to do this if
# you're seeing impossible conflicts (for instance, application/x-gmc-link).
- # * type: The mime type to remove. All associated extensions and magic are removed too.
+ #
+ # @note All associated extensions and magic are removed too.
+ #
+ # @param type [#to_s] the type to remove.
+ #
def self.remove(type)
EXTENSIONS.delete_if {|ext, t| t == type }
MAGIC.delete_if {|t, m| t == type }
TYPES.delete(type)
+
+ true # output is also ignored
end
- # Returns true if type is a text format
- def text?; mediatype == 'text' || child_of?('text/plain'); end
+ # Returns true if type is a text format.
+ def text?; mediatype == 'text' || descendant_of?('text/plain'); end
- # Mediatype shortcuts
+ # Determine if the type is an image.
def image?; mediatype == 'image'; end
+
+ # Determine if the type is audio.
def audio?; mediatype == 'audio'; end
- def video?; mediatype == 'video'; end
- # Returns true if type is child of parent type
- def child_of?(parent)
- MimeMagic.child?(type, parent)
- end
+ # Determine if the type is video.
+ def video?; mediatype == 'video'; end
- # Get string list of file extensions
+ # Get string list of file extensions.
+ #
+ # @return [Array] associated file extensions.
+ #
def extensions
- TYPES.key?(type) ? TYPES[type][0] : []
+ TYPES.fetch(type, [[]]).first.map { |e| e.to_s.dup }
end
- # Get mime comment
+ # Get MIME comment.
+ #
+ # @return [String] the comment (an empty string if there is none)
+ #
def comment
- (TYPES.key?(type) ? TYPES[type][2] : nil).to_s
+ TYPES.fetch(type, [nil, nil, nil])[2].to_s.dup
+ end
+
+ # Return the canonical type. Returns `nil` if the type is unknown to
+ # the registry.
+ #
+ # @return [MimeMagic, nil] the canonical type, if present.
+ #
+ def canonical
+ t = TYPES[type.downcase] or return
+ return self if type == t[3]
+ self.class.new t[3]
+ end
+
+ # Return the type's aliases.
+ #
+ # @return [Array] the aliases, if any.
+ #
+ def aliases
+ TYPES.fetch(type.downcase, [nil, nil, nil, nil, []])[4].map do |t|
+ self.class.new t
+ end
end
- # Lookup mime type by file extension
- def self.by_extension(ext)
- ext = ext.to_s.downcase
- mime = ext[0..0] == '.' ? EXTENSIONS[ext[1..-1]] : EXTENSIONS[ext]
- mime && new(mime)
+ # Determine if the type is an alias.
+ #
+ # @return [false, true] whether the type is an alias (`false` if unknown).
+ #
+ def alias?
+ (c = canonical) ? type != c.type : false
end
- # Lookup mime type by filename
- def self.by_path(path)
- by_extension(File.extname(path))
+ # Returns true if the ancestor type is anywhere in the subject
+ # type's lineage. Always returns `false` if either `self` or
+ # `ancestor` are unknown to the type registry.
+ #
+ # @param ancestor [MimeMagic,#to_s] the candidate ancestor type
+ #
+ # @return [true, false] whether `self` is a descendant of `ancestor`
+ #
+ def descendant_of? ancestor
+ # always false if we don't know what this is
+ return false unless (c = canonical)
+ # likewise false when the ancestor cannot be resolved to a known type
+ a = self.class[ancestor]&.canonical or return false
+ c.lineage.include? a
end
- # Lookup mime type by magic content analysis.
- # This is a slow operation.
- def self.by_magic(io)
- mime = magic_match(io, :find)
- mime && new(mime[0])
+ # Returns true if type is child of parent type. Behaves the same as
+ # #descendant_of? if `recurse` is true, which is the default.
+ #
+ # @param parent [#to_s] a candidate parent type
+ # @param recurse [true, false] whether to recurse
+ #
+ # @return [true, false] whether `self` is a child of `parent` (`false` if either is unknown)
+ #
+ def child_of?(parent, recurse: true)
+ return descendant_of? parent if recurse
+ return false unless (c = canonical) && (pa = self.class[parent]&.canonical)
+ c.parents.include? pa
end
- # Lookup all mime types by magic content analysis.
- # This is a slower operation.
- def self.all_by_magic(io)
- magic_match(io, :select).map { |mime| new(mime[0]) }
+ # Fetches the immediate parent types.
+ #
+ # @return [Array] the type's parents
+ #
+ def parents
+ out = TYPES.fetch(type.to_s.downcase, [nil, []])[1].map do |x|
+ self.class.new x
+ end
+ # add this unless we're it
+ out << self.class.new('application/octet-stream') if
+ out.empty? and type.downcase != 'application/octet-stream'
+
+ out.uniq
end
- # Return type as string
- def to_s
- type
+ # Fetches the entire inheritance hierarchy for the given MIME type.
+ #
+ # @return [Array] the type's lineage
+ #
+ def lineage
+ ([canonical || self] + parents.map { |t| t.lineage }.flatten).uniq
+ end
+
+ alias_method :ancestor_types, :lineage
+
+ # Determine if the _type_ is binary, i.e. _not_ a descendant of
+ # `text/plain`. Not to be confused with the class method {.binary?},
+ # which concerns arbitrary input.
+ #
+ # @return [true, false] whether the type is binary.
+ #
+ def binary?
+ not lineage.include? 'text/plain'
end
- # Allow comparison with string
+ # Compare the equality of the type with another (or plain string).
+ #
+ # @param other [#to_s] the other to test
+ #
+ # @return [false, true] whether the two are equal.
+ #
def eql?(other)
- type == other.to_s
+ # coerce the rhs; nil or an unknown bare extension resolves to nothing
+ other = self.class[other] or return false
+
+ # check for an exact match
+ return true if type == other.type
+
+ # now canonicalize both sides and check
+ lhs = canonical
+ rhs = other.canonical
+
+ lhs && rhs && lhs.type == rhs.type ? true : false
end
+ alias_method :==, :eql?
+
+ # Return the object's (the underlying type string) hash.
+ #
+ # @return [Integer] the hash value.
+ #
def hash
type.hash
end
- alias == eql?
+ # Return the type as a string.
+ #
+ # @return [String] the type, as a string.
+ #
+ def to_s
+ type
+ end
+
+ # Return a diagnostic representation of the object.
+ #
+ # @return [String] a string representing the object.
+ #
+ def inspect
+ out = @type
+ out = [out, @params.map { |x| x.join ?= }].join ?; if
+ @params and !@params.empty?
+ %q[<%s "%s">] % [self.class, out]
+ end
+
+ # Look up MIME type by file extension. When `default` is true or a
+ # value, this method will always return a value.
+ #
+ # @param ext [#to_s] the file extension, with or without a leading dot
+ # @param default [false, true, #to_s, MimeMagic] a default fallback type
+ #
+ # @return [nil, MimeMagic] the type, if found.
+ #
+ def self.by_extension ext, default: false
+ ext = ext.to_s.downcase.delete_prefix ?.
+ default = coerce_default '', default
+ mime = EXTENSIONS[ext]
+ mime ? new(mime) : default
+ end
+
+ # Look up MIME type by file path. When `default` is true or a value,
+ # this method will always return a value.
+ #
+ # @param path [#to_s] the file/path to check
+ # @param default [false, true, #to_s, MimeMagic] a default fallback type
+ #
+ # @return [nil, MimeMagic] the type, if found.
+ #
+ def self.by_path path, default: false
+ by_extension(File.extname(path), default: default)
+ end
+
+ # Look up MIME type by magic content analysis. When `default` is true or a
+ # value, this method will always return a value.
+ #
+ # @note This is a relatively slow operation.
+ #
+ # @param io [#read, #to_s] the IO/String-like object to check for magic
+ # @param default [false, true, #to_s, MimeMagic] a default fallback type
+ #
+ # @return [nil, MimeMagic] a matching type, if found.
+ #
+ def self.by_magic io, default: false
+ default = coerce_default io, default
+ mime = magic_match(io, :find) or return default
+ new mime.first
+ end
+
+ # Return all matching MIME types by magic content analysis. When
+ # `default` is true or a value, the result will never be empty.
+ #
+ # @note This is a relatively slow operation.
+ #
+ # @param io [#read, #to_s] the IO/String-like object to check for magic
+ # @param default [false, true, #to_s, MimeMagic] a default fallback type
+ #
+ # @return [Array] all matching types
+ #
+ def self.all_by_magic io, default: false
+ default = coerce_default io, default
+ out = magic_match(io, :select).map { |mime| new mime.first }
+ out << default if out.empty? and default
+ out
+ end
- def self.child?(child, parent)
- child == parent || TYPES.key?(child) && TYPES[child][1].any? {|p| child?(p, parent) }
+ # Returns true if `child` is a child of the `parent` type.
+ #
+ # @param child [#to_s] a candidate child type
+ # @param parent [#to_s] a candidate parent type
+ # @param recurse [true, false] whether to search the whole lineage
+ # @return [true, false] whether `child` is a child of `parent`
+ #
+ def self.child?(child, parent, recurse: true)
+ (c = self[child]) && c.child_of?(parent, recurse: recurse) ? true : false
+ end
+
+ # Return the canonical type.
+ #
+ # @param type [#to_s] the type to test
+ #
+ # @return [MimeMagic, nil] the canonical type, if present.
+ #
+ def self.canonical type
+ self[type]&.canonical # self[] is nil for an unknown bare extension
+ end
+
+ # Return the type's aliases.
+ #
+ # @param type [#to_s] the type to check
+ #
+ # @return [Array] the aliases, if any.
+ #
+ def self.aliases type
+ self[type]&.aliases || [] # self[] is nil for an unknown bare extension
+ end
+
+ # Determine if an _input_ is binary. Not to be confused with the
+ # instance method {#binary?}, which concerns the _type_.
+ #
+ # @param thing [#read, #to_s] the IO-like or String-like thing to
+ # test; can also be a file name/path/extension or MIME type.
+ #
+ # @return [true, false, nil] whether the input is binary (`nil` if
+ # indeterminate).
+ #
+ def self.binary? thing
+ sample = ''
+
+ # get some stuff out of the IO or get a substring
+ if thing.is_a? MimeMagic
+ return thing.binary?
+ elsif %i[seek tell read].all? { |m| thing.respond_to? m }
+ pos = thing.tell
+ thing.seek 0, 0
+ sample = thing.read(256).to_s # handle empty
+ thing.seek pos
+ elsif thing.respond_to? :to_s
+ str = thing.to_s
+ # if it contains a slash it could be either a path or mimetype
+ test = if str.include? ?/
+ canonical(str) || by_extension(str.split(?.).last)
+ else
+ by_extension str.split(?.).last
+ end
+
+ return test.binary? if test
+
+ sample = str[0, 256]
+ else
+ # nil if we don't know what this thing is
+ return
+ end
+
+ # consider this to be 'binary' if empty
+ return true if sample.empty?
+ # control codes minus ordinary whitespace
+ /[\x0-\x8\xe-\x1f\x7f]/n.match? sample.b
+ end
+
+ # Return either `application/octet-stream` or `text/plain` depending
+ # on whether the thing is binary.
+ #
+ # @param thing [#read, #to_s] the thing (IO-like, String-like, or MIME type)
+ #
+ # @return [MimeMagic] the default type
+ #
+ def self.default_type thing
+ new(binary?(thing) ? 'application/octet-stream' : 'text/plain')
+ end
+
+ private
+ # NOTE: bare `private` does not cover `def self.`; hence private_class_method.
+ private_class_method def self.coerce_default(thing, default)
+ case default
+ when nil, false then nil
+ when true then default_type thing
+ when MimeMagic then default
+ when String, -> x { x.respond_to? :to_s } then new default
+ else default_type thing
+ end
end
def self.magic_match(io, method)
@@ -138,5 +436,4 @@ def self.magic_match_io(io, matches, buffer)
end
end
- private_class_method :magic_match, :magic_match_io
end
diff --git a/lib/mimemagic/tables.rb b/lib/mimemagic/tables.rb
index 11da9f6..8b55bc7 100644
--- a/lib/mimemagic/tables.rb
+++ b/lib/mimemagic/tables.rb
@@ -80,17 +80,23 @@ def self.parse_database
comments = Hash[*(mime/'comment').map {|comment| [comment['xml:lang'], comment.inner_text] }.flatten]
type = mime['type']
subclass = (mime/'sub-class-of').map{|x| x['type']}
- exts = (mime/'glob').map{|x| x['pattern'] =~ /^\*\.([^\[\]]+)$/ ? $1.downcase : nil }.compact
+ exts = (mime/'glob').map do |x|
+ x['pattern'] =~ /^\*\.([^\[\]]+)$/ ? $1.downcase : nil
+ end.compact
+
(mime/'magic').each do |magic|
priority = magic['priority'].to_i
matches = get_matches(magic)
magics << [priority, type, matches]
end
- if !exts.empty?
- exts.each{|x|
- extensions[x] = type if !extensions.include?(x)
- }
- types[type] = [exts,subclass,comments[nil]]
+
+ aliases = (mime/'alias/@type').map { |a| a.value.downcase.strip.freeze }
+
+ # XXX uhh do we only use the type if it has a file extension??
+ unless exts.empty?
+ exts.each { |x| extensions[x] ||= type }
+ types[type] = [exts, subclass, comments[nil], type, aliases]
+ # don't add the aliases yet; we do that below
end
end
@@ -141,13 +147,20 @@ def self.parse_database
extensions.keys.sort.each do |key|
EXTENSIONS[key] = extensions[key]
end
+
types.keys.sort.each do |key|
- exts = types[key][0]
- parents = types[key][1].sort
- comment = types[key][2]
+ exts, parents, comment, canon, aliases = *types[key]
- TYPES[key] = [exts, parents, comment]
+ parents.sort!
+ aliases.sort!
+
+ # we are copying it i guess
+ t = TYPES[key] = [exts, parents, comment, canon, aliases].freeze
+
+ # now register the aliases; note this leaves TYPES no longer in sorted key order
+ aliases.each { |a| TYPES[a] = t }
end
+
magics.each do |priority, type, matches|
MAGIC << [type, matches]
end
diff --git a/lib/mimemagic/version.rb b/lib/mimemagic/version.rb
index 56e3ee1..68702d2 100644
--- a/lib/mimemagic/version.rb
+++ b/lib/mimemagic/version.rb
@@ -1,5 +1,5 @@
class MimeMagic
# MimeMagic version string
# @api public
- VERSION = '0.4.3'
+ VERSION = '0.5.3'
end
diff --git a/test/mimemagic_test.rb b/test/mimemagic_test.rb
index 8aa48b1..db54224 100644
--- a/test/mimemagic_test.rb
+++ b/test/mimemagic_test.rb
@@ -25,6 +25,12 @@ def test_have_type_mediatype_and_subtype
assert_equal 'text/html', MimeMagic.new('text/html').type
assert_equal 'text', MimeMagic.new('text/html').mediatype
assert_equal 'html', MimeMagic.new('text/html').subtype
+
+ # a little more robust equality test perchance
+ assert MimeMagic['TEXT/HTML'] == 'TeXT/HtML;charset=utf-8'
+
+ # this was crashing because the RHS has no canonical
+ assert MimeMagic['text/html'] != 'application/x-bogus'
end
def test_have_mediatype_helpers
@@ -40,7 +46,10 @@ def test_have_mediatype_helpers
def test_have_hierarchy
assert MimeMagic.new('text/html').child_of?('text/plain')
- assert MimeMagic.new('text/x-java').child_of?('text/plain')
+ # drake-no: text/plain is an ancestor but not an immediate parent
+ refute MimeMagic.new('text/x-java').child_of?('text/plain', recurse: false)
+ # drake-yes
+ assert MimeMagic.new('text/x-java').descendant_of?('text/plain')
end
def test_have_extensions
@@ -52,28 +61,37 @@ def test_have_comment
end
def test_recognize_extensions
- assert true
+ assert MimeMagic.by_extension('html')
- # Unknown if this test failure is expected. Commenting out for now.
+ # these resolve to application/xhtml+xml instead of text/html
+ # because of ambiguities in file extension associations; the data
+ # file associates the former since it's first.
#
# assert_equal 'text/html', MimeMagic.by_extension('.html').to_s
# assert_equal 'text/html', MimeMagic.by_extension('html').to_s
# assert_equal 'text/html', MimeMagic.by_extension(:html).to_s
- # assert_equal 'application/x-ruby', MimeMagic.by_extension('rb').to_s
- # assert_nil MimeMagic.by_extension('crazy')
- # assert_nil MimeMagic.by_extension('')
+
+ assert_equal 'application/x-ruby', MimeMagic.by_extension('rb').to_s
+ assert_nil MimeMagic.by_extension('crazy')
+ assert_nil MimeMagic.by_extension('')
+ # try with a default fallback
+ assert_equal 'application/octet-stream',
+ MimeMagic.by_extension('crazy', default: true).to_s
end
def test_recognize_by_a_path
- assert true
- # Unknown if this test failure is expected. Commenting out for now.
+ # once again, ambiguities.
#
# assert_equal 'text/html', MimeMagic.by_path('/adsjkfa/kajsdfkadsf/kajsdfjasdf.html').to_s
# assert_equal 'text/html', MimeMagic.by_path('something.html').to_s
- # assert_equal 'application/x-ruby', MimeMagic.by_path('wtf.rb').to_s
- # assert_nil MimeMagic.by_path('where/am.html/crazy')
- # assert_nil MimeMagic.by_path('')
+
+ assert_equal 'application/x-ruby', MimeMagic.by_path('wtf.rb').to_s
+ assert_nil MimeMagic.by_path('where/am.html/crazy')
+ assert_nil MimeMagic.by_path('')
+
+ assert_equal 'application/octet-stream',
+ MimeMagic.by_path('', default: true).to_s
end
def test_recognize_xlsx_as_zip_without_magic
@@ -118,7 +136,7 @@ def test_have_add
assert_equal 'application/mimemagic-test', MimeMagic.by_extension('ext2').to_s
assert_equal 'Comment', MimeMagic.by_extension('ext2').comment
assert_equal %w(ext1 ext2), MimeMagic.new('application/mimemagic-test').extensions
- assert MimeMagic.new('application/mimemagic-test').child_of?('text/plain')
+ assert MimeMagic.new('application/mimemagic-test').descendant_of?('text/plain')
end
def test_process_magic
@@ -151,6 +169,16 @@ def test_process_magic
assert_nil MimeMagic.by_magic(StringIO.new 'Z MAGICTEST')
end
+ def test_type_is_binary
+ assert MimeMagic.binary? 'psd'
+ refute MimeMagic.binary? 'html'
+ end
+
+ def test_fancy_constructor
+ assert_equal 'text/html', MimeMagic['text/html'].to_s
+ assert_equal 'application/pdf', MimeMagic['pdf'].to_s
+ end
+
class IOObject
def initialize
@io = StringIO.new('MAGICTEST')