ロード トップ 参照元 逆検索 検索 ヘルプ

[logo] ruby/magic.rb


SuzTiki:ruby


@ magic.rb
class Magic

   def getstr(l)
      val = '';
      while !(l.empty?)
	case l
	when /^\\([0-7])([0-7])([0-7])/ then
		v = ($1.to_i * 8 + $2.to_i) * 8 + $3.to_i
		val << v;
	when /^\\(.)/ then
		val << $1.to_s;
	when /^([^\\\s]+)/ then
		val << $1;
	when /^([\s]+)/ then break;
	when /.*/ then break;
	end
        l = $';
      end
     l.gsub!(/^[\s]*/,'');
      return  val , l ;
   end

   def getnum(l)
      val = 0;
	case l
	when /(0x[0-9a-fA-F]+)[\s]+/ then
		l = $';
		val = eval($1);
	when /([0-9]+)[\s]+/ then
		l = $';
		val = eval($1);
	end
        l = $';
      l.gsub!(/^[\s]*/,'');
      return val,  l;
   end

   def initialize(fname = "etc/magic.mime");
     @magic = [];
     m = File::open(fname , 'r');
     if (m == nil)
     else
	while (l = m.gets)
		next if (l =~ /^[\s]*#/);
		next if (l =~ /^[\s]*$/);
		case l 
		when /^>>([0-9]+)[\s]+/  then lvl = 2;
		when /^>([0-9]+)[\s]+/   then lvl = 1;
		when /^([0-9]+)[\s]+/    then lvl = 0;
		when /.*/	         then next;
		end
		off = $1.to_i;
		l = $';
		case l 
		when	/^(byte|short|long)[\s]+/   then type = 0;
		when	/^(beshort|belong)[\s]+/    then type = 1;
		when	/^(leshort|lelong)[\s]+/    then type = 2;
		when	/^(string)[\s]+/            then type = 3;
		when /.*/	         	    then next;
		end
		type_str = $1;
		l = $';
		val = nil;
		if (type == 3) # string
			 val , l  = getstr(l);
			 width = val.length;
		else
			 val , l  = getnum(l);
			case type_str
			when /byte/	then width = 1;
			when /short/	then width = 2;
			when /long/	then width = 4;
			end
		end
	name = nil;
	if (l =~ /^([^\s]+)/)
		name = $1;
	end
	@magic << [ lvl , type, off , width,  val , name ] ;
#printf("%d %d %d %d %s => %s\n",lvl,off,width,type,val.to_s,name);
	end
	m.close();
      end
   end

   def match_str(off,len, val)
     begin
	return (@buf[off,len] == val); 
     rescue
	return false;
     end
   end
   def match_le(off,len,val)
#printf("match_le %d-%d = %d\n",off,len,val);
	v = 0;
	off += len - 1;
     begin
	while (len != 0)
		v *= 256;
		v += @buf[off];
		len -= 1;
		off -= 1;
	end
	return (v == val);
     rescue
	return false;
     end
   end
   def match_be(off,len,val)
	v = 0;
     begin
	while (len != 0)
		v *= 256;
		v += @buf[off];
		len -= 1;
		off += 1;
	end
	return (v == val);
     rescue
	return false;
     end
   end
   def file(fname)
	m = File::open(fname , 'r');
	return nil if (m == nil);
	m.binmode;
	@buf = m.read(256);
	m.close();
	match = [];
	cur_lvl = 0;
	@magic.each {|ent|  # [ lvl , type,  off , width, val , name ]
		next if (ent[0] !=0 && ent[0] != cur_lvl);
		cur_lvl = 0 if (ent[0] == 0);
		result = false;
		case ent[1] # 0: native 1: be 2: le 3: str
		when 0	then result = match_le(ent[2],ent[3],ent[4])
		when 1	then result = match_be(ent[2],ent[3],ent[4])
		when 2	then result = match_le(ent[2],ent[3],ent[4])
		when 3	then result = match_str(ent[2],ent[3],ent[4])
		end
		next if (!result);
		return ent[5] if (ent[5] != nil);
		cur_lvl += 1;
	}
	return "text/plain";
   end
end

# m = Magic.new()
# print m.file("pack.o"),"\n";
# print m.file("pack.c"),"\n";
# print m.file("icons/suz.gif"),"\n";
# print m.file("magic.rb"),"\n";
# print m.file("html/index.html"),"\n";


@ mime の定義ファイル: NetBSD の /usr/share/misc/magic.mime
# $NetBSD: magic.mime,v 1.2 2000/05/14 23:28:43 soren Exp $
# Magic data for KMimeMagic (originally for file(1) command)
#
# The format is 4-5 columns:
#    Column #1: byte number to begin checking from, ">" indicates continuation
#    Column #2: type of data to match
#    Column #3: contents of data to match
#    Column #4: MIME type of result
#    Column #5: MIME encoding of result (optional)

#------------------------------------------------------------------------------
# Localstuff:  file(1) magic for locally observed files
# Add any locally observed files here.

#------------------------------------------------------------------------------
# end local stuff
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Java

0	short		0xcafe
>2	short		0xbabe		application/java

#------------------------------------------------------------------------------
# audio:  file(1) magic for sound formats
#
# from Jan Nicolai Langfeldt <janl@ifi.uio.no>,
#

# Sun/NeXT audio data
0	string		.snd
>12	belong		1		audio/basic
>12	belong		2		audio/basic
>12	belong		3		audio/basic
>12	belong		4		audio/basic
>12	belong		5		audio/basic
>12	belong		6		audio/basic
>12	belong		7		audio/basic

>12	belong		23		audio/x-adpcm

# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
# that uses little-endian encoding and has a different magic number
# (0x0064732E in little-endian encoding).
0	lelong		0x0064732E	
>12	lelong		1		audio/x-dec-basic
>12	lelong		2		audio/x-dec-basic
>12	lelong		3		audio/x-dec-basic
>12	lelong		4		audio/x-dec-basic
>12	lelong		5		audio/x-dec-basic
>12	lelong		6		audio/x-dec-basic
>12	lelong		7		audio/x-dec-basic
#                                       compressed (G.721 ADPCM)
>12	lelong		23		audio/x-dec-adpcm

# Bytes 0-3 of AIFF, AIFF-C, & 8SVX audio files are "FORM"
#					AIFF audio data
8	string		AIFF		audio/x-aiff	
#					AIFF-C audio data
8	string		AIFC		audio/x-aiff	
#					IFF/8SVX audio data
8	string		8SVX		audio/x-aiff	

# Creative Labs AUDIO stuff
#					Standard MIDI data
0	string	MThd			audio/unknown	
#>9 	byte	>0			(format %d)
#>11	byte	>1			using %d channels
#					Creative Music (CMF) data
0	string	CTMF			audio/unknown	
#					SoundBlaster instrument data
0	string	SBI			audio/unknown	
#					Creative Labs voice data
0	string	Creative\ Voice\ File	audio/unknown	
## is this next line right?  it came this way...
#>19	byte	0x1A
#>23	byte	>0			- version %d
#>22	byte	>0			\b.%d

# [GRR 950115:  is this also Creative Labs?  Guessing that first line
#  should be string instead of unknown-endian long...]
#0	long		0x4e54524b	MultiTrack sound data
#0	string		NTRK		MultiTrack sound data
#>4	long		x		- version %ld

# Microsoft WAVE format (*.wav)
# [GRR 950115:  probably all of the shorts and longs should be leshort/lelong]
#					Microsoft RIFF
0	string		RIFF		audio/unknown	
#					- WAVE format
>8	string		WAVE		audio/x-wav	
#
0	belong		0x2e7261fd	application/x-realaudio

# MPEG Layer 3 sound files
# Modified the 11/20/97 at 15:59:04 by Christophe Prud'homme <christophe.prudhomme@asci.fr>
0       belong          0xfffb          audio/x-mp3

#------------------------------------------------------------------------------
# c-lang:  file(1) magic for C programs or various scripts
#

# XPM icons (Greg Roelofs, newt@uchicago.edu)
# ideally should go into "images", but entries below would tag XPM as C source
0	string		/*\ XPM		image/x-xpm	7bit

# this first will upset you if you're a PL/1 shop... (are there any left?)
# in which case rm it; ascmagic will catch real C programs
#					C or REXX program text
#0	string		/*		text/x-c
#					C++ program text
#0	string		//		text/x-c++

#------------------------------------------------------------------------------
# commands:  file(1) magic for various shells and interpreters
#
#0       string          :\ shell archive or commands for antique kernel text
0       string          #!/bin/sh               application/x-shellscript
0       string          #!\ /bin/sh             application/x-shellscript
0       string          #!/bin/csh              application/x-shellscript
0       string          #!\ /bin/csh            application/x-shellscript
# korn shell magic, sent by George Wu, gwu@clyde.att.com
0       string          #!/bin/ksh              application/x-shellscript
0       string          #!\ /bin/ksh            application/x-shellscript
0       string          #!/bin/tcsh             application/x-shellscript
0       string          #!\ /bin/tcsh           application/x-shellscript
0       string          #!/usr/local/tcsh       application/x-shellscript
0       string          #!\ /usr/local/tcsh     application/x-shellscript
0       string          #!/usr/local/bin/tcsh   application/x-shellscript
0       string          #!\ /usr/local/bin/tcsh application/x-shellscript
# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
0       string          #!/bin/bash     		application/x-shellscript
0       string          #!\ /bin/bash           application/x-shellscript
0       string          #!/usr/local/bin/bash   application/x-shellscript
0       string          #!\ /usr/local/bin/bash application/x-shellscript

#
# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
0       string          #!/usr/local/bin/zsh    application/x-shellscript
0       string          #!\ /usr/local/bin/zsh  application/x-shellscript
0       string          #!/usr/local/bin/ash    application/x-shellscript
0       string          #!\ /usr/local/bin/ash  application/x-shellscript
#0       string          #!/usr/local/bin/ae     Neil Brown's ae
#0       string          #!\ /usr/local/bin/ae   Neil Brown's ae
0       string          #!/bin/nawk             application/x-nawk
0       string          #!\ /bin/nawk           application/x-nawk
0       string          #!/usr/bin/nawk         application/x-nawk
0       string          #!\ /usr/bin/nawk       application/x-nawk
0       string          #!/usr/local/bin/nawk   application/x-nawk
0       string          #!\ /usr/local/bin/nawk application/x-nawk
0       string          #!/bin/gawk             application/x-gawk
0       string          #!\ /bin/gawk           application/x-gawk
0       string          #!/usr/bin/gawk         application/x-gawk
0       string          #!\ /usr/bin/gawk       application/x-gawk
0       string          #!/usr/local/bin/gawk   application/x-gawk
0       string          #!\ /usr/local/bin/gawk application/x-gawk
#
0       string          #!/bin/awk              application/x-awk
0       string          #!\ /bin/awk            application/x-awk
0       string          #!/usr/bin/awk          application/x-awk
0       string          #!\ /usr/bin/awk        application/x-awk
0       string          BEGIN                   application/x-awk

# For Larry Wall's perl language.  The ``eval'' line recognizes an
# outrageously clever hack for USG systems.
#                               Keith Waclena <keith@cerberus.uchicago.edu>
0       string          #!/bin/perl                     application/x-perl
0       string          #!\ /bin/perl                   application/x-perl
0       string          eval\ "exec\ /bin/perl          application/x-perl
0       string          #!/usr/bin/perl                 application/x-perl
0       string          #!\ /usr/bin/perl               application/x-perl
0       string          eval\ "exec\ /usr/bin/perl      application/x-perl
0       string          #!/usr/local/bin/perl           application/x-perl
0       string          #!\ /usr/local/bin/perl         application/x-perl
0       string          eval\ "exec\ /usr/local/bin/perl application/x-perl

#------------------------------------------------------------------------------
# compress:  file(1) magic for pure-compression formats (no archives)
#
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
#
# Formats for various forms of compressed data
# Formats for "compress" proper have been moved into "compress.c",
# because it tries to uncompress it to figure out what's inside.

# standard unix compress
0	string		\037\235	application/x-compress

# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
0       string          \037\213        application/x-gzip

0		string			PK\003\004		application/x-zip

# According to gzip.h, this is the correct byte order for packed data.
0	string		\037\036	application/octet-stream
#
# This magic number is byte-order-independent.
#
0	short		017437		application/octet-stream

# XXX - why *two* entries for "compacted data", one of which is
# byte-order independent, and one of which is byte-order dependent?
#
# compacted data
0	short		0x1fff		application/octet-stream
0	string		\377\037	application/octet-stream
# huf output
0	short		0145405		application/octet-stream

# Squeeze and Crunch...
# These numbers were gleaned from the Unix versions of the programs to
# handle these formats.  Note that I can only uncrunch, not crunch, and
# I didn't have a crunched file handy, so the crunch number is untested.
#				Keith Waclena <keith@cerberus.uchicago.edu>
#0	leshort		0x76FF		squeezed data (CP/M, DOS)
#0	leshort		0x76FE		crunched data (CP/M, DOS)

# Freeze
#0	string		\037\237	Frozen file 2.1
#0	string		\037\236	Frozen file 1.0 (or gzip 0.5)

# lzh?
#0	string		\037\240	LZH compressed data

257	string		ustar\0		application/x-tar	posix
257	string		ustar\040\040\0		application/x-tar	gnu

0	short		070707		application/x-cpio
0	short		0143561		application/x-cpio	swapped

0	string		=<ar>		application/x-archive
0	string		!<arch>		application/x-archive
>8	string		debian		application/x-debian-package

#------------------------------------------------------------------------------
#
# RPM: file(1) magic for Red Hat Packages   Erik Troan (ewt@redhat.com)
#
0       beshort         0xedab
>2      beshort         0xeedb          application/x-rpm

0	lelong&0x8080ffff	0x0000081a	application/x-arc	lzw
0	lelong&0x8080ffff	0x0000091a	application/x-arc	squashed
0	lelong&0x8080ffff	0x0000021a	application/x-arc	uncompressed
0	lelong&0x8080ffff	0x0000031a	application/x-arc	packed
0	lelong&0x8080ffff	0x0000041a	application/x-arc	squeezed
0	lelong&0x8080ffff	0x0000061a	application/x-arc	crunched

0	leshort	0xea60	application/octet-stream	x-arj

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2	string	-lh0-	application/x-lharc	lh0
2	string	-lh1-	application/x-lharc	lh1
2	string	-lz4-	application/x-lharc	lz4
2	string	-lz5-	application/x-lharc	lz5
#	[never seen any but the last; -lh4- reported in comp.compression:]
2	string	-lzs-	application/x-lha	lzs
2	string	-lh\ -	application/x-lha	lh
2	string	-lhd-	application/x-lha	lhd
2	string	-lh2-	application/x-lha	lh2
2	string	-lh3-	application/x-lha	lh3
2	string	-lh4-	application/x-lha	lh4
2	string	-lh5-	application/x-lha	lh5
# Shell archives
10	string	#\ This\ is\ a\ shell\ archive	application/octet-stream	x-shell

#------------------------------------------------------------------------------
# frame:  file(1) magic for FrameMaker files
#
# This stuff came on a FrameMaker demo tape, most of which is
# copyright, but this file is "published" as witness the following:
#
0	string		\<MakerFile	application/x-frame
0	string		\<MIFFile	application/x-frame
0	string		\<MakerDictionary	application/x-frame
0	string		\<MakerScreenFon	application/x-frame
0	string		\<MML		application/x-frame
0	string		\<Book		application/x-frame
0	string		\<Maker		application/x-frame

#------------------------------------------------------------------------------
# html:  file(1) magic for HTML (HyperText Markup Language) docs
#
# from Daniel Quinlan <quinlan@yggdrasil.com>
#
0	string		\<HEAD	text/html
0	string		\<head	text/html
0	string		\<TITLE	text/html
0	string		\<title	text/html
0       string          \<html	text/html
0       string          \<HTML	text/html
0	string		\<!--	text/html
0	string		\<h1	text/html
0	string		\<H1	text/html
0	string		\<!doctype\ HTML	text/html
0	string		\<!DOCTYPE\ HTML	text/html
0	string		\<!doctype\ html	text/html

#------------------------------------------------------------------------------
# images:  file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
#
# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
# additions by janl@ifi.uio.no as well as others. Jan also suggested
# merging several one- and two-line files into here.
#
# XXX - byte order for GIF and TIFF fields?
# [GRR:  TIFF allows both byte orders; GIF is probably little-endian]
#

# [GRR:  what the hell is this doing in here?]
#0	string		xbtoa		btoa'd file

# PBMPLUS
#					PBM file
0	string		P1		image/x-portable-bitmap	7bit
#					PGM file
0	string		P2		image/x-portable-greymap	7bit
#					PPM file
0	string		P3		image/x-portable-pixmap	7bit
#					PBM "rawbits" file
0	string		P4		image/x-portable-bitmap
#					PGM "rawbits" file
0	string		P5		image/x-portable-greymap
#					PPM "rawbits" file
0	string		P6		image/x-portable-pixmap

# NIFF (Navy Interchange File Format, a modification of TIFF)
# [GRR:  this *must* go before TIFF]
0	string		IIN1		image/x-niff

# TIFF and friends
#					TIFF file, big-endian
0	string		MM		image/tiff
#					TIFF file, little-endian
0	string		II		image/tiff

# possible GIF replacements; none yet released!
# (Greg Roelofs, newt@uchicago.edu)
#
# GRR 950115:  this was mine ("Zip GIF"):
#					ZIF image (GIF+deflate alpha)
0	string		GIF94z		image/unknown
#
# GRR 950115:  this is Jeremy Wohl's Free Graphics Format (better):
#					FGF image (GIF+deflate beta)
0	string		FGF95a		image/unknown
#
# GRR 950115:  this is Thomas Boutell's Portable Bitmap Format proposal
# (best; not yet implemented):
#					PBF image (deflate compression)
0	string		PBF		image/unknown

# GIF
0	string		GIF		image/gif

# JPEG images
0	beshort		0xffd8		image/jpeg

# PC bitmaps (OS/2, Windoze BMP files)  (Greg Roelofs, newt@uchicago.edu)
0	string		BM		image/bmp
#>14	byte		12		(OS/2 1.x format)
#>14	byte		64		(OS/2 2.x format)
#>14	byte		40		(Windows 3.x format)
#0	string		IC		icon
#0	string		PI		pointer
#0	string		CI		color icon
#0	string		CP		color pointer
#0	string		BA		bitmap array


#------------------------------------------------------------------------------
# lisp:  file(1) magic for lisp programs
#
# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
0	string	;;			text/plain	8bit
# Emacs 18 - this is always correct, but not very magical.
0	string	\012(			application/x-elc
# Emacs 19
0	string	;ELC\023\000\000\000	application/x-elc

#------------------------------------------------------------------------------
# mail.news:  file(1) magic for mail and news
#
# There are tests to ascmagic.c to cope with mail and news.
0	string		Relay-Version: 	message/rfc822	7bit
0	string		#!\ rnews	message/rfc822	7bit
0	string		N#!\ rnews	message/rfc822	7bit
0	string		Forward\ to 	message/rfc822	7bit
0	string		Pipe\ to 	message/rfc822	7bit
0	string		Return-Path:	message/rfc822	7bit
0	string		Path:		message/news	8bit
0	string		Xref:		message/news	8bit
0	string		From:		message/rfc822	7bit
0	string		Article 	message/news	8bit
#------------------------------------------------------------------------------
# msword: file(1) magic for MS Word files
#
# Contributor claims:
# Reverse-engineered MS Word magic numbers
#

0	string		\376\067\0\043			application/msword
0	string		\320\317\021\340\241\261	application/msword
0	string		\333\245-\0\0\0			application/msword



#------------------------------------------------------------------------------
# printer:  file(1) magic for printer-formatted files
#

# PostScript
0	string		%!		application/postscript
0	string		\004%!		application/postscript

# Acrobat
# (due to clamen@cs.cmu.edu)
0	string		%PDF-		application/pdf

#------------------------------------------------------------------------------
# sc:  file(1) magic for "sc" spreadsheet
#
38	string		Spreadsheet	application/x-sc

#------------------------------------------------------------------------------
# tex:  file(1) magic for TeX files
#
# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
#
# From <conklin@talisman.kaleida.com>

# Although we may know the offset of certain text fields in TeX DVI
# and font files, we can't use them reliably because they are not
# zero terminated. [but we do anyway, christos]
0	string		\367\002	application/x-dvi
#0	string		\367\203	TeX generic font data
#0	string		\367\131	TeX packed font data
#0	string		\367\312	TeX virtual font data
#0	string		This\ is\ TeX,	TeX transcript text	
#0	string		This\ is\ METAFONT,	METAFONT transcript text

# There is no way to detect TeX Font Metric (*.tfm) files without
# breaking them apart and reading the data.  The following patterns
# match most *.tfm files generated by METAFONT or afm2tfm.
2	string		\000\021	application/x-tex-tfm
2	string		\000\022	application/x-tex-tfm
#>34	string		>\0		(%s)

# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
#0	string		\\input\ texinfo	Texinfo source text
#0	string		This\ is\ Info\ file	GNU Info text

# correct TeX magic for Linux (and maybe more)
# from Peter Tobias (tobias@server.et-inf.fho-emden.de)
#
0	leshort		0x02f7		application/x-dvi

# RTF - Rich Text Format
0	string		{\\rtf		text/rtf

#------------------------------------------------------------------------------
# animation:  file(1) magic for animation/movie formats
#
# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8)
#						MPEG file
0	string		\000\000\001\263	video/mpeg
# FLI animation format
0	leshort		0xAF11				video/fli
# FLC animation format
0	leshort		0xAF12				video/flc
# AVI
>8	string		AVI\ 				video/avi
#
# SGI and Apple formats
#
0	string		MOVI				video/sgi
4	string		moov				video/quicktime	moov
4	string		mdat				video/quicktime	mdat
# The contributor claims:
#   I couldn't find a real magic number for these, however, this
#   -appears- to work.  Note that it might catch other files, too,
#   so BE CAREFUL!
#
# Note that title and author appear in the two 20-byte chunks
# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
# 255 (hex FF)! DL format SUCKS BIG ROCKS.
#
#						DL file version 1 , medium format (160x100, 4 images/screen)
0	byte		1			video/unknown
0	byte		2			video/unknown
#
# Databases
#
# GDBM magic numbers
#  Will be maintained as part of the GDBM distribution in the future.
#  <downsj@teeny.org>
0       belong  0x13579ace      application/x-gdbm
0       lelong  0x13579ace      application/x-gdbm
0       string  GDBM            application/x-gdbm
#
0       belong  0x061561        application/x-dbm
#
# Executables
#
0       string          \177ELF 
>4      byte            0
>4      byte            1
>4      byte            2
>5      byte            0
>5      byte            1
>>16    leshort         0
>>16    leshort         1               application/x-object
>>16    leshort         2               application/x-executable
>>16    leshort         3               application/x-sharedlib
>>16    leshort         4               application/x-coredump
#
# DOS
0		string			MZ				application/x-dosexec
#
# KDE
0		string	[KDE\ Desktop\ Entry]	application/x-kdelnk
0		string	\#\ KDE\ Config\ File	application/x-kdelnk
# xmcd database file for kscd
0		string	\#\ xmcd                text/xmcd

#------------------------------------------------------------------------------
# pkgadd:  file(1) magic for SysV R4 PKG Datastreams
#
0       string          #\ PaCkAgE\ DaTaStReAm  application/x-svr4-package




@ 追加しとくと良いもの
# PNG
0       string          \211PNG         image/png

(最終更新 Thu Mar 30 18:43:57 2006)