Main Page | Modules | Namespace List | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals | Related Pages | Examples

mod_mime_magic.c

Go to the documentation of this file.
00001 /* Copyright 1999-2005 The Apache Software Foundation or its licensors, as
00002  * applicable.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *     http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00017 /*
00018  * mod_mime_magic: MIME type lookup via file magic numbers
00019  * Copyright (c) 1996-1997 Cisco Systems, Inc.
00020  *
00021  * This software was submitted by Cisco Systems to the Apache Software Foundation in July
00022  * 1997.  Future revisions and derivatives of this source code must
00023  * acknowledge Cisco Systems as the original contributor of this module.
00024  * All other licensing and usage conditions are those of the Apache Software Foundation.
00025  *
00026  * Some of this code is derived from the free version of the file command
00027  * originally posted to comp.sources.unix.  Copyright info for that program
00028  * is included below as required.
00029  * ---------------------------------------------------------------------------
00030  * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
00031  *
00032  * This software is not subject to any license of the American Telephone and
00033  * Telegraph Company or of the Regents of the University of California.
00034  *
00035  * Permission is granted to anyone to use this software for any purpose on any
00036  * computer system, and to alter it and redistribute it freely, subject to
00037  * the following restrictions:
00038  *
00039  * 1. The author is not responsible for the consequences of use of this
00040  * software, no matter how awful, even if they arise from flaws in it.
00041  *
00042  * 2. The origin of this software must not be misrepresented, either by
00043  * explicit claim or by omission.  Since few users ever read sources, credits
00044  * must appear in the documentation.
00045  *
00046  * 3. Altered versions must be plainly marked as such, and must not be
00047  * misrepresented as being the original software.  Since few users ever read
00048  * sources, credits must appear in the documentation.
00049  *
00050  * 4. This notice may not be removed or altered.
00051  * -------------------------------------------------------------------------
00052  *
00053  * For compliance with Mr Darwin's terms: this has been very significantly
00054  * modified from the free "file" command.
00055  * - all-in-one file for compilation convenience when moving from one
00056  *   version of Apache to the next.
00057  * - Memory allocation is done through the Apache API's apr_pool_t structure.
00058  * - All functions have had necessary Apache API request or server
00059  *   structures passed to them where necessary to call other Apache API
00060  *   routines.  (i.e. usually for logging, files, or memory allocation in
00061  *   itself or a called function.)
00062  * - struct magic has been converted from an array to a single-ended linked
00063  *   list because it only grows one record at a time, it's only accessed
00064  *   sequentially, and the Apache API has no equivalent of realloc().
00065  * - Functions have been changed to get their parameters from the server
00066  *   configuration instead of globals.  (It should be reentrant now but has
00067  *   not been tested in a threaded environment.)
00068  * - Places where it used to print results to stdout now saves them in a
00069  *   list where they're used to set the MIME type in the Apache request
00070  *   record.
00071  * - Command-line flags have been removed since they will never be used here.
00072  *
00073  * Ian Kluft <ikluft@cisco.com>
00074  * Engineering Information Framework
00075  * Central Engineering
00076  * Cisco Systems, Inc.
00077  * San Jose, CA, USA
00078  *
00079  * Initial installation          July/August 1996
00080  * Misc bug fixes                May 1997
00081  * Submission to Apache Software Foundation    July 1997
00082  *
00083  */
00084 
00085 #include "apr.h"
00086 #include "apr_strings.h"
00087 #include "apr_lib.h"
00088 #define APR_WANT_STRFUNC
00089 #include "apr_want.h"
00090 
00091 #if APR_HAVE_UNISTD_H
00092 #include <unistd.h>
00093 #endif
00094 
00095 #include "ap_config.h"
00096 #include "httpd.h"
00097 #include "http_config.h"
00098 #include "http_request.h"
00099 #include "http_core.h"
00100 #include "http_log.h"
00101 #include "http_protocol.h"
00102 #include "util_script.h"
00103 
00104 /* ### this isn't set by configure? does anybody set this? */
00105 #ifdef HAVE_UTIME_H
00106 #include <utime.h>
00107 #endif
00108 
00109 /*
00110  * data structures and related constants
00111  */
00112 
00113 #define MODNAME        "mod_mime_magic"
00114 #define MIME_MAGIC_DEBUG        0
00115 
00116 #define MIME_BINARY_UNKNOWN    "application/octet-stream"
00117 #define MIME_TEXT_UNKNOWN    "text/plain"
00118 
00119 #define MAXMIMESTRING        256
00120 
00121 /* HOWMANY must be at least 4096 to make gzip -dcq work */
00122 #define HOWMANY 4096
00123 /* SMALL_HOWMANY limits how much work we do to figure out text files */
00124 #define SMALL_HOWMANY 1024
00125 #define MAXDESC    50           /* max leng of text description */
00126 #define MAXstring 64            /* max leng of "string" types */
00127 
00128 struct magic {
00129     struct magic *next;         /* link to next entry */
00130     int lineno;                 /* line number from magic file */
00131 
00132     short flag;
00133 #define INDIR    1              /* if '>(...)' appears,  */
00134 #define    UNSIGNED 2           /* comparison is unsigned */
00135     short cont_level;           /* level of ">" */
00136     struct {
00137         char type;              /* byte short long */
00138         long offset;            /* offset from indirection */
00139     } in;
00140     long offset;                /* offset to magic number */
00141     unsigned char reln;         /* relation (0=eq, '>'=gt, etc) */
00142     char type;                  /* int, short, long or string. */
00143     char vallen;                /* length of string value, if any */
00144 #define BYTE    1
00145 #define SHORT    2
00146 #define LONG    4
00147 #define STRING    5
00148 #define DATE    6
00149 #define BESHORT    7
00150 #define BELONG    8
00151 #define BEDATE    9
00152 #define LESHORT    10
00153 #define LELONG    11
00154 #define LEDATE    12
00155     union VALUETYPE {
00156         unsigned char b;
00157         unsigned short h;
00158         unsigned long l;
00159         char s[MAXstring];
00160         unsigned char hs[2];    /* 2 bytes of a fixed-endian "short" */
00161         unsigned char hl[4];    /* 2 bytes of a fixed-endian "long" */
00162     } value;                    /* either number or string */
00163     unsigned long mask;         /* mask before comparison with value */
00164     char nospflag;              /* supress space character */
00165 
00166     /* NOTE: this string is suspected of overrunning - find it! */
00167     char desc[MAXDESC];         /* description */
00168 };
00169 
00170 /*
00171  * data structures for tar file recognition
00172  * --------------------------------------------------------------------------
00173  * Header file for public domain tar (tape archive) program.
00174  *
00175  * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
00176  * Gilmore, ihnp4!hoptoad!gnu.
00177  *
00178  * Header block on tape.
00179  *
00180  * I'm going to use traditional DP naming conventions here. A "block" is a big
00181  * chunk of stuff that we do I/O on. A "record" is a piece of info that we
00182  * care about. Typically many "record"s fit into a "block".
00183  */
/* size of one tar record and of the fixed-width name fields inside it */
#define RECORDSIZE    512
#define NAMSIZ    100
#define TUNMLEN    32
#define TGNMLEN    32

/* the structured view of a tar header record; every field is a char array */
struct header {
    char name[NAMSIZ];
    char mode[8];
    char uid[8];
    char gid[8];
    char size[12];
    char mtime[12];
    char chksum[8];
    char linkflag;
    char linkname[NAMSIZ];
    char magic[8];
    char uname[TUNMLEN];
    char gname[TGNMLEN];
    char devmajor[8];
    char devminor[8];
};

/* a tar record seen either as RECORDSIZE raw bytes or as a header */
union record {
    char charptr[RECORDSIZE];
    struct header header;
};
00208 
00209 /* The magic field is filled with this if uname and gname are valid. */
00210 #define    TMAGIC        "ustar  "      /* 7 chars and a null */
00211 
00212 /*
00213  * file-function prototypes
00214  */
00215 static int ascmagic(request_rec *, unsigned char *, apr_size_t);
00216 static int is_tar(unsigned char *, apr_size_t);
00217 static int softmagic(request_rec *, unsigned char *, apr_size_t);
00218 static int tryit(request_rec *, unsigned char *, apr_size_t, int);
00219 static int zmagic(request_rec *, unsigned char *, apr_size_t);
00220 
00221 static int getvalue(server_rec *, struct magic *, char **);
00222 static int hextoint(int);
00223 static char *getstr(server_rec *, char *, char *, int, int *);
00224 static int parse(server_rec *, apr_pool_t *p, char *, int);
00225 
00226 static int match(request_rec *, unsigned char *, apr_size_t);
00227 static int mget(request_rec *, union VALUETYPE *, unsigned char *,
00228                 struct magic *, apr_size_t);
00229 static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
00230 static void mprint(request_rec *, union VALUETYPE *, struct magic *);
00231 
00232 static int uncompress(request_rec *, int, 
00233                       unsigned char **, apr_size_t);
00234 static long from_oct(int, char *);
00235 static int fsmagic(request_rec *r, const char *fn);
00236 
00237 /*
00238  * includes for ASCII substring recognition formerly "names.h" in file
00239  * command
00240  *
00241  * Original notes: names and types used by ascmagic in file(1). These tokens are
00242  * here because they can appear anywhere in the first HOWMANY bytes, while
00243  * tokens in /etc/magic must appear at fixed offsets into the file. Don't
00244  * make HOWMANY too high unless you have a very fast CPU.
00245  */
00246 
00247 /* these types are used to index the array 'types': keep em in sync! */
00248 /* HTML inserted in first because this is a web server module now */
00249 #define L_HTML    0             /* HTML */
00250 #define L_C       1             /* first and foremost on UNIX */
00251 #define L_FORT    2             /* the oldest one */
00252 #define L_MAKE    3             /* Makefiles */
00253 #define L_PLI     4             /* PL/1 */
00254 #define L_MACH    5             /* some kinda assembler */
00255 #define L_ENG     6             /* English */
00256 #define L_PAS     7             /* Pascal */
00257 #define L_MAIL    8             /* Electronic mail */
00258 #define L_NEWS    9             /* Usenet Netnews */
00259 
/*
 * MIME type strings, one slot per L_* code in the same order, followed by
 * a catch-all entry and a terminating 0.
 */
static char *types[] = {
    /* HTML */
    "text/html",
    /* "c program text", */
    "text/plain",
    /* "fortran program text", */
    "text/plain",
    /* "make commands text", */
    "text/plain",
    /* "pl/1 program text", */
    "text/plain",
    /* "assembler program text", */
    "text/plain",
    /* "English text", */
    "text/plain",
    /* "pascal program text", */
    "text/plain",
    /* "mail text", */
    "message/rfc822",
    /* "news text", */
    "message/news",
    /* "can't happen error on names.h/types", */
    "application/binary",
    0
};
00275 
00276 static struct names {
00277     char *name;
00278     short type;
00279 } names[] = {
00280 
00281     /* These must be sorted by eye for optimal hit rate */
00282     /* Add to this list only after substantial meditation */
00283     {
00284         "<html>", L_HTML
00285     },
00286     {
00287         "<HTML>", L_HTML
00288     },
00289     {
00290         "<head>", L_HTML
00291     },
00292     {
00293         "<HEAD>", L_HTML
00294     },
00295     {
00296         "<title>", L_HTML
00297     },
00298     {
00299         "<TITLE>", L_HTML
00300     },
00301     {
00302         "<h1>", L_HTML
00303     },
00304     {
00305         "<H1>", L_HTML
00306     },
00307     {
00308         "<!--", L_HTML
00309     },
00310     {
00311         "<!DOCTYPE HTML", L_HTML
00312     },
00313     {
00314         "/*", L_C
00315     },                          /* must precede "The", "the", etc. */
00316     {
00317         "#include", L_C
00318     },
00319     {
00320         "char", L_C
00321     },
00322     {
00323         "The", L_ENG
00324     },
00325     {
00326         "the", L_ENG
00327     },
00328     {
00329         "double", L_C
00330     },
00331     {
00332         "extern", L_C
00333     },
00334     {
00335         "float", L_C
00336     },
00337     {
00338         "real", L_C
00339     },
00340     {
00341         "struct", L_C
00342     },
00343     {
00344         "union", L_C
00345     },
00346     {
00347         "CFLAGS", L_MAKE
00348     },
00349     {
00350         "LDFLAGS", L_MAKE
00351     },
00352     {
00353         "all:", L_MAKE
00354     },
00355     {
00356         ".PRECIOUS", L_MAKE
00357     },
00358     /*
00359      * Too many files of text have these words in them.  Find another way to
00360      * recognize Fortrash.
00361      */
00362 #ifdef    NOTDEF
00363     {
00364         "subroutine", L_FORT
00365     },
00366     {
00367         "function", L_FORT
00368     },
00369     {
00370         "block", L_FORT
00371     },
00372     {
00373         "common", L_FORT
00374     },
00375     {
00376         "dimension", L_FORT
00377     },
00378     {
00379         "integer", L_FORT
00380     },
00381     {
00382         "data", L_FORT
00383     },
00384 #endif /* NOTDEF */
00385     {
00386         ".ascii", L_MACH
00387     },
00388     {
00389         ".asciiz", L_MACH
00390     },
00391     {
00392         ".byte", L_MACH
00393     },
00394     {
00395         ".even", L_MACH
00396     },
00397     {
00398         ".globl", L_MACH
00399     },
00400     {
00401         "clr", L_MACH
00402     },
00403     {
00404         "(input,", L_PAS
00405     },
00406     {
00407         "dcl", L_PLI
00408     },
00409     {
00410         "Received:", L_MAIL
00411     },
00412     {
00413         ">From", L_MAIL
00414     },
00415     {
00416         "Return-Path:", L_MAIL
00417     },
00418     {
00419         "Cc:", L_MAIL
00420     },
00421     {
00422         "Newsgroups:", L_NEWS
00423     },
00424     {
00425         "Path:", L_NEWS
00426     },
00427     {
00428         "Organization:", L_NEWS
00429     },
00430     {
00431         NULL, 0
00432     }
00433 };
00434 
00435 #define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)
00436 
00437 /*
00438  * Result String List (RSL)
00439  *
00440  * The file(1) command prints its output.  Instead, we store the various
00441  * "printed" strings in a list (allocating memory as we go) and concatenate
00442  * them at the end when we finally know how much space they'll need.
00443  */
00444 
/* one node of the result string list */
struct magic_rsl_s {
    /* string, possibly a fragment */
    char *str;
    /* pointer to next fragment */
    struct magic_rsl_s *next;
};
typedef struct magic_rsl_s magic_rsl;
00449 
00450 /*
00451  * Apache module configuration structures
00452  */
00453 
/*
 * per-server info
 */
typedef struct {
    /* where magic be found */
    const char *magicfile;
    /* head of magic config list */
    struct magic *magic;
    /* NOTE(review): presumably the tail of that list - confirm in parser */
    struct magic *last;
} magic_server_config_rec;
00460 
00461 /* per-request info */
00462 typedef struct {
00463     magic_rsl *head;            /* result string list */
00464     magic_rsl *tail;
00465     unsigned suf_recursion;     /* recursion depth in suffix check */
00466 } magic_req_rec;
00467 
00468 /*
00469  * configuration functions - called by Apache API routines
00470  */
00471 
00472 module AP_MODULE_DECLARE_DATA mime_magic_module;
00473 
00474 static void *create_magic_server_config(apr_pool_t *p, server_rec *d)
00475 {
00476     /* allocate the config - use pcalloc because it needs to be zeroed */
00477     return apr_pcalloc(p, sizeof(magic_server_config_rec));
00478 }
00479 
00480 static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv)
00481 {
00482     magic_server_config_rec *base = (magic_server_config_rec *) basev;
00483     magic_server_config_rec *add = (magic_server_config_rec *) addv;
00484     magic_server_config_rec *new = (magic_server_config_rec *)
00485                             apr_palloc(p, sizeof(magic_server_config_rec));
00486 
00487     new->magicfile = add->magicfile ? add->magicfile : base->magicfile;
00488     new->magic = NULL;
00489     new->last = NULL;
00490     return new;
00491 }
00492 
00493 static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg)
00494 {
00495     magic_server_config_rec *conf = (magic_server_config_rec *)
00496     ap_get_module_config(cmd->server->module_config,
00497                       &mime_magic_module);
00498 
00499     if (!conf) {
00500         return MODNAME ": server structure not allocated";
00501     }
00502     conf->magicfile = arg;
00503     return NULL;
00504 }
00505 
00506 /*
00507  * configuration file commands - exported to Apache API
00508  */
00509 
00510 static const command_rec mime_magic_cmds[] =
00511 {
00512     AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF,
00513      "Path to MIME Magic file (in file(1) format)"),
00514     {NULL}
00515 };
00516 
00517 /*
00518  * RSL (result string list) processing routines
00519  *
00520  * These collect strings that would have been printed in fragments by file(1)
00521  * into a list of magic_rsl structures with the strings. When complete,
00522  * they're concatenated together to become the MIME content and encoding
00523  * types.
00524  *
00525  * Return value conventions for these functions:
00526  *   functions which return int:      failure = -1, other = result
00527  *   functions which return pointers: failure = 0,  other = result
00528  */
00529 
00530 /* allocate a per-request structure and put it in the request record */
00531 static magic_req_rec *magic_set_config(request_rec *r)
00532 {
00533     magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool,
00534                                                       sizeof(magic_req_rec));
00535 
00536     req_dat->head = req_dat->tail = (magic_rsl *) NULL;
00537     ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
00538     return req_dat;
00539 }
00540 
00541 /* add a string to the result string list for this request */
00542 /* it is the responsibility of the caller to allocate "str" */
00543 static int magic_rsl_add(request_rec *r, char *str)
00544 {
00545     magic_req_rec *req_dat = (magic_req_rec *)
00546                     ap_get_module_config(r->request_config, &mime_magic_module);
00547     magic_rsl *rsl;
00548 
00549     /* make sure we have a list to put it in */
00550     if (!req_dat) {
00551         ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r,
00552                     MODNAME ": request config should not be NULL");
00553         if (!(req_dat = magic_set_config(r))) {
00554             /* failure */
00555             return -1;
00556         }
00557     }
00558 
00559     /* allocate the list entry */
00560     rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl));
00561 
00562     /* fill it */
00563     rsl->str = str;
00564     rsl->next = (magic_rsl *) NULL;
00565 
00566     /* append to the list */
00567     if (req_dat->head && req_dat->tail) {
00568         req_dat->tail->next = rsl;
00569         req_dat->tail = rsl;
00570     }
00571     else {
00572         req_dat->head = req_dat->tail = rsl;
00573     }
00574 
00575     /* success */
00576     return 0;
00577 }
00578 
00579 /* RSL hook for puts-type functions */
00580 static int magic_rsl_puts(request_rec *r, char *str)
00581 {
00582     return magic_rsl_add(r, str);
00583 }
00584 
00585 /* RSL hook for printf-type functions */
00586 static int magic_rsl_printf(request_rec *r, char *str,...)
00587 {
00588     va_list ap;
00589 
00590     char buf[MAXMIMESTRING];
00591 
00592     /* assemble the string into the buffer */
00593     va_start(ap, str);
00594     apr_vsnprintf(buf, sizeof(buf), str, ap);
00595     va_end(ap);
00596 
00597     /* add the buffer to the list */
00598     return magic_rsl_add(r, apr_pstrdup(r->pool, buf));
00599 }
00600 
00601 /* RSL hook for putchar-type functions */
00602 static int magic_rsl_putchar(request_rec *r, char c)
00603 {
00604     char str[2];
00605 
00606     /* high overhead for 1 char - just hope they don't do this much */
00607     str[0] = c;
00608     str[1] = '\0';
00609     return magic_rsl_add(r, str);
00610 }
00611 
00612 /* allocate and copy a contiguous string from a result string list */
00613 static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len)
00614 {
00615     char *result;               /* return value */
00616     int cur_frag,               /* current fragment number/counter */
00617         cur_pos,                /* current position within fragment */
00618         res_pos;                /* position in result string */
00619     magic_rsl *frag;            /* list-traversal pointer */
00620     magic_req_rec *req_dat = (magic_req_rec *)
00621                     ap_get_module_config(r->request_config, &mime_magic_module);
00622 
00623     /* allocate the result string */
00624     result = (char *) apr_palloc(r->pool, len + 1);
00625 
00626     /* loop through and collect the string */
00627     res_pos = 0;
00628     for (frag = req_dat->head, cur_frag = 0;
00629          frag->next;
00630          frag = frag->next, cur_frag++) {
00631         /* loop to the first fragment */
00632         if (cur_frag < start_frag)
00633             continue;
00634 
00635         /* loop through and collect chars */
00636         for (cur_pos = (cur_frag == start_frag) ? start_pos : 0;
00637              frag->str[cur_pos];
00638              cur_pos++) {
00639             if (cur_frag >= start_frag
00640                 && cur_pos >= start_pos
00641                 && res_pos <= len) {
00642                 result[res_pos++] = frag->str[cur_pos];
00643                 if (res_pos > len) {
00644                     break;
00645                 }
00646             }
00647         }
00648     }
00649 
00650     /* clean up and return */
00651     result[res_pos] = 0;
00652 #if MIME_MAGIC_DEBUG
00653     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
00654              MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result);
00655 #endif
00656     return result;
00657 }
00658 
00659 /* states for the state-machine algorithm in magic_rsl_to_request() */
00660 typedef enum {
00661     rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
00662 } rsl_states;
00663 
00664 /* process the RSL and set the MIME info in the request record */
00665 static int magic_rsl_to_request(request_rec *r)
00666 {
00667     int cur_frag,               /* current fragment number/counter */
00668         cur_pos,                /* current position within fragment */
00669         type_frag,              /* content type starting point: fragment */
00670         type_pos,               /* content type starting point: position */
00671         type_len,               /* content type length */
00672         encoding_frag,          /* content encoding starting point: fragment */
00673         encoding_pos,           /* content encoding starting point: position */
00674         encoding_len;           /* content encoding length */
00675 
00676     magic_rsl *frag;            /* list-traversal pointer */
00677     rsl_states state;
00678 
00679     magic_req_rec *req_dat = (magic_req_rec *)
00680                     ap_get_module_config(r->request_config, &mime_magic_module);
00681 
00682     /* check if we have a result */
00683     if (!req_dat || !req_dat->head) {
00684         /* empty - no match, we defer to other Apache modules */
00685         return DECLINED;
00686     }
00687 
00688     /* start searching for the type and encoding */
00689     state = rsl_leading_space;
00690     type_frag = type_pos = type_len = 0;
00691     encoding_frag = encoding_pos = encoding_len = 0;
00692     for (frag = req_dat->head, cur_frag = 0;
00693          frag && frag->next;
00694          frag = frag->next, cur_frag++) {
00695         /* loop through the characters in the fragment */
00696         for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) {
00697             if (apr_isspace(frag->str[cur_pos])) {
00698                 /* process whitespace actions for each state */
00699                 if (state == rsl_leading_space) {
00700                     /* eat whitespace in this state */
00701                     continue;
00702                 }
00703                 else if (state == rsl_type) {
00704                     /* whitespace: type has no slash! */
00705                     return DECLINED;
00706                 }
00707                 else if (state == rsl_subtype) {
00708                     /* whitespace: end of MIME type */
00709                     state++;
00710                     continue;
00711                 }
00712                 else if (state == rsl_separator) {
00713                     /* eat whitespace in this state */
00714                     continue;
00715                 }
00716                 else if (state == rsl_encoding) {
00717                     /* whitespace: end of MIME encoding */
00718                     /* we're done */
00719                     frag = req_dat->tail;
00720                     break;
00721                 }
00722                 else {
00723                     /* should not be possible */
00724                     /* abandon malfunctioning module */
00725                     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00726                                 MODNAME ": bad state %d (ws)", state);
00727                     return DECLINED;
00728                 }
00729                 /* NOTREACHED */
00730             }
00731             else if (state == rsl_type &&
00732                      frag->str[cur_pos] == '/') {
00733                 /* copy the char and go to rsl_subtype state */
00734                 type_len++;
00735                 state++;
00736             }
00737             else {
00738                 /* process non-space actions for each state */
00739                 if (state == rsl_leading_space) {
00740                     /* non-space: begin MIME type */
00741                     state++;
00742                     type_frag = cur_frag;
00743                     type_pos = cur_pos;
00744                     type_len = 1;
00745                     continue;
00746                 }
00747                 else if (state == rsl_type ||
00748                          state == rsl_subtype) {
00749                     /* non-space: adds to type */
00750                     type_len++;
00751                     continue;
00752                 }
00753                 else if (state == rsl_separator) {
00754                     /* non-space: begin MIME encoding */
00755                     state++;
00756                     encoding_frag = cur_frag;
00757                     encoding_pos = cur_pos;
00758                     encoding_len = 1;
00759                     continue;
00760                 }
00761                 else if (state == rsl_encoding) {
00762                     /* non-space: adds to encoding */
00763                     encoding_len++;
00764                     continue;
00765                 }
00766                 else {
00767                     /* should not be possible */
00768                     /* abandon malfunctioning module */
00769                     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00770                                 MODNAME ": bad state %d (ns)", state);
00771                     return DECLINED;
00772                 }
00773                 /* NOTREACHED */
00774             }
00775             /* NOTREACHED */
00776         }
00777     }
00778 
00779     /* if we ended prior to state rsl_subtype, we had incomplete info */
00780     if (state != rsl_subtype && state != rsl_separator &&
00781         state != rsl_encoding) {
00782         /* defer to other modules */
00783         return DECLINED;
00784     }
00785 
00786     /* save the info in the request record */
00787     if (state == rsl_subtype || state == rsl_encoding ||
00788         state == rsl_encoding) {
00789         char *tmp;
00790         tmp = rsl_strdup(r, type_frag, type_pos, type_len);
00791         /* XXX: this could be done at config time I'm sure... but I'm
00792          * confused by all this magic_rsl stuff. -djg */
00793         ap_content_type_tolower(tmp);
00794         ap_set_content_type(r, tmp);
00795     }
00796     if (state == rsl_encoding) {
00797         char *tmp;
00798         tmp = rsl_strdup(r, encoding_frag,
00799                                          encoding_pos, encoding_len);
00800         /* XXX: this could be done at config time I'm sure... but I'm
00801          * confused by all this magic_rsl stuff. -djg */
00802         ap_str_tolower(tmp);
00803         r->content_encoding = tmp;
00804     }
00805 
00806     /* detect memory allocation or other errors */
00807     if (!r->content_type ||
00808         (state == rsl_encoding && !r->content_encoding)) {
00809         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00810                       MODNAME ": unexpected state %d; could be caused by bad "
00811                       "data in magic file",
00812                       state);
00813         return HTTP_INTERNAL_SERVER_ERROR;
00814     }
00815 
00816     /* success! */
00817     return OK;
00818 }
00819 
00820 /*
00821  * magic_process - process input file r        Apache API request record
00822  * (formerly called "process" in file command, prefix added for clarity) Opens
00823  * the file and reads a fixed-size buffer to begin processing the contents.
00824  */
00825 static int magic_process(request_rec *r)
00826 {
00827     apr_file_t *fd = NULL;
00828     unsigned char buf[HOWMANY + 1];     /* one extra for terminating '\0' */
00829     apr_size_t nbytes = 0;              /* number of bytes read from a datafile */
00830     int result;
00831 
00832     /*
00833      * first try judging the file based on its filesystem status
00834      */
00835     switch ((result = fsmagic(r, r->filename))) {
00836     case DONE:
00837         magic_rsl_putchar(r, '\n');
00838         return OK;
00839     case OK:
00840         break;
00841     default:
00842         /* fatal error, bail out */
00843         return result;
00844     }
00845 
00846     if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
00847         /* We can't open it, but we were able to stat it. */
00848         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00849                     MODNAME ": can't read `%s'", r->filename);
00850         /* let some other handler decide what the problem is */
00851         return DECLINED;
00852     }
00853 
00854     /*
00855      * try looking at the first HOWMANY bytes
00856      */
00857     nbytes = sizeof(buf) - 1;
00858     if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) {
00859         ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r,
00860                     MODNAME ": read failed: %s", r->filename);
00861         return HTTP_INTERNAL_SERVER_ERROR;
00862     }
00863 
00864     if (nbytes == 0) {
00865         return DECLINED;
00866     }
00867     else {
00868         buf[nbytes++] = '\0';   /* null-terminate it */
00869         result = tryit(r, buf, nbytes, 1);
00870         if (result != OK) {
00871             return result;
00872         }
00873     }
00874 
00875     (void) apr_file_close(fd);
00876     (void) magic_rsl_putchar(r, '\n');
00877 
00878     return OK;
00879 }
00880 
00881 
00882 static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb,
00883                  int checkzmagic)
00884 {
00885     /*
00886      * Try compression stuff
00887      */
00888         if (checkzmagic == 1) {  
00889                         if (zmagic(r, buf, nb) == 1)
00890                         return OK;
00891         }
00892 
00893     /*
00894      * try tests in /etc/magic (or surrogate magic file)
00895      */
00896     if (softmagic(r, buf, nb) == 1)
00897         return OK;
00898 
00899     /*
00900      * try known keywords, check for ascii-ness too.
00901      */
00902     if (ascmagic(r, buf, nb) == 1)
00903         return OK;
00904 
00905     /*
00906      * abandon hope, all ye who remain here
00907      */
00908     return DECLINED;
00909 }
00910 
/* EATAB - advance the in-scope scan pointer `l' past any whitespace */
#define    EATAB {for (; apr_isspace(*l); ++l) {}}
00912 
00913 /*
00914  * apprentice - load configuration from the magic file r
00915  *  API request record
00916  */
00917 static int apprentice(server_rec *s, apr_pool_t *p)
00918 {
00919     apr_file_t *f = NULL;
00920     apr_status_t result;
00921     char line[BUFSIZ + 1];
00922     int errs = 0;
00923     int lineno;
00924 #if MIME_MAGIC_DEBUG
00925     int rule = 0;
00926     struct magic *m, *prevm;
00927 #endif
00928     magic_server_config_rec *conf = (magic_server_config_rec *)
00929                     ap_get_module_config(s->module_config, &mime_magic_module);
00930     const char *fname = ap_server_root_relative(p, conf->magicfile);
00931 
00932     if (!fname) {
00933         ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s,
00934                      MODNAME ": Invalid magic file path %s", conf->magicfile);
00935         return -1;
00936     }        
00937     if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED, 
00938                                 APR_OS_DEFAULT, p) != APR_SUCCESS)) {
00939         ap_log_error(APLOG_MARK, APLOG_ERR, result, s,
00940                      MODNAME ": can't read magic file %s", fname);
00941         return -1;
00942     }
00943 
00944     /* set up the magic list (empty) */
00945     conf->magic = conf->last = NULL;
00946 
00947     /* parse it */
00948     for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) {
00949         int ws_offset;
00950 
00951         /* delete newline */
00952         if (line[0]) {
00953             line[strlen(line) - 1] = '\0';
00954         }
00955 
00956         /* skip leading whitespace */
00957         ws_offset = 0;
00958         while (line[ws_offset] && apr_isspace(line[ws_offset])) {
00959             ws_offset++;
00960         }
00961 
00962         /* skip blank lines */
00963         if (line[ws_offset] == 0) {
00964             continue;
00965         }
00966 
00967         /* comment, do not parse */
00968         if (line[ws_offset] == '#')
00969             continue;
00970 
00971 #if MIME_MAGIC_DEBUG
00972         /* if we get here, we're going to use it so count it */
00973         rule++;
00974 #endif
00975 
00976         /* parse it */
00977         if (parse(s, p, line + ws_offset, lineno) != 0)
00978             ++errs;
00979     }
00980 
00981     (void) apr_file_close(f);
00982 
00983 #if MIME_MAGIC_DEBUG
00984     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s,
00985                 MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s",
00986                 conf,
00987                 conf->magicfile ? conf->magicfile : "NULL",
00988                 conf->magic ? "set" : "NULL",
00989                 (conf->magic && conf->magic->next) ? "set" : "NULL",
00990                 conf->last ? "set" : "NULL");
00991     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s,
00992                 MODNAME ": apprentice read %d lines, %d rules, %d errors",
00993                 lineno, rule, errs);
00994 #endif
00995 
00996 #if MIME_MAGIC_DEBUG
00997     prevm = 0;
00998     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s,
00999                 MODNAME ": apprentice test");
01000     for (m = conf->magic; m; m = m->next) {
01001         if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
01002             apr_isprint((((unsigned long) m) >> 16) & 255) &&
01003             apr_isprint((((unsigned long) m) >> 8) & 255) &&
01004             apr_isprint(((unsigned long) m) & 255)) {
01005             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s,
01006                         MODNAME ": apprentice: POINTER CLOBBERED! "
01007                         "m=\"%c%c%c%c\" line=%d",
01008                         (((unsigned long) m) >> 24) & 255,
01009                         (((unsigned long) m) >> 16) & 255,
01010                         (((unsigned long) m) >> 8) & 255,
01011                         ((unsigned long) m) & 255,
01012                         prevm ? prevm->lineno : -1);
01013             break;
01014         }
01015         prevm = m;
01016     }
01017 #endif
01018 
01019     return (errs ? -1 : 0);
01020 }
01021 
01022 /*
01023  * extend the sign bit if the comparison is to be signed
01024  */
01025 static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v)
01026 {
01027     if (!(m->flag & UNSIGNED))
01028         switch (m->type) {
01029             /*
01030              * Do not remove the casts below.  They are vital. When later
01031              * compared with the data, the sign extension must have happened.
01032              */
01033         case BYTE:
01034             v = (char) v;
01035             break;
01036         case SHORT:
01037         case BESHORT:
01038         case LESHORT:
01039             v = (short) v;
01040             break;
01041         case DATE:
01042         case BEDATE:
01043         case LEDATE:
01044         case LONG:
01045         case BELONG:
01046         case LELONG:
01047             v = (long) v;
01048             break;
01049         case STRING:
01050             break;
01051         default:
01052             ap_log_error(APLOG_MARK, APLOG_ERR, 0, s,
01053                         MODNAME ": can't happen: m->type=%d", m->type);
01054             return -1;
01055         }
01056     return v;
01057 }
01058 
01059 /*
01060  * parse one line from magic file, put into magic[index++] if valid
01061  */
01062 static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno)
01063 {
01064     struct magic *m;
01065     char *t, *s;
01066     magic_server_config_rec *conf = (magic_server_config_rec *)
01067                     ap_get_module_config(serv->module_config, &mime_magic_module);
01068 
01069     /* allocate magic structure entry */
01070     m = (struct magic *) apr_pcalloc(p, sizeof(struct magic));
01071 
01072     /* append to linked list */
01073     m->next = NULL;
01074     if (!conf->magic || !conf->last) {
01075         conf->magic = conf->last = m;
01076     }
01077     else {
01078         conf->last->next = m;
01079         conf->last = m;
01080     }
01081 
01082     /* set values in magic structure */
01083     m->flag = 0;
01084     m->cont_level = 0;
01085     m->lineno = lineno;
01086 
01087     while (*l == '>') {
01088         ++l;                    /* step over */
01089         m->cont_level++;
01090     }
01091 
01092     if (m->cont_level != 0 && *l == '(') {
01093         ++l;                    /* step over */
01094         m->flag |= INDIR;
01095     }
01096 
01097     /* get offset, then skip over it */
01098     m->offset = (int) strtol(l, &t, 0);
01099     if (l == t) {
01100         ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
01101                     MODNAME ": offset %s invalid", l);
01102     }
01103     l = t;
01104 
01105     if (m->flag & INDIR) {
01106         m->in.type = LONG;
01107         m->in.offset = 0;
01108         /*
01109          * read [.lbs][+-]nnnnn)
01110          */
01111         if (*l == '.') {
01112             switch (*++l) {
01113             case 'l':
01114                 m->in.type = LONG;
01115                 break;
01116             case 's':
01117                 m->in.type = SHORT;
01118                 break;
01119             case 'b':
01120                 m->in.type = BYTE;
01121                 break;
01122             default:
01123                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
01124                         MODNAME ": indirect offset type %c invalid", *l);
01125                 break;
01126             }
01127             l++;
01128         }
01129         s = l;
01130         if (*l == '+' || *l == '-')
01131             l++;
01132         if (apr_isdigit((unsigned char) *l)) {
01133             m->in.offset = strtol(l, &t, 0);
01134             if (*s == '-')
01135                 m->in.offset = -m->in.offset;
01136         }
01137         else
01138             t = l;
01139         if (*t++ != ')') {
01140             ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
01141                         MODNAME ": missing ')' in indirect offset");
01142         }
01143         l = t;
01144     }
01145 
01146 
01147     while (apr_isdigit((unsigned char) *l))
01148         ++l;
01149     EATAB;
01150 
01151 #define NBYTE           4
01152 #define NSHORT          5
01153 #define NLONG           4
01154 #define NSTRING         6
01155 #define NDATE           4
01156 #define NBESHORT        7
01157 #define NBELONG         6
01158 #define NBEDATE         6
01159 #define NLESHORT        7
01160 #define NLELONG         6
01161 #define NLEDATE         6
01162 
01163     if (*l == 'u') {
01164         ++l;
01165         m->flag |= UNSIGNED;
01166     }
01167 
01168     /* get type, skip it */
01169     if (strncmp(l, "byte", NBYTE) == 0) {
01170         m->type = BYTE;
01171         l += NBYTE;
01172     }
01173     else if (strncmp(l, "short", NSHORT) == 0) {
01174         m->type = SHORT;
01175         l += NSHORT;
01176     }
01177     else if (strncmp(l, "long", NLONG) == 0) {
01178         m->type = LONG;
01179         l += NLONG;
01180     }
01181     else if (strncmp(l, "string", NSTRING) == 0) {
01182         m->type = STRING;
01183         l += NSTRING;
01184     }
01185     else if (strncmp(l, "date", NDATE) == 0) {
01186         m->type = DATE;
01187         l += NDATE;
01188     }
01189     else if (strncmp(l, "beshort", NBESHORT) == 0) {
01190         m->type = BESHORT;
01191         l += NBESHORT;
01192     }
01193     else if (strncmp(l, "belong", NBELONG) == 0) {
01194         m->type = BELONG;
01195         l += NBELONG;
01196     }
01197     else if (strncmp(l, "bedate", NBEDATE) == 0) {
01198         m->type = BEDATE;
01199         l += NBEDATE;
01200     }
01201     else if (strncmp(l, "leshort", NLESHORT) == 0) {
01202         m->type = LESHORT;
01203         l += NLESHORT;
01204     }
01205     else if (strncmp(l, "lelong", NLELONG) == 0) {
01206         m->type = LELONG;
01207         l += NLELONG;
01208     }
01209     else if (strncmp(l, "ledate", NLEDATE) == 0) {
01210         m->type = LEDATE;
01211         l += NLEDATE;
01212     }
01213     else {
01214         ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
01215                     MODNAME ": type %s invalid", l);
01216         return -1;
01217     }
01218     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
01219     if (*l == '&') {
01220         ++l;
01221         m->mask = signextend(serv, m, strtol(l, &l, 0));
01222     }
01223     else
01224         m->mask = ~0L;
01225     EATAB;
01226 
01227     switch (*l) {
01228     case '>':
01229     case '<':
01230         /* Old-style anding: "0 byte &0x80 dynamically linked" */
01231     case '&':
01232     case '^':
01233     case '=':
01234         m->reln = *l;
01235         ++l;
01236         break;
01237     case '!':
01238         if (m->type != STRING) {
01239             m->reln = *l;
01240             ++l;
01241             break;
01242         }
01243         /* FALL THROUGH */
01244     default:
01245         if (*l == 'x' && apr_isspace(l[1])) {
01246             m->reln = *l;
01247             ++l;
01248             goto GetDesc;       /* Bill The Cat */
01249         }
01250         m->reln = '=';
01251         break;
01252     }
01253     EATAB;
01254 
01255     if (getvalue(serv, m, &l))
01256         return -1;
01257     /*
01258      * now get last part - the description
01259      */
01260   GetDesc:
01261     EATAB;
01262     if (l[0] == '\b') {
01263         ++l;
01264         m->nospflag = 1;
01265     }
01266     else if ((l[0] == '\\') && (l[1] == 'b')) {
01267         ++l;
01268         ++l;
01269         m->nospflag = 1;
01270     }
01271     else
01272         m->nospflag = 0;
01273     strncpy(m->desc, l, sizeof(m->desc) - 1);
01274     m->desc[sizeof(m->desc) - 1] = '\0';
01275 
01276 #if MIME_MAGIC_DEBUG
01277     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv,
01278                 MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s",
01279                 lineno, m, m->next, m->cont_level, m->desc);
01280 #endif /* MIME_MAGIC_DEBUG */
01281 
01282     return 0;
01283 }
01284 
01285 /*
01286  * Read a numeric value from a pointer, into the value union of a magic
01287  * pointer, according to the magic type.  Update the string pointer to point
01288  * just after the number read.  Return 0 for success, non-zero for failure.
01289  */
01290 static int getvalue(server_rec *s, struct magic *m, char **p)
01291 {
01292     int slen;
01293 
01294     if (m->type == STRING) {
01295         *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen);
01296         m->vallen = slen;
01297     }
01298     else if (m->reln != 'x')
01299         m->value.l = signextend(s, m, strtol(*p, p, 0));
01300     return 0;
01301 }
01302 
01303 /*
01304  * Convert a string containing C character escapes.  Stop at an unescaped
01305  * space or tab. Copy the converted version to "p", returning its length in
01306  * *slen. Return updated scan pointer as function result.
01307  */
01308 static char *getstr(server_rec *serv, register char *s, register char *p,
01309                     int plen, int *slen)
01310 {
01311     char *origs = s, *origp = p;
01312     char *pmax = p + plen - 1;
01313     register int c;
01314     register int val;
01315 
01316     while ((c = *s++) != '\0') {
01317         if (apr_isspace(c))
01318             break;
01319         if (p >= pmax) {
01320             ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
01321                         MODNAME<