Main Page | Modules | Namespace List | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals | Related Pages | Examples

mod_cern_meta.c

Go to the documentation of this file.
00001 /* Copyright 1999-2005 The Apache Software Foundation or its licensors, as
00002  * applicable.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *     http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00017 /*
00018  * mod_cern_meta.c
00019  * version 0.1.0
00020  * status beta
00021  * 
00022  * Andrew Wilson <Andrew.Wilson@cm.cf.ac.uk> 25.Jan.96
00023  *
00024  * *** IMPORTANT ***
00025  * This version of mod_cern_meta.c controls Meta File behaviour on a
00026  * per-directory basis.  Previous versions of the module defined behaviour
00027  * on a per-server basis.  The upshot is that you'll need to revisit your 
00028  * configuration files in order to make use of the new module.
00029  * ***
00030  *
00031  * Emulate the CERN HTTPD Meta file semantics.  Meta files are HTTP
00032  * headers that can be output in addition to the normal range of
00033  * headers for each file accessed.  They appear rather like the Apache
00034  * .asis files, and are able to provide a crude way of influencing
00035  * the Expires: header, as well as providing other curiosities.
00036  * There are many ways to manage meta information, this one was
00037  * chosen because there is already a large number of CERN users
00038  * who can exploit this module.  It should be noted that there are probably
00039  * more sensitive ways of managing the Expires: header specifically.
00040  *
00041  * The module obeys the following directives, which can appear 
00042  * in the server's .conf files and in .htaccess files.
00043  *
00044  *  MetaFiles <on|off> 
00045  *
00046  *    turns on|off meta file processing for any directory.  
00047  *    Default value is off
00048  *
00049  *        # turn on MetaFiles in this directory
00050  *        MetaFiles on
00051  *
00052  *  MetaDir <directory name>
00053  *      
00054  *    specifies the name of the directory in which Apache can find
00055  *    meta information files.  The directory is usually a 'hidden'
00056  *    subdirectory of the directory that contains the file being
00057  *    accessed.  eg:
00058  *
00059  *        # .meta files are in the *same* directory as the 
00060  *        # file being accessed
00061  *        MetaDir .
00062  *
00063  *    the default is to look in a '.web' subdirectory. This is the
00064  *    same as for CERN 3.+ webservers and behaviour is the same as 
00065  *    for the directive:
00066  *
00067  *        MetaDir .web
00068  *
00069  *  MetaSuffix <meta file suffix>
00070  *
00071  *    specifies the file name suffix for the file containing the
00072  *    meta information.  eg:
00073  *
00074  *       # our meta files are suffixed with '.cern_meta'
00075  *       MetaSuffix .cern_meta
00076  *
00077  *    the default is to look for files with the suffix '.meta'.  This
00078  *    behaviour is the same as for the directive:
00079  *
00080  *       MetaSuffix .meta
00081  *
00082  * When accessing the file
00083  *
00084  *   DOCUMENT_ROOT/somedir/index.html
00085  *
00086  * this module will look for the file
00087  *
00088  *   DOCUMENT_ROOT/somedir/.web/index.html.meta
00089  *
00090  * and will use its contents to generate additional MIME header 
00091  * information.
00092  *
00093  * For more information on the CERN Meta file semantics see:
00094  *
00095  *   http://www.w3.org/hypertext/WWW/Daemon/User/Config/General.html#MetaDir
00096  *
00097  * Change-log:
00098  * 29.Jan.96 pfopen/pfclose instead of fopen/fclose
00099  *           DECLINE when real file not found, we may be checking each
00100  *           of the index.html/index.shtml/index.htm variants and don't
00101  *           need to report missing ones as spurious errors. 
00102  * 31.Jan.96 log_error reports about a malformed .meta file, rather
00103  *           than a script error.
00104  * 20.Jun.96 MetaFiles <on|off> default off, added, so that module
00105  *           can be configured per-directory.  Prior to this the module
00106  *           was running for each request anywhere on the server, naughty..
00107  * 29.Jun.96 All directives made per-directory.
00108  */
00109 
00110 #include "apr.h"
00111 #include "apr_strings.h"
00112 
00113 #define APR_WANT_STRFUNC
00114 #include "apr_want.h"
00115 
00116 #if APR_HAVE_SYS_TYPES_H
00117 #include <sys/types.h>
00118 #endif
00119 
00120 #include "ap_config.h"
00121 #include "httpd.h"
00122 #include "http_config.h"
00123 #include "util_script.h"
00124 #include "http_log.h"
00125 #include "http_request.h"
00126 #include "http_protocol.h"
00127 #include "apr_lib.h"
00128 
00129 #define DIR_CMD_PERMS OR_INDEXES
00130 
00131 #define DEFAULT_METADIR         ".web"
00132 #define DEFAULT_METASUFFIX      ".meta"
00133 #define DEFAULT_METAFILES       0
00134 
00135 module AP_MODULE_DECLARE_DATA cern_meta_module;
00136 
/*
 * Per-directory settings for this module; one field per configuration
 * directive (MetaDir, MetaSuffix, MetaFiles).
 */
typedef struct cern_meta_dir_config {
    /* Argument of MetaDir; NULL when the directive has not been given. */
    const char *metadir;
    /* Argument of MetaSuffix; NULL when the directive has not been given. */
    const char *metasuffix;
    /* Flag value of MetaFiles; zero means meta file processing is off. */
    int metafiles;
} cern_meta_dir_config;
00142 
00143 static void *create_cern_meta_dir_config(apr_pool_t *p, char *dummy)
00144 {
00145     cern_meta_dir_config *new =
00146     (cern_meta_dir_config *) apr_palloc(p, sizeof(cern_meta_dir_config));
00147 
00148     new->metadir = NULL;
00149     new->metasuffix = NULL;
00150     new->metafiles = DEFAULT_METAFILES;
00151 
00152     return new;
00153 }
00154 
00155 static void *merge_cern_meta_dir_configs(apr_pool_t *p, void *basev, void *addv)
00156 {
00157     cern_meta_dir_config *base = (cern_meta_dir_config *) basev;
00158     cern_meta_dir_config *add = (cern_meta_dir_config *) addv;
00159     cern_meta_dir_config *new =
00160     (cern_meta_dir_config *) apr_palloc(p, sizeof(cern_meta_dir_config));
00161 
00162     new->metadir = add->metadir ? add->metadir : base->metadir;
00163     new->metasuffix = add->metasuffix ? add->metasuffix : base->metasuffix;
00164     new->metafiles = add->metafiles;
00165 
00166     return new;
00167 }
00168 
00169 static const char *set_metadir(cmd_parms *parms, void *in_dconf, const char *arg)
00170 {
00171     cern_meta_dir_config *dconf = in_dconf;
00172 
00173     dconf->metadir = arg;
00174     return NULL;
00175 }
00176 
00177 static const char *set_metasuffix(cmd_parms *parms, void *in_dconf, const char *arg)
00178 {
00179     cern_meta_dir_config *dconf = in_dconf;
00180 
00181     dconf->metasuffix = arg;
00182     return NULL;
00183 }
00184 
00185 static const char *set_metafiles(cmd_parms *parms, void *in_dconf, int arg)
00186 {
00187     cern_meta_dir_config *dconf = in_dconf;
00188 
00189     dconf->metafiles = arg;
00190     return NULL;
00191 }
00192 
00193 
00194 static const command_rec cern_meta_cmds[] =
00195 {
00196     AP_INIT_FLAG("MetaFiles", set_metafiles, NULL, DIR_CMD_PERMS,
00197                  "Limited to 'on' or 'off'"),
00198     AP_INIT_TAKE1("MetaDir", set_metadir, NULL, DIR_CMD_PERMS,
00199                   "the name of the directory containing meta files"),
00200     AP_INIT_TAKE1("MetaSuffix", set_metasuffix, NULL, DIR_CMD_PERMS,
00201                   "the filename suffix for meta files"),
00202     {NULL}
00203 };
00204 
00205 /* XXX: this is very similar to ap_scan_script_header_err_core...
00206  * are the differences deliberate, or just a result of bit rot?
00207  */
00208 static int scan_meta_file(request_rec *r, apr_file_t *f)
00209 {
00210     char w[MAX_STRING_LEN];
00211     char *l;
00212     int p;
00213     apr_table_t *tmp_headers;
00214 
00215     tmp_headers = apr_table_make(r->pool, 5);
00216     while (apr_file_gets(w, MAX_STRING_LEN - 1, f) == APR_SUCCESS) {
00217 
00218         /* Delete terminal (CR?)LF */
00219 
00220         p = strlen(w);
00221         if (p > 0 && w[p - 1] == '\n') {
00222             if (p > 1 && w[p - 2] == '\015')
00223                 w[p - 2] = '\0';
00224             else
00225                 w[p - 1] = '\0';
00226         }
00227 
00228         if (w[0] == '\0') {
00229             return OK;
00230         }
00231 
00232         /* if we see a bogus header don't ignore it. Shout and scream */
00233 
00234         if (!(l = strchr(w, ':'))) {
00235             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00236                         "malformed header in meta file: %s", r->filename);
00237             return HTTP_INTERNAL_SERVER_ERROR;
00238         }
00239 
00240         *l++ = '\0';
00241         while (*l && apr_isspace(*l))
00242             ++l;
00243 
00244         if (!strcasecmp(w, "Content-type")) {
00245             char *tmp;
00246             /* Nuke trailing whitespace */
00247 
00248             char *endp = l + strlen(l) - 1;
00249             while (endp > l && apr_isspace(*endp))
00250                 *endp-- = '\0';
00251 
00252             tmp = apr_pstrdup(r->pool, l);
00253             ap_content_type_tolower(tmp);
00254             ap_set_content_type(r, tmp);
00255         }
00256         else if (!strcasecmp(w, "Status")) {
00257             sscanf(l, "%d", &r->status);
00258             r->status_line = apr_pstrdup(r->pool, l);
00259         }
00260         else {
00261             apr_table_set(tmp_headers, w, l);
00262         }
00263     }
00264     apr_table_overlap(r->headers_out, tmp_headers, APR_OVERLAP_TABLES_SET);
00265     return OK;
00266 }
00267 
00268 static int add_cern_meta_data(request_rec *r)
00269 {
00270     char *metafilename;
00271     char *leading_slash;
00272     char *last_slash;
00273     char *real_file;
00274     char *scrap_book;
00275     apr_file_t *f = NULL;
00276     apr_status_t retcode;
00277     cern_meta_dir_config *dconf;
00278     int rv;
00279     request_rec *rr;
00280 
00281     dconf = ap_get_module_config(r->per_dir_config, &cern_meta_module);
00282 
00283     if (!dconf->metafiles) {
00284         return DECLINED;
00285     };
00286 
00287     /* if ./.web/$1.meta exists then output 'asis' */
00288 
00289     if (r->finfo.filetype == 0) {
00290         return DECLINED;
00291     };
00292 
00293     /* is this a directory? */
00294     if (r->finfo.filetype == APR_DIR || r->uri[strlen(r->uri) - 1] == '/') {
00295         return DECLINED;
00296     };
00297 
00298     /* what directory is this file in? */
00299     scrap_book = apr_pstrdup(r->pool, r->filename);
00300 
00301     leading_slash = strchr(scrap_book, '/');
00302     last_slash = strrchr(scrap_book, '/');
00303     if ((last_slash != NULL) && (last_slash != leading_slash)) {
00304         /* skip over last slash */
00305         real_file = last_slash;
00306         real_file++;
00307         *last_slash = '\0';
00308     }
00309     else {
00310         /* no last slash, buh?! */
00311         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00312                     "internal error in mod_cern_meta: %s", r->filename);
00313         /* should really barf, but hey, let's be friends... */
00314         return DECLINED;
00315     };
00316 
00317     metafilename = apr_pstrcat(r->pool, scrap_book, "/",
00318                            dconf->metadir ? dconf->metadir : DEFAULT_METADIR,
00319                            "/", real_file,
00320                  dconf->metasuffix ? dconf->metasuffix : DEFAULT_METASUFFIX,
00321                            NULL);
00322 
00323     /* It sucks to require this subrequest to complete, because this
00324      * means people must leave their meta files accessible to the world.
00325      * A better solution might be a "safe open" feature of pfopen to avoid
00326      * pipes, symlinks, and crap like that.
00327      *
00328      * In fact, this doesn't suck.  Because <Location > blocks are never run
00329      * against sub_req_lookup_file, the meta can be somewhat protected by
00330      * either masking it with a <Location > directive or alias, or stowing
00331      * the file outside of the web document tree, while providing the
00332      * appropriate directory blocks to allow access to it as a file.
00333      */
00334     rr = ap_sub_req_lookup_file(metafilename, r, NULL);
00335     if (rr->status != HTTP_OK) {
00336         ap_destroy_sub_req(rr);
00337         return DECLINED;
00338     }
00339     ap_destroy_sub_req(rr);
00340 
00341     retcode = apr_file_open(&f, metafilename, APR_READ, APR_OS_DEFAULT, r->pool);
00342     if (retcode != APR_SUCCESS) {
00343         if (APR_STATUS_IS_ENOENT(retcode)) {
00344             return DECLINED;
00345         }
00346         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
00347               "meta file permissions deny server access: %s", metafilename);
00348         return HTTP_FORBIDDEN;
00349     };
00350 
00351     /* read the headers in */
00352     rv = scan_meta_file(r, f);
00353     apr_file_close(f);
00354 
00355     return rv;
00356 }
00357 
00358 static void register_hooks(apr_pool_t *p)
00359 {
00360     ap_hook_fixups(add_cern_meta_data,NULL,NULL,APR_HOOK_MIDDLE);
00361 }
00362 
00363 module AP_MODULE_DECLARE_DATA cern_meta_module =
00364 {
00365     STANDARD20_MODULE_STUFF,
00366     create_cern_meta_dir_config,/* dir config creater */
00367     merge_cern_meta_dir_configs,/* dir merger --- default is to override */
00368     NULL,                       /* server config */
00369     NULL,                       /* merge server configs */
00370     cern_meta_cmds,             /* command apr_table_t */
00371     register_hooks              /* register hooks */
00372 };