file/src/apprentice.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) Ian F. Darwin 1986-1995.
00003  * Software written by Ian F. Darwin and others;
00004  * maintained 1995-present by Christos Zoulas and others.
00005  * 
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice immediately at the beginning of the file, without modification,
00011  *    this list of conditions, and the following disclaimer.
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in the
00014  *    documentation and/or other materials provided with the distribution.
00015  *  
00016  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00018  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00019  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
00020  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00021  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00022  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00023  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00024  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00025  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00026  * SUCH DAMAGE.
00027  */
00028 /*
00029  * apprentice - make one pass through /etc/magic, learning its secrets.
00030  */
00031 
00032 #include "file.h"
00033 #include "magic.h"
00034 #include <stdlib.h>
00035 #ifdef HAVE_UNISTD_H
00036 #include <unistd.h>
00037 #endif
00038 #include <string.h>
00039 #include <ctype.h>
00040 #include <fcntl.h>
00041 #include <sys/stat.h>
00042 #include <sys/param.h>
00043 #ifdef QUICK
00044 #include <sys/mman.h>
00045 #endif
00046 
00047 #ifndef lint
00048 FILE_RCSID("@(#)$Id: apprentice.c,v 1.84 2005/03/25 18:03:18 christos Exp $")
00049 #endif  /* lint */
00050 
/*
 * EATAB: advance the pointer named `l' in the enclosing scope past any
 * run of ASCII whitespace.  The macro takes no argument; it expands to a
 * braced block and relies on a local variable that is literally called `l'.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
                      isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE(l): yield the lower-case form of `l' when it is an upper-case
 * letter, otherwise `l' unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
                        tolower((unsigned char) (l)) : (l))
00055 /*
00056  * Work around a bug in headers on Digital Unix.
00057  * At least confirmed for: OSF1 V4.0 878
00058  */
00059 #if defined(__osf__) && defined(__DECC)
00060 #ifdef MAP_FAILED
00061 #undef MAP_FAILED
00062 #endif
00063 #endif
00064 
00065 #ifndef MAP_FAILED
00066 #define MAP_FAILED (void *) -1
00067 #endif
00068 
00069 #ifndef MAP_FILE
00070 #define MAP_FILE 0
00071 #endif
00072 
00073 #ifndef MAXPATHLEN
00074 #define MAXPATHLEN      1024
00075 #endif
00076 
/*
 * IS_PLAINSTRING(t): true when magic type `t' is one of the literal
 * string types (string, pstring, bestring16, lestring16).
 */
#define IS_PLAINSTRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \
    (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16)
    
/*
 * IS_STRING(t): true for the plain string types above and additionally
 * for the regex and search types.
 */
#define IS_STRING(t) (IS_PLAINSTRING(t) || (t) == FILE_REGEX || \
    (t) == FILE_SEARCH)
00082 
/*
 * Number of entries currently allocated for the magic array being built;
 * set to MAXMAGIS by apprentice_file() and grown by parse().
 */
/*@unchecked@*/
private size_t maxmagic = 0;
/*
 * Size of one in-memory magic entry; apprentice_1() refuses to run when
 * it differs from FILE_MAGICSIZE.
 */
/*@unchecked@*/
private size_t magicsize = sizeof(struct magic);
00087 
00088 private int getvalue(struct magic_set *ms, struct magic *m, char **p)
00089         /*@globals fileSystem @*/
00090         /*@modifies ms, m, *p, fileSystem @*/;
00091 private int hextoint(int)
00092         /*@*/;
00093 /*@null@*/
00094 private char *getstr(struct magic_set *ms, /*@returned@*/ char *s, char *p, int plen, int *slen)
00095         /*@modifies ms, *p, *slen @*/;
00096 private int parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, int action)
00097         /*@globals maxmagic, fileSystem @*/
00098         /*@modifies ms, *magicp, *nmagicp, maxmagic, fileSystem @*/;
00099 private void eatsize(char **p)
00100         /*@modifies *p @*/;
00101 private int apprentice_1(struct magic_set *ms, const char *fn, int action, struct mlist *mlist)
00102         /*@globals fileSystem, internalState @*/
00103         /*@modifies ms, mlist, fileSystem, internalState @*/;
00104 private int apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
00105     const char *fn, int action)
00106         /*@globals maxmagic, fileSystem @*/
00107         /*@modifies ms, *magicp, *nmagicp, maxmagic, fileSystem @*/;
00108 private void byteswap(struct magic *magic, uint32_t nmagic)
00109         /*@modifies magic @*/;
00110 private void bs1(struct magic *m)
00111         /*@modifies m @*/;
00112 private uint16_t swap2(uint16_t sv)
00113         /*@*/;
00114 private uint32_t swap4(uint32_t sv)
00115         /*@*/;
00116 private char *mkdbname(const char *fn, /*@returned@*/ char *buf, size_t, int)
00117         /*@modifies buf @*/;
00118 private int apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
00119     const char *fn)
00120         /*@globals fileSystem, internalState @*/
00121         /*@modifies ms, *magicp, *nmagicp, fileSystem, internalState @*/;
00122 private int apprentice_compile(struct magic_set *ms, struct magic **magicp, uint32_t * nmagicp,
00123     const char *fn)
00124         /*@globals fileSystem, internalState @*/
00125         /*@modifies ms, fileSystem, internalState @*/;
00126 private int check_format(struct magic_set *ms, struct magic *m)
00127         /*@globals fileSystem @*/
00128         /*@modifies fileSystem @*/;
00129 
00130 #ifdef COMPILE_ONLY
00131 
00132 int main(int, char *[]);
00133 
00134 int
00135 main(int argc, char *argv[])
00136 {
00137         int ret;
00138         struct magic_set *ms;
00139         char *progname;
00140 
00141         if ((progname = strrchr(argv[0], '/')) != NULL)
00142                 progname++;
00143         else
00144                 progname = argv[0];
00145 
00146         if (argc != 2) {
00147                 (void)fprintf(stderr, "Usage: %s file\n", progname);
00148                 return 1;
00149         }
00150 
00151         if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
00152                 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
00153                 return 1;
00154         }
00155         ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
00156         if (ret == 1)
00157                 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
00158         magic_close(ms);
00159         return ret;
00160 }
00161 #endif /* COMPILE_ONLY */
00162 
00163 
00164 /*
00165  * Handle one file.
00166  */
00167 private int
00168 apprentice_1(struct magic_set *ms, const char *fn, int action,
00169     struct mlist *mlist)
00170 {
00171         struct magic *magic = NULL;
00172         uint32_t nmagic = 0;
00173         struct mlist *ml;
00174         int rv = -1;
00175         int mapped;
00176 
00177         if (magicsize != FILE_MAGICSIZE) {
00178                 file_error(ms, 0, "magic element size %lu != %lu",
00179                     (unsigned long)sizeof(*magic),
00180                     (unsigned long)FILE_MAGICSIZE);
00181                 return -1;
00182         }
00183 
00184         if (action == FILE_COMPILE) {
00185                 rv = apprentice_file(ms, &magic, &nmagic, fn, action);
00186                 if (rv != 0)
00187                         return -1;
00188                 rv = apprentice_compile(ms, &magic, &nmagic, fn);
00189                 free(magic);
00190                 return rv;
00191         }
00192 #ifndef COMPILE_ONLY
00193         if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
00194                 if (ms->flags & MAGIC_CHECK)
00195                         file_magwarn(ms, "using regular magic file `%s'", fn);
00196                 rv = apprentice_file(ms, &magic, &nmagic, fn, action);
00197                 if (rv != 0)
00198                         return -1;
00199                 mapped = 0;
00200         }
00201 
00202         if (rv == -1)
00203                 return rv;
00204         mapped = rv;
00205              
00206         if (magic == NULL || nmagic == 0) {
00207                 file_delmagic(magic, mapped, nmagic);
00208                 return -1;
00209         }
00210 
00211         if ((ml = malloc(sizeof(*ml))) == NULL) {
00212                 file_delmagic(magic, mapped, nmagic);
00213                 file_oomem(ms);
00214                 return -1;
00215         }
00216 
00217         ml->magic = magic;
00218         ml->nmagic = nmagic;
00219         ml->mapped = mapped;
00220 
00221         mlist->prev->next = ml;
00222         ml->prev = mlist->prev;
00223         ml->next = mlist;
00224         mlist->prev = ml;
00225 
00226         return 0;
00227 #endif /* COMPILE_ONLY */
00228 }
00229 
00230 protected void
00231 file_delmagic(struct magic *p, int type, size_t entries)
00232 {
00233         if (p == NULL)
00234                 return;
00235         switch (type) {
00236         case 2:
00237                 p--;
00238                 (void)munmap((void *)p, sizeof(*p) * (entries + 1));
00239                 break;
00240         case 1:
00241                 p--;
00242                 /*@fallthrough@*/
00243         case 0:
00244                 free(p);
00245                 break;
00246         default:
00247                 abort();
00248         }
00249 }
00250 
00251 
00252 /* const char *fn: list of magic files */
00253 protected struct mlist *
00254 file_apprentice(struct magic_set *ms, const char *fn, int action)
00255 {
00256         char *p, *mfn, *afn = NULL;
00257         int file_err, errs = -1;
00258         struct mlist *mlist;
00259 
00260         if (fn == NULL)
00261                 fn = getenv("MAGIC");
00262         if (fn == NULL)
00263                 fn = MAGIC;
00264 
00265         if ((fn = mfn = strdup(fn)) == NULL) {
00266                 file_oomem(ms);
00267                 return NULL;
00268         }
00269 
00270         if ((mlist = malloc(sizeof(*mlist))) == NULL) {
00271                 free(mfn);
00272                 file_oomem(ms);
00273                 return NULL;
00274         }
00275         mlist->next = mlist->prev = mlist;
00276 
00277         while (fn) {
00278                 p = strchr(fn, PATHSEP);
00279                 if (p)
00280                         *p++ = '\0';
00281                 if (*fn == '\0')
00282                         break;
00283                 if (ms->flags & MAGIC_MIME) {
00284                         if ((afn = malloc(strlen(fn) + 5 + 1)) == NULL) {
00285                                 free(mfn);
00286                                 free(mlist);
00287                                 file_oomem(ms);
00288                                 return NULL;
00289                         }
00290                         (void)strcpy(afn, fn);
00291                         (void)strcat(afn, ".mime");
00292                         fn = afn;
00293                 }
00294                 file_err = apprentice_1(ms, fn, action, mlist);
00295                 if (file_err > errs)
00296                         errs = file_err;
00297                 if (afn) {
00298                         free(afn);
00299                         afn = NULL;
00300                 }
00301                 fn = p;
00302         }
00303         if (errs == -1) {
00304                 free(mfn);
00305                 free(mlist);
00306                 mlist = NULL;
00307                 file_error(ms, 0, "could not find any magic files!");
00308                 return NULL;
00309         }
00310         free(mfn);
00311         return mlist;
00312 }
00313 
00314 /*
00315  * parse from a file
00316  * const char *fn: name of magic file
00317  */
00318 private int
00319 apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
00320     const char *fn, int action)
00321 {
00322         private const char hdr[] =
00323                 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
00324         FILE *f;
00325         char line[BUFSIZ+1];
00326         int errs = 0;
00327 
00328         f = fopen(ms->file = fn, "r");
00329         if (f == NULL) {
00330                 if (errno != ENOENT)
00331                         file_error(ms, errno, "cannot read magic file `%s'",
00332                             fn);
00333                 return -1;
00334         }
00335 
00336         maxmagic = MAXMAGIS;
00337         *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
00338         if (*magicp == NULL) {
00339                 (void)fclose(f);
00340                 file_oomem(ms);
00341                 return -1;
00342         }
00343 
00344         /* print silly verbose header for USG compat. */
00345         if (action == FILE_CHECK)
00346                 (void)fprintf(stderr, "%s\n", hdr);
00347 
00348         /* parse it */
00349         for (ms->line = 1; fgets(line, BUFSIZ, f) != NULL; ms->line++) {
00350                 size_t len;
00351                 if (line[0]=='#')       /* comment, do not parse */
00352                         continue;
00353                 len = strlen(line);
00354                 if (len < 2) /* null line, garbage, etc */
00355                         continue;
00356                 line[len - 1] = '\0'; /* delete newline */
00357                 if (parse(ms, magicp, nmagicp, line, action) != 0)
00358                         errs = 1;
00359         }
00360 
00361         (void)fclose(f);
00362         if (errs) {
00363                 free(*magicp);
00364                 *magicp = NULL;
00365                 *nmagicp = 0;
00366         }
00367         return errs;
00368 }
00369 
00370 /*
00371  * extend the sign bit if the comparison is to be signed
00372  */
00373 protected uint32_t
00374 file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
00375 {
00376         if (!(m->flag & UNSIGNED))
00377                 switch(m->type) {
00378                 /*
00379                  * Do not remove the casts below.  They are
00380                  * vital.  When later compared with the data,
00381                  * the sign extension must have happened.
00382                  */
00383                 case FILE_BYTE:
00384                         v = (char) v;
00385                         break;
00386                 case FILE_SHORT:
00387                 case FILE_BESHORT:
00388                 case FILE_LESHORT:
00389                         v = (short) v;
00390                         break;
00391                 case FILE_DATE:
00392                 case FILE_BEDATE:
00393                 case FILE_LEDATE:
00394                 case FILE_LDATE:
00395                 case FILE_BELDATE:
00396                 case FILE_LELDATE:
00397                 case FILE_LONG:
00398                 case FILE_BELONG:
00399                 case FILE_LELONG:
00400                         v = (int32_t) v;
00401                         break;
00402                 case FILE_STRING:
00403                 case FILE_PSTRING:
00404                 case FILE_BESTRING16:
00405                 case FILE_LESTRING16:
00406                 case FILE_REGEX:
00407                 case FILE_SEARCH:
00408                         break;
00409                 default:
00410                         if (ms->flags & MAGIC_CHECK)
00411                             file_magwarn(ms, "cannot happen: m->type=%d\n",
00412                                     m->type);
00413                         return ~0U;
00414                 }
00415         return v;
00416 }
00417 
00418 /*
00419  * parse one line from magic file, put into magic[index++] if valid
00420  */
00421 private int
00422 parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
00423     int action)
00424 {
00425         int i = 0;
00426         struct magic *m;
00427         char *t;
00428 /*@observer@*/
00429         private const char *fops = FILE_OPS;
00430         uint32_t val;
00431 
00432 #define ALLOC_INCR      200
00433         if (*nmagicp + 1 >= maxmagic){
00434                 maxmagic += ALLOC_INCR;
00435 /*@-unqualifiedtrans@*/
00436                 if ((m = (struct magic *) realloc(*magicp,
00437                     sizeof(struct magic) * maxmagic)) == NULL) {
00438                         file_oomem(ms);
00439                         if (*magicp)
00440                                 free(*magicp);
00441                         return -1;
00442                 }
00443 /*@=unqualifiedtrans@*/
00444                 *magicp = m;
00445                 memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
00446                     * ALLOC_INCR);
00447         }
00448         m = &(*magicp)[*nmagicp];
00449         m->flag = 0;
00450         m->cont_level = 0;
00451 
00452         while (*l == '>') {
00453                 ++l;            /* step over */
00454                 m->cont_level++; 
00455         }
00456 
00457         if (m->cont_level != 0 && *l == '&') {
00458                 ++l;            /* step over */
00459                 m->flag |= OFFADD;
00460         }
00461         if (m->cont_level != 0 && *l == '(') {
00462                 ++l;            /* step over */
00463                 m->flag |= INDIR;
00464                 if (m->flag & OFFADD)
00465                         m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
00466         }
00467         if (m->cont_level != 0 && *l == '&') {
00468                 ++l;            /* step over */
00469                 m->flag |= OFFADD;
00470         }
00471 
00472         /* get offset, then skip over it */
00473         m->offset = (uint32_t)strtoul(l, &t, 0);
00474         if (l == t)
00475                 if (ms->flags & MAGIC_CHECK)
00476                         file_magwarn(ms, "offset `%s' invalid", l);
00477         l = t;
00478 
00479         if (m->flag & INDIR) {
00480                 m->in_type = FILE_LONG;
00481                 m->in_offset = 0;
00482                 /*
00483                  * read [.lbs][+-]nnnnn)
00484                  */
00485                 if (*l == '.') {
00486                         l++;
00487                         switch (*l) {
00488                         case 'l':
00489                                 m->in_type = FILE_LELONG;
00490                                 break;
00491                         case 'L':
00492                                 m->in_type = FILE_BELONG;
00493                                 break;
00494                         case 'h':
00495                         case 's':
00496                                 m->in_type = FILE_LESHORT;
00497                                 break;
00498                         case 'H':
00499                         case 'S':
00500                                 m->in_type = FILE_BESHORT;
00501                                 break;
00502                         case 'c':
00503                         case 'b':
00504                         case 'C':
00505                         case 'B':
00506                                 m->in_type = FILE_BYTE;
00507                                 break;
00508                         default:
00509                                 if (ms->flags & MAGIC_CHECK)
00510                                         file_magwarn(ms,
00511                                             "indirect offset type `%c' invalid",
00512                                             *l);
00513                                 break;
00514                         }
00515                         l++;
00516                 }
00517                 if (*l == '~') {
00518                         m->in_op |= FILE_OPINVERSE;
00519                         l++;
00520                 }
00521                 switch (*l) {
00522                 case '&':
00523                         m->in_op |= FILE_OPAND;
00524                         l++;
00525                         break;
00526                 case '|':
00527                         m->in_op |= FILE_OPOR;
00528                         l++;
00529                         break;
00530                 case '^':
00531                         m->in_op |= FILE_OPXOR;
00532                         l++;
00533                         break;
00534                 case '+':
00535                         m->in_op |= FILE_OPADD;
00536                         l++;
00537                         break;
00538                 case '-':
00539                         m->in_op |= FILE_OPMINUS;
00540                         l++;
00541                         break;
00542                 case '*':
00543                         m->in_op |= FILE_OPMULTIPLY;
00544                         l++;
00545                         break;
00546                 case '/':
00547                         m->in_op |= FILE_OPDIVIDE;
00548                         l++;
00549                         break;
00550                 case '%':
00551                         m->in_op |= FILE_OPMODULO;
00552                         l++;
00553                         break;
00554                 }
00555                 if (*l == '(') {
00556                         m->in_op |= FILE_OPINDIRECT;
00557                         l++;
00558                 }
00559                 if (isdigit((unsigned char)*l) || *l == '-') 
00560                         m->in_offset = (int32_t)strtol(l, &t, 0);
00561                 else
00562                         t = l;
00563                 if (*t++ != ')' ||
00564                     ((m->in_op & FILE_OPINDIRECT) && *t++ != ')')) 
00565                         if (ms->flags & MAGIC_CHECK)
00566                                 file_magwarn(ms,
00567                                     "missing ')' in indirect offset");
00568                 l = t;
00569         }
00570 
00571 
00572         while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
00573                 ++l;
00574         EATAB;
00575 
00576 #define NBYTE           4
00577 #define NSHORT          5
00578 #define NLONG           4
00579 #define NSTRING         6
00580 #define NDATE           4
00581 #define NBESHORT        7
00582 #define NBELONG         6
00583 #define NBEDATE         6
00584 #define NLESHORT        7
00585 #define NLELONG         6
00586 #define NLEDATE         6
00587 #define NPSTRING        7
00588 #define NLDATE          5
00589 #define NBELDATE        7
00590 #define NLELDATE        7
00591 #define NREGEX          5
00592 #define NBESTRING16     10
00593 #define NLESTRING16     10
00594 #define NSEARCH         6
00595 
00596         if (*l == 'u') {
00597                 ++l;
00598                 m->flag |= UNSIGNED;
00599         }
00600 
00601         /* get type, skip it */
00602         if (strncmp(l, "char", NBYTE)==0) {     /* HP/UX compat */
00603                 m->type = FILE_BYTE;
00604                 l += NBYTE;
00605         } else if (strncmp(l, "byte", NBYTE)==0) {
00606                 m->type = FILE_BYTE;
00607                 l += NBYTE;
00608         } else if (strncmp(l, "short", NSHORT)==0) {
00609                 m->type = FILE_SHORT;
00610                 l += NSHORT;
00611         } else if (strncmp(l, "long", NLONG)==0) {
00612                 m->type = FILE_LONG;
00613                 l += NLONG;
00614         } else if (strncmp(l, "string", NSTRING)==0) {
00615                 m->type = FILE_STRING;
00616                 l += NSTRING;
00617         } else if (strncmp(l, "date", NDATE)==0) {
00618                 m->type = FILE_DATE;
00619                 l += NDATE;
00620         } else if (strncmp(l, "beshort", NBESHORT)==0) {
00621                 m->type = FILE_BESHORT;
00622                 l += NBESHORT;
00623         } else if (strncmp(l, "belong", NBELONG)==0) {
00624                 m->type = FILE_BELONG;
00625                 l += NBELONG;
00626         } else if (strncmp(l, "bedate", NBEDATE)==0) {
00627                 m->type = FILE_BEDATE;
00628                 l += NBEDATE;
00629         } else if (strncmp(l, "leshort", NLESHORT)==0) {
00630                 m->type = FILE_LESHORT;
00631                 l += NLESHORT;
00632         } else if (strncmp(l, "lelong", NLELONG)==0) {
00633                 m->type = FILE_LELONG;
00634                 l += NLELONG;
00635         } else if (strncmp(l, "ledate", NLEDATE)==0) {
00636                 m->type = FILE_LEDATE;
00637                 l += NLEDATE;
00638         } else if (strncmp(l, "pstring", NPSTRING)==0) {
00639                 m->type = FILE_PSTRING;
00640                 l += NPSTRING;
00641         } else if (strncmp(l, "ldate", NLDATE)==0) {
00642                 m->type = FILE_LDATE;
00643                 l += NLDATE;
00644         } else if (strncmp(l, "beldate", NBELDATE)==0) {
00645                 m->type = FILE_BELDATE;
00646                 l += NBELDATE;
00647         } else if (strncmp(l, "leldate", NLELDATE)==0) {
00648                 m->type = FILE_LELDATE;
00649                 l += NLELDATE;
00650         } else if (strncmp(l, "regex", NREGEX)==0) {
00651                 m->type = FILE_REGEX;
00652                 l += NREGEX;
00653         } else if (strncmp(l, "bestring16", NBESTRING16)==0) {
00654                 m->type = FILE_BESTRING16;
00655                 l += NBESTRING16;
00656         } else if (strncmp(l, "lestring16", NLESTRING16)==0) {
00657                 m->type = FILE_LESTRING16;
00658                 l += NLESTRING16;
00659         } else if (strncmp(l, "search", NSEARCH)==0) {
00660                 m->type = FILE_SEARCH;
00661                 l += NSEARCH;
00662         } else {
00663                 if (ms->flags & MAGIC_CHECK)
00664                         file_magwarn(ms, "type `%s' invalid", l);
00665                 return -1;
00666         }
00667         /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
00668         /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
00669         if (*l == '~') {
00670                 if (!IS_STRING(m->type))
00671                         m->mask_op |= FILE_OPINVERSE;
00672                 ++l;
00673         }
00674         if ((t = strchr(fops,  *l)) != NULL) {
00675                 uint32_t op = (uint32_t)(t - fops);
00676                 if (op != FILE_OPDIVIDE || !IS_PLAINSTRING(m->type)) {
00677                         ++l;
00678                         m->mask_op |= op;
00679                         val = (uint32_t)strtoul(l, &l, 0);
00680                         m->mask = file_signextend(ms, m, val);
00681                         eatsize(&l);
00682                 } else {
00683                         m->mask = 0L;
00684                         while (!isspace((unsigned char)*++l)) {
00685                                 switch (*l) {
00686                                 case CHAR_IGNORE_LOWERCASE:
00687                                         m->mask |= STRING_IGNORE_LOWERCASE;
00688                                         /*@switchbreak@*/ break;
00689                                 case CHAR_COMPACT_BLANK:
00690                                         m->mask |= STRING_COMPACT_BLANK;
00691                                         /*@switchbreak@*/ break;
00692                                 case CHAR_COMPACT_OPTIONAL_BLANK:
00693                                         m->mask |=
00694                                             STRING_COMPACT_OPTIONAL_BLANK;
00695                                         /*@switchbreak@*/ break;
00696                                 default:
00697                                         if (ms->flags & MAGIC_CHECK)
00698                                                 file_magwarn(ms,
00699                                                 "string extension `%c' invalid",
00700                                                 *l);
00701                                         return -1;
00702                                 }
00703                         }
00704                         ++l;
00705                 }
00706         }
00707         /*
00708          * We used to set mask to all 1's here, instead let's just not do
00709          * anything if mask = 0 (unless you have a better idea)
00710          */
00711         EATAB;
00712   
00713         switch (*l) {
00714         case '>':
00715         case '<':
00716         /* Old-style anding: "0 byte &0x80 dynamically linked" */
00717         case '&':
00718         case '^':
00719         case '=':
00720                 m->reln = *l;
00721                 ++l;
00722                 if (*l == '=') {
00723                    /* HP compat: ignore &= etc. */
00724                    ++l;
00725                 }
00726                 break;
00727         case '!':
00728                 m->reln = *l;
00729                 ++l;
00730                 break;
00731         default:
00732                 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 
00733                     isspace((unsigned char)l[1])) || !l[1])) {
00734                         m->reln = *l;
00735                         ++l;
00736                         goto GetDesc;   /* Bill The Cat */
00737                 }
00738                 m->reln = '=';
00739                 break;
00740         }
00741         EATAB;
00742   
00743         if (getvalue(ms, m, &l))
00744                 return -1;
00745         /*
00746          * TODO finish this macro and start using it!
00747          * #define offsetcheck {if (offset > HOWMANY-1) 
00748          *      magwarn("offset too big"); }
00749          */
00750 
00751         /*
00752          * now get last part - the description
00753          */
00754 GetDesc:
00755         EATAB;
00756         if (l[0] == '\b') {
00757                 ++l;
00758                 m->nospflag = 1;
00759         } else if ((l[0] == '\\') && (l[1] == 'b')) {
00760                 ++l;
00761                 ++l;
00762                 m->nospflag = 1;
00763         } else
00764                 m->nospflag = 0;
00765         while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC)
00766                 /* NULLBODY */;
00767 
00768         if (ms->flags & MAGIC_CHECK) {
00769                 if (!check_format(ms, m))
00770                         return -1;
00771         }
00772 #ifndef COMPILE_ONLY
00773         if (action == FILE_CHECK) {
00774                 file_mdump(m);
00775         }
00776 #endif
00777         ++(*nmagicp);           /* make room for next */
00778         return 0;
00779 }
00780 
00781 /*
00782  * Check that the optional printf format in description matches
00783  * the type of the magic.
00784  */
00785 private int
00786 check_format(struct magic_set *ms, struct magic *m)
00787 {
00788 /*@-nullassign@*/
00789 /*@observer@*/
00790         static const char *formats[] = { FILE_FORMAT_STRING };
00791 /*@=nullassign@*/
00792 /*@observer@*/
00793         static const char *names[] = { FILE_FORMAT_NAME };
00794         char *ptr;
00795 
00796         for (ptr = m->desc; *ptr; ptr++)
00797                 if (*ptr == '%')
00798                         break;
00799         if (*ptr == '\0') {
00800                 /* No format string; ok */
00801                 return 1;
00802         }
00803         if (m->type >= sizeof(formats)/sizeof(formats[0])) {
00804                 file_magwarn(ms, "Internal error inconsistency between m->type"
00805                     " and format strings");
00806                 return 0;
00807         }
00808         if (formats[m->type] == NULL) {
00809                 file_magwarn(ms, "No format string for `%s' with description "
00810                     "`%s'", m->desc, names[m->type]);
00811                 return 0;
00812         }
00813         for (; *ptr; ptr++) {
00814                 if (*ptr == 'l' || *ptr == 'h') {
00815                         /* XXX: we should really fix this one day */
00816                         continue;
00817                 }
00818                 if (islower((unsigned char)*ptr) || *ptr == 'X')
00819                         break;
00820         }
00821         if (*ptr == '\0') {
00822                 /* Missing format string; bad */
00823                 file_magwarn(ms, "Invalid format `%s' for type `%s'",
00824                         m->desc, names[m->type]);
00825                 return 0;
00826         }
00827         if (strchr(formats[m->type], *ptr) == NULL) {
00828                 file_magwarn(ms, "Printf format `%c' is not valid for type `%s'"
00829                     " in description `%s'",
00830                         *ptr, names[m->type], m->desc);
00831                 return 0;
00832         }
00833         return 1;
00834 }
00835 
00836 /* 
00837  * Read a numeric value from a pointer, into the value union of a magic 
00838  * pointer, according to the magic type.  Update the string pointer to point 
00839  * just after the number read.  Return 0 for success, non-zero for failure.
00840  */
00841 private int
00842 getvalue(struct magic_set *ms, struct magic *m, char **p)
00843 {
00844         int slen;
00845 
00846         switch (m->type) {
00847         case FILE_BESTRING16:
00848         case FILE_LESTRING16:
00849         case FILE_STRING:
00850         case FILE_PSTRING:
00851         case FILE_REGEX:
00852         case FILE_SEARCH:
00853                 *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
00854                 if (*p == NULL) {
00855                         if (ms->flags & MAGIC_CHECK)
00856                                 file_magwarn(ms, "cannot get string from `%s'",
00857                                     m->value.s);
00858                         return -1;
00859                 }
00860                 m->vallen = slen;
00861                 return 0;
00862         default:
00863                 if (m->reln != 'x') {
00864                         m->value.l = file_signextend(ms, m,
00865                             (uint32_t)strtoul(*p, p, 0));
00866                         eatsize(p);
00867                 }
00868                 return 0;
00869         }
00870 }
00871 
00872 /*
00873  * Convert a string containing C character escapes.  Stop at an unescaped
00874  * space or tab.
00875  * Copy the converted version to "p", returning its length in *slen.
00876  * Return updated scan pointer as function result.
00877  */
00878 private char *
00879 getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
00880 {
00881         char    *origs = s, *origp = p;
00882         char    *pmax = p + plen - 1;
00883         int     c;
00884         int     val;
00885 
00886         while ((c = *s++) != '\0') {
00887                 if (isspace((unsigned char) c))
00888                         break;
00889                 if (p >= pmax) {
00890                         file_error(ms, 0, "string too long: `%s'", origs);
00891                         return NULL;
00892                 }
00893                 if(c == '\\') {
00894                         switch(c = *s++) {
00895 
00896                         case '\0':
00897                                 goto out;
00898 
00899                         default:
00900                                 *p++ = (char) c;
00901                                 /*@switchbreak@*/ break;
00902 
00903                         case 'n':
00904                                 *p++ = '\n';
00905                                 /*@switchbreak@*/ break;
00906 
00907                         case 'r':
00908                                 *p++ = '\r';
00909                                 /*@switchbreak@*/ break;
00910 
00911                         case 'b':
00912                                 *p++ = '\b';
00913                                 /*@switchbreak@*/ break;
00914 
00915                         case 't':
00916                                 *p++ = '\t';
00917                                 /*@switchbreak@*/ break;
00918 
00919                         case 'f':
00920                                 *p++ = '\f';
00921                                 /*@switchbreak@*/ break;
00922 
00923                         case 'v':
00924                                 *p++ = '\v';
00925                                 /*@switchbreak@*/ break;
00926 
00927                         /* \ and up to 3 octal digits */
00928                         case '0':
00929                         case '1':
00930                         case '2':
00931                         case '3':
00932                         case '4':
00933                         case '5':
00934                         case '6':
00935                         case '7':
00936                                 val = c - '0';
00937                                 c = *s++;  /* try for 2 */
00938                                 if(c >= '0' && c <= '7') {
00939                                         val = (val<<3) | (c - '0');
00940                                         c = *s++;  /* try for 3 */
00941                                         if(c >= '0' && c <= '7')
00942                                                 val = (val<<3) | (c-'0');
00943                                         else
00944                                                 --s;
00945                                 }
00946                                 else
00947                                         --s;
00948                                 *p++ = (char)val;
00949                                 /*@switchbreak@*/ break;
00950 
00951                         /* \x and up to 2 hex digits */
00952                         case 'x':
00953                                 val = 'x';      /* Default if no digits */
00954                                 c = hextoint(*s++);     /* Get next char */
00955                                 if (c >= 0) {
00956                                         val = c;
00957                                         c = hextoint(*s++);
00958                                         if (c >= 0)
00959                                                 val = (val << 4) + c;
00960                                         else
00961                                                 --s;
00962                                 } else
00963                                         --s;
00964                                 *p++ = (char)val;
00965                                 /*@switchbreak@*/ break;
00966                         }
00967                 } else
00968                         *p++ = (char)c;
00969         }
00970 out:
00971         *p = '\0';
00972         *slen = p - origp;
00973         return s;
00974 }
00975 
00976 
/*
 * Map one hexadecimal digit character to its numeric value.
 * Accepts `0'-`9', `a'-`f' and `A'-`F'; returns -1 for anything else,
 * including characters outside the 7-bit ASCII range.
 */
private int
hextoint(int c)
{
	int digit = -1;

	/* Same test isascii() performs: no bits set above the low seven. */
	if (((unsigned char) c & ~0x7f) == 0) {
		if (isdigit((unsigned char) c))
			digit = c - '0';
		else if ('a' <= c && c <= 'f')
			digit = c + 10 - 'a';
		else if ('A' <= c && c <= 'F')
			digit = c + 10 - 'A';
	}
	return digit;
}
00991 
00992 
/*
 * Print a string to fp, rendering non-printable bytes as C character
 * escapes.
 *
 * fp   - stream to write to
 * s    - bytes to print
 * len  - number of bytes to print, or ~0U to print up to (not including)
 *        the first NUL byte
 *
 * Bytes in the range 040..0176 are written as-is.  Newline, carriage
 * return, backspace, tab, form feed and vertical tab are written as
 * \n \r \b \t \f \v; every other byte is written as a backslash followed
 * by three octal digits.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	char	c;

	for (;;) {
		if (len == ~0U) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else  {
			/*
			 * Check the count before touching the buffer so a
			 * counted string is never read past its len bytes.
			 */
			if (len-- == 0)
				break;
			c = *s++;
		}
		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				/*@switchbreak@*/ break;

			case '\r':
				(void) fputc('r', fp);
				/*@switchbreak@*/ break;

			case '\b':
				(void) fputc('b', fp);
				/*@switchbreak@*/ break;

			case '\t':
				(void) fputc('t', fp);
				/*@switchbreak@*/ break;

			case '\f':
				(void) fputc('f', fp);
				/*@switchbreak@*/ break;

			case '\v':
				(void) fputc('v', fp);
				/*@switchbreak@*/ break;

			default:
				(void) fprintf(fp, "%.3o", c & 0377);
				/*@switchbreak@*/ break;
			}
		}
	}
}
01048 
01049 /*
01050  * eatsize(): Eat the size spec from a number [eg. 10UL]
01051  */
01052 private void
01053 eatsize(char **p)
01054 {
01055         char *l = *p;
01056 
01057         if (LOWCASE(*l) == 'u') 
01058                 l++;
01059 
01060         switch (LOWCASE(*l)) {
01061         case 'l':    /* long */
01062         case 's':    /* short */
01063         case 'h':    /* short */
01064         case 'b':    /* char/byte */
01065         case 'c':    /* char/byte */
01066                 l++;
01067                 /*@fallthrough@*/
01068         default:
01069                 break;
01070         }
01071 
01072         *p = l;
01073 }
01074 
01075 /*
01076  * handle a compiled file.
01077  */
01078 private int
01079 apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
01080     const char *fn)
01081 {
01082         int fd;
01083         struct stat st;
01084         uint32_t *ptr;
01085         uint32_t version;
01086         int needsbyteswap;
01087         char buf[MAXPATHLEN];
01088         char *dbname = mkdbname(fn, buf, sizeof(buf), 0);
01089         void *mm = NULL;
01090 
01091         if (dbname == NULL)
01092                 return -1;
01093 
01094         if ((fd = open(dbname, O_RDONLY)) == -1)
01095                 return -1;
01096 
01097         if (fstat(fd, &st) == -1) {
01098                 file_error(ms, errno, "cannot stat `%s'", dbname);
01099                 goto error;
01100         }
01101         if (st.st_size < 16) {
01102                 file_error(ms, 0, "file `%s' is too small", dbname);
01103                 goto error;
01104         }
01105 
01106 #ifdef QUICK
01107         if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
01108             MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
01109                 file_error(ms, errno, "cannot map `%s'", dbname);
01110                 goto error;
01111         }
01112 #define RET     2
01113 #else
01114         if ((mm = malloc((size_t)st.st_size)) == NULL) {
01115                 file_oomem(ms);
01116                 goto error;
01117         }
01118         if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
01119                 file_badread(ms);
01120                 goto error;
01121         }
01122 #define RET     1
01123 #endif
01124         *magicp = mm;
01125         (void)close(fd);
01126         fd = -1;
01127         ptr = (uint32_t *)(void *)*magicp;
01128         if (*ptr != MAGICNO) {
01129                 if (swap4(*ptr) != MAGICNO) {
01130                         file_error(ms, 0, "bad magic in `%s'");
01131                         goto error;
01132                 }
01133                 needsbyteswap = 1;
01134         } else
01135                 needsbyteswap = 0;
01136         if (needsbyteswap)
01137                 version = swap4(ptr[1]);
01138         else
01139                 version = ptr[1];
01140         if (version != VERSIONNO) {
01141                 file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
01142                     version, VERSIONNO, dbname);
01143                 goto error;
01144         }
01145         *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
01146         (*magicp)++;
01147         if (needsbyteswap)
01148                 byteswap(*magicp, *nmagicp);
01149         return RET;
01150 
01151 error:
01152         if (fd != -1)
01153                 (void)close(fd);
01154         if (mm) {
01155 #ifdef QUICK
01156                 (void)munmap((void *)mm, (size_t)st.st_size);
01157 #else
01158                 free(mm);
01159 #endif
01160         } else {
01161                 *magicp = NULL;
01162                 *nmagicp = 0;
01163         }
01164         return -1;
01165 }
01166 
01167 /*@unchecked@*/ /*@observer@*/
01168 private const uint32_t ar[] = {
01169     MAGICNO, VERSIONNO
01170 };
01171 /*
01172  * handle an mmaped file.
01173  */
01174 private int
01175 apprentice_compile(struct magic_set *ms, struct magic **magicp,
01176     uint32_t *nmagicp, const char *fn)
01177 {
01178         int fd;
01179         char buf[MAXPATHLEN];
01180         char *dbname = mkdbname(fn, buf, sizeof(buf), 1);
01181 
01182         if (dbname == NULL) 
01183                 return -1;
01184 
01185         if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
01186                 file_error(ms, errno, "cannot open `%s'", dbname);
01187                 return -1;
01188         }
01189 
01190         if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
01191                 file_error(ms, errno, "error writing `%s'", dbname);
01192                 return -1;
01193         }
01194 
01195         if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
01196             != sizeof(struct magic)) {
01197                 file_error(ms, errno, "error seeking `%s'", dbname);
01198                 return -1;
01199         }
01200 
01201         if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) 
01202             != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
01203                 file_error(ms, errno, "error writing `%s'", dbname);
01204                 return -1;
01205         }
01206 
01207         (void)close(fd);
01208         return 0;
01209 }
01210 
/* Suffix appended to a magic file name to form its database name. */
/*@unchecked@*/ /*@observer@*/
private const char ext[] = ".mgc";
/*
 * Build the compiled-database name for the magic file fn.
 *
 * fn      - name of the magic source file
 * buf     - buffer receiving the result
 * bufsiz  - size of buf in bytes
 * strip   - if non-zero, drop everything up to and including the last
 *           `/' of fn before appending the suffix
 *
 * Returns buf on success.  Returns NULL if the name does not fit in buf
 * (or formatting fails), so callers never open a truncated path.
 */
private char *
mkdbname(const char *fn, char *buf, size_t bufsiz, int strip)
{
	int n;

	if (strip) {
		const char *p;
		if ((p = strrchr(fn, '/')) != NULL)
			fn = ++p;
	}

	n = snprintf(buf, bufsiz, "%s%s", fn, ext);
	if (n < 0 || (size_t)n >= bufsiz)
		return NULL;
	return buf;
}
01228 
01229 /*
01230  * Byteswap an mmap'ed file if needed
01231  */
01232 private void
01233 byteswap(struct magic *magic, uint32_t nmagic)
01234 {
01235         uint32_t i;
01236         for (i = 0; i < nmagic; i++)
01237                 bs1(&magic[i]);
01238 }
01239 
/*
 * Return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	const unsigned int lo = sv & 0xffU;
	const unsigned int hi = (sv >> 8) & 0xffU;

	return (uint16_t)((lo << 8) | hi);
}
01253 
/*
 * Return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
01269 
01270 /*
01271  * byteswap a single magic entry
01272  */
01273 private void
01274 bs1(struct magic *m)
01275 {
01276         m->cont_level = swap2(m->cont_level);
01277         m->offset = swap4((uint32_t)m->offset);
01278         m->in_offset = swap4((uint32_t)m->in_offset);
01279         if (!IS_STRING(m->type))
01280                 m->value.l = swap4(m->value.l);
01281         m->mask = swap4(m->mask);
01282 }

Generated on Fri Oct 12 08:44:53 2007 for rpm by  doxygen 1.5.2