dect
/
asterisk
Archived
13
0
Fork 0
This repository has been archived on 2022-02-17. You can view files and clone it, but cannot push or open issues or pull requests.
asterisk/main/minimime/mimeparser.y

749 lines
17 KiB
Plaintext

%{
/*
* Copyright (c) 2004 Jann Fischer. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/**
* These are the grammatic definitions in yacc syntax to parse MIME conform
* messages.
*
* TODO:
* - honour parse flags passed to us (partly done)
* - parse Content-Disposition header (partly done)
* - parse Content-Encoding header
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include "mimeparser.h"
#include "mm.h"
#include "mm_internal.h"
int set_boundary(char *,struct parser_state *);
int mimeparser_yywrap(void);
void reset_environ(struct parser_state *pstate);
int PARSER_initialize(struct parser_state *pstate, void *yyscanner);
static char *PARSE_readmessagepart(size_t, size_t, size_t, size_t *,yyscan_t, struct parser_state *);
FILE *mimeparser_yyget_in (yyscan_t yyscanner );
%}
%pure-parser
%parse-param {struct parser_state *pstate}
%parse-param {void *yyscanner}
%lex-param {void *yyscanner}
%union
{
int number;
char *string;
struct s_position position;
}
%token ANY
%token COLON
%token DASH
%token DQUOTE
%token ENDOFHEADERS
%token EOL
%token EOM
%token EQUAL
%token MIMEVERSION_HEADER
%token SEMICOLON
%token <string> CONTENTDISPOSITION_HEADER
%token <string> CONTENTENCODING_HEADER
%token <string> CONTENTTYPE_HEADER
%token <string> MAIL_HEADER
%token <string> HEADERVALUE
%token <string> BOUNDARY
%token <string> ENDBOUNDARY
%token <string> CONTENTTYPE_VALUE
%token <string> TSPECIAL
%token <string> WORD
%token <position> BODY
%token <position> PREAMBLE
%token <position> POSTAMBLE
%type <string> content_disposition
%type <string> contenttype_parameter_value
%type <string> mimetype
%type <string> body
%start message
%%
/* This is a parser for a MIME-conform message, which is in either single
* part or multi part format.
*/
message :
multipart_message
|
singlepart_message
;
multipart_message:
headers preamble
{
mm_context_attachpart(pstate->ctx, pstate->current_mimepart);
pstate->current_mimepart = mm_mimepart_new();
pstate->have_contenttype = 0;
}
mimeparts endboundary postamble
{
dprintf2(pstate,"This was a multipart message\n");
}
;
singlepart_message:
headers body
{
dprintf2(pstate,"This was a single part message\n");
mm_context_attachpart(pstate->ctx, pstate->current_mimepart);
}
;
headers :
header headers
|
end_headers
{
/* If we did not find a Content-Type header for the current
* MIME part (or envelope), we create one and attach it.
* According to the RFC, a type of "text/plain" and a
* charset of "us-ascii" can be assumed.
*/
struct mm_content *ct;
struct mm_param *param;
if (!pstate->have_contenttype) {
ct = mm_content_new();
mm_content_settype(ct, "text/plain");
param = mm_param_new();
param->name = xstrdup("charset");
param->value = xstrdup("us-ascii");
mm_content_attachtypeparam(ct, param);
mm_mimepart_attachcontenttype(pstate->current_mimepart, ct);
}
pstate->have_contenttype = 0;
}
|
header
;
preamble:
PREAMBLE
{
char *preamble;
size_t offset;
if ($1.start != $1.end) {
preamble = PARSE_readmessagepart(0, $1.start, $1.end,
&offset,yyscanner,pstate);
if (preamble == NULL) {
return(-1);
}
pstate->ctx->preamble = preamble;
dprintf2(pstate,"PREAMBLE:\n%s\n", preamble);
}
}
|
;
postamble:
POSTAMBLE
{
}
|
;
mimeparts:
mimeparts mimepart
|
mimepart
;
mimepart:
boundary headers body
{
if (mm_context_attachpart(pstate->ctx, pstate->current_mimepart) == -1) {
mm_errno = MM_ERROR_ERRNO;
return(-1);
}
pstate->temppart = mm_mimepart_new();
pstate->current_mimepart = pstate->temppart;
pstate->mime_parts++;
}
;
header :
mail_header
|
contenttype_header
{
pstate->have_contenttype = 1;
if (mm_content_iscomposite(pstate->envelope->type)) {
pstate->ctx->messagetype = MM_MSGTYPE_MULTIPART;
} else {
pstate->ctx->messagetype = MM_MSGTYPE_FLAT;
}
}
|
contentdisposition_header
|
contentencoding_header
|
mimeversion_header
|
invalid_header
{
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("invalid header encountered");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
} else {
/* TODO: attach MM_WARNING_INVHDR */
}
}
;
mail_header:
MAIL_HEADER COLON WORD EOL
{
struct mm_mimeheader *hdr;
hdr = mm_mimeheader_generate($1, $3);
mm_mimepart_attachheader(pstate->current_mimepart, hdr);
}
|
MAIL_HEADER COLON EOL
{
struct mm_mimeheader *hdr;
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_MIME;
mm_error_setmsg("invalid header encountered");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
} else {
/* TODO: attach MM_WARNING_INVHDR */
}
hdr = mm_mimeheader_generate($1, xstrdup(""));
mm_mimepart_attachheader(pstate->current_mimepart, hdr);
}
;
contenttype_header:
CONTENTTYPE_HEADER COLON mimetype EOL
{
mm_content_settype(pstate->ctype, "%s", $3);
mm_mimepart_attachcontenttype(pstate->current_mimepart, pstate->ctype);
dprintf2(pstate,"Content-Type -> %s\n", $3);
pstate->ctype = mm_content_new();
}
|
CONTENTTYPE_HEADER COLON mimetype contenttype_parameters EOL
{
mm_content_settype(pstate->ctype, "%s", $3);
mm_mimepart_attachcontenttype(pstate->current_mimepart, pstate->ctype);
dprintf2(pstate,"Content-Type (P) -> %s\n", $3);
pstate->ctype = mm_content_new();
}
;
contentdisposition_header:
CONTENTDISPOSITION_HEADER COLON content_disposition EOL
{
dprintf2(pstate,"Content-Disposition -> %s\n", $3);
pstate->ctype->disposition_type = xstrdup($3);
}
|
CONTENTDISPOSITION_HEADER COLON content_disposition content_disposition_parameters EOL
{
dprintf2(pstate,"Content-Disposition (P) -> %s; params\n", $3);
pstate->ctype->disposition_type = xstrdup($3);
}
;
content_disposition:
WORD
{
/*
* According to RFC 2183, the content disposition value may
* only be "inline", "attachment" or an extension token. We
* catch invalid values here if we are not in loose parsing
* mode.
*/
if (strcasecmp($1, "inline") && strcasecmp($1, "attachment")
&& strncasecmp($1, "X-", 2)) {
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_MIME;
mm_error_setmsg("invalid content-disposition");
return(-1);
}
} else {
/* TODO: attach MM_WARNING_INVHDR */
}
$$ = $1;
}
;
contentencoding_header:
CONTENTENCODING_HEADER COLON WORD EOL
{
dprintf2(pstate,"Content-Transfer-Encoding -> %s\n", $3);
}
;
mimeversion_header:
MIMEVERSION_HEADER COLON WORD EOL
{
dprintf2(pstate,"MIME-Version -> '%s'\n", $3);
}
;
invalid_header:
any EOL
;
any:
any ANY
|
ANY
;
mimetype:
WORD '/' WORD
{
char type[255];
snprintf(type, sizeof(type), "%s/%s", $1, $3);
$$ = type;
}
;
contenttype_parameters:
SEMICOLON contenttype_parameter contenttype_parameters
|
SEMICOLON contenttype_parameter
|
SEMICOLON
{
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_MIME;
mm_error_setmsg("invalid Content-Type header");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
} else {
/* TODO: attach MM_WARNING_INVHDR */
}
}
;
content_disposition_parameters:
SEMICOLON content_disposition_parameter content_disposition_parameters
|
SEMICOLON content_disposition_parameter
|
SEMICOLON
{
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_MIME;
mm_error_setmsg("invalid Content-Disposition header");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
} else {
/* TODO: attach MM_WARNING_INVHDR */
}
}
;
contenttype_parameter:
WORD EQUAL contenttype_parameter_value
{
struct mm_param *param;
param = mm_param_new();
dprintf2(pstate,"Param: '%s', Value: '%s'\n", $1, $3);
/* Catch an eventual boundary identifier */
if (!strcasecmp($1, "boundary")) {
if (pstate->lstate.boundary_string == NULL) {
set_boundary($3,pstate);
} else {
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_MIME;
mm_error_setmsg("duplicate boundary "
"found");
return -1;
} else {
/* TODO: attach MM_WARNING_DUPPARAM */
}
}
}
param->name = xstrdup($1);
param->value = xstrdup($3);
mm_content_attachtypeparam(pstate->ctype, param);
}
;
content_disposition_parameter:
WORD EQUAL contenttype_parameter_value
{
struct mm_param *param;
param = mm_param_new();
param->name = xstrdup($1);
param->value = xstrdup($3);
mm_content_attachdispositionparam(pstate->ctype, param);
}
;
contenttype_parameter_value:
WORD
{
dprintf2(pstate,"contenttype_param_val: WORD=%s\n", $1);
$$ = $1;
}
|
TSPECIAL
{
dprintf2(pstate,"contenttype_param_val: TSPECIAL\n");
/* For broken MIME implementation */
if (pstate->parsemode != MM_PARSE_LOOSE) {
mm_errno = MM_ERROR_MIME;
mm_error_setmsg("tspecial without quotes");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
} else {
/* TODO: attach MM_WARNING_INVAL */
}
$$ = $1;
}
|
'"' TSPECIAL '"'
{
dprintf2(pstate,"contenttype_param_val: \"TSPECIAL\"\n" );
$$ = $2;
}
;
end_headers :
ENDOFHEADERS
{
dprintf2(pstate,"End of headers at line %d\n", pstate->lstate.lineno);
}
;
boundary :
BOUNDARY EOL
{
if (pstate->lstate.boundary_string == NULL) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("internal incosistency");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
}
if (strcmp(pstate->lstate.boundary_string, $1)) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("invalid boundary: '%s' (%d)", $1, strlen($1));
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
}
dprintf2(pstate,"New MIME part... (%s)\n", $1);
}
;
endboundary :
ENDBOUNDARY
{
if (pstate->lstate.endboundary_string == NULL) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("internal incosistency");
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
}
if (strcmp(pstate->lstate.endboundary_string, $1)) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("invalid end boundary: %s", $1);
mm_error_setlineno(pstate->lstate.lineno);
return(-1);
}
dprintf2(pstate,"End of MIME message\n");
}
;
body:
BODY
{
char *body;
size_t offset;
dprintf2(pstate,"BODY (%d/%d), SIZE %d\n", $1.start, $1.end, $1.end - $1.start);
body = PARSE_readmessagepart($1.opaque_start, $1.start, $1.end,
&offset,yyscanner,pstate);
if (body == NULL) {
return(-1);
}
pstate->current_mimepart->opaque_body = body;
pstate->current_mimepart->body = body + offset;
pstate->current_mimepart->opaque_length = $1.end - $1.start - 2 + offset;
pstate->current_mimepart->length = pstate->current_mimepart->opaque_length - offset;
}
;
%%
/*
* This function gets the specified part from the currently parsed message.
*/
static char *
PARSE_readmessagepart(size_t opaque_start, size_t real_start, size_t end,
size_t *offset, yyscan_t yyscanner, struct parser_state *pstate)
{
size_t body_size;
size_t current;
size_t start;
char *body;
/* calculate start and offset markers for the opaque and
* header stripped body message.
*/
if (opaque_start > 0) {
/* Multipart message */
if (real_start) {
if (real_start < opaque_start) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("internal incosistency (S:%d/O:%d)",
real_start,
opaque_start);
return(NULL);
}
start = opaque_start;
*offset = real_start - start;
/* Flat message */
} else {
start = opaque_start;
*offset = 0;
}
} else {
start = real_start;
*offset = 0;
}
/* The next three cases should NOT happen anytime */
if (end <= start) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("internal incosistency,2");
mm_error_setlineno(pstate->lstate.lineno);
return(NULL);
}
if (start < *offset) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("internal incosistency, S:%d,O:%d,L:%d", start, offset, pstate->lstate.lineno);
mm_error_setlineno(pstate->lstate.lineno);
return(NULL);
}
if (start < 0 || end < 0) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("internal incosistency,4");
mm_error_setlineno(pstate->lstate.lineno);
return(NULL);
}
/* XXX: do we want to enforce a maximum body size? make it a
* parser option? */
/* Read in the body message */
body_size = end - start;
if (body_size < 1) {
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("size of body cannot be < 1");
mm_error_setlineno(pstate->lstate.lineno);
return(NULL);
}
body = (char *)malloc(body_size + 1);
if (body == NULL) {
mm_errno = MM_ERROR_ERRNO;
return(NULL);
}
/* Get the message body either from a stream or a memory
* buffer.
*/
if (mimeparser_yyget_in(yyscanner) != NULL) {
FILE *x = mimeparser_yyget_in(yyscanner);
current = ftell(x);
fseek(x, start - 1, SEEK_SET);
fread(body, body_size - 1, 1, x);
fseek(x, current, SEEK_SET);
} else if (pstate->lstate.message_buffer != NULL) {
strlcpy(body, pstate->lstate.message_buffer + start - 1, body_size);
}
return(body);
}
int
yyerror(struct parser_state *pstate, void *yyscanner, const char *str)
{
mm_errno = MM_ERROR_PARSE;
mm_error_setmsg("%s", str);
mm_error_setlineno(pstate->lstate.lineno);
return -1;
}
int
mimeparser_yywrap(void)
{
return 1;
}
/**
* Sets the boundary value for the current message
*/
int
set_boundary(char *str, struct parser_state *pstate)
{
size_t blen;
blen = strlen(str);
pstate->lstate.boundary_string = (char *)malloc(blen + 3);
pstate->lstate.endboundary_string = (char *)malloc(blen + 5);
if (pstate->lstate.boundary_string == NULL || pstate->lstate.endboundary_string == NULL) {
if (pstate->lstate.boundary_string != NULL) {
free(pstate->lstate.boundary_string);
}
if (pstate->lstate.endboundary_string != NULL) {
free(pstate->lstate.endboundary_string);
}
return -1;
}
pstate->ctx->boundary = xstrdup(str);
snprintf(pstate->lstate.boundary_string, blen + 3, "--%s", str);
snprintf(pstate->lstate.endboundary_string, blen + 5, "--%s--", str);
return 0;
}
/**
* Debug printf()
*/
int
dprintf2(struct parser_state *pstate, const char *fmt, ...)
{
va_list ap;
char *msg;
if (pstate->debug == 0) return 1;
va_start(ap, fmt);
vasprintf(&msg, fmt, ap);
va_end(ap);
fprintf(stderr, "%s", msg);
free(msg);
return 0;
}
void reset_environ(struct parser_state *pstate)
{
pstate->lstate.lineno = 0;
pstate->lstate.boundary_string = NULL;
pstate->lstate.endboundary_string = NULL;
pstate->lstate.message_buffer = NULL;
pstate->mime_parts = 0;
pstate->debug = 0;
pstate->envelope = NULL;
pstate->temppart = NULL;
pstate->ctype = NULL;
pstate->current_mimepart = NULL;
pstate->have_contenttype = 0;
}
/**
* Initializes the parser engine.
*/
int
PARSER_initialize(struct parser_state *pstate, void *yyscanner)
{
void reset_lexer_state(void *yyscanner, struct parser_state *);
#if 0
if (pstate->ctx != NULL) {
xfree(pstate->ctx);
pstate->ctx = NULL;
}
if (pstate->envelope != NULL) {
xfree(pstate->envelope);
pstate->envelope = NULL;
}
if (pstate->ctype != NULL) {
xfree(pstate->ctype);
pstate->ctype = NULL;
}
#endif
/* yydebug = 1; */
reset_environ(pstate);
reset_lexer_state(yyscanner,pstate);
pstate->envelope = mm_mimepart_new();
pstate->current_mimepart = pstate->envelope;
pstate->ctype = mm_content_new();
pstate->have_contenttype = 0;
return 1;
}