commit 127d84b43cb25c22c94da5015d7108122accd85d
Author: Harald Welte
Date:   Sat Mar 26 22:42:09 2011 +0100

    Initial check-in of ETSI MAP specification ASN.1 extractor

    This program can extract the raw ASN.1 source from the MS Word for DOS
    file of the MAP ASN.1 spec, such as 380-6.DOC which is part of
    http://ftp.3gpp.org/specs/archive/09_series/09.02/0902-380.zip

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..070f053
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+CFLAGS = -O0 -g
+
+map_asn1_extract: main.o word_util.o word_file_fmt.o
+	$(CC) $(LDFLAGS) -o $@ $^
+
+clean:
+	@rm -f *.o map_asn1_extract
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..357e86e
--- /dev/null
+++ b/main.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "word_util.h"
+
+/* Dump one paragraph format descriptor to stderr and, if it has the format
+ * selected for extraction (fmt_code 0x4c, std_par_fmt 0x26), copy the text at
+ * file offsets [start_offs, next_offs) to stdout.
+ * Returns 0 on success, -1 if that text range does not lie inside the file. */
+static int handle_par_fmt_desc(struct word_handle *wh, struct word_par_fmt *pfmt,
+				uint32_t start_offs, uint32_t next_offs)
+{
+	fprintf(stderr, "Paragraph format (0x%08x-0x%08x):\n", start_offs, next_offs);
+	fprintf(stderr, "\tFormat length: %u\n", pfmt->length);
+	fprintf(stderr, "\tFormat code: 0x%02x\n", pfmt->fmt_code);
+	fprintf(stderr, "\tAlignment: %d\n", pfmt->par_align);
+	fprintf(stderr, "\tStd Par Fmt: 0x%02x\n", pfmt->std_par_fmt);
+
+	if (pfmt->fmt_code == 0x4c && pfmt->std_par_fmt == 0x26) {
+		char *tmp;
+
+		/* both offsets come from the file: never read outside the mapping */
+		if (start_offs > next_offs || next_offs > wh->file_size)
+			return -1;
+
+		tmp = strndup((const char *)wh->base_addr + start_offs,
+			      next_offs - start_offs);
+		if (tmp) {
+			/* document text is data, it must not be used as format string */
+			fputs(tmp, stdout);
+			free(tmp);
+		}
+	}
+
+	return 0;
+}
+
+/* Walk the paragraph format blocks starting at block_nr.  Every format table
+ * entry is logged to stderr and, if it references a format descriptor, handed
+ * to handle_par_fmt_desc() together with the text range it covers.  The walk
+ * continues with the following block as long as that block's first text
+ * offset equals the text pointer of the last entry of the current block.
+ * Multi-byte fields are read in host byte order. */
+static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr)
+{
+	for (;; block_nr++) {
+		uint32_t block_offs = word_bptr2offset(block_nr);
+		uint32_t offset, offset_next, last_fmt_start, num_fmts, avail, i;
+		struct word_fmt_entry *fmt_tbl;
+		struct word_par_fmt *pfmt;
+		uint8_t *block_base;
+
+		/* the complete block has to be inside the mapping */
+		if (wh->file_size < WORD_BLOCK_SIZE ||
+		    block_offs > wh->file_size - WORD_BLOCK_SIZE) {
+			fprintf(stderr, "Format block %u is outside of the file\n", block_nr);
+			return;
+		}
+		/* number of mapped bytes starting at this block */
+		avail = wh->file_size - block_offs;
+
+		block_base = wh->base_addr + block_offs;
+		memcpy(&offset, block_base, sizeof(offset));
+		num_fmts = block_base[WORD_BLOCK_SIZE - 1];
+		fmt_tbl = (struct word_fmt_entry *)(block_base + 4);
+		last_fmt_start = offset;
+
+		fprintf(stderr, "Format block %u\n", block_nr);
+		fprintf(stderr, "Offset of first Paragraph: %u (0x%x)\n", offset, offset);
+		fprintf(stderr, "Number of format table entries: %u\n", num_fmts);
+
+		if (num_fmts == 0)
+			return;
+		if (4 + num_fmts * sizeof(*fmt_tbl) > avail) {
+			fprintf(stderr, "Format table of block %u is truncated\n", block_nr);
+			return;
+		}
+
+		for (i = 0; i < num_fmts; i++) {
+			uint32_t fmt_offs = 4 + (uint32_t)fmt_tbl[i].offset_fmt;
+
+			fprintf(stderr, "Format tbl entry Text Ptr: %u (0x%x), Fmt: %u\n",
+				fmt_tbl[i].ptr_text, fmt_tbl[i].ptr_text, fmt_tbl[i].offset_fmt);
+			/* handle_par_fmt_desc() reads the first four descriptor bytes */
+			if (fmt_tbl[i].offset_fmt != 0xffff && fmt_offs + 4 <= avail) {
+				pfmt = (struct word_par_fmt *)(block_base + fmt_offs);
+				handle_par_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text);
+			}
+			last_fmt_start = fmt_tbl[i].ptr_text;
+		}
+
+		/* stop unless the next block exists and continues this one's text;
+		 * the last entry was handled above, so no paragraph gets lost at a
+		 * block boundary */
+		if (block_nr == UINT16_MAX || avail < WORD_BLOCK_SIZE + sizeof(offset_next))
+			return;
+		memcpy(&offset_next, block_base + WORD_BLOCK_SIZE, sizeof(offset_next));
+		if (last_fmt_start != offset_next)
+			return;
+	}
+}
+
+/* Log the file size and the location of the first paragraph format block
+ * taken from the file header, then process the paragraph format blocks. */
+static void process(struct word_handle *wh)
+{
+	struct word_file_hdr *wfh = (struct word_file_hdr *)wh->base_addr;
+
+	fprintf(stderr, "Word file size: %u\n", wh->file_size);
+	if (wh->file_size < sizeof(*wfh)) {
+		fprintf(stderr, "File is too short for a Word file header\n");
+		return;
+	}
+	fprintf(stderr, "Paragraph fmt Block PTR: %u, offset = %u\n", wfh->bptr_fmt_para,
+		word_bptr2offset(wfh->bptr_fmt_para));
+
+	handle_fmt_block(wh, wfh->bptr_fmt_para);
+}
+
+int main(int argc, char **argv)
+{
+	struct word_handle *wh;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: map_asn1_extract <word-file>\n");
+		exit(2);
+	}
+
+	wh = word_file_open(argv[1]);
+	if (!wh) {
+		fprintf(stderr, "Cannot open %s\n", argv[1]);
+		exit(1);
+	}
+
+	process(wh);
+
+	return 0;
+}
diff --git a/word_file_fmt.c b/word_file_fmt.c
new file mode 100644
index 0000000..4c7d4ac
--- /dev/null
+++ b/word_file_fmt.c
@@ -0,0 +1,4 @@
+#include <stdint.h>
+
+const uint8_t word_file_magic[] = { 0x31, 0xBE, 0x00, 0x00 };
+
diff --git a/word_file_fmt.h b/word_file_fmt.h
new file mode 100644
index 0000000..61e13fa
--- /dev/null
+++ b/word_file_fmt.h
@@ -0,0 +1,80 @@
+#ifndef _WORD_FILE_FMT_H
+#define _WORD_FILE_FMT_H
+
+#include <stdint.h>
+
+/* Header file defining common data structures of MS Word for DOS file format
+ * (C) 2011 by Harald Welte
+ *
+ * largely based on http://www.msxnet.org/word2rtf/formats/ffh-dosword5
+ * Chapter 16 of Dr. Dobbs File Format Handbook */
+
+struct word_file_hdr {
+	uint8_t magic[4];
+	uint8_t reserved[8];
+	uint16_t reserved2;
+	uint32_t ptr_end_of_text;
+	uint16_t bptr_fmt_para;
+	uint16_t bptr_footnote;
+	uint16_t bptr_fmt_sect;
+	uint16_t bptr_nation;
+	uint16_t bptr_page_breaks;
+	uint16_t bptr_file_mgr_info;
+	uint8_t print_format[66];
+	uint16_t win_write_flag;
+	uint8_t printer_drvr[8];
+	uint16_t num_blks_used;
+	uint16_t corrected_text;
+	uint8_t reserved3[18];
+} __attribute__ ((packed));
+
+struct word_fmt_entry {
+	uint32_t ptr_text;	/* pointer to first character in different fmt */
+	uint16_t offset_fmt;	/* pointer to format definition */
+} __attribute__ ((packed));
+
+struct word_char_fmt {
+	uint8_t length;
+	uint8_t coding_print_tmpl;
+	uint8_t fmt_code;	/* format code */
+	uint8_t font_size;	/* 1/2 point */
+	uint8_t char_attr;	/* character attributes */
+	uint8_t reserved;
+	uint8_t char_pos;	/* superscript, subscript, ... */
+} __attribute__ ((packed));
+
+struct word_par_fmt {
+	uint8_t length;
+	uint8_t fmt_code;
+	uint8_t par_align:2,
+		par_same_page:1,
+		par_next_same_page:1,
+		par_two_columns:1,
+		par_reserved:3;
+	uint8_t std_par_fmt;
+	uint8_t heading_lvl;
+	uint16_t indent_right;
+	uint16_t indent_left;
+	uint16_t indent_left_first;
+	uint16_t line_space;
+	uint16_t heading_space;
+	uint16_t end_space;
+	/* NOTE(review): the flags below add up to 9 bits, which is more than
+	 * one byte holds - verify against the format description */
+	uint8_t frame_lines:2,
+		frame_type:2,
+		hdr_ftr_1st_page:1,
+		hdr_ftr_even_page:1,
+		hdr_ftr_odd_page:1,
+		footer:1,
+		header:1;
+	uint32_t line_position;
+	uint32_t reserved;
+	uint8_t tab_descr[80];
+} __attribute__ ((packed));
+
+extern const uint8_t word_file_magic[];
+
+#define WORD_BLOCK_SIZE 0x80
+
+#endif /* _WORD_FILE_FMT_H */
diff --git a/word_util.c b/word_util.c
new file mode 100644
index 0000000..f1e18e5
--- /dev/null
+++ b/word_util.c
@@ -0,0 +1,51 @@
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "word_file_fmt.h"
+#include "word_util.h"
+
+/* Open fname read-only and map the complete file into memory.
+ * Returns a newly allocated handle, or NULL if the file cannot be opened,
+ * examined or mapped, is empty, or is too large for the 32 bit file_size. */
+struct word_handle *word_file_open(const char *fname)
+{
+	struct stat st;
+	struct word_handle *wh;
+
+	if (!fname)
+		return NULL;
+
+	wh = calloc(1, sizeof(*wh));
+	if (!wh)
+		return NULL;
+
+	wh->fd = open(fname, O_RDONLY);
+	if (wh->fd < 0)
+		goto err_free;
+
+	if (fstat(wh->fd, &st) < 0)
+		goto err_close;
+	/* mmap() rejects a zero length and file_size has only 32 bits */
+	if (st.st_size <= 0 || (uintmax_t)st.st_size > UINT32_MAX)
+		goto err_close;
+	wh->file_size = (uint32_t)st.st_size;
+
+	wh->base_addr = mmap(NULL, wh->file_size, PROT_READ, MAP_SHARED, wh->fd, 0);
+	/* mmap() reports failure as MAP_FAILED, not as NULL */
+	if (wh->base_addr == MAP_FAILED)
+		goto err_close;
+
+	return wh;
+
+err_close:
+	close(wh->fd);
+err_free:
+	free(wh);
+	return NULL;
+}
diff --git a/word_util.h b/word_util.h
new file mode 100644
index 0000000..1158d61
--- /dev/null
+++ b/word_util.h
@@ -0,0 +1,24 @@
+#ifndef _WORD_UTIL_H
+#define _WORD_UTIL_H
+
+#include <stdint.h>
+
+#include "word_file_fmt.h"
+
+/* An opened and memory-mapped Word file */
+struct word_handle {
+	int fd;			/* file descriptor of the opened file */
+	uint8_t *base_addr;	/* start of the mapping */
+	uint32_t file_size;	/* size of the file in bytes */
+};
+
+/* Convert a block number into a byte offset from the start of the file */
+static inline uint32_t word_bptr2offset(uint16_t bptr)
+{
+	return (bptr * WORD_BLOCK_SIZE);
+}
+
+/* Returns NULL on failure */
+struct word_handle *word_file_open(const char *fname);
+
+#endif