"SfR Fresh" - the SfR Freeware/Shareware Archive

Member "odt2txt-0.4/regex.c" of archive odt2txt-0.4.tar.gz:


As a special service, "SfR Fresh" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. The same can be done for any archive member file by clicking, within an archive contents listing, on the first character of the file (path) or on the corresponding byte-size field.
    1 /*
    2  * regex.c: String and regex operations for odt2txt
    3  *
    4  * Copyright (c) 2006-2008 Dennis Stosberg <dennis@stosberg.net>
    5  *
    6  * This program is free software; you can redistribute it and/or
    7  * modify it under the terms of the GNU General Public License,
    8  * version 2 as published by the Free Software Foundation
    9  */
   10 
   11 #include "mem.h"
   12 #include "regex.h"
   13 
   14 #define BUF_SZ 4096
   15 
   16 static char *headline(char line, const char *buf, regmatch_t matches[],
   17 		      size_t nmatch, size_t off);
   18 static size_t charlen_utf8(const char *s);
   19 
   20 static void print_regexp_err(int reg_errno, const regex_t *rx)
   21 {
   22 	char *buf = ymalloc(BUF_SZ);
   23 
   24 	regerror(reg_errno, rx, buf, BUF_SZ);
   25 	fprintf(stderr, "%s\n", buf);
   26 
   27 	yfree(buf);
   28 }
   29 
   30 int regex_subst(STRBUF *buf,
   31 		const char *regex, int regopt,
   32 		const void *subst)
   33 {
   34 	int r;
   35 	const char *bufp;
   36 	size_t off = 0;
   37 	const int i = 0;
   38 	int match_count = 0;
   39 
   40 	regex_t rx;
   41 	const size_t nmatches = 10;
   42 	regmatch_t matches[10];
   43 
   44 	r = regcomp(&rx, regex, REG_EXTENDED);
   45 	if (r) {
   46 		print_regexp_err(r, &rx);
   47 		exit(EXIT_FAILURE);
   48 	}
   49 
   50 	do {
   51 		if (off > strbuf_len(buf))
   52 			break;
   53 
   54 		bufp = strbuf_get(buf) + off;
   55 
   56 		if (0 != regexec(&rx, bufp, nmatches, matches, 0))
   57 			break;
   58 
   59 		if (matches[i].rm_so != -1) {
   60 			char *s;
   61 			int subst_len;
   62 
   63 			if (regopt & _REG_EXEC) {
   64 				s = (*(char *(*)
   65 				       (const char *buf, regmatch_t matches[],
   66 					size_t nmatch, size_t off))subst)
   67 					(strbuf_get(buf), matches, nmatches, off);
   68 			} else
   69 				s = (char*)subst;
   70 
   71 			subst_len = strbuf_subst(buf,
   72 						 matches[i].rm_so + off,
   73 						 matches[i].rm_eo + off,
   74 						 s);
   75 			match_count++;
   76 
   77 			if (regopt & _REG_EXEC)
   78 				yfree(s);
   79 
   80 			off += matches[i].rm_so;
   81 			if (subst_len >= 0)
   82 				off += subst_len + 1;
   83 		}
   84 	} while (regopt & _REG_GLOBAL);
   85 
   86 	regfree(&rx);
   87 	return match_count;
   88 }
   89 
   90 int regex_rm(STRBUF *buf,
   91 	     const char *regex, int regopt)
   92 {
   93 	return regex_subst(buf, regex, regopt, "");
   94 }
   95 
   96 char *underline(char linechar, const char *str)
   97 {
   98 	size_t i;
   99 	char *tmp;
  100 	STRBUF *line;
  101 	size_t charlen = charlen_utf8(str);
  102 
  103 	if (str[0] == '\0') {
  104 		tmp = ymalloc(1);
  105 		tmp[0] = '\0';
  106 		return tmp;
  107 	}
  108 
  109 	line = strbuf_new();
  110 	strbuf_append(line, str);
  111 	strbuf_append(line, "\n");
  112 
  113 	tmp = ymalloc(charlen);
  114 	for (i = 0; i < charlen; i++) {
  115 		tmp[i] = linechar;
  116 	}
  117 	strbuf_append_n(line, tmp, charlen);
  118 	yfree(tmp);
  119 
  120 	strbuf_append(line, "\n\n");
  121 	return strbuf_spit(line);
  122 }
  123 
  124 static char *headline(char line, const char *buf, regmatch_t matches[],
  125 		      size_t nmatch, size_t off)
  126 {
  127 	const int i = 1;
  128 	char *result;
  129 	size_t len;
  130 	char *match;
  131 
  132 	len = matches[i].rm_eo - matches[i].rm_so;
  133 	match = ymalloc(len + 1);
  134 
  135 	memcpy(match, buf + matches[i].rm_so + off, len);
  136 	match[len] = '\0' ;
  137 
  138 	result = underline(line, match);
  139 
  140 	yfree(match);
  141 	return result;
  142 }
  143 
  144 char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
  145 {
  146 	return headline('=', buf, matches, nmatch, off);
  147 }
  148 
  149 char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
  150 {
  151 	return headline('-', buf, matches, nmatch, off);
  152 }
  153 
  154 char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
  155 {
  156 	const int i = 1;
  157 	const char *prefix = "[-- Image: ";
  158 	const char *postfix = " --]";
  159 	size_t pr_len, po_len, len;
  160 	char *match;
  161 
  162 	pr_len = strlen(prefix);
  163 	len = matches[i].rm_eo - matches[i].rm_so;
  164 	po_len = strlen(prefix);
  165 
  166 	match = ymalloc(pr_len + len + po_len + 1);
  167 	memcpy(match, prefix, pr_len);
  168 	memcpy(match + pr_len, buf + matches[i].rm_so + off, len);
  169 	memcpy(match + pr_len + len, postfix, po_len);
  170 	match[pr_len + len + po_len] = '\0' ;
  171 
  172 	return match;
  173 }
  174 
  175 static size_t charlen_utf8(const char *s)
  176 {
  177 	size_t count = 0;
  178 	unsigned char *t = (unsigned char*) s;
  179 	while (*t != '\0') {
  180 		if (*t > 0x80)
  181 			t += utf8_length[*t - 0x80];
  182 		count++;
  183 		t++;
  184 	}
  185 	return count;
  186 }
  187 
  188 STRBUF *wrap(STRBUF *buf, int width)
  189 {
  190 	const char *lf = "\n  ";
  191 	const size_t lflen = strlen(lf);
  192 	const char *bufp;
  193 	const char *last;
  194 	const char *lastspace = 0;
  195 	size_t linelen = 0;
  196 	STRBUF *out = strbuf_new();
  197 
  198 	bufp = strbuf_get(buf);
  199 	last = bufp;
  200 
  201 	if (width == -1) {
  202 		strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
  203 		return out;
  204 	}
  205 
  206 	strbuf_append_n(out, lf, lflen);
  207 	while(bufp - strbuf_get(buf) < (ptrdiff_t)strbuf_len(buf)) {
  208 		if (*bufp == ' ')
  209 			lastspace = bufp;
  210 		else if (*bufp == '\n') {
  211 			strbuf_append_n(out, last, (size_t)(bufp - last));
  212 			do {
  213 				strbuf_append_n(out, lf, lflen);
  214 			} while (*++bufp == '\n');
  215 			lastspace = NULL;
  216 
  217 			while(*bufp == ' ') {
  218 				bufp++;
  219 			}
  220 			last = bufp;
  221 			linelen = 0;
  222 		}
  223 
  224 		if (NULL != lastspace && (int)linelen > width) {
  225 			strbuf_append_n(out, last, (size_t)(lastspace - last));
  226 			strbuf_append_n(out, lf, lflen);
  227 			last = lastspace;
  228 			lastspace = NULL;
  229 			linelen = (size_t)(bufp - last);
  230 
  231 			while(*last == ' ') {
  232 				last++;
  233 			}
  234 			if(last > bufp)
  235 				bufp = last;
  236 		}
  237 
  238 		bufp++;
  239 		linelen++;
  240 		if ((unsigned char)*bufp > 0x80)
  241 			bufp += utf8_length[(unsigned char)*bufp - 0x80];
  242 	}
  243 	strbuf_append_n(out, "\n", 1);
  244 	return out;
  245 }
  246