"SfR Fresh" - the SfR Freeware/Shareware Archive 
Member "odt2txt-0.4/regex.c" of archive odt2txt-0.4.tar.gz:
As a special service "SfR Fresh" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting with prefixed line numbers.
Alternatively, you can view or download the uninterpreted source code file here.
This can also be done for any archive member file by clicking, within an archive contents listing, on the first character of the file path or on the corresponding byte-size field.
1 /*
2 * regex.c: String and regex operations for odt2txt
3 *
4 * Copyright (c) 2006-2008 Dennis Stosberg <dennis@stosberg.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License,
8 * version 2 as published by the Free Software Foundation
9 */
10
11 #include "mem.h"
12 #include "regex.h"
13
14 #define BUF_SZ 4096
15
16 static char *headline(char line, const char *buf, regmatch_t matches[],
17 size_t nmatch, size_t off);
18 static size_t charlen_utf8(const char *s);
19
20 static void print_regexp_err(int reg_errno, const regex_t *rx)
21 {
22 char *buf = ymalloc(BUF_SZ);
23
24 regerror(reg_errno, rx, buf, BUF_SZ);
25 fprintf(stderr, "%s\n", buf);
26
27 yfree(buf);
28 }
29
30 int regex_subst(STRBUF *buf,
31 const char *regex, int regopt,
32 const void *subst)
33 {
34 int r;
35 const char *bufp;
36 size_t off = 0;
37 const int i = 0;
38 int match_count = 0;
39
40 regex_t rx;
41 const size_t nmatches = 10;
42 regmatch_t matches[10];
43
44 r = regcomp(&rx, regex, REG_EXTENDED);
45 if (r) {
46 print_regexp_err(r, &rx);
47 exit(EXIT_FAILURE);
48 }
49
50 do {
51 if (off > strbuf_len(buf))
52 break;
53
54 bufp = strbuf_get(buf) + off;
55
56 if (0 != regexec(&rx, bufp, nmatches, matches, 0))
57 break;
58
59 if (matches[i].rm_so != -1) {
60 char *s;
61 int subst_len;
62
63 if (regopt & _REG_EXEC) {
64 s = (*(char *(*)
65 (const char *buf, regmatch_t matches[],
66 size_t nmatch, size_t off))subst)
67 (strbuf_get(buf), matches, nmatches, off);
68 } else
69 s = (char*)subst;
70
71 subst_len = strbuf_subst(buf,
72 matches[i].rm_so + off,
73 matches[i].rm_eo + off,
74 s);
75 match_count++;
76
77 if (regopt & _REG_EXEC)
78 yfree(s);
79
80 off += matches[i].rm_so;
81 if (subst_len >= 0)
82 off += subst_len + 1;
83 }
84 } while (regopt & _REG_GLOBAL);
85
86 regfree(&rx);
87 return match_count;
88 }
89
90 int regex_rm(STRBUF *buf,
91 const char *regex, int regopt)
92 {
93 return regex_subst(buf, regex, regopt, "");
94 }
95
96 char *underline(char linechar, const char *str)
97 {
98 size_t i;
99 char *tmp;
100 STRBUF *line;
101 size_t charlen = charlen_utf8(str);
102
103 if (str[0] == '\0') {
104 tmp = ymalloc(1);
105 tmp[0] = '\0';
106 return tmp;
107 }
108
109 line = strbuf_new();
110 strbuf_append(line, str);
111 strbuf_append(line, "\n");
112
113 tmp = ymalloc(charlen);
114 for (i = 0; i < charlen; i++) {
115 tmp[i] = linechar;
116 }
117 strbuf_append_n(line, tmp, charlen);
118 yfree(tmp);
119
120 strbuf_append(line, "\n\n");
121 return strbuf_spit(line);
122 }
123
/*
 * headline: Turn the first capture group of a match into an underlined
 * headline.
 *
 * line     character the underline is drawn with
 * buf      start of the searched buffer
 * matches  match array filled in by regexec(); entry 1 is used
 * nmatch   number of entries in matches (currently not consulted)
 * off      offset into buf that the match offsets are relative to
 *
 * Returns the string produced by underline() for the captured text.
 */
static char *headline(char line, const char *buf, regmatch_t matches[],
		      size_t nmatch, size_t off)
{
	const regmatch_t *group = &matches[1];
	size_t title_len = group->rm_eo - group->rm_so;
	char *title = ymalloc(title_len + 1);
	char *formatted;

	/* Copy the capture into a NUL-terminated scratch string. */
	memcpy(title, buf + group->rm_so + off, title_len);
	title[title_len] = '\0';

	formatted = underline(line, title);
	yfree(title);

	return formatted;
}
143
/*
 * h1: Substitution callback that renders the first capture group as a
 * headline underlined with '='.  Arguments are forwarded to headline().
 */
char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	const char rule = '=';

	return headline(rule, buf, matches, nmatch, off);
}
148
/*
 * h2: Substitution callback that renders the first capture group as a
 * headline underlined with '-'.  Arguments are forwarded to headline().
 */
char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	const char rule = '-';

	return headline(rule, buf, matches, nmatch, off);
}
153
/*
 * image: Substitution callback that renders the first capture group as
 * an image placeholder of the form "[-- Image: <capture> --]".
 *
 * buf      start of the searched buffer
 * matches  match array filled in by regexec(); entry 1 is used
 * nmatch   number of entries in matches (currently not consulted)
 * off      offset into buf that the match offsets are relative to
 *
 * Returns a newly allocated, NUL-terminated string obtained from
 * ymalloc(); regex_subst() releases callback results with yfree().
 */
char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	const int i = 1;
	const char *prefix = "[-- Image: ";
	const char *postfix = " --]";
	size_t pr_len, po_len, len;
	char *match;

	pr_len = strlen(prefix);
	len = matches[i].rm_eo - matches[i].rm_so;
	/* Must be the length of the postfix: using the prefix length here
	 * made the memcpy() below read past the end of the postfix literal. */
	po_len = strlen(postfix);

	match = ymalloc(pr_len + len + po_len + 1);
	memcpy(match, prefix, pr_len);
	memcpy(match + pr_len, buf + matches[i].rm_so + off, len);
	memcpy(match + pr_len + len, postfix, po_len);
	match[pr_len + len + po_len] = '\0';

	return match;
}
174
175 static size_t charlen_utf8(const char *s)
176 {
177 size_t count = 0;
178 unsigned char *t = (unsigned char*) s;
179 while (*t != '\0') {
180 if (*t > 0x80)
181 t += utf8_length[*t - 0x80];
182 count++;
183 t++;
184 }
185 return count;
186 }
187
188 STRBUF *wrap(STRBUF *buf, int width)
189 {
190 const char *lf = "\n ";
191 const size_t lflen = strlen(lf);
192 const char *bufp;
193 const char *last;
194 const char *lastspace = 0;
195 size_t linelen = 0;
196 STRBUF *out = strbuf_new();
197
198 bufp = strbuf_get(buf);
199 last = bufp;
200
201 if (width == -1) {
202 strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
203 return out;
204 }
205
206 strbuf_append_n(out, lf, lflen);
207 while(bufp - strbuf_get(buf) < (ptrdiff_t)strbuf_len(buf)) {
208 if (*bufp == ' ')
209 lastspace = bufp;
210 else if (*bufp == '\n') {
211 strbuf_append_n(out, last, (size_t)(bufp - last));
212 do {
213 strbuf_append_n(out, lf, lflen);
214 } while (*++bufp == '\n');
215 lastspace = NULL;
216
217 while(*bufp == ' ') {
218 bufp++;
219 }
220 last = bufp;
221 linelen = 0;
222 }
223
224 if (NULL != lastspace && (int)linelen > width) {
225 strbuf_append_n(out, last, (size_t)(lastspace - last));
226 strbuf_append_n(out, lf, lflen);
227 last = lastspace;
228 lastspace = NULL;
229 linelen = (size_t)(bufp - last);
230
231 while(*last == ' ') {
232 last++;
233 }
234 if(last > bufp)
235 bufp = last;
236 }
237
238 bufp++;
239 linelen++;
240 if ((unsigned char)*bufp > 0x80)
241 bufp += utf8_length[(unsigned char)*bufp - 0x80];
242 }
243 strbuf_append_n(out, "\n", 1);
244 return out;
245 }
246