Kannel: Open Source WAP and SMS gateway  svn-r5335
gw-regex.c
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2018 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  * regex.c - POSIX regular expressions (REs)
59  *
60  * This module implements wrapper functions for regcomp(3), regexec(3),
61  * and related functions from the POSIX standard. Additionally
62  * it provides subexpression substitution routines in order to easily
63  * substitute strings around regular expressions.
64  *
65  * See regex(3) man page for more details on POSIX regular expressions.
66  *
67  * Stipe Tolj <stolj@wapme.de>
68  */
69 
70 #include <ctype.h>
71 
72 #include "gwlib/gwlib.h"
73 #include "gw-regex.h"
74 
75 /*
76  * The POSIX compliant regex routines may be substituted by the routines
77  * provided by PCRE if the system has no regex implementation of its own.
78  */
79 #if defined(HAVE_REGEX) || defined(HAVE_PCRE)
80 
81 
82 /********************************************************************
83  * Generic regular expression functions.
84  */
85 
86 void gw_regex_destroy(regex_t *preg)
87 {
88  if (preg == NULL)
89  return;
90 
91  regfree(preg);
92  gw_free(preg);
93 }
94 
95 
96 regex_t *gw_regex_comp_real(const Octstr *pattern, int cflags, const char *file,
97  long line, const char *func)
98 {
99  int rc;
100  regex_t *preg;
101 
102  preg = gw_malloc(sizeof(regex_t));
103 
104  if ((rc = regcomp(preg, pattern ? octstr_get_cstr(pattern) : NULL, cflags)) != 0) {
105  char buffer[512];
106  regerror(rc, preg, buffer, sizeof(buffer));
107  error(0, "%s:%ld: %s: regex compilation `%s' failed: %s (Called from %s:%ld:%s.)",
108  __FILE__, (long) __LINE__, __func__, octstr_get_cstr(pattern), buffer,
109  (file), (long) (line), (func));
110  gw_free(preg);
111  return NULL;
112  }
113 
114  return preg;
115 }
116 
117 
118 int gw_regex_exec_real(const regex_t *preg, const Octstr *string, size_t nmatch,
119  regmatch_t pmatch[], int eflags, const char *file, long line,
120  const char *func)
121 {
122  int rc;
123 
124  gw_assert(preg != NULL);
125 
126  rc = regexec(preg, string ? octstr_get_cstr(string) : NULL, nmatch, pmatch, eflags);
127  if (rc != REG_NOMATCH && rc != 0) {
128  char buffer[512];
129  regerror(rc, preg, buffer, sizeof(buffer));
130  error(0, "%s:%ld: %s: regex execution on `%s' failed: %s (Called from %s:%ld:%s.)",
131  __FILE__, (long) __LINE__, __func__, octstr_get_cstr(string), buffer,
132  (file), (long) (line), (func));
133  }
134 
135  return rc;
136 }
137 
138 
139 Octstr *gw_regex_error(int errcode, const regex_t *preg)
140 {
141  char errbuf[512];
142  Octstr *os;
143 
144  regerror(errcode, preg, errbuf, sizeof(errbuf));
145  os = octstr_create(errbuf);
146 
147  return os;
148 }
149 
150 
/*
 * Return a copy of the NUL-terminated string `s' in memory obtained from
 * gw_malloc(), or NULL when `s' is NULL.
 */
static char *pstrdup(const char *s)
{
    size_t size;
    char *copy;

    if (!s)
        return NULL;

    size = strlen(s) + 1;    /* room for the terminating NUL as well */
    copy = gw_malloc(size);

    return memcpy(copy, s, size);
}
164 
165 
/*
 * Decode the next token of a substitution template.
 *
 * Reads one token starting at *cursor and advances *cursor past it.
 * Returns the submatch index: 0 for `&', 0-9 for `$0'-`$9', or 10 for an
 * ordinary character, which is stored in *literal. A backslash directly in
 * front of `$' or `&' is dropped and the escaped character is the literal.
 * Both passes of gw_regex_sub() use this so that the computed size and the
 * bytes actually written can never disagree.
 */
static size_t gw_regex_sub_token(const char **cursor, char *literal)
{
    const char *p = *cursor;
    char c = *p++;
    size_t no;

    if (c == '&') {
        no = 0;
    } else if (c == '$' && isdigit((unsigned char) *p)) {
        /* <ctype.h> functions need an unsigned char value, not a plain char. */
        no = (size_t) (*p++ - '0');
    } else {
        no = 10;
        if (c == '\\' && (*p == '$' || *p == '&'))
            c = *p++;
    }

    *literal = c;
    *cursor = p;
    return no;
}

/*
 * Length in bytes of submatch `no', or 0 if the index is outside the
 * pmatch array, the submatch did not take part in the match, or it is empty.
 */
static size_t gw_regex_sub_match_len(size_t no, size_t nmatch,
                                     const regmatch_t pmatch[])
{
    if (no >= nmatch || pmatch[no].rm_so < 0 ||
        pmatch[no].rm_eo <= pmatch[no].rm_so)
        return 0;

    return (size_t) (pmatch[no].rm_eo - pmatch[no].rm_so);
}

/* This function substitutes for $0-$9, filling in regular expression
 * submatches; `&' is treated like $0. Pass it the same nmatch and pmatch
 * arguments that you passed to gw_regex_exec_real(). nmatch should not be
 * greater than the maximum number of subexpressions - i.e. one more than
 * the re_nsub member of regex_t.
 *
 * input should be the string with the $-expressions, source should be the
 * string that was matched against. A backslash in front of `$' or `&'
 * inserts that character literally.
 *
 * It returns the substituted string, or NULL on error (input or source
 * is NULL). If nmatch is 0 a plain copy of input is returned.
 * BEWARE: Caller must free allocated memory of the result.
 *
 * Parts of this code are based on Henry Spencer's regsub(), from his
 * AT&T V8 regexp package. Function borrowed from apache-1.3/src/main/util.c
 */
char *gw_regex_sub(const char *input, const char *source,
                   size_t nmatch, regmatch_t pmatch[])
{
    const char *src;
    char *dest, *dst;
    char c;
    size_t no;
    size_t len;
    size_t sublen;

    if (!source || !input)
        return NULL;
    if (!nmatch)
        return pstrdup(input);

    /* First pass, find the size */
    len = 0;
    src = input;
    while (*src != '\0') {
        no = gw_regex_sub_token(&src, &c);
        if (no > 9)    /* Ordinary character. */
            len++;
        else
            len += gw_regex_sub_match_len(no, nmatch, pmatch);
    }

    dest = dst = gw_malloc(len + 1);

    /* Now actually fill in the string */
    src = input;
    while (*src != '\0') {
        no = gw_regex_sub_token(&src, &c);
        if (no > 9) {    /* Ordinary character. */
            *dst++ = c;
        } else {
            sublen = gw_regex_sub_match_len(no, nmatch, pmatch);
            if (sublen > 0) {
                memcpy(dst, source + pmatch[no].rm_so, sublen);
                dst += sublen;
            }
        }
    }
    *dst = '\0';

    return dest;
}
241 
242 
243 /********************************************************************
244  * Matching and substitution wrapper functions.
245  *
246  * Beware that the regex compilation takes the most significant CPU time,
247  * so always try to have pre-compiled regular expressions that keep being
248  * reused and re-matched on variable string patterns.
249  */
250 
251 int gw_regex_match_real(const Octstr *re, const Octstr *os, const char *file,
252  long line, const char *func)
253 {
254  regex_t *regexp;
255  int rc;
256 
257  /* compile */
258  regexp = gw_regex_comp_real(re, REG_EXTENDED|REG_ICASE, file, line, func);
259  if (regexp == NULL)
260  return 0;
261 
262  /* execute and match */
263  rc = gw_regex_exec_real(regexp, os, 0, NULL, 0, file, line, func);
264 
265  gw_regex_destroy(regexp);
266 
267  return (rc == 0) ? 1 : 0;
268 }
269 
270 
271 int gw_regex_match_pre_real(const regex_t *preg, const Octstr *os, const char *file,
272  long line, const char *func)
273 {
274  int rc;
275 
276  gw_assert(preg != NULL);
277 
278  /* execute and match */
279  rc = gw_regex_exec_real(preg, os, 0, NULL, 0, file, line, func);
280 
281  return (rc == 0) ? 1 : 0;
282 }
283 
284 
285 Octstr *gw_regex_subst_real(const Octstr *re, const Octstr *os, const Octstr *rule,
286  const char *file, long line, const char *func)
287 {
288  Octstr *result;
289  regex_t *regexp;
290  regmatch_t pmatch[REGEX_MAX_SUB_MATCH];
291  int rc;
292  char *rsub;
293 
294  /* compile */
295  regexp = gw_regex_comp_real(re, REG_EXTENDED|REG_ICASE, file, line, func);
296  if (regexp == NULL)
297  return 0;
298 
299  /* execute and match */
300  rc = gw_regex_exec_real(regexp, os, REGEX_MAX_SUB_MATCH, &pmatch[0], 0,
301  file, line, func);
302  gw_regex_destroy(regexp);
303 
304  /* substitute via rule if matched */
305  if (rc != 0)
306  return NULL;
307 
308  rsub = gw_regex_sub(octstr_get_cstr(rule), octstr_get_cstr(os),
309  REGEX_MAX_SUB_MATCH, &pmatch[0]);
310  if (rsub == NULL)
311  return NULL;
312 
313  result = octstr_create(rsub);
314  gw_free(rsub);
315 
316  return result;
317 }
318 
319 
320 Octstr *gw_regex_subst_pre_real(const regex_t *preg, const Octstr *os, const Octstr *rule,
321  const char *file, long line, const char *func)
322 {
323  Octstr *result;
324  regmatch_t pmatch[REGEX_MAX_SUB_MATCH];
325  int rc;
326  char *rsub;
327 
328  gw_assert(preg != NULL);
329 
330  /* execute and match */
331  rc = gw_regex_exec_real(preg, os, REGEX_MAX_SUB_MATCH, &pmatch[0], 0,
332  file, line, func);
333 
334  /* substitute via rule if matched */
335  if (rc != 0)
336  return NULL;
337 
338  rsub = gw_regex_sub(octstr_get_cstr(rule), octstr_get_cstr(os),
339  REGEX_MAX_SUB_MATCH, &pmatch[0]);
340  if (rsub == NULL)
341  return NULL;
342 
343  result = octstr_create(rsub);
344  gw_free(rsub);
345 
346  return result;
347 }
348 
349 #endif /* HAVE_REGEX || HAVE_PCRE */
350 
void error(int err, const char *fmt,...)
Definition: log.c:648
gw_assert(wtls_machine->packet_to_send !=NULL)
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
FILE * file
Definition: log.c:169
#define octstr_create(cstr)
Definition: octstr.h:125
Definition: octstr.c:118
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.