Kannel: Open Source WAP and SMS gateway  svn-r5335
wml_compiler.c
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2018 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
/*
 * wml_compiler.c - compiling WML to WML binary
 *
 * This is an implementation of a WML compiler for compiling the WML text
 * format to WML binary format, which is used for transmitting the
 * decks to the mobile terminal to decrease the use of the bandwidth.
 *
 *
 * Tuomas Luttinen for Wapit Ltd.
 */
67 
68 #include <time.h>
69 #include <unistd.h>
70 #include <sys/types.h>
71 #include <sys/stat.h>
72 #include <fcntl.h>
73 #include <string.h>
74 #include <math.h>
75 #include <ctype.h>
76 #include <inttypes.h>
77 
78 #include <libxml/xmlmemory.h>
79 #include <libxml/tree.h>
80 #include <libxml/debugXML.h>
81 #include <libxml/encoding.h>
82 #include <libxml/parser.h>
83 #include <libxml/xmlerror.h>
84 
85 #include "gwlib/gwlib.h"
86 #include "wml_compiler.h"
87 #include "xml_definitions.h"
88 
89 /***********************************************************************
90  * Declarations of data types.
91  *
92  * Binary code values are defined by OMNA, see
93  * http://www.openmobilealliance.org/tech/omna/omna-wbxml-public-docid.htm
94  */
95 
97  char *string;
98  unsigned long value;
99 };
100 
102 
103 #define NUMBERED(name, strings) \
104  static const wml_externalid_t name##_strings[] = { strings };
105 #define ASSIGN(string, number) { string, number },
106 #include "wbxml_tokens.def"
107 
/*
 * Number of entries in public_ids_strings[], as a signed long so that it
 * can be compared directly with a long loop index. The cast is applied to
 * the whole quotient: casting only the first sizeof would still leave the
 * division result unsigned.
 */
#define NUMBER_OF_WML_EXTERNALID \
    ((long) (sizeof(public_ids_strings) / sizeof(public_ids_strings[0])))
109 
111  char *string;
112  char value;
113 };
114 
116 
118  { "1.1", 0x01 },
119  { "1.2", 0x02 },
120  { "1.3", 0x03 },
121  { "1.4", 0x04 },
122  { "1.5", 0x05 }
123 };
124 
/*
 * Number of entries in wbxml_version[]. The expansion is fully
 * parenthesised so it is safe inside larger expressions, and it is a signed
 * long so that it compares cleanly with a long loop index.
 */
#define NUMBER_OF_WBXML_VERSION \
    ((long) (sizeof(wbxml_version) / sizeof(wbxml_version[0])))
126 
127 
/*
 * Escaping mode of a WML variable reference. FAILED is not a mode; the
 * checking functions in this file return it for an invalid variable.
 */
typedef enum {
    NOESC,      /* "noesc" / "n" */
    ESC,        /* "escape" / "e" */
    UNESC,      /* "unesc" / "u" */
    FAILED      /* variable did not pass the syntax check */
} var_esc_t;
129 
130 
/*
 * Entry of a two-field WML token table: a text string paired with a
 * one-octet token.
 */

typedef struct {
    char *text;             /* textual form of the entry */
    unsigned char token;    /* one-octet token paired with the text */
} wml_table_t;
139 
140 
/*
 * Entry of a three-field WML token table: two text strings paired with a
 * one-octet token.
 */

typedef struct {
    char *text1;            /* first text field (presumably the attribute
                               name - confirm against wml_definitions.h) */
    char *text2;            /* second text field (presumably the attribute
                               value prefix - confirm) */
    unsigned char token;    /* one-octet token paired with the texts */
} wml_table3_t;
150 
151 
152 /*
153  * The binary WML structure, that has been passed around between the
154  * internal functions. It contains the header fields for wbxml version,
155  * the WML public ID and the character set, the length of the string table,
156  * the list structure implementing the string table and the octet string
157  * containing the encoded WML binary.
158  */
159 
160 typedef struct {
161  unsigned char wbxml_version;
162  unsigned long wml_public_id;
163  unsigned long character_set;
164  unsigned long string_table_length;
167 } wml_binary_t;
168 
169 
170 /*
171  * The string table list node.
172  */
173 
174 typedef struct {
175  unsigned long offset;
178 
179 
180 /*
181  * The string table proposal list node.
182  */
183 
184 typedef struct {
185  int count;
188 
189 
190 /*
191  * The wml hash table node.
192  */
193 
194 typedef struct {
196  unsigned char binary;
197 } wml_hash_t;
198 
199 
200 /*
201  * The hash table node for attribute and values.
202  */
203 
204 typedef struct {
206  unsigned char binary;
209 
210 #include "xml_shared.h"
211 #include "wml_definitions.h"
212 
213 
214 /***********************************************************************
215  * Declarations of global variables.
216  */
217 
219 
221 
223 
225 
227 
228 
229 /***********************************************************************
230  * Declarations of internal functions. These are defined at the end of
231  * the file.
232  */
233 
234 
/*
 * Parsing functions. These functions operate on a single node or a
 * smaller datatype. Look for more details on the functions at the
 * definitions.
 */
240 
241 static int parse_document(xmlDocPtr document, Octstr *charset,
242  wml_binary_t **wbxml, Octstr *version);
243 
244 static int parse_node(xmlNodePtr node, wml_binary_t **wbxml);
245 static int parse_element(xmlNodePtr node, wml_binary_t **wbxml);
246 static int parse_attribute(xmlAttrPtr attr, wml_binary_t **wbxml);
247 static int parse_attr_value(Octstr *attr_value, List *tokens,
248  wml_binary_t **wbxml, int charset, var_esc_t default_esc);
249 static int parse_text(xmlNodePtr node, wml_binary_t **wbxml);
250 static int parse_cdata(xmlNodePtr node, wml_binary_t **wbxml);
251 static int parse_st_octet_string(Octstr *ostr, int cdata, var_esc_t default_esc, wml_binary_t **wbxml);
252 static void parse_st_end(wml_binary_t **wbxml);
253 static void parse_entities(Octstr *wml_source);
254 
255 /*
256  * Variable functions. These functions are used to find and parse variables.
257  */
258 
259 static int parse_variable(Octstr *text, int start, var_esc_t default_esc, Octstr **output,
260  wml_binary_t **wbxml);
261 static Octstr *get_variable(Octstr *text, int start);
262 static var_esc_t check_variable_syntax(Octstr *variable, var_esc_t default_esc);
263 
264 
265 /*
266  * wml_binary-functions. These are used to create, destroy and modify
267  * wml_binary_t.
268  */
269 
270 static wml_binary_t *wml_binary_create(void);
271 static void wml_binary_destroy(wml_binary_t *wbxml);
272 static void wml_binary_output(Octstr *ostr, wml_binary_t *wbxml);
273 
274 /* Output into the wml_binary. */
275 
276 static void output_st_char(int byte, wml_binary_t **wbxml);
277 static void output_st_octet_string(Octstr *ostr, wml_binary_t **wbxml);
278 static void output_variable(Octstr *variable, Octstr **output,
279  var_esc_t escaped, wml_binary_t **wbxml);
280 
281 /*
282  * Memory allocation and deallocations.
283  */
284 
285 static wml_hash_t *hash_create(char *text, unsigned char token);
286 static wml_attribute_t *attribute_create(void);
287 static void attr_dict_construct(wml_table3_t *attributes, Dict *attr_dict);
288 
289 static void hash_destroy(void *p);
290 static void attribute_destroy(void *p);
291 
292 /*
293  * Comparison functions for the hash tables.
294  */
295 
296 static int hash_cmp(void *hash1, void *hash2);
297 
298 /*
299  * Miscellaneous help functions.
300  */
301 
302 static int check_do_elements(xmlNodePtr node);
303 static var_esc_t check_variable_name(xmlNodePtr node);
304 static Octstr *get_do_element_name(xmlNodePtr node);
305 static int check_if_url(int hex);
306 static int check_if_emphasis(xmlNodePtr node);
307 
308 static int wml_table_len(wml_table_t *table);
309 static int wml_table3_len(wml_table3_t *table);
310 
311 /*
312  * String table functions, used to add and remove strings into and from the
313  * string table.
314  */
315 
316 static string_table_t *string_table_create(int offset, Octstr *ostr);
317 static void string_table_destroy(string_table_t *node);
320 static void string_table_build(xmlNodePtr node, wml_binary_t **wbxml);
321 static void string_table_collect_strings(xmlNodePtr node, List *strings);
322 static List *string_table_collect_words(List *strings);
324 static List *string_table_add_many(List *sorted, wml_binary_t **wbxml);
325 static unsigned long string_table_add(Octstr *ostr, wml_binary_t **wbxml);
326 static void string_table_apply(Octstr *ostr, wml_binary_t **wbxml);
327 static void string_table_output(Octstr *ostr, wml_binary_t **wbxml);
328 
329 
330 /***********************************************************************
331  * Generic error message formater for libxml2 related errors
332  */
333 
334 static void xml_error(void)
335 {
336  xmlErrorPtr err;
337  Octstr *msg;
338 
339  /* we should have an error, but be more sensitive */
340  if ((err = xmlGetLastError()) == NULL)
341  return;
342 
343  /* replace annoying line feeds */
344  msg = octstr_format("%s", err->message);
345  octstr_replace(msg, octstr_imm("\n"), octstr_imm(" "));
346  error(0,"XML error: code: %d, level: %d, line: %d, %s",
347  err->code, err->level, err->line, octstr_get_cstr(msg));
349 }
350 
351 
352 /***********************************************************************
353  * Implementations of the functions declared in wml_compiler.h.
354  */
355 
356 /*
357  * The actual compiler function. This operates as interface to the compiler.
358  * For more information, look wml_compiler.h.
359  */
360 int wml_compile(Octstr *wml_text, Octstr *charset, Octstr **wml_binary,
361  Octstr *version)
362 {
363  int ret = 0;
364  size_t size;
365  xmlDocPtr pDoc = NULL;
366  char *wml_c_text;
367  wml_binary_t *wbxml = NULL;
368 
369  *wml_binary = octstr_create("");
370  wbxml = wml_binary_create();
371 
372  /* Remove the extra space from start and the end of the WML Document. */
373  octstr_strip_blanks(wml_text);
374 
375  /* Check the WML-code for \0-characters and for WML entities. Fast patch.
376  -- tuo */
377  parse_entities(wml_text);
378 
379  size = octstr_len(wml_text);
380  wml_c_text = octstr_get_cstr(wml_text);
381 
382  debug("wml_compile",0, "WML: Given charset: %s", octstr_get_cstr(charset));
383 
384  if (octstr_search_char(wml_text, '\0', 0) != -1) {
385  error(0, "WML compiler: Compiling error: "
386  "\\0 character found in the middle of the WML source.");
387  ret = -1;
388  } else {
389  /*
390  * An empty octet string for the binary output is created, the wml
391  * source is parsed into a parsing tree and the tree is then compiled
392  * into binary.
393  */
394 
395  pDoc = xmlReadMemory(wml_c_text, size, NULL, octstr_get_cstr(charset),
397 
398  if (pDoc != NULL) {
399  /*
400  * If we have a set internal encoding, then apply this information
401  * to the XML parsing tree document for later transcoding ability.
402  */
403  if (charset)
404  pDoc->charset = xmlParseCharEncoding(octstr_get_cstr(charset));
405 
406  ret = parse_document(pDoc, charset, &wbxml, version);
407  wml_binary_output(*wml_binary, wbxml);
408  } else {
409  error(0, "WML compiler: Compiling error: "
410  "libxml2 returned a NULL pointer");
411  xml_error();
412  ret = -1;
413  }
414  }
415 
416  wml_binary_destroy(wbxml);
417 
418  if (pDoc)
419  xmlFreeDoc(pDoc);
420 
421  return ret;
422 }
423 
424 
425 /*
426  * Initialization: makes up the hash tables for the compiler.
427  */
428 
430 {
431  int i = 0, len = 0;
432  wml_hash_t *temp = NULL;
433 
434  /* The wml elements into a hash table. */
437 
438  for (i = 0; i < len; i++) {
440  dict_put(wml_elements_dict, temp->item, temp);
441  }
442 
443  /* Attributes. */
447 
448  /* Attribute values. */
451 
452  for (i = 0; i < len; i++) {
456  }
457 
458  /* URL values. */
461 
462  for (i = 0; i < len; i++) {
465  }
466 
467  /* Strict XML parsing. */
469  (XML_PARSE_NOERROR | XML_PARSE_NONET) :
470  (XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NONET);
471 }
472 
473 
474 
475 /*
476  * Shutdown: Frees the memory allocated by initialization.
477  */
478 
480 {
485 }
486 
487 
488 
489 /***********************************************************************
490  * Internal functions.
491  */
492 
493 
494 /*
495  * parse_node - the recursive parsing function for the parsing tree.
496  * Function checks the type of the node, calls for the right parse
497  * function for the type, then calls itself for the first child of
498  * the current node if there's one and after that calls itself for the
499  * next child on the list.
500  */
501 
502 static int parse_node(xmlNodePtr node, wml_binary_t **wbxml)
503 {
504  int status = 0;
505 
506  /* Call for the parser function of the node type. */
507  switch (node->type) {
508  case XML_ELEMENT_NODE:
509  status = parse_element(node, wbxml);
510  break;
511  case XML_TEXT_NODE:
512  status = parse_text(node, wbxml);
513  break;
514  case XML_CDATA_SECTION_NODE:
515  status = parse_cdata(node, wbxml);
516  break;
517  case XML_COMMENT_NODE:
518  case XML_PI_NODE:
519  /* Comments and PIs are ignored. */
520  break;
521  /*
522  * XML has also many other node types, these are not needed with
523  * WML. Therefore they are assumed to be an error.
524  */
525  default:
526  error(0, "WML compiler: Unknown XML node in the WML source.");
527  return -1;
528  break;
529  }
530 
531  /*
532  * If node is an element with content, it will need an end tag after it's
533  * children. The status for it is returned by parse_element.
534  */
535  switch (status) {
536  case 0:
537 
538  if (node->children != NULL)
539  if (parse_node(node->children, wbxml) == -1)
540  return -1;
541  break;
542  case 1:
543  if (node->children != NULL)
544  if (parse_node(node->children, wbxml) == -1)
545  return -1;
546  parse_st_end(wbxml);
547  break;
548 
549  case -1: /* Something went wrong in the parsing. */
550  return -1;
551  default:
552  error(0,
553  "WML compiler: undefined return value in a parse function.");
554  return -1;
555  break;
556  }
557 
558  if (node->next != NULL)
559  if (parse_node(node->next, wbxml) == -1)
560  return -1;
561 
562  return 0;
563 }
564 
565 
566 /*
567  * parse_document - the parsing function for the document node.
568  * The function outputs the WBXML version, WML public id and the
569  * character set values into start of the wbxml.
570  */
571 static int parse_document(xmlDocPtr document, Octstr *charset,
572  wml_binary_t **wbxml, Octstr *version)
573 {
574  xmlNodePtr node;
575  Octstr *externalID = NULL;
576  long i;
577 
578  if (document == NULL) {
579  error(0, "WML compiler: XML parsing failed, no parsed document.");
580  error(0, "Most probably an error in the WML source.");
581  return -1;
582  }
583 
584  /* Return WBXML version dependent on device given Encoding-Version */
585  if (version == NULL) {
586  (*wbxml)->wbxml_version = 0x01; /* WBXML Version number 1.1 */
587  info(0, "WBXML: No wbxml version given, assuming 1.1");
588  } else {
589  for (i = 0; i < NUMBER_OF_WBXML_VERSION; i++) {
590  if (octstr_compare(version, octstr_imm(wbxml_version[i].string)) == 0) {
591  (*wbxml)->wbxml_version = wbxml_version[i].value;
592  debug("parse_document",0,"WBXML: Encoding with wbxml version <%s>",
593  octstr_get_cstr(version));
594  break;
595  }
596  }
597  if (i == NUMBER_OF_WBXML_VERSION) {
598  (*wbxml)->wbxml_version = 0x01; /* WBXML Version number 1.1 */
599  warning(0, "WBXML: Unknown wbxml version, assuming 1.1 (<%s> is unknown)",
600  octstr_get_cstr(version));
601  }
602  }
603 
604  /* Return WML Version dependent on xml ExternalID string */
605  if ((document->intSubset != NULL) && (document->intSubset->ExternalID != NULL))
606  externalID = octstr_create((char *)document->intSubset->ExternalID);
607  if (externalID == NULL) {
608  (*wbxml)->wml_public_id = 0x04; /* WML 1.1 Public ID */
609  warning(0, "WBXML: WML without ExternalID, assuming 1.1");
610  } else {
611  for (i = 0; i < NUMBER_OF_WML_EXTERNALID; i++) {
612  if (octstr_compare(externalID, octstr_imm(public_ids_strings[i].string)) == 0) {
613  (*wbxml)->wml_public_id = public_ids_strings[i].value;
614  debug("parse_document",0,"WBXML: WML with ExternalID <%s>",
615  octstr_get_cstr(externalID));
616  break;
617  }
618  }
619  if (i == NUMBER_OF_WML_EXTERNALID) {
620  (*wbxml)->wml_public_id = 0x04; /* WML 1.1 Public ID */
621  warning(0, "WBXML: WML with unknown ExternalID, assuming 1.1 "
622  "(<%s> is unknown)",
623  octstr_get_cstr(externalID));
624  }
625  }
626  octstr_destroy(externalID);
627 
628  (*wbxml)->string_table_length = 0x00; /* String table length=0 */
629 
630  /*
631  * Make sure we set the charset encoding right. If none is given
632  * then set UTF-8 as default.
633  */
634  (*wbxml)->character_set = charset ?
636 
637  node = xmlDocGetRootElement(document);
638 
639  if (node == NULL) {
640  error(0, "WML compiler: XML parsing failed, no document root element.");
641  error(0, "Most probably an error in the WML source.");
642  xml_error();
643  return -1;
644  }
645 
646  string_table_build(node, wbxml);
647 
648  return parse_node(node, wbxml);
649 }
650 
651 
652 /*
653  * parse_element - the parsing function for an element node.
654  * The element tag is encoded into one octet hexadecimal value,
655  * if possible. Otherwise it is encoded as text. If the element
656  * needs an end tag, the function returns 1, for no end tag 0
657  * and -1 for an error.
658  */
659 
660 static int parse_element(xmlNodePtr node, wml_binary_t **wbxml)
661 {
662  int add_end_tag = 0;
663  unsigned char wbxml_hex = 0, status_bits;
664  xmlAttrPtr attribute;
665  Octstr *name;
667 
668  name = octstr_create((char *)node->name);
669 
670  /* Check, if the tag can be found from the code page. */
671  if ((element = dict_get(wml_elements_dict, name)) != NULL) {
672  wbxml_hex = element->binary;
673  /* A conformance patch: no do-elements of same name in a card or
674  template. An extremely ugly patch. --tuo */
675  if (wbxml_hex == 0x27 || /* Card */
676  wbxml_hex == 0x3B) /* Template */
677  if (check_do_elements(node) == -1) {
678  add_end_tag = -1;
679  error(0, "WML compiler: Two or more do elements with same"
680  " name in a card or template element.");
681  }
682  /* A conformance patch: if variable in setvar has a bad name, it's
683  ignored. */
684  if (wbxml_hex == 0x3E) /* Setvar */
685  if (check_variable_name(node) == FAILED) {
687  return add_end_tag;
688  }
689  if ((status_bits = element_check_content(node)) > 0) {
690  wbxml_hex = wbxml_hex | status_bits;
691  /* If this node has children, the end tag must be added after
692  them. */
693  if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
694  add_end_tag = 1;
695  }
696 
697  output_st_char(wbxml_hex, wbxml);
698  } else {
699  /* The tag was not on the code page, it has to be encoded as a
700  string. */
701  wbxml_hex = WBXML_LITERAL;
702  if ((status_bits = element_check_content(node)) > 0) {
703  wbxml_hex = wbxml_hex | status_bits;
704  /* If this node has children, the end tag must be added after
705  them. */
706  if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
707  add_end_tag = 1;
708  }
709  output_st_char(wbxml_hex, wbxml);
710  octstr_append_uintvar((*wbxml)->wbxml_string,string_table_add(octstr_duplicate(name), wbxml));
711  warning(0, "WML compiler: Unknown tag in WML source: <%s>",
713  }
714 
715  /* Encode the attribute list for this node and add end tag after the
716  list. */
717 
718  if(node->properties != NULL) {
719  attribute = node->properties;
720  while (attribute != NULL) {
721  parse_attribute(attribute, wbxml);
722  attribute = attribute->next;
723  }
724  parse_st_end(wbxml);
725  }
726 
728  return add_end_tag;
729 }
730 
731 
732 /*
733  * parse_attribute - the parsing function for attributes. The function
734  * encodes the attribute (and probably start of the value) as a one
735  * hexadecimal octet. The value (or the rest of it) is coded as a string
736  * maybe using predefined attribute value tokens to reduce the length
737  * of the output. Returns 0 for success, -1 for error.
738  */
739 
740 static int parse_attribute(xmlAttrPtr attr, wml_binary_t **wbxml)
741 {
742  int status = 0;
743  int coded_length = 0;
744  unsigned char wbxml_hex = 0x00;
745  wml_hash_t *hit = NULL;
746  wml_attribute_t *attribute = NULL;
747  Octstr *name = NULL, *pattern = NULL, *p = NULL;
748 
749  name = octstr_create((char *)attr->name);
750 
751  if (attr->children != NULL)
752  pattern = create_octstr_from_node((char *)attr->children);
753  else
754  pattern = NULL;
755 
756  /* Check if the attribute is found on the code page. */
757 
758  if ((attribute = dict_get(wml_attributes_dict, name)) != NULL) {
759  if (attr->children == NULL ||
760  (hit = gwlist_search(attribute->value_list, (void *)pattern,
761  hash_cmp)) == NULL) {
762  if(attribute->binary == 0x00) {
763  warning(0, "WML compiler: can't compile attribute %s%s%s%s",
764  octstr_get_cstr(attribute->attribute),
765  (attr->children != NULL ? "=\"": ""),
766  (attr->children != NULL ? octstr_get_cstr(pattern) : ""),
767  (attr->children != NULL ? "\"": ""));
768  wbxml_hex = WBXML_LITERAL;
769  output_st_char(wbxml_hex, wbxml);
771  } else {
772  wbxml_hex = attribute->binary;
773  output_st_char(wbxml_hex, wbxml);
774  }
775  } else if (hit->binary) {
776  wbxml_hex = hit->binary;
777  coded_length = octstr_len(hit->item);
778  output_st_char(wbxml_hex, wbxml);
779  } else
780  status = -1;
781  } else {
782  /* The attribute was not on the code page, it has to be encoded as a
783  string. */
784  wbxml_hex = WBXML_LITERAL;
785  output_st_char(wbxml_hex, wbxml);
786  octstr_append_uintvar((*wbxml)->wbxml_string,string_table_add(octstr_duplicate(name), wbxml));
787  warning(0, "WML compiler: Unknown attribute in WML source: <%s>",
789  }
790 
791  if (status >= 0) {
792  var_esc_t default_esc;
793 
794  default_esc = (octstr_str_compare (name, "href") == 0) ? ESC : NOESC;
795 
796  /* The rest of the attribute is coded as a inline string. */
797  if (pattern != NULL &&
798  coded_length < (int) octstr_len(pattern)) {
799  if (coded_length == 0)
800  p = create_octstr_from_node((char *)attr->children);
801  else
802  p = octstr_copy(pattern, coded_length,
803  octstr_len(pattern) - coded_length);
804 
805  if (check_if_url(wbxml_hex))
807  wbxml, attr->doc->charset, default_esc);
808  else
810  wbxml, attr->doc->charset, default_esc);
811  if (status != 0)
812  error(0,
813  "WML compiler: could not output attribute "
814  "value as a string.");
815  octstr_destroy(p);
816  }
817  }
818 
819  /* Memory cleanup. */
821 
822  if (pattern != NULL)
823  octstr_destroy(pattern);
824 
825  return status;
826 }
827 
828 
829 
830 /*
831  * parse_attr_value - parses an attributes value using WML value codes.
832  */
833 
834 static int parse_attr_value(Octstr *attr_value, List *tokens,
835  wml_binary_t **wbxml, int charset, var_esc_t default_esc)
836 {
837  int i, pos, wbxml_hex;
838  wml_hash_t *temp = NULL;
839  Octstr *cut_text = NULL;
840  char *tmp;
841 
842  /*
843  * Beware that libxml2 does internal encoding in UTF-8 while parsing.
844  * So if our original WML source had a different encoding set, we have
845  * to transcode at least here. Only transcode if target encoding differs
846  * from libxml2's internal encoding (UTF-8).
847  */
848  tmp = (char*) xmlGetCharEncodingName(charset);
849  if (charset != XML_CHAR_ENCODING_UTF8 &&
850  charset_convert(attr_value, "UTF-8",
851  tmp) != 0) {
852  error(0, "Failed to convert XML attribute value from charset "
853  "<%s> to <%s>, will leave as is.", "UTF-8",
854  tmp ? tmp : "(undef)");
855  }
856 
857 
858  /*
859  * The attribute value is search for text strings that can be replaced
860  * with one byte codes. Note that the algorith is not foolproof; seaching
861  * is done in an order and the text before first hit is not checked for
862  * those tokens that are after the hit in the order. Most likely it would
863  * be waste of time anyway. String table is not used here, since at least
864  * Nokia 7110 doesn't seem to understand string table references here.
865  */
866 
867  /* A fast patch to allow reserved names to be variable names. May produce
868  a little longer binary at some points. --tuo */
869  if (octstr_search_char(attr_value, '$', 0) >= 0) {
870  if (parse_st_octet_string(attr_value, 0, default_esc, wbxml) != 0)
871  return -1;
872  } else {
873 
874  for (i = 0; i < gwlist_len(tokens); i++) {
875  temp = gwlist_get(tokens, i);
876  pos = octstr_search(attr_value, temp->item, 0);
877  switch (pos) {
878  case -1:
879  break;
880  case 0:
881  wbxml_hex = temp->binary;
882  output_st_char(wbxml_hex, wbxml);
883  octstr_delete(attr_value, 0, octstr_len(temp->item));
884  break;
885  default:
886  /*
887  * There is some text before the first hit, that has to
888  * be handled too.
889  */
890  gw_assert(pos <= octstr_len(attr_value));
891 
892  cut_text = octstr_copy(attr_value, 0, pos);
893  if (parse_st_octet_string(cut_text, 0, default_esc, wbxml) != 0)
894  return -1;
895  octstr_destroy(cut_text);
896 
897  wbxml_hex = temp->binary;
898  output_st_char(wbxml_hex, wbxml);
899 
900  octstr_delete(attr_value, 0, pos + octstr_len(temp->item));
901  break;
902  }
903  }
904 
905  /*
906  * If no hits, then the attr_value is handled as a normal text,
907  * otherwise the remaining part is searched for other hits too.
908  */
909 
910  if ((int) octstr_len(attr_value) > 0) {
911  if (i < gwlist_len(tokens))
912  parse_attr_value(attr_value, tokens, wbxml, charset, default_esc);
913  else
914  if (parse_st_octet_string(attr_value, 0, default_esc, wbxml) != 0)
915  return -1;
916  }
917  }
918 
919  return 0;
920 }
921 
922 
923 
924 /*
925  * parse_st_end - adds end tag to an element.
926  */
927 
928 static void parse_st_end(wml_binary_t **wbxml)
929 {
930  output_st_char(WBXML_END, wbxml);
931 }
932 
933 
934 
935 /*
936  * parse_text - a text string parsing function.
937  * This function parses a text node.
938  */
939 
940 static int parse_text(xmlNodePtr node, wml_binary_t **wbxml)
941 {
942  int ret;
943  Octstr *temp;
944  char* tmp;
945 
946  temp = create_octstr_from_node((char *)node); /* returns string in UTF-8 */
947 
948  /*
949  * Beware that libxml2 does internal encoding in UTF-8 while parsing.
950  * So if our original WML source had a different encoding set, we have
951  * to transcode at least here. Only transcode if target encoding differs
952  * from libxml2's internal encoding (UTF-8).
953  */
954  tmp = (char*) xmlGetCharEncodingName(node->doc->charset);
955  if (node->doc->charset != XML_CHAR_ENCODING_UTF8 &&
956  charset_convert(temp, "UTF-8",
957  tmp) != 0) {
958  error(0, "Failed to convert XML text entity from charset "
959  "<%s> to <%s>, will leave as is.", "UTF-8",
960  tmp ? tmp : "(undef)");
961  }
962 
963  octstr_shrink_blanks(temp);
964  if (!check_if_emphasis(node->prev) && !check_if_emphasis(node->next))
965  octstr_strip_blanks(temp);
966 
967  if (octstr_len(temp) == 0)
968  ret = 0;
969  else
970  ret = parse_st_octet_string(temp, 0, NOESC, wbxml);
971 
972  /* Memory cleanup. */
973  octstr_destroy(temp);
974 
975  return ret;
976 }
977 
978 
979 
980 /*
981  * parse_cdata - a cdata section parsing function.
982  * This function parses a cdata section that is outputted into the binary
983  * "as is".
984  */
985 
986 static int parse_cdata(xmlNodePtr node, wml_binary_t **wbxml)
987 {
988  int ret = 0;
989  Octstr *temp;
990 
991  temp = create_octstr_from_node((char *)node);
992 
993  parse_st_octet_string(temp, 1, NOESC, wbxml);
994 
995  /* Memory cleanup. */
996  octstr_destroy(temp);
997 
998  return ret;
999 }
1000 
1001 
1002 
1003 /*
1004  * parse_variable - a variable parsing function.
1005  * Arguments:
1006  * - text: the octet string containing a variable
1007  * - start: the starting position of the variable not including
1008  * trailing &
1009  * Returns: lenth of the variable for success, -1 for failure, 0 for
1010  * variable syntax error, when it will be ignored.
1011  * Parsed variable is returned as an octet string in Octstr **output.
1012  */
1013 
1014 static int parse_variable(Octstr *text, int start, var_esc_t default_esc, Octstr **output,
1015  wml_binary_t **wbxml)
1016 {
1017  var_esc_t esc;
1018  int ret;
1019  Octstr *variable;
1020 
1021  variable = get_variable(text, start + 1);
1022  octstr_truncate(*output, 0);
1023 
1024  if (variable == NULL)
1025  return 0;
1026 
1027  if (octstr_get_char(variable, 0) == '$') {
1028  octstr_append_char(*output, '$');
1029  octstr_destroy(variable);
1030  ret = 2;
1031  } else {
1032  if (octstr_get_char(text, start + 1) == '(')
1033  ret = octstr_len(variable) + 3;
1034  else
1035  ret = octstr_len(variable) + 1;
1036 
1037  if ((esc = check_variable_syntax(variable, default_esc)) != FAILED)
1038  output_variable(variable, output, esc, wbxml);
1039  else
1040  octstr_destroy(variable);
1041  }
1042 
1043  return ret;
1044 }
1045 
1046 
1047 
1048 /*
1049  * get_variable - get the variable name from text.
1050  * Octstr *text contains the text with a variable name starting at point
1051  * int start.
1052  */
1053 
1055 {
1056  Octstr *var = NULL;
1057  long end;
1058  int ch;
1059 
1060  gw_assert(text != NULL);
1061  gw_assert(start >= 0 && start <= (int) octstr_len(text));
1062 
1063  ch = octstr_get_char(text, start);
1064 
1065  if (ch == '$') {
1066  var = octstr_create("$");
1067  } else if (ch == '(') {
1068  start ++;
1069  end = octstr_search_char(text, ')', start);
1070  if (end == -1)
1071  error(0, "WML compiler: braces opened, but not closed for a "
1072  "variable.");
1073  else if (end - start == 0)
1074  error(0, "WML compiler: empty braces without variable.");
1075  else
1076  var = octstr_copy(text, start, end - start);
1077  } else {
1078  end = start + 1;
1079  while (isalnum(ch = octstr_get_char(text, end)) || (ch == '_'))
1080  end ++;
1081 
1082  var = octstr_copy(text, start, end - start);
1083  }
1084 
1085  return var;
1086 }
1087 
1088 
1089 
1090 /*
1091  * check_variable_syntax - checks the variable syntax and the possible
1092  * escape mode it has. Octstr *variable contains the variable string.
1093  */
1094 
1095 static var_esc_t check_variable_syntax(Octstr *variable, var_esc_t default_esc)
1096 {
1097  Octstr *escape;
1098  char ch;
1099  int pos, len, i;
1100  var_esc_t ret;
1101 
1102  if ((pos = octstr_search_char(variable, ':', 0)) > 0) {
1103  len = octstr_len(variable) - pos;
1104  escape = octstr_copy(variable, pos + 1, len - 1);
1105  octstr_truncate(variable, pos);
1106  octstr_truncate(escape, len);
1107  octstr_convert_range(escape, 0, octstr_len(escape), tolower);
1108 
1109  if (octstr_str_compare(escape, "noesc") == 0 ||
1110  octstr_str_compare(escape, "n") == 0 )
1111  ret = NOESC;
1112  else if (octstr_str_compare(escape, "unesc") == 0 ||
1113  octstr_str_compare(escape, "u") == 0 )
1114  ret = UNESC;
1115  else if (octstr_str_compare(escape, "escape") == 0 ||
1116  octstr_str_compare(escape, "e") == 0 )
1117  ret = ESC;
1118  else {
1119  error(0, "WML compiler: syntax error in variable escaping.");
1120  octstr_destroy(escape);
1121  return FAILED;
1122  }
1123  octstr_destroy(escape);
1124  } else
1125  ret = default_esc;
1126 
1127  ch = octstr_get_char(variable, 0);
1128  if (!(isalpha((int)ch)) && ch != '_') {
1129  error(0, "WML compiler: syntax error in variable; name starting "
1130  "with %c.", ch);
1131  return FAILED;
1132  } else
1133  for (i = 1; i < (int) octstr_len(variable); i++)
1134  if (!isalnum((int)(ch = octstr_get_char(variable, 0))) &&
1135  ch != '_') {
1136  warning(0, "WML compiler: syntax error in variable.");
1137  return FAILED;
1138  }
1139 
1140  return ret;
1141 }
1142 
1143 
1144 
1145 /*
1146  * parse_st_octet_string - parse an octet string into wbxml_string, the string
1147  * is checked for variables. If string is string table applicable, it will
1148  * be checked for string insrtances that are in the string table, otherwise
1149  * not. Returns 0 for success, -1 for error.
1150  */
1151 
1152 static int parse_st_octet_string(Octstr *ostr, int cdata, var_esc_t default_esc, wml_binary_t **wbxml)
1153 {
1154  Octstr *output, *var, *temp = NULL;
1155  int var_len;
1156  int start = 0, pos = 0, len;
1157 
1158  /* No variables? Ok, let's take the easy way... (CDATA never contains
1159  variables.) */
1160 
1161  if ((pos = octstr_search_char(ostr, '$', 0)) < 0 || cdata == 1) {
1162  string_table_apply(ostr, wbxml);
1163  return 0;
1164  }
1165 
1166  len = octstr_len(ostr);
1167  output = octstr_create("");
1168  var = octstr_create("");
1169 
1170  while (pos < len) {
1171  if (octstr_get_char(ostr, pos) == '$') {
1172  if (pos > start) {
1173  temp = octstr_copy(ostr, start, pos - start);
1174  octstr_insert(output, temp, octstr_len(output));
1175  octstr_destroy(temp);
1176  }
1177 
1178  if ((var_len = parse_variable(ostr, pos, default_esc, &var, wbxml)) > 0) {
1179  if (octstr_len(var) > 0) {
1180  if (octstr_get_char(var, 0) == '$')
1181  /*
1182  * No, it's not actually variable, but $-character
1183  * escaped as "$$". So everything should be packed
1184  * into one string.
1185  */
1186  octstr_insert(output, var, octstr_len(output));
1187  else {
1188  /*
1189  * The string is output as a inline string and the
1190  * variable as a string table variable reference.
1191  */
1192  if (octstr_len(output) > 0)
1193  string_table_apply(output, wbxml);
1194  octstr_truncate(output, 0);
1195  output_st_octet_string(var, wbxml);
1196  }
1197  /* Variable had a syntax error, so it's skipped. */
1198  }
1199 
1200  pos = pos + var_len;
1201  start = pos;
1202  } else
1203  return -1;
1204  } else
1205  pos ++;
1206  }
1207 
1208  /* Was there still something after the last variable? */
1209  if (start < pos) {
1210  if (octstr_len(output) == 0) {
1211  octstr_destroy(output);
1212  output = octstr_copy(ostr, start, pos - start);
1213  } else {
1214  temp = octstr_copy(ostr, start, pos - start);
1215  octstr_insert(output, temp, octstr_len(output));
1216  octstr_destroy(temp);
1217  }
1218  }
1219 
1220  if (octstr_len(output) > 0)
1221  string_table_apply(output, wbxml);
1222 
1223  octstr_destroy(output);
1224  octstr_destroy(var);
1225 
1226  return 0;
1227 }
1228 
1229 
1230 
1231 
1232 /*
1233  * parse_entities - replaces WML entites in the WML source with equivalent
1234  * numerical entities. A fast patch for WAP 1.1 compliance.
1235  */
1236 
1237 static void parse_entities(Octstr *wml_source)
1238 {
1239  static char entity_nbsp[] = "&nbsp;";
1240  static char entity_shy[] = "&shy;";
1241  static char nbsp[] = "&#160;";
1242  static char shy[] = "&#173;";
1243  int pos = 0;
1244  Octstr *temp;
1245 
1246  if ((pos = octstr_search(wml_source, octstr_imm(entity_nbsp),
1247  pos)) >= 0) {
1248  temp = octstr_create(nbsp);
1249  while (pos >= 0) {
1250  octstr_delete(wml_source, pos, strlen(entity_nbsp));
1251  octstr_insert(wml_source, temp, pos);
1252  pos = octstr_search(wml_source,
1253  octstr_imm(entity_nbsp), pos);
1254  }
1255  octstr_destroy(temp);
1256  }
1257 
1258  pos = 0;
1259  if ((pos = octstr_search(wml_source, octstr_imm(entity_shy),
1260  pos)) >= 0) {
1261  temp = octstr_create(shy);
1262  while (pos >= 0) {
1263  octstr_delete(wml_source, pos, strlen(entity_shy));
1264  octstr_insert(wml_source, temp, pos);
1265  pos = octstr_search(wml_source,
1266  octstr_imm(entity_shy), pos);
1267  }
1268  octstr_destroy(temp);
1269  }
1270 }
1271 
1272 
1273 
1274 /*
1275  * wml_binary_create - reserves memory for the wml_binary_t and sets the
1276  * fields to zeros and NULLs.
1277  */
1278 
1280 {
1281  wml_binary_t *wbxml;
1282 
1283  wbxml = gw_malloc(sizeof(wml_binary_t));
1284  wbxml->wbxml_version = 0x00;
1285  wbxml->wml_public_id = 0x00;
1286  wbxml->character_set = 0x00;
1287  wbxml->string_table_length = 0x00;
1288  wbxml->string_table = gwlist_create();
1289  wbxml->wbxml_string = octstr_create("");
1290 
1291  return wbxml;
1292 }
1293 
1294 
1295 
1296 /*
1297  * wml_binary_destroy - frees the memory allocated for the wml_binary_t.
1298  */
1299 
1301 {
1302  if (wbxml != NULL) {
1303  gwlist_destroy(wbxml->string_table, NULL);
1304  octstr_destroy(wbxml->wbxml_string);
1305  gw_free(wbxml);
1306  }
1307 }
1308 
1309 
1310 
1311 /*
1312  * wml_binary_output - outputs all the fiels of wml_binary_t into ostr.
1313  */
1314 
1315 static void wml_binary_output(Octstr *ostr, wml_binary_t *wbxml)
1316 {
1317  octstr_append_char(ostr, wbxml->wbxml_version);
1318  octstr_append_uintvar(ostr, wbxml->wml_public_id);
1319  octstr_append_uintvar(ostr, wbxml->character_set);
1321 
1322  if (wbxml->string_table_length > 0)
1323  string_table_output(ostr, &wbxml);
1324 
1325  octstr_insert(ostr, wbxml->wbxml_string, octstr_len(ostr));
1326 }
1327 
1328 
1329 
1330 /*
1331  * output_st_char - output a character into wbxml_string.
1332  * Returns 0 for success, -1 for error.
1333  */
1334 
1335 static void output_st_char(int byte, wml_binary_t **wbxml)
1336 {
1337  octstr_append_char((*wbxml)->wbxml_string, byte);
1338 }
1339 
1340 
1341 
1342 /*
1343  * output_st_octet_string - output an octet string into wbxml.
1344  * Returns 0 for success, -1 for an error. No conversions.
1345  */
1346 
1347 static void output_st_octet_string(Octstr *ostr, wml_binary_t **wbxml)
1348 {
1349  octstr_insert((*wbxml)->wbxml_string, ostr,
1350  octstr_len((*wbxml)->wbxml_string));
1351 }
1352 
1353 
1354 
1355 /*
1356  * output_variable - output a variable reference into the string table.
1357  */
1358 
1359 static void output_variable(Octstr *variable, Octstr **output,
1360  var_esc_t escaped, wml_binary_t **wbxml)
1361 {
1362  switch (escaped)
1363  {
1364  case ESC:
1366  break;
1367  case UNESC:
1369  break;
1370  default:
1372  break;
1373  }
1374 
1375  octstr_append_uintvar(*output, string_table_add(variable, wbxml));
1376 }
1377 
1378 
1379 
1380 /*
1381  * hash_create - allocates memory for a 2 field hash table node.
1382  */
1383 
1384 static wml_hash_t *hash_create(char *text, unsigned char token)
1385 {
1386  wml_hash_t *table_node;
1387 
1388  table_node = gw_malloc(sizeof(wml_hash_t));
1389  table_node->item = octstr_create(text);
1390  table_node->binary = token;
1391 
1392  return table_node;
1393 }
1394 
1395 
1396 
1397 /*
1398  * attribute_create - allocates memory for the attributes hash table node
1399  * that contains the attribute, the binary for it and a list of binary values
1400  * tied with the attribute.
1401  */
1402 
1404 {
1405  wml_attribute_t *attr;
1406 
1407  attr = gw_malloc(sizeof(wml_attribute_t));
1408  attr->attribute = NULL;
1409  attr->binary = 0;
1410  attr->value_list = gwlist_create();
1411 
1412  return attr;
1413 }
1414 
1415 
1416 
1417 /*
1418  * attr_dict_construct - takes a table of attributes and their values and
1419  * inputs these into a dictionary.
1420  */
1421 
1422 static void attr_dict_construct(wml_table3_t *attributes, Dict *attr_dict)
1423 {
1424  int i = 0;
1425  wml_attribute_t *node = NULL;
1426  wml_hash_t *temp = NULL;
1427 
1428  node = attribute_create();
1429 
1430  do {
1431  if (node->attribute == NULL)
1432  node->attribute = octstr_create(attributes[i].text1);
1433  else if (strcmp(attributes[i].text1, attributes[i-1].text1) != 0) {
1434  dict_put(attr_dict, node->attribute, node);
1435  node = attribute_create();
1436  node->attribute = octstr_create(attributes[i].text1);
1437  }
1438 
1439  if (attributes[i].text2 == NULL)
1440  node->binary = attributes[i].token;
1441  else {
1442  temp = hash_create(attributes[i].text2, attributes[i].token);
1443  gwlist_append(node->value_list, (void *)temp);
1444  }
1445  i++;
1446  } while (attributes[i].text1 != NULL);
1447 
1448  dict_put(attr_dict, node->attribute, node);
1449 }
1450 
1451 
1452 
1453 /*
1454  * hash_destroy - deallocates memory of a 2 field hash table node.
1455  */
1456 
1457 static void hash_destroy(void *p)
1458 {
1459  wml_hash_t *node;
1460 
1461  if (p == NULL)
1462  return;
1463 
1464  node = p;
1465 
1466  octstr_destroy(node->item);
1467  gw_free(node);
1468 }
1469 
1470 
1471 
1472 /*
1473  * attribute_destroy - deallocates memory of a attribute hash table node.
1474  */
1475 
1476 static void attribute_destroy(void *p)
1477 {
1478  wml_attribute_t *node;
1479 
1480  if (p == NULL)
1481  return;
1482 
1483  node = p;
1484 
1485  octstr_destroy(node->attribute);
1487  gw_free(node);
1488 }
1489 
1490 
1491 
1492 /*
1493  * hash_cmp - compares pattern against item and if the pattern matches the
1494  * item returns 1, else 0.
1495  */
1496 
1497 static int hash_cmp(void *item, void *pattern)
1498 {
1499  int ret = 0;
1500 
1501  gw_assert(item != NULL && pattern != NULL);
1502  gw_assert(((wml_hash_t *)item)->item != NULL);
1503 
1504  if (octstr_search(pattern, ((wml_hash_t *)item)->item, 0) == 0)
1505  ret = 1;
1506 
1507  return ret;
1508 }
1509 
1510 
1511 /*
1512  * check_do_elements - a helper function for parse_element for checking if a
1513  * card or template element has two or more do elements of the same name.
1514  * Returns 0 for OK and -1 for an error (== do elements with same name found).
1515  */
1516 
1517 static int check_do_elements(xmlNodePtr node)
1518 {
1519  xmlNodePtr child;
1520  int i, status = 0;
1521  Octstr *name = NULL;
1522  List *name_list = NULL;
1523 
1524  name_list = gwlist_create();
1525 
1526  if ((child = node->children) != NULL) {
1527  while (child != NULL) {
1528  if (child->name && strcmp((char *)child->name, "do") == 0) {
1529  name = get_do_element_name(child);
1530 
1531  if (name == NULL) {
1532  error(0, "WML compiler: no name or type in a do element");
1533  return -1;
1534  }
1535 
1536  for (i = 0; i < gwlist_len(name_list); i ++)
1537  if (octstr_compare(gwlist_get(name_list, i), name) == 0) {
1539  status = -1;
1540  break;
1541  }
1542  if (status != -1)
1543  gwlist_append(name_list, name);
1544  else
1545  break;
1546  }
1547  child = child->next;
1548  }
1549  }
1550 
1551  gwlist_destroy(name_list, octstr_destroy_item);
1552 
1553  return status;
1554 }
1555 
1556 
1557 
1558 /*
1559  * check_variable_name - checks the name for variable in a setvar element.
1560  * If the name has syntax error, -1 is returned, else 0.
1561  */
1562 
1563 static var_esc_t check_variable_name(xmlNodePtr node)
1564 {
1565  Octstr *name = NULL;
1566  xmlAttrPtr attr;
1567  var_esc_t ret = FAILED;
1568 
1569  if ((attr = node->properties) != NULL) {
1570  while (attr != NULL) {
1571  if (attr->name && strcmp((char *)attr->name, "name") == 0) {
1572  name = create_octstr_from_node((char *)attr->children);
1573  break;
1574  }
1575  attr = attr->next;
1576  }
1577  }
1578 
1579  if (attr == NULL) {
1580  error(0, "WML compiler: no name in a setvar element");
1581  return FAILED;
1582  }
1583 
1586 
1587  return ret;
1588 }
1589 
1590 
1591 
1592 /*
1593  * get_do_element_name - returns the name for a do element. Name is either
1594  * name when the element has the attribute or defaults to the type attribute
1595  * if there is no name.
1596  */
1597 
1598 static Octstr *get_do_element_name(xmlNodePtr node)
1599 {
1600  Octstr *name = NULL;
1601  xmlAttrPtr attr;
1602 
1603  if ((attr = node->properties) != NULL) {
1604  while (attr != NULL) {
1605  if (attr->name && strcmp((char *)attr->name, "name") == 0) {
1606  name = create_octstr_from_node((char *)attr->children);
1607  break;
1608  }
1609  attr = attr->next;
1610  }
1611 
1612  if (attr == NULL) {
1613  attr = node->properties;
1614  while (attr != NULL) {
1615  if (attr->name && strcmp((char *)attr->name, "type") == 0) {
1616  name = create_octstr_from_node((char *)attr->children);
1617  break;
1618  }
1619  attr = attr->next;
1620  }
1621  }
1622  }
1623 
1624  return name;
1625 }
1626 
1627 
1628 
/*
 * check_if_url - checks whether the attribute token hex, taken as an
 * unsigned char, is one of the href or src attribute start tokens.
 * Returns 1 for such an URL attribute and 0 otherwise.
 */

static int check_if_url(int hex)
{
    static const unsigned char url_tokens[] = {
        0x4A, 0x4B, 0x4C,   /* href, href http://, href https:// */
        0x32, 0x58, 0x59    /* src, src http://, src https:// */
    };
    const unsigned char token = (unsigned char) hex;
    size_t k;

    for (k = 0; k < sizeof url_tokens / sizeof url_tokens[0]; k++)
        if (url_tokens[k] == token)
            return 1;

    return 0;
}
1644 
1645 
1646 
1647 /*
1648  * check_if_emphasis - checks if the node is an emphasis element.
1649  * Returns 1 for an emphasis and 0 otherwise.
1650  */
1651 
1652 static int check_if_emphasis(xmlNodePtr node)
1653 {
1654  if (node == NULL || node->name == NULL)
1655  return 0;
1656 
1657  if (strcmp((char *)node->name, "b") == 0)
1658  return 1;
1659  if (strcmp((char *)node->name, "big") == 0)
1660  return 1;
1661  if (strcmp((char *)node->name, "em") == 0)
1662  return 1;
1663  if (strcmp((char *)node->name, "i") == 0)
1664  return 1;
1665  if (strcmp((char *)node->name, "small") == 0)
1666  return 1;
1667  if (strcmp((char *)node->name, "strong") == 0)
1668  return 1;
1669  if (strcmp((char *)node->name, "u") == 0)
1670  return 1;
1671 
1672  return 0;
1673 }
1674 
1675 
1676 /*
1677  * wml_table_len - returns the length of a wml_table_t array.
1678  */
1679 
1681 {
1682  int i = 0;
1683 
1684  while (table[i].text != NULL)
1685  i++;
1686 
1687  return i;
1688 }
1689 
1690 
1691 
1692 /*
1693  * wml_table3_len - returns the length of a wml_table3_t array.
1694  */
1695 
1697 {
1698  int i = 0;
1699 
1700  while (table[i].text1 != NULL)
1701  i++;
1702 
1703  return i;
1704 }
1705 
1706 
1707 
1708 /*
1709  * string_table_create - reserves memory for the string_table_t and sets the
1710  * fields.
1711  */
1712 
1713 static string_table_t *string_table_create(int offset, Octstr *ostr)
1714 {
1715  string_table_t *node;
1716 
1717  node = gw_malloc(sizeof(string_table_t));
1718  node->offset = offset;
1719  node->string = ostr;
1720 
1721  return node;
1722 }
1723 
1724 
1725 
1726 /*
1727  * string_table_destroy - frees the memory allocated for the string_table_t.
1728  */
1729 
1731 {
1732  if (node != NULL) {
1733  octstr_destroy(node->string);
1734  gw_free(node);
1735  }
1736 }
1737 
1738 
1739 
1740 /*
1741  * string_table_proposal_create - reserves memory for the
1742  * string_table_proposal_t and sets the fields.
1743  */
1744 
1746 {
1748 
1749  node = gw_malloc(sizeof(string_table_proposal_t));
1750  node->count = 1;
1751  node->string = ostr;
1752 
1753  return node;
1754 }
1755 
1756 
1757 
1758 /*
1759  * string_table_proposal_destroy - frees the memory allocated for the
1760  * string_table_proposal_t.
1761  */
1762 
1764 {
1765  if (node != NULL) {
1766  octstr_destroy(node->string);
1767  gw_free(node);
1768  }
1769 }
1770 
1771 
1772 
1773 /*
1774  * string_table_build - collects the strings from the WML source into a list,
1775  * adds those strings that appear more than once into string table. The rest
1776  * of the strings are sliced into words and the same procedure is executed to
1777  * the list of these words.
1778  */
1779 
1780 static void string_table_build(xmlNodePtr node, wml_binary_t **wbxml)
1781 {
1782  string_table_proposal_t *item = NULL;
1783  List *list = NULL;
1784 
1785  list = gwlist_create();
1786 
1787  string_table_collect_strings(node, list);
1788 
1789  list = string_table_add_many(string_table_sort_list(list), wbxml);
1790 
1791  list = string_table_collect_words(list);
1792 
1793  /* Don't add strings if there aren't any. (no NULLs please) */
1794  if (list) {
1795  list = string_table_add_many(string_table_sort_list(list), wbxml);
1796  }
1797 
1798  /* Memory cleanup. */
1799  while (gwlist_len(list)) {
1800  item = gwlist_extract_first(list);
1802  }
1803 
1804  gwlist_destroy(list, NULL);
1805 }
1806 
1807 
1808 
1809 /*
1810  * string_table_collect_strings - collects the strings from the WML
1811  * ocument into a list that is then further processed to build the
1812  * string table for the document.
1813  */
1814 
1815 static void string_table_collect_strings(xmlNodePtr node, List *strings)
1816 {
1817  Octstr *string;
1818  xmlAttrPtr attribute;
1819 
1820  switch (node->type) {
1821  case XML_TEXT_NODE:
1822  string = create_octstr_from_node((char *)node);
1823 
1824  octstr_shrink_blanks(string);
1825  octstr_strip_blanks(string);
1826  if (octstr_len(string) > WBXML_STRING_TABLE_MIN)
1827  octstr_strip_nonalphanums(string);
1828 
1829  if (octstr_len(string) > WBXML_STRING_TABLE_MIN)
1830  gwlist_append(strings, string);
1831  else
1832  octstr_destroy(string);
1833  break;
1834  case XML_ELEMENT_NODE:
1835  if(node->properties != NULL) {
1836  attribute = node->properties;
1837  while (attribute != NULL) {
1838  if (attribute->children != NULL)
1839  string_table_collect_strings(attribute->children, strings);
1840  attribute = attribute->next;
1841  }
1842  }
1843  break;
1844  default:
1845  break;
1846  }
1847 
1848  if (node->children != NULL)
1849  string_table_collect_strings(node->children, strings);
1850 
1851  if (node->next != NULL)
1852  string_table_collect_strings(node->next, strings);
1853 }
1854 
1855 
1856 
1857 /*
1858  * string_table_sort_list - takes a list of octet strings and returns a list
1859  * of string_table_proposal_t:s that contains the same strings with number of
1860  * instants of every string in the input list.
1861  */
1862 
1864 {
1865  int i;
1866  Octstr *string = NULL;
1867  string_table_proposal_t *item = NULL;
1868  List *sorted = NULL;
1869 
1870  sorted = gwlist_create();
1871 
1872  while (gwlist_len(start)) {
1873  string = gwlist_extract_first(start);
1874 
1875  /* Check whether the string is unique. */
1876  for (i = 0; i < gwlist_len(sorted); i++) {
1877  item = gwlist_get(sorted, i);
1878  if (octstr_compare(item->string, string) == 0) {
1879  octstr_destroy(string);
1880  string = NULL;
1881  item->count ++;
1882  break;
1883  }
1884  }
1885 
1886  if (string != NULL) {
1887  item = string_table_proposal_create(string);
1888  gwlist_append(sorted, item);
1889  }
1890  }
1891 
1892  gwlist_destroy(start, NULL);
1893 
1894  return sorted;
1895 }
1896 
1897 
1898 
1899 /*
1900  * string_table_add_many - takes a list of string with number of instants and
1901  * adds those whose number is greater than 1 into the string table. Returns
1902  * the list ofrejected strings for memory cleanup.
1903  */
1904 
1905 static List *string_table_add_many(List *sorted, wml_binary_t **wbxml)
1906 {
1907  string_table_proposal_t *item = NULL;
1908  List *list = NULL;
1909 
1910  list = gwlist_create();
1911 
1912  while (gwlist_len(sorted)) {
1913  item = gwlist_extract_first(sorted);
1914 
1915  if (item->count > 1 && octstr_len(item->string) >
1917  string_table_add(octstr_duplicate(item->string), wbxml);
1919  } else
1920  gwlist_append(list, item);
1921  }
1922 
1923  gwlist_destroy(sorted, NULL);
1924 
1925  return list;
1926 }
1927 
1928 
1929 
1930 /*
1931  * string_table_collect_words - takes a list of strings and returns a list
1932  * of words contained by those strings.
1933  */
1934 
1936 {
1937  Octstr *word = NULL;
1938  string_table_proposal_t *item = NULL;
1939  List *list = NULL, *temp_list = NULL;
1940 
1941  while (gwlist_len(strings)) {
1942  item = gwlist_extract_first(strings);
1943 
1944  if (list == NULL) {
1945  list = octstr_split_words(item->string);
1947  } else {
1948  temp_list = octstr_split_words(item->string);
1949 
1950  while ((word = gwlist_extract_first(temp_list)) != NULL)
1951  gwlist_append(list, word);
1952 
1953  gwlist_destroy(temp_list, NULL);
1955  }
1956  }
1957 
1958  gwlist_destroy(strings, NULL);
1959 
1960  return list;
1961 }
1962 
1963 
1964 
1965 /*
1966  * string_table_add - adds a string to the string table. Duplicates are
1967  * discarded. The function returns the offset of the string in the
1968  * string table; if the string is already in the table then the offset
1969  * of the first copy.
1970  */
1971 
1972 static unsigned long string_table_add(Octstr *ostr, wml_binary_t **wbxml)
1973 {
1974  string_table_t *item = NULL;
1975  unsigned long i, offset = 0;
1976 
1977  /* Check whether the string is unique. */
1978  for (i = 0; i < (unsigned long)gwlist_len((*wbxml)->string_table); i++) {
1979  item = gwlist_get((*wbxml)->string_table, i);
1980  if (octstr_compare(item->string, ostr) == 0) {
1981  octstr_destroy(ostr);
1982  return item->offset;
1983  }
1984  }
1985 
1986  /* Create a new list item for the string table. */
1987  offset = (*wbxml)->string_table_length;
1988 
1989  item = string_table_create(offset, ostr);
1990 
1991  (*wbxml)->string_table_length =
1992  (*wbxml)->string_table_length + octstr_len(ostr) + 1;
1993  gwlist_append((*wbxml)->string_table, item);
1994 
1995  return offset;
1996 }
1997 
1998 
1999 
2000 /*
2001  * string_table_apply - takes an octet string of WML binary and goes
2002  * through it searching for substrings that are in the string table and
2003  * replaces them with string table references.
 *
 * ostr is modified in place and the result is then appended to
 * wbxml_string with output_st_octet_string(). Each string table entry is
 * searched for once, from the start of ostr, so only its first occurrence
 * is considered.
 *
 * NOTE(review): the embedded numbering of this listing skips original
 * lines 2017, 2028, 2044, 2046, 2051 and 2065, so the code below is not
 * complete. The gaps are marked where they occur; restore the statements
 * from the upstream source before compiling.
2004  */
2005 
2006 static void string_table_apply(Octstr *ostr, wml_binary_t **wbxml)
2007 {
2008  Octstr *input = NULL;
2009  string_table_t *item = NULL;
2010  long i = 0, word_s = 0, str_e = 0;
2011 
2012  input = octstr_create("");
2013 
2014  for (i = 0; i < gwlist_len((*wbxml)->string_table); i++) {
2015  item = gwlist_get((*wbxml)->string_table, i);
2016 
 /* NOTE(review): original line 2017 is missing here; given the comment
    below it is presumably a minimum length test on item->string -
    confirm. */
2018  /* No use to replace 1 to 3 character substring, the reference
2019  will eat the saving up. A variable will be in the string table
2020  even though it's only 1 character long. */
2021  if ((word_s = octstr_search(ostr, item->string, 0)) >= 0) {
2022  /* Check whether the octet string are equal if they are equal
2023  in length. */
2024  if (octstr_len(ostr) == octstr_len(item->string)) {
2025  if ((word_s = octstr_compare(ostr, item->string)) == 0)
2026  {
2027  octstr_truncate(ostr, 0);
 /* NOTE(review): original line 2028 is missing here; presumably it
    writes the token that precedes the offset - confirm. */
2029  octstr_append_uintvar(ostr, item->offset);
2030  str_e = 1;
2031  }
2032  }
2033  /* Check the possible substrings. */
2034  else if (octstr_len(ostr) > octstr_len(item->string))
2035  {
 /* str_e records that the match reaches the end of ostr. */
2036  if (word_s + octstr_len(item->string) == octstr_len(ostr))
2037  str_e = 1;
2038 
2039  octstr_delete(ostr, word_s, octstr_len(item->string));
2040 
 /* input collects the bytes that replace the deleted substring. */
2041  octstr_truncate(input, 0);
2042  /* Substring in the start? No STR_END then. */
2043  if (word_s > 0)
 /* NOTE(review): original lines 2044 and 2046 are missing around here;
    as listed, the if above governs the statement after the gap. */
2045 
2047  octstr_append_uintvar(input, item->offset);
2048 
2049  /* Substring at the end? No need to start a new one. */
2050  if ( word_s < octstr_len(ostr))
 /* NOTE(review): original line 2051 is missing here; as listed, the if
    above governs the octstr_insert() call below. */
2052 
2053  octstr_insert(ostr, input, word_s);
2054  }
2055  /* If the string table entry is longer than the string, it can
2056  be skipped. */
2057  }
2058  }
2059 
2060  octstr_destroy(input);
2061 
 /* Unless ostr now begins with a string table reference, an inline
    string token is written first. */
2062  if (octstr_get_char(ostr, 0) != WBXML_STR_T)
2063  output_st_char(WBXML_STR_I, wbxml);
2064  if (!str_e)
 /* NOTE(review): original line 2065 is missing here; presumably it
    terminates the inline string - confirm. As listed, the if above
    governs the output_st_octet_string() call below. */
2066 
2067  output_st_octet_string(ostr, wbxml);
2068 }
2069 
2070 
2071 
2072 /*
2073  * string_table_output - writes the contents of the string table
2074  * into an octet string that is sent to the phone.
2075  */
2076 
2077 static void string_table_output(Octstr *ostr, wml_binary_t **wbxml)
2078 {
2079  string_table_t *item;
2080 
2081  while ((item = gwlist_extract_first((*wbxml)->string_table)) != NULL) {
2082  octstr_insert(ostr, item->string, octstr_len(ostr));
2084  string_table_destroy(item);
2085  }
2086 }
2087 
2088 
2089 
2090 
2091 
2092 
2093 
2094 
2095 
2096 
2097 
2098 
2099 
2100 
Dict * dict_create(long size_hint, void(*destroy_value)(void *))
Definition: dict.c:192
void error(int err, const char *fmt,...)
Definition: log.c:648
void info(int err, const char *fmt,...)
Definition: log.c:672
unsigned long offset
Definition: wml_compiler.c:175
static wml_attribute_t * attribute_create(void)
static void parse_st_end(wml_binary_t **wbxml)
Definition: wml_compiler.c:928
int size
Definition: wsasm.c:84
static int parse_cdata(xmlNodePtr node, wml_binary_t **wbxml)
Definition: wml_compiler.c:986
void octstr_replace(Octstr *haystack, Octstr *needle, Octstr *repl)
Definition: octstr.c:2649
void * gwlist_search(List *list, void *pattern, int(*cmp)(void *, void *))
Definition: list.c:486
static wml_table_t wml_URL_values[]
#define WBXML_EXT_T_2
unsigned char element_check_content(xmlNodePtr node)
Definition: xml_shared.c:242
unsigned char wbxml_version
Definition: wml_compiler.c:161
void octstr_convert_range(Octstr *ostr, long pos, long len, octstr_func_t map)
Definition: octstr.c:836
static int check_if_url(int hex)
static int parse_document(xmlDocPtr document, Octstr *charset, wml_binary_t **wbxml, Octstr *version)
Definition: wml_compiler.c:571
Octstr * wbxml_string
Definition: wml_compiler.c:166
gw_assert(wtls_machine->packet_to_send !=NULL)
void dict_put(Dict *dict, Octstr *key, void *value)
Definition: dict.c:240
void gwlist_append(List *list, void *item)
Definition: list.c:179
void wml_shutdown()
Definition: wml_compiler.c:479
static string_table_t * string_table_create(int offset, Octstr *ostr)
static void xml_error(void)
Definition: wml_compiler.c:334
long gwlist_len(List *list)
Definition: list.c:166
Octstr * string
Definition: wml_compiler.c:176
void * gwlist_get(List *list, long pos)
Definition: list.c:292
unsigned long wml_public_id
Definition: wml_compiler.c:162
static wbxml_version_t wbxml_version[]
Definition: wml_compiler.c:117
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1517
static wml_binary_t * wml_binary_create(void)
unsigned char binary
Definition: wml_compiler.c:206
long octstr_search(const Octstr *haystack, const Octstr *needle, long pos)
Definition: octstr.c:1070
static void attr_dict_construct(wml_table3_t *attributes, Dict *attr_dict)
unsigned char binary
Definition: wml_compiler.c:196
static void string_table_apply(Octstr *ostr, wml_binary_t **wbxml)
static int parse_attr_value(Octstr *attr_value, List *tokens, wml_binary_t **wbxml, int charset, var_esc_t default_esc)
Definition: wml_compiler.c:834
void octstr_strip_blanks(Octstr *text)
Definition: octstr.c:1346
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
static void output_st_octet_string(Octstr *ostr, wml_binary_t **wbxml)
#define WBXML_STR_END
#define octstr_copy(ostr, from, len)
Definition: octstr.h:178
#define WBXML_STRING_TABLE_MIN
long octstr_search_char(const Octstr *ostr, int ch, long pos)
Definition: octstr.c:1012
static void hash_destroy(void *p)
Octstr * charset
Definition: test_ota.c:68
static void attribute_destroy(void *p)
static int parse_node(xmlNodePtr node, wml_binary_t **wbxml)
Definition: wml_compiler.c:502
static string_table_proposal_t * string_table_proposal_create(Octstr *ostr)
static wml_hash_t * hash_create(char *text, unsigned char token)
var_esc_t
Definition: wml_compiler.c:128
Octstr * item
Definition: wml_compiler.c:195
static void output_st_char(int byte, wml_binary_t **wbxml)
static int parse_variable(Octstr *text, int start, var_esc_t default_esc, Octstr **output, wml_binary_t **wbxml)
static void wml_binary_output(Octstr *ostr, wml_binary_t *wbxml)
#define WBXML_STR_I
void octstr_strip_nonalphanums(Octstr *text)
Definition: octstr.c:1405
Octstr * octstr_imm(const char *cstr)
Definition: octstr.c:283
void octstr_insert(Octstr *ostr1, const Octstr *ostr2, long pos)
Definition: octstr.c:1303
int parse_charset(Octstr *os)
Definition: xml_shared.c:189
int token
Definition: wslexer.c:159
void * gwlist_extract_first(List *list)
Definition: list.c:305
unsigned long value
Definition: wml_compiler.c:98
static var_esc_t check_variable_name(xmlNodePtr node)
void * dict_get(Dict *dict, Octstr *key)
Definition: dict.c:286
static void string_table_proposal_destroy(string_table_proposal_t *node)
void octstr_delete(Octstr *ostr1, long pos, long len)
Definition: octstr.c:1527
#define WBXML_LITERAL
#define WBXML_EXT_T_0
static void string_table_collect_strings(xmlNodePtr node, List *strings)
List * wml_attr_values_list
Definition: wml_compiler.c:222
char * text
Definition: smsc_cimd2.c:921
static int hash_cmp(void *hash1, void *hash2)
unsigned long string_table_length
Definition: wml_compiler.c:164
static List * string_table_collect_words(List *strings)
Definition: dict.c:116
#define octstr_duplicate(ostr)
Definition: octstr.h:187
static void string_table_destroy(string_table_t *node)
static int wml_table3_len(wml_table3_t *table)
#define NUMBER_OF_WBXML_VERSION
Definition: wml_compiler.c:125
int wml_compile(Octstr *wml_text, Octstr *charset, Octstr **wml_binary, Octstr *version)
Definition: wml_compiler.c:360
#define WBXML_END
char * name
Definition: smsc_cimd2.c:212
void warning(int err, const char *fmt,...)
Definition: log.c:660
List * octstr_split_words(const Octstr *ostr)
Definition: octstr.c:1602
static int check_do_elements(xmlNodePtr node)
static List * string_table_sort_list(List *start)
Octstr * octstr_format(const char *fmt,...)
Definition: octstr.c:2464
void octstr_destroy(Octstr *ostr)
Definition: octstr.c:324
#define octstr_create(cstr)
Definition: octstr.h:125
static wml_table_t wml_elements[]
void octstr_destroy_item(void *os)
Definition: octstr.c:336
Dict * wml_attributes_dict
Definition: wml_compiler.c:220
static int wml_table_len(wml_table_t *table)
Octstr * attribute
Definition: wml_compiler.c:205
static void output_variable(Octstr *variable, Octstr **output, var_esc_t escaped, wml_binary_t **wbxml)
static Octstr * get_variable(Octstr *text, int start)
static wml_table_t wml_attribute_values[]
unsigned char token
Definition: wml_compiler.c:148
unsigned long character_set
Definition: wml_compiler.c:163
Dict * wml_elements_dict
Definition: wml_compiler.c:218
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
void dict_destroy(Dict *dict)
Definition: dict.c:215
void octstr_append_uintvar(Octstr *ostr, unsigned long value)
Definition: octstr.c:1931
static int parse_st_octet_string(Octstr *ostr, int cdata, var_esc_t default_esc, wml_binary_t **wbxml)
Definition: octstr.c:118
List * string_table
Definition: wml_compiler.c:165
static var_esc_t check_variable_syntax(Octstr *variable, var_esc_t default_esc)
#define NUMBER_OF_WML_EXTERNALID
Definition: wml_compiler.c:108
void debug(const char *place, int err, const char *fmt,...)
Definition: log.c:726
int octstr_str_compare(const Octstr *ostr, const char *str)
Definition: octstr.c:973
static int parse_text(xmlNodePtr node, wml_binary_t **wbxml)
Definition: wml_compiler.c:940
static int wml_xml_strict
Definition: wapbox.c:108
static int parse_element(xmlNodePtr node, wml_binary_t **wbxml)
Definition: wml_compiler.c:660
#define gwlist_create()
Definition: list.h:136
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1327
static void parse_entities(Octstr *wml_source)
static int parse_attribute(xmlAttrPtr attr, wml_binary_t **wbxml)
Definition: wml_compiler.c:740
#define WBXML_CONTENT_BIT
static Octstr * get_do_element_name(xmlNodePtr node)
static List * string_table_add_many(List *sorted, wml_binary_t **wbxml)
List * wml_URL_values_list
Definition: wml_compiler.c:224
#define create_octstr_from_node(node)
static int check_if_emphasis(xmlNodePtr node)
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
void octstr_shrink_blanks(Octstr *text)
Definition: octstr.c:1433
static void wml_binary_destroy(wml_binary_t *wbxml)
unsigned char token
Definition: wml_compiler.c:137
#define WBXML_STR_T
#define WBXML_EXT_T_1
Definition: list.c:102
static XMLRPCDocument * msg
Definition: test_xmlrpc.c:86
static int start
void wml_init(int wml_xml_strict)
Definition: wml_compiler.c:429
static void string_table_output(Octstr *ostr, wml_binary_t **wbxml)
static void string_table_build(xmlNodePtr node, wml_binary_t **wbxml)
static wml_table3_t wml_attributes[]
static unsigned long string_table_add(Octstr *ostr, wml_binary_t **wbxml)
int charset_convert(Octstr *string, char *charset_from, char *charset_to)
Definition: charset.c:589
int wml_xml_parser_opt
Definition: wml_compiler.c:226
int octstr_compare(const Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:871
void gwlist_destroy(List *list, gwlist_item_destructor_t *destructor)
Definition: list.c:145
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.