gwenhywfar  4.3.3
text.c
Go to the documentation of this file.
1 /***************************************************************************
2  begin : Sat Jun 28 2003
3  copyright : (C) 2003 by Martin Preuss
4  email : martin@libchipcard.de
5 
6  ***************************************************************************
7  * *
8  * This library is free software; you can redistribute it and/or *
9  * modify it under the terms of the GNU Lesser General Public *
10  * License as published by the Free Software Foundation; either *
11  * version 2.1 of the License, or (at your option) any later version. *
12  * *
13  * This library is distributed in the hope that it will be useful, *
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
16  * Lesser General Public License for more details. *
17  * *
18  * You should have received a copy of the GNU Lesser General Public *
19  * License along with this library; if not, write to the Free Software *
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21  * MA 02111-1307 USA *
22  * *
23  ***************************************************************************/
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 #define DISABLE_DEBUGLOG
30 
31 
32 #include "text.h"
33 #include <stdlib.h>
34 #include <assert.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <ctype.h>
38 #ifdef HAVE_LOCALE_H
39 # include <locale.h>
40 #endif
41 
42 #ifndef ICONV_CONST
43 # define ICONV_CONST
44 #endif
45 
46 #ifdef HAVE_ICONV_H
47 # include <iconv.h>
48 #endif
49 
50 
51 #include <gwenhywfar/gwenhywfarapi.h>
52 #include <gwenhywfar/debug.h>
53 #include <gwenhywfar/stringlist.h>
54 
55 
56 
/* Table entry type pairing a character with the string that replaces it,
 * followed by the table itself (the five XML special characters and their
 * entity names).
 * NOTE(review): this listing appears to have lost inner lines 60 and 62,
 * i.e. the line closing the struct typedef and the line opening the array
 * definition - confirm against the real source file. */
57 typedef struct {
58  int character;
59  const char *replace;
61 
63 {'&', "&amp;"},
64 {'<', "&lt;"},
65 {'>', "&gt;"},
66 {'\'', "&apos;"},
67 {'\"', "&quot;"},
/* presumably the end-of-table marker - verify against the code reading it */
68 {0, 0}
69 };
70 
71 
72 
/**
 * Copies the next word of @p src into @p buffer.
 *
 * Characters are copied until a character contained in @p delims is met
 * outside of double quotes, until a closing double quote is met, until the
 * source ends, or until maxsize-1 characters have been stored. The result is
 * always NUL-terminated. If GWEN_TEXT_FLAGS_CHECK_BACKSLASH is set, a
 * backslash makes the following character be copied literally.
 *
 * @param src     NUL-terminated input
 * @param delims  NUL-terminated set of delimiter characters
 * @param buffer  destination buffer
 * @param maxsize size of @p buffer; must not be 0 (asserted)
 * @param flags   GWEN_TEXT_FLAGS_* bit mask
 * @param next    receives the position in @p src where parsing stopped
 *                (only written on success)
 * @return @p buffer on success; 0 if a double quote appears without an
 *         opening one, if the closing quote is missing, or if
 *         GWEN_TEXT_FLAGS_NEED_DELIMITER is set and no delimiter follows
 *
 * NOTE(review): this listing has lost several lines of this function (inner
 * lines 88, 130, 141 and 187); the gaps are marked below.
 */
73 char *GWEN_Text_GetWord(const char *src,
74  const char *delims,
75  char *buffer,
76  unsigned int maxsize,
77  uint32_t flags,
78  const char **next){
79  unsigned int size;
80  int lastWasBlank;
81  int lastBlankPos;
82  int insideQuotes;
83  int lastWasEscape;
84 
85  assert(maxsize);
86 
87  /* skip leading blanks, if wanted */
/* NOTE(review): the line opening this block (presumably a test of flags) is
 * missing from the listing. Everything below code 33 counts as blank here. */
89  while(*src && (unsigned char)(*src)<33)
90  src++;
91  }
92 
93  /* get word */
94  size=0;
95  lastWasBlank=0;
96  lastBlankPos=-1;
97  lastWasEscape=0;
98  insideQuotes=0;
99 
/* a leading double quote starts a quoted word; the quote character itself is
 * only skipped when GWEN_TEXT_FLAGS_DEL_QUOTES is set */
100  if (*src=='"') {
101  insideQuotes=1;
102  if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
103  src++;
104  }
105 
106  while(*src && size<(maxsize-1)) {
107  if (lastWasEscape) {
/* previous character was a backslash: copy this one unconditionally */
108  buffer[size]=*src;
109  size++;
110  lastWasEscape=0;
111  lastWasBlank=0;
112  lastBlankPos=-1;
113  }
114  else {
115  if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
/* the backslash itself is not copied */
116  lastWasEscape=1;
117  lastWasBlank=0;
118  lastBlankPos=-1;
119  }
120  else {
/* delimiters only end the word outside of quotes */
121  if (!insideQuotes && strchr(delims, *src)!=0)
122  break;
123  if (*src=='"') {
124  if (insideQuotes) {
/* closing quote: step over it and finish the word */
125  insideQuotes=0;
126  src++;
127  break;
128  }
129  else {
/* NOTE(review): the line starting this statement (presumably the logging
 * macro call that takes these message strings) is missing from the listing. */
131  "Found a closing \" without an opening one "
132  "(consider using a backslash to escape)");
133  return 0;
134  }
135  }
136 
137 
/* NOTE(review): the last line of the following condition (inner line 141) is
 * missing from the listing. */
138  if (insideQuotes ||
139  !lastWasBlank ||
140  (lastWasBlank &&
142  /* only copy if last char was NOT blank or
143  * last was blank but the caller does not want to have multiple
144  * blanks removed */
145  buffer[size]=*src;
146  size++;
147  }
148  /* remember next loop whether this char was a blank */
149  if (isspace((int)((unsigned char)*src)) && !lastWasEscape) {
150  lastWasBlank=1;
151  lastBlankPos=size;
152  }
153  else {
154  lastWasBlank=0;
155  lastBlankPos=-1;
156  }
157  } /* if this is not a backslash */
158  } /* !lastWasEscape */
159  /* advance source pointer */
160  src++;
161  } /* while */
162 
163  /* add trailing null to correctly terminate the buffer */
164  buffer[size]=0;
165 
166  if (insideQuotes) {
167  DBG_DEBUG(GWEN_LOGDOMAIN, "Missing \" after word");
168  return 0;
169  }
170  /* check whether the source string was correctly terminated */
171  if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
172  if (*src) {
173  if (strchr(delims, *src)==0) {
174  DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
175  return 0;
176  }
177  }
178  else {
/* end of string only counts as a delimiter if the caller allows it */
179  if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
180  DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
181  return 0;
182  }
183  }
184  }
185 
186  /* remove trailing blanks, if wanted */
/* NOTE(review): the line opening this block (presumably a test of flags) is
 * missing from the listing. */
188  if (lastBlankPos!=-1)
189  buffer[lastBlankPos]=0;
190  }
191 
192  *next=src;
193  return buffer;
194 }
195 
196 
197 
/**
 * Appends the next word of @p src to @p buf.
 *
 * Works like the fixed-buffer word parser but writes through
 * GWEN_Buffer_AppendByte(), so there is no size limit in this function.
 * A word ends at a character of @p delims met outside of double quotes, at a
 * closing double quote, or at the end of @p src. If
 * GWEN_TEXT_FLAGS_CHECK_BACKSLASH is set, a backslash makes the following
 * character be copied literally.
 *
 * @param src    NUL-terminated input
 * @param delims NUL-terminated set of delimiter characters
 * @param buf    buffer receiving the word
 * @param flags  GWEN_TEXT_FLAGS_* bit mask
 * @param next   receives the position in @p src where parsing stopped
 *               (only written on success)
 * @return 0 on success; -1 if a double quote appears without an opening one,
 *         if the closing quote is missing, or if
 *         GWEN_TEXT_FLAGS_NEED_DELIMITER is set and no delimiter follows
 *
 * NOTE(review): this listing has lost several lines of this function (inner
 * lines 210, 255, 266 and 309); the gaps are marked below.
 */
198 int GWEN_Text_GetWordToBuffer(const char *src,
199  const char *delims,
200  GWEN_BUFFER *buf,
201  uint32_t flags,
202  const char **next){
/* start of input, kept only for the error message below */
203  const char *savedSrc=src;
204  int lastWasBlank;
205  int lastBlankPos;
206  int insideQuotes;
207  int lastWasEscape;
208 
209  /* skip leading blanks, if wanted */
/* NOTE(review): the line opening this block (presumably a test of flags) is
 * missing from the listing. */
211  while(*src && (unsigned char)(*src)<33) {
/* a blank that is itself a delimiter ends the (empty) word right here */
212  if (strchr(delims, *src)) {
213  *next=src;
214  return 0;
215  }
216  src++;
217  }
218  }
219 
220  /* get word */
221  lastWasBlank=0;
222  lastBlankPos=-1;
223  lastWasEscape=0;
224  insideQuotes=0;
225 
/* a leading double quote starts a quoted word; the quote character itself is
 * only skipped when GWEN_TEXT_FLAGS_DEL_QUOTES is set */
226  if (*src=='"') {
227  insideQuotes=1;
228  if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
229  src++;
230  }
231 
232  while(*src) {
233  if (lastWasEscape) {
/* previous character was a backslash: copy this one unconditionally */
234  GWEN_Buffer_AppendByte(buf, *src);
235  lastWasEscape=0;
236  lastWasBlank=0;
237  lastBlankPos=-1;
238  }
239  else {
240  if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
/* the backslash itself is not copied */
241  lastWasEscape=1;
242  lastWasBlank=0;
243  lastBlankPos=-1;
244  }
245  else {
/* delimiters only end the word outside of quotes */
246  if (!insideQuotes && strchr(delims, *src)!=0)
247  break;
248  if (*src=='"') {
249  if (insideQuotes) {
/* closing quote: step over it and finish the word */
250  insideQuotes=0;
251  src++;
252  break;
253  }
254  else {
/* NOTE(review): the line starting this statement (presumably the logging
 * macro call that takes these message strings) is missing from the listing. */
256  "Found a closing \" without an opening one "
257  "(consider using a backslash to escape)");
258  return -1;
259  }
260  }
261 
262 
/* NOTE(review): the last line of the following condition (inner line 266) is
 * missing from the listing. */
263  if (insideQuotes ||
264  !lastWasBlank ||
265  (lastWasBlank &&
267  /* only copy if last char was NOT blank or
268  * last was blank but the caller does not want to have multiple
269  * blanks removed */
270  GWEN_Buffer_AppendByte(buf, *src);
271  }
272  /* remember next loop whether this char was a blank */
273 
/* here everything below code 33 counts as blank */
274  if (!lastWasEscape && *((unsigned char*)src)<33) {
275  lastWasBlank=1;
276  lastBlankPos=GWEN_Buffer_GetPos(buf);
277  }
278  else {
279  lastWasBlank=0;
280  lastBlankPos=-1;
281  }
282  } /* if this is not a backslash */
283  } /* !lastWasEscape */
284  /* advance source pointer */
285  src++;
286  } /* while */
287 
288  if (insideQuotes) {
289  DBG_ERROR(GWEN_LOGDOMAIN, "Missing \" after word (at %d: [%s])", (int)(src-savedSrc), savedSrc);
290  return -1;
291  }
292  /* check whether the source string was correctly terminated */
293  if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
294  if (*src) {
295  if (strchr(delims, *src)==0) {
296  DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
297  return -1;
298  }
299  }
300  else {
/* end of string only counts as a delimiter if the caller allows it */
301  if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
302  DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
303  return -1;
304  }
305  }
306  }
307 
308  /* remove trailing blanks, if wanted */
/* NOTE(review): the line opening this block (presumably a test of flags) is
 * missing from the listing. */
310  if (lastBlankPos!=-1)
311  GWEN_Buffer_Crop(buf, 0, lastBlankPos);
312  }
313 
314  *next=src;
315  return 0;
316 }
317 
318 
319 
320 char *GWEN_Text_Escape(const char *src,
321  char *buffer,
322  unsigned int maxsize) {
323  unsigned int size;
324 
325  size=0;
326  while(*src) {
327  unsigned char x;
328 
329  x=(unsigned char)*src;
330  if (!(
331  (x>='A' && x<='Z') ||
332  (x>='a' && x<='z') ||
333  (x>='0' && x<='9'))) {
334  unsigned char c;
335 
336  if ((maxsize-1)<size+3) {
337  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
338  return 0;
339  }
340  buffer[size++]='%';
341  c=(((unsigned char)(*src))>>4)&0xf;
342  if (c>9)
343  c+=7;
344  c+='0';
345  buffer[size++]=c;
346  c=((unsigned char)(*src))&0xf;
347  if (c>9)
348  c+=7;
349  c+='0';
350  buffer[size++]=c;
351  }
352  else {
353  if (size<(maxsize-1))
354  buffer[size++]=*src;
355  else {
356  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
357  return 0;
358  }
359  }
360 
361  src++;
362  } /* while */
363 
364  buffer[size]=0;
365  return buffer;
366 }
367 
368 
369 
370 char *GWEN_Text_EscapeTolerant(const char *src,
371  char *buffer,
372  unsigned int maxsize) {
373  unsigned int size;
374 
375  size=0;
376  while(*src) {
377  unsigned char x;
378 
379  x=(unsigned char)*src;
380  if (!(
381  (x>='A' && x<='Z') ||
382  (x>='a' && x<='z') ||
383  (x>='0' && x<='9') ||
384  x==' ' ||
385  x=='.' ||
386  x==',' ||
387  x=='.' ||
388  x=='*' ||
389  x=='?'
390  )) {
391  unsigned char c;
392 
393  if ((maxsize-1)<size+3) {
394  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
395  return 0;
396  }
397  buffer[size++]='%';
398  c=(((unsigned char)(*src))>>4)&0xf;
399  if (c>9)
400  c+=7;
401  c+='0';
402  buffer[size++]=c;
403  c=((unsigned char)(*src))&0xf;
404  if (c>9)
405  c+=7;
406  c+='0';
407  buffer[size++]=c;
408  }
409  else {
410  if (size<(maxsize-1))
411  buffer[size++]=*src;
412  else {
413  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
414  return 0;
415  }
416  }
417 
418  src++;
419  } /* while */
420 
421  buffer[size]=0;
422  return buffer;
423 }
424 
425 
426 
427 char *GWEN_Text_UnescapeN(const char *src,
428  unsigned int srclen,
429  char *buffer,
430  unsigned int maxsize){
431  unsigned int size;
432 
433  size=0;
434 
435  while(*src && srclen>0) {
436  unsigned char x;
437 
438  x=(unsigned char)*src;
439  if (
440  (x>='A' && x<='Z') ||
441  (x>='a' && x<='z') ||
442  (x>='0' && x<='9')) {
443  if (size<(maxsize-1))
444  buffer[size++]=*src;
445  else {
446  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
447  return 0;
448  }
449  }
450  else {
451  if (*src=='%') {
452  unsigned char d1, d2;
453  unsigned char c;
454 
455  if (srclen<3) {
456  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
457  return 0;
458  }
459  /* skip '%' */
460  src++;
461  if (!(*src) || !isxdigit((int)*src)) {
462  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
463  return 0;
464  }
465  /* read first digit */
466  d1=(unsigned char)(toupper(*src));
467 
468  /* get second digit */
469  src++;
470  if (!(*src) || !isxdigit((int)*src)) {
471  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
472  return 0;
473  }
474  d2=(unsigned char)(toupper(*src));
475  /* compute character */
476  d1-='0';
477  if (d1>9)
478  d1-=7;
479  c=(d1<<4)&0xf0;
480  d2-='0';
481  if (d2>9)
482  d2-=7;
483  c+=(d2&0xf);
484  /* store character */
485  if (size<(maxsize-1))
486  buffer[size++]=(char)c;
487  else {
488  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
489  return 0;
490  }
491  srclen-=2;
492  }
493  else {
494  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
495  "characters in escaped string (\"%s\")",
496  src);
497  return 0;
498  }
499  }
500  srclen--;
501  src++;
502  } /* while */
503 
504  buffer[size]=0;
505  return buffer;
506 }
507 
508 
509 
/**
 * Reverses percent-escaping for the whole NUL-terminated string @p src.
 *
 * Convenience wrapper which passes strlen(src) as the input length to
 * GWEN_Text_UnescapeN() and returns its result.
 */
char *GWEN_Text_Unescape(const char *src,
                         char *buffer,
                         unsigned int maxsize){
  const unsigned int inputLen=strlen(src);

  return GWEN_Text_UnescapeN(src, inputLen, buffer, maxsize);
}
518 
519 
520 
521 char *GWEN_Text_UnescapeTolerantN(const char *src,
522  unsigned int srclen,
523  char *buffer,
524  unsigned int maxsize){
525  unsigned int size;
526 
527  size=0;
528 
529  while(*src && srclen>0) {
530  unsigned char x;
531 
532  x=(unsigned char)*src;
533  if (
534  (x>='A' && x<='Z') ||
535  (x>='a' && x<='z') ||
536  (x>='0' && x<='9') ||
537  x==' ' ||
538  x=='.' ||
539  x==',' ||
540  x=='.' ||
541  x=='*' ||
542  x=='?'
543  ) {
544  if (size<(maxsize-1))
545  buffer[size++]=*src;
546  else {
547  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
548  return 0;
549  }
550  }
551  else {
552  if (*src=='%') {
553  unsigned char d1, d2;
554  unsigned char c;
555 
556  if (srclen<3) {
557  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
558  return 0;
559  }
560  /* skip '%' */
561  src++;
562  if (!(*src) || !isxdigit((int)*src)) {
563  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
564  return 0;
565  }
566  /* read first digit */
567  d1=(unsigned char)(toupper(*src));
568 
569  /* get second digit */
570  src++;
571  if (!(*src) || !isxdigit((int)*src)) {
572  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
573  return 0;
574  }
575  d2=(unsigned char)(toupper(*src));
576  /* compute character */
577  d1-='0';
578  if (d1>9)
579  d1-=7;
580  c=(d1<<4)&0xf0;
581  d2-='0';
582  if (d2>9)
583  d2-=7;
584  c+=(d2&0xf);
585  /* store character */
586  if (size<(maxsize-1))
587  buffer[size++]=(char)c;
588  else {
589  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
590  return 0;
591  }
592  srclen-=2;
593  }
594  else {
595  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
596  "characters in escaped string (\"%s\")",
597  src);
598  return 0;
599  }
600  }
601  srclen--;
602  src++;
603  } /* while */
604 
605  buffer[size]=0;
606  return buffer;
607 }
608 
609 
610 
/**
 * Reverses tolerant percent-escaping for the whole NUL-terminated string
 * @p src.
 *
 * Convenience wrapper which passes strlen(src) as the input length to
 * GWEN_Text_UnescapeTolerantN() and returns its result.
 */
char *GWEN_Text_UnescapeTolerant(const char *src,
                                 char *buffer,
                                 unsigned int maxsize){
  const unsigned int inputLen=strlen(src);

  return GWEN_Text_UnescapeTolerantN(src, inputLen, buffer, maxsize);
}
619 
620 
621 
622 char *GWEN_Text_ToHex(const char *src, unsigned l,
623  char *buffer, unsigned int maxsize) {
624  unsigned int pos;
625  unsigned int size;
626 
627  if ((l*2)+1 > maxsize) {
628  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
629  return 0;
630  }
631 
632  pos=0;
633  size=0;
634  while(pos<l) {
635  unsigned char c;
636 
637  c=(((unsigned char)(src[pos]))>>4)&0xf;
638  if (c>9)
639  c+=7;
640  c+='0';
641  buffer[size++]=c;
642  c=((unsigned char)(src[pos]))&0xf;
643  if (c>9)
644  c+=7;
645  c+='0';
646  buffer[size++]=c;
647  pos++;
648  }
649  buffer[size]=0;
650  return buffer;
651 }
652 
653 
654 
655 char *GWEN_Text_ToHexGrouped(const char *src,
656  unsigned l,
657  char *buffer,
658  unsigned maxsize,
659  unsigned int groupsize,
660  char delimiter,
661  int skipLeadingZeroes) {
662  unsigned int pos;
663  unsigned int size;
664  unsigned int j;
665 
666  j=0;
667 
668  pos=0;
669  size=0;
670  j=0;
671  while(pos<l) {
672  unsigned char c;
673  int skipThis;
674 
675  skipThis=0;
676  c=(((unsigned char)(src[pos]))>>4)&0xf;
677  if (skipLeadingZeroes) {
678  if (c==0)
679  skipThis=1;
680  else
681  skipLeadingZeroes=0;
682  }
683  if (c>9)
684  c+=7;
685  c+='0';
686  if (!skipThis) {
687  if (size+1>=maxsize) {
688  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
689  return 0;
690  }
691  buffer[size++]=c;
692  j++;
693  if (j==groupsize) {
694  if (size+1>=maxsize) {
695  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
696  return 0;
697  }
698  buffer[size++]=delimiter;
699  j=0;
700  }
701  }
702 
703  skipThis=0;
704  c=((unsigned char)(src[pos]))&0xf;
705  if (skipLeadingZeroes) {
706  if (c==0 && pos+1<l)
707  skipThis=1;
708  else
709  skipLeadingZeroes=0;
710  }
711  if (c>9)
712  c+=7;
713  c+='0';
714  if (size+1>=maxsize) {
715  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
716  return 0;
717  }
718  if (!skipThis) {
719  buffer[size++]=c;
720  j++;
721  if (j==groupsize) {
722  if (pos+1<l) {
723  if (size+1>=maxsize) {
724  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
725  return 0;
726  }
727  buffer[size++]=delimiter;
728  }
729  j=0;
730  }
731  }
732  pos++;
733  }
734  buffer[size]=0;
735  return buffer;
736 }
737 
738 
739 
740 int GWEN_Text_ToHexBuffer(const char *src, unsigned l,
741  GWEN_BUFFER *buf,
742  unsigned int groupsize,
743  char delimiter,
744  int skipLeadingZeroes){
745  unsigned int pos;
746  unsigned int size;
747  unsigned int j;
748 
749  j=0;
750 
751  pos=0;
752  size=0;
753  j=0;
754  while(pos<l) {
755  unsigned char c;
756  int skipThis;
757 
758  skipThis=0;
759  c=(((unsigned char)(src[pos]))>>4)&0xf;
760  if (skipLeadingZeroes) {
761  if (c==0)
762  skipThis=1;
763  else
764  skipLeadingZeroes=0;
765  }
766  if (c>9)
767  c+=7;
768  c+='0';
769  if (!skipThis) {
770  if (GWEN_Buffer_AppendByte(buf, c)) {
771  DBG_INFO(GWEN_LOGDOMAIN, "here");
772  return -1;
773  }
774  j++;
775  if (groupsize && j==groupsize) {
776  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
777  DBG_INFO(GWEN_LOGDOMAIN, "here");
778  return -1;
779  }
780  j=0;
781  }
782  }
783 
784  skipThis=0;
785  c=((unsigned char)(src[pos]))&0xf;
786  if (skipLeadingZeroes) {
787  if (c==0 && pos+1<l)
788  skipThis=1;
789  else
790  skipLeadingZeroes=0;
791  }
792  if (c>9)
793  c+=7;
794  c+='0';
795  if (!skipThis) {
796  if (GWEN_Buffer_AppendByte(buf, c)) {
797  DBG_INFO(GWEN_LOGDOMAIN, "here");
798  return -1;
799  }
800  j++;
801  if (groupsize && j==groupsize) {
802  if (pos+1<l) {
803  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
804  DBG_INFO(GWEN_LOGDOMAIN, "here");
805  return -1;
806  }
807  }
808  j=0;
809  }
810  }
811  pos++;
812  }
813  return 0;
814 }
815 
816 
817 
818 int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize){
819  unsigned int pos;
820  unsigned int size;
821 
822  pos=0;
823  size=0;
824  while(*src) {
825  unsigned char d1, d2;
826  unsigned char c;
827 
828  /* read first digit */
829  if (!isxdigit((int)*src)) {
830  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
831  return -1;
832  }
833  d1=(unsigned char)(toupper(*src));
834 
835  /* get second digit */
836  src++;
837  if (!(*src) || !isxdigit((int)*src)) {
838  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
839  return -1;
840  }
841  d2=(unsigned char)(toupper(*src));
842  src++;
843 
844  /* compute character */
845  d1-='0';
846  if (d1>9)
847  d1-=7;
848  c=(d1<<4)&0xf0;
849  d2-='0';
850  if (d2>9)
851  d2-=7;
852  c+=(d2&0xf);
853  /* store character */
854  if (size<(maxsize))
855  buffer[size++]=(char)c;
856  else {
857  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (maxsize=%d)", maxsize);
858  return -1;
859  }
860  } /* while */
861 
862  return size;
863 }
864 
865 
866 
867 int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf) {
868  while(*src) {
869  unsigned char d1, d2;
870  unsigned char c;
871 
872  /* read first digit */
873  if (isspace((int)*src)) {
874  src++;
875  }
876  else {
877  if (!isxdigit((int)*src)) {
878  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
879  return -1;
880  }
881  d1=(unsigned char)(toupper(*src));
882 
883  /* get second digit */
884  src++;
885  if (!(*src) || !isxdigit((int)*src)) {
886  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
887  return -1;
888  }
889  d2=(unsigned char)(toupper(*src));
890  src++;
891 
892  /* compute character */
893  d1-='0';
894  if (d1>9)
895  d1-=7;
896  c=(d1<<4)&0xf0;
897  d2-='0';
898  if (d2>9)
899  d2-=7;
900  c+=(d2&0xf);
901  /* store character */
902  GWEN_Buffer_AppendByte(buf, (char)c);
903  }
904  } /* while */
905 
906  return 0;
907 }
908 
909 
910 
911 int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf) {
912  unsigned int l;
913  int fakeByte;
914 
915  l=strlen(src);
916  fakeByte=(l%2);
917  while(*src) {
918  unsigned char d1, d2;
919  unsigned char c;
920 
921  if (fakeByte) {
922  d1=0;
923  fakeByte=0;
924  }
925  else {
926  /* read first digit */
927  if (!isdigit((int)*src)) {
928  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in bcd string");
929  return -1;
930  }
931  d1=(unsigned char)(*src);
932  src++;
933  }
934  /* get second digit */
935  if (!(*src) || !isxdigit((int)*src)) {
936  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete BCD byte (only 1 digit)");
937  return -1;
938  }
939  d2=(unsigned char)(*src);
940  src++;
941 
942  /* compute character */
943  d1-='0';
944  c=(d1<<4)&0xf0;
945  d2-='0';
946  c+=(d2&0xf);
947  /* store character */
948  GWEN_Buffer_AppendByte(buf, (char)c);
949  } /* while */
950 
951  return 0;
952 }
953 
954 
955 
956 int GWEN_Text_ToBcdBuffer(const char *src, unsigned l,
957  GWEN_BUFFER *buf,
958  unsigned int groupsize,
959  char delimiter,
960  int skipLeadingZeroes){
961  unsigned int pos;
962  unsigned int size;
963  unsigned int j;
964 
965  j=0;
966 
967  pos=0;
968  size=0;
969  j=0;
970  while(pos<l) {
971  unsigned char c;
972  int skipThis;
973 
974  skipThis=0;
975  c=(((unsigned char)(src[pos]))>>4)&0xf;
976  if (skipLeadingZeroes) {
977  if (c==0)
978  skipThis=1;
979  else
980  skipLeadingZeroes=0;
981  }
982  c+='0';
983  if (!skipThis) {
984  if (GWEN_Buffer_AppendByte(buf, c)) {
985  DBG_INFO(GWEN_LOGDOMAIN, "here");
986  return -1;
987  }
988  j++;
989  if (groupsize && j==groupsize) {
990  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
991  DBG_INFO(GWEN_LOGDOMAIN, "here");
992  return -1;
993  }
994  j=0;
995  }
996  }
997 
998  skipThis=0;
999  c=((unsigned char)(src[pos]))&0xf;
1000  if (skipLeadingZeroes) {
1001  if (c==0 && pos+1<l)
1002  skipThis=1;
1003  else
1004  skipLeadingZeroes=0;
1005  }
1006  c+='0';
1007  if (!skipThis) {
1008  if (GWEN_Buffer_AppendByte(buf, c)) {
1009  DBG_INFO(GWEN_LOGDOMAIN, "here");
1010  return -1;
1011  }
1012  j++;
1013  if (groupsize && j==groupsize) {
1014  if (pos+1<l) {
1015  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
1016  DBG_INFO(GWEN_LOGDOMAIN, "here");
1017  return -1;
1018  }
1019  }
1020  j=0;
1021  }
1022  }
1023  pos++;
1024  }
1025  return 0;
1026 }
1027 
1028 
1029 
/**
 * Compares two strings, treating a NULL pointer and an empty string alike.
 *
 * @param s1  first string (may be NULL)
 * @param s2  second string (may be NULL)
 * @param ign if non-zero the comparison ignores case
 * @return 0 if both are NULL/empty, 1 if only @p s1 is NULL/empty, -1 if
 *         only @p s2 is NULL/empty, otherwise the result of strcasecmp()
 *         or strcmp()
 */
int GWEN_Text_Compare(const char *s1, const char *s2, int ign) {
  const int firstEmpty=(s1==NULL || *s1=='\0');
  const int secondEmpty=(s2==NULL || *s2=='\0');

  if (firstEmpty || secondEmpty) {
    if (firstEmpty && secondEmpty)
      return 0;
    return firstEmpty ? 1 : -1;
  }

  return ign ? strcasecmp(s1, s2) : strcmp(s1, s2);
}
1048 
1049 
1050 
/**
 * Case-insensitive search for @p needle inside @p haystack.
 *
 * @param haystack NUL-terminated string to search in
 * @param needle   NUL-terminated string to search for
 * @return pointer to the first match inside @p haystack, or NULL if there is
 *         none (an empty @p needle never matches)
 */
const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle) {
  /* tolower() needs an unsigned char value; passing a plain (possibly
   * negative) char is undefined behaviour */
  const int firstLower=tolower((unsigned char)*needle);

  for (; *haystack; haystack++) {
    const char *t;
    const char *s;

    if (tolower((unsigned char)*haystack)!=firstLower)
      continue;

    /* haystack points to an area which begins with *needle; compare the
     * remaining characters */
    t=haystack+1;
    s=needle+1;
    while(*t && *s &&
          tolower((unsigned char)*t)==tolower((unsigned char)*s)) {
      t++;
      s++;
    }
    if (*s==0)
      return haystack;
    /* no match here, try the next position */
  }

  /* not found */
  return NULL;
}
1081 
1082 
1083 
1084 
/**
 * Compares word @p w (from *wpos) against pattern @p p (from *ppos) up to the
 * next '*' in the pattern or the end of either string.
 *
 * A '?' in the pattern matches any single character. Every position where
 * the word character equals the pattern character increments the match
 * counter.
 *
 * @param w         NUL-terminated word
 * @param wpos      in: start position in @p w; out: position reached
 * @param p         NUL-terminated pattern
 * @param ppos      in: start position in @p p; out: position reached
 * @param sensecase if 0 the comparison ignores case
 * @param matches   in/out: running count of equal characters
 * @return 1 if the segment matches (a '*' was reached, or both strings
 *         ended), 0 otherwise
 */
int GWEN_Text__cmpSegment(const char *w, unsigned int *wpos,
                          const char *p, unsigned int *ppos,
                          int sensecase,
                          unsigned int *matches) {
  unsigned int wi=*wpos;
  unsigned int pi=*ppos;
  unsigned int hits=*matches;
  const unsigned int wlength=strlen(w);
  const unsigned int plength=strlen(p);
  int result=-1; /* -1: not decided inside the loop */

  while (wi<wlength && pi<plength) {
    unsigned char a=(unsigned char)w[wi];
    unsigned char b=(unsigned char)p[pi];

    if (b=='*') {
      result=1;
      break;
    }
    if (!sensecase) {
      /* toupper() needs an unsigned char value; passing a plain (possibly
       * negative) char is undefined behaviour */
      a=(unsigned char)toupper(a);
      b=(unsigned char)toupper(b);
    }
    if (a==b)
      ++hits; /* count matches */
    else if (b!='?') {
      result=0;
      break;
    }
    ++wi;
    ++pi;
  }

  if (result<0) {
    if (wi==wlength && pi==plength)
      /* both at end, would be ok */
      result=1;
    else if (wi>=wlength && pi<plength && p[pi]=='*')
      /* word ends, pattern doesn't, ok because the pattern is '*' here */
      result=1;
    else
      /* otherwise no match */
      result=0;
  }

  /* the positions reached are reported on success and on failure alike */
  *wpos=wi;
  *ppos=pi;
  *matches=hits;
  return result;
}
1147 
1148 
1149 
/**
 * Searches word @p w, starting at *wpos, for the first position at which the
 * pattern segment beginning at *ppos matches.
 *
 * Each candidate position is tried with GWEN_Text__cmpSegment(); before every
 * try the pattern position and the match counter are reset to the values
 * they had on entry.
 *
 * @return 1 if a matching position was found (the in/out arguments then hold
 *         what GWEN_Text__cmpSegment() stored), 0 otherwise
 */
int GWEN_Text__findSegment(const char *w, unsigned int *wpos,
                           const char *p, unsigned int *ppos,
                           int sensecase,
                           unsigned int *matches) {
  const unsigned int wordLen=strlen(w);
  const unsigned int entryPatPos=*ppos;
  const unsigned int entryMatches=*matches;
  unsigned int candidate;

  for (candidate=*wpos; candidate<wordLen; candidate++) {
    *ppos=entryPatPos;
    *wpos=candidate;
    *matches=entryMatches;
    if (GWEN_Text__cmpSegment(w, wpos, p, ppos, sensecase, matches))
      return 1;
  }
  return 0;
}
1171 
1172 
/**
 * Matches word @p w against pattern @p p, where '*' stands for any run of
 * characters and '?' for any single character.
 *
 * @param w         NUL-terminated word
 * @param p         NUL-terminated pattern
 * @param sensecase if 0 the comparison ignores case
 * @return the number of literally matching characters if the word matches
 *         the pattern, -1 otherwise
 */
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase) {
  unsigned int patPos=0;
  unsigned int wordPos=0;
  unsigned int hitCount=0;
  const unsigned int patLen=strlen(p);

  /* the part in front of the first '*' has to match at the word's start */
  if (!GWEN_Text__cmpSegment(w, &wordPos, p, &patPos, sensecase, &hitCount))
    return -1;

  for (;;) {
    /* pattern used up: done */
    if (patPos>=patLen)
      return hitCount;
    /* step over the '*' */
    patPos++;
    /* a trailing '*' matches whatever is left of the word */
    if (patPos>=patLen)
      return hitCount;
    /* locate the next segment somewhere in the rest of the word */
    if (!GWEN_Text__findSegment(w, &wordPos, p, &patPos, sensecase,
                                &hitCount))
      return -1;
  }

  /* not reached */
  return -1;
}
1204 
1205 
1206 
1207 int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize,
1208  int fillchar){
1209  char lbuffer[128];
1210  unsigned int i;
1211 
1212  sprintf(lbuffer,"%d", num);
1213  i=strlen(lbuffer);
1214  if (i>=bufsize) {
1215  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (%d>=%d)", i, bufsize);
1216  return -1;
1217  }
1218  if (fillchar>0) {
1219  /* fill right, but first copy chars */
1220  strcpy(buffer, lbuffer);
1221  while(i<bufsize-1)
1222  buffer[i++]=fillchar;
1223  buffer[i]=0;
1224  return bufsize;
1225  }
1226  else if (fillchar<0) {
1227  int j, k;
1228 
1229  fillchar=-fillchar;
1230  j=bufsize-1-i;
1231  for (k=0; k<j; k++)
1232  buffer[k]=fillchar;
1233  buffer[k]=0;
1234  strcat(buffer, lbuffer);
1235  return bufsize;
1236  }
1237  else {
1238  /* dont fill, just copy */
1239  strcpy(buffer, lbuffer);
1240  return i;
1241  }
1242 }
1243 
1244 
1245 
/**
 * Writes a hex dump of the @p l bytes at @p s to stderr.
 *
 * The output starts with a size line, followed by one line per 16 bytes
 * consisting of the offset, the bytes in hex and the bytes as text
 * (characters below 32 and above 127 are shown as '.').
 *
 * @param s      data to dump
 * @param l      number of bytes
 * @param insert number of spaces put in front of every line
 */
void GWEN_Text_DumpString(const char *s, unsigned int l,
                          unsigned int insert) {
  unsigned int i;
  unsigned int j;
  unsigned int pos;
  unsigned k;

  for (k=0; k<insert; k++)
    fprintf(stderr, " ");
  /* l is unsigned, so it is printed with %u */
  fprintf(stderr,"String size is %u:\n",l);

  for (pos=0; pos<l; pos+=16) {
    for (k=0; k<insert; k++)
      fprintf(stderr, " ");
    fprintf(stderr,"%04x: ",pos);

    /* end of this row */
    j=pos+16;
    if (j>=l)
      j=l;

    /* show hex dump */
    for (i=pos; i<j; i++)
      fprintf(stderr,"%02x ",(unsigned char)s[i]);
    /* pad a short last row: every missing byte takes three columns, the
     * same width as one "%02x " cell, so the text column stays aligned */
    for (i=j-pos; i<16; i++)
      fprintf(stderr,"   ");

    /* show text; going through unsigned char makes the result independent
     * of whether plain char is signed on this platform */
    for (i=pos; i<j; i++) {
      const unsigned char ch=(unsigned char)s[i];

      if (ch<32 || ch>=0x80)
        fprintf(stderr,".");
      else
        fprintf(stderr,"%c",ch);
    }
    fprintf(stderr,"\n");
  }
}
1283 
1284 
1285 
1286 void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l,
1287  GWEN_BUFFER *mbuf,
1288  unsigned int insert) {
1289  unsigned int i;
1290  unsigned int j;
1291  unsigned int pos;
1292  unsigned k;
1293  char numbuf[32];
1294 
1295  pos=0;
1296  for (k=0; k<insert; k++)
1297  GWEN_Buffer_AppendByte(mbuf, ' ');
1298  GWEN_Buffer_AppendString(mbuf,"String size is ");
1299  snprintf(numbuf, sizeof(numbuf), "%d", l);
1300  GWEN_Buffer_AppendString(mbuf, numbuf);
1301  GWEN_Buffer_AppendByte(mbuf, '\n');
1302  while(pos<l) {
1303  for (k=0; k<insert; k++)
1304  GWEN_Buffer_AppendByte(mbuf, ' ');
1305  snprintf(numbuf, sizeof(numbuf),"%04x: ",pos);
1306  GWEN_Buffer_AppendString(mbuf, numbuf);
1307  j=pos+16;
1308  if (j>=l)
1309  j=l;
1310 
1311  /* show hex dump */
1312  for (i=pos; i<j; i++) {
1313  snprintf(numbuf, sizeof(numbuf),"%02x ", (unsigned char)s[i]);
1314  GWEN_Buffer_AppendString(mbuf, numbuf);
1315  }
1316  if (j-pos<16)
1317  for (i=0; i<16-(j-pos); i++)
1318  GWEN_Buffer_AppendString(mbuf, " ");
1319  /* show text */
1320  for (i=pos; i<j; i++) {
1321  if (s[i]<32)
1322  GWEN_Buffer_AppendByte(mbuf, '.');
1323  else
1324  GWEN_Buffer_AppendByte(mbuf, s[i]);
1325  }
1326  GWEN_Buffer_AppendByte(mbuf, '\n');
1327  pos+=16;
1328  }
1329 }
1330 
1331 
1332 
1333 
1334 
1335 
1336 
1337 int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf) {
1338  while(*src) {
1339  unsigned char x;
1340 
1341  x=(unsigned char)*src;
1342  if (!(
1343  (x>='A' && x<='Z') ||
1344  (x>='a' && x<='z') ||
1345  (x>='0' && x<='9'))) {
1346  unsigned char c;
1347 
1348  GWEN_Buffer_AppendByte(buf, '%');
1349  c=(((unsigned char)(*src))>>4)&0xf;
1350  if (c>9)
1351  c+=7;
1352  c+='0';
1353  GWEN_Buffer_AppendByte(buf, c);
1354  c=((unsigned char)(*src))&0xf;
1355  if (c>9)
1356  c+=7;
1357  c+='0';
1358  GWEN_Buffer_AppendByte(buf, c);
1359  }
1360  else
1361  GWEN_Buffer_AppendByte(buf, *src);
1362 
1363  src++;
1364  } /* while */
1365 
1366  return 0;
1367 }
1368 
1369 
1370 
1371 int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf) {
1372  while(*src) {
1373  unsigned char x;
1374 
1375  x=(unsigned char)*src;
1376  if (
1377  (x>='A' && x<='Z') ||
1378  (x>='a' && x<='z') ||
1379  (x>='0' && x<='9')) {
1380  GWEN_Buffer_AppendByte(buf, *src);
1381  }
1382  else {
1383  if (*src=='%') {
1384  unsigned char d1, d2;
1385  unsigned char c;
1386 
1387  /* skip '%' */
1388  src++;
1389  if (!(*src) || !isxdigit((int)*src)) {
1390  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
1391  return 0;
1392  }
1393  /* read first digit */
1394  d1=(unsigned char)(toupper(*src));
1395 
1396  /* get second digit */
1397  src++;
1398  if (!(*src) || !isxdigit((int)*src)) {
1399  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
1400  return 0;
1401  }
1402  d2=(unsigned char)(toupper(*src));
1403  /* compute character */
1404  d1-='0';
1405  if (d1>9)
1406  d1-=7;
1407  c=(d1<<4)&0xf0;
1408  d2-='0';
1409  if (d2>9)
1410  d2-=7;
1411  c+=(d2&0xf);
1412  /* store character */
1413  GWEN_Buffer_AppendByte(buf, (char)c);
1414  }
1415  else {
1416  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
1417  "characters in escaped string (\"%s\")",
1418  src);
1419  return -1;
1420  }
1421  }
1422  src++;
1423  } /* while */
1424 
1425  return 0;
1426 }
1427 
1428 
1429 
1430 int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf) {
1431  while(*src) {
1432  unsigned char x;
1433 
1434  x=(unsigned char)*src;
1435  if (!(
1436  (x>='A' && x<='Z') ||
1437  (x>='a' && x<='z') ||
1438  (x>='0' && x<='9') ||
1439  x==' ' ||
1440  x=='.' ||
1441  x==',' ||
1442  x=='.' ||
1443  x=='_' ||
1444  x=='-' ||
1445  x=='*' ||
1446  x=='?'
1447  )) {
1448  unsigned char c;
1449 
1450  GWEN_Buffer_AppendByte(buf, '%');
1451  c=(((unsigned char)(*src))>>4)&0xf;
1452  if (c>9)
1453  c+=7;
1454  c+='0';
1455  GWEN_Buffer_AppendByte(buf, c);
1456  c=((unsigned char)(*src))&0xf;
1457  if (c>9)
1458  c+=7;
1459  c+='0';
1460  GWEN_Buffer_AppendByte(buf, c);
1461  }
1462  else
1463  GWEN_Buffer_AppendByte(buf, *src);
1464 
1465  src++;
1466  } /* while */
1467 
1468  return 0;
1469 }
1470 
1471 
1472 
/* NOTE(review): the function header (inner line 1473) is missing from this
 * listing; what follows is the body only. It reads the NUL-terminated string
 * src and appends to buf: a '%' followed by two hexadecimal digits is decoded
 * into one byte, everything else - including a '%' that is not followed by
 * two hex digits - is appended unchanged. The body always returns 0. */
1474  while(*src) {
/* srcBak is assigned below but not read anywhere in the visible body */
1475  const char *srcBak;
1476  int charHandled;
1477 
1478  srcBak=src;
1479  charHandled=0;
1480  if (*src=='%') {
/* only try to decode if at least two more characters follow the '%' */
1481  if (strlen(src)>2) {
1482  unsigned char d1, d2;
1483  unsigned char c;
1484 
1485  if (isxdigit((int)src[1]) && isxdigit((int)src[2])) {
1486  /* skip '%' */
1487  src++;
1488  /* read first digit */
1489  d1=(unsigned char)(toupper(*src));
1490 
1491  /* get second digit */
1492  src++;
1493  d2=(unsigned char)(toupper(*src));
1494  /* compute character */
/* 'A'..'F' follow '9' after a gap of 7 character codes */
1495  d1-='0';
1496  if (d1>9)
1497  d1-=7;
1498  c=(d1<<4)&0xf0;
1499  d2-='0';
1500  if (d2>9)
1501  d2-=7;
1502  c+=(d2&0xf);
1503  /* store character */
1504  GWEN_Buffer_AppendByte(buf, (char)c);
1505  charHandled=1;
1506  }
1507  }
1508  }
/* not a valid escape sequence: pass the character through as it is */
1509  if (!charHandled)
1510  GWEN_Buffer_AppendByte(buf, *src);
1511  src++;
1512  } /* while */
1513 
1514  return 0;
1515 }
1516 
1517 
1518 
/*
 * Body of a percent-escaper that takes its input from a buffer: reads src
 * byte by byte with GWEN_Buffer_ReadByte() until no bytes are left and
 * appends the result to buf. ASCII letters, digits and ' ', '.', ',', '*',
 * '?' are appended unchanged; every other byte becomes "%XX" with upper-case
 * hex digits. Unlike GWEN_Text_EscapeToBufferTolerant, '_' and '-' are
 * escaped here. Returns 0, or -1 if reading a byte fails.
 *
 * NOTE(review): the line carrying this function's signature is missing from
 * this listing (the embedded numbering skips 1519) -- confirm the name and
 * parameter types against text.h.
 */
1520  while(GWEN_Buffer_GetBytesLeft(src)) {
1521  int z;
1522  unsigned char x;
1523 
1524  z=GWEN_Buffer_ReadByte(src);
1525  if (z==-1) {
1526  DBG_INFO(GWEN_LOGDOMAIN, "here");
1527  return -1;
1528  }
1529  x=(unsigned char)z;
/* NOTE(review): x=='.' is tested twice below; the second test is redundant */
1530  if (!(
1531  (x>='A' && x<='Z') ||
1532  (x>='a' && x<='z') ||
1533  (x>='0' && x<='9') ||
1534  x==' ' ||
1535  x=='.' ||
1536  x==',' ||
1537  x=='.' ||
1538  x=='*' ||
1539  x=='?'
1540  )) {
1541  unsigned char c;
1542 
1543  GWEN_Buffer_AppendByte(buf, '%');
/* high nibble first; values 10..15 are shifted by 7 so they land on 'A'..'F' */
1544  c=(((unsigned char)x)>>4)&0xf;
1545  if (c>9)
1546  c+=7;
1547  c+='0';
1548  GWEN_Buffer_AppendByte(buf, c);
/* then the low nibble */
1549  c=((unsigned char)x)&0xf;
1550  if (c>9)
1551  c+=7;
1552  c+='0';
1553  GWEN_Buffer_AppendByte(buf, c);
1554  }
1555  else
1556  GWEN_Buffer_AppendByte(buf, x);
1557  } /* while */
1558 
1559  return 0;
1560  }
1561 
1562 
1563 
1564 void GWEN_Text_LogString(const char *s, unsigned int l,
1565  const char *logDomain,
1566  GWEN_LOGGER_LEVEL lv){
1567  GWEN_BUFFER *mbuf;
1568 
1569  mbuf=GWEN_Buffer_new(0, ((l*16)<1024)?1024:l*16, 0, 1);
1570  GWEN_Text_DumpString2Buffer(s, l, mbuf, 0);
1571  GWEN_Logger_Log(logDomain, lv, GWEN_Buffer_GetStart(mbuf));
1572  GWEN_Buffer_free(mbuf);
1573 }
1574 
1575 
1576 
/*
 * Body of an in-place whitespace condenser for the content of buf: every run
 * of whitespace characters (as classified by isspace()) is reduced to the
 * first character of that run, a trailing whitespace character is dropped,
 * and the buffer is finally cropped to the condensed length. A leading run of
 * whitespace is reduced to one character but not removed.
 *
 * NOTE(review): the line carrying this function's signature is missing from
 * this listing (the embedded numbering skips 1577) -- confirm the name and
 * parameter type against text.h.
 */
1578  const char *p;
1579  char *dst;
1580  unsigned int size;
1581  unsigned int i;
1582  int lastWasBlank;
1583  char *lastBlankPos;
1584 
/* p reads and dst writes within the same memory; dst never overtakes p */
1585  dst=GWEN_Buffer_GetStart(buf);
1586  p=dst;
1587  size=GWEN_Buffer_GetUsedBytes(buf);
1588  lastWasBlank=0;
1589  lastBlankPos=0;
1590 
1591  for (i=0; i<size; i++) {
1592  /* remember next loop whether this char was a blank */
/* NOTE(review): *p is a plain char here; isspace() expects an unsigned char value or EOF */
1593  if (isspace((int)*p)) {
1594  if (!lastWasBlank) {
1595  /* store only one blank */
1596  lastWasBlank=1;
1597  lastBlankPos=dst;
1598  *(dst++)=*p;
1599  }
1600  }
1601  else {
1602  lastWasBlank=0;
1603  lastBlankPos=0;
1604  *(dst++)=*p;
1605  }
1606  p++;
1607  }
1608 
1609  /* remove trailing blanks */
/* lastBlankPos is only non-zero if the last stored character was whitespace */
1610  if (lastBlankPos!=0)
1611  dst=lastBlankPos;
1612 
1613  size=dst-GWEN_Buffer_GetStart(buf);
1614  GWEN_Buffer_Crop(buf, 0, size);
1615  }
1616 
1617 
1618 
/*
 * Body of a double-to-text formatter: renders num with "%f" into a local
 * 128 byte array and appends the text to buf. When HAVE_SETLOCALE is defined
 * the LC_NUMERIC locale is switched to "C" for the duration of the snprintf()
 * call and restored from a strdup()ed copy of its previous name afterwards.
 * Returns 0 on success and -1 if snprintf() reports fewer than one character
 * or a result that does not fit into the array.
 *
 * NOTE(review): the line carrying this function's signature is missing from
 * this listing (the embedded numbering skips 1619) -- confirm the name and
 * parameter order against text.h.
 */
1620  char numbuf[128];
1621  int rv;
1622 #ifdef HAVE_SETLOCALE
/* the string returned by setlocale() may be overwritten by later calls, hence the copy */
1623  const char *orig_locale = setlocale(LC_NUMERIC, NULL);
/* NOTE(review): the strdup() result is not checked; with a NULL copy the restoring call below only queries the locale */
1624  char *currentLocale = strdup(orig_locale ? orig_locale : "C");
1625  setlocale(LC_NUMERIC,"C");
1626 #endif
1627 
1628  rv=snprintf(numbuf, sizeof(numbuf), "%f", num);
1629 
1630 #ifdef HAVE_SETLOCALE
1631  setlocale(LC_NUMERIC, currentLocale);
1632  free(currentLocale);
1633 #endif
1634 
/* rv<1 is tested first, so rv is positive when it is compared with the unsigned sizeof */
1635  if (rv<1 || rv>=sizeof(numbuf))
1636  return -1;
1637  GWEN_Buffer_AppendString(buf, numbuf);
1638  return 0;
1639  }
1640 
1641 
1642 
/**
 * Parses a floating point number from the beginning of a string.
 *
 * The number is read with a '.' as decimal point regardless of the current
 * locale: when HAVE_SETLOCALE is defined, LC_NUMERIC is switched to "C" for
 * the conversion and restored afterwards. Leading whitespace is skipped and
 * anything following the number is ignored.
 *
 * strtod() is used instead of sscanf("%lf") because the behaviour of the
 * scanf family is undefined when the value cannot be represented, while
 * strtod() has a defined result for such input.
 *
 * @param s   NUL-terminated text to parse
 * @param num receives the parsed value; left untouched on failure
 * @return 0 on success, -1 if @c s or @c num is NULL, if no number could be
 *         read, or if the current locale name could not be saved
 */
int GWEN_Text_StringToDouble(const char *s, double *num){
  char *end;
  double value;
#ifdef HAVE_SETLOCALE
  const char *orig_locale;
  char *savedLocale;
#endif

  if (s==NULL || num==NULL)
    return -1;

#ifdef HAVE_SETLOCALE
  /* copy the name: the string returned by setlocale() may be overwritten
   * by the next call */
  orig_locale=setlocale(LC_NUMERIC, NULL);
  savedLocale=strdup(orig_locale ? orig_locale : "C");
  if (savedLocale==NULL) {
    /* without a copy the locale could not be restored, so do not touch it */
    return -1;
  }
  setlocale(LC_NUMERIC, "C");
#endif

  end=NULL;
  value=strtod(s, &end);

#ifdef HAVE_SETLOCALE
  setlocale(LC_NUMERIC, savedLocale);
  free(savedLocale);
#endif

  /* nothing consumed means there was no number at the start of s */
  if (end==s)
    return -1;
  *num=value;
  return 0;
}
1662 
1663 
1664 
/*
 * Rates how well character a (from the first string) corresponds to
 * character b (from the second string): 2 for a full match, 1 for a weak
 * match, 0 for none. With ign set, characters that are equal ignoring case
 * are a full match; without it only identical characters are, and equality
 * ignoring case is a weak match. Two alphanumeric characters are always at
 * least a weak match.
 */
static int GWEN_Text__ScoreCharPair(char a, char b, int ign) {
  /* the <ctype.h> functions require values in the unsigned char range */
  const unsigned char ua=(unsigned char)a;
  const unsigned char ub=(unsigned char)b;

  if (ign) {
    if (toupper(ua)==toupper(ub))
      return 2;
  }
  else {
    if (a==b)
      return 2;
    if (toupper(ua)==toupper(ub))
      return 1;
  }
  if (isalnum(ua) && isalnum(ub))
    return 1;
  return 0;
}



/**
 * One-directional similarity rating of two strings in percent.
 *
 * For every character of @c s1 the remainder of @c s2 is searched for the
 * first character that corresponds to it (see GWEN_Text__ScoreCharPair).
 * When one is found its score is added and the search for the next character
 * of @c s1 continues behind it. The sum is related to the combined length of
 * both strings, so two identical strings rate 100.
 *
 * @param s1  NUL-terminated string whose characters are looked up
 * @param s2  NUL-terminated string that is searched
 * @param ign non-zero to treat upper and lower case as fully equal
 * @return rating from 0 to 100, truncated to a whole number; 100 if both
 *         strings are empty (this used to divide by zero)
 */
double GWEN_Text__CheckSimilarity(const char *s1, const char *s2, int ign){
  int nboth;
  int nmatch;

  nboth=strlen(s1)+strlen(s2);
  if (nboth==0) {
    /* two empty strings are identical; also avoids dividing by zero below */
    return 100.0;
  }

  nmatch=0;
  while(*s1 && *s2) {
    const char *t;
    int score;

    /* find the first character in the rest of s2 that corresponds to *s1 */
    score=0;
    for (t=s2; *t; t++) {
      score=GWEN_Text__ScoreCharPair(*s1, *t, ign);
      if (score)
        break;
    }

    if (score) {
      nmatch+=score;
      /* characters of s2 up to and including the match are used up */
      s2=t+1;
    }

    s1++;
  } /* while */

  /* integer division is kept so results stay whole percentages as before */
  return (double)((nmatch*100)/nboth);
}
1736 
1737 
1738 
/**
 * Rates the similarity of two strings in percent.
 *
 * The one-directional rating GWEN_Text__CheckSimilarity() is computed in
 * both directions (s1 against s2 and s2 against s1) and the higher of the
 * two results is returned.
 *
 * @param s1  first NUL-terminated string
 * @param s2  second NUL-terminated string
 * @param ign passed through unchanged to GWEN_Text__CheckSimilarity()
 * @return the larger of the two ratings
 */
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign){
  const double forward=GWEN_Text__CheckSimilarity(s1, s2, ign);
  const double backward=GWEN_Text__CheckSimilarity(s2, s1, ign);

  return (backward>forward)?backward:forward;
}
1748 
1749 
1750 
1751 int GWEN_Text_CountUtf8Chars(const char *s, int len) {
1752  int count;
1753  int handled;
1754 
1755  if (len==0)
1756  len=strlen(s);
1757  count=0;
1758  handled=0;
1759  while(handled<len) {
1760  unsigned char c;
1761  int i;
1762 
1763  c=(unsigned char)*s;
1764  if ((c & 0xfe)==0xfc)
1765  i=5;
1766  else if ((c & 0xfc)==0xf8)
1767  i=4;
1768  else if ((c & 0xf8)==0xf0)
1769  i=3;
1770  else if ((c & 0xf0)==0xe0)
1771  i=2;
1772  else if ((c & 0xe0)==0xc0)
1773  i=1;
1774  else if (c & 0x80) {
1775  DBG_ERROR(GWEN_LOGDOMAIN, "Invalid UTF8 character at pos %d", handled);
1776  return -1;
1777  }
1778  else
1779  i=0;
1780  if (handled+i+1>len) {
1782  "Incomplete UTF8 sequence at pos %d", handled);
1783  return -1;
1784  }
1785  s++;
1786  if (i) {
1787  int j;
1788 
1789  for (j=0; j<i; j++) {
1790  if ((((unsigned char)*s) & 0xc0)!=0xc0) {
1792  "Invalid UTF8 sequence at pos %d (rel %d of %d)",
1793  handled, j, i);
1794  }
1795  s++;
1796  }
1797  }
1798  handled+=i+1;
1799  count++;
1800  } /* while */
1801 
1802  return count;
1803 }
1804 
1805 
1806 
/*
 * Replaces XML entities in the NUL-terminated string src by the characters
 * they stand for and appends the result to buf. Handled are decimal numeric
 * references ("&#NNN;"), the German entities &szlig; &Auml; &Ouml; &Uuml;
 * &auml; &ouml; &uuml; (written as two-byte UTF-8 sequences) and the entries
 * of an escape table that is compared case-insensitively. Everything else is
 * copied unchanged. Always returns 0.
 *
 * Output is written directly into the buffer's memory through pdst; the
 * buffer's own position is only advanced when more room has to be allocated
 * and once more at the end.
 *
 * NOTE(review): three lines of this function are missing from this listing
 * (the embedded numbering skips 1896, 1902 and 1919); see the notes at the
 * respective places.
 */
1807  int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf) {
1808  char *pdst;
1809  uint32_t roomLeft;
1810  uint32_t bytesAdded;
1811 
/*
 * GWEN_TEXT__APPENDCHAR(chr): stores chr at pdst followed by a 0 byte.
 * If fewer than two bytes of room are left it first commits the bytes written
 * so far to the buffer, requests room for two more bytes and re-reads pdst
 * and roomLeft. The macro expands to several statements, so it must only be
 * used where a sequence of statements is valid.
 */
1812 #define GWEN_TEXT__APPENDCHAR(chr) \
1813  if (roomLeft<2) { \
1814  if (bytesAdded) { \
1815  GWEN_Buffer_IncrementPos(buf, bytesAdded); \
1816  GWEN_Buffer_AdjustUsedBytes(buf); \
1817  } \
1818  GWEN_Buffer_AllocRoom(buf, 2); \
1819  pdst=GWEN_Buffer_GetPosPointer(buf); \
1820  roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf); \
1821  bytesAdded=0; \
1822  } \
1823  *(pdst++)=(unsigned char)chr; \
1824  *pdst=0; \
1825  bytesAdded++; \
1826  roomLeft--
1827 
1828  pdst=GWEN_Buffer_GetPosPointer(buf);
1829  roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf);
1830  bytesAdded=0;
1831 
1832  while(*src) {
1833  unsigned char x;
1834  int match;
1835 
1836  match=0;
1837  x=(unsigned char)*src;
1838  if (x=='&') {
1839  if (src[1]=='#') {
/* decimal numeric reference; the value is kept in a single byte, so references above 255 wrap around */
1840  unsigned char num=0;
1841 
1842  src++;
1843  src++;
1844  while(*src && isdigit((int)*src)) {
1845  num*=10;
1846  num+=(*src)-'0';
1847  src++;
1848  }
/*
 * NOTE(review): the character after the digits is skipped without checking
 * that it is ';' or that the string has not ended. match also stays 0 on this
 * path, so the "if (!match)" block below appends the following character in
 * the same iteration; if the reference ended the string this appends the
 * terminating 0 byte and moves src beyond it -- verify.
 */
1849  src++;
1850  GWEN_TEXT__APPENDCHAR(num);
1851  }
/* named entities for German characters, emitted as UTF-8 (0xc3 followed by a second byte) */
1852  else if (strncmp(src+1, "szlig;", 6)==0) {
1853  GWEN_TEXT__APPENDCHAR(0xc3);
1854  GWEN_TEXT__APPENDCHAR(0x9f);
1855  src+=7;
1856  match=1;
1857  }
1858  else if (strncmp(src+1, "Auml;", 5)==0) {
1859  GWEN_TEXT__APPENDCHAR(0xc3);
1860  GWEN_TEXT__APPENDCHAR(0x84);
1861  src+=6;
1862  match=1;
1863  }
1864  else if (strncmp(src+1, "Ouml;", 5)==0) {
1865  GWEN_TEXT__APPENDCHAR(0xc3);
1866  GWEN_TEXT__APPENDCHAR(0x96);
1867  src+=6;
1868  match=1;
1869  }
1870  else if (strncmp(src+1, "Uuml;", 5)==0) {
1871  GWEN_TEXT__APPENDCHAR(0xc3);
1872  GWEN_TEXT__APPENDCHAR(0x9c);
1873  src+=6;
1874  match=1;
1875  }
1876  else if (strncmp(src+1, "auml;", 5)==0) {
1877  GWEN_TEXT__APPENDCHAR(0xc3);
1878  GWEN_TEXT__APPENDCHAR(0xa4);
1879  src+=6;
1880  match=1;
1881  }
1882  else if (strncmp(src+1, "ouml;", 5)==0) {
1883  GWEN_TEXT__APPENDCHAR(0xc3);
1884  GWEN_TEXT__APPENDCHAR(0xb6);
1885  src+=6;
1886  match=1;
1887  }
1888  else if (strncmp(src+1, "uuml;", 5)==0) {
1889  GWEN_TEXT__APPENDCHAR(0xc3);
1890  GWEN_TEXT__APPENDCHAR(0xbc);
1891  src+=6;
1892  match=1;
1893  }
1894  else {
1895  const GWEN_TEXT_ESCAPE_ENTRY *e;
/* NOTE(review): listing line 1896 is missing here; presumably it points e at the first entry of the XML escape table -- confirm */
1897  while(e->replace) {
1898  int l;
1899 
1900  l=strlen(e->replace);
1901  if (strncasecmp(src, e->replace, l)==0) {
/* NOTE(review): listing line 1902 is missing here; presumably it appends e->character to the output -- confirm */
1903  //GWEN_Buffer_AppendByte(buf, e->character);
1904  src+=l;
1905  match=1;
1906  break;
1907  }
1908  e++;
1909  } /* while */
1910  }
1911  }
/* nothing was replaced: copy the current character and move on */
1912  if (!match) {
1913  GWEN_TEXT__APPENDCHAR(*(src++));
1914  }
1915  } /* while */
1916 
/* commit the bytes written since the last (re)allocation */
1917  if (bytesAdded) {
1918  GWEN_Buffer_IncrementPos(buf, bytesAdded);
/* NOTE(review): listing line 1919 is missing here; the macro above pairs GWEN_Buffer_IncrementPos with GWEN_Buffer_AdjustUsedBytes, presumably the same happens here -- confirm */
1920  }
1921 
1922  return 0;
1923 #undef GWEN_TEXT__APPENDCHAR
1924  }
1925 
1926 
1927 
/*
 * Appends the NUL-terminated string src to buf, looking up every character
 * in an escape table; characters without a table entry are appended
 * unchanged. Always returns 0.
 *
 * NOTE(review): two lines of this function are missing from this listing
 * (the embedded numbering skips 1936 and 1939); see the notes at the
 * respective places.
 */
1928  int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf) {
1929  while(*src) {
1930  unsigned char x;
1931  const GWEN_TEXT_ESCAPE_ENTRY *e;
1932  int match;
1933 
1934  match=0;
1935  x=(unsigned char)*src;
/* NOTE(review): listing line 1936 is missing here; presumably it points e at the first entry of the XML escape table -- confirm */
1937  while(e->replace) {
1938  if (x==e->character) {
/* NOTE(review): listing line 1939 is missing here; presumably it appends e->replace to buf -- confirm */
1940  match=1;
1941  break;
1942  }
1943  e++;
1944  } /* while */
1945 
1946  if (!match) {
/* the "0 &&" makes this branch unreachable: bytes above 127 are not turned into numeric references */
1947  if (0 && x>127) { /* disabled */
1948  char numbuf[32];
1949 
1950  snprintf(numbuf, sizeof(numbuf), "&#%d;", x);
1951  GWEN_Buffer_AppendString(buf, numbuf);
1952  }
1953  else
1954  GWEN_Buffer_AppendByte(buf, *src);
1955  }
1956  src++;
1957  } /* while */
1958 
1959  return 0;
1960  }
1961 
1962 
1963 
/*
 * Appends len bytes of text to tbuf, converting them from fromCharset to
 * toCharset with iconv where possible.
 *
 * Nothing is appended when len is 0. A conversion is only attempted when both
 * charset names are non-empty, differ (compared case-insensitively) and the
 * code was built with HAVE_ICONV. In all other cases, and also when
 * iconv_open() fails, the text is appended unconverted.
 *
 * Returns 0 on success (including the unconverted cases) and
 * GWEN_ERROR_GENERIC if iconv() reports an error.
 *
 * NOTE(review): one line of this function is missing from this listing (the
 * embedded numbering skips 1972); see the note at that place.
 */
1964  int GWEN_Text_ConvertCharset(const char *fromCharset,
1965  const char *toCharset,
1966  const char *text, int len,
1967  GWEN_BUFFER *tbuf) {
1968  if (len) {
1969  if (fromCharset && *fromCharset && toCharset && *toCharset &&
1970  strcasecmp(fromCharset, toCharset)!=0) {
/* NOTE(review): this block is guarded by HAVE_ICONV while the include at the top of the file is guarded by HAVE_ICONV_H -- confirm both are set together */
1971 #ifndef HAVE_ICONV
/* NOTE(review): listing line 1972 is missing here; presumably the opening of a logging macro call that takes the following arguments -- confirm */
1973  "iconv not available, can not convert from \"%s\" to \"%s\"",
1974  fromCharset, toCharset);
1975 #else
1976  iconv_t ic;
1977 
1978  ic=iconv_open(toCharset, fromCharset);
/* on failure only a message is logged; control then reaches the unconverted append at the end */
1979  if (ic==((iconv_t)-1)) {
1980  DBG_ERROR(GWEN_LOGDOMAIN, "Charset \"%s\" or \"%s\" not available",
1981  fromCharset, toCharset);
1982  }
1983  else {
1984  char *outbuf;
1985  char *pOutbuf;
1986  /* Some systems have iconv in libc, some have it in libiconv
1987  (OSF/1 and those with the standalone portable GNU libiconv
1988  installed). Check which one is available. The define
1989  ICONV_CONST will be "" or "const" accordingly. */
1990  ICONV_CONST char *pInbuf;
1991  size_t inLeft;
1992  size_t outLeft;
1993  size_t done;
1994  size_t space;
1995 
1996  /* convert */
1997  pInbuf=(char*)text;
1998 
/* the output area is fixed at twice the input length. NOTE(review): a conversion that needs more room makes iconv() fail and this function return an error -- confirm that is acceptable */
1999  outLeft=len*2;
2000  space=outLeft;
2001  outbuf=(char*)malloc(outLeft);
2002  assert(outbuf);
2003 
2004  inLeft=len;
2005  pInbuf=(char*)text;
2006  pOutbuf=outbuf;
2007  done=iconv(ic, &pInbuf, &inLeft, &pOutbuf, &outLeft);
2008  if (done==(size_t)-1) {
2009  DBG_ERROR(GWEN_LOGDOMAIN, "Error in conversion: %s (%d)",
2010  strerror(errno), errno);
2011  free(outbuf);
2012  iconv_close(ic);
2013  return GWEN_ERROR_GENERIC;
2014  }
2015 
/* iconv() decreased outLeft by the number of bytes it produced */
2016  GWEN_Buffer_AppendBytes(tbuf, outbuf, space-outLeft);
2017  free(outbuf);
2018  DBG_DEBUG(GWEN_LOGDOMAIN, "Conversion done.");
2019  iconv_close(ic);
2020  return 0;
2021  }
2022 #endif
2023  }
2024 
/* no conversion requested or possible: append the text unchanged */
2025  GWEN_Buffer_AppendBytes(tbuf, text, len);
2026  }
2027  return 0;
2028  }
2029