remove submodule

This commit is contained in:
Nikolaj Schlej 2016-06-26 10:14:44 +02:00
parent a2484fdb5f
commit 9bd71281b9
22 changed files with 17798 additions and 1 deletions

@ -1 +0,0 @@
Subproject commit 208b1f2a4dfc96b806ed499bd1909e87ec15981d

28
bstrlib/LICENSE Normal file
View File

@ -0,0 +1,28 @@
Copyright (c) 2014, Paul Hsieh
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of bstrlib nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

34
bstrlib/README.md Normal file
View File

@ -0,0 +1,34 @@
The Better String Library
The Better String Library is an abstraction of a string data type which is
superior to the C library char buffer string type, or C++'s std::string.
Among the features achieved are:
- Substantial mitigation of buffer overflow/overrun problems and other
failures that result from erroneous usage of the common C string
library functions
- Significantly simplified string manipulation
- High performance interoperability with other source/libraries which
expect '\0' terminated char buffers
- Improved overall performance of common string operations
- Functional equivalency with other more modern languages
The library is totally stand alone, portable (known to work with gcc/g++,
MSVC++, Intel C++, WATCOM C/C++, Turbo C, Borland C++, IBM's native CC
compiler on Windows, Linux and Mac OS X), high performance, easy to use and
is not part of some other collection of data structures. Even the file I/O
functions are totally abstracted (so that other stream-like mechanisms, like
sockets, can be used.) Nevertheless, it is adequate as a complete
replacement of the C string library for string manipulation in any C program.
The library includes a robust C++ wrapper that uses overloaded operators,
rich constructors, exceptions, stream I/O and STL to make the CBString
struct a natural and powerful string abstraction with more functionality and
higher performance than std::string.
Bstrlib is stable, well tested and suitable for any software production
environment.

82
bstrlib/bsafe.c Normal file
View File

@ -0,0 +1,82 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license. Refer to the accompanying documentation for details on usage and
* license.
*/
/*
* bsafe.c
*
* This is an optional module that can be used to help enforce a safety
* standard based on pervasive usage of bstrlib. This file is not necessarily
* portable, however, it has been tested to work correctly with Intel's C/C++
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
*/
#include <stdio.h>
#include <stdlib.h>
#include "bsafe.h"
static int bsafeShouldExit = 1;
char * strcpy (char *dst, const char *src);
char * strcat (char *dst, const char *src);
/*
 * Trap for strcpy(). bsafe.h (included above) rewrites strcpy(a,b) to
 * bsafe_strcpy(a,b), so this definition is emitted under that name.
 * Neither argument is used: a diagnostic is written to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * strcpy (char *dst, const char *src) {
	(void) src;
	(void) dst;
	fputs ("bsafe error: strcpy() is not safe, use bstrcpy instead.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}
/*
 * Trap for strcat(). bsafe.h (included above) rewrites strcat(a,b) to
 * bsafe_strcat(a,b), so this definition is emitted under that name.
 * Neither argument is used: a diagnostic is written to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * strcat (char *dst, const char *src) {
	(void) src;
	(void) dst;
	fputs ("bsafe error: strcat() is not safe, use bstrcat instead.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}
#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
/*
 * Trap for gets(), compiled only for the toolchains selected above.
 * The buffer is never written: a diagnostic goes to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * (gets) (char * buf) {
	(void) buf;
	fputs ("bsafe error: gets() is not safe, use bgets.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}
#endif
/*
 * Trap for strncpy(). The name is parenthesized so that a function-like
 * macro of the same name is not expanded at the definition.
 * No argument is used: a diagnostic is written to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * (strncpy) (char *dst, const char *src, size_t n) {
	(void) n;
	(void) src;
	(void) dst;
	fputs ("bsafe error: strncpy() is not safe, use bmidstr instead.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}
/*
 * Trap for strncat(). The name is parenthesized so that a function-like
 * macro of the same name is not expanded at the definition.
 * No argument is used: a diagnostic is written to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * (strncat) (char *dst, const char *src, size_t n) {
	(void) n;
	(void) src;
	(void) dst;
	fputs ("bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}
/*
 * Trap for strtok(). The name is parenthesized so that a function-like
 * macro of the same name is not expanded at the definition.
 * No argument is used: a diagnostic is written to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * (strtok) (char *s1, const char *s2) {
	(void) s2;
	(void) s1;
	fputs ("bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}
/*
 * Trap for strdup(). The name is parenthesized so that a function-like
 * macro of the same name is not expanded at the definition.
 * The argument is not used: a diagnostic is written to stderr and, while
 * bsafeShouldExit is non-zero, the process exits with status -1.
 * Returns NULL if execution continues.
 */
char * (strdup) (const char *s) {
	(void) s;
	fputs ("bsafe error: strdup() is not safe, use bstrcpy.\n", stderr);
	if (bsafeShouldExit != 0) {
		exit (-1);
	}
	return NULL;
}

43
bstrlib/bsafe.h Normal file
View File

@ -0,0 +1,43 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2004, and is covered by the BSD open source
* license. Refer to the accompanying documentation for details on usage and
* license.
*/
/*
* bsafe.h
*
* This is an optional module that can be used to help enforce a safety
* standard based on pervasive usage of bstrlib. This file is not necessarily
* portable, however, it has been tested to work correctly with Intel's C/C++
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
*/
#ifndef BSTRLIB_BSAFE_INCLUDE
#define BSTRLIB_BSAFE_INCLUDE
#ifdef __cplusplus
extern "C" {
#endif
#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
/* This is caught in the linker, so it's not necessary for gcc. */
extern char * (gets) (char * buf);
#endif
extern char * (strncpy) (char *dst, const char *src, size_t n);
extern char * (strncat) (char *dst, const char *src, size_t n);
extern char * (strtok) (char *s1, const char *s2);
extern char * (strdup) (const char *s);
/*
 * strcpy and strcat are trapped by redirecting calls to bsafe_strcpy and
 * bsafe_strcat; any macro definitions already in effect are removed first.
 * bsafe.c includes this header before defining strcpy/strcat, so its
 * definitions are emitted under the bsafe_* names.
 *
 * The replacements are declared here so that a redirected call sees a
 * prototype: C99 and later, and C++, do not allow calling an undeclared
 * function.
 */
extern char * bsafe_strcpy (char *dst, const char *src);
extern char * bsafe_strcat (char *dst, const char *src);
#undef strcpy
#undef strcat
#define strcpy(a,b) bsafe_strcpy(a,b)
#define strcat(a,b) bsafe_strcat(a,b)
#ifdef __cplusplus
}
#endif
#endif

3689
bstrlib/bstest.c Normal file

File diff suppressed because it is too large Load Diff

1161
bstrlib/bstraux.c Normal file

File diff suppressed because it is too large Load Diff

115
bstrlib/bstraux.h Normal file
View File

@ -0,0 +1,115 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* bstraux.h
*
* This file is not a necessary part of the core bstring library itself, but
* is just an auxiliary module which includes miscellaneous or trivial
* functions.
*/
#ifndef BSTRAUX_INCLUDE
#define BSTRAUX_INCLUDE
#include <time.h>
#include "bstrlib.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Safety mechanisms */
/* bstrDeclare: declare a bstring variable named b, initialized to NULL. */
#define bstrDeclare(b) bstring (b) = NULL;
/*
 * bstrFree: when b is non-NULL and its header looks sane (slen >= 0 and
 * mlen >= slen), pass it to bdestroy and reset b to NULL. Otherwise b is
 * left untouched. b is evaluated several times and must be an lvalue.
 */
#define bstrFree(b) { \
	if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { \
		bdestroy (b); \
		(b) = NULL; \
	} \
}
/* Backward compatibility with previous versions of Bstrlib */
#if !defined(BSTRLIB_REDUCE_NAMESPACE_POLLUTION)
#define bAssign(a,b) ((bassign)((a), (b)))
#define bSubs(b,pos,len,a,c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c)))
#define bStrchr(b,c) ((bstrchr)((b), (c)))
#define bStrchrFast(b,c) ((bstrchr)((b), (c)))
#define bCatCstr(b,s) ((bcatcstr)((b), (s)))
#define bCatBlk(b,s,len) ((bcatblk)((b),(s),(len)))
#define bCatStatic(b,s) bcatStatic(b,s)
#define bTrunc(b,n) ((btrunc)((b), (n)))
#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos)))
#define bUppercase(b) ((btoupper)(b))
#define bLowercase(b) ((btolower)(b))
#define bCaselessCmp(a,b) ((bstricmp)((a), (b)))
#define bCaselessNCmp(a,b,n) ((bstrnicmp)((a), (b), (n)))
#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL))
#define bUuDecode(b) (bUuDecodeEx ((b), NULL))
#endif
/* Unusual functions */
extern struct bStream * bsFromBstr (const_bstring b);
extern bstring bTail (bstring b, int n);
extern bstring bHead (bstring b, int n);
extern int bSetCstrChar (bstring a, int pos, char c);
extern int bSetChar (bstring b, int pos, char c);
extern int bFill (bstring a, char c, int len);
extern int bReplicate (bstring b, int n);
extern int bReverse (bstring b);
extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill);
extern bstring bStrfTime (const char * fmt, const struct tm * timeptr);
#define bAscTime(t) (bStrfTime ("%c\n", (t)))
#define bCTime(t) ((t) ? bAscTime (localtime (t)) : NULL)
/* Spacing formatting */
extern int bJustifyLeft (bstring b, int space);
extern int bJustifyRight (bstring b, int width, int space);
extern int bJustifyMargin (bstring b, int width, int space);
extern int bJustifyCenter (bstring b, int width, int space);
/* Esoteric standards specific functions */
extern char * bStr2NetStr (const_bstring b);
extern bstring bNetStr2Bstr (const char * buf);
extern bstring bBase64Encode (const_bstring b);
extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError);
extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines);
extern bstring bUuDecodeEx (const_bstring src, int * badlines);
extern bstring bUuEncode (const_bstring src);
extern bstring bYEncode (const_bstring src);
extern bstring bYDecode (const_bstring src);
extern int bSGMLEncode (bstring b);
/* Writable stream */
typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm);
struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm);
int bwsWriteBstr (struct bwriteStream * stream, const_bstring b);
int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len);
int bwsWriteFlush (struct bwriteStream * stream);
int bwsIsEOF (const struct bwriteStream * stream);
int bwsBuffLength (struct bwriteStream * stream, int sz);
void * bwsClose (struct bwriteStream * stream);
/* Security functions */
/*
 * bSecureDestroy: overwrite the whole buffer of b (mlen bytes starting at
 * data) with zeros and then hand b to bdestroy. Nothing is done, and
 * bdestroy is not called, when b is NULL, its mlen is <= 0, or its data
 * pointer is NULL. b is evaluated exactly once.
 *
 * NOTE(review): assuming bdestroy releases the buffer, a plain memset
 * directly before it is a store an optimizing compiler may be allowed to
 * drop; confirm whether a non-elidable wipe is required here.
 */
#define bSecureDestroy(b) { \
bstring bstr__tmp = (b); \
if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \
(void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \
bdestroy (bstr__tmp); \
} \
}
/*
 * bSecureWriteProtect: t is a struct tagbstring (not a pointer). When
 * t.mlen is non-negative, the bytes from data[slen] up to data[mlen - 1]
 * are zeroed (only if mlen > slen) and mlen is set to -1. A t whose mlen
 * is already negative is left unchanged. t is evaluated several times.
 *
 * The inner condition previously carried an unbalanced ')' which made
 * every expansion of this macro a syntax error.
 */
#define bSecureWriteProtect(t) { \
	if ((t).mlen >= 0) { \
		if ((t).mlen > (t).slen) { \
			(void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \
		} \
		(t).mlen = -1; \
	} \
}
extern bstring bSecureInput (int maxlen, int termchar,
bNgetc vgetchar, void * vgcCtx);
#ifdef __cplusplus
}
#endif
#endif

3153
bstrlib/bstrlib.c Normal file

File diff suppressed because it is too large Load Diff

316
bstrlib/bstrlib.h Normal file
View File

@ -0,0 +1,316 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* bstrlib.h
*
* This file is the interface for the core bstring functions.
*/
#ifndef BSTRLIB_INCLUDE
#define BSTRLIB_INCLUDE
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
# if defined (__TURBOC__) && !defined (__BORLANDC__)
# define BSTRLIB_NOVSNP
# endif
#endif
#define BSTR_ERR (-1)
#define BSTR_OK (0)
#define BSTR_BS_BUFF_LENGTH_GET (0)
typedef struct tagbstring * bstring;
typedef const struct tagbstring * const_bstring;
/* Copy functions */
#define cstr2bstr bfromcstr
extern bstring bfromcstr (const char * str);
extern bstring bfromcstralloc (int mlen, const char * str);
extern bstring bfromcstrrangealloc (int minl, int maxl, const char* str);
extern bstring blk2bstr (const void * blk, int len);
extern char * bstr2cstr (const_bstring s, char z);
extern int bcstrfree (char * s);
extern bstring bstrcpy (const_bstring b1);
extern int bassign (bstring a, const_bstring b);
extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
extern int bassigncstr (bstring a, const char * str);
extern int bassignblk (bstring a, const void * s, int len);
/* Destroy function */
extern int bdestroy (bstring b);
/* Space allocation hinting functions */
extern int balloc (bstring s, int len);
extern int ballocmin (bstring b, int len);
/* Substring extraction */
extern bstring bmidstr (const_bstring b, int left, int len);
/* Various standard manipulations */
extern int bconcat (bstring b0, const_bstring b1);
extern int bconchar (bstring b0, char c);
extern int bcatcstr (bstring b, const char * s);
extern int bcatblk (bstring b, const void * s, int len);
extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
extern int binsertblk (bstring s1, int pos, const void * s2, int len, unsigned char fill);
extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
extern int bdelete (bstring s1, int pos, int len);
extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
extern int btrunc (bstring b, int n);
/* Scan/search functions */
extern int bstricmp (const_bstring b0, const_bstring b1);
extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
extern int biseqcaseless (const_bstring b0, const_bstring b1);
extern int biseqcaselessblk (const_bstring b, const void * blk, int len);
extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
extern int biseq (const_bstring b0, const_bstring b1);
extern int biseqblk (const_bstring b, const void * blk, int len);
extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
extern int biseqcstr (const_bstring b, const char * s);
extern int biseqcstrcaseless (const_bstring b, const char * s);
extern int bstrcmp (const_bstring b0, const_bstring b1);
extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
extern int binstr (const_bstring s1, int pos, const_bstring s2);
extern int binstrr (const_bstring s1, int pos, const_bstring s2);
extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
extern int bstrchrp (const_bstring b, int c, int pos);
extern int bstrrchrp (const_bstring b, int c, int pos);
#define bstrchr(b,c) bstrchrp ((b), (c), 0)
#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
extern int binchr (const_bstring b0, int pos, const_bstring b1);
extern int binchrr (const_bstring b0, int pos, const_bstring b1);
extern int bninchr (const_bstring b0, int pos, const_bstring b1);
extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
/* List of string container functions */
/*
 * Container used by the bstrList* functions and returned by the bsplit
 * family declared in this header.
 * NOTE(review): presumably qty is the number of entries in use and mlen
 * the number of slots allocated in entry -- confirm against bstrlib.c.
 */
struct bstrList {
int qty, mlen;
bstring * entry;
};
extern struct bstrList * bstrListCreate (void);
extern int bstrListDestroy (struct bstrList * sl);
extern int bstrListAlloc (struct bstrList * sl, int msz);
extern int bstrListAllocMin (struct bstrList * sl, int msz);
/* String split and join functions */
extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
extern bstring bjoinblk (const struct bstrList * bl, const void * s, int len);
extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
int (* cb) (void * parm, int ofs, int len), void * parm);
extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
int (* cb) (void * parm, int ofs, int len), void * parm);
extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
int (* cb) (void * parm, int ofs, int len), void * parm);
/* Miscellaneous functions */
extern int bpattern (bstring b, int len);
extern int btoupper (bstring b);
extern int btolower (bstring b);
extern int bltrimws (bstring b);
extern int brtrimws (bstring b);
extern int btrimws (bstring b);
#if !defined (BSTRLIB_NOVSNP)
extern bstring bformat (const char * fmt, ...);
extern int bformata (bstring b, const char * fmt, ...);
extern int bassignformat (bstring b, const char * fmt, ...);
extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
/*
 * bvformata: retry loop around bvcformata for use inside a variadic
 * function. lastarg is handed to va_start, so it must name the last fixed
 * parameter of the enclosing function; the argument list is restarted on
 * every attempt.
 *
 * The first attempt passes a size of 16. A non-negative result from
 * bvcformata ends the loop with BSTR_OK. A negative result whose magnitude
 * does not exceed the size just tried ends the loop with BSTR_ERR.
 * Otherwise the magnitude becomes the size for the next attempt. The final
 * status is assigned to ret, which must be an assignable expression.
 */
#define bvformata(ret, b, fmt, lastarg) { \
bstring bstrtmp_b = (b); \
const char * bstrtmp_fmt = (fmt); \
int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
for (;;) { \
va_list bstrtmp_arglist; \
va_start (bstrtmp_arglist, lastarg); \
bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
va_end (bstrtmp_arglist); \
if (bstrtmp_r >= 0) { /* Everything went ok */ \
bstrtmp_r = BSTR_OK; \
break; \
} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
bstrtmp_r = BSTR_ERR; \
break; \
} \
bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
} \
ret = bstrtmp_r; \
}
#endif
typedef int (*bNgetc) (void *parm);
typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
/* Input functions */
extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
extern bstring bread (bNread readPtr, void * parm);
extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
extern int breada (bstring b, bNread readPtr, void * parm);
/* Stream functions */
extern struct bStream * bsopen (bNread readPtr, void * parm);
extern void * bsclose (struct bStream * s);
extern int bsbufflength (struct bStream * s, int sz);
extern int bsreadln (bstring b, struct bStream * s, char terminator);
extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
extern int bsread (bstring b, struct bStream * s, int n);
extern int bsreadlna (bstring b, struct bStream * s, char terminator);
extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
extern int bsreada (bstring b, struct bStream * s, int n);
extern int bsunread (struct bStream * s, const_bstring b);
extern int bspeek (bstring r, const struct bStream * s);
extern int bssplitscb (struct bStream * s, const_bstring splitStr,
int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
extern int bseof (const struct bStream * s);
/*
 * Header of a bstring; bstring and const_bstring are pointers to this type.
 */
struct tagbstring {
/* Values <= 0 are reported as write-protected by biswriteprotected.
 * NOTE(review): positive values presumably give the allocated size of
 * data -- confirm against bstrlib.c. */
int mlen;
/* Length reported by blength; negative values are treated as invalid. */
int slen;
/* Character storage, exposed as char * by the bdata accessors. */
unsigned char * data;
};
/* Accessor macros */
/*
 * All of these are expression macros that evaluate b (and sometimes p)
 * more than once, so arguments should be free of side effects.
 */
/* blengthe: slen of b, or e converted to int when b is NULL or slen < 0. */
#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
/* blength: blengthe with 0 as the fallback. */
#define blength(b) (blengthe ((b), 0))
/* bdataofse: data of b as char *, advanced by o; e (cast to char *) when
 * b or its data pointer is NULL. No range check is applied to o. */
#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
/* bdataofs: bdataofse with a NULL fallback. */
#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
/* bdatae: bdataofse at offset 0 with fallback e. */
#define bdatae(b, e) (bdataofse (b, 0, e))
/* bdata: bdataofs at offset 0. */
#define bdata(b) (bdataofs (b, 0))
/* bchare: data[p] when p, compared as unsigned, is below blength(b);
 * otherwise e. The unsigned comparison also rejects negative p. */
#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
/* bchar: bchare with '\0' as the fallback. */
#define bchar(b, p) bchare ((b), (p), '\0')
/* Static constant string initialization macro */
/*
 * bsStaticMlen: brace initializer for a struct tagbstring with mlen = m,
 * slen = sizeof(q) - 1 and data pointing at q. Writing q between two empty
 * string literals only compiles when q is itself a string literal.
 */
#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
/*
 * bsStatic: bsStaticMlen with a negative mlen (-32 under _MSC_VER, the
 * negated source line number elsewhere), which biswriteprotected reports
 * as write-protected.
 */
#if defined(_MSC_VER)
# define bsStatic(q) bsStaticMlen(q,-32)
#endif
#ifndef bsStatic
# define bsStatic(q) bsStaticMlen(q,-__LINE__)
#endif
/* Static constant block parameter pair */
#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
#define bcatStatic(b,s) ((bcatblk)((b), bsStaticBlkParms(s)))
#define bfromStatic(s) ((blk2bstr)(bsStaticBlkParms(s)))
#define bassignStatic(b,s) ((bassignblk)((b), bsStaticBlkParms(s)))
#define binsertStatic(b,p,s,f) ((binsertblk)((b), (p), bsStaticBlkParms(s), (f)))
#define bjoinStatic(b,s) ((bjoinblk)((b), bsStaticBlkParms(s)))
#define biseqStatic(b,s) ((biseqblk)((b), bsStaticBlkParms(s)))
#define bisstemeqStatic(b,s) ((bisstemeqblk)((b), bsStaticBlkParms(s)))
#define biseqcaselessStatic(b,s) ((biseqcaselessblk)((b), bsStaticBlkParms(s)))
#define bisstemeqcaselessStatic(b,s) ((bisstemeqcaselessblk)((b), bsStaticBlkParms(s)))
/* Reference building macros */
#define cstr2tbstr btfromcstr
/*
 * btfromcstr: make the struct tagbstring t refer to the C string s without
 * copying it. data is set to s, slen to strlen(s) (0 when s is NULL) and
 * mlen to -1. t is evaluated several times; s is evaluated once.
 */
#define btfromcstr(t,s) { \
	(t).data = (unsigned char *) (s); \
	if ((t).data) { \
		(t).slen = (int) (strlen) ((char *)(t).data); \
	} else { \
		(t).slen = 0; \
	} \
	(t).mlen = -1; \
}
/*
 * blk2tbstr: make the struct tagbstring t refer to the block s of length l
 * without copying it. data is set to s, slen to l and mlen to -1. Neither
 * s nor l is validated.
 */
#define blk2tbstr(t,s,l) { \
	(t).data = (unsigned char *) (s); \
	(t).slen = (l); \
	(t).mlen = -1; \
}
#define btfromblk(t,s,l) blk2tbstr(t,s,l)
/*
 * bmid2tbstr: make the struct tagbstring t refer to the sub-range of b that
 * starts at position p and spans at most l characters. Nothing is copied;
 * t.data points into b's buffer.
 *
 * A negative p is treated as 0 with the length shortened by the same
 * amount, and the length is clipped so the range ends at b's slen. When b
 * is NULL, has no data, has a negative slen, or the clipped length is not
 * positive, t refers to an empty string literal with slen 0. mlen is
 * always set to the negated source line number. b, p and l are each
 * evaluated at most once; t is only written after b has been read.
 */
#define bmid2tbstr(t,b,p,l) { \
const_bstring bstrtmp_s = (b); \
if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
int bstrtmp_left = (p); \
int bstrtmp_len = (l); \
if (bstrtmp_left < 0) { \
bstrtmp_len += bstrtmp_left; \
bstrtmp_left = 0; \
} \
if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
if (bstrtmp_len <= 0) { \
(t).data = (unsigned char *)""; \
(t).slen = 0; \
} else { \
(t).data = bstrtmp_s->data + bstrtmp_left; \
(t).slen = bstrtmp_len; \
} \
} else { \
(t).data = (unsigned char *)""; \
(t).slen = 0; \
} \
(t).mlen = -__LINE__; \
}
/*
 * btfromblkltrimws: make the struct tagbstring t refer to the block s of
 * length l with leading isspace() characters skipped. Nothing is copied.
 * When s is NULL or l is negative no scan takes place and t receives s and
 * l unchanged. mlen is set to the negated source line number.
 */
#define btfromblkltrimws(t,s,l) { \
	int bstrtmp_idx = 0; \
	int bstrtmp_len = (l); \
	unsigned char * bstrtmp_s = (s); \
	if (bstrtmp_s && bstrtmp_len >= 0) { \
		while (bstrtmp_idx < bstrtmp_len && isspace (bstrtmp_s[bstrtmp_idx])) { \
			bstrtmp_idx++; \
		} \
	} \
	(t).data = bstrtmp_s + bstrtmp_idx; \
	(t).slen = bstrtmp_len - bstrtmp_idx; \
	(t).mlen = -__LINE__; \
}
/*
 * btfromblkrtrimws: make the struct tagbstring t refer to the block s of
 * length l with trailing isspace() characters dropped. Nothing is copied;
 * data is always s. When s is NULL or l is not positive no scan takes
 * place and slen is l. mlen is set to the negated source line number.
 */
#define btfromblkrtrimws(t,s,l) { \
	int bstrtmp_len = (l) - 1; \
	unsigned char * bstrtmp_s = (s); \
	if (bstrtmp_s && bstrtmp_len >= 0) { \
		while (bstrtmp_len >= 0 && isspace (bstrtmp_s[bstrtmp_len])) { \
			bstrtmp_len--; \
		} \
	} \
	(t).data = bstrtmp_s; \
	(t).slen = bstrtmp_len + 1; \
	(t).mlen = -__LINE__; \
}
/*
 * btfromblktrimws: make the struct tagbstring t refer to the block s of
 * length l with isspace() characters removed from both ends. Nothing is
 * copied. Leading whitespace is skipped first, then trailing whitespace
 * down to the first kept character. When s is NULL or l is not positive no
 * scan takes place. mlen is set to the negated source line number.
 */
#define btfromblktrimws(t,s,l) { \
	int bstrtmp_idx = 0; \
	int bstrtmp_len = (l) - 1; \
	unsigned char * bstrtmp_s = (s); \
	if (bstrtmp_s && bstrtmp_len >= 0) { \
		while (bstrtmp_idx <= bstrtmp_len && isspace (bstrtmp_s[bstrtmp_idx])) { \
			bstrtmp_idx++; \
		} \
		while (bstrtmp_len >= bstrtmp_idx && isspace (bstrtmp_s[bstrtmp_len])) { \
			bstrtmp_len--; \
		} \
	} \
	(t).data = bstrtmp_s + bstrtmp_idx; \
	(t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
	(t).mlen = -__LINE__; \
}
/* Write protection macros */
/* bwriteprotect: set t.mlen to -1 unless it is already negative. */
#define bwriteprotect(t) { \
	if ((t).mlen >= 0) { \
		(t).mlen = -1; \
	} \
}
/*
 * bwriteallow: undo bwriteprotect. Only acts when t.mlen is exactly -1;
 * mlen then becomes slen, or 1 when slen is 0, so it ends up positive.
 */
#define bwriteallow(t) { \
	if ((t).mlen == -1) { \
		(t).mlen = ((t).slen == 0) ? 1 : (t).slen; \
	} \
}
/* biswriteprotected: non-zero when t.mlen is zero or negative. */
#define biswriteprotected(t) (0 >= (t).mlen)
#ifdef __cplusplus
}
#endif
#endif

3512
bstrlib/bstrlib.txt Normal file

File diff suppressed because it is too large Load Diff

1721
bstrlib/bstrwrap.cpp Normal file

File diff suppressed because it is too large Load Diff

446
bstrlib/bstrwrap.h Normal file
View File

@ -0,0 +1,446 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* bstrwrap.h
*
* This file is the C++ wrapper for the bstring functions.
*/
#ifndef BSTRWRAP_INCLUDE
#define BSTRWRAP_INCLUDE
/////////////////// Configuration defines //////////////////////////////
// WATCOM C/C++ has broken STL and std::iostream support. If you have
// ported over STLport, then you can #define BSTRLIB_CAN_USE_STL to use
// the CBStringList class.
#if defined(__WATCOMC__)
# if !defined (BSTRLIB_CAN_USE_STL) && !defined (BSTRLIB_CANNOT_USE_STL)
# define BSTRLIB_CANNOT_USE_STL
# endif
# if !defined (BSTRLIB_CAN_USE_IOSTREAM) && !defined (BSTRLIB_CANNOT_USE_IOSTREAM)
# define BSTRLIB_CANNOT_USE_IOSTREAM
# endif
#endif
// By default it is assumed that STL has been installed and works for your
// compiler. If this is not the case, then #define BSTRLIB_CANNOT_USE_STL
#if !defined (BSTRLIB_CANNOT_USE_STL) && !defined (BSTRLIB_CAN_USE_STL)
#define BSTRLIB_CAN_USE_STL
#endif
// By default it is assumed that std::iostream works well with your compiler.
// If this is not the case, then #define BSTRLIB_CANNOT_USE_IOSTREAM
#if !defined (BSTRLIB_CANNOT_USE_IOSTREAM) && !defined (BSTRLIB_CAN_USE_IOSTREAM)
#define BSTRLIB_CAN_USE_IOSTREAM
#endif
// By default it is assumed that your compiler can deal with and has enabled
// exception handling. If this is not the case then you will need to
// #define BSTRLIB_DOESNT_THROW_EXCEPTIONS
#if !defined (BSTRLIB_THROWS_EXCEPTIONS) && !defined (BSTRLIB_DOESNT_THROW_EXCEPTIONS)
#define BSTRLIB_THROWS_EXCEPTIONS
#endif
////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include "bstrlib.h"
#include "../common/ubytearray.h"
#ifdef __cplusplus
#if defined(BSTRLIB_CAN_USE_STL)
#if defined(__WATCOMC__)
#pragma warning 604 10
#pragma warning 595 10
#pragma warning 594 10
#pragma warning 549 10
#endif
#include <vector>
#include <string>
#if defined(__WATCOMC__)
#pragma warning 604 9
#pragma warning 595 9
#pragma warning 594 9
#endif
#endif
namespace Bstrlib {
#ifdef BSTRLIB_THROWS_EXCEPTIONS
#if defined(BSTRLIB_CAN_USE_STL)
// Exception type thrown by the wrapper when the STL is available. The
// message is stored in a std::string and exposed through what().
struct CBStringException : public std::exception {
private:
	std::string msg;
public:
	// Taking the message by const reference avoids copying it twice
	// (once into the parameter, once into msg).
	CBStringException (const std::string& inmsg) : msg(inmsg) {}
	virtual ~CBStringException () throw () {}
	// The returned pointer stays valid for the lifetime of this object.
	virtual const char *what () const throw () { return msg.c_str(); }
};
#else
// Exception type thrown by the wrapper when the STL is not available.
//
// The message is normally a private heap copy of the constructor argument;
// needToFree records whether msg must be released. When the argument is
// NULL or the allocation fails, msg points at a string literal instead.
//
// bstringThrow throws a local object by value, which copies it, so copying
// must duplicate the heap message. With the implicit copy operations two
// objects would share one allocation and free it twice.
struct CBStringException {
private:
	const char * msg;
	int needToFree;

	// Point msg at a heap copy of inmsg, or at a literal on NULL / OOM.
	void setMessage (const char * inmsg) {
		needToFree = 0;
		if (inmsg) {
			size_t sz = 1 + strlen (inmsg);
			char * dup = (char *) malloc (sz);
			if (NULL == dup) {
				msg = "Out of memory";
			} else {
				memcpy (dup, inmsg, sz);
				msg = dup;
				needToFree = 1;
			}
		} else {
			msg = "NULL exception message";
		}
	}

	// Take over other's message: deep copy if it owns one, else share the literal.
	void copyFrom (const CBStringException& other) {
		if (other.needToFree) {
			setMessage (other.msg);
		} else {
			msg = other.msg;
			needToFree = 0;
		}
	}

	// Free the heap copy, if this object owns one.
	void release () {
		if (needToFree) {
			free ((void *) msg);
			needToFree = 0;
			msg = NULL;
		}
	}

public:
	CBStringException (const char * inmsg) : msg(NULL), needToFree(0) {
		setMessage (inmsg);
	}
	CBStringException (const CBStringException& other) : msg(NULL), needToFree(0) {
		copyFrom (other);
	}
	CBStringException& operator = (const CBStringException& other) {
		if (this != &other) {
			release ();
			copyFrom (other);
		}
		return *this;
	}
	virtual ~CBStringException () throw () {
		release ();
	}
	// The returned pointer stays valid for the lifetime of this object.
	virtual const char *what () const throw () { return msg; }
};
#endif
// bstringThrow: build a CBStringException whose message is "CBString::"
// followed by er, and throw it by value. er is pasted between string
// literals, so it must itself be a string literal.
#define bstringThrow(er) {\
CBStringException bstr__cppwrapper_exception ("CBString::" er "");\
throw bstr__cppwrapper_exception;\
}
#else
#define bstringThrow(er) {}
#endif
struct CBString;
#ifdef _MSC_VER
#pragma warning(disable:4512)
#endif
// Proxy for a single character position of a tagbstring. Only CBString can
// create one (the constructor is private). Assigning a character through
// the proxy honors write protection (mlen <= 0), and converting it to
// unsigned char reads the character.
//
// With exceptions enabled the constructor rejects an out-of-range index by
// throwing. With exceptions disabled bstringThrow expands to nothing, so
// every accessor re-checks the index itself before touching data.
class CBCharWriteProtected {
friend struct CBString;
	private:
		const struct tagbstring& s;
		unsigned int idx;
		// A negative i becomes a large unsigned value and fails the range test.
		CBCharWriteProtected (const struct tagbstring& c, int i) : s(c), idx((unsigned int)i) {
			if (idx >= (unsigned) s.slen) {
				bstringThrow ("character index out of bounds");
			}
		}

	public:
		// Store c at the position and return the character now held there.
		// On a write-protected string nothing is stored (an exception is
		// thrown when exceptions are enabled).
		inline char operator = (char c) {
#ifndef BSTRLIB_THROWS_EXCEPTIONS
			// Checked up front so the write-protected path below cannot
			// read data[idx] with an out-of-range index.
			if (idx >= (unsigned) s.slen) return '\0';
#endif
			if (s.mlen <= 0) {
				bstringThrow ("Write protection error");
			} else {
				s.data[idx] = (unsigned char) c;
			}
			return (char) s.data[idx];
		}
		// Same as above for unsigned char.
		inline unsigned char operator = (unsigned char c) {
#ifndef BSTRLIB_THROWS_EXCEPTIONS
			if (idx >= (unsigned) s.slen) return '\0';
#endif
			if (s.mlen <= 0) {
				bstringThrow ("Write protection error");
			} else {
				s.data[idx] = c;
			}
			return s.data[idx];
		}
		// Read the character at the position.
		inline operator unsigned char () const {
#ifndef BSTRLIB_THROWS_EXCEPTIONS
			if (idx >= (unsigned) s.slen) return (unsigned char) '\0';
#endif
			return s.data[idx];
		}
};
struct CBString : public tagbstring {
// Constructors
CBString ();
CBString (char c);
CBString (unsigned char c);
CBString (const char *s);
CBString (int len, const char *s);
CBString (const CBString& b);
CBString (const tagbstring& x);
CBString (char c, int len);
CBString (const void * blk, int len);
#if defined(BSTRLIB_CAN_USE_STL)
CBString (const struct CBStringList& l);
CBString (const struct CBStringList& l, const CBString& sep);
CBString (const struct CBStringList& l, char sep);
CBString (const struct CBStringList& l, unsigned char sep);
#endif
// Destructor
#if !defined(BSTRLIB_DONT_USE_VIRTUAL_DESTRUCTOR)
virtual
#endif
~CBString ();
// = operator
const CBString& operator = (char c);
const CBString& operator = (unsigned char c);
const CBString& operator = (const char *s);
const CBString& operator = (const CBString& b);
const CBString& operator = (const tagbstring& x);
// += operator
const CBString& operator += (char c);
const CBString& operator += (unsigned char c);
const CBString& operator += (const char *s);
const CBString& operator += (const CBString& b);
const CBString& operator += (const tagbstring& x);
// *= operator
inline const CBString& operator *= (int count) {
this->repeat (count);
return *this;
}
// + operator
const CBString operator + (char c) const;
const CBString operator + (unsigned char c) const;
const CBString operator + (const unsigned char *s) const;
const CBString operator + (const char *s) const;
const CBString operator + (const CBString& b) const;
const CBString operator + (const tagbstring& x) const;
// * operator
inline const CBString operator * (int count) const {
CBString retval (*this);
retval.repeat (count);
return retval;
}
// Comparison operators
bool operator == (const CBString& b) const;
bool operator == (const char * s) const;
bool operator == (const unsigned char * s) const;
bool operator != (const CBString& b) const;
bool operator != (const char * s) const;
bool operator != (const unsigned char * s) const;
bool operator < (const CBString& b) const;
bool operator < (const char * s) const;
bool operator < (const unsigned char * s) const;
bool operator <= (const CBString& b) const;
bool operator <= (const char * s) const;
bool operator <= (const unsigned char * s) const;
bool operator > (const CBString& b) const;
bool operator > (const char * s) const;
bool operator > (const unsigned char * s) const;
bool operator >= (const CBString& b) const;
bool operator >= (const char * s) const;
bool operator >= (const unsigned char * s) const;
// Casts
inline operator const char* () const { return (const char *)data; }
inline operator const unsigned char* () const { return (const unsigned char *)data; }
operator double () const;
operator float () const;
operator int () const;
operator unsigned int () const;
// Accessors
inline int length () const {return slen;}
inline unsigned char character (int i) const {
if (((unsigned) i) >= (unsigned) slen) {
#ifdef BSTRLIB_THROWS_EXCEPTIONS
bstringThrow ("character idx out of bounds");
#else
return '\0';
#endif
}
return data[i];
}
inline unsigned char operator [] (int i) const { return character(i); }
inline CBCharWriteProtected character (int i) {
return CBCharWriteProtected (*this, i);
}
inline CBCharWriteProtected operator [] (int i) { return character(i); }
// Space allocation hint method.
void alloc (int length);
// Search methods.
int caselessEqual (const CBString& b) const;
int caselessCmp (const CBString& b) const;
int find (const CBString& b, int pos = 0) const;
int find (const char * b, int pos = 0) const;
int caselessfind (const CBString& b, int pos = 0) const;
int caselessfind (const char * b, int pos = 0) const;
int find (char c, int pos = 0) const;
int reversefind (const CBString& b, int pos) const;
int reversefind (const char * b, int pos) const;
int caselessreversefind (const CBString& b, int pos) const;
int caselessreversefind (const char * b, int pos) const;
int reversefind (char c, int pos) const;
int findchr (const CBString& b, int pos = 0) const;
int findchr (const char * s, int pos = 0) const;
int reversefindchr (const CBString& b, int pos) const;
int reversefindchr (const char * s, int pos) const;
int nfindchr (const CBString& b, int pos = 0) const;
int nfindchr (const char * b, int pos = 0) const;
int nreversefindchr (const CBString& b, int pos) const;
int nreversefindchr (const char * b, int pos) const;
// Search and substitute methods.
void findreplace (const CBString& find, const CBString& repl, int pos = 0);
void findreplace (const CBString& find, const char * repl, int pos = 0);
void findreplace (const char * find, const CBString& repl, int pos = 0);
void findreplace (const char * find, const char * repl, int pos = 0);
void findreplacecaseless (const CBString& find, const CBString& repl, int pos = 0);
void findreplacecaseless (const CBString& find, const char * repl, int pos = 0);
void findreplacecaseless (const char * find, const CBString& repl, int pos = 0);
void findreplacecaseless (const char * find, const char * repl, int pos = 0);
// Extraction method.
const CBString midstr (int left, int len) const;
// Standard manipulation methods.
void setsubstr (int pos, const CBString& b, unsigned char fill = ' ');
void setsubstr (int pos, const char * b, unsigned char fill = ' ');
void insert (int pos, const CBString& b, unsigned char fill = ' ');
void insert (int pos, const char * b, unsigned char fill = ' ');
void insertchrs (int pos, int len, unsigned char fill = ' ');
void replace (int pos, int len, const CBString& b, unsigned char fill = ' ');
void replace (int pos, int len, const char * s, unsigned char fill = ' ');
void remove (int pos, int len);
void trunc (int len);
// Miscellaneous methods.
void format (const char * fmt, ...);
void formata (const char * fmt, ...);
void fill (int length, unsigned char fill = ' ');
void repeat (int count);
void ltrim (const CBString& b = CBString (bsStaticBlkParms (" \t\v\f\r\n")));
void rtrim (const CBString& b = CBString (bsStaticBlkParms (" \t\v\f\r\n")));
// Convenience wrapper that calls rtrim (b) and then ltrim (b), in that order.
// The default argument b is a CBString built from the static block
// " \t\v\f\r\n" (space, tab, vertical tab, form feed, CR, LF).
// NOTE(review): presumably b is the set of characters to strip from each
// end -- confirm against the rtrim/ltrim implementations.
inline void trim (const CBString& b = CBString (bsStaticBlkParms (" \t\v\f\r\n"))) {
rtrim (b);
ltrim (b);
}
void toupper ();
void tolower ();
// Write protection methods.
void writeprotect ();
void writeallow ();
// True when mlen is zero or negative, which is the condition this class
// uses to report a string as write protected.
inline bool iswriteprotected () const {
    return !(mlen > 0);
}
// Join methods.
#if defined(BSTRLIB_CAN_USE_STL)
void join (const struct CBStringList& l);
void join (const struct CBStringList& l, const CBString& sep);
void join (const struct CBStringList& l, char sep);
void join (const struct CBStringList& l, unsigned char sep);
#endif
// CBStream methods
int gets (bNgetc getcPtr, void * parm, char terminator = '\n');
int read (bNread readPtr, void * parm);
// QString compatibility methods
// QString-style emptiness test: true exactly when slen is zero.
bool isEmpty() const {
    return 0 == slen;
}
// QString-style clear: assigns the empty C string literal to this object.
void clear() {
    (*this) = "";
}
// Builds a CBString from a zero-terminated sequence of 16-bit code units.
// Naive implementation assuming that only the ASCII part of UCS2 is used:
// every unit contributes its low 8 bits as a single character.
// A NULL str yields an empty string.
static CBString fromUtf16(const ushort* str) {
    CBString msg;
    if (!str) {
        return msg;
    }
    for (; *str; ++str) {
        // Extract the low byte arithmetically instead of reading the unit
        // through a char* alias: the aliasing read picked whichever byte is
        // first in memory (the high byte on big-endian hosts) and cast
        // away const.
        msg += (char) (*str & 0xFF);
    }
    return msg;
}
// QString-style left justification. When length exceeds slen the result is
// this string followed by CBString(' ', length - slen); otherwise an
// unmodified copy of this string is returned.
CBString leftJustified(int length) {
    if (length <= slen) {
        return *this;
    }
    return *this + CBString(' ', length - slen);
}
};
extern const CBString operator + (const char *a, const CBString& b);
extern const CBString operator + (const unsigned char *a, const CBString& b);
extern const CBString operator + (char c, const CBString& b);
extern const CBString operator + (unsigned char c, const CBString& b);
extern const CBString operator + (const tagbstring& x, const CBString& b);
// count * string: copies b, calls repeat (count) on the copy and returns it,
// leaving b itself untouched.
inline const CBString operator * (int count, const CBString& b) {
    CBString repeated (b);
    repeated.repeat (count);
    return repeated;
}
#if defined(BSTRLIB_CAN_USE_IOSTREAM)
extern std::ostream& operator << (std::ostream& sout, CBString b);
extern std::istream& operator >> (std::istream& sin, CBString& b);
extern std::istream& getline (std::istream& sin, CBString& b, char terminator='\n');
#endif
// C++ wrapper around a C-level struct bStream handle (m_s), constructed from
// a bNread callback and an opaque parm pointer.
// NOTE(review): only declarations are visible here; the grouping comments
// below are inferred from member names and should be confirmed against the
// implementation file.
struct CBStream {
// CBStringList is granted access to the private stream handle.
friend struct CBStringList;
private:
struct bStream * m_s;
public:
CBStream (bNread readPtr, void * parm);
~CBStream ();
// Buffer length accessors and end-of-stream query.
int buffLengthSet (int sz);
int buffLengthGet ();
int eof () const;
// Line-oriented reads: by-value, into s, and appending to s; the terminator
// is either a single char or a CBString.
CBString readLine (char terminator);
CBString readLine (const CBString& terminator);
void readLine (CBString& s, char terminator);
void readLine (CBString& s, const CBString& terminator);
void readLineAppend (CBString& s, char terminator);
void readLineAppend (CBString& s, const CBString& terminator);
// Block reads, optionally taking a count n.
CBString read ();
CBString& operator >> (CBString& s);
CBString read (int n);
void read (CBString& s);
void read (CBString& s, int n);
void readAppend (CBString& s);
void readAppend (CBString& s, int n);
void unread (const CBString& s);
// Stream-insertion syntax for unread: calls unread (s) and returns *this so
// that calls can be chained.
inline CBStream& operator << (const CBString& s) {
this->unread (s);
return *this;
}
// Peek variants are const members, so they do not modify this wrapper object.
CBString peek () const;
void peek (CBString& s) const;
void peekAppend (CBString& s) const;
};
#if defined(BSTRLIB_CAN_USE_STL)
// A std::vector<CBString> extended with split helpers. It is only declared
// when BSTRLIB_CAN_USE_STL is defined (see the enclosing #if).
// NOTE(review): the difference between split (..., const CBString& s) and
// splitstr (...) is not visible from these declarations; presumably one
// treats s as a set of separator characters and the other as a separator
// substring -- confirm against the implementation.
struct CBStringList : public std::vector<CBString> {
// split a string into a vector of strings.
void split (const CBString& b, unsigned char splitChar);
void split (const CBString& b, const CBString& s);
void splitstr (const CBString& b, const CBString& s);
// The same three operations taking a CBStream as the input.
void split (const CBStream& b, unsigned char splitChar);
void split (const CBStream& b, const CBString& s);
void splitstr (const CBStream& b, const CBString& s);
};
#endif
} // namespace Bstrlib
#if !defined (BSTRLIB_DONT_ASSUME_NAMESPACE)
using namespace Bstrlib;
#endif
#endif
#endif

274
bstrlib/buniutil.c Normal file
View File

@ -0,0 +1,274 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* buniutil.c
*
* This file is not necessarily part of the core bstring library itself, but
* is just an implementation of basic utf8 processing for bstrlib. Note that
* this module is dependent upon bstrlib.c and utf8util.c
*/
#include "bstrlib.h"
#include "buniutil.h"
#define UNICODE__CODE_POINT__REPLACEMENT_CHARACTER (0xFFFDL)
/* int buIsUTF8Content (const_bstring bu)
*
* Scan string and return 1 if its entire contents is entirely UTF8 code
* points. Otherwise return 0.
*/
int buIsUTF8Content (const_bstring bu) {
    struct utf8Iterator iter;

    /* A bstring without a data buffer is reported as "not UTF8". */
    if (NULL == bdata (bu)) return 0;

    utf8IteratorInit (&iter, bu->data, bu->slen);
    while (iter.next < iter.slen) {
        /* -1 is passed as the error substitute, so any result that is not
           strictly positive ends the scan with a "not UTF8" verdict. */
        if (utf8IteratorGetNextCodePoint (&iter, -1) <= 0) return 0;
    }
    return 1;
}
/* int buGetBlkUTF16 (cpUcs2* ucs2, int len, cpUcs4 errCh, const_bstring bu,
* int pos)
*
* Convert a string of UTF8 codepoints (bu) skipping the first pos, into a
* sequence of UTF16 encoded code points. Returns the number of UCS2 16-bit
* words written to the output. No more than len words are written to the
* target array ucs2. If any code point in bu is unparsable, it will be
* translated to errCh.
*/
int buGetBlkUTF16 (/* @out */ cpUcs2* ucs2, int len, cpUcs4 errCh, const_bstring bu, int pos) {
    struct utf8Iterator iter;
    cpUcs4 cp;
    int start, skipped, written;

    if (!isLegalUnicodeCodePoint (errCh)) errCh = UNICODE__CODE_POINT__REPLACEMENT_CHARACTER;
    if (NULL == ucs2 || 0 >= len || NULL == bdata (bu) || 0 > pos) return BSTR_ERR;

    /* Locate the byte offset of code point number pos: every byte that is
       not a 10xxxxxx continuation byte begins a new code point. */
    skipped = 0;
    for (start = 0; start < bu->slen; start++) {
        if (0x80 == (0xC0 & bu->data[start])) continue;
        if (skipped >= pos) break;
        skipped++;
    }

    utf8IteratorInit (&iter, bu->data + start, bu->slen - start);

    /* cp keeps the last decode result; it stays at BSTR_ERR when nothing
       at all gets decoded, which makes the function return BSTR_ERR. */
    cp = BSTR_ERR;
    written = 0;
    while (0 < len && iter.next < iter.slen) {
        cp = utf8IteratorGetNextCodePoint (&iter, errCh);
        if (0 > cp) break;

        if (cp < 0x10000) {
            /* Fits in a single 16-bit word. */
            *ucs2++ = (cpUcs2) cp;
            len--;
            written++;
        } else if (len < 2) {
            /* No room left for a surrogate pair: emit the replacement
               character in the one remaining slot. */
            *ucs2++ = UNICODE__CODE_POINT__REPLACEMENT_CHARACTER;
            len--;
            written++;
        } else {
            /* Split into a high/low surrogate pair. */
            long y = cp - 0x10000;
            ucs2[0] = (cpUcs2) (0xD800 | (y >> 10));
            ucs2[1] = (cpUcs2) (0xDC00 | (y & 0x03FF));
            ucs2 += 2;
            len -= 2;
            written += 2;
        }
    }

    /* Zero-fill whatever part of the output array was not used. */
    while (0 < len) {
        *ucs2++ = 0;
        len--;
    }

    utf8IteratorUninit (&iter);
    if (0 > cp) return BSTR_ERR;
    return written;
}
/*
Unicode UTF-8
------- -----
U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
UTF-32: U-000000 - U-10FFFF
*/
/*  int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, cpUcs4 errCh)
 *
 *  Convert an array of UCS4 code points (bu) to UTF8 codepoints b.  Any
 *  invalid code point is replaced by errCh.  If errCh is itself not a
 *  valid code point, then this translation will halt upon the first error
 *  and return BSTR_ERR.  Otherwise BSTR_OK is returned.
 *
 *  On any failure (invalid input with no usable errCh, or a failed append)
 *  b is restored to the length it had on entry, including the '\0' that
 *  follows its last character.
 */
int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, cpUcs4 errCh) {
    int i, oldSlen;

    if (NULL == bu || NULL == b || 0 > len || 0 > (oldSlen = blengthe (b, -1))) return BSTR_ERR;

    /* ~0 marks "no usable substitute": the first invalid input aborts. */
    if (!isLegalUnicodeCodePoint (errCh)) errCh = ~0;

    for (i=0; i < len; i++) {
        unsigned char c[4];
        int n;
        cpUcs4 v = bu[i];

        if (!isLegalUnicodeCodePoint (v)) {
            if (~0 == errCh) goto Rollback;
            v = errCh;
        }

        /* Encode v into c[0..n-1].  Everything at or above 0x10000 is
           emitted as a 4-byte sequence; the 5- and 6-byte forms are never
           generated. */
        if (v < 0x80) {
            c[0] = (unsigned char) v;
            n = 1;
        } else if (v < 0x800) {
            c[0] = (unsigned char) ( (v >>  6)         + 0xc0);
            c[1] = (unsigned char) (( v        & 0x3f) + 0x80);
            n = 2;
        } else if (v < 0x10000) {
            c[0] = (unsigned char) ( (v >> 12)         + 0xe0);
            c[1] = (unsigned char) (((v >>  6) & 0x3f) + 0x80);
            c[2] = (unsigned char) (( v        & 0x3f) + 0x80);
            n = 3;
        } else {
            c[0] = (unsigned char) ( (v >> 18)         + 0xf0);
            c[1] = (unsigned char) (((v >> 12) & 0x3f) + 0x80);
            c[2] = (unsigned char) (((v >>  6) & 0x3f) + 0x80);
            c[3] = (unsigned char) (( v        & 0x3f) + 0x80);
            n = 4;
        }

        if (BSTR_OK != bcatblk (b, c, n)) goto Rollback;
    }
    return BSTR_OK;

Rollback:
    /* slen can only have grown if an earlier append succeeded, in which
       case the buffer is writable and data[oldSlen] currently holds the
       first appended byte.  Put the terminator back as well as the length
       so the string is '\0'-terminated at its restored length. */
    if (b->slen > oldSlen) {
        b->slen = oldSlen;
        b->data[oldSlen] = (unsigned char) '\0';
    }
    return BSTR_ERR;
}
/* Swaps the two low bytes of cs when mode is non-zero, otherwise yields cs
   unchanged.  cs appears twice in the expansion, so the argument must be
   free of side effects. */
#define endSwap(cs,mode) ((mode) ? ((((cs) & 0xFF) << 8) | (((cs) >> 8) & 0xFF)) : (cs))
/* Number of cpUcs4 entries in the staging buffer that buAppendBlkUTF16
   fills before flushing it through buAppendBlkUcs4. */
#define TEMP_UCS4_BUFFER_SIZE (64)
/*  int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len,
 *                        cpUcs2* bom, cpUcs4 errCh)
 *
 *  Append an array of UCS2 code points (utf16) to UTF8 codepoints (bu).  Any
 *  invalid code point is replaced by errCh.  If errCh is itself not a
 *  valid code point, then this translation will halt upon the first error
 *  and return BSTR_ERR.  Otherwise BSTR_OK is returned.  If a byte order mark
 *  has been previously read, it may be passed in as bom, otherwise if *bom is
 *  set to 0, it will be filled in with the BOM as read from the first
 *  character if it is a BOM.
 *
 *  A lone low surrogate, a high surrogate in the final slot of the input,
 *  and a high surrogate followed by anything other than a low surrogate are
 *  all treated as invalid code points.  On failure bu is restored to the
 *  length it had on entry, including its terminating '\0'.
 */
int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len, cpUcs2* bom, cpUcs4 errCh) {
    cpUcs4 buff[TEMP_UCS4_BUFFER_SIZE];
    int cc, i, sm, oldSlen;

    if (NULL == bdata(bu) || NULL == utf16 || len < 0) return BSTR_ERR;

    /* ~0 marks "no usable substitute": the first invalid input aborts. */
    if (!isLegalUnicodeCodePoint (errCh)) errCh = ~0;
    if (len == 0) return BSTR_OK;

    oldSlen = bu->slen;
    i = 0;

    /* Check for BOM character and select endianness.  Also remove the
       BOM from the stream, since there is no need for it in a UTF-8 encoding.
       A BOM supplied by the caller takes precedence over one in the data;
       sm is only ever used as the boolean "swap bytes" flag of endSwap. */
    if (bom && (cpUcs2) 0xFFFE == *bom) {
        sm = 8;
    } else if (bom && (cpUcs2) 0xFEFF == *bom) {
        sm = 0;
    } else if (utf16[i] == (cpUcs2) 0xFFFE) {
        if (bom) *bom = utf16[i];
        sm = 8;
        i++;
    } else if (utf16[i] == (cpUcs2) 0xFEFF) {
        if (bom) *bom = utf16[i];
        sm = 0;
        i++;
    } else {
        sm = 0; /* Assume local endianness. */
    }

    cc = 0;
    for (;i < len; i++) {
        cpUcs4 v = endSwap (utf16[i], sm);

        if ((v | 0x7FF) == 0xDFFF) { /* Deal with surrogate pairs */
            int bad = 0;

            if (v >= 0xDC00 || i + 1 >= len) {
                /* Either a low surrogate with no preceding high one, or a
                   high surrogate in the last slot.  Testing i + 1 keeps
                   the pair lookup below from reading past utf16[len-1]. */
                bad = 1;
            } else {
                cpUcs4 lo;

                /* The following unit is consumed whether or not it turns
                   out to be a valid low surrogate. */
                i++;
                lo = endSwap (utf16[i], sm);

                /* A mask test is used rather than "lo - 0xDC00 > 0x3FF" so
                   that units below 0xDC00 are rejected regardless of
                   whether cpUcs4 is a signed type. */
                if ((lo & 0xFC00) != 0xDC00) {
                    bad = 1;
                } else {
                    v = ((v - 0xD800) << 10) + (lo - 0xDC00) + 0x10000;
                }
            }

            if (bad) {
                if (~0 == errCh) goto ErrReturn;
                v = errCh;
            }
        }

        /* Stage the code point; flush the staging buffer once it is full. */
        buff[cc] = v;
        cc++;
        if (cc >= TEMP_UCS4_BUFFER_SIZE) {
            if (0 > buAppendBlkUcs4 (bu, buff, cc, errCh)) goto ErrReturn;
            cc = 0;
        }
    }
    if (cc > 0 && 0 > buAppendBlkUcs4 (bu, buff, cc, errCh)) goto ErrReturn;

    return BSTR_OK;

ErrReturn:
    /* slen can only exceed oldSlen after a successful append, so the
       buffer is writable here; restore the terminator along with the
       length. */
    if (bu->slen > oldSlen) {
        bu->slen = oldSlen;
        bu->data[oldSlen] = (unsigned char) '\0';
    }
    return BSTR_ERR;
}

37
bstrlib/buniutil.h Normal file
View File

@ -0,0 +1,37 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* buniutil.h
*
* This file is the interface for the buniutil basic "Unicode for bstrings"
* functions. Note that there are dependencies on bstrlib.h and utf8util.h .
*/
#ifndef BSTRLIB_UNICODE_UTILITIES
#define BSTRLIB_UNICODE_UTILITIES
#include "utf8util.h"
#include "bstrlib.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Returns 1 if the whole of bu decodes as UTF8 code points, otherwise 0. */
extern int buIsUTF8Content (const_bstring bu);
/* Appends len UCS4 code points from bu to b as UTF8, substituting errCh for
   invalid ones; returns BSTR_OK or BSTR_ERR. */
extern int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, cpUcs4 errCh);
/* For those unfortunate enough to be stuck supporting UTF16. */
/* Decodes bu, skipping its first pos code points, into at most len UTF16
   words at ucs2; returns the number of words written or BSTR_ERR. */
extern int buGetBlkUTF16 (/* @out */ cpUcs2* ucs2, int len, cpUcs4 errCh, const_bstring bu, int pos);
/* Appends len UTF16 words from utf16 to bu as UTF8, honouring or recording a
   byte order mark via bom; returns BSTR_OK or BSTR_ERR. */
extern int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len, cpUcs2* bom, cpUcs4 errCh);
#ifdef __cplusplus
}
#endif
#endif /* BSTRLIB_UNICODE_UTILITIES */

339
bstrlib/gpl.txt Normal file
View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

172
bstrlib/porting.txt Normal file
View File

@ -0,0 +1,172 @@
Better String library Porting Guide
-----------------------------------
by Paul Hsieh
The bstring library is an attempt to provide improved string processing
functionality to the C and C++ language. At the heart of the bstring library
is the management of "bstring"s which are a significant improvement over '\0'
terminated char buffers. See the accompanying documentation file bstrlib.txt
for more information.
===============================================================================
Identifying the Compiler
------------------------
Bstrlib has been tested on the following compilers:
Microsoft Visual C++
Watcom C/C++ (32 bit flat)
Intel's C/C++ compiler (on Windows)
The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
Borland C++
Turbo C
There are slight differences in these compilers which require slight
differences in the implementation of Bstrlib. These are accommodated in the
same sources using #ifdef/#if defined() on compiler specific macros. To
port Bstrlib to a new compiler not listed above, it is recommended that the
same strategy be followed. If you are unaware of the compiler specific
identifying preprocessor macro for your compiler you might find it here:
http://predef.sourceforge.net/precomp.html
Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.
16-bit vs. 32-bit vs. 64-bit Systems
------------------------------------
Bstrlib has been architected to deal with strings of length between 0 and
INT_MAX (inclusive). Since the values of int are never higher than size_t
there will be no issue here. Note that on most 64-bit systems int is 32-bit.
Dependency on The C-Library
---------------------------
Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and
vsnprintf. Many free standing C compiler implementations that have a mode in
which the C library is not available will typically not include these
functions which will make porting Bstrlib to it onerous. Bstrlib is not
designed for such bare bones compiler environments. This usually includes
compilers that target ROM environments.
Porting Issues
--------------
Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there
are still a few porting issues. These are described below.
1. The vsnprintf () function.
Unfortunately, the earlier ANSI/ISO C standards did not include this function.
If the compiler of interest does not support this function then the
BSTRLIB_NOVSNP should be defined via something like:
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
# if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
# define BSTRLIB_NOVSNP
# endif
#endif
which appears at the top of bstrlib.h. Note that the bformat(a) functions
will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If
the compiler has renamed vsnprintf() to some other named function, then
search for the definition of the exvsnprintf macro in bstrlib.c file and be
sure it's defined appropriately:
#if defined (__COMPILERVENDORSPECIFICMACRO__)
# define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
#else
# define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
#endif
Take notice of the return value being captured in the variable r. It is
assumed that r exceeds n if and only if the underlying vsnprintf function has
determined what the true maximal output length would be for output if the
buffer were large enough to hold it. Non-modern implementations must output a
lesser number (the macro can and should be modified to ensure this).
2. Weak C++ compiler.
C++ is a much more complicated language to implement than C. This has led
to varying quality of compiler implementations. The weaknesses isolated in
the initial ports are inclusion of the Standard Template Library,
std::iostream and exception handling. By default it is assumed that the C++
compiler supports all of these things correctly. If your compiler does not
support one or more of these define the corresponding macro:
BSTRLIB_CANNOT_USE_STL
BSTRLIB_CANNOT_USE_IOSTREAM
BSTRLIB_DOESNT_THROW_EXCEPTIONS
The compiler specific detected macro should be defined at the top of
bstrwrap.h in the Configuration defines section. Note that these disabling
macros can be overridden with the associated enabling macro if a subsequent
version of the compiler gains support. (For example, it's possible to rig
up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
can be passed in as a compiler option.)
3. The bsafe module, and reserved words.
The bsafe module is in gross violation of the ANSI/ISO C standard in the
sense that it redefines what could be implemented as reserved words on a
given compiler. The typical problem is that a compiler may inline some of the
functions and thus not be properly overridden by the definitions in the bsafe
module. It is also possible that a compiler may prohibit the redefinitions in
the bsafe module. Compiler specific action will be required to deal with
these situations.
Platform Specific Files
-----------------------
The makefiles for the examples are basically set up for particular
environments for each platform. In general these makefiles are not portable
and should be constructed as necessary from scratch for each platform.
Testing a port
--------------
To test that a port compiles correctly do the following:
1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and
bsafe modules.
2. Compile bstest against the bstrlib module.
3. Run bstest and ensure that 0 errors are reported.
4. Compile test against the bstrlib and bstrwrap modules.
5. Run test and ensure that 0 errors are reported.
6. Compile each of the examples (except for the "re" example, which may be
complicated and is not a real test of bstrlib and except for the mfcbench
example which is Windows specific.)
7. Run each of the examples.
The builds must have 0 errors, and should have the absolute minimum number of
warnings (in most cases can be reduced to 0.) The result of execution should
be essentially identical on each platform.
Performance
-----------
Different CPU and compilers have different capabilities in terms of
performance. It is possible for Bstrlib to assume performance
characteristics that a platform doesn't have (since it was primarily
developed on just one platform). The goal of Bstrlib is to provide very good
performance on all platforms regardless of this but without resorting to
extreme measures (such as using assembly language, or non-portable intrinsics
or library extensions.)
There are two performance benchmarks that can be found in the example/
directory. They are: cbench.c and cppbench.cpp. These are variations and
expansions of a benchmark for another string library. They don't cover all
string functionality, but do include the most basic functions which will be
common in most string manipulation kernels.
...............................................................................
Feedback
--------
In all cases, you may email issues found to the primary author of Bstrlib at
the email address: websnarf@users.sourceforge.net
===============================================================================

217
bstrlib/security.txt Normal file
View File

@ -0,0 +1,217 @@
Better String library Security Statement
----------------------------------------
by Paul Hsieh
===============================================================================
Introduction
------------
The Better String library (hereafter referred to as Bstrlib) is an attempt to
provide improved string processing functionality to the C and C++ languages.
At the heart of the Bstrlib is the management of "bstring"s which are a
significant improvement over '\0' terminated char buffers. See the
accompanying documentation file bstrlib.txt for more information.
DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Like any software, there is always a possibility of failure due to a flawed
implementation. Nevertheless a good faith effort has been made to minimize
such flaws in Bstrlib. Use of Bstrlib by itself will not make an application
secure or free from implementation failures, however, it is the author's
conviction that use of Bstrlib can greatly facilitate the creation of
software meeting the highest possible standards of security.
Part of the reason why this document has been created, is for the purpose of
security auditing, or the creation of further "Statements on Security" for
software that is created that uses Bstrlib. An auditor may check the claims
below against Bstrlib, and use this as a basis for analysis of software which
uses Bstrlib.
===============================================================================
Statement on Security
---------------------
This is a document intended to give consumers of the Better String Library
who are interested in security an idea of where the Better String Library
stands on various security issues. Any deviation observed in the actual
library itself from the descriptions below should be considered an
implementation error, not a design flaw.
This statement is not an analytical proof of correctness or an outline of one
but rather an assertion similar to a scientific claim or hypothesis. By use,
testing and open independent examination (otherwise known as scientific
falsifiability), the credibility of the claims made below can rise to the
level of an established theory.
Common security issues:
.......................
1. Buffer Overflows
The Bstrlib API allows the programmer a way to deal with strings without
having to deal with the buffers containing them. Ordinary usage of the
Bstrlib API itself makes buffer overflows impossible.
Furthermore, the Bstrlib API has a superset of basic string functionality as
compared to the C library's char * functions, C++'s std::string class and
Microsoft's MFC based CString class. It also has abstracted mechanisms for
dealing with IO. This is important as it gives developers a way of migrating
all their code from a functionality point of view.
2. Memory size overflow/wrap around attack
By design, Bstrlib is impervious to memory size overflow attacks. The
reason is that it detects length overflows and leads to a result error before
the operation attempts to proceed. Attempted conversions of char* strings
which may have lengths greater than INT_MAX are detected and the conversion
is aborted. If the memory to hold the string exceeds the available memory
for it, again, the result is aborted without changing the prior state of the
strings.
3. Constant string protection
Bstrlib implements runtime enforced constant and read-only string semantics.
I.e., bstrings which are declared as constant via the bsStatic() macro cannot
be modified or deallocated directly through the Bstrlib API, and this cannot
be subverted by casting or other type coercion. This is independent of the
use of the const_bstring data type.
The Bstrlib C API uses the type const_bstring to specify bstring parameters
whose contents do not change. Although the C language cannot enforce this,
this is nevertheless guaranteed by the implementation of the Bstrlib library
of C functions. The C++ API enforces the const attribute on CBString types
correctly.
4. Aliased bstring support
Bstrlib detects and supports aliased parameter management throughout the API.
The kind of aliasing that is allowed is the one where pointers of the same
basic type may be pointing to overlapping objects (this is the assumption the
ANSI C99 specification makes.) Each function behaves as if all read-only
parameters were copied to temporaries which are used in their stead before
the function is enacted (it rarely actually does this). No function in the
Bstrlib uses the "restrict" parameter attribute from the ANSI C99
specification.
5. Information leaking
In bstraux.h, using the semantically equivalent macros bSecureDestroy() and
bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively
will ensure that stale data does not linger in the heap's free space after
strings have been released back to memory. Created bstrings or CBStrings
are not linked to anything external to themselves, and thus cannot expose
deterministic data leaking. If a bstring is resized, the preimage may exist
as a copy that is released to the heap. Thus for sensitive data, the bstring
should be sufficiently presized before manipulated so that it is not resized.
bSecureInput() has been supplied in bstraux.c, which can be used to obtain
input securely without any risk of leaving any part of the input image in the
heap except for the allocated bstring that is returned.
6. Memory leaking
Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG
macro. User generated definitions for malloc, realloc and free can then be
supplied which can implement special strategies for memory corruption
detection or memory leaking. Otherwise, bstrlib does not do anything out of
the ordinary to attempt to deal with the standard problem of memory leaking
(i.e., losing references to allocated memory) when programming in the C and
C++ languages. However, it does not compound the problem any more than exists
either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib
does not preclude the use of automatic garbage collection mechanisms such as
the Boehm garbage collector.
7. Encryption
Bstrlib does not present any built-in encryption mechanism. However, it
supports full binary contents in its data buffers, so any standard block
based encryption mechanism can make direct use of bstrings/CBStrings for
buffer management.
8. Double freeing
Freeing a pointer that is already free is an extremely rare, but nevertheless
a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to
reboot, by calling free multiple times on already freed data using the WATCOM
CRT.) Bstrlib invalidates the bstring header data before freeing, so that in
many cases a double free will be detected and an error will be reported
(though this behaviour is not guaranteed and should not be relied on).
Using bstrFree pervasively (instead of bdestroy) can lead to somewhat
improved invalid free avoidance (it is completely safe whenever bstring
instances are only stored in unique variables). For example:
struct tagbstring hw = bsStatic ("Hello, world");
bstring cpHw = bstrcpy (&hw);
#ifdef NOT_QUITE_AS_SAFE
bdestroy (cpHw); /* Never fail */
bdestroy (cpHw); /* Error sometimes detected at runtime */
bdestroy (&hw); /* Error detected at run time */
#else
bstrFree (cpHw); /* Never fail */
bstrFree (cpHw); /* Will do nothing */
bstrFree (&hw); /* Will lead to a compile time error */
#endif
9. Resource based denial of service
bSecureInput() has been supplied in bstraux.c. It has an optional upper limit
for input length. But unlike fgets(), it is also easily determined if the
buffer has been truncated early. In this way, a program can set an upper
limit on input sizes while still allowing for implementing context specific
truncation semantics (i.e., does the program consume but dump the extra
input, or does it consume it in later inputs?)
10. Mixing char *'s and bstrings
The bstring and char * representations are not identical. So there is a risk
when converting back and forth that data may be lost. Essentially bstrings can
contain '\0' as a valid non-terminating character, while char * strings
cannot and in fact must use the character as a terminator. The risk of data
loss is very low, since:
A) the simple method of only using bstrings in a char * semantically
compatible way is both easy to achieve and pervasively supported.
B) obtaining '\0' content in a string is either deliberate or indicative
of another, likely more serious problem in the code.
C) the library comes with various functions which deal with this issue
(namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ())
Marginal security issues:
.........................
11. 8-bit versus 9-bit portability
Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent
possible to avoid portability problems. However, Bstrlib has not been tested
on any system that does not represent char as 8-bits. So whether or not it
works on 9-bit systems is an open question. It is recommended that Bstrlib be
carefully audited by anyone using a system in which CHAR_BIT is not 8.
12. EBCDIC/ASCII/UTF-8 data representation attacks.
Bstrlib uses ctype.h functions to ensure that it remains portable to non-
ASCII systems. It also checks range to make sure it is well defined even for
data that ANSI does not define for the ctype functions.
Obscure issues:
...............
13. Data attributes
There is no support for a Perl-like "taint" attribute, although this is a
fairly straightforward exercise using C++'s type system.

1725
bstrlib/test.cpp Normal file

File diff suppressed because it is too large Load Diff

423
bstrlib/testaux.c Normal file
View File

@ -0,0 +1,423 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license. Refer to the accompanying documentation for details on usage and
* license.
*/
/*
* testaux.c
*
* This file is the C unit test for the bstraux module of Bstrlib.
*/
#include <stdio.h>
#include "bstrlib.h"
#include "bstraux.h"
/*
 * Write callback handed to bwsOpen in test0: appends nelem elements of
 * elsize bytes each, taken from buf, onto the bstring passed through parm.
 *
 * Returns the number of whole elements appended (appending stops at the
 * first bcatblk call that reports a negative result), or a negative value
 * when parm or buf is NULL or when elsize or nelem is zero.
 */
static int tWrite (const void * buf, size_t elsize, size_t nelem, void * parm) {
    bstring dest = (bstring) parm;
    const char * cursor = (const char *) buf;
    size_t written = 0;

    if (NULL == dest || NULL == buf || 0 == elsize || 0 == nelem) {
        return -__LINE__;
    }

    /* One bcatblk call per element; advance the read cursor only on success. */
    while (written < nelem) {
        if (bcatblk (dest, (const void *) cursor, elsize) < 0) {
            break;
        }
        cursor += elsize;
        written++;
    }

    return (int) written;
}
/*
 * Exercises the bwriteStream functions (bwsOpen, bwsBuffLength, bwsWriteBlk
 * and bwsClose) with tWrite as the sink and an initially empty bstring as the
 * sink's target.
 *
 * Returns the number of failed checks.
 */
int test0 (void) {
    struct bwriteStream * ws;
    bstring s;
    int ret = 0;

    printf ("TEST: struct bwriteStream functions.\n");

    s = bfromcstr ("");
    ws = bwsOpen ((bNwrite) tWrite, s);

    /* Set the buffer length to 8, then query it by passing 0. */
    bwsBuffLength (ws, 8);
    ret += 8 != bwsBuffLength (ws, 0);

    /* Six bytes stay buffered: nothing is expected in s yet. */
    bwsWriteBlk (ws, bsStaticBlkParms ("Hello "));
    ret += 0 == biseqcstr (s, "");

    /* Crossing the 8 byte mark is expected to push exactly 8 bytes into s. */
    bwsWriteBlk (ws, bsStaticBlkParms ("World\n"));
    ret += 0 == biseqcstr (s, "Hello Wo");

    /* Closing is expected to hand back the sink parameter and flush the rest. */
    ret += s != bwsClose (ws);
    ret += 0 == biseqcstr (s, "Hello World\n");

    /* s is still live after bwsClose (it was just compared); release it. */
    bdestroy (s);

    printf ("\t# failures: %d\n", ret);
    return ret;
}
/*
 * Checks bTail and bHead against "Hello world": with a length of 5 and of 0
 * on the bsStatic source, then with a length of 5 on a bstrcpy of it.
 *
 * Returns the number of failed checks.
 */
int test1 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring tail, head, copy;
    int failures = 0;

    printf ("TEST: bTail and bHead functions.\n");

    /* Five characters from each end of the static source. */
    tail = bTail (&t, 5);
    head = bHead (&t, 5);
    if (biseqcstr (tail, "world") <= 0) failures++;
    if (biseqcstr (head, "Hello") <= 0) failures++;
    bdestroy (tail);
    bdestroy (head);

    /* Zero characters must compare equal to the empty string. */
    tail = bTail (&t, 0);
    head = bHead (&t, 0);
    if (biseqcstr (tail, "") <= 0) failures++;
    if (biseqcstr (head, "") <= 0) failures++;
    bdestroy (tail);
    bdestroy (head);

    /* Same five-character check, this time on a copy of the source. */
    copy = bstrcpy (&t);
    tail = bTail (copy, 5);
    head = bHead (copy, 5);
    if (biseqcstr (tail, "world") <= 0) failures++;
    if (biseqcstr (head, "Hello") <= 0) failures++;
    bdestroy (tail);
    bdestroy (head);
    bdestroy (copy);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Runs the same sequence of checks against bSetChar and then bSetCstrChar:
 * a write into the bsStatic source must be refused, writes into a copy must
 * succeed, and a negative position must be refused.  The two halves differ
 * in the final step, where '\0' is stored at index 1: the bSetChar half
 * expects the length to stay 3, the bSetCstrChar half expects it to become 1.
 *
 * Returns the combined number of failed checks from both halves.
 */
int test2 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring work;
    int setCharFails = 0;
    int setCstrFails = 0;

    printf ("TEST: bSetChar function.\n");
    setCharFails += (bSetChar (&t, 4, ',') >= 0);
    work = bstrcpy (&t);
    setCharFails += (bSetChar (work, 4, ',') < 0);
    setCharFails += (biseqcstr (work, "Hell, world") <= 0);
    setCharFails += (bSetChar (work, -1, 'x') >= 0);
    /* Shorten the copy by hand so the next writes hit the last/next slot. */
    work->slen = 2;
    setCharFails += (bSetChar (work, 1, 'i') < 0);
    setCharFails += (biseqcstr (work, "Hi") <= 0);
    setCharFails += (bSetChar (work, 2, 's') < 0);
    setCharFails += (biseqcstr (work, "His") <= 0);
    setCharFails += (bSetChar (work, 1, '\0') < 0);
    setCharFails += (3 != blength (work));
    setCharFails += ('H' != bchare (work, 0, '?'));
    setCharFails += ('\0' != bchare (work, 1, '?'));
    setCharFails += ('s' != bchare (work, 2, '?'));
    bdestroy (work);
    printf ("\t# failures: %d\n", setCharFails);

    printf ("TEST: bSetCstrChar function.\n");
    setCstrFails += (bSetCstrChar (&t, 4, ',') >= 0);
    work = bstrcpy (&t);
    setCstrFails += (bSetCstrChar (work, 4, ',') < 0);
    setCstrFails += (biseqcstr (work, "Hell, world") <= 0);
    setCstrFails += (bSetCstrChar (work, -1, 'x') >= 0);
    work->slen = 2;
    setCstrFails += (bSetCstrChar (work, 1, 'i') < 0);
    setCstrFails += (biseqcstr (work, "Hi") <= 0);
    setCstrFails += (bSetCstrChar (work, 2, 's') < 0);
    setCstrFails += (biseqcstr (work, "His") <= 0);
    setCstrFails += (bSetCstrChar (work, 1, '\0') < 0);
    setCstrFails += (1 != blength (work));
    setCstrFails += ('H' != bchare (work, 0, '?'));
    bdestroy (work);
    printf ("\t# failures: %d\n", setCstrFails);

    return setCharFails + setCstrFails;
}
/*
 * Checks bFill: it must be refused on the bsStatic source and for a negative
 * length, and must yield "xxxxxxx" for length 7 and "" for length 0 on a copy.
 *
 * Returns the number of failed checks.
 */
int test3 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring work;
    int failures = 0;

    printf ("TEST: bFill function.\n");

    failures += (bFill (&t, 'x', 7) >= 0);

    work = bstrcpy (&t);
    failures += (bFill (work, 'x', 7) < 0);
    failures += (biseqcstr (work, "xxxxxxx") <= 0);
    failures += (bFill (work, 'x', -1) >= 0);
    failures += (bFill (work, 'x', 0) < 0);
    failures += (biseqcstr (work, "") <= 0);
    bdestroy (work);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Checks bReplicate on "foo": it must be refused on the bsStatic source and
 * for a count of -1, must give "foofoofoofoo" for a count of 4, and "" for a
 * count of 0.
 *
 * Returns the number of failed checks.
 */
int test4 (void) {
    struct tagbstring t = bsStatic ("foo");
    bstring work;
    int failures = 0;

    printf ("TEST: bReplicate function.\n");

    failures += (bReplicate (&t, 4) >= 0);

    work = bstrcpy (&t);
    failures += (bReplicate (work, -1) >= 0);
    failures += (bReplicate (work, 4) < 0);
    failures += (biseqcstr (work, "foofoofoofoo") <= 0);
    failures += (bReplicate (work, 0) < 0);
    failures += (biseqcstr (work, "") <= 0);
    bdestroy (work);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Checks bReverse: it must be refused on the bsStatic source, must turn a
 * copy of "Hello world" into "dlrow olleH", and must succeed on a string
 * whose length has been forced to 0.
 *
 * Returns the number of failed checks.
 */
int test5 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring work;
    int failures = 0;

    printf ("TEST: bReverse function.\n");

    failures += (bReverse (&t) >= 0);

    work = bstrcpy (&t);
    failures += (bReverse (work) < 0);
    failures += (biseqcstr (work, "dlrow olleH") <= 0);

    /* Empty the copy by hand and reverse again. */
    work->slen = 0;
    failures += (bReverse (work) < 0);
    failures += (biseqcstr (work, "") <= 0);
    bdestroy (work);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Checks bInsertChrs: it must be refused on the bsStatic source, and on a
 * copy of "Hello world" the call (6, 4, 'x', '?') must give "Hello xxxxworld".
 *
 * Returns the number of failed checks.
 */
int test6 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring work;
    int failures = 0;

    printf ("TEST: bInsertChrs function.\n");

    failures += (bInsertChrs (&t, 6, 4, 'x', '?') >= 0);

    work = bstrcpy (&t);
    failures += (bInsertChrs (work, 6, 4, 'x', '?') < 0);
    failures += (biseqcstr (work, "Hello xxxxworld") <= 0);
    bdestroy (work);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Checks the bJustifyLeft / bJustifyRight / bJustifyMargin / bJustifyCenter
 * family: every one of them must be refused on the bsStatic source, and each
 * must succeed and produce the expected text when applied, in that order, to
 * a single copy of the source.
 *
 * Returns the number of failed checks.
 */
int test7 (void) {
    struct tagbstring t = bsStatic (" i am ");
    bstring work;
    int failures = 0;

    printf ("TEST: bJustify functions.\n");

    /* None of the justify calls may modify the static source. */
    failures += (bJustifyLeft (&t, ' ') >= 0);
    failures += (bJustifyRight (&t, 8, ' ') >= 0);
    failures += (bJustifyMargin (&t, 8, ' ') >= 0);
    failures += (bJustifyCenter (&t, 8, ' ') >= 0);

    /* The same copy is carried through all four operations. */
    work = bstrcpy (&t);
    failures += (bJustifyLeft (work, ' ') < 0);
    failures += (biseqcstr (work, "i am") <= 0);
    failures += (bJustifyRight (work, 8, ' ') < 0);
    failures += (biseqcstr (work, " i am") <= 0);
    failures += (bJustifyMargin (work, 8, ' ') < 0);
    failures += (biseqcstr (work, "i am") <= 0);
    failures += (bJustifyCenter (work, 8, ' ') < 0);
    failures += (biseqcstr (work, " i am") <= 0);
    bdestroy (work);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Round-trips "Hello world" through bStr2NetStr and bNetStr2Bstr: the encoded
 * form must be "11:Hello world," and decoding it must give back the source.
 *
 * Returns the number of failed checks.
 */
int test8 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring decoded;
    char * netstr;
    int failures = 0;

    printf ("TEST: NetStr functions.\n");

    netstr = bStr2NetStr (&t);
    if (strcmp (netstr, "11:Hello world,") != 0) failures++;

    decoded = bNetStr2Bstr (netstr);
    if (biseq (decoded, &t) <= 0) failures++;

    bdestroy (decoded);
    bcstrfree (netstr);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Round-trips "Hello world" through bBase64Encode and bBase64DecodeEx: the
 * encoded form must be "SGVsbG8gd29ybGQ=", the decoder must report a zero
 * error value, and the decoded string must equal the source.
 *
 * Returns the number of failed checks.
 */
int test9 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring encoded, decoded;
    int err, failures = 0;

    printf ("TEST: Base 64 codec.\n");

    encoded = bBase64Encode (&t);
    if (biseqcstr (encoded, "SGVsbG8gd29ybGQ=") <= 0) failures++;

    decoded = bBase64DecodeEx (encoded, &err);
    if (err != 0) failures++;
    if (biseq (decoded, &t) <= 0) failures++;

    bdestroy (encoded);
    bdestroy (decoded);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Round-trips "Hello world" through bUuEncode and bUuDecodeEx: the encoded
 * text must match the expected line, the decoder must report a zero error
 * value, and the decoded string must equal the source.
 *
 * Returns the number of failed checks.
 */
int test10 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    bstring encoded, decoded;
    int err, failures = 0;

    printf ("TEST: UU codec.\n");

    encoded = bUuEncode (&t);
    if (biseqcstr (encoded, "+2&5L;&\\@=V]R;&0`\r\n") <= 0) failures++;

    decoded = bUuDecodeEx (encoded, &err);
    if (err != 0) failures++;
    if (biseq (decoded, &t) <= 0) failures++;

    bdestroy (encoded);
    bdestroy (decoded);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Round-trips "Hello world" through bYEncode and bYDecode: the encoded form
 * must be 11 bytes long and start with the expected byte sequence, and
 * decoding it must give back the source.
 *
 * Returns the number of failed checks.
 */
int test11 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    unsigned char expected[] = {0x72, 0x8f, 0x96, 0x96, 0x99, 0x4a, 0xa1, 0x99, 0x9c, 0x96, 0x8e};
    bstring encoded, decoded;
    int failures = 0;

    printf ("TEST: Y codec.\n");

    encoded = bYEncode (&t);
    if (encoded->slen != 11) failures++;
    if (bisstemeqblk (encoded, expected, 11) <= 0) failures++;

    decoded = bYDecode (encoded);
    if (biseq (decoded, &t) <= 0) failures++;

    bdestroy (encoded);
    bdestroy (decoded);

    printf ("\t# failures: %d\n", failures);
    return failures;
}
/*
 * Checks bsFromBstr by reading "Hello world" back through bsread in two
 * requests of 6 characters: the first must deliver "Hello ", the second
 * (after the target has been emptied) must deliver "world".
 *
 * Returns the number of failed checks.  Every check is accumulated into the
 * count; a negative bsread result is counted as one failure, following the
 * "negative means error" convention the other tests in this file apply to
 * bstrlib calls (NOTE(review): confirm against the bsread documentation).
 */
int test12 (void) {
    struct tagbstring t = bsStatic ("Hello world");
    struct bStream * s;
    bstring b;
    int ret = 0;

    printf ("TEST: bsFromBstr.\n");

    b = bfromcstr ("");
    s = bsFromBstr (&t);

    ret += 0 > bsread (b, s, 6);
    ret += 1 != biseqcstr (b, "Hello ");

    /* Empty the target so the second read is compared on its own. */
    if (b) b->slen = 0;

    /* Accumulate instead of assigning, so earlier failures are not lost. */
    ret += 0 > bsread (b, s, 6);
    ret += 1 != biseqcstr (b, "world");

    bdestroy (b);
    bsclose (s);

    printf ("\t# failures: %d\n", ret);
    return ret;
}
/* Read cursor over a bstring; used as the context for test13_fgetc. */
struct vfgetc {
int ofs; /* index of the next character to hand out from base */
bstring base; /* string the characters are taken from */
};
/*
 * Character source over an in-memory bstring, passed to bSecureInput in
 * test13.  ctx must point to a struct vfgetc.
 *
 * Returns the character at the cursor and advances the cursor by one, or EOF
 * when ctx or its base string is NULL or the cursor has reached the end of
 * the string.
 */
static int test13_fgetc (void * ctx) {
    struct vfgetc * src = (struct vfgetc *) ctx;

    if (NULL != src && NULL != src->base && src->ofs < blength (src->base)) {
        int ch = bchare (src->base, src->ofs, EOF);
        src->ofs++;
        return ch;
    }
    return EOF;
}
/*
 * Exercises bSecureInput and bSecureDestroy up to 1000 times: each pass reads
 * t0 through test13_fgetc, checks the result equals t0, destroys it with
 * bSecureDestroy, and then inspects the released buffer to see whether the
 * string contents are still there.
 *
 * Returns the number of failed checks; the loop stops at the first failure.
 */
int test13 (void) {
struct tagbstring t0 = bsStatic ("Random String, long enough to cause to reallocing");
struct vfgetc vctx;
bstring b;
int ret = 0;
int i;
printf ("TEST: bSecureInput, bSecureDestroy.\n");
for (i=0; i < 1000; i++) {
unsigned char * h;
/* Rewind the in-memory character source to the start of t0. */
vctx.ofs = 0;
vctx.base = &t0;
b = bSecureInput (INT_MAX, '\n', (bNgetc) test13_fgetc, &vctx);
ret += 1 != biseq (b, &t0);
/* Remember the data pointer so the buffer can be inspected after release. */
/* NOTE(review): b is dereferenced here without a NULL check. */
h = b->data;
bSecureDestroy (b);
/* WARNING! Technically undefined code follows (h has been freed): */
/* A failure is counted when the released buffer still matches t0's bytes. */
ret += (0 == memcmp (h, t0.data, t0.slen));
if (ret) break;
}
printf ("\t# failures: %d\n", ret);
return ret;
}
/*
 * Helper for test14: applies bSGMLEncode to b in place and compares the
 * result with chkVal.
 *
 * Returns the number of failed checks (0 to 2): one if bSGMLEncode returns a
 * non-zero value, one if b does not then compare equal to chkVal.
 */
int test14_aux(bstring b, const char* chkVal) {
    int encodeFailed = (bSGMLEncode (b) != 0);
    int mismatch = (biseqcstr (b, chkVal) != 1);

    return encodeFailed + mismatch;
}
/*
 * Checks bSGMLEncode on a string containing '<', '"', '&' and '>' and
 * expects each of them to come back as the matching entity.
 *
 * Returns the number of failed checks.
 */
int test14 (void) {
    bstring b;
    int ret = 0;

    printf ("TEST: bSGMLEncode.\n");

    b = bfromStatic ("<\"Hello, you, me, & world\">");
    ret += test14_aux (b, "&lt;&quot;Hello, you, me, &amp; world&quot;&gt;");

    /* The string built above is modified in place by the helper and is owned
     * by this function; release it so the test does not leak it. */
    bdestroy (b);

    printf ("\t# failures: %d\n", ret);
    return ret;
}
/*
 * Runs test0 through test14 in order and prints the total number of failed
 * checks.
 *
 * Returns 0 when every check passed and 1 otherwise, so that scripts and
 * build systems invoking this program can detect a failing run.
 */
int main (void) {
    int ret = 0;

    printf ("Direct case testing of bstraux functions\n");

    ret += test0 ();
    ret += test1 ();
    ret += test2 ();
    ret += test3 ();
    ret += test4 ();
    ret += test5 ();
    ret += test6 ();
    ret += test7 ();
    ret += test8 ();
    ret += test9 ();
    ret += test10 ();
    ret += test11 ();
    ret += test12 ();
    ret += test13 ();
    ret += test14 ();

    printf ("# test failures: %d\n", ret);

    /* Map the failure count onto the exit status instead of always
     * reporting success. */
    return (0 == ret) ? 0 : 1;
}

249
bstrlib/utf8util.c Normal file
View File

@ -0,0 +1,249 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* utf8util.c
*
* This file is not necessarily part of the core bstring library itself, but
* is just a generic module for implementing utf8 utility functions.
*/
#include "utf8util.h"
/*
 * Fallback definition of NULL, used only when no header included above has
 * already defined it: plain 0 when compiled as C++, a void pointer otherwise.
 */
#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#endif
/*
 * Non-zero when v is a code point this module accepts, i.e. when v
 *   - lies outside the surrogate range 0xD800..0xDFFF,
 *   - is no greater than 0x10FFFF (negative values are rejected here as well,
 *     through the conversion to unsigned long), and
 *   - does not have 0xFFFE or 0xFFFF in its low 16 bits.
 * The argument is expanded more than once, so it must be free of side effects.
 */
#define isLegalUnicodeCodePoint(v) (!(((v) >= 0xD800L) && ((v) <= 0xDFFFL)) && !(((unsigned long)(v)) > 0x0010FFFFL) && (((v) & 0xFFFEL) != 0xFFFEL))
void utf8IteratorInit (struct utf8Iterator* iter, unsigned char* data, int slen) {
if (iter) {
iter->data = data;
iter->slen = (iter->data && slen >= 0) ? slen : -1;
iter->start = -1;
iter->next = (iter->slen >= 0) ? 0 : -1;
iter->error = (iter->slen >= 0) ? 0 : 1;
}
}
/*
 * Detaches iter from its buffer: data becomes NULL and slen, start and next
 * all become -1.  The error field is left as it was.  A NULL iter is ignored.
 */
void utf8IteratorUninit (struct utf8Iterator* iter) {
    if (NULL == iter) return;

    iter->data = NULL;
    iter->slen = -1;
    iter->next = -1;
    iter->start = -1;
}
/*
 * Decodes the UTF-8 code point that covers byte offset pos of msg[0..len-1].
 *
 * If msg[pos] is a continuation byte (10xxxxxx), the scan steps back by up to
 * three bytes to find the lead byte of the sequence before decoding it.
 *
 *   msg - buffer holding the UTF-8 data
 *   len - number of valid bytes in msg
 *   pos - offset of any byte belonging to the code point of interest
 *   out - receives the decoded code point on success; may be NULL
 *
 * Returns the number of bytes stepped back from pos to reach the lead byte
 * (0 to 3) on success.  Returns a negative value when the arguments are
 * invalid, the sequence is truncated, a trailing byte is not a continuation
 * byte, the encoding is overlong, or the decoded value is rejected by
 * isLegalUnicodeCodePoint.
 */
int utf8ScanBackwardsForCodePoint (unsigned char* msg, int len, int pos, cpUcs4* out) {
    cpUcs4 v1, v2, v3, v4, x;
    int ret;

    if (NULL == msg || len < 0 || (unsigned) pos >= (unsigned) len) {
        return -__LINE__;
    }
    if (!out) out = &x;
    ret = 0;

    if (msg[pos] < 0x80) {
        /* Plain ASCII byte: it is its own code point. */
        *out = msg[pos];
        return 0;
    } else if (msg[pos] < 0xC0) {
        /* Continuation byte: walk back over at most two more continuation
         * bytes until a lead byte long enough to cover pos is found. */
        if (0 == pos) return -__LINE__;
        if (msg[pos-1] >= 0xC1 && msg[pos-1] < 0xF8) {
            pos--;
            ret = 1;
        } else {
            if (1 == pos) return -__LINE__;
            if ((msg[pos-1] | 0x3F) != 0xBF) return -__LINE__;
            if (msg[pos-2] >= 0xE0 && msg[pos-2] < 0xF8) {
                pos -= 2;
                ret = 2;
            } else {
                if (2 == pos) return -__LINE__;
                if ((msg[pos-2] | 0x3F) != 0xBF) return -__LINE__;
                if ((msg[pos-3]|0x07) == 0xF7) {
                    pos -= 3;
                    ret = 3;
                } else return -__LINE__;
            }
        }
    }

    /* msg[pos] is now a lead byte (>= 0xC0).  Here 0 <= pos < len, so
     * len - pos is the number of bytes available from the lead byte on. */

    if (msg[pos] < 0xE0) {
        /* Two byte sequence: 110xxxxx 10xxxxxx */
        if (len - pos < 2) return -__LINE__;
        if ((msg[pos+1] & 0xC0) != 0x80) return -__LINE__;
        v1 = msg[pos] & ~0xE0;
        v2 = msg[pos+1] & ~0xC0;
        v1 = (v1 << 6) + v2;
        if (v1 < 0x80) return -__LINE__; /* overlong */
        *out = v1;
        return ret;
    }

    if (msg[pos] < 0xF0) {
        /* Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx */
        if (len - pos < 3) return -__LINE__;
        if ((msg[pos+1] & 0xC0) != 0x80) return -__LINE__;
        if ((msg[pos+2] & 0xC0) != 0x80) return -__LINE__;
        v1 = msg[pos] & ~0xF0;
        v2 = msg[pos+1] & ~0xC0;
        v3 = msg[pos+2] & ~0xC0;
        v1 = (v1 << 12) + (v2 << 6) + v3;
        if (v1 < 0x800) return -__LINE__; /* overlong */
        if (!isLegalUnicodeCodePoint(v1)) return -__LINE__;
        *out = v1;
        return ret;
    }

    /* Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.  Lead bytes
     * of 0xF8 and above (5 and 6 byte forms) are rejected. */
    if (msg[pos] >= 0xF8) return -__LINE__;
    if (len - pos < 4) return -__LINE__;
    if ((msg[pos+1] & 0xC0) != 0x80) return -__LINE__;
    if ((msg[pos+2] & 0xC0) != 0x80) return -__LINE__;
    if ((msg[pos+3] & 0xC0) != 0x80) return -__LINE__;
    v1 = msg[pos] & ~0xF8;
    v2 = msg[pos+1] & ~0xC0;
    v3 = msg[pos+2] & ~0xC0;
    v4 = msg[pos+3] & ~0xC0;
    v1 = (v1 << 18) + (v2 << 12) + (v3 << 6) + v4;
    if (v1 < 0x10000) return -__LINE__; /* overlong */
    if (!isLegalUnicodeCodePoint(v1)) return -__LINE__;
    *out = v1;
    return ret;
}
/*
Code point UTF-8
---------- -----
U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
/*
 * Reads the code point at the iterator's current position and advances past
 * it.
 *
 * On return iter->data + iter->start points at the bytes just read and
 * iter->data + iter->next points at the bytes that will be read next.
 *
 * iter->error is a boolean indicating whether or not this read hit a
 * malformed, truncated, overlong or illegal sequence.  In that case errCh is
 * returned and the iterator skips the offending byte together with any
 * continuation bytes (10xxxxxx) that directly follow it.
 *
 * errCh is also returned, without touching iter->error, when iter is NULL,
 * iter->next is negative, iter->data is NULL, or the end of the data has been
 * reached (in which case iter->start is set to iter->slen).
 */
cpUcs4 utf8IteratorGetNextCodePoint (struct utf8Iterator* iter, cpUcs4 errCh) {
    unsigned char * chrs;
    unsigned char c, d, e;
    long v;
    int i, ofs, avail;

    if (NULL == iter || iter->next < 0) return errCh;
    if (iter->next >= iter->slen) {
        iter->start = iter->slen;
        return errCh;
    }
    if (NULL == iter->data || utf8IteratorNoMore(iter)) return errCh;

    chrs = iter->data + iter->next;
    /* Bytes left from the current position on; at least 1 at this point.
     * Every multi-byte branch checks it before touching chrs[1..3] so that a
     * sequence truncated by the end of the data is never read past slen. */
    avail = iter->slen - iter->next;

    iter->error = 0;
    c = chrs[0];
    ofs = 0;

    if (c < 0xC0 || c > 0xFD) {
        /* ASCII, or a stray continuation byte / 0xFE / 0xFF (an error). */
        if (c >= 0x80) goto ErrMode;
        v = c;
        ofs = 1;
    } else if (c < 0xE0) {
        if (avail < 2) goto ErrMode;
        v = (c << 6u) - (0x0C0 << 6u);
        /* After subtracting 0x80 a valid continuation byte is below 0x40. */
        c = (unsigned char) ((unsigned) chrs[1] - 0x080);
        v += c;
        if (c >= 0x40 || v < 0x80) goto ErrMode;
        ofs = 2;
    } else if (c < 0xF0) {
        if (avail < 3) goto ErrMode;
        v = (c << 12) - (0x0E0 << 12u);
        c = (unsigned char) ((unsigned) chrs[1] - 0x080);
        d = (unsigned char) ((unsigned) chrs[2] - 0x080);
        v += (c << 6u) + d;
        if ((c|d) >= 0x40 || v < 0x800 || !isLegalUnicodeCodePoint (v)) goto ErrMode;
        ofs = 3;
    } else if (c < 0xF8) {
        if (avail < 4) goto ErrMode;
        v = (c << 18) - (0x0F0 << 18u);
        c = (unsigned char) ((unsigned) chrs[1] - 0x080);
        d = (unsigned char) ((unsigned) chrs[2] - 0x080);
        e = (unsigned char) ((unsigned) chrs[3] - 0x080);
        v += (c << 12u) + (d << 6u) + e;
        if ((c|d|e) >= 0x40 || v < 0x10000 || !isLegalUnicodeCodePoint (v)) goto ErrMode;
        ofs = 4;
    } else { /* 5 and 6 byte encodings are invalid */
        ErrMode:;
        iter->error = 1;
        v = errCh;
        /* Resynchronise: skip this byte and the continuation bytes after it. */
        for (i = iter->next+1; i < iter->slen; i++) if ((iter->data[i] & 0xC0) != 0x80) break;
        ofs = i - iter->next;
    }

    iter->start = iter->next;
    iter->next += ofs;
    return v;
}
/*
 * Returns the code point at the iterator's current position without
 * advancing the iterator.
 *
 * iter->data + iter->next points at the bytes that are decoded; neither
 * iter->next nor iter->start is changed by a successful or failed decode.
 *
 * iter->error is a boolean indicating whether or not this read hit a
 * malformed, truncated, overlong or illegal sequence; errCh is returned in
 * that case.
 *
 * errCh is also returned, without touching iter->error, when iter is NULL,
 * iter->next is negative, iter->data is NULL, or the end of the data has been
 * reached (in which case iter->start is set to iter->slen).
 */
cpUcs4 utf8IteratorGetCurrCodePoint (struct utf8Iterator* iter, cpUcs4 errCh) {
    unsigned char * chrs;
    unsigned char c, d, e;
    long v;
    int avail;

    if (NULL == iter || iter->next < 0) return errCh;
    if (iter->next >= iter->slen) {
        iter->start = iter->slen;
        return errCh;
    }
    if (NULL == iter->data || utf8IteratorNoMore(iter)) return errCh;

    chrs = iter->data + iter->next;
    /* Bytes left from the current position on; at least 1 at this point.
     * Every multi-byte branch checks it before touching chrs[1..3] so that a
     * sequence truncated by the end of the data is never read past slen. */
    avail = iter->slen - iter->next;

    iter->error = 0;
    c = chrs[0];

    if (c < 0xC0 || c > 0xFD) {
        /* ASCII, or a stray continuation byte / 0xFE / 0xFF (an error). */
        if (c >= 0x80) goto ErrMode;
        v = c;
    } else if (c < 0xE0) {
        if (avail < 2) goto ErrMode;
        v = (c << 6u) - (0x0C0 << 6u);
        /* After subtracting 0x80 a valid continuation byte is below 0x40. */
        c = (unsigned char) ((unsigned) chrs[1] - 0x080);
        v += c;
        if (c >= 0x40 || v < 0x80) goto ErrMode;
    } else if (c < 0xF0) {
        if (avail < 3) goto ErrMode;
        v = (c << 12lu) - (0x0E0 << 12u);
        c = (unsigned char) ((unsigned) chrs[1] - 0x080);
        d = (unsigned char) ((unsigned) chrs[2] - 0x080);
        v += (c << 6u) + d;
        if ((c|d) >= 0x40 || v < 0x800 || !isLegalUnicodeCodePoint (v)) goto ErrMode;
    } else if (c < 0xF8) {
        if (avail < 4) goto ErrMode;
        v = (c << 18lu) - (0x0F0 << 18u);
        c = (unsigned char) ((unsigned) chrs[1] - 0x080);
        d = (unsigned char) ((unsigned) chrs[2] - 0x080);
        e = (unsigned char) ((unsigned) chrs[3] - 0x080);
        v += (c << 12lu) + (d << 6u) + e;
        if ((c|d|e) >= 0x40 || v < 0x10000 || !isLegalUnicodeCodePoint (v)) goto ErrMode;
    } else { /* 5 and 6 byte encodings are invalid */
        ErrMode:;
        iter->error = 1;
        v = errCh;
    }

    return v;
}

62
bstrlib/utf8util.h Normal file
View File

@ -0,0 +1,62 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
* license and the GPL. Refer to the accompanying documentation for details
* on usage and license.
*/
/*
* utf8util.h
*
* This file defines the interface to the utf8 utility functions.
*/
#ifndef UTF8_UNICODE_UTILITIES
#define UTF8_UNICODE_UTILITIES
#include <limits.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * cpUcs4: signed integer type used for code point values.  int is chosen
 * when INT_MAX is at least 0x7fffffff, otherwise long when LONG_MAX is;
 * compilation is refused if neither type is wide enough.
 */
#if INT_MAX >= 0x7fffffffUL
typedef int cpUcs4;
#elif LONG_MAX >= 0x7fffffffUL
typedef long cpUcs4;
#else
#error This compiler is not supported
#endif
/*
 * cpUcs2: unsigned integer type whose maximum value is exactly 0xFFFF.
 * The first of unsigned int, unsigned short and unsigned char that matches
 * is used; compilation is refused if none of them does.
 */
#if UINT_MAX == 0xFFFF
typedef unsigned int cpUcs2;
#elif USHRT_MAX == 0xFFFF
typedef unsigned short cpUcs2;
#elif UCHAR_MAX == 0xFFFF
typedef unsigned char cpUcs2;
#else
#error This compiler is not supported
#endif
/*
 * Non-zero when v is accepted as a code point.  Three conditions must hold:
 *   - v is outside 0xD800..0xDFFF,
 *   - v, converted to unsigned long, is at most 0x10FFFF (this also rejects
 *     negative values),
 *   - (v | 0x1F0001) != 0x1FFFFF; for values that passed the range check
 *     this rejects those whose low 16 bits are 0xFFFE or 0xFFFF.
 * The argument is evaluated several times, so it must not have side effects.
 */
#define isLegalUnicodeCodePoint(v) ((((v) < 0xD800L) || ((v) > 0xDFFFL)) && (((unsigned long)(v)) <= 0x0010FFFFL) && (((v)|0x1F0001) != 0x1FFFFFL))
/*
 * Cursor over a byte buffer that is read as a sequence of UTF-8 encoded
 * code points.
 */
struct utf8Iterator {
	unsigned char* data;   /* buffer being read */
	int slen;              /* number of bytes in data; reads stop at this offset */
	int start;             /* offset of the sequence most recently consumed */
	int next;              /* offset of the sequence that will be decoded next */
	int error;             /* 1 if the last decode attempt failed, 0 if it succeeded */
};
/*
 * Non-zero when nothing more can be read from the iterator: it is NULL or
 * it->next has reached (or passed) it->slen.  The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define utf8IteratorNoMore(it) (!(it) || (it)->next >= (it)->slen)
/*
 * NOTE(review): definition not visible from this header; presumably binds
 * iter to the slen bytes at data and resets its offsets -- confirm against
 * utf8util.c.
 */
extern void utf8IteratorInit (struct utf8Iterator* iter, unsigned char* data, int slen);
/*
 * NOTE(review): definition not visible from this header; presumably
 * detaches iter from its buffer -- confirm against utf8util.c.
 */
extern void utf8IteratorUninit (struct utf8Iterator* iter);
/*
 * Decodes the code point at iter->next and advances iter past it.  Returns
 * errCh when there is nothing left to read or the bytes cannot be decoded;
 * iter->error reports whether the decode attempt failed.
 */
extern cpUcs4 utf8IteratorGetNextCodePoint (struct utf8Iterator* iter, cpUcs4 errCh);
/*
 * Decodes the code point at iter->next without advancing iter.  Returns
 * errCh when there is nothing left to read or the bytes cannot be decoded;
 * iter->error reports whether the decode attempt failed.
 */
extern cpUcs4 utf8IteratorGetCurrCodePoint (struct utf8Iterator* iter, cpUcs4 errCh);
/*
 * NOTE(review): definition not visible from this header; by its name and
 * signature it looks for the code point at or before offset pos in the len
 * bytes at msg and stores it through out -- confirm the return value
 * convention against utf8util.c.
 */
extern int utf8ScanBackwardsForCodePoint (unsigned char* msg, int len, int pos, cpUcs4* out);
#ifdef __cplusplus
}
#endif
#endif /* UTF8_UNICODE_UTILITIES */