diff --git a/bstrlib b/bstrlib deleted file mode 160000 index 208b1f2..0000000 --- a/bstrlib +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 208b1f2a4dfc96b806ed499bd1909e87ec15981d diff --git a/bstrlib/LICENSE b/bstrlib/LICENSE new file mode 100644 index 0000000..28ab228 --- /dev/null +++ b/bstrlib/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2014, Paul Hsieh +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of bstrlib nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/bstrlib/README.md b/bstrlib/README.md new file mode 100644 index 0000000..367650b --- /dev/null +++ b/bstrlib/README.md @@ -0,0 +1,34 @@ +The Better String Library + +The Better String Library is an abstraction of a string data type which is +superior to the C library char buffer string type, or C++'s std::string. +Among the features achieved are: + + - Substantial mitigation of buffer overflow/overrun problems and other + failures that result from erroneous usage of the common C string + library functions + + - Significantly simplified string manipulation + + - High performance interoperability with other source/libraries which + expect '\0' terminated char buffers + + - Improved overall performance of common string operations + + - Functional equivalency with other more modern languages + +The library is totally stand alone, portable (known to work with gcc/g++, +MSVC++, Intel C++, WATCOM C/C++, Turbo C, Borland C++, IBM's native CC +compiler on Windows, Linux and Mac OS X), high performance, easy to use and +is not part of some other collection of data structures. Even the file I/O +functions are totally abstracted (so that other stream-like mechanisms, like +sockets, can be used.) Nevertheless, it is adequate as a complete +replacement of the C string library for string manipulation in any C program. + +The library includes a robust C++ wrapper that uses overloaded operators, +rich constructors, exceptions, stream I/O and STL to make the CBString +struct a natural and powerful string abstraction with more functionality and +higher performance than std::string. + +Bstrlib is stable, well tested and suitable for any software production +environment. diff --git a/bstrlib/bsafe.c b/bstrlib/bsafe.c new file mode 100644 index 0000000..dc32e90 --- /dev/null +++ b/bstrlib/bsafe.c @@ -0,0 +1,82 @@ +/* + * This source file is part of the bstring string library. 
This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license. Refer to the accompanying documentation for details on usage and + * license. + */ + +/* + * bsafe.c + * + * This is an optional module that can be used to help enforce a safety + * standard based on pervasive usage of bstrlib. This file is not necessarily + * portable, however, it has been tested to work correctly with Intel's C/C++ + * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. + */ + +#include +#include +#include "bsafe.h" + +static int bsafeShouldExit = 1; + +char * strcpy (char *dst, const char *src); +char * strcat (char *dst, const char *src); + +char * strcpy (char *dst, const char *src) { + (void) dst; + (void) src; + fprintf (stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * strcat (char *dst, const char *src) { + (void) dst; + (void) src; + fprintf (stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +char * (gets) (char * buf) { + (void) buf; + fprintf (stderr, "bsafe error: gets() is not safe, use bgets.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} +#endif + +char * (strncpy) (char *dst, const char *src, size_t n) { + (void) dst; + (void) src; + (void) n; + fprintf (stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * (strncat) (char *dst, const char *src, size_t n) { + (void) dst; + (void) src; + (void) n; + fprintf (stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * (strtok) (char *s1, const char *s2) { + (void) s1; + (void) s2; + fprintf (stderr, "bsafe error: strtok() is not safe, use bsplit 
or bsplits instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * (strdup) (const char *s) { + (void) s; + fprintf (stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} diff --git a/bstrlib/bsafe.h b/bstrlib/bsafe.h new file mode 100644 index 0000000..6ee49cf --- /dev/null +++ b/bstrlib/bsafe.h @@ -0,0 +1,43 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2004, and is covered by the BSD open source + * license. Refer to the accompanying documentation for details on usage and + * license. + */ + +/* + * bsafe.h + * + * This is an optional module that can be used to help enforce a safety + * standard based on pervasive usage of bstrlib. This file is not necessarily + * portable, however, it has been tested to work correctly with Intel's C/C++ + * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. + */ + +#ifndef BSTRLIB_BSAFE_INCLUDE +#define BSTRLIB_BSAFE_INCLUDE + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +/* This is caught in the linker, so its not necessary for gcc. */ +extern char * (gets) (char * buf); +#endif + +extern char * (strncpy) (char *dst, const char *src, size_t n); +extern char * (strncat) (char *dst, const char *src, size_t n); +extern char * (strtok) (char *s1, const char *s2); +extern char * (strdup) (const char *s); + +#undef strcpy +#undef strcat +#define strcpy(a,b) bsafe_strcpy(a,b) +#define strcat(a,b) bsafe_strcat(a,b) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/bstrlib/bstest.c b/bstrlib/bstest.c new file mode 100644 index 0000000..773768d --- /dev/null +++ b/bstrlib/bstest.c @@ -0,0 +1,3689 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license. 
Refer to the accompanying documentation for details on usage and + * license. + */ + +/* + * bstest.c + * + * This file is the C unit test for Bstrlib. + */ + +#include +#include +#include +#include +#include +#include "bstrlib.h" +#include "bstraux.h" + +static bstring dumpOut[16]; +static int rot = 0; + +static int incorrectBstring (const struct tagbstring * b) { + if (NULL == b) return 1; + if (NULL == b->data) return 1; + if (b->slen < 0) return 1; + if (b->mlen > 0 && b->slen > b->mlen) return 1; + if (b->data[b->slen] != '\0') return 1; + return 0; +} + +static char * dumpBstring (const struct tagbstring * b) { + rot = (rot + 1) % (unsigned)16; + if (dumpOut[rot] == NULL) { + dumpOut[rot] = bfromcstr (""); + if (dumpOut[rot] == NULL) return "FATAL INTERNAL ERROR"; + } + dumpOut[rot]->slen = 0; + if (b == NULL) { + bcatcstr (dumpOut[rot], "NULL"); + } else { + static char msg[256]; + sprintf (msg, "%p", (void *)b); + bcatcstr (dumpOut[rot], msg); + + if (b->slen < 0) { + sprintf (msg, ":[err:slen=%d<0]", b->slen); + bcatcstr (dumpOut[rot], msg); + } else { + if (b->mlen > 0 && b->mlen < b->slen) { + sprintf (msg, ":[err:mlen=%dmlen, b->slen); + bcatcstr (dumpOut[rot], msg); + } else { + if (b->mlen == -1) { + bcatcstr (dumpOut[rot], "[p]"); + } else if (b->mlen < 0) { + bcatcstr (dumpOut[rot], "[c]"); + } + bcatcstr (dumpOut[rot], ":"); + if (b->data == NULL) { + bcatcstr (dumpOut[rot], "[err:data=NULL]"); + } else { + bcatcstr (dumpOut[rot], "\""); + bcatcstr (dumpOut[rot], (const char *) b->data); + bcatcstr (dumpOut[rot], "\""); + } + } + } + } + return (char *) dumpOut[rot]->data; +} + +static char* dumpCstring (const char* s) { + rot = (rot + 1) % (unsigned)16; + if (dumpOut[rot] == NULL) { + dumpOut[rot] = bfromcstr (""); + if (dumpOut[rot] == NULL) return "FATAL INTERNAL ERROR"; + } + dumpOut[rot]->slen = 0; + if (s == NULL) { + bcatcstr (dumpOut[rot], "NULL"); + } else { + static char msg[64]; + int i; + + sprintf (msg, "cstr[%p] -> ", (void *)s); + 
bcatcstr (dumpOut[rot], msg); + + bcatStatic (dumpOut[rot], "\""); + for (i = 0; s[i]; i++) { + if (i > 1024) { + bcatStatic (dumpOut[rot], " ..."); + break; + } + bconchar (dumpOut[rot], s[i]); + } + bcatStatic (dumpOut[rot], "\""); + } + + return (char *) dumpOut[rot]->data; +} + +static int test0_0 (const char * s, const char * res) { +bstring b0 = bfromcstr (s); +int ret = 0; + + if (s == NULL) { + if (res != NULL) ret++; + printf (".\tbfromcstr (NULL) = %s\n", dumpBstring (b0)); + return ret; + } + + ret += (res == NULL) || ((int) strlen (res) != b0->slen) + || (0 != memcmp (res, b0->data, b0->slen)); + ret += b0->data[b0->slen] != '\0'; + + printf (".\tbfromcstr (\"%s\") = %s\n", s, dumpBstring (b0)); + bdestroy (b0); + return ret; +} + +static int test0_1 (const char * s, int len, const char * res) { +bstring b0 = bfromcstralloc (len, s); +int ret = 0; + + if (s == NULL) { + if (res != NULL) ret++; + printf (".\tbfromcstralloc (*, NULL) = %s\n", dumpBstring (b0)); + return ret; + } + + ret += (res == NULL) || ((int) strlen (res) != b0->slen) + || (0 != memcmp (res, b0->data, b0->slen)); + ret += b0->data[b0->slen] != '\0'; + ret += len > b0->mlen; + + printf (".\tbfromcstralloc (%d, \"%s\") = %s\n", len, s, dumpBstring (b0)); + bdestroy (b0); + return ret; +} + +#define EMPTY_STRING "" +#define SHORT_STRING "bogus" +#define EIGHT_CHAR_STRING "Waterloo" +#define LONG_STRING "This is a bogus but reasonably long string. Just long enough to cause some mallocing." + +static int test0_2 (char* s) { +int l = s?strlen(s):2; +int i, j, k; +int ret = 0; + + for (i = 0; i < l*2; i++) { + for (j = 0; j < l*2; j++) { + for (k = 0; k <= l; k++) { + char* t = s ? 
(s + k) : NULL; + bstring b = bfromcstrrangealloc (i, j, t); + if (NULL == b) { + if (i < j && t != NULL) { + printf ("[%d] i = %d, j = %d, l = %d, k = %d\n", __LINE__, i, j, l, k); + } + ret += (i < j && t != NULL); + continue; + } + if (NULL == t) { + printf ("[%d] i = %d, j = %d, l = %d, k = %d\n", __LINE__, i, j, l, k); + ret++; + bdestroy (b); + continue; + } + if (b->data == NULL) { + printf ("[%d] i = %d, j = %d, l = %d, k = %d\n", __LINE__, i, j, l, k); + ret++; + continue; + } + if (b->slen != l-k || b->data[l-k] != '\0' || b->mlen <= b->slen) { + printf ("[%d] i = %d, j = %d, l = %d, k = %d, b->slen = %d\n", __LINE__, i, j, l, k, b->slen); + ret++; + } else if (0 != memcmp (t, b->data, l-k+1)) { + printf ("[%d] \"%s\" != \"%s\"\n", b->data, t); + ret++; + } + bdestroy (b); + continue; + } + } + } + + printf (".\tbfromcstrrangealloc (*,*,%s) correct\n", dumpCstring(s)); + return ret; +} + +static int test0 (void) { +int ret = 0; + + printf ("TEST: bstring bfromcstr (const char * str);\n"); + + /* tests with NULL */ + ret += test0_0 (NULL, NULL); + + /* normal operation tests */ + ret += test0_0 (EMPTY_STRING, EMPTY_STRING); + ret += test0_0 (SHORT_STRING, SHORT_STRING); + ret += test0_0 (LONG_STRING, LONG_STRING); + printf ("\t# failures: %d\n", ret); + + printf ("TEST: bstring bfromcstralloc (int len, const char * str);\n"); + + /* tests with NULL */ + ret += test0_1 (NULL, 0, NULL); + ret += test0_1 (NULL, 30, NULL); + + /* normal operation tests */ + ret += test0_1 (EMPTY_STRING, 0, EMPTY_STRING); + ret += test0_1 (EMPTY_STRING, 30, EMPTY_STRING); + ret += test0_1 (SHORT_STRING, 0, SHORT_STRING); + ret += test0_1 (SHORT_STRING, 30, SHORT_STRING); + ret += test0_1 ( LONG_STRING, 0, LONG_STRING); + ret += test0_1 ( LONG_STRING, 30, LONG_STRING); + + printf ("TEST: bstring bfromcstrrangealloc (int minl, int maxl, const char * str);\n"); + + ret += test0_2 (NULL); + ret += test0_2 (EMPTY_STRING); + ret += test0_2 ( LONG_STRING); + + printf ("\t# failures: 
%d\n", ret); + + return ret; +} + +static int test1_0 (const void * blk, int len, const char * res) { +bstring b0 = blk2bstr (blk, len); +int ret = 0; + if (b0 == NULL) { + if (res != NULL) ret++; + printf (".\tblk2bstr (NULL, len=%d) = %s\n", len, dumpBstring (b0)); + } else { + ret += (res == NULL) || (len != b0->slen) + || (0 != memcmp (res, b0->data, len)); + ret += b0->data[b0->slen] != '\0'; + printf (".\tblk2bstr (blk=%p, len=%d) = %s\n", blk, len, dumpBstring (b0)); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + bdestroy (b0); + return ret; +} + +static int test1 (void) { +int ret = 0; + + printf ("TEST: bstring blk2bstr (const void * blk, int len);\n"); + + /* tests with NULL */ + ret += test1_0 (NULL, 10, NULL); + ret += test1_0 (NULL, 0, NULL); + ret += test1_0 (NULL, -1, NULL); + + /* normal operation tests */ + ret += test1_0 (SHORT_STRING, sizeof (SHORT_STRING)-1, SHORT_STRING); + ret += test1_0 (LONG_STRING, sizeof (LONG_STRING)-1, LONG_STRING); + ret += test1_0 (LONG_STRING, 5, "This "); + ret += test1_0 (LONG_STRING, 0, ""); + ret += test1_0 (LONG_STRING, -1, NULL); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test2_0 (const_bstring b, char z, const unsigned char * res) { +char * s = bstr2cstr (b, z); +int ret = 0; + if (s == NULL) { + if (res != NULL) ret++; + printf (".\tbstr2cstr (%s, %02X) = NULL\n", dumpBstring (b), z); + free(s); + return ret; + } + + if (res == NULL) ret++; + else { + if (z != '\0') if ((int) strlen (s) != b->slen) ret++; + if (!ret) { + ret += (0 != memcmp (res, b->data, b->slen)); + } + } + + printf (".\tbstr2cstr (%s, %02X) = \"%s\"\n", dumpBstring (b), z, s); + free (s); + return ret; +} + +struct tagbstring emptyBstring = bsStatic (""); +struct tagbstring shortBstring = bsStatic ("bogus"); +struct tagbstring longBstring = bsStatic ("This is a bogus but reasonably long string. 
Just long enough to cause some mallocing."); + +struct tagbstring badBstring1 = {8, 4, NULL}; +struct tagbstring badBstring2 = {2, -5, (unsigned char *) "bogus"}; +struct tagbstring badBstring3 = {2, 5, (unsigned char *) "bogus"}; + +struct tagbstring xxxxxBstring = bsStatic ("xxxxx"); + +static int test2 (void) { +int ret = 0; + + printf ("TEST: char * bstr2cstr (const_bstring s, char z);\n"); + + /* tests with NULL */ + ret += test2_0 (NULL, (char) '?', NULL); + + /* normal operation tests */ + ret += test2_0 (&emptyBstring, (char) '?', emptyBstring.data); + ret += test2_0 (&shortBstring, (char) '?', shortBstring.data); + ret += test2_0 (&longBstring, (char) '?', longBstring.data); + ret += test2_0 (&badBstring1, (char) '?', NULL); + ret += test2_0 (&badBstring2, (char) '?', NULL); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test3_0 (const_bstring b) { +bstring b0 = bstrcpy (b); +int ret = 0; + printf (".\tbstrcpy (%s) = %s\n", dumpBstring (b), dumpBstring (b0)); + if (b0 == NULL) { + if (b != NULL && b->data != NULL && b->slen >= 0) ret++; + } else { + ret += (b == NULL) || (b->slen != b0->slen) + || (0 != memcmp (b->data, b0->data, b->slen)); + ret += b0->data[b0->slen] != '\0'; + } + bdestroy (b0); + return ret; +} + +static int test3 (void) { +int ret = 0; + + printf ("TEST: bstring bstrcpy (const_bstring b1);\n"); + + /* tests with NULL to make sure that there is NULL propogation */ + ret += test3_0 (NULL); + ret += test3_0 (&badBstring1); + ret += test3_0 (&badBstring2); + + /* normal operation tests */ + ret += test3_0 (&emptyBstring); + ret += test3_0 (&shortBstring); + ret += test3_0 (&longBstring); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test4_0 (const_bstring b, int left, int len, const char * res) { +bstring b0 = bmidstr (b, left, len); +int ret = 0; + printf (".\tbmidstr (%s, %d, %d) = %s\n", dumpBstring (b), left, len, dumpBstring (b0)); + if (b0 == NULL) { + if (b != NULL && b->data != NULL && 
b->slen >= 0 && len >= 0) ret++; + } else { + ret += (b == NULL) || (res == NULL) || (b0->slen > len && len >= 0) + || (b0->slen != (int) strlen (res)) + || (b0->slen > 0 && 0 != memcmp (res, b0->data, b0->slen)); + ret += b0->data[b0->slen] != '\0'; + } + if (ret) { + printf ("(b == NULL) = %d\n", (b == NULL)); + printf ("(res == NULL) = %d\n", (res == NULL)); + printf ("(b0->slen > len && len >= 0) = %d\n", (b0->slen > len && len >= 0)); + if (res) printf ("(b0->slen != strlen (res)) = %d\n", (b0->slen != (int) strlen (res))); + printf ("(b0->slen > 0 && 0 != memcmp (res, b0->data, b0->slen) = %d\n", (b0->slen > 0 && 0 != memcmp (res, b0->data, b0->slen))); + + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + bdestroy (b0); + return ret; +} + +static int test4 (void) { +int ret = 0; + + printf ("TEST: bstring bmidstr (const_bstring b, int left, int len);\n"); + + /* tests with NULL to make sure that there is NULL propogation */ + ret += test4_0 (NULL, 0, 0, NULL); + ret += test4_0 (NULL, 0, 2, NULL); + ret += test4_0 (NULL, 0, -2, NULL); + ret += test4_0 (NULL, -5, 2, NULL); + ret += test4_0 (NULL, -5, -2, NULL); + ret += test4_0 (&badBstring1, 1, 3, NULL); + ret += test4_0 (&badBstring2, 1, 3, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test4_0 (&emptyBstring, 0, 0, ""); + ret += test4_0 (&emptyBstring, 0, -1, ""); + ret += test4_0 (&emptyBstring, 1, 3, ""); + ret += test4_0 (&shortBstring, 0, 0, ""); + ret += test4_0 (&shortBstring, 0, -1, ""); + ret += test4_0 (&shortBstring, 1, 3, "ogu"); + ret += test4_0 (&shortBstring, -1, 3, "bo"); + ret += test4_0 (&shortBstring, -1, 9, "bogus"); + ret += test4_0 (&shortBstring, 3, -1, ""); + ret += test4_0 (&shortBstring, 9, 3, ""); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test5_0 (bstring b0, const_bstring b1, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && 
b0->data != NULL && b0->slen >= 0 &&
+	    b1 != NULL && b1->data != NULL && b1->slen >= 0 ) {
+		b2 = bstrcpy (b0);
+		bwriteprotect (*b2);
+
+		printf (".\tbconcat (%s, ", dumpBstring (b2));
+
+		rv = bconcat (b2, b1);
+		ret += (rv == 0);
+		if (!biseq (b0, b2)) ret++;
+
+		printf ("%s) = %s\n", dumpBstring (b1), dumpBstring (b2));
+
+		bwriteallow (*b2);
+
+		printf (".\tbconcat (%s, ", dumpBstring (b2));
+
+		rv = bconcat (b2, b1);
+
+		printf ("%s) = %s\n", dumpBstring (b1), dumpBstring (b2));
+
+		if (b1) ret += (b2->slen != b0->slen + b1->slen);
+		ret += ((0 != rv) && (b1 != NULL)) || ((0 == rv) && (b1 == NULL));
+		ret += (res == NULL) || ((int) strlen (res) > b2->slen)
+		    || (0 != memcmp (b2->data, res, b2->slen));
+		ret += b2->data[b2->slen] != '\0';
+		bdestroy (b2);
+	} else {
+		ret += (BSTR_ERR != (rv = bconcat (b0, b1)));
+		printf (".\tbconcat (%s, %s) = %d\n", dumpBstring (b0), dumpBstring (b1), rv);
+	}
+
+	if (ret) {
+		printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res);
+		if (res) printf (" = \"%s\"", res);
+		printf (")\n");
+	}
+	return ret;
+}
+
+/*
+ * Checks bconcat when the appended string aliases the destination's own
+ * buffer. Returns the total number of failed checks from both phases.
+ *
+ * Phase 1: for every offset i of longBstring, appends the tail view t of b
+ * (built with bmid2tbstr) both to an independent copy c and to b itself;
+ * the two results must be equal.
+ * Phase 2: 100 times, appends the 3-character head view of b to b and the
+ * literal "abc" to c; b and c must stay equal.
+ */
+static int test5_1 (void) {
+bstring b, c;
+struct tagbstring t;
+int i, ret;
+
+	printf ("TEST: bconcat aliasing\n");
+	for (ret=i=0; i < longBstring.slen; i++) {
+		b = bstrcpy (&longBstring);
+		c = bstrcpy (&longBstring);
+		bmid2tbstr (t, b, i, longBstring.slen);
+		ret += 0 != bconcat (c, &t);
+		ret += 0 != bconcat (b, &t);
+		ret += !biseq (b, c);
+		bdestroy (b);
+		bdestroy (c);
+	}
+
+	b = bfromcstr ("abcd");
+	c = bfromcstr ("abcd");
+
+	/* Restart only the index: ret keeps the failures counted in phase 1. */
+	for (i=0; i < 100; i++) {
+		bmid2tbstr (t, b, 0, 3);
+		ret += 0 != bcatcstr (c, "abc");
+		ret += 0 != bconcat (b, &t);
+		ret += !biseq (b, c);
+	}
+
+	bdestroy (b);
+	bdestroy (c);
+
+	if (ret) {
+		printf ("\t\talias failures(%d) = %d\n", __LINE__, ret);
+	}
+
+	return ret;
+}
+
+static int test5 (void) {
+int ret = 0;
+
+	printf ("TEST: int bconcat (bstring b0, const_bstring b1);\n");
+
+	/* tests with NULL */
+	ret += test5_0 (NULL, NULL, NULL);
+	ret += test5_0 
(NULL, &emptyBstring, NULL); + ret += test5_0 (&emptyBstring, NULL, ""); + ret += test5_0 (&emptyBstring, &badBstring1, NULL); + ret += test5_0 (&emptyBstring, &badBstring2, NULL); + ret += test5_0 (&badBstring1, &emptyBstring, NULL); + ret += test5_0 (&badBstring2, &emptyBstring, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test5_0 (&emptyBstring, &emptyBstring, ""); + ret += test5_0 (&emptyBstring, &shortBstring, "bogus"); + ret += test5_0 (&shortBstring, &emptyBstring, "bogus"); + ret += test5_0 (&shortBstring, &shortBstring, "bogusbogus"); + + ret += test5_1 (); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test6_0 (bstring b, char c, const char * res) { +bstring b0; +int rv, ret = 0; + + if (b != NULL && b->data != NULL && b->slen >= 0) { + b0 = bstrcpy (b); + bwriteprotect (*b0); + rv = bconchar (b0, c); + ret += (rv == 0); + if (!biseq (b0, b)) ret++; + + printf (".\tbconchar (%s, %c) = %s\n", dumpBstring (b), c, dumpBstring (b0)); + + bwriteallow (*b0); + rv = bconchar (b0, c); + ret += (0 != rv); + ret += (b0->slen != b->slen + 1); + ret += (res == NULL) || ((int) strlen (res) > b0->slen) + || (0 != memcmp (b0->data, res, b0->slen)); + ret += b0->data[b0->slen] != '\0'; + printf (".\tbconchar (%s, %c) = %s\n", dumpBstring (b), c, dumpBstring (b0)); + + bdestroy (b0); + } else { + ret += (BSTR_ERR != (rv = bconchar (b, c))); + printf (".\tbconchar (%s, %c) = %d\n", dumpBstring (b), c, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test6 (void) { +int ret = 0; + + printf ("TEST: int bconchar (bstring b, char c);\n"); + + /* tests with NULL */ + ret += test6_0 (NULL, (char) 'x', NULL); + ret += test6_0 (&badBstring1, (char) 'x', NULL); + ret += test6_0 (&badBstring2, (char) 'x', NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test6_0 
(&emptyBstring, (char) 'x', "x"); + ret += test6_0 (&shortBstring, (char) 'x', "bogusx"); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test7x8_0 (char * fnname, int (* fnptr) (const struct tagbstring *, const struct tagbstring *), const struct tagbstring * b0, const struct tagbstring * b1, int res) { +int rv, ret = 0; + + ret += (res != (rv = fnptr (b0, b1))); + printf (".\t%s (%s, %s) = %d\n", fnname, dumpBstring (b0), dumpBstring (b1), rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test7x8 (char * fnname, int (* fnptr) (const struct tagbstring *, const struct tagbstring *), + int retFail, int retLT, int retGT, int retEQ) { +int ret = 0; + + printf ("TEST: int %s (const_bstring b0, const_bstring b1);\n", fnname); + + /* tests with NULL */ + ret += test7x8_0 (fnname, fnptr, NULL, NULL, retFail); + ret += test7x8_0 (fnname, fnptr, &emptyBstring, NULL, retFail); + ret += test7x8_0 (fnname, fnptr, NULL, &emptyBstring, retFail); + ret += test7x8_0 (fnname, fnptr, &shortBstring, NULL, retFail); + ret += test7x8_0 (fnname, fnptr, NULL, &shortBstring, retFail); + ret += test7x8_0 (fnname, fnptr, &badBstring1, &badBstring1, retFail); + ret += test7x8_0 (fnname, fnptr, &badBstring2, &badBstring2, retFail); + ret += test7x8_0 (fnname, fnptr, &shortBstring, &badBstring2, retFail); + ret += test7x8_0 (fnname, fnptr, &badBstring2, &shortBstring, retFail); + + /* normal operation tests on all sorts of subranges */ + ret += test7x8_0 (fnname, fnptr, &emptyBstring, &emptyBstring, retEQ); + ret += test7x8_0 (fnname, fnptr, &shortBstring, &emptyBstring, retGT); + ret += test7x8_0 (fnname, fnptr, &emptyBstring, &shortBstring, retLT); + ret += test7x8_0 (fnname, fnptr, &shortBstring, &shortBstring, retEQ); + + { + bstring b = bstrcpy (&shortBstring); + b->data[1]++; + ret += test7x8_0 (fnname, fnptr, b, &shortBstring, retGT); + bdestroy (b); + } + + if (fnptr == biseq) { + ret += 
test7x8_0 (fnname, fnptr, &shortBstring, &longBstring, retGT); + ret += test7x8_0 (fnname, fnptr, &longBstring, &shortBstring, retLT); + } else { + ret += test7x8_0 (fnname, fnptr, &shortBstring, &longBstring, 'b'-'T'); + ret += test7x8_0 (fnname, fnptr, &longBstring, &shortBstring, 'T'-'b'); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +#define test7() test7x8 ("biseq", biseq, -1, 0, 0, 1) +#define test8() test7x8 ("bstrcmp", bstrcmp, SHRT_MIN, -1, 1, 0) + +static int test47_0 (const struct tagbstring* b, const unsigned char* blk, int len, int res) { +int rv, ret = 0; + + ret += (res != (rv = biseqblk (b, blk, len))); + printf (".\tbiseqblk (%s, %s) = %d\n", dumpBstring (b), dumpCstring (blk), rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test47 (void) { +int ret = 0; + + printf ("TEST: int biseqblk (const_bstring b, const void * blk, int len);\n"); + + /* tests with NULL */ + ret += test47_0 (NULL, NULL, 0, -1); + ret += test47_0 (&emptyBstring, NULL, 0, -1); + ret += test47_0 (NULL, emptyBstring.data, 0, -1); + ret += test47_0 (&shortBstring, NULL, shortBstring.slen, -1); + ret += test47_0 (NULL, shortBstring.data, 0, -1); + ret += test47_0 (&badBstring1, badBstring1.data, badBstring1.slen, -1); + ret += test47_0 (&badBstring2, badBstring2.data, badBstring2.slen, -1); + ret += test47_0 (&shortBstring, badBstring2.data, badBstring2.slen, -1); + ret += test47_0 (&badBstring2, shortBstring.data, shortBstring.slen, -1); + + /* normal operation tests on all sorts of subranges */ + ret += test47_0 (&emptyBstring, emptyBstring.data, emptyBstring.slen, 1); + ret += test47_0 (&shortBstring, emptyBstring.data, emptyBstring.slen, 0); + ret += test47_0 (&emptyBstring, shortBstring.data, shortBstring.slen, 0); + ret += test47_0 (&shortBstring, shortBstring.data, shortBstring.slen, 1); + + { + bstring b = bstrcpy (&shortBstring); + b->data[1]++; + ret += test47_0 (b, 
shortBstring.data, shortBstring.slen, 0); + bdestroy (b); + } + ret += test47_0 (&shortBstring, longBstring.data, longBstring.slen, 0); + ret += test47_0 (&longBstring, shortBstring.data, shortBstring.slen, 0); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test9_0 (const_bstring b0, const_bstring b1, int n, int res) { +int rv, ret = 0; + + ret += (res != (rv = bstrncmp (b0, b1, n))); + printf (".\tbstrncmp (%s, %s, %d) = %d\n", dumpBstring (b0), dumpBstring (b1), n, rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test9 (void) { +int ret = 0; + + printf ("TEST: int bstrncmp (const_bstring b0, const_bstring b1, int n);\n"); + + /* tests with NULL */ + ret += test9_0 (NULL, NULL, 0, SHRT_MIN); + ret += test9_0 (NULL, NULL, -1, SHRT_MIN); + ret += test9_0 (NULL, NULL, 1, SHRT_MIN); + ret += test9_0 (&emptyBstring, NULL, 0, SHRT_MIN); + ret += test9_0 (NULL, &emptyBstring, 0, SHRT_MIN); + ret += test9_0 (&emptyBstring, NULL, 1, SHRT_MIN); + ret += test9_0 (NULL, &emptyBstring, 1, SHRT_MIN); + ret += test9_0 (&badBstring1, &badBstring1, 1, SHRT_MIN); + ret += test9_0 (&badBstring2, &badBstring2, 1, SHRT_MIN); + ret += test9_0 (&emptyBstring, &badBstring1, 1, SHRT_MIN); + ret += test9_0 (&emptyBstring, &badBstring2, 1, SHRT_MIN); + ret += test9_0 (&badBstring1, &emptyBstring, 1, SHRT_MIN); + ret += test9_0 (&badBstring2, &emptyBstring, 1, SHRT_MIN); + + /* normal operation tests on all sorts of subranges */ + ret += test9_0 (&emptyBstring, &emptyBstring, -1, 0); + ret += test9_0 (&emptyBstring, &emptyBstring, 0, 0); + ret += test9_0 (&emptyBstring, &emptyBstring, 1, 0); + ret += test9_0 (&shortBstring, &shortBstring, -1, 0); + ret += test9_0 (&shortBstring, &shortBstring, 0, 0); + ret += test9_0 (&shortBstring, &shortBstring, 1, 0); + ret += test9_0 (&shortBstring, &shortBstring, 9, 0); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test10_0 (bstring b, 
int res, int nochange) { +struct tagbstring sb = bsStatic (""); +int rv, x, ret = 0; + /* Snapshot the header so that a failed bdestroy can be checked for side effects. */ + if (b) sb = *b; + printf (".\tbdestroy (%s) = ", dumpBstring (b)); + rv = bdestroy (b); + printf ("%d\n", rv); + + if (b != NULL) { + if (rv >= 0) + /* If the bdestroy was successful we have to assume + the contents were "changed" */ + x = 1; + else + x = memcmp (&sb, b, sizeof sb); + } else x = !nochange; + ret += (rv != res); + ret += (!nochange) == (!x); + if (ret) { + printf ("\t\tfailure(%d) res = %d nochange = %d, x = %d, sb.slen = %d, sb.mlen = %d, sb.data = %p\n", __LINE__, res, nochange, x, sb.slen, sb.mlen, sb.data); + } + return ret; +} +/* Driver for the bdestroy cases: NULL, write protected heap copies, the same copies made writable again, the emptyBstring and shortBstring fixtures before and after bwriteallow, and the badBstring fixtures. Prints and returns the number of failed cases. */ +static int test10 (void) { +bstring c = bstrcpy (&shortBstring); +bstring b = bstrcpy (&emptyBstring); +int ret = 0; + + printf ("TEST: int bdestroy (const_bstring b);\n"); + /* tests with NULL */ + ret += test10_0 (NULL, BSTR_ERR, 1); + + /* protected, constant and regular instantiations on empty or not */ + bwriteprotect (*b); + bwriteprotect (*c); + ret += test10_0 (b, BSTR_ERR, 1); + ret += test10_0 (c, BSTR_ERR, 1); + bwriteallow (*b); + bwriteallow (*c); + ret += test10_0 (b, BSTR_OK, 0); + ret += test10_0 (c, BSTR_OK, 0); + ret += test10_0 (&emptyBstring, BSTR_ERR, 1); + bwriteallow (emptyBstring); + ret += test10_0 (&emptyBstring, BSTR_ERR, 1); + ret += test10_0 (&shortBstring, BSTR_ERR, 1); + /* Unprotect the fixture that is destroyed next (this call used to repeat emptyBstring); bdestroy is still expected to refuse it. */ + bwriteallow (shortBstring); + ret += test10_0 (&shortBstring, BSTR_ERR, 1); + ret += test10_0 (&badBstring1, BSTR_ERR, 1); + ret += test10_0 (&badBstring2, BSTR_ERR, 1); + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one binstr (s1, pos, s2) case: prints the call and its result and returns 1 when the result differs from res, otherwise 0. */ +static int test11_0 (bstring s1, int pos, const_bstring s2, int res) { +int rv, ret = 0; + + printf (".\tbinstr (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = binstr (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} +/* Runs one binstrcaseless (s1, pos, s2) case: prints the call and its result and returns 1 when the result differs from res, otherwise 0. */ +static int test11_1 (bstring s1, int pos, const_bstring s2, int res) { 
+int rv, ret = 0; + /* Print the call, run binstrcaseless and count a failure when the result differs from res. */ + printf (".\tbinstrcaseless (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = binstrcaseless (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} +/* Driver for the binstr and binstrcaseless cases; prints and returns the number of failed cases. */ +static int test11 (void) { +bstring b, c; +int ret = 0; + /* NULL and badBstring arguments: BSTR_ERR is expected for every combination */ + printf ("TEST: int binstr (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test11_0 (NULL, 0, NULL, BSTR_ERR); + ret += test11_0 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test11_0 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test11_0 (&emptyBstring, 0, &badBstring1, BSTR_ERR); + ret += test11_0 (&emptyBstring, 0, &badBstring2, BSTR_ERR); + ret += test11_0 (&badBstring1, 0, &emptyBstring, BSTR_ERR); + ret += test11_0 (&badBstring2, 0, &emptyBstring, BSTR_ERR); + ret += test11_0 (&badBstring1, 0, &badBstring2, BSTR_ERR); + ret += test11_0 (&badBstring2, 0, &badBstring1, BSTR_ERR); + /* well formed arguments: the expected value is the match position or BSTR_ERR; heap temporaries are destroyed right after use */ + ret += test11_0 (&emptyBstring, 0, &emptyBstring, 0); + ret += test11_0 (&emptyBstring, 1, &emptyBstring, BSTR_ERR); + ret += test11_0 (&shortBstring, 1, &shortBstring, BSTR_ERR); + ret += test11_0 (&shortBstring, 5, &emptyBstring, 5); + ret += test11_0 (&shortBstring, -1, &shortBstring, BSTR_ERR); + ret += test11_0 (&shortBstring, 0, &shortBstring, 0); + ret += test11_0 (&shortBstring, 0, b = bstrcpy (&shortBstring), 0); + bdestroy (b); + ret += test11_0 (&shortBstring, 0, b = bfromcstr ("BOGUS"), BSTR_ERR); + bdestroy (b); + ret += test11_0 (&longBstring, 0, &shortBstring, 10); + ret += test11_0 (&longBstring, 20, &shortBstring, BSTR_ERR); + /* texts made of a repeated leading character followed by the needle, searched from several start positions */ + ret += test11_0 (c = bfromcstr ("sssssssssap"), 0, b = bfromcstr ("sap"), 8); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("sssssssssap"), 3, b = bfromcstr ("sap"), 8); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("ssssssssssap"), 3, b = bfromcstr ("sap"), 9); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("sssssssssap"), 0, b = 
bfromcstr ("s"), 0); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("sssssssssap"), 3, b = bfromcstr ("s"), 3); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("sssssssssap"), 0, b = bfromcstr ("a"), 9); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("sssssssssap"), 5, b = bfromcstr ("a"), 9); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("sasasasasap"), 0, b = bfromcstr ("sap"), 8); + bdestroy (c); + bdestroy (b); + ret += test11_0 (c = bfromcstr ("ssasasasasap"), 0, b = bfromcstr ("sap"), 9); + bdestroy (c); + bdestroy (b); + /* binstrcaseless: NULL and badBstring arguments first, BSTR_ERR expected for each */ + printf ("TEST: int binstrcaseless (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test11_1 (NULL, 0, NULL, BSTR_ERR); + ret += test11_1 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test11_1 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test11_1 (&emptyBstring, 0, &badBstring1, BSTR_ERR); + ret += test11_1 (&emptyBstring, 0, &badBstring2, BSTR_ERR); + ret += test11_1 (&badBstring1, 0, &emptyBstring, BSTR_ERR); + ret += test11_1 (&badBstring2, 0, &emptyBstring, BSTR_ERR); + ret += test11_1 (&badBstring1, 0, &badBstring2, BSTR_ERR); + ret += test11_1 (&badBstring2, 0, &badBstring1, BSTR_ERR); + /* well formed arguments; the upper case BOGUS needle is expected to match shortBstring at position 0 */ + ret += test11_1 (&emptyBstring, 0, &emptyBstring, 0); + ret += test11_1 (&emptyBstring, 1, &emptyBstring, BSTR_ERR); + ret += test11_1 (&shortBstring, 1, &shortBstring, BSTR_ERR); + ret += test11_1 (&shortBstring, 5, &emptyBstring, 5); + ret += test11_1 (&shortBstring, -1, &shortBstring, BSTR_ERR); + ret += test11_1 (&shortBstring, 0, &shortBstring, 0); + ret += test11_1 (&shortBstring, 0, b = bstrcpy (&shortBstring), 0); + bdestroy (b); + ret += test11_1 (&shortBstring, 0, b = bfromcstr ("BOGUS"), 0); + bdestroy (b); + ret += test11_1 (&longBstring, 0, &shortBstring, 10); + ret += test11_1 (&longBstring, 20, &shortBstring, BSTR_ERR); + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one binstrr (s1, pos, s2) case: prints the call and its result and returns 1 when the result differs from res, otherwise 0. */ +static int test12_0 (bstring s1, int pos, const_bstring s2, int res) { 
+int rv, ret = 0; + + printf (".\tbinstrr (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = binstrr (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} +/* Runs one binstrrcaseless (s1, pos, s2) case: prints the call and its result and returns 1 when the result differs from res, otherwise 0. */ +static int test12_1 (bstring s1, int pos, const_bstring s2, int res) { +int rv, ret = 0; + + printf (".\tbinstrrcaseless (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = binstrrcaseless (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} +/* Driver for the binstrr and binstrrcaseless cases; prints and returns the number of failed cases. */ +static int test12 (void) { +bstring b; +int ret = 0; + /* NULL and badBstring arguments: BSTR_ERR is expected for every combination */ + printf ("TEST: int binstrr (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test12_0 (NULL, 0, NULL, BSTR_ERR); + ret += test12_0 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test12_0 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test12_0 (&emptyBstring, 0, &badBstring1, BSTR_ERR); + ret += test12_0 (&emptyBstring, 0, &badBstring2, BSTR_ERR); + ret += test12_0 (&badBstring1, 0, &emptyBstring, BSTR_ERR); + ret += test12_0 (&badBstring2, 0, &emptyBstring, BSTR_ERR); + ret += test12_0 (&badBstring1, 0, &badBstring2, BSTR_ERR); + ret += test12_0 (&badBstring2, 0, &badBstring1, BSTR_ERR); + /* well formed arguments: the expected value is the match position or BSTR_ERR */ + ret += test12_0 (&emptyBstring, 0, &emptyBstring, 0); + ret += test12_0 (&emptyBstring, 1, &emptyBstring, BSTR_ERR); + ret += test12_0 (&shortBstring, 1, &shortBstring, 0); + ret += test12_0 (&shortBstring, 5, &emptyBstring, 5); + ret += test12_0 (&shortBstring, -1, &shortBstring, BSTR_ERR); + ret += test12_0 (&shortBstring, 0, &shortBstring, 0); + ret += test12_0 (&shortBstring, 0, b = bstrcpy (&shortBstring), 0); + bdestroy (b); + ret += test12_0 (&shortBstring, 0, b = bfromcstr ("BOGUS"), BSTR_ERR); + bdestroy (b); + ret += test12_0 (&longBstring, 0, &shortBstring, BSTR_ERR); + ret += test12_0 (&longBstring, 20, &shortBstring, 10); + + printf ("TEST: int binstrrcaseless (const_bstring s1, int pos, 
const_bstring s2);\n"); + ret += test12_1 (NULL, 0, NULL, BSTR_ERR); + ret += test12_1 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test12_1 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test12_1 (&emptyBstring, 0, &badBstring1, BSTR_ERR); + ret += test12_1 (&emptyBstring, 0, &badBstring2, BSTR_ERR); + ret += test12_1 (&badBstring1, 0, &emptyBstring, BSTR_ERR); + ret += test12_1 (&badBstring2, 0, &emptyBstring, BSTR_ERR); + ret += test12_1 (&badBstring1, 0, &badBstring2, BSTR_ERR); + ret += test12_1 (&badBstring2, 0, &badBstring1, BSTR_ERR); + /* well formed arguments; the upper case BOGUS needle is expected to match shortBstring at position 0 */ + ret += test12_1 (&emptyBstring, 0, &emptyBstring, 0); + ret += test12_1 (&emptyBstring, 1, &emptyBstring, BSTR_ERR); + ret += test12_1 (&shortBstring, 1, &shortBstring, 0); + ret += test12_1 (&shortBstring, 5, &emptyBstring, 5); + ret += test12_1 (&shortBstring, -1, &shortBstring, BSTR_ERR); + ret += test12_1 (&shortBstring, 0, &shortBstring, 0); + ret += test12_1 (&shortBstring, 0, b = bstrcpy (&shortBstring), 0); + bdestroy (b); + ret += test12_1 (&shortBstring, 0, b = bfromcstr ("BOGUS"), 0); + bdestroy (b); + ret += test12_1 (&longBstring, 0, &shortBstring, BSTR_ERR); + ret += test12_1 (&longBstring, 20, &shortBstring, 10); + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one binchr (s1, pos, s2) case: prints the call and its result and returns 1 when the result differs from res, otherwise 0. */ +static int test13_0 (bstring s1, int pos, const_bstring s2, int res) { +int rv, ret = 0; + + printf (".\tbinchr (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = binchr (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} +/* Driver for the binchr cases; prints and returns the number of failed cases. */ +static int test13 (void) { +bstring b; +int ret = 0; +struct tagbstring multipleOs = bsStatic ("ooooo"); + + printf ("TEST: int binchr (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test13_0 (NULL, 0, NULL, BSTR_ERR); + ret += test13_0 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test13_0 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test13_0 (&emptyBstring, 0, &badBstring1, BSTR_ERR); + ret += 
test13_0 (&emptyBstring, 0, &badBstring2, BSTR_ERR); + ret += test13_0 (&badBstring1, 0, &emptyBstring, BSTR_ERR); + ret += test13_0 (&badBstring2, 0, &emptyBstring, BSTR_ERR); + ret += test13_0 (&badBstring2, 0, &badBstring1, BSTR_ERR); + ret += test13_0 (&badBstring1, 0, &badBstring2, BSTR_ERR); + /* well formed arguments: an empty character set is expected to give BSTR_ERR, otherwise the expected value is a position or BSTR_ERR */ + ret += test13_0 (&emptyBstring, 0, &emptyBstring, BSTR_ERR); + ret += test13_0 (&shortBstring, 0, &emptyBstring, BSTR_ERR); + ret += test13_0 (&shortBstring, 0, &shortBstring, 0); + ret += test13_0 (&shortBstring, 0, &multipleOs, 1); + ret += test13_0 (&shortBstring, 0, b = bstrcpy (&shortBstring), 0); + bdestroy (b); + ret += test13_0 (&shortBstring, -1, &shortBstring, BSTR_ERR); + ret += test13_0 (&shortBstring, 10, &shortBstring, BSTR_ERR); + ret += test13_0 (&shortBstring, 1, &shortBstring, 1); + ret += test13_0 (&emptyBstring, 0, &shortBstring, BSTR_ERR); + ret += test13_0 (&xxxxxBstring, 0, &shortBstring, BSTR_ERR); + ret += test13_0 (&longBstring, 0, &shortBstring, 3); + ret += test13_0 (&longBstring, 10, &shortBstring, 10); + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one binchrr (s1, pos, s2) case: prints the call and its result and returns 1 when the result differs from res, otherwise 0. */ +static int test14_0 (bstring s1, int pos, const_bstring s2, int res) { +int rv, ret = 0; + + printf (".\tbinchrr (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = binchrr (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} +/* Driver for the binchrr cases; prints and returns the number of failed cases. */ +static int test14 (void) { +bstring b; +int ret = 0; + /* NULL, empty character set and badBstring arguments: BSTR_ERR is expected for each */ + printf ("TEST: int binchrr (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test14_0 (NULL, 0, NULL, BSTR_ERR); + ret += test14_0 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test14_0 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test14_0 (&emptyBstring, 0, &emptyBstring, BSTR_ERR); + ret += test14_0 (&shortBstring, 0, &emptyBstring, BSTR_ERR); + ret += test14_0 (&emptyBstring, 0, &badBstring1, BSTR_ERR); + ret += test14_0 (&emptyBstring, 0, &badBstring2, BSTR_ERR); + ret += 
test14_0 (&badBstring1, 0, &emptyBstring, BSTR_ERR); + ret += test14_0 (&badBstring2, 0, &emptyBstring, BSTR_ERR); + ret += test14_0 (&badBstring2, 0, &badBstring1, BSTR_ERR); + ret += test14_0 (&badBstring1, 0, &badBstring2, BSTR_ERR); + /* well formed arguments: the expected value is a position or BSTR_ERR */ + ret += test14_0 (&shortBstring, 0, &shortBstring, 0); + ret += test14_0 (&shortBstring, 0, b = bstrcpy (&shortBstring), 0); + bdestroy (b); + ret += test14_0 (&shortBstring, -1, &shortBstring, BSTR_ERR); + ret += test14_0 (&shortBstring, 5, &shortBstring, 4); + ret += test14_0 (&shortBstring, 4, &shortBstring, 4); + ret += test14_0 (&shortBstring, 1, &shortBstring, 1); + ret += test14_0 (&emptyBstring, 0, &shortBstring, BSTR_ERR); + ret += test14_0 (&xxxxxBstring, 4, &shortBstring, BSTR_ERR); + ret += test14_0 (&longBstring, 0, &shortBstring, BSTR_ERR); + ret += test14_0 (&longBstring, 10, &shortBstring, 10); + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one bsetstr case. When b0 and b1 are both well formed, the call is made first on a write protected copy of b0 (a return of 0 or a changed copy counts as a failure) and then on a writable copy whose length and contents are checked against res. Otherwise the call is made directly and anything but BSTR_ERR counts as a failure. linenum records the line of the first failing check. Returns the number of failed checks. */ +static int test15_0 (bstring b0, int pos, const_bstring b1, unsigned char fill, char * res) { +bstring b2; +int rv, ret = 0, linenum = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + b1 != NULL && b1->data != NULL && b1->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbsetstr (%s, ", dumpBstring (b2)); + + rv = bsetstr (b2, pos, b1, fill); + ret += (rv == 0); if (ret && 0 == linenum) linenum = __LINE__; + if (!biseq (b0, b2)) ret++; if (ret && 0 == linenum) linenum = __LINE__; + + printf ("%d, %s, %02X) = %s\n", pos, dumpBstring (b1), fill, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbsetstr (%s, ", dumpBstring (b2)); + + rv = bsetstr (b2, pos, b1, fill); + if (b1) { + ret += (pos >= 0) && (b2->slen != b0->slen + b1->slen) && (b2->slen != pos + b1->slen); if (ret && 0 == linenum) linenum = __LINE__; + ret += (pos < 0) && (b2->slen != b0->slen); if (ret && 0 == linenum) linenum = __LINE__; + } + /* the call must succeed exactly when pos is not negative */ + ret += ((rv == 0) != (pos >= 0)); if (ret && 0 == linenum) linenum = __LINE__; + ret += (res == NULL); if (ret && 0 == 
linenum) linenum = __LINE__; + /* res may be NULL here (already counted as a failure just before); guard it so strlen and memcmp are never handed a NULL pointer */ + ret += (res != NULL) && ((int) strlen (res) > b2->slen); if (ret && 0 == linenum) linenum = __LINE__; + ret += (res != NULL) && (0 != memcmp (b2->data, res, b2->slen)); if (ret && 0 == linenum) linenum = __LINE__; + ret += b2->data[b2->slen] != '\0'; if (ret && 0 == linenum) linenum = __LINE__; + + printf ("%d, %s, %02X) = %s\n", pos, dumpBstring (b1), fill, dumpBstring (b2)); + + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bsetstr (b0, pos, b1, fill))); if (ret && 0 == linenum) linenum = __LINE__; + printf (".\tbsetstr (%s, %d, %s, %02X) = %d\n", dumpBstring (b0), pos, dumpBstring (b1), fill, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", linenum, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} +/* Driver for the bsetstr cases; prints and returns the number of failed cases. */ +static int test15 (void) { +int ret = 0; + printf ("TEST: int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);\n"); + /* tests with NULL */ + ret += test15_0 (NULL, 0, NULL, (unsigned char) '?', NULL); + ret += test15_0 (NULL, 0, &emptyBstring, (unsigned char) '?', NULL); + ret += test15_0 (&badBstring1, 0, NULL, (unsigned char) '?', NULL); + ret += test15_0 (&badBstring1, 0, &badBstring1, (unsigned char) '?', NULL); + ret += test15_0 (&emptyBstring, 0, &badBstring1, (unsigned char) '?', NULL); + ret += test15_0 (&badBstring1, 0, &emptyBstring, (unsigned char) '?', NULL); + ret += test15_0 (&badBstring2, 0, NULL, (unsigned char) '?', NULL); + ret += test15_0 (&badBstring2, 0, &badBstring2, (unsigned char) '?', NULL); + ret += test15_0 (&emptyBstring, 0, &badBstring2, (unsigned char) '?', NULL); + ret += test15_0 (&badBstring2, 0, &emptyBstring, (unsigned char) '?', NULL); + + /* normal operation tests */ + ret += test15_0 (&emptyBstring, 0, &emptyBstring, (unsigned char) '?', ""); + ret += test15_0 (&emptyBstring, 5, &emptyBstring, (unsigned char) '?', "?????"); + ret += test15_0 (&emptyBstring, 5, &shortBstring, (unsigned char) '?', "?????bogus"); + ret += test15_0 (&shortBstring, 0, 
&emptyBstring, (unsigned char) '?', "bogus"); + ret += test15_0 (&emptyBstring, 0, &shortBstring, (unsigned char) '?', "bogus"); + ret += test15_0 (&shortBstring, 0, &shortBstring, (unsigned char) '?', "bogus"); + ret += test15_0 (&shortBstring, -1, &shortBstring, (unsigned char) '?', "bogus"); + ret += test15_0 (&shortBstring, 2, &shortBstring, (unsigned char) '?', "bobogus"); + ret += test15_0 (&shortBstring, 6, &shortBstring, (unsigned char) '?', "bogus?bogus"); + ret += test15_0 (&shortBstring, 6, NULL, (unsigned char) '?', "bogus?"); + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one binsert case. When b0 and b1 are both well formed, the call is made first on a write protected copy of b0 (a return of 0 or a changed copy counts as a failure) and then on a writable copy whose length, contents and terminator are checked against res. Otherwise the call is made directly and anything but BSTR_ERR counts as a failure. Returns the number of failed checks. */ +static int test16_0 (bstring b0, int pos, const_bstring b1, unsigned char fill, char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + b1 != NULL && b1->data != NULL && b1->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbinsert (%s, ", dumpBstring (b2)); + + rv = binsert (b2, pos, b1, fill); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%d, %s, %02X) = %s\n", pos, dumpBstring (b1), fill, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbinsert (%s, ", dumpBstring (b2)); + + rv = binsert (b2, pos, b1, fill); + if (b1) { + ret += (pos >= 0) && (b2->slen != b0->slen + b1->slen) && (b2->slen != pos + b1->slen); + ret += (pos < 0) && (b2->slen != b0->slen); + ret += ((rv == 0) != (pos >= 0 && pos <= b2->slen)); + } + /* the result must hold at least strlen (res) bytes, start with res and be terminated by a zero byte */ + ret += (res == NULL) || ((int) strlen (res) > b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + + printf ("%d, %s, %02X) = %s\n", pos, dumpBstring (b1), fill, dumpBstring (b2)); + + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = binsert (b0, pos, b1, fill))); + printf (".\tbinsert (%s, %d, %s, %02X) = %d\n", dumpBstring (b0), pos, dumpBstring (b1), fill, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return 
ret; +} +/* Alias case for binsert: b1 is set up from b0 itself with bmid2tbstr (b1, b0, 4, 4), b0->slen is then cut to 6, and b1 is inserted into b0 at position 2. A non zero return or a result other than aaabbbaaab counts as a failure. Returns the number of failed checks. */ +static int test16_1 (void) { +bstring b0 = bfromStatic ("aaaaabbbbb"); +struct tagbstring b1; +int res, ret = 0; + + bmid2tbstr (b1, b0, 4, 4); + b0->slen = 6; + + printf (".\tbinsert (%s, 2, %s, '?') = ", dumpBstring (b0), dumpBstring (&b1)); + res = binsert (b0, 2, &b1, '?'); + printf ("%s (Alias test)\n", dumpBstring (b0)); + + ret += (res != 0); + ret += !biseqStatic(b0, "aaabbbaaab"); + + return ret; +} +/* Driver for the binsert cases, including the alias case; prints and returns the number of failed cases. */ +static int test16 (void) { +int ret = 0; + printf ("TEST: int binsert (bstring b0, int pos, const_bstring b1, unsigned char fill);\n"); + /* tests with NULL */ + ret += test16_0 (NULL, 0, NULL, (unsigned char) '?', NULL); + ret += test16_0 (NULL, 0, &emptyBstring, (unsigned char) '?', NULL); + ret += test16_0 (&badBstring1, 0, NULL, (unsigned char) '?', NULL); + ret += test16_0 (&badBstring1, 0, &badBstring1, (unsigned char) '?', NULL); + ret += test16_0 (&emptyBstring, 0, &badBstring1, (unsigned char) '?', NULL); + ret += test16_0 (&badBstring1, 0, &emptyBstring, (unsigned char) '?', NULL); + ret += test16_0 (&badBstring2, 0, NULL, (unsigned char) '?', NULL); + ret += test16_0 (&badBstring2, 0, &badBstring2, (unsigned char) '?', NULL); + ret += test16_0 (&emptyBstring, 0, &badBstring2, (unsigned char) '?', NULL); + ret += test16_0 (&badBstring2, 0, &emptyBstring, (unsigned char) '?', NULL); + + /* normal operation tests */ + ret += test16_0 (&emptyBstring, 0, &emptyBstring, (unsigned char) '?', ""); + ret += test16_0 (&emptyBstring, 5, &emptyBstring, (unsigned char) '?', "?????"); + ret += test16_0 (&emptyBstring, 5, &shortBstring, (unsigned char) '?', "?????bogus"); + ret += test16_0 (&shortBstring, 0, &emptyBstring, (unsigned char) '?', "bogus"); + ret += test16_0 (&emptyBstring, 0, &shortBstring, (unsigned char) '?', "bogus"); + ret += test16_0 (&shortBstring, 0, &shortBstring, (unsigned char) '?', "bogusbogus"); + ret += test16_0 (&shortBstring, -1, &shortBstring, (unsigned char) '?', "bogus"); + ret += test16_0 (&shortBstring, 2, 
&shortBstring, (unsigned char) '?', "bobogusgus"); + ret += test16_0 (&shortBstring, 6, &shortBstring, (unsigned char) '?', "bogus?bogus"); + ret += test16_0 (&shortBstring, 6, NULL, (unsigned char) '?', "bogus"); + + /* Alias testing */ + ret += test16_1 (); + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one bdelete case. When s1 is well formed, the call is made first on a write protected copy (a return of 0 or a changed copy counts as a failure) and then on a writable copy whose length, contents and terminator are checked against res. Otherwise the call is made directly and anything but BSTR_ERR counts as a failure. Returns the number of failed checks. */ +static int test17_0 (bstring s1, int pos, int len, char * res) { +bstring b2; +int rv, ret = 0; + + if (s1 != NULL && s1->data != NULL && s1->slen >= 0) { + b2 = bstrcpy (s1); + bwriteprotect (*b2); + + printf (".\tbdelete (%s, ", dumpBstring (b2)); + + rv = bdelete (b2, pos, len); + ret += (rv == 0); + if (!biseq (s1, b2)) ret++; + + printf ("%d, %d) = %s\n", pos, len, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbdelete (%s, ", dumpBstring (b2)); + + rv = bdelete (b2, pos, len); + /* the call must succeed exactly when len is not negative, and the string must not grow */ + ret += (len >= 0) != (rv == 0); + ret += (b2->slen > s1->slen) || (b2->slen < pos && s1->slen >= pos); + + ret += (res == NULL) || ((int) strlen (res) > b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + + printf ("%d, %d) = %s\n", pos, len, dumpBstring (b2)); + + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bdelete (s1, pos, len))); + printf (".\tbdelete (%s, %d, %d) = %d\n", dumpBstring (s1), pos, len, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} +/* Driver for the bdelete cases; prints and returns the number of failed cases. */ +static int test17 (void) { +int ret = 0; + printf ("TEST: int bdelete (bstring s1, int pos, int len);\n"); + /* tests with NULL */ + ret += test17_0 (NULL, 0, 0, NULL); + ret += test17_0 (&badBstring1, 0, 0, NULL); + ret += test17_0 (&badBstring2, 0, 0, NULL); + + /* normal operation tests */ + ret += test17_0 (&emptyBstring, 0, 0, ""); + ret += test17_0 (&shortBstring, 1, 3, "bs"); + ret += test17_0 (&shortBstring, -1, 3, "gus"); + ret += test17_0 (&shortBstring, 1, -3, "bogus"); + ret += test17_0 (&shortBstring, 3, 9, "bog"); + ret += 
test17_0 (&shortBstring, 3, 1, "bogs"); + ret += test17_0 (&longBstring, 4, 300, "This"); + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one balloc (b, len) case. Failures are counted when mlen shrinks on a well formed bstring, when the return value differs from res, and when b is not NULL and its mlen is 0 or below the expected minimum mlen. Returns the number of failed checks. */ +static int test18_0 (bstring b, int len, int res, int mlen) { +int ret = 0; +int rv; +int ol = 0; + + printf (".\tballoc (%s, %d) = ", dumpBstring (b), len); + if (b) ol = b->mlen; + rv = balloc (b, len); + printf ("%d\n", rv); + + if (b != NULL && b->data != NULL && b->slen >=0 && ol > b->mlen) { + printf ("\t\tfailure(%d) oldmlen = %d, newmlen %d\n", __LINE__, ol, b->mlen); + ret++; + } + + if (rv != res) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + ret++; + } + if (b != NULL && (mlen > b->mlen || b->mlen == 0)) { + printf ("\t\tfailure(%d) b->mlen = %d mlen = %d\n", __LINE__, b->mlen, mlen); + ret++; + } + return ret; +} +/* Runs one ballocmin (b, len) case on behalf of the test18_1 macro; __line__ is the line of the calling test18_1 invocation and is printed in square brackets. Failures are counted when b has data and its mlen differs from the expected mlen, and when the return value differs from res. Returns the number of failed checks. */ +static int test18_1_int (bstring b, int len, int res, int mlen, int __line__) { +int ret = 0; +int rv; +int ol = 0; + + printf (".\tballocmin (%s, %d) = ", dumpBstring (b), len); + if (b) ol = b->mlen; + + rv = ballocmin (b, len); + /* print the caller line in the brackets, matching the failure messages below (this used to print the constant line of this statement) */ + printf ("[%d] %d\n", __line__, rv); + + if (b != NULL && b->data != NULL && b->mlen != mlen) { + printf ("\t\t[%d] failure(%d) oldmlen = %d, newmlen = %d, mlen = %d len = %d\n", __line__, __LINE__, ol, b->mlen, mlen, b->slen); + ret++; + } + + if (rv != res) { + printf ("\t\t[%d] failure(%d) res = %d\n", __line__, __LINE__, res); + ret++; + } + + return ret; +} + +#define test18_1(b, len, res, mlen) test18_1_int (b, len, res, mlen, __LINE__) +/* Driver for the balloc and ballocmin cases; the two failure counts are printed separately and their sum is returned. */ +static int test18 (void) { +int ret = 0, reto; +bstring b = bfromcstr ("test"); + + printf ("TEST: int balloc (bstring s, int len);\n"); + /* tests with NULL */ + ret += test18_0 (NULL, 2, BSTR_ERR, 0); + ret += test18_0 (&badBstring1, 2, BSTR_ERR, 0); + ret += test18_0 (&badBstring2, 2, BSTR_ERR, 0); + + /* normal operation tests */ + ret += test18_0 (b, 2, 0, b->mlen); + ret += test18_0 (b, -1, BSTR_ERR, b->mlen); + ret += test18_0 (b, 9, 0, 9); + ret += test18_0 (b, 2, 0, 9); + bwriteprotect (*b); + ret += 
test18_0 (b, 4, BSTR_ERR, b->mlen); + bwriteallow (*b); + ret += test18_0 (b, 2, 0, b->mlen); + ret += test18_0 (&emptyBstring, 9, BSTR_ERR, emptyBstring.mlen); + + bdestroy (b); + printf ("\t# failures: %d\n", ret); + /* keep the balloc failure count and start a fresh count for ballocmin */ + reto = ret; + ret = 0; + + b = bfromcstr ("test"); + + printf ("TEST: int ballocmin (bstring s, int len);\n"); + /* tests with NULL */ + ret += test18_1 (NULL, 2, BSTR_ERR, 0); + ret += test18_1 (&badBstring1, 2, BSTR_ERR, 0); + ret += test18_1 (&badBstring2, 2, BSTR_ERR, 2); + + /* normal operation tests */ + ret += test18_1 (b, 2, 0, b->slen + 1); + ret += test18_1 (b, -1, BSTR_ERR, b->mlen); + ret += test18_1 (b, 9, 0, 9); + ret += test18_1 (b, 2, 0, b->slen + 1); + ret += test18_1 (b, 9, 0, 9); + bwriteprotect (*b); + ret += test18_1 (b, 4, BSTR_ERR, -1); + bwriteallow (*b); + ret += test18_1 (b, 2, 0, b->slen + 1); + ret += test18_1 (&emptyBstring, 9, BSTR_ERR, emptyBstring.mlen); + + bdestroy (b); + printf ("\t# failures: %d\n", ret); + + return reto + ret; +} +/* Runs one bpattern (b, len) case; res is the expected content and erv the expected return value. A well formed b is copied; the copy is first write protected (the call must then return BSTR_ERR and leave the copy equal to b) and then made writable for the checked call. Otherwise the call is made directly and anything but BSTR_ERR counts as a failure. Returns the number of failed checks. */ +static int test19_0 (bstring b, int len, const char * res, int erv) { +int rv, ret = 0; +bstring b1; + + if (b != NULL && b->data != NULL && b->slen >= 0) { + b1 = bstrcpy (b); + bwriteprotect (*b1); + ret += bpattern (b1, len) != BSTR_ERR; + ret += !biseq (b1, b); + bwriteallow (*b1); + + printf (".\tbpattern (%s, %d) = ", dumpBstring (b1), len); + + rv = bpattern (b1, len); + + printf ("%s\n", dumpBstring (b1)); + + ret += (rv != erv); + ret += (res == NULL) || ((int) strlen (res) > b1->slen) + || (0 != memcmp (b1->data, res, b1->slen)); + ret += b1->data[b1->slen] != '\0'; + } else { + ret += BSTR_ERR != (rv = bpattern (b, len)); + printf (".\tbpattern (%s, %d) = %d\n", dumpBstring (b), len, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) rv = %d erv = %d (res = %p", __LINE__, rv, erv, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} +/* Driver for the bpattern cases; prints and returns the number of failed cases. */ +static int test19 (void) { +int ret = 0; + + printf ("TEST: int bpattern (bstring b, int len);\n"); + /* 
tests with NULL */ + ret += test19_0 (NULL, 0, NULL, BSTR_ERR); + ret += test19_0 (NULL, 5, NULL, BSTR_ERR); + ret += test19_0 (NULL, -5, NULL, BSTR_ERR); + ret += test19_0 (&badBstring1, 5, NULL, BSTR_ERR); + ret += test19_0 (&badBstring2, 5, NULL, BSTR_ERR); + + /* normal operation tests */ + ret += test19_0 (&emptyBstring, 0, "", BSTR_ERR); + ret += test19_0 (&emptyBstring, 10, "", BSTR_ERR); + ret += test19_0 (&emptyBstring, -1, "", BSTR_ERR); + ret += test19_0 (&shortBstring, 0, "", 0); + ret += test19_0 (&shortBstring, 12, "bogusbogusbo", 0); + ret += test19_0 (&shortBstring, -1, "bogus", BSTR_ERR); + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Driver for the bformat, bformata and bassignformat cases. The whole body is compiled out when BSTRLIB_NOVSNP is defined, in which case 0 is returned. The failure count is cumulative across the three sections and is returned at the end. */ +static int test20 (void) { +int ret = 0; + +#if !defined (BSTRLIB_NOVSNP) +int rv; +bstring b, c; + + printf ("TEST: bstring bformat (const char * fmt, ...);\n"); + /* tests with NULL */ + printf (".\tbformat (NULL, 1, 2) = "); + b = bformat (NULL, 1, 2); + printf ("%s\n", dumpBstring (b)); + ret += b != NULL; + + /* normal operation tests */ + printf (".\tbformat (\"%%d %%s\", 1, \"xy\") = "); + b = bformat ("%d %s", 1, "xy"); + printf ("%s\n", dumpBstring (b)); + ret += !biseq (c = bfromcstr ("1 xy"), b); + bdestroy (b); + + printf (".\tbformat (\"%%d %%s(%%s)\", 6, %s, %s) = ", dumpBstring (c), dumpBstring (&shortBstring)); + b = bformat ("%d %s(%s)", 6, c->data, shortBstring.data); + printf ("%s\n", dumpBstring (b)); + bdestroy (c); + ret += !biseq (c = bfromcstr ("6 1 xy(bogus)"), b); + bdestroy (c); + bdestroy (b); + + printf (".\tbformat (\"%%s%%s%%s%%s%%s%%s%%s%%s\", ...) 
...\n"); + b = bformat ("%s%s%s%s%s%s%s%s", longBstring.data, longBstring.data + , longBstring.data, longBstring.data + , longBstring.data, longBstring.data + , longBstring.data, longBstring.data); + /* build the expected text: three self concatenations give eight copies of longBstring */ + c = bstrcpy (&longBstring); + bconcat (c, c); + bconcat (c, c); + bconcat (c, c); + ret += !biseq (c, b); + bdestroy (c); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + b = bfromcstr (""); + printf ("TEST: int bformata (bstring b, const char * fmt, ...);\n"); + /* tests with NULL */ + printf (".\tbformata (%s, NULL, 1, 2) = ", dumpBstring (b)); + rv = bformata (b, NULL, 1, 2); + printf ("%d\n", rv); + ret += rv != BSTR_ERR; + printf (".\tbformata (%s, \"%%d %%d\", 1, 2) = ", dumpBstring (&badBstring1)); + rv = bformata (&badBstring1, "%d %d", 1, 2); + printf ("%d\n", rv); + ret += rv != BSTR_ERR; + printf (".\tbformata (%s, \"%%d %%d\", 1, 2) = ", dumpBstring (b)); + rv = bformata (b, "%d %d", 1, 2); + printf ("%s\n", dumpBstring (b)); + ret += !biseq (c = bfromcstr ("1 2"), b); + bdestroy (c); + bdestroy (b); + + printf (".\tbformata (\"x\", \"%%s%%s%%s%%s%%s%%s%%s%%s\", ...) 
...\n"); + rv = bformata (b = bfromcstr ("x"), "%s%s%s%s%s%s%s%s", + longBstring.data, longBstring.data, + longBstring.data, longBstring.data, + longBstring.data, longBstring.data, + longBstring.data, longBstring.data); + ret += rv == BSTR_ERR; + /* expected text: the initial x followed by eight copies of longBstring */ + c = bstrcpy (&longBstring); + bconcat (c, c); + bconcat (c, c); + bconcat (c, c); + binsertch (c, 0, 1, (char) 'x'); + ret += !biseq (c, b); + bdestroy (c); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + b = bfromcstr ("Initial"); + printf ("TEST: int bassignformat (bstring b, const char * fmt, ...);\n"); + /* tests with NULL */ + printf (".\tbassignformat (%s, NULL, 1, 2) = ", dumpBstring (b)); + rv = bassignformat (b, NULL, 1, 2); + printf ("%d\n", rv); + ret += rv != BSTR_ERR; + printf (".\tbassignformat (%s, \"%%d %%d\", 1, 2) = ", dumpBstring (&badBstring1)); + rv = bassignformat (&badBstring1, "%d %d", 1, 2); + printf ("%d\n", rv); + ret += rv != BSTR_ERR; + printf (".\tbassignformat (%s, \"%%d %%d\", 1, 2) = ", dumpBstring (b)); + rv = bassignformat (b, "%d %d", 1, 2); + printf ("%s\n", dumpBstring (b)); + ret += !biseq (c = bfromcstr ("1 2"), b); + bdestroy (c); + bdestroy (b); + + printf (".\tbassignformat (\"x\", \"%%s%%s%%s%%s%%s%%s%%s%%s\", ...) 
...\n"); + rv = bassignformat (b = bfromcstr ("x"), "%s%s%s%s%s%s%s%s", + longBstring.data, longBstring.data, + longBstring.data, longBstring.data, + longBstring.data, longBstring.data, + longBstring.data, longBstring.data); + ret += rv == BSTR_ERR; + /* expected text: eight copies of longBstring, the initial x is not part of it */ + c = bstrcpy (&longBstring); + bconcat (c, c); + bconcat (c, c); + bconcat (c, c); + ret += !biseq (c, b); + bdestroy (c); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); +#endif + + return ret; +} +/* Runs one bsplit (b, sc) case; ns is the expected number of entries. For a well formed b the entries are printed, the count is compared with ns, and the list is joined again with the one byte separator sc, which must reproduce b. Otherwise bsplit must return NULL. Returns the number of failed checks. */ +static int test21_0 (bstring b, char sc, int ns) { +struct bstrList * l; +int ret = 0; + + printf (".\tbsplit (%s, '%c') = ", dumpBstring (b), sc); + + if (b != NULL && b->data != NULL && b->slen >= 0) { + bstring c; + struct tagbstring t; + + blk2tbstr(t,&sc,1); + + printf ("{"); + + l = bsplit (b, sc); + + if (l) { + int i; + for (i=0; i < l->qty; i++) { + if (i != 0) printf (", "); + printf ("%s", dumpBstring (l->entry[i])); + } + printf (":<%d>", l->qty); + if (ns != l->qty) ret++; + } else { + printf ("NULL"); + ret ++; + } + + printf ("}\n"); + /* round trip: joining the pieces with the separator must give b back */ + c = bjoin (l, &t); + ret += !biseq (c, b); + ret += incorrectBstring (c); + bdestroy (c); + ret += 0 != bstrListDestroy (l); + } else { + l = bsplit (b, sc); + ret += (l != NULL); + printf ("%p\n", (void *) l); + } + + if (ret) { + printf ("\t\tfailure(%d) ns = %d\n", __LINE__, ns); + } + + return ret; +} +/* Runs one bsplitstr (b, sc) case; ns is the expected number of entries. For a well formed b the entries are printed, the count is compared with ns, and the list is joined again with sc, which must reproduce b. Otherwise bsplitstr must return NULL. Returns the number of failed checks. */ +static int test21_1 (bstring b, const_bstring sc, int ns) { +struct bstrList * l; +int ret = 0; + + printf (".\tbsplitstr (%s, %s) = ", dumpBstring (b), dumpBstring (sc)); + + if (b != NULL && b->data != NULL && b->slen >= 0) { + bstring c; + + printf ("{"); + + l = bsplitstr (b, sc); + + if (l) { + int i; + for (i=0; i < l->qty; i++) { + if (i != 0) printf (", "); + printf ("%s", dumpBstring (l->entry[i])); + } + printf (":<%d>", l->qty); + if (ns != l->qty) ret++; + } else { + printf ("NULL"); + ret ++; + } + + printf ("}\n"); + + c = bjoin (l, sc); + ret += !biseq (c, b); + ret += incorrectBstring (c); + bdestroy (c); + ret += 0 != bstrListDestroy (l); + } 
else { + l = bsplitstr (b, sc); + ret += (l != NULL); + printf ("%p\n", (void *) l); + } + + if (ret) { + printf ("\t\tfailure(%d) ns = %d\n", __LINE__, ns); + } + + return ret; +} +/* Driver for the bsplit and bsplitstr cases. When the table driven cases all pass, every byte value from 0 to UCHAR_MAX is additionally used as a separator for a split and join round trip over three fixtures, and a list emptied by hand is joined to check that the result has length 0. Prints and returns the number of failed cases. */ +static int test21 (void) { +struct tagbstring is = bsStatic ("is"); +struct tagbstring ng = bsStatic ("ng"); +struct tagbstring commas = bsStatic (",,,,"); +int ret = 0; + + printf ("TEST: struct bstrList * bsplit (const_bstring str, unsigned char splitChar);\n"); + /* tests with NULL */ + ret += test21_0 (NULL, (char) '?', 0); + ret += test21_0 (&badBstring1, (char) '?', 0); + ret += test21_0 (&badBstring2, (char) '?', 0); + + /* normal operation tests */ + ret += test21_0 (&emptyBstring, (char) '?', 1); + ret += test21_0 (&shortBstring, (char) 'o', 2); + ret += test21_0 (&shortBstring, (char) 's', 2); + ret += test21_0 (&shortBstring, (char) 'b', 2); + ret += test21_0 (&longBstring, (char) 'o', 9); + ret += test21_0 (&commas, (char) ',', 5); + + printf ("TEST: struct bstrList * bsplitstr (bstring str, const_bstring splitStr);\n"); + + ret += test21_1 (NULL, NULL, 0); + ret += test21_1 (&badBstring1, &emptyBstring, 0); + ret += test21_1 (&badBstring2, &emptyBstring, 0); + + /* normal operation tests */ + ret += test21_1 (&shortBstring, &emptyBstring, 5); + ret += test21_1 (&longBstring, &is, 3); + ret += test21_1 (&longBstring, &ng, 5); + + if (0 == ret) { + struct bstrList * l; + unsigned char c; + struct tagbstring t; + bstring b; + bstring list[3] = { &emptyBstring, &shortBstring, &longBstring }; + int i; + /* t is a one byte bstring view of c, so changing c changes the separator */ + blk2tbstr (t, &c, 1); + + for (i=0; i < 3; i++) { + c = (unsigned char) '\0'; + for (;;) { + b = bjoin (l = bsplit (list[i], c), &t); + if (!biseq (b, list[i])) { + printf ("\t\tfailure(%d) ", __LINE__); + printf ("join (bsplit (%s, x%02X), {x%02X}) = %s\n", dumpBstring (list[i]), c, c, dumpBstring (b)); + ret++; + } + bdestroy (b); + bstrListDestroy (l); + if (ret) break; + + b = bjoin (l = bsplitstr (list[i], &t), &t); + if (!biseq (b, list[i])) { + printf ("\t\tfailure(%d) 
", __LINE__); + printf ("join (bsplitstr (%s, {x%02X}), {x%02X}) = %s\n", dumpBstring (list[i]), c, c, dumpBstring (b)); + ret++; + } + bdestroy (b); + bstrListDestroy (l); + if (ret) break; + + if (UCHAR_MAX == c) break; + c++; + } + if (ret) break; + } + /* join of a list whose only entry was removed by hand: the result is expected to have length 0 */ + l = bsplit (&emptyBstring, 'x'); + bdestroy (l->entry[0]); + l->qty--; + b = bjoin (l, &longBstring); + ret += incorrectBstring (b); + bstrListDestroy (l); + if (b->slen) { + printf ("\t\tfailure(%d) ", __LINE__); + ret++; + } + bdestroy (b); + + } + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Runs one bsplits (b, sep) case. ns is the expected number of entries and must be followed by exactly ns expected strings of type char *. For well formed arguments every returned entry is compared with the matching expected string; an entry beyond the ns supplied strings is counted as a failure without reading another variadic argument. Otherwise bsplits must return NULL. Returns the number of failed checks. */ +static int test22_0 (const_bstring b, const_bstring sep, int ns, ...) { +va_list arglist; +struct bstrList * l; +int ret = 0; + + printf (".\tbsplits (%s, %s)", dumpBstring (b), dumpBstring (sep)); + if ( b != NULL && b->data != NULL && b->slen >= 0 && + sep != NULL && sep->data != NULL && sep->slen >= 0) { + printf (" {"); + + l = bsplits (b, sep); + + if (l) { + int i; + va_start (arglist, ns); + + for (i=0; i < l->qty; i++) { + char * res; + /* only ns expected strings were passed; reading further with va_arg would be undefined, so surplus entries get NULL and fail the check below */ + res = (i < ns) ? va_arg (arglist, char *) : NULL; + + if (i != 0) printf (", "); + printf ("%s", dumpBstring (l->entry[i])); + + ret += (res == NULL) || ((int) strlen (res) > l->entry[i]->slen) + || (0 != memcmp (l->entry[i]->data, res, l->entry[i]->slen)); + ret += l->entry[i]->data[l->entry[i]->slen] != '\0'; + } + + va_end (arglist); + + printf (":<%d>", l->qty); + if (ns != l->qty) ret++; + } else { + printf ("NULL"); + ret += (ns != 0); + } + + printf ("}\n"); + + ret += (0 != bstrListDestroy (l) && l != NULL); + } else { + l = bsplits (b, sep); + ret += (l != NULL); + printf (" = %p\n", (void *) l); + } + + if (ret) { + printf ("\t\tfailure(%d) ns = %d\n", __LINE__, ns); + } + + return ret; +} +/* Driver for the bsplits cases. When the table driven cases all pass, every byte value from 0 to UCHAR_MAX is additionally used as a one byte separator set for a split and join round trip over three fixtures. Prints and returns the number of failed cases. */ +static int test22 (void) { +int ret = 0; +struct tagbstring o=bsStatic("o"); +struct tagbstring s=bsStatic("s"); +struct tagbstring b=bsStatic("b"); +struct tagbstring bs=bsStatic("bs"); +struct tagbstring uo=bsStatic("uo"); + + printf ("TEST: extern struct bstrList * bsplits 
(const_bstring str, const_bstring splitStr);\n"); + /* tests with NULL */ + ret += test22_0 (NULL, &o, 0); + ret += test22_0 (&o, NULL, 0); + + /* normal operation tests */ + ret += test22_0 (&emptyBstring, &o, 1, ""); + ret += test22_0 (&emptyBstring, &uo, 1, ""); + ret += test22_0 (&shortBstring, &emptyBstring, 1, "bogus"); + ret += test22_0 (&shortBstring, &o, 2, "b", "gus"); + ret += test22_0 (&shortBstring, &s, 2, "bogu", ""); + ret += test22_0 (&shortBstring, &b, 2, "" , "ogus"); + ret += test22_0 (&shortBstring, &bs, 3, "" , "ogu", ""); + ret += test22_0 (&longBstring, &o, 9, "This is a b", "gus but reas", "nably l", "ng string. Just l", "ng en", "ugh t", " cause s", "me mall", "cing."); + ret += test22_0 (&shortBstring, &uo, 3, "b", "g", "s"); + + if (0 == ret) { + struct bstrList * l; + unsigned char c; + struct tagbstring t; + bstring bb; + bstring list[3] = { &emptyBstring, &shortBstring, &longBstring }; + int i; + /* t is a one byte bstring view of c, so changing c changes the separator set */ + blk2tbstr (t, &c, 1); + + for (i=0; i < 3; i++) { + c = (unsigned char) '\0'; + for (;;) { + bb = bjoin (l = bsplits (list[i], &t), &t); + if (!biseq (bb, list[i])) { + printf ("\t\tfailure(%d) ", __LINE__); + printf ("join (bsplits (%s, {x%02X}), {x%02X}) = %s\n", dumpBstring (list[i]), c, c, dumpBstring (bb)); + ret++; + } + bdestroy (bb); + bstrListDestroy (l); + if (ret) break; + if (UCHAR_MAX == c) break; + c++; + } + if (ret) break; + } + } + + printf ("\t# failures: %d\n", ret); + return ret; +} +/* Read cursor over a bstring: ofs is the offset of the next byte that test23_aux_read hands out from b. */ +struct sbstr { + int ofs; + bstring b; +}; +/* Read callback with an fread style signature. parm points to a struct sbstr; up to nelem elements of elsize bytes are copied from the current offset into buff and the offset is advanced by the bytes copied. Returns the number of whole elements copied, or 0 when parm is NULL, a size is 0, the byte count does not fit a positive int, or nothing is left to read. */ +static size_t test23_aux_read (void *buff, size_t elsize, size_t nelem, void *parm) { +struct sbstr * sb = (struct sbstr *)parm; +int els, len, avail; + + if (parm == NULL || elsize == 0 || nelem == 0) return 0; + len = (int) (nelem * elsize); if (len <= 0) return 0; + /* clamp against the bytes that remain instead of adding len to the offset, which could overflow, and stop early so that a negative count is never divided by the unsigned elsize */ + avail = sb->b->slen - sb->ofs; + if (avail <= 0) return 0; + if (len > avail) len = avail; + els = (int) ((size_t) len / elsize); + len = (int) ((size_t) els * elsize); + if (len > 0) { + memcpy (buff, sb->b->data + sb->ofs, len); + sb->ofs += len; + } + return els; +} + +static int 
test23_aux_open (struct sbstr * sb, bstring b) { + if (!sb || b == NULL || b->data == NULL) return -__LINE__; + sb->ofs = 0; + sb->b = b; + return 0; +} + +/* Split callback used with bssplitscb: parm is the accumulator bstring. Appends '|' first when the accumulator is non-empty, then appends entry. Always returns 0. */ +static int test23_aux_splitcb (void * parm, int ofs, const struct tagbstring * entry) { +bstring b = (bstring) parm; + + (void) ofs; /* offset is not needed here; the cast marks it unused without a self-assignment */ + if (b->slen > 0) bconchar (b, (char) '|'); + bconcat (b, entry); + return 0; +} + +/* Join state for test23_aux_splitcbx: first is non-zero until one entry has been appended, sc is the separator byte to re-insert, b is the accumulator. */ +struct tagBss { + int first; + unsigned char sc; + bstring b; +}; + +/* Split callback that re-joins the pieces: appends the separator p->sc before every entry except the first one, then appends entry. Always returns 0. */ +static int test23_aux_splitcbx (void * parm, int ofs, const struct tagbstring * entry) { +struct tagBss * p = (struct tagBss *) parm; + + (void) ofs; /* offset is not needed here; the cast marks it unused without a self-assignment */ + if (!p->first) { + bconchar (p->b, (char) p->sc); + } else p->first = 0; + + bconcat (p->b, entry); + return 0; +} + +/* Integrated bStream test: every check adds to ret, so ret is the running failure count. */ +static int test23 (void) { +struct tagbstring space = bsStatic (" "); +struct sbstr sb; +struct bStream * bs; +bstring b; +int l, ret = 0; + + printf ("TEST: bstream integrated test\n"); + test23_aux_open (&sb, &longBstring); + ret += NULL != (bs = bsopen ((bNread) NULL, &sb)); + ret += NULL == (bs = bsopen ((bNread) test23_aux_read, &sb)); + ret += (bseof (bs) != 0); + ret += BSTR_ERR != bsbufflength (NULL, -1); + ret += BSTR_ERR != bsbufflength (NULL, 1); + ret += BSTR_ERR != bsbufflength (bs, -1); + printf (".\tbsbufflength (bs, 0) -> %d\n", bsbufflength (bs, 0)); + ret += BSTR_ERR == bsbufflength (bs, 1); + ret += BSTR_ERR != bspeek (NULL, bs); + ret += BSTR_ERR != bsreadln (NULL, bs, (char) '?'); + ret += BSTR_ERR != bsreadln (&emptyBstring, bs, (char) '?'); + ret += BSTR_ERR != bspeek (&emptyBstring, bs); + + ret += BSTR_ERR == bspeek (b = bfromcstr (""), bs); + + printf (".\tbspeek () -> %s\n", dumpBstring (b)); + ret += BSTR_ERR != bsreadln (b, NULL, (char) '?'); + b->slen = 0; + ret += BSTR_ERR == bsreadln (b, bs, (char) '?'); + ret += (bseof (bs) <= 0); + ret += biseq (b, &longBstring) < 0; + printf (".\tbsreadln ('?') -> %s\n", dumpBstring (b)); + ret += BSTR_ERR == bsunread (bs, b); + ret += (bseof (bs) != 0); + printf (".\tbsunread (%s)\n", dumpBstring 
(b)); + b->slen = 0; + ret += BSTR_ERR == bspeek (b, bs); + ret += biseq (b, &longBstring) < 0; + printf (".\tbspeek () -> %s\n", dumpBstring (b)); + b->slen = 0; + ret += BSTR_ERR == bsreadln (b, bs, (char) '?'); + ret += (bseof (bs) <= 0); + ret += biseq (b, &longBstring) < 0; + printf (".\tbsreadln ('?') -> %s\n", dumpBstring (b)); + ret += NULL == bsclose (bs); + sb.ofs = 0; + + ret += NULL == (bs = bsopen ((bNread) test23_aux_read, &sb)); + b->slen = 0; + ret += BSTR_ERR == bsreadln (b, bs, (char) '.'); + l = b->slen; + ret += (0 != bstrncmp (b, &longBstring, l)) || (longBstring.data[l-1] != '.'); + printf (".\tbsreadln ('.') -> %s\n", dumpBstring (b)); + ret += BSTR_ERR == bsunread (bs, b); + + printf (".\tbsunread (%s)\n", dumpBstring (b)); + b->slen = 0; + ret += BSTR_ERR == bspeek (b, bs); + ret += biseq (b, &longBstring) < 0; + printf (".\tbspeek () -> %s\n", dumpBstring (b)); + b->slen = 0; + ret += BSTR_ERR == bsreadln (b, bs, (char) '.'); + + ret += b->slen != l || (0 != bstrncmp (b, &longBstring, l)) || (longBstring.data[l-1] != '.'); + printf (".\tbsreadln ('.') -> %s\n", dumpBstring (b)); + ret += NULL == bsclose (bs); + + test23_aux_open (&sb, &longBstring); + ret += NULL == (bs = bsopen ((bNread) test23_aux_read, &sb)); + ret += (bseof (bs) != 0); + b->slen = 0; + l = bssplitscb (bs, &space, test23_aux_splitcb, b); + ret += (bseof (bs) <= 0); + ret += NULL == bsclose (bs); + printf (".\tbssplitscb (' ') -> %s\n", dumpBstring (b)); + + for (l=1; l < 4; l++) { + char * str; + for (str = (char *) longBstring.data; *str; str++) { + test23_aux_open (&sb, &longBstring); + ret += NULL == (bs = bsopen ((bNread) test23_aux_read, &sb)); + ret += bseof (bs) != 0; + ret += 0 > bsbufflength (bs, l); + b->slen = 0; + while (0 == bsreadlna (b, bs, *str)) ; + ret += 0 == biseq (b, &longBstring); + ret += bseof (bs) <= 0; + ret += NULL == bsclose (bs); + if (ret) break; + } + if (ret) break; + } + + bdestroy (b); + + if (0 == ret) { + unsigned char c; + struct 
tagbstring t; + bstring list[3] = { &emptyBstring, &shortBstring, &longBstring }; + int i; + + blk2tbstr (t, &c, 1); + + for (i=0; i < 3; i++) { + /* Sweep every separator byte x00..xFF through bssplitscb and check that re-joining reproduces list[i]. */ + c = (unsigned char) '\0'; + for (;;) { + struct tagBss bss; + + bss.sc = c; + bss.b = bfromcstr (""); + bss.first = 1; + + test23_aux_open (&sb, list[i]); + bs = bsopen ((bNread) test23_aux_read, &sb); + bssplitscb (bs, &t, test23_aux_splitcbx, &bss); + bsclose (bs); + + if (!biseq (bss.b, list[i])) { + printf ("\t\tfailure(%d) ", __LINE__); + printf ("join (bssplitscb (%s, {x%02X}), {x%02X}) = %s\n", dumpBstring (list[i]), c, c, dumpBstring (bss.b)); + ret++; + } + bdestroy (bss.b); + if (ret) break; + if (UCHAR_MAX == c) break; + c++; + } + if (ret) break; + + /* The sweep above exits with c == UCHAR_MAX; restart at x00 so that bssplitstrcb is also exercised with every separator byte instead of xFF only. */ + c = (unsigned char) '\0'; + for (;;) { + struct tagBss bss; + + bss.sc = c; + bss.b = bfromcstr (""); + bss.first = 1; + + test23_aux_open (&sb, list[i]); + bs = bsopen ((bNread) test23_aux_read, &sb); + bssplitstrcb (bs, &t, test23_aux_splitcbx, &bss); + bsclose (bs); + + if (!biseq (bss.b, list[i])) { + printf ("\t\tfailure(%d) ", __LINE__); + printf ("join (bssplitstrcb (%s, {x%02X}), {x%02X}) = %s\n", dumpBstring (list[i]), c, c, dumpBstring (bss.b)); + ret++; + } + bdestroy (bss.b); + if (ret) break; + if (UCHAR_MAX == c) break; + c++; + } + if (ret) break; + } + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +/* Prints one bninchr (s1, pos, s2) call together with its result. Returns 1, after printing a failure line, when the result differs from res; returns 0 otherwise. */ +static int test24_0 (bstring s1, int pos, const_bstring s2, int res) { +int rv, ret = 0; + + printf (".\tbninchr (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = bninchr (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} + +/* Runs the bninchr cases through test24_0 and accumulates the failure count in ret. */ +static int test24 (void) { +bstring b; +int ret = 0; + + printf ("TEST: int bninchr (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test24_0 (NULL, 0, NULL, BSTR_ERR); + ret += test24_0 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test24_0 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test24_0 (&shortBstring, 3, 
&badBstring1, BSTR_ERR); + ret += test24_0 (&badBstring1, 3, &shortBstring, BSTR_ERR); + + ret += test24_0 (&emptyBstring, 0, &emptyBstring, BSTR_ERR); + ret += test24_0 (&shortBstring, 0, &emptyBstring, BSTR_ERR); + ret += test24_0 (&shortBstring, 0, &shortBstring, BSTR_ERR); + ret += test24_0 (&shortBstring, 1, &shortBstring, BSTR_ERR); + ret += test24_0 (&longBstring, 3, &shortBstring, 4); + ret += test24_0 (&longBstring, 3, b = bstrcpy (&shortBstring), 4); + bdestroy (b); + ret += test24_0 (&longBstring, -1, &shortBstring, BSTR_ERR); + ret += test24_0 (&longBstring, 1000, &shortBstring, BSTR_ERR); + ret += test24_0 (&xxxxxBstring, 0, &shortBstring, 0); + ret += test24_0 (&xxxxxBstring, 1, &shortBstring, 1); + ret += test24_0 (&emptyBstring, 0, &shortBstring, BSTR_ERR); + + ret += test24_0 (&longBstring, 0, &shortBstring, 0); + ret += test24_0 (&longBstring, 10, &shortBstring, 15); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test25_0 (bstring s1, int pos, const_bstring s2, int res) { +int rv, ret = 0; + + printf (".\tbninchrr (%s, %d, %s) = ", dumpBstring (s1), pos, dumpBstring (s2)); + rv = bninchrr (s1, pos, s2); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} + +static int test25 (void) { +bstring b; +int ret = 0; + + printf ("TEST: int bninchrr (const_bstring s1, int pos, const_bstring s2);\n"); + ret += test25_0 (NULL, 0, NULL, BSTR_ERR); + ret += test25_0 (&emptyBstring, 0, NULL, BSTR_ERR); + ret += test25_0 (NULL, 0, &emptyBstring, BSTR_ERR); + ret += test25_0 (&emptyBstring, 0, &emptyBstring, BSTR_ERR); + ret += test25_0 (&shortBstring, 0, &emptyBstring, BSTR_ERR); + ret += test25_0 (&shortBstring, 0, &badBstring1, BSTR_ERR); + ret += test25_0 (&badBstring1, 0, &shortBstring, BSTR_ERR); + + ret += test25_0 (&shortBstring, 0, &shortBstring, BSTR_ERR); + ret += test25_0 (&shortBstring, 4, &shortBstring, BSTR_ERR); + ret += test25_0 (&longBstring, 
10, &shortBstring, 9); + ret += test25_0 (&longBstring, 10, b = bstrcpy (&shortBstring), 9); + bdestroy (b); + ret += test25_0 (&xxxxxBstring, 4, &shortBstring, 4); + ret += test25_0 (&emptyBstring, 0, &shortBstring, BSTR_ERR); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test26_0 (bstring b0, int pos, int len, const_bstring b1, unsigned char fill, char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + b1 != NULL && b1->data != NULL && b1->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbreplace (%s, ", dumpBstring (b2)); + + rv = breplace (b2, pos, len, b1, fill); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%d, %d, %s, %02X) = %s\n", pos, len, dumpBstring (b1), fill, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbreplace (%s, ", dumpBstring (b2)); + + rv = breplace (b2, pos, len, b1, fill); + if (b1) { + ret += (pos < 0) && (b2->slen != b0->slen); + ret += ((rv == 0) != (pos >= 0 && pos <= b2->slen)); + } + + ret += (res == NULL) || ((int) strlen (res) > b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + + printf ("%d, %d, %s, %02X) = %s\n", pos, len, dumpBstring (b1), fill, dumpBstring (b2)); + + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = breplace (b0, pos, len, b1, fill))); + printf (".\tbreplace (%s, %d, %d, %s, %02X) = %d\n", dumpBstring (b0), pos, len, dumpBstring (b1), fill, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test26 (void) { +int ret = 0; + printf ("TEST: int breplace (bstring b0, int pos, int len, const_bstring b1, unsigned char fill);\n"); + /* tests with NULL */ + ret += test26_0 (NULL, 0, 0, NULL, (unsigned char) '?', NULL); + ret += test26_0 (NULL, 0, 0, &emptyBstring, (unsigned char) '?', NULL); + ret += test26_0 
(&badBstring1, 1, 3, &shortBstring, (unsigned char) '?', NULL); + ret += test26_0 (&shortBstring, 1, 3, &badBstring1, (unsigned char) '?', NULL); + + /* normal operation tests */ + ret += test26_0 (&emptyBstring, 0, 0, &emptyBstring, (unsigned char) '?', ""); + ret += test26_0 (&emptyBstring, 5, 0, &emptyBstring, (unsigned char) '?', "?????"); + ret += test26_0 (&emptyBstring, 5, 0, &shortBstring, (unsigned char) '?', "?????bogus"); + ret += test26_0 (&shortBstring, 0, 0, &emptyBstring, (unsigned char) '?', "bogus"); + ret += test26_0 (&emptyBstring, 0, 0, &shortBstring, (unsigned char) '?', "bogus"); + ret += test26_0 (&shortBstring, 0, 0, &shortBstring, (unsigned char) '?', "bogusbogus"); + ret += test26_0 (&shortBstring, 1, 3, &shortBstring, (unsigned char) '?', "bboguss"); + ret += test26_0 (&shortBstring, 3, 8, &shortBstring, (unsigned char) '?', "bogbogus"); + ret += test26_0 (&shortBstring, -1, 0, &shortBstring, (unsigned char) '?', "bogus"); + ret += test26_0 (&shortBstring, 2, 0, &shortBstring, (unsigned char) '?', "bobogusgus"); + ret += test26_0 (&shortBstring, 6, 0, &shortBstring, (unsigned char) '?', "bogus?bogus"); + ret += test26_0 (&shortBstring, 6, 0, NULL, (unsigned char) '?', "bogus"); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test27_0 (bstring b0, const_bstring b1, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + b1 != NULL && b1->data != NULL && b1->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbassign (%s, ", dumpBstring (b2)); + + rv = bassign (b2, b1); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%s) = %s\n", dumpBstring (b1), dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbassign (%s, ", dumpBstring (b2)); + + rv = bassign (b2, b1); + + printf ("%s) = %s\n", dumpBstring (b1), dumpBstring (b2)); + + if (b1) ret += (b2->slen != b1->slen); + ret += ((0 != rv) && (b1 != NULL)) || ((0 == rv) && (b1 == 
NULL)); + ret += (res == NULL) || ((int) strlen (res) != b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bassign (b0, b1))); + printf (".\tbassign (%s, %s) = %d\n", dumpBstring (b0), dumpBstring (b1), rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test27 (void) { +int ret = 0; + + printf ("TEST: int bassign (bstring b0, const_bstring b1);\n"); + + /* tests with NULL */ + ret += test27_0 (NULL, NULL, NULL); + ret += test27_0 (NULL, &emptyBstring, NULL); + ret += test27_0 (&emptyBstring, NULL, ""); + ret += test27_0 (&badBstring1, &emptyBstring, NULL); + ret += test27_0 (&badBstring2, &emptyBstring, NULL); + ret += test27_0 (&emptyBstring, &badBstring1, NULL); + ret += test27_0 (&emptyBstring, &badBstring2, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test27_0 (&emptyBstring, &emptyBstring, ""); + ret += test27_0 (&emptyBstring, &shortBstring, "bogus"); + ret += test27_0 (&shortBstring, &emptyBstring, ""); + ret += test27_0 (&shortBstring, &shortBstring, "bogus"); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test28_0 (bstring s1, int c, int res) { +int rv, ret = 0; + + printf (".\tbstrchr (%s, %d) = ", dumpBstring (s1), c); + rv = bstrchr (s1, c); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} + +static int test28_1 (bstring s1, int c, int res) { +int rv, ret = 0; + + printf (".\tbstrrchr (%s, %d) = ", dumpBstring (s1), c); + rv = bstrrchr (s1, c); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d rv = %d\n", __LINE__, res, rv); + } + return ret; +} + +static int test28_2 (bstring s1, int c, int pos, int res) { +int rv, ret = 0; + + printf (".\tbstrchrp 
(%s, %d, %d) = ", dumpBstring (s1), c, pos); + rv = bstrchrp (s1, c, pos); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d\n", __LINE__, res); + } + return ret; +} + +static int test28_3 (bstring s1, int c, int pos, int res) { +int rv, ret = 0; + + printf (".\tbstrrchrp (%s, %d, %d) = ", dumpBstring (s1), c, pos); + rv = bstrrchrp (s1, c, pos); + printf ("%d\n", rv); + ret += (rv != res); + if (ret) { + printf ("\t\tfailure(%d) res = %d rv = %d\n", __LINE__, res, rv); + } + return ret; +} + +static int test28 (void) { +int ret = 0; + + printf ("TEST: int bstrchr (const_bstring s1, int c);\n"); + ret += test28_0 (NULL, 0, BSTR_ERR); + ret += test28_0 (&badBstring1, 'b', BSTR_ERR); + ret += test28_0 (&badBstring2, 's', BSTR_ERR); + + ret += test28_0 (&emptyBstring, 0, BSTR_ERR); + ret += test28_0 (&shortBstring, 0, BSTR_ERR); + ret += test28_0 (&shortBstring, 'b', 0); + ret += test28_0 (&shortBstring, 's', 4); + ret += test28_0 (&shortBstring, 'q', BSTR_ERR); + ret += test28_0 (&xxxxxBstring, 0, BSTR_ERR); + ret += test28_0 (&xxxxxBstring, 'b', BSTR_ERR); + ret += test28_0 (&longBstring, 'i', 2); + + printf ("TEST: int bstrrchr (const_bstring s1, int c);\n"); + ret += test28_1 (NULL, 0, BSTR_ERR); + ret += test28_1 (&badBstring1, 'b', BSTR_ERR); + ret += test28_1 (&badBstring2, 's', BSTR_ERR); + + ret += test28_1 (&emptyBstring, 0, BSTR_ERR); + ret += test28_1 (&shortBstring, 0, BSTR_ERR); + ret += test28_1 (&shortBstring, 'b', 0); + ret += test28_1 (&shortBstring, 's', 4); + ret += test28_1 (&shortBstring, 'q', BSTR_ERR); + ret += test28_1 (&xxxxxBstring, 0, BSTR_ERR); + ret += test28_1 (&xxxxxBstring, 'b', BSTR_ERR); + ret += test28_1 (&longBstring, 'i', 82); + + printf ("TEST: int bstrchrp (const_bstring s1, int c, int pos);\n"); + ret += test28_2 (NULL, 0, 0, BSTR_ERR); + ret += test28_2 (&badBstring1, 'b', 0, BSTR_ERR); + ret += test28_2 (&badBstring2, 's', 0, BSTR_ERR); + ret += test28_2 (&shortBstring, 'b', -1, 
BSTR_ERR); + ret += test28_2 (&shortBstring, 'b', shortBstring.slen, BSTR_ERR); + + ret += test28_2 (&emptyBstring, 0, 0, BSTR_ERR); + ret += test28_2 (&shortBstring, 0, 0, BSTR_ERR); + ret += test28_2 (&shortBstring, 'b', 0, 0); + ret += test28_2 (&shortBstring, 'b', 1, BSTR_ERR); + ret += test28_2 (&shortBstring, 's', 0, 4); + ret += test28_2 (&shortBstring, 'q', 0, BSTR_ERR); + + printf ("TEST: int bstrrchrp (const_bstring s1, int c, int pos);\n"); + ret += test28_3 (NULL, 0, 0, BSTR_ERR); + ret += test28_3 (&badBstring1, 'b', 0, BSTR_ERR); + ret += test28_3 (&badBstring2, 's', 0, BSTR_ERR); + ret += test28_3 (&shortBstring, 'b', -1, BSTR_ERR); + ret += test28_3 (&shortBstring, 'b', shortBstring.slen, BSTR_ERR); + + ret += test28_3 (&emptyBstring, 0, 0, BSTR_ERR); + ret += test28_3 (&shortBstring, 0, 0, BSTR_ERR); + ret += test28_3 (&shortBstring, 'b', 0, 0); + ret += test28_3 (&shortBstring, 'b', shortBstring.slen - 1, 0); + ret += test28_3 (&shortBstring, 's', shortBstring.slen - 1, 4); + ret += test28_3 (&shortBstring, 's', 0, BSTR_ERR); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test29_0 (bstring b0, char * s, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbcatcstr (%s, ", dumpBstring (b2)); + + rv = bcatcstr (b2, s); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%p) = %s\n", s, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbcatcstr (%s, ", dumpBstring (b2)); + + rv = bcatcstr (b2, s); + + printf ("%p) = %s\n", s, dumpBstring (b2)); + + if (s) ret += (b2->slen != b0->slen + (int) strlen (s)); + ret += ((0 != rv) && (s != NULL)) || ((0 == rv) && (s == NULL)); + ret += (res == NULL) || ((int) strlen (res) != b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bcatcstr (b0, s))); + printf 
(".\tbcatcstr (%s, %p) = %d\n", dumpBstring (b0), s, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test29 (void) { +int ret = 0; + + printf ("TEST: int bcatcstr (bstring b0, const char * s);\n"); + + /* tests with NULL */ + ret += test29_0 (NULL, NULL, NULL); + ret += test29_0 (NULL, "", NULL); + ret += test29_0 (&emptyBstring, NULL, ""); + ret += test29_0 (&badBstring1, "bogus", NULL); + ret += test29_0 (&badBstring2, "bogus", NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test29_0 (&emptyBstring, "", ""); + ret += test29_0 (&emptyBstring, "bogus", "bogus"); + ret += test29_0 (&shortBstring, "", "bogus"); + ret += test29_0 (&shortBstring, "bogus", "bogusbogus"); + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test30_0 (bstring b0, const unsigned char * s, int len, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbcatblk (%s, ", dumpBstring (b2)); + + rv = bcatblk (b2, s, len); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%p) = %s\n", s, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbcatblk (%s, ", dumpBstring (b2)); + + rv = bcatblk (b2, s, len); + + printf ("%p) = %s\n", s, dumpBstring (b2)); + + if (s) { + if (len >= 0) ret += (b2->slen != b0->slen + len); + else ret += (b2->slen != b0->slen); + } + ret += ((0 != rv) && (s != NULL && len >= 0)) || ((0 == rv) && (s == NULL || len < 0)); + ret += (res == NULL) || ((int) strlen (res) != b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bcatblk (b0, s, len))); + printf (".\tbcatblk (%s, %p, %d) = %d\n", dumpBstring (b0), s, len, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = 
%p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +/* Runs the bcatblk cases through test30_0 (NULL and bad-bstring arguments, negative lengths, ordinary appends) and returns the accumulated failure count. The banner shows the three-argument form that test30_0 actually calls. */ +static int test30 (void) { +int ret = 0; + + printf ("TEST: int bcatblk (bstring b0, const void * s, int len);\n"); + + /* tests with NULL */ + ret += test30_0 (NULL, NULL, 0, NULL); + ret += test30_0 (NULL, (unsigned char *) "", 0, NULL); + ret += test30_0 (&emptyBstring, NULL, 0, ""); + ret += test30_0 (&emptyBstring, NULL, -1, ""); + ret += test30_0 (&badBstring1, NULL, 0, NULL); + ret += test30_0 (&badBstring2, NULL, 0, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test30_0 (&emptyBstring, (unsigned char *) "", -1, ""); + ret += test30_0 (&emptyBstring, (unsigned char *) "", 0, ""); + ret += test30_0 (&emptyBstring, (unsigned char *) "bogus", 5, "bogus"); + ret += test30_0 (&shortBstring, (unsigned char *) "", 0, "bogus"); + ret += test30_0 (&shortBstring, (unsigned char *) "bogus", 5, "bogusbogus"); + ret += test30_0 (&shortBstring, (unsigned char *) "bogus", -1, "bogus"); + printf ("\t# failures: %d\n", ret); + return ret; +} + +/* Checks bfindreplace (b0, find, replace, pos). With valid arguments it works on a copy of b0: first write-protected, where the call must not return 0 and the copy must still equal b0, then writable, where the contents are compared against res and the terminating '\0' is checked. With invalid arguments the call must return BSTR_ERR. Returns the number of failed checks. */ +static int test31_0 (bstring b0, const_bstring find, const_bstring replace, int pos, char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + find != NULL && find->data != NULL && find->slen >= 0 && + replace != NULL && replace->data != NULL && replace->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbfindreplace (%s, %s, %s, %d) = ", dumpBstring (b2), dumpBstring (find), dumpBstring (replace), pos); + + rv = bfindreplace (b2, find, replace, pos); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%d\n", rv); + + bwriteallow (*b2); + + printf (".\tbfindreplace (%s, %s, %s, %d)", dumpBstring (b2), dumpBstring (find), dumpBstring (replace), pos); + + rv = bfindreplace (b2, find, replace, pos); + + ret += (res == NULL) || ((int) strlen (res) > b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != 
'\0'; + + printf (" -> %s\n", dumpBstring (b2)); + + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bfindreplace (b0, find, replace, pos))); + printf (".\tbfindreplace (%s, %s, %s, %d) = %d\n", dumpBstring (b0), dumpBstring (find), dumpBstring (replace), pos, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test31_1 (bstring b0, const_bstring find, const_bstring replace, int pos, char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + find != NULL && find->data != NULL && find->slen >= 0 && + replace != NULL && replace->data != NULL && replace->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbfindreplacecaseless (%s, %s, %s, %d) = ", dumpBstring (b2), dumpBstring (find), dumpBstring (replace), pos); + + rv = bfindreplacecaseless (b2, find, replace, pos); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%d\n", rv); + + bwriteallow (*b2); + + printf (".\tbfindreplacecaseless (%s, %s, %s, %d)", dumpBstring (b2), dumpBstring (find), dumpBstring (replace), pos); + + rv = bfindreplacecaseless (b2, find, replace, pos); + + ret += (res == NULL) || ((int) strlen (res) > b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + + printf (" -> %s\n", dumpBstring (b2)); + + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bfindreplacecaseless (b0, find, replace, pos))); + printf (".\tbfindreplacecaseless (%s, %s, %s, %d) = %d\n", dumpBstring (b0), dumpBstring (find), dumpBstring (replace), pos, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +#define LOTS_OF_S "ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss" + +static int test31 (void) { +int ret = 0; +struct tagbstring t0 = 
bsStatic ("funny"); +struct tagbstring t1 = bsStatic ("weird"); +struct tagbstring t2 = bsStatic ("s"); +struct tagbstring t3 = bsStatic ("long"); +struct tagbstring t4 = bsStatic ("big"); +struct tagbstring t5 = bsStatic ("ss"); +struct tagbstring t6 = bsStatic ("sstsst"); +struct tagbstring t7 = bsStatic ("xx" LOTS_OF_S "xx"); +struct tagbstring t8 = bsStatic ("S"); +struct tagbstring t9 = bsStatic ("LONG"); + + printf ("TEST: int bfindreplace (bstring b, const_bstring f, const_bstring r, int pos);\n"); + /* tests with NULL */ + ret += test31_0 (NULL, NULL, NULL, 0, NULL); + ret += test31_0 (&shortBstring, NULL, &t1, 0, (char *) shortBstring.data); + ret += test31_0 (&shortBstring, &t2, NULL, 0, (char *) shortBstring.data); + ret += test31_0 (&badBstring1, &t2, &t1, 0, NULL); + ret += test31_0 (&badBstring2, &t2, &t1, 0, NULL); + + /* normal operation tests */ + ret += test31_0 (&longBstring, &shortBstring, &t0, 0, "This is a funny but reasonably long string. Just long enough to cause some mallocing."); + ret += test31_0 (&longBstring, &t2, &t1, 0, "Thiweird iweird a boguweird but reaweirdonably long weirdtring. Juweirdt long enough to cauweirde weirdome mallocing."); + ret += test31_0 (&shortBstring, &t2, &t1, 0, "boguweird"); + ret += test31_0 (&shortBstring, &t8, &t1, 0, "bogus"); + ret += test31_0 (&longBstring, &t2, &t1, 27, "This is a bogus but reasonably long weirdtring. Juweirdt long enough to cauweirde weirdome mallocing."); + ret += test31_0 (&longBstring, &t3, &t4, 0, "This is a bogus but reasonably big string. Just big enough to cause some mallocing."); + ret += test31_0 (&longBstring, &t9, &t4, 0, "This is a bogus but reasonably long string. 
Just long enough to cause some mallocing."); + ret += test31_0 (&t6, &t2, &t5, 0, "sssstsssst"); + ret += test31_0 (&t7, &t2, &t5, 0, "xx" LOTS_OF_S LOTS_OF_S "xx"); + + printf ("TEST: int bfindreplacecaseless (bstring b, const_bstring f, const_bstring r, int pos);\n"); + /* tests with NULL */ + ret += test31_1 (NULL, NULL, NULL, 0, NULL); + ret += test31_1 (&shortBstring, NULL, &t1, 0, (char *) shortBstring.data); + ret += test31_1 (&shortBstring, &t2, NULL, 0, (char *) shortBstring.data); + ret += test31_1 (&badBstring1, &t2, &t1, 0, NULL); + ret += test31_1 (&badBstring2, &t2, &t1, 0, NULL); + + /* normal operation tests */ + ret += test31_1 (&longBstring, &shortBstring, &t0, 0, "This is a funny but reasonably long string. Just long enough to cause some mallocing."); + ret += test31_1 (&longBstring, &t2, &t1, 0, "Thiweird iweird a boguweird but reaweirdonably long weirdtring. Juweirdt long enough to cauweirde weirdome mallocing."); + ret += test31_1 (&shortBstring, &t2, &t1, 0, "boguweird"); + ret += test31_1 (&shortBstring, &t8, &t1, 0, "boguweird"); + ret += test31_1 (&longBstring, &t2, &t1, 27, "This is a bogus but reasonably long weirdtring. Juweirdt long enough to cauweirde weirdome mallocing."); + ret += test31_1 (&longBstring, &t3, &t4, 0, "This is a bogus but reasonably big string. Just big enough to cause some mallocing."); + ret += test31_1 (&longBstring, &t9, &t4, 0, "This is a bogus but reasonably big string. Just big enough to cause some mallocing."); + ret += test31_1 (&t6, &t2, &t5, 0, "sssstsssst"); + ret += test31_1 (&t6, &t8, &t5, 0, "sssstsssst"); + ret += test31_1 (&t7, &t2, &t5, 0, "xx" LOTS_OF_S LOTS_OF_S "xx"); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test32_0 (const_bstring b, const char * s, int res) { +int rv, ret = 0; + + ret += (res != (rv = biseqcstr (b, s))); + printf (".\tbiseqcstr (%s, %p:<%s>) = %d\n", dumpBstring (b), s, (s ? 
s : NULL), rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test32_1 (const_bstring b, const char * s, int res) { +int rv, ret = 0; + + ret += (res != (rv = biseqcstrcaseless (b, s))); + printf (".\tbiseqcstrcaseless (%s, %p:<%s>) = %d\n", dumpBstring (b), s, (s ? s : NULL), rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + + +static int test32 (void) { +int ret = 0; + + printf ("TEST: int biseqcstr (const_bstring b, const char * s);\n"); + + /* tests with NULL */ + ret += test32_0 (NULL, NULL, BSTR_ERR); + ret += test32_0 (&emptyBstring, NULL, BSTR_ERR); + ret += test32_0 (NULL, "", BSTR_ERR); + ret += test32_0 (&badBstring1, "", BSTR_ERR); + ret += test32_0 (&badBstring2, "bogus", BSTR_ERR); + + /* normal operation tests on all sorts of subranges */ + ret += test32_0 (&emptyBstring, "", 1); + ret += test32_0 (&shortBstring, "bogus", 1); + ret += test32_0 (&emptyBstring, "bogus", 0); + ret += test32_0 (&shortBstring, "", 0); + + { + bstring b = bstrcpy (&shortBstring); + b->data[1]++; + ret += test32_0 (b, (char *) shortBstring.data, 0); + bdestroy (b); + } + + printf ("TEST: int biseqcstrcaseless (const_bstring b, const char * s);\n"); + + /* tests with NULL */ + ret += test32_1 (NULL, NULL, BSTR_ERR); + ret += test32_1 (&emptyBstring, NULL, BSTR_ERR); + ret += test32_1 (NULL, "", BSTR_ERR); + ret += test32_1 (&badBstring1, "", BSTR_ERR); + ret += test32_1 (&badBstring2, "bogus", BSTR_ERR); + + /* normal operation tests on all sorts of subranges */ + ret += test32_1 (&emptyBstring, "", 1); + ret += test32_1 (&shortBstring, "bogus", 1); + ret += test32_1 (&shortBstring, "BOGUS", 1); + ret += test32_1 (&emptyBstring, "bogus", 0); + ret += test32_1 (&shortBstring, "", 0); + + { + bstring b = bstrcpy (&shortBstring); + b->data[1]++; + ret += test32_1 (b, (char *) shortBstring.data, 0); + bdestroy (b); + } + + if (ret) printf ("\t# 
failures: %d\n", ret); + return ret; +} + +static int test33_0 (bstring b0, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbtoupper (%s)", dumpBstring (b2)); + + rv = btoupper (b2); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf (" = %s\n", dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbtoupper (%s)", dumpBstring (b2)); + + rv = btoupper (b2); + + printf (" = %s\n", dumpBstring (b2)); + + ret += (b2->slen != b0->slen); + ret += (0 != rv); + ret += (res == NULL) || ((int) strlen (res) != b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = btoupper (b0))); + printf (".\tbtoupper (%s) = %d\n", dumpBstring (b0), rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test33 (void) { +int ret = 0; + + printf ("TEST: int btoupper (bstring b);\n"); + + /* tests with NULL */ + ret += test33_0 (NULL, NULL); + ret += test33_0 (&badBstring1, NULL); + ret += test33_0 (&badBstring2, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test33_0 (&emptyBstring, ""); + ret += test33_0 (&shortBstring, "BOGUS"); + ret += test33_0 (&longBstring, "THIS IS A BOGUS BUT REASONABLY LONG STRING. 
JUST LONG ENOUGH TO CAUSE SOME MALLOCING."); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test34_0 (bstring b0, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbtolower (%s)", dumpBstring (b2)); + + rv = btolower (b2); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf (" = %s\n", dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbtolower (%s)", dumpBstring (b2)); + + rv = btolower (b2); + + printf (" = %s\n", dumpBstring (b2)); + + ret += (b2->slen != b0->slen); + ret += (0 != rv); + ret += (res == NULL) || ((int) strlen (res) != b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = btolower (b0))); + printf (".\tbtolower (%s) = %d\n", dumpBstring (b0), rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test34 (void) { +int ret = 0; + + printf ("TEST: int btolower (bstring b);\n"); + + /* tests with NULL */ + ret += test34_0 (NULL, NULL); + ret += test34_0 (&badBstring1, NULL); + ret += test34_0 (&badBstring2, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test34_0 (&emptyBstring, ""); + ret += test34_0 (&shortBstring, "bogus"); + ret += test34_0 (&longBstring, "this is a bogus but reasonably long string. 
just long enough to cause some mallocing."); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test35_0 (const_bstring b0, const_bstring b1, int res) { +int rv, ret = 0; + + ret += (res != (rv = bstricmp (b0, b1))); + printf (".\tbstricmp (%s, %s) = %d\n", dumpBstring (b0), dumpBstring (b1), rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test35 (void) { +int ret = 0; +struct tagbstring t0 = bsStatic ("bOgUs"); +struct tagbstring t1 = bsStatic ("bOgUR"); +struct tagbstring t2 = bsStatic ("bOgUt"); + + printf ("TEST: int bstricmp (const_bstring b0, const_bstring b1);\n"); + + /* tests with NULL */ + ret += test35_0 (NULL, NULL, SHRT_MIN); + ret += test35_0 (&emptyBstring, NULL, SHRT_MIN); + ret += test35_0 (NULL, &emptyBstring, SHRT_MIN); + ret += test35_0 (&emptyBstring, &badBstring1, SHRT_MIN); + ret += test35_0 (&badBstring1, &emptyBstring, SHRT_MIN); + ret += test35_0 (&shortBstring, &badBstring2, SHRT_MIN); + ret += test35_0 (&badBstring2, &shortBstring, SHRT_MIN); + + /* normal operation tests on all sorts of subranges */ + ret += test35_0 (&emptyBstring, &emptyBstring, 0); + ret += test35_0 (&shortBstring, &t0, 0); + ret += test35_0 (&shortBstring, &t1, tolower (shortBstring.data[4]) - tolower (t1.data[4])); + ret += test35_0 (&shortBstring, &t2, tolower (shortBstring.data[4]) - tolower (t2.data[4])); + + t0.slen++; + ret += test35_0 (&shortBstring, &t0, -(UCHAR_MAX+1)); + ret += test35_0 (&t0, &shortBstring, (UCHAR_MAX+1)); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test36_0 (const_bstring b0, const_bstring b1, int n, int res) { +int rv, ret = 0; + + ret += (res != (rv = bstrnicmp (b0, b1, n))); + printf (".\tbstrnicmp (%s, %s, %d) = %d\n", dumpBstring (b0), dumpBstring (b1), n, rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test36 (void) { +int 
ret = 0; +struct tagbstring t0 = bsStatic ("bOgUs"); +struct tagbstring t1 = bsStatic ("bOgUR"); +struct tagbstring t2 = bsStatic ("bOgUt"); + + printf ("TEST: int bstrnicmp (const_bstring b0, const_bstring b1);\n"); + + /* tests with NULL */ + ret += test36_0 (NULL, NULL, 0, SHRT_MIN); + ret += test36_0 (&emptyBstring, NULL, 0, SHRT_MIN); + ret += test36_0 (NULL, &emptyBstring, 0, SHRT_MIN); + ret += test36_0 (&emptyBstring, &badBstring1, 0, SHRT_MIN); + ret += test36_0 (&badBstring1, &emptyBstring, 0, SHRT_MIN); + ret += test36_0 (&shortBstring, &badBstring2, 5, SHRT_MIN); + ret += test36_0 (&badBstring2, &shortBstring, 5, SHRT_MIN); + + /* normal operation tests on all sorts of subranges */ + ret += test36_0 (&emptyBstring, &emptyBstring, 0, 0); + ret += test36_0 (&shortBstring, &t0, 0, 0); + ret += test36_0 (&shortBstring, &t0, 5, 0); + ret += test36_0 (&shortBstring, &t0, 4, 0); + ret += test36_0 (&shortBstring, &t0, 6, 0); + ret += test36_0 (&shortBstring, &t1, 5, shortBstring.data[4] - t1.data[4]); + ret += test36_0 (&shortBstring, &t1, 4, 0); + ret += test36_0 (&shortBstring, &t1, 6, shortBstring.data[4] - t1.data[4]); + ret += test36_0 (&shortBstring, &t2, 5, shortBstring.data[4] - t2.data[4]); + ret += test36_0 (&shortBstring, &t2, 4, 0); + ret += test36_0 (&shortBstring, &t2, 6, shortBstring.data[4] - t2.data[4]); + + t0.slen++; + ret += test36_0 (&shortBstring, &t0, 5, 0); + ret += test36_0 (&shortBstring, &t0, 6, -(UCHAR_MAX+1)); + ret += test36_0 (&t0, &shortBstring, 6, (UCHAR_MAX+1)); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test37_0 (const_bstring b0, const_bstring b1, int res) { +int rv, ret = 0; + + ret += (res != (rv = biseqcaseless (b0, b1))); + printf (".\tbiseqcaseless (%s, %s) = %d\n", dumpBstring (b0), dumpBstring (b1), rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test37 (void) { +int ret = 0; +struct tagbstring t0 = bsStatic 
("bOgUs"); +struct tagbstring t1 = bsStatic ("bOgUR"); +struct tagbstring t2 = bsStatic ("bOgUt"); + + printf ("TEST: int biseqcaseless (const_bstring b0, const_bstring b1);\n"); + + /* tests with NULL */ + ret += test37_0 (NULL, NULL, BSTR_ERR); + ret += test37_0 (&emptyBstring, NULL, BSTR_ERR); + ret += test37_0 (NULL, &emptyBstring, BSTR_ERR); + ret += test37_0 (&emptyBstring, &badBstring1, BSTR_ERR); + ret += test37_0 (&badBstring1, &emptyBstring, BSTR_ERR); + ret += test37_0 (&shortBstring, &badBstring2, BSTR_ERR); + ret += test37_0 (&badBstring2, &shortBstring, BSTR_ERR); + + /* normal operation tests on all sorts of subranges */ + ret += test37_0 (&emptyBstring, &emptyBstring, 1); + ret += test37_0 (&shortBstring, &t0, 1); + ret += test37_0 (&shortBstring, &t1, 0); + ret += test37_0 (&shortBstring, &t2, 0); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test48_0 (const_bstring b, const unsigned char * blk, int len, int res) { +int rv, ret = 0; + + ret += (res != (rv = biseqcaselessblk (b, blk, len))); + printf (".\tbiseqcaselessblk (%s, %s, %d) = %d\n", dumpBstring (b), dumpCstring (blk), len, rv); + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %d)\n", __LINE__, ret, res); + } + return ret; +} + +static int test48 (void) { +int ret = 0; +struct tagbstring t0 = bsStatic ("bOgUs"); +struct tagbstring t1 = bsStatic ("bOgUR"); +struct tagbstring t2 = bsStatic ("bOgUt"); + + printf ("TEST: int biseqcaselessblk (const_bstring b, const void * blk, int len);\n"); + + /* tests with NULL */ + ret += test48_0 (NULL, NULL, 0, BSTR_ERR); + ret += test48_0 (&emptyBstring, NULL, 0, BSTR_ERR); + ret += test48_0 (NULL, emptyBstring.data, 0, BSTR_ERR); + ret += test48_0 (&emptyBstring, badBstring1.data, emptyBstring.slen, BSTR_ERR); + ret += test48_0 (&badBstring1, emptyBstring.data, badBstring1.slen, BSTR_ERR); + ret += test48_0 (&shortBstring, badBstring2.data, badBstring2.slen, BSTR_ERR); + ret += test48_0 (&badBstring2, 
shortBstring.data, badBstring2.slen, BSTR_ERR); + + /* normal operation tests on all sorts of subranges */ + ret += test48_0 (&emptyBstring, emptyBstring.data, emptyBstring.slen, 1); + ret += test48_0 (&shortBstring, t0.data, t0.slen, 1); + ret += test48_0 (&shortBstring, t1.data, t1.slen, 0); + ret += test48_0 (&shortBstring, t2.data, t2.slen, 0); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +struct emuFile { + int ofs; + bstring contents; +}; + +static int test38_aux_bNgetc (struct emuFile * f) { +int v = EOF; + if (NULL != f && EOF != (v = bchare (f->contents, f->ofs, EOF))) f->ofs++; + return v; +} + +static size_t test38_aux_bNread (void *buff, size_t elsize, size_t nelem, struct emuFile * f) { +char * b = (char *) buff; +int v; +size_t i, j, c = 0; + + if (NULL == f || NULL == b) return c; + for (i=0; i < nelem; i++) for (j=0; j < elsize; j++) { + v = test38_aux_bNgetc (f); + if (EOF == v) { + *b = (char) '\0'; + return c; + } else { + *b = (char) v; + b++; + c++; + } + } + + return c; +} + +static int test38_aux_bNopen (struct emuFile * f, bstring b) { + if (NULL == f || NULL == b) return -__LINE__; + f->ofs = 0; + f->contents = b; + return 0; +} + +static int test38 (void) { +struct emuFile f; +bstring b0, b1, b2, b3; +int ret = 0; + + printf ("TEST: bgets/breads test\n"); + + test38_aux_bNopen (&f, &shortBstring); + + /* Creation/reads */ + + b0 = bgets ((bNgetc) test38_aux_bNgetc, &f, (char) 'b'); + b1 = bread ((bNread) test38_aux_bNread, &f); + b2 = bgets ((bNgetc) test38_aux_bNgetc, &f, (char) '\0'); + b3 = bread ((bNread) test38_aux_bNread, &f); + + ret += 1 != biseqcstr (b0, "b"); + ret += 1 != biseqcstr (b1, "ogus"); + ret += NULL != b2; + ret += 1 != biseqcstr (b3, ""); + + /* Bogus accumulations */ + + f.ofs = 0; + + ret += 0 <= bgetsa (NULL, (bNgetc) test38_aux_bNgetc, &f, (char) 'o'); + ret += 0 <= breada (NULL, (bNread) test38_aux_bNread, &f); + ret += 0 <= bgetsa (&shortBstring, (bNgetc) test38_aux_bNgetc, &f, (char) 
'o'); + ret += 0 <= breada (&shortBstring, (bNread) test38_aux_bNread, &f); + + /* Normal accumulations */ + + ret += 0 > bgetsa (b0, (bNgetc) test38_aux_bNgetc, &f, (char) 'o'); + ret += 0 > breada (b1, (bNread) test38_aux_bNread, &f); + + ret += 1 != biseqcstr (b0, "bbo"); + ret += 1 != biseqcstr (b1, "ogusgus"); + + /* Attempt to append past end should do nothing */ + + ret += 0 > bgetsa (b0, (bNgetc) test38_aux_bNgetc, &f, (char) 'o'); + ret += 0 > breada (b1, (bNread) test38_aux_bNread, &f); + + ret += 1 != biseqcstr (b0, "bbo"); + ret += 1 != biseqcstr (b1, "ogusgus"); + + bdestroy (b0); + bdestroy (b1); + bdestroy (b2); + bdestroy (b3); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test39_0 (const_bstring b, const_bstring lt, const_bstring rt, const_bstring t) { +bstring r; +int ret = 0; + + ret += 0 <= bltrimws (NULL); + ret += 0 <= brtrimws (NULL); + ret += 0 <= btrimws (NULL); + + r = bstrcpy (b); + bwriteprotect (*r); + ret += 0 <= bltrimws (r); + ret += 0 <= brtrimws (r); + ret += 0 <= btrimws (r); + bwriteallow(*r); + ret += 0 != bltrimws (r); + printf (".\tbltrim (%s) = %s\n", dumpBstring (b), dumpBstring (r)); + ret += !biseq (r, lt); + bdestroy (r); + + r = bstrcpy (b); + ret += 0 != brtrimws (r); + printf (".\tbrtrim (%s) = %s\n", dumpBstring (b), dumpBstring (r)); + ret += !biseq (r, rt); + bdestroy (r); + + r = bstrcpy (b); + ret += 0 != btrimws (r); + printf (".\tbtrim (%s) = %s\n", dumpBstring (b), dumpBstring (r)); + ret += !biseq (r, t); + bdestroy (r); + + return ret; +} + +static int test39 (void) { +int ret = 0; +struct tagbstring t0 = bsStatic (" bogus string "); +struct tagbstring t1 = bsStatic ("bogus string "); +struct tagbstring t2 = bsStatic (" bogus string"); +struct tagbstring t3 = bsStatic ("bogus string"); +struct tagbstring t4 = bsStatic (" "); +struct tagbstring t5 = bsStatic (""); + + printf ("TEST: trim functions\n"); + + ret += test39_0 (&t0, &t1, &t2, &t3); + ret += test39_0 (&t1, &t1, &t3, 
&t3); + ret += test39_0 (&t2, &t3, &t2, &t3); + ret += test39_0 (&t3, &t3, &t3, &t3); + ret += test39_0 (&t4, &t5, &t5, &t5); + ret += test39_0 (&t5, &t5, &t5, &t5); + + if (ret) printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test40_0 (bstring b0, const_bstring b1, int left, int len, const char * res) { +bstring b2; +int rv, ret = 0; + + if (b0 != NULL && b0->data != NULL && b0->slen >= 0 && + b1 != NULL && b1->data != NULL && b1->slen >= 0) { + b2 = bstrcpy (b0); + bwriteprotect (*b2); + + printf (".\tbassignmidstr (%s, ", dumpBstring (b2)); + + rv = bassignmidstr (b2, b1, left, len); + ret += (rv == 0); + if (!biseq (b0, b2)) ret++; + + printf ("%s, %d, %d) = %s\n", dumpBstring (b1), left, len, dumpBstring (b2)); + + bwriteallow (*b2); + + printf (".\tbassignmidstr (%s, ", dumpBstring (b2)); + + rv = bassignmidstr (b2, b1, left, len); + + printf ("%s, %d, %d) = %s\n", dumpBstring (b1), left, len, dumpBstring (b2)); + + if (b1) ret += (b2->slen > len) | (b2->slen < 0); + ret += ((0 != rv) && (b1 != NULL)) || ((0 == rv) && (b1 == NULL)); + ret += (res == NULL) || ((int) strlen (res) != b2->slen) + || (0 != memcmp (b2->data, res, b2->slen)); + ret += b2->data[b2->slen] != '\0'; + bdestroy (b2); + } else { + ret += (BSTR_ERR != (rv = bassignmidstr (b0, b1, left, len))); + printf (".\tbassignmidstr (%s, %s, %d, %d) = %d\n", dumpBstring (b0), dumpBstring (b1), left, len, rv); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test40 (void) { +int ret = 0; + + printf ("TEST: int bassignmidstr (bstring b0, const_bstring b1, int left, int len);\n"); + + /* tests with NULL */ + ret += test40_0 (NULL, NULL, 0, 1, NULL); + ret += test40_0 (NULL, &emptyBstring, 0, 1, NULL); + ret += test40_0 (&emptyBstring, NULL, 0, 1, ""); + ret += test40_0 (&badBstring1, &emptyBstring, 0, 1, NULL); + ret += test40_0 (&badBstring2, &emptyBstring, 0, 
1, NULL); + ret += test40_0 (&emptyBstring, &badBstring1, 0, 1, NULL); + ret += test40_0 (&emptyBstring, &badBstring2, 0, 1, NULL); + + /* normal operation tests on all sorts of subranges */ + ret += test40_0 (&emptyBstring, &emptyBstring, 0, 1, ""); + ret += test40_0 (&emptyBstring, &shortBstring, 1, 3, "ogu"); + ret += test40_0 (&shortBstring, &emptyBstring, 0, 1, ""); + ret += test40_0 (&shortBstring, &shortBstring, 1, 3, "ogu"); + ret += test40_0 (&shortBstring, &shortBstring, -1, 4, "bog"); + ret += test40_0 (&shortBstring, &shortBstring, 1, 9, "ogus"); + ret += test40_0 (&shortBstring, &shortBstring, 9, 1, ""); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test41_0 (bstring b1, int left, int len, const char * res) { +struct tagbstring t; +bstring b2, b3; +int ret = 0; + + if (b1 != NULL && b1->data != NULL && b1->slen >= 0) { + b2 = bfromcstr (""); + + bassignmidstr (b2, b1, left, len); + + bmid2tbstr (t, b1, left, len); + b3 = bstrcpy (&t); + + printf (".\tbmid2tbstr (%s, %d, %d) = %s\n", dumpBstring (b1), left, len, dumpBstring (b3)); + + ret += !biseq (&t, b2); + + bdestroy (b2); + bdestroy (b3); + } else { + bmid2tbstr (t, b1, left, len); + b3 = bstrcpy (&t); + ret += t.slen != 0; + + printf (".\tbmid2tbstr (%s, %d, %d) = %s\n", dumpBstring (b1), left, len, dumpBstring (b3)); + bdestroy (b3); + } + + if (ret) { + printf ("\t\tfailure(%d) = %d (res = %p", __LINE__, ret, res); + if (res) printf (" = \"%s\"", res); + printf (")\n"); + } + return ret; +} + +static int test41 (void) { +int ret = 0; + + printf ("TEST: int bmid2tbstr (struct tagbstring &t, const_bstring b1, int left, int len);\n"); + + /* tests with NULL */ + ret += test41_0 (NULL, 0, 1, NULL); + ret += test41_0 (&emptyBstring, 0, 1, NULL); + ret += test41_0 (NULL, 0, 1, ""); + ret += test41_0 (&emptyBstring, 0, 1, NULL); + ret += test41_0 (&emptyBstring, 0, 1, NULL); + ret += test41_0 (&badBstring1, 0, 1, NULL); + ret += test41_0 (&badBstring2, 0, 1, NULL); + + /* 
normal operation tests on all sorts of subranges */ + ret += test41_0 (&emptyBstring, 0, 1, ""); + ret += test41_0 (&shortBstring, 1, 3, "ogu"); + ret += test41_0 (&emptyBstring, 0, 1, ""); + ret += test41_0 (&shortBstring, 1, 3, "ogu"); + ret += test41_0 (&shortBstring, -1, 4, "bog"); + ret += test41_0 (&shortBstring, 1, 9, "ogus"); + ret += test41_0 (&shortBstring, 9, 1, ""); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test42_0 (const_bstring bi, int len, const char * res) { +bstring b; +int rv, ret = 0; + + rv = btrunc (b = bstrcpy (bi), len); + ret += (len >= 0) ? (rv < 0) : (rv >= 0); + if (res) ret += (0 == biseqcstr (b, res)); + printf (".\tbtrunc (%s, %d) = %s\n", dumpBstring (bi), len, dumpBstring (b)); + bdestroy (b); + return ret; +} + +static int test42 (void) { +int ret = 0; + + printf ("TEST: int btrunc (bstring b, int n);\n"); + + /* tests with NULL */ + ret += 0 <= btrunc (NULL, 2); + ret += 0 <= btrunc (NULL, 0); + ret += 0 <= btrunc (NULL, -1); + + /* write protected */ + ret += 0 <= btrunc (&shortBstring, 2); + ret += 0 <= btrunc (&shortBstring, 0); + ret += 0 <= btrunc (&shortBstring, -1); + + ret += test42_0 (&emptyBstring, 10, ""); + ret += test42_0 (&emptyBstring, 0, ""); + ret += test42_0 (&emptyBstring, -1, NULL); + + ret += test42_0 (&shortBstring, 10, "bogus"); + ret += test42_0 (&shortBstring, 3, "bog"); + ret += test42_0 (&shortBstring, 0, ""); + ret += test42_0 (&shortBstring, -1, NULL); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test43 (void) { +static struct tagbstring ts0 = bsStatic (""); +static struct tagbstring ts1 = bsStatic (" "); +static struct tagbstring ts2 = bsStatic (" abc"); +static struct tagbstring ts3 = bsStatic ("abc "); +static struct tagbstring ts4 = bsStatic (" abc "); +static struct tagbstring ts5 = bsStatic ("abc"); +bstring tstrs[6] = { &ts0, &ts1, &ts2, &ts3, &ts4, &ts5 }; +int ret = 0; +int i; + + printf ("TEST: int btfromblk*trim (struct tagbstring t, 
void * s, int l);\n"); + + for (i=0; i < 6; i++) { + struct tagbstring t; + bstring b; + + btfromblkltrimws (t, tstrs[i]->data, tstrs[i]->slen); + bltrimws (b = bstrcpy (tstrs[i])); + if (!biseq (b, &t)) { + ret++; + bassign (b, &t); + printf ("btfromblkltrimws failure: <%s> -> <%s>\n", tstrs[i]->data, b->data); + } + printf (".\tbtfromblkltrimws (\"%s\", \"%s\", %d)\n", (char *) bdatae (b, NULL), tstrs[i]->data, tstrs[i]->slen); + bdestroy (b); + + btfromblkrtrimws (t, tstrs[i]->data, tstrs[i]->slen); + brtrimws (b = bstrcpy (tstrs[i])); + if (!biseq (b, &t)) { + ret++; + bassign (b, &t); + printf ("btfromblkrtrimws failure: <%s> -> <%s>\n", tstrs[i]->data, b->data); + } + printf (".\tbtfromblkrtrimws (\"%s\", \"%s\", %d)\n", (char *) bdatae (b, NULL), tstrs[i]->data, tstrs[i]->slen); + bdestroy (b); + + btfromblktrimws (t, tstrs[i]->data, tstrs[i]->slen); + btrimws (b = bstrcpy (tstrs[i])); + if (!biseq (b, &t)) { + ret++; + bassign (b, &t); + printf ("btfromblktrimws failure: <%s> -> <%s>\n", tstrs[i]->data, b->data); + } + printf (".\tbtfromblktrimws (\"%s\", \"%s\", %d)\n", (char *) bdatae (b, NULL), tstrs[i]->data, tstrs[i]->slen); + bdestroy (b); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test44_0 (const char * str) { +int ret = 0, v; +bstring b; + if (NULL == str) { + ret += 0 <= bassigncstr (NULL, "test"); + printf (".\tbassigncstr (b = %s, NULL)", dumpBstring (b = bfromcstr (""))); + ret += 0 <= (v = bassigncstr (b, NULL)); + printf (" = %d; b -> %s\n", v, dumpBstring (b)); + ret += 0 <= bassigncstr (&shortBstring, NULL); + bdestroy (b); + return ret; + } + + ret += 0 <= bassigncstr (NULL, str); + printf (".\tbassigncstr (b = %s, \"%s\")", dumpBstring (b = bfromcstr ("")), str); + ret += 0 > (v = bassigncstr (b, str)); + printf (" = %d; b -> %s\n", v, dumpBstring (b)); + ret += 0 != strcmp (bdatae (b, ""), str); + ret += ((size_t) b->slen) != strlen (str); + ret += 0 > bassigncstr (b, "xxxxx"); + bwriteprotect(*b) + printf 
(".\tbassigncstr (b = %s, \"%s\")", dumpBstring (b), str); + ret += 0 <= (v = bassigncstr (b, str)); + printf (" = %d; b -> %s\n", v, dumpBstring (b)); + ret += 0 != strcmp (bdatae (b, ""), "xxxxx"); + ret += ((size_t) b->slen) != strlen ("xxxxx"); + bwriteallow(*b) + ret += 0 <= bassigncstr (&shortBstring, str); + bdestroy (b); + printf (".\tbassigncstr (a = %s, \"%s\")", dumpBstring (&shortBstring), str); + ret += 0 <= (v = bassigncstr (&shortBstring, str)); + printf (" = %d; a -> %s\n", v, dumpBstring (&shortBstring)); + return ret; +} + +static int test44 (void) { +int ret = 0; + + printf ("TEST: int bassigncstr (bstring a, char * str);\n"); + + /* tests with NULL */ + ret += test44_0 (NULL); + + ret += test44_0 (EMPTY_STRING); + ret += test44_0 (SHORT_STRING); + ret += test44_0 (LONG_STRING); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test45_0 (const char * str) { +int ret = 0, v, len; +bstring b; + if (NULL == str) { + ret += 0 <= bassignblk (NULL, "test", 4); + printf (".\tbassignblk (b = %s, NULL, 1)", dumpBstring (b = bfromcstr (""))); + ret += 0 <= (v = bassignblk (b, NULL, 1)); + printf (" = %d; b -> %s\n", v, dumpBstring (b)); + ret += 0 <= bassignblk (&shortBstring, NULL, 1); + bdestroy (b); + return ret; + } + + len = (int) strlen (str); + ret += 0 <= bassignblk (NULL, str, len); + printf (".\tbassignblk (b = %s, \"%s\", %d)", dumpBstring (b = bfromcstr ("")), str, len); + ret += 0 > (v = bassignblk (b, str, len)); + printf (" = %d; b -> %s\n", v, dumpBstring (b)); + ret += 0 != strcmp (bdatae (b, ""), str); + ret += b->slen != len; + ret += 0 > bassigncstr (b, "xxxxx"); + bwriteprotect(*b) + printf (".\tbassignblk (b = %s, \"%s\", %d)", dumpBstring (b), str, len); + ret += 0 <= (v = bassignblk (b, str, len)); + printf (" = %d; b -> %s\n", v, dumpBstring (b)); + ret += 0 != strcmp (bdatae (b, ""), "xxxxx"); + ret += ((size_t) b->slen) != strlen ("xxxxx"); + bwriteallow(*b) + ret += 0 <= bassignblk (&shortBstring, str, len); 
+ bdestroy (b); + printf (".\tbassignblk (a = %s, \"%s\", %d)", dumpBstring (&shortBstring), str, len); + ret += 0 <= (v = bassignblk (&shortBstring, str, len)); + printf (" = %d; a -> %s\n", v, dumpBstring (&shortBstring)); + return ret; +} + +static int test45 (void) { +int ret = 0; + + printf ("TEST: int bassignblk (bstring a, const void * s, int len);\n"); + + /* tests with NULL */ + ret += test45_0 (NULL); + + ret += test45_0 (EMPTY_STRING); + ret += test45_0 (SHORT_STRING); + ret += test45_0 (LONG_STRING); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test46_0 (const_bstring r, bstring b, int count, const char * fmt, ...) { +int ret; +va_list arglist; + + printf (".\tbvcformata (%s, %d, \"%s\", ...) -> ", dumpBstring (b), count, fmt); + va_start (arglist, fmt); + ret = bvcformata (b, count, fmt, arglist); + va_end (arglist); + printf ("%d, %s (%s)\n", ret, dumpBstring (b), dumpBstring (r)); + if (ret < 0) return (NULL != r); + ret += 1 != biseq (r, b); + if (0 != ret) printf ("\t->failed\n"); + return ret; +} + +static int test46_1 (bstring b, const char * fmt, const_bstring r, ...) { +int ret; + + printf (".\tbvformata (&, %s, \"%s\", ...) 
-> ", dumpBstring (b), fmt); + bvformata (ret, b, fmt, r); + printf ("%d, %s (%s)\n", ret, dumpBstring (b), dumpBstring (r)); + if (ret < 0) return (NULL != r); + ret += 1 != biseq (r, b); + if (0 != ret) printf ("\t->failed\n"); + return ret; +} + +static int test46 (void) { +bstring b, b2; +int ret = 0; + + printf ("TEST: int bvcformata (bstring b, int count, const char * fmt, va_list arg);\n"); + + ret += test46_0 (NULL, NULL, 8, "[%d]", 15); + ret += test46_0 (NULL, &shortBstring, 8, "[%d]", 15); + ret += test46_0 (NULL, &badBstring1, 8, "[%d]", 15); + ret += test46_0 (NULL, &badBstring2, 8, "[%d]", 15); + ret += test46_0 (NULL, &badBstring3, 8, "[%d]", 15); + + b = bfromcstr (""); + ret += test46_0 (&shortBstring, b, shortBstring.slen, "%s", (char *) shortBstring.data); + b->slen = 0; + ret += test46_0 (&shortBstring, b, shortBstring.slen + 1, "%s", (char *) shortBstring.data); + b->slen = 0; + ret += test46_0 (NULL, b, shortBstring.slen-1, "%s", (char *) shortBstring.data); + + printf ("TEST: bvformata (int &ret, bstring b, const char * fmt, lastarg);\n"); + + ret += test46_1 (NULL, "[%d]", NULL, 15); + ret += test46_1 (&shortBstring, "[%d]", NULL, 15); + ret += test46_1 (&badBstring1, "[%d]", NULL, 15); + ret += test46_1 (&badBstring2, "[%d]", NULL, 15); + ret += test46_1 (&badBstring3, "[%d]", NULL, 15); + + b->slen = 0; + ret += test46_1 (b, "%s", &shortBstring, (char *) shortBstring.data); + + b->slen = 0; + ret += test46_1 (b, "%s", &longBstring, (char *) longBstring.data); + + b->slen = 0; + b2 = bfromcstr (EIGHT_CHAR_STRING); + bconcat (b2, b2); + bconcat (b2, b2); + bconcat (b2, b2); + ret += test46_1 (b, "%s%s%s%s%s%s%s%s", b2, + EIGHT_CHAR_STRING, EIGHT_CHAR_STRING, EIGHT_CHAR_STRING, EIGHT_CHAR_STRING, + EIGHT_CHAR_STRING, EIGHT_CHAR_STRING, EIGHT_CHAR_STRING, EIGHT_CHAR_STRING); + bdestroy (b2); + + bdestroy (b); + printf ("\t# failures: %d\n", ret); + return ret; +} + +int main (int argc, char * argv[]) { +int ret = 0; + + argc = argc; + argv = 
argv; + + printf ("Direct case testing of bstring core functions\n"); + + ret += test0 (); + ret += test1 (); + ret += test2 (); + ret += test3 (); + ret += test4 (); + ret += test5 (); + ret += test6 (); + ret += test7 (); + ret += test8 (); + ret += test9 (); + ret += test10 (); + ret += test11 (); + ret += test12 (); + ret += test13 (); + ret += test14 (); + ret += test15 (); + ret += test16 (); + ret += test17 (); + ret += test18 (); + ret += test19 (); + ret += test20 (); + ret += test21 (); + ret += test22 (); + ret += test23 (); + ret += test24 (); + ret += test25 (); + ret += test26 (); + ret += test27 (); + ret += test28 (); + ret += test29 (); + ret += test30 (); + ret += test31 (); + ret += test32 (); + ret += test33 (); + ret += test34 (); + ret += test35 (); + ret += test36 (); + ret += test37 (); + ret += test38 (); + ret += test39 (); + ret += test40 (); + ret += test41 (); + ret += test42 (); + ret += test43 (); + ret += test44 (); + ret += test45 (); + ret += test46 (); + ret += test47 (); + ret += test48 (); + + printf ("# test failures: %d\n", ret); + + return 0; +} diff --git a/bstrlib/bstraux.c b/bstrlib/bstraux.c new file mode 100644 index 0000000..ac97836 --- /dev/null +++ b/bstrlib/bstraux.c @@ -0,0 +1,1161 @@ + +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license and the GPL. Refer to the accompanying documentation for details + * on usage and license. + */ + +/* + * bstraux.c + * + * This file is not necessarily part of the core bstring library itself, but + * is just an auxilliary module which includes miscellaneous or trivial + * functions. 
+ */ + +#if defined (_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include +#include +#include "bstrlib.h" +#include "bstraux.h" + +#ifndef UNUSED +#define UNUSED(x) (void)(x) +#endif + +/* bstring bTail (bstring b, int n) + * + * Return with a string of the last n characters of b. + */ +bstring bTail (bstring b, int n) { + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy (b); + return bmidstr (b, b->slen - n, n); +} + +/* bstring bHead (bstring b, int n) + * + * Return with a string of the first n characters of b. + */ +bstring bHead (bstring b, int n) { + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy (b); + return bmidstr (b, 0, n); +} + +/* int bFill (bstring a, char c, int len) + * + * Fill a given bstring with the character in parameter c, for a length n. + */ +int bFill (bstring b, char c, int len) { + if (b == NULL || len < 0 || (b->mlen < b->slen && b->mlen > 0)) return -__LINE__; + b->slen = 0; + return bsetstr (b, len, NULL, c); +} + +/* int bReplicate (bstring b, int n) + * + * Replicate the contents of b end to end n times and replace it in b. + */ +int bReplicate (bstring b, int n) { + return bpattern (b, n * b->slen); +} + +/* int bReverse (bstring b) + * + * Reverse the contents of b in place. + */ +int bReverse (bstring b) { +int i, n, m; +unsigned char t; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + n = b->slen; + if (2 <= n) { + m = ((unsigned)n) >> 1; + n--; + for (i=0; i < m; i++) { + t = b->data[n - i]; + b->data[n - i] = b->data[i]; + b->data[i] = t; + } + } + return 0; +} + +/* int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) + * + * Insert a repeated sequence of a given character into the string at + * position pos for a length len. 
+ */ +int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) { + if (b == NULL || b->slen < 0 || b->mlen < b->slen || pos < 0 || len <= 0) return -__LINE__; + + if (pos > b->slen + && 0 > bsetstr (b, pos, NULL, fill)) return -__LINE__; + + if (0 > balloc (b, b->slen + len)) return -__LINE__; + if (pos < b->slen) memmove (b->data + pos + len, b->data + pos, b->slen - pos); + memset (b->data + pos, c, len); + b->slen += len; + b->data[b->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bJustifyLeft (bstring b, int space) + * + * Left justify a string. + */ +int bJustifyLeft (bstring b, int space) { +int j, i, s, t; +unsigned char c = (unsigned char) space; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + if (space != (int) c) return BSTR_OK; + + for (s=j=i=0; i < b->slen; i++) { + t = s; + s = c != (b->data[j] = b->data[i]); + j += (t|s); + } + if (j > 0 && b->data[j-1] == c) j--; + + b->data[j] = (unsigned char) '\0'; + b->slen = j; + return BSTR_OK; +} + +/* int bJustifyRight (bstring b, int width, int space) + * + * Right justify a string to within a given width. + */ +int bJustifyRight (bstring b, int width, int space) { +int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft (b, space))) return ret; + if (b->slen <= width) + return bInsertChrs (b, 0, width - b->slen, (unsigned char) space, (unsigned char) space); + return BSTR_OK; +} + +/* int bJustifyCenter (bstring b, int width, int space) + * + * Center a string's non-white space characters to within a given width by + * inserting whitespaces at the beginning. 
+ */ +int bJustifyCenter (bstring b, int width, int space) { +int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft (b, space))) return ret; + if (b->slen <= width) + return bInsertChrs (b, 0, (width - b->slen + 1) >> 1, (unsigned char) space, (unsigned char) space); + return BSTR_OK; +} + +/* int bJustifyMargin (bstring b, int width, int space) + * + * Stretch a string to flush against left and right margins by evenly + * distributing additional white space between words. If the line is too + * long to be margin justified, it is left justified. + */ +int bJustifyMargin (bstring b, int width, int space) { +struct bstrList * sl; +int i, l, c; + + if (b == NULL || b->slen < 0 || b->mlen == 0 || b->mlen < b->slen) return -__LINE__; + if (NULL == (sl = bsplit (b, (unsigned char) space))) return -__LINE__; + for (l=c=i=0; i < sl->qty; i++) { + if (sl->entry[i]->slen > 0) { + c ++; + l += sl->entry[i]->slen; + } + } + + if (l + c >= width || c < 2) { + bstrListDestroy (sl); + return bJustifyLeft (b, space); + } + + b->slen = 0; + for (i=0; i < sl->qty; i++) { + if (sl->entry[i]->slen > 0) { + if (b->slen > 0) { + int s = (width - l + (c / 2)) / c; + bInsertChrs (b, b->slen, s, (unsigned char) space, (unsigned char) space); + l += s; + } + bconcat (b, sl->entry[i]); + c--; + if (c <= 0) break; + } + } + + bstrListDestroy (sl); + return BSTR_OK; +} + +static size_t readNothing (void *buff, size_t elsize, size_t nelem, void *parm) { + UNUSED(buff); + UNUSED(elsize); + UNUSED(nelem); + UNUSED(parm); + return 0; /* Immediately indicate EOF. */ +} + +/* struct bStream * bsFromBstr (const_bstring b); + * + * Create a bStream whose contents are a copy of the bstring passed in. + * This allows the use of all the bStream APIs with bstrings. + */ +struct bStream * bsFromBstr (const_bstring b) { +struct bStream * s = bsopen ((bNread) readNothing, NULL); + bsunread (s, b); /* Push the bstring data into the empty bStream. 
*/ + return s; +} + +static size_t readRef (void *buff, size_t elsize, size_t nelem, void *parm) { +struct tagbstring * t = (struct tagbstring *) parm; +size_t tsz = elsize * nelem; + + if (tsz > (size_t) t->slen) tsz = (size_t) t->slen; + if (tsz > 0) { + memcpy (buff, t->data, tsz); + t->slen -= (int) tsz; + t->data += tsz; + return tsz / elsize; + } + return 0; +} + +/* The "by reference" version of the above function. This function puts + * a number of restrictions on the call site (the passed in struct + * tagbstring *will* be modified by this function, and the source data + * must remain alive and constant for the lifetime of the bStream). + * Hence it is not presented as an extern. + */ +static struct bStream * bsFromBstrRef (struct tagbstring * t) { + if (!t) return NULL; + return bsopen ((bNread) readRef, t); +} + +/* char * bStr2NetStr (const_bstring b) + * + * Convert a bstring to a netstring. See + * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. + * Note: 1) The value returned should be freed with a call to bcstrfree() at + * the point when it will no longer be referenced to avoid a memory + * leak. + * 2) If the returned value is non-NULL, then it also '\0' terminated + * in the character position one past the "," terminator. + */ +char * bStr2NetStr (const_bstring b) { +char strnum[sizeof (b->slen) * 3 + 1]; +bstring s; +unsigned char * buff; + + if (b == NULL || b->data == NULL || b->slen < 0) return NULL; + sprintf (strnum, "%d:", b->slen); + if (NULL == (s = bfromcstr (strnum)) + || bconcat (s, b) == BSTR_ERR || bconchar (s, (char) ',') == BSTR_ERR) { + bdestroy (s); + return NULL; + } + buff = s->data; + bcstrfree ((char *) s); + return (char *) buff; +} + +/* bstring bNetStr2Bstr (const char * buf) + * + * Convert a netstring to a bstring. See + * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. + * Note that the terminating "," *must* be present, however a following '\0' + * is *not* required. 
+ */ +bstring bNetStr2Bstr (const char * buff) { +int i, x; +bstring b; + if (buff == NULL) return NULL; + x = 0; + for (i=0; buff[i] != ':'; i++) { + unsigned int v = buff[i] - '0'; + if (v > 9 || x > ((INT_MAX - (signed int)v) / 10)) return NULL; + x = (x * 10) + v; + } + + /* This thing has to be properly terminated */ + if (buff[i + 1 + x] != ',') return NULL; + + if (NULL == (b = bfromcstr (""))) return NULL; + if (balloc (b, x + 1) != BSTR_OK) { + bdestroy (b); + return NULL; + } + memcpy (b->data, buff + i + 1, x); + b->data[x] = (unsigned char) '\0'; + b->slen = x; + return b; +} + +static char b64ETable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/* bstring bBase64Encode (const_bstring b) + * + * Generate a base64 encoding. See: RFC1341 + */ +bstring bBase64Encode (const_bstring b) { +int i, c0, c1, c2, c3; +bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + out = bfromcstr (""); + for (i=0; i + 2 < b->slen; i += 3) { + if (i && ((i % 57) == 0)) { + if (bconchar (out, (char) '\015') < 0 || bconchar (out, (char) '\012') < 0) { + bdestroy (out); + return NULL; + } + } + c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i+1] >> 4)) & 0x3F; + c2 = ((b->data[i+1] << 2) | + (b->data[i+2] >> 6)) & 0x3F; + c3 = b->data[i+2] & 0x3F; + if (bconchar (out, b64ETable[c0]) < 0 || + bconchar (out, b64ETable[c1]) < 0 || + bconchar (out, b64ETable[c2]) < 0 || + bconchar (out, b64ETable[c3]) < 0) { + bdestroy (out); + return NULL; + } + } + + if (i && ((i % 57) == 0)) { + if (bconchar (out, (char) '\015') < 0 || bconchar (out, (char) '\012') < 0) { + bdestroy (out); + return NULL; + } + } + + switch (i + 2 - b->slen) { + case 0: c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i+1] >> 4)) & 0x3F; + c2 = (b->data[i+1] << 2) & 0x3F; + if (bconchar (out, b64ETable[c0]) < 0 || + bconchar (out, b64ETable[c1]) < 0 || + bconchar (out, b64ETable[c2]) < 0 || + bconchar (out, (char) '=') < 0) { + 
bdestroy (out); + return NULL; + } + break; + case 1: c0 = b->data[i] >> 2; + c1 = (b->data[i] << 4) & 0x3F; + if (bconchar (out, b64ETable[c0]) < 0 || + bconchar (out, b64ETable[c1]) < 0 || + bconchar (out, (char) '=') < 0 || + bconchar (out, (char) '=') < 0) { + bdestroy (out); + return NULL; + } + break; + case 2: break; + } + + return out; +} + +#define B64_PAD (-2) +#define B64_ERR (-1) + +static int base64DecodeSymbol (unsigned char alpha) { + if ((alpha >= 'A') && (alpha <= 'Z')) return (int)(alpha - 'A'); + else if ((alpha >= 'a') && (alpha <= 'z')) + return 26 + (int)(alpha - 'a'); + else if ((alpha >= '0') && (alpha <= '9')) + return 52 + (int)(alpha - '0'); + else if (alpha == '+') return 62; + else if (alpha == '/') return 63; + else if (alpha == '=') return B64_PAD; + else return B64_ERR; +} + +/* bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) + * + * Decode a base64 block of data. All MIME headers are assumed to have been + * removed. See: RFC1341 + */ +bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) { +int i, v; +unsigned char c0, c1, c2; +bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + if (boolTruncError) *boolTruncError = 0; + out = bfromcstr (""); + i = 0; + for (;;) { + do { + if (i >= b->slen) return out; + if (b->data[i] == '=') { /* Bad "too early" truncation */ + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c0 = (unsigned char) (v << 2); + do { + if (i >= b->slen || b->data[i] == '=') { /* Bad "too early" truncation */ + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c0 |= (unsigned char) (v >> 4); + c1 = (unsigned char) (v << 4); + do { + if (i >= b->slen) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); 
+ return NULL; + } + if (b->data[i] == '=') { + i++; + if (i >= b->slen || b->data[i] != '=' || bconchar (out, c0) < 0) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); /* Missing "=" at the end. */ + return NULL; + } + return out; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c1 |= (unsigned char) (v >> 2); + c2 = (unsigned char) (v << 6); + do { + if (i >= b->slen) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + if (b->data[i] == '=') { + if (bconchar (out, c0) < 0 || bconchar (out, c1) < 0) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + if (boolTruncError) *boolTruncError = 0; + return out; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c2 |= (unsigned char) (v); + if (bconchar (out, c0) < 0 || + bconchar (out, c1) < 0 || + bconchar (out, c2) < 0) { + if (boolTruncError) { + *boolTruncError = -1; + return out; + } + bdestroy (out); + return NULL; + } + } +} + +#define UU_DECODE_BYTE(b) (((b) == (signed int)'`') ? 
0 : (b) - (signed int)' ') + +struct bUuInOut { + bstring src, dst; + int * badlines; +}; + +#define UU_MAX_LINELEN 45 + +static int bUuDecLine (void * parm, int ofs, int len) { +struct bUuInOut * io = (struct bUuInOut *) parm; +bstring s = io->src; +bstring t = io->dst; +int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; + + if (len == 0) return 0; + llen = UU_DECODE_BYTE (s->data[ofs]); + ret = 0; + + otlen = t->slen; + + if (((unsigned) llen) > UU_MAX_LINELEN) { ret = -__LINE__; + goto bl; + } + + llen += t->slen; + + for (i=1; i < s->slen && t->slen < llen;i += 4) { + unsigned char outoctet[3]; + c0 = UU_DECODE_BYTE (d0 = (int) bchare (s, i+ofs+0, ' ' - 1)); + c1 = UU_DECODE_BYTE (d1 = (int) bchare (s, i+ofs+1, ' ' - 1)); + c2 = UU_DECODE_BYTE (d2 = (int) bchare (s, i+ofs+2, ' ' - 1)); + c3 = UU_DECODE_BYTE (d3 = (int) bchare (s, i+ofs+3, ' ' - 1)); + + if (((unsigned) (c0|c1) >= 0x40)) { if (!ret) ret = -__LINE__; + if (d0 > 0x60 || (d0 < (' ' - 1) && !isspace (d0)) || + d1 > 0x60 || (d1 < (' ' - 1) && !isspace (d1))) { + t->slen = otlen; + goto bl; + } + c0 = c1 = 0; + } + outoctet[0] = (unsigned char) ((c0 << 2) | ((unsigned) c1 >> 4)); + if (t->slen+1 >= llen) { + if (0 > bconchar (t, (char) outoctet[0])) return -__LINE__; + break; + } + if ((unsigned) c2 >= 0x40) { if (!ret) ret = -__LINE__; + if (d2 > 0x60 || (d2 < (' ' - 1) && !isspace (d2))) { + t->slen = otlen; + goto bl; + } + c2 = 0; + } + outoctet[1] = (unsigned char) ((c1 << 4) | ((unsigned) c2 >> 2)); + if (t->slen+2 >= llen) { + if (0 > bcatblk (t, outoctet, 2)) return -__LINE__; + break; + } + if ((unsigned) c3 >= 0x40) { if (!ret) ret = -__LINE__; + if (d3 > 0x60 || (d3 < (' ' - 1) && !isspace (d3))) { + t->slen = otlen; + goto bl; + } + c3 = 0; + } + outoctet[2] = (unsigned char) ((c2 << 6) | ((unsigned) c3)); + if (0 > bcatblk (t, outoctet, 3)) return -__LINE__; + } + if (t->slen < llen) { if (0 == ret) ret = -__LINE__; + t->slen = otlen; + } + bl:; + if (ret && io->badlines) { + 
(*io->badlines)++; + return 0; + } + return ret; +} + +/* bstring bUuDecodeEx (const_bstring src, int * badlines) + * + * Performs a UUDecode of a block of data. If there are errors in the + * decoding, they are counted up and returned in "badlines", if badlines is + * not NULL. It is assumed that the "begin" and "end" lines have already + * been stripped off. The potential security problem of writing the + * filename in the begin line is something that is beyond the scope of a + * portable library. + */ + +#ifdef _MSC_VER +#pragma warning(disable:4204) +#endif + +bstring bUuDecodeEx (const_bstring src, int * badlines) { +struct tagbstring t; +struct bStream * s; +struct bStream * d; +bstring b; + + if (!src) return NULL; + t = *src; /* Short lifetime alias to header of src */ + s = bsFromBstrRef (&t); /* t is undefined after this */ + if (!s) return NULL; + d = bsUuDecode (s, badlines); + b = bfromcstralloc (256, ""); + if (NULL == b || 0 > bsread (b, d, INT_MAX)) { + bdestroy (b); + b = NULL; + } + bsclose (d); + bsclose (s); + return b; +} + +struct bsUuCtx { + struct bUuInOut io; + struct bStream * sInp; +}; + +static size_t bsUuDecodePart (void *buff, size_t elsize, size_t nelem, void *parm) { +static struct tagbstring eol = bsStatic ("\r\n"); +struct bsUuCtx * luuCtx = (struct bsUuCtx *) parm; +size_t tsz; +int l, lret; + + if (NULL == buff || NULL == parm) return 0; + tsz = elsize * nelem; + + CheckInternalBuffer:; + /* If internal buffer has sufficient data, just output it */ + if (((size_t) luuCtx->io.dst->slen) > tsz) { + memcpy (buff, luuCtx->io.dst->data, tsz); + bdelete (luuCtx->io.dst, 0, (int) tsz); + return nelem; + } + + DecodeMore:; + if (0 <= (l = binchr (luuCtx->io.src, 0, &eol))) { + int ol = 0; + struct tagbstring t; + bstring s = luuCtx->io.src; + luuCtx->io.src = &t; + + do { + if (l > ol) { + bmid2tbstr (t, s, ol, l - ol); + lret = bUuDecLine (&luuCtx->io, 0, t.slen); + if (0 > lret) { + luuCtx->io.src = s; + goto Done; + } + } + ol = l + 
1; + if (((size_t) luuCtx->io.dst->slen) > tsz) break; + l = binchr (s, ol, &eol); + } while (BSTR_ERR != l); + bdelete (s, 0, ol); + luuCtx->io.src = s; + goto CheckInternalBuffer; + } + + if (BSTR_ERR != bsreada (luuCtx->io.src, luuCtx->sInp, bsbufflength (luuCtx->sInp, BSTR_BS_BUFF_LENGTH_GET))) { + goto DecodeMore; + } + + bUuDecLine (&luuCtx->io, 0, luuCtx->io.src->slen); + + Done:; + /* Output any lingering data that has been translated */ + if (((size_t) luuCtx->io.dst->slen) > 0) { + if (((size_t) luuCtx->io.dst->slen) > tsz) goto CheckInternalBuffer; + memcpy (buff, luuCtx->io.dst->data, luuCtx->io.dst->slen); + tsz = luuCtx->io.dst->slen / elsize; + luuCtx->io.dst->slen = 0; + if (tsz > 0) return tsz; + } + + /* Deallocate once EOF becomes triggered */ + bdestroy (luuCtx->io.dst); + bdestroy (luuCtx->io.src); + free (luuCtx); + return 0; +} + +/* bStream * bsUuDecode (struct bStream * sInp, int * badlines) + * + * Creates a bStream which performs the UUDecode of an an input stream. If + * there are errors in the decoding, they are counted up and returned in + * "badlines", if badlines is not NULL. It is assumed that the "begin" and + * "end" lines have already been stripped off. The potential security + * problem of writing the filename in the begin line is something that is + * beyond the scope of a portable library. 
+ */ + +struct bStream * bsUuDecode (struct bStream * sInp, int * badlines) { +struct bsUuCtx * luuCtx = (struct bsUuCtx *) malloc (sizeof (struct bsUuCtx)); +struct bStream * sOut; + + if (NULL == luuCtx) return NULL; + + luuCtx->io.src = bfromcstr (""); + luuCtx->io.dst = bfromcstr (""); + if (NULL == luuCtx->io.dst || NULL == luuCtx->io.src) { + CleanUpFailureToAllocate:; + bdestroy (luuCtx->io.dst); + bdestroy (luuCtx->io.src); + free (luuCtx); + return NULL; + } + luuCtx->io.badlines = badlines; + if (badlines) *badlines = 0; + + luuCtx->sInp = sInp; + + sOut = bsopen ((bNread) bsUuDecodePart, luuCtx); + if (NULL == sOut) goto CleanUpFailureToAllocate; + return sOut; +} + +#define UU_ENCODE_BYTE(b) (char) (((b) == 0) ? '`' : ((b) + ' ')) + +/* bstring bUuEncode (const_bstring src) + * + * Performs a UUEncode of a block of data. The "begin" and "end" lines are + * not appended. + */ +bstring bUuEncode (const_bstring src) { +bstring out; +int i, j, jm; +unsigned int c0, c1, c2; + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr ("")) == NULL) return NULL; + for (i=0; i < src->slen; i += UU_MAX_LINELEN) { + if ((jm = i + UU_MAX_LINELEN) > src->slen) jm = src->slen; + if (bconchar (out, UU_ENCODE_BYTE (jm - i)) < 0) { + bstrFree (out); + break; + } + for (j = i; j < jm; j += 3) { + c0 = (unsigned int) bchar (src, j ); + c1 = (unsigned int) bchar (src, j + 1); + c2 = (unsigned int) bchar (src, j + 2); + if (bconchar (out, UU_ENCODE_BYTE ( (c0 & 0xFC) >> 2)) < 0 || + bconchar (out, UU_ENCODE_BYTE (((c0 & 0x03) << 4) | ((c1 & 0xF0) >> 4))) < 0 || + bconchar (out, UU_ENCODE_BYTE (((c1 & 0x0F) << 2) | ((c2 & 0xC0) >> 6))) < 0 || + bconchar (out, UU_ENCODE_BYTE ( (c2 & 0x3F))) < 0) { + bstrFree (out); + goto End; + } + } + if (bconchar (out, (char) '\r') < 0 || bconchar (out, (char) '\n') < 0) { + bstrFree (out); + break; + } + } + End:; + return out; +} + +/* bstring bYEncode (const_bstring src) + * + * Performs a YEncode of a 
block of data. No header or tail info is + * appended. See: http://www.yenc.org/whatis.htm and + * http://www.yenc.org/yenc-draft.1.3.txt + */ +bstring bYEncode (const_bstring src) { +int i; +bstring out; +unsigned char c; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr ("")) == NULL) return NULL; + for (i=0; i < src->slen; i++) { + c = (unsigned char)(src->data[i] + 42); + if (c == '=' || c == '\0' || c == '\r' || c == '\n') { + if (0 > bconchar (out, (char) '=')) { + bdestroy (out); + return NULL; + } + c += (unsigned char) 64; + } + if (0 > bconchar (out, c)) { + bdestroy (out); + return NULL; + } + } + return out; +} + +/* bstring bYDecode (const_bstring src) + * + * Performs a YDecode of a block of data. See: + * http://www.yenc.org/whatis.htm and http://www.yenc.org/yenc-draft.1.3.txt + */ +#define MAX_OB_LEN (64) + +bstring bYDecode (const_bstring src) { +int i; +bstring out; +unsigned char c; +unsigned char octetbuff[MAX_OB_LEN]; +int obl; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr ("")) == NULL) return NULL; + + obl = 0; + + for (i=0; i < src->slen; i++) { + if ('=' == (c = src->data[i])) { /* The = escape mode */ + i++; + if (i >= src->slen) { + bdestroy (out); + return NULL; + } + c = (unsigned char) (src->data[i] - 64); + } else { + if ('\0' == c) { + bdestroy (out); + return NULL; + } + + /* Extraneous CR/LFs are to be ignored. */ + if (c == '\r' || c == '\n') continue; + } + + octetbuff[obl] = (unsigned char) ((int) c - 42); + obl++; + + if (obl >= MAX_OB_LEN) { + if (0 > bcatblk (out, octetbuff, obl)) { + bdestroy (out); + return NULL; + } + obl = 0; + } + } + + if (0 > bcatblk (out, octetbuff, obl)) { + bdestroy (out); + out = NULL; + } + return out; +} + +/* int bSGMLEncode (bstring b) + * + * Change the string into a version that is quotable in SGML (HTML, XML). 
+ */ +int bSGMLEncode (bstring b) { +static struct tagbstring fr[4][2] = { + { bsStatic("&"), bsStatic("&") }, + { bsStatic("\""), bsStatic(""") }, + { bsStatic("<"), bsStatic("<") }, + { bsStatic(">"), bsStatic(">") } }; +int i; + for (i = 0; i < 4; i++) { + int ret = bfindreplace (b, &fr[i][0], &fr[i][1], 0); + if (0 > ret) return ret; + } + return 0; +} + +/* bstring bStrfTime (const char * fmt, const struct tm * timeptr) + * + * Takes a format string that is compatible with strftime and a struct tm + * pointer, formats the time according to the format string and outputs + * the bstring as a result. Note that if there is an early generation of a + * '\0' character, the bstring will be truncated to this end point. + */ +bstring bStrfTime (const char * fmt, const struct tm * timeptr) { +#if defined (__TURBOC__) && !defined (__BORLANDC__) +static struct tagbstring ns = bsStatic ("bStrfTime Not supported"); + fmt = fmt; + timeptr = timeptr; + return &ns; +#else +bstring buff; +int n; +size_t r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "strftime" call on increasing + potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < 16) n = 16; + buff = bfromcstralloc (n+2, ""); + + for (;;) { + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return NULL; + } + + r = strftime ((char *) buff->data, n + 1, fmt, timeptr); + + if (r > 0) { + buff->slen = (int) r; + break; + } + + n += n; + } + + return buff; +#endif +} + +/* int bSetCstrChar (bstring a, int pos, char c) + * + * Sets the character at position pos to the character c in the bstring a. + * If the character c is NUL ('\0') then the string is truncated at this + * point. Note: this does not enable any other '\0' character in the bstring + * as terminator indicator for the string. pos must be in the position + * between 0 and b->slen inclusive, otherwise BSTR_ERR will be returned. 
+ */ +int bSetCstrChar (bstring b, int pos, char c) { + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) { + if ('\0' != c) return bconchar (b, c); + return 0; + } + + b->data[pos] = (unsigned char) c; + if ('\0' == c) b->slen = pos; + + return 0; +} + +/* int bSetChar (bstring b, int pos, char c) + * + * Sets the character at position pos to the character c in the bstring a. + * The string is not truncated if the character c is NUL ('\0'). pos must + * be in the position between 0 and b->slen inclusive, otherwise BSTR_ERR + * will be returned. + */ +int bSetChar (bstring b, int pos, char c) { + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) { + return bconchar (b, c); + } + + b->data[pos] = (unsigned char) c; + return 0; +} + +#define INIT_SECURE_INPUT_LENGTH (256) + +/* bstring bSecureInput (int maxlen, int termchar, + * bNgetc vgetchar, void * vgcCtx) + * + * Read input from an abstracted input interface, for a length of at most + * maxlen characters. If maxlen <= 0, then there is no length limit put + * on the input. The result is terminated early if vgetchar() return EOF + * or the user specified value termchar. 
+ * + */ +bstring bSecureInput (int maxlen, int termchar, bNgetc vgetchar, void * vgcCtx) { +int i, m, c; +bstring b, t; + + if (!vgetchar) return NULL; + + b = bfromcstralloc (INIT_SECURE_INPUT_LENGTH, ""); + if ((c = UCHAR_MAX + 1) == termchar) c++; + + for (i=0; ; i++) { + if (termchar == c || (maxlen > 0 && i >= maxlen)) break; + c = vgetchar (vgcCtx); + if (EOF == c) break; + + if (i+1 >= b->mlen) { + + /* Double size, and deal with numeric overflows */ + + if (b->mlen <= INT_MAX / 2) m = b->mlen << 1; + else if (b->mlen <= INT_MAX - 1024) m = b->mlen + 1024; + else if (b->mlen <= INT_MAX - 16) m = b->mlen + 16; + else if (b->mlen <= INT_MAX - 1) m = b->mlen + 1; + else { + bSecureDestroy (b); /* Cleanse partial buffer */ + return NULL; + } + + t = bfromcstrrangealloc (b->mlen + 1, m, ""); + if (t) memcpy (t->data, b->data, i); + bSecureDestroy (b); /* Cleanse previous buffer */ + b = t; + if (!b) return b; + } + + b->data[i] = (unsigned char) c; + } + + b->slen = i; + b->data[i] = (unsigned char) '\0'; + return b; +} + +#define BWS_BUFF_SZ (1024) + +struct bwriteStream { + bstring buff; /* Buffer for underwrites */ + void * parm; /* The stream handle for core stream */ + bNwrite writeFn; /* fwrite work-a-like fnptr for core stream */ + int isEOF; /* track stream's EOF state */ + int minBuffSz; +}; + +/* struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) + * + * Wrap a given open stream (described by a fwrite work-a-like function + * pointer and stream handle) into an open bwriteStream suitable for write + * streaming functions. 
+ */ +struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) { +struct bwriteStream * ws; + + if (NULL == writeFn) return NULL; + ws = (struct bwriteStream *) malloc (sizeof (struct bwriteStream)); + if (ws) { + if (NULL == (ws->buff = bfromcstr (""))) { + free (ws); + ws = NULL; + } else { + ws->parm = parm; + ws->writeFn = writeFn; + ws->isEOF = 0; + ws->minBuffSz = BWS_BUFF_SZ; + } + } + return ws; +} + +#define internal_bwswriteout(ws,b) { \ + if ((b)->slen > 0) { \ + if (1 != (ws->writeFn ((b)->data, (b)->slen, 1, ws->parm))) { \ + ws->isEOF = 1; \ + return BSTR_ERR; \ + } \ + } \ +} + +/* int bwsWriteFlush (struct bwriteStream * ws) + * + * Force any pending data to be written to the core stream. + */ +int bwsWriteFlush (struct bwriteStream * ws) { + if (NULL == ws || ws->isEOF || 0 >= ws->minBuffSz || + NULL == ws->writeFn || NULL == ws->buff) return BSTR_ERR; + internal_bwswriteout (ws, ws->buff); + ws->buff->slen = 0; + return 0; +} + +/* int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) + * + * Send a bstring to a bwriteStream. If the stream is at EOF BSTR_ERR is + * returned. Note that there is no deterministic way to determine the exact + * cut off point where the core stream stopped accepting data. 
+ */ +int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) { +struct tagbstring t; +int l; + + if (NULL == ws || NULL == b || NULL == ws->buff || + ws->isEOF || 0 >= ws->minBuffSz || NULL == ws->writeFn) + return BSTR_ERR; + + /* Buffer prepacking optimization */ + if (b->slen > 0 && ws->buff->mlen - ws->buff->slen > b->slen) { + static struct tagbstring empty = bsStatic (""); + if (0 > bconcat (ws->buff, b)) return BSTR_ERR; + return bwsWriteBstr (ws, &empty); + } + + if (0 > (l = ws->minBuffSz - ws->buff->slen)) { + internal_bwswriteout (ws, ws->buff); + ws->buff->slen = 0; + l = ws->minBuffSz; + } + + if (b->slen < l) return bconcat (ws->buff, b); + + if (0 > bcatblk (ws->buff, b->data, l)) return BSTR_ERR; + internal_bwswriteout (ws, ws->buff); + ws->buff->slen = 0; + + bmid2tbstr (t, (bstring) b, l, b->slen); + + if (t.slen >= ws->minBuffSz) { + internal_bwswriteout (ws, &t); + return 0; + } + + return bassign (ws->buff, &t); +} + +/* int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) + * + * Send a block of data a bwriteStream. If the stream is at EOF BSTR_ERR is + * returned. + */ +int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) { +struct tagbstring t; + if (NULL == blk || len < 0) return BSTR_ERR; + blk2tbstr (t, blk, len); + return bwsWriteBstr (ws, &t); +} + +/* int bwsIsEOF (const struct bwriteStream * ws) + * + * Returns 0 if the stream is currently writable, 1 if the core stream has + * responded by not accepting the previous attempted write. + */ +int bwsIsEOF (const struct bwriteStream * ws) { + if (NULL == ws || NULL == ws->buff || 0 > ws->minBuffSz || + NULL == ws->writeFn) return BSTR_ERR; + return ws->isEOF; +} + +/* int bwsBuffLength (struct bwriteStream * ws, int sz) + * + * Set the length of the buffer used by the bwsStream. If sz is zero, the + * length is not set. This function returns with the previous length. 
+ */ +int bwsBuffLength (struct bwriteStream * ws, int sz) { +int oldSz; + if (ws == NULL || sz < 0) return BSTR_ERR; + oldSz = ws->minBuffSz; + if (sz > 0) ws->minBuffSz = sz; + return oldSz; +} + +/* void * bwsClose (struct bwriteStream * s) + * + * Close the bwriteStream, and return the handle to the stream that was + * originally used to open the given stream. Note that even if the stream + * is at EOF it still needs to be closed with a call to bwsClose. + */ +void * bwsClose (struct bwriteStream * ws) { +void * parm; + if (NULL == ws || NULL == ws->buff || 0 >= ws->minBuffSz || + NULL == ws->writeFn) return NULL; + bwsWriteFlush (ws); + parm = ws->parm; + ws->parm = NULL; + ws->minBuffSz = -1; + ws->writeFn = NULL; + bstrFree (ws->buff); + free (ws); + return parm; +} diff --git a/bstrlib/bstraux.h b/bstrlib/bstraux.h new file mode 100644 index 0000000..9f30e3c --- /dev/null +++ b/bstrlib/bstraux.h @@ -0,0 +1,115 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license and the GPL. Refer to the accompanying documentation for details + * on usage and license. + */ + +/* + * bstraux.h + * + * This file is not a necessary part of the core bstring library itself, but + * is just an auxilliary module which includes miscellaneous or trivial + * functions. 
+ */ + +#ifndef BSTRAUX_INCLUDE +#define BSTRAUX_INCLUDE + +#include +#include "bstrlib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Safety mechanisms */ +#define bstrDeclare(b) bstring (b) = NULL; +#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }} + +/* Backward compatibilty with previous versions of Bstrlib */ +#if !defined(BSTRLIB_REDUCE_NAMESPACE_POLLUTION) +#define bAssign(a,b) ((bassign)((a), (b))) +#define bSubs(b,pos,len,a,c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c))) +#define bStrchr(b,c) ((bstrchr)((b), (c))) +#define bStrchrFast(b,c) ((bstrchr)((b), (c))) +#define bCatCstr(b,s) ((bcatcstr)((b), (s))) +#define bCatBlk(b,s,len) ((bcatblk)((b),(s),(len))) +#define bCatStatic(b,s) bcatStatic(b,s) +#define bTrunc(b,n) ((btrunc)((b), (n))) +#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos))) +#define bUppercase(b) ((btoupper)(b)) +#define bLowercase(b) ((btolower)(b)) +#define bCaselessCmp(a,b) ((bstricmp)((a), (b))) +#define bCaselessNCmp(a,b,n) ((bstrnicmp)((a), (b), (n))) +#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL)) +#define bUuDecode(b) (bUuDecodeEx ((b), NULL)) +#endif + +/* Unusual functions */ +extern struct bStream * bsFromBstr (const_bstring b); +extern bstring bTail (bstring b, int n); +extern bstring bHead (bstring b, int n); +extern int bSetCstrChar (bstring a, int pos, char c); +extern int bSetChar (bstring b, int pos, char c); +extern int bFill (bstring a, char c, int len); +extern int bReplicate (bstring b, int n); +extern int bReverse (bstring b); +extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill); +extern bstring bStrfTime (const char * fmt, const struct tm * timeptr); +#define bAscTime(t) (bStrfTime ("%c\n", (t))) +#define bCTime(t) ((t) ? 
bAscTime (localtime (t)) : NULL) + +/* Spacing formatting */ +extern int bJustifyLeft (bstring b, int space); +extern int bJustifyRight (bstring b, int width, int space); +extern int bJustifyMargin (bstring b, int width, int space); +extern int bJustifyCenter (bstring b, int width, int space); + +/* Esoteric standards specific functions */ +extern char * bStr2NetStr (const_bstring b); +extern bstring bNetStr2Bstr (const char * buf); +extern bstring bBase64Encode (const_bstring b); +extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError); +extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines); +extern bstring bUuDecodeEx (const_bstring src, int * badlines); +extern bstring bUuEncode (const_bstring src); +extern bstring bYEncode (const_bstring src); +extern bstring bYDecode (const_bstring src); +extern int bSGMLEncode (bstring b); + +/* Writable stream */ +typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm); + +struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm); +int bwsWriteBstr (struct bwriteStream * stream, const_bstring b); +int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len); +int bwsWriteFlush (struct bwriteStream * stream); +int bwsIsEOF (const struct bwriteStream * stream); +int bwsBuffLength (struct bwriteStream * stream, int sz); +void * bwsClose (struct bwriteStream * stream); + +/* Security functions */ +#define bSecureDestroy(b) { \ +bstring bstr__tmp = (b); \ + if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \ + (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \ + bdestroy (bstr__tmp); \ + } \ +} +#define bSecureWriteProtect(t) { \ + if ((t).mlen >= 0) { \ + if ((t).mlen > (t).slen)) { \ + (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \ + } \ + (t).mlen = -1; \ + } \ +} +extern bstring bSecureInput (int maxlen, int termchar, + bNgetc vgetchar, void * vgcCtx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/bstrlib/bstrlib.c b/bstrlib/bstrlib.c new file mode 100644 index 0000000..aa2c000 --- /dev/null +++ b/bstrlib/bstrlib.c @@ -0,0 +1,3153 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license and the GPL. Refer to the accompanying documentation for details + * on usage and license. + */ + +/* + * bstrlib.c + * + * This file is the core module for implementing the bstring functions. + */ + +#if defined (_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include +#include +#include +#include +#include "bstrlib.h" + +/* Optionally include a mechanism for debugging memory */ + +#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) +#include "memdbg.h" +#endif + +#ifndef bstr__alloc +#if defined (BSTRLIB_TEST_CANARY) +void* bstr__alloc (size_t sz) { + char* p = (char *) malloc (sz); + memset (p, 'X', sz); + return p; +} +#else +#define bstr__alloc(x) malloc (x) +#endif +#endif + +#ifndef bstr__free +#define bstr__free(p) free (p) +#endif + +#ifndef bstr__realloc +#define bstr__realloc(p,x) realloc ((p), (x)) +#endif + +#ifndef bstr__memcpy +#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) +#endif + +#ifndef bstr__memmove +#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) +#endif + +#ifndef bstr__memset +#define bstr__memset(d,c,l) memset ((d), (c), (l)) +#endif + +#ifndef bstr__memcmp +#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) +#endif + +#ifndef bstr__memchr +#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) +#endif + +/* Just a length safe wrapper for memmove. */ + +#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } + +/* Compute the snapped size for a given requested size. By snapping to powers + of 2 like this, repeated reallocations are avoided. 
*/ +static int snapUpSize (int i) { + if (i < 8) { + i = 8; + } else { + unsigned int j; + j = (unsigned int) i; + + j |= (j >> 1); + j |= (j >> 2); + j |= (j >> 4); + j |= (j >> 8); /* Ok, since int >= 16 bits */ +#if (UINT_MAX != 0xffff) + j |= (j >> 16); /* For 32 bit int systems */ +#if (UINT_MAX > 0xffffffffUL) + j |= (j >> 32); /* For 64 bit int systems */ +#endif +#endif + /* Least power of two greater than i */ + j++; + if ((int) j >= i) i = (int) j; + } + return i; +} + +/* int balloc (bstring b, int len) + * + * Increase the size of the memory backing the bstring b to at least len. + */ +int balloc (bstring b, int olen) { + int len; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || + b->mlen < b->slen || olen <= 0) { + return BSTR_ERR; + } + + if (olen >= b->mlen) { + unsigned char * x; + + if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK; + + /* Assume probability of a non-moving realloc is 0.125 */ + if (7 * b->mlen < 8 * b->slen) { + + /* If slen is close to mlen in size then use realloc to reduce + the memory defragmentation */ + + reallocStrategy:; + + x = (unsigned char *) bstr__realloc (b->data, (size_t) len); + if (x == NULL) { + + /* Since we failed, try allocating the tighest possible + allocation */ + + len = olen; + x = (unsigned char *) bstr__realloc (b->data, (size_t) olen); + if (NULL == x) { + return BSTR_ERR; + } + } + } else { + + /* If slen is not close to mlen then avoid the penalty of copying + the extra bytes that are allocated, but not considered part of + the string */ + + if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) { + + /* Perhaps there is no available memory for the two + allocations to be in memory at once */ + + goto reallocStrategy; + + } else { + if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, + (size_t) b->slen); + bstr__free (b->data); + } + } + b->data = x; + b->mlen = len; + b->data[b->slen] = (unsigned char) '\0'; + +#if defined (BSTRLIB_TEST_CANARY) + if (len > 
b->slen + 1) { + memchr (b->data + b->slen + 1, 'X', len - (b->slen + 1)); + } +#endif + } + + return BSTR_OK; +} + +/* int ballocmin (bstring b, int len) + * + * Set the size of the memory backing the bstring b to len or b->slen+1, + * whichever is larger. Note that repeated use of this function can degrade + * performance. + */ +int ballocmin (bstring b, int len) { + unsigned char * s; + + if (b == NULL || b->data == NULL) return BSTR_ERR; + if (b->slen >= INT_MAX || b->slen < 0) return BSTR_ERR; + if (b->mlen <= 0 || b->mlen < b->slen || len <= 0) { + return BSTR_ERR; + } + + if (len < b->slen + 1) len = b->slen + 1; + + if (len != b->mlen) { + s = (unsigned char *) bstr__realloc (b->data, (size_t) len); + if (NULL == s) return BSTR_ERR; + s[b->slen] = (unsigned char) '\0'; + b->data = s; + b->mlen = len; + } + + return BSTR_OK; +} + +/* bstring bfromcstr (const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. + */ +bstring bfromcstr (const char * str) { +bstring b; +int i; +size_t j; + + if (str == NULL) return NULL; + j = (strlen) (str); + i = snapUpSize ((int) (j + (2 - (j != 0)))); + if (i <= (int) j) return NULL; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (NULL == b) return NULL; + b->slen = (int) j; + if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { + bstr__free (b); + return NULL; + } + + bstr__memcpy (b->data, str, j+1); + return b; +} + +/* bstring bfromcstrrangealloc (int minl, int maxl, const char* str) + * + * Create a bstring which contains the contents of the '\0' terminated + * char* buffer str. The memory buffer backing the string is at least + * minl characters in length, but an attempt is made to allocate up to + * maxl characters. + */ +bstring bfromcstrrangealloc (int minl, int maxl, const char* str) { +bstring b; +int i; +size_t j; + + /* Bad parameters? 
*/ + if (str == NULL) return NULL; + if (maxl < minl || minl < 0) return NULL; + + /* Adjust lengths */ + j = (strlen) (str); + if ((size_t) minl < (j+1)) minl = (int) (j+1); + if (maxl < minl) maxl = minl; + i = maxl; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b == NULL) return NULL; + b->slen = (int) j; + + while (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { + int k = (i >> 1) + (minl >> 1); + if (i == k || i < minl) { + bstr__free (b); + return NULL; + } + i = k; + } + + bstr__memcpy (b->data, str, j+1); + return b; +} + +/* bstring bfromcstralloc (int mlen, const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated + * char* buffer str. The memory buffer backing the string is at least + * mlen characters in length. + */ +bstring bfromcstralloc (int mlen, const char * str) { + return bfromcstrrangealloc (mlen, mlen, str); +} + +/* bstring blk2bstr (const void * blk, int len) + * + * Create a bstring which contains the content of the block blk of length + * len. + */ +bstring blk2bstr (const void * blk, int len) { +bstring b; +int i; + + if (blk == NULL || len < 0) return NULL; + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b == NULL) return NULL; + b->slen = len; + + i = len + (2 - (len != 0)); + i = snapUpSize (i); + + b->mlen = i; + + b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen); + if (b->data == NULL) { + bstr__free (b); + return NULL; + } + + if (len > 0) bstr__memcpy (b->data, blk, (size_t) len); + b->data[len] = (unsigned char) '\0'; + + return b; +} + +/* char * bstr2cstr (const_bstring s, char z) + * + * Create a '\0' terminated char * buffer which is equal to the contents of + * the bstring s, except that any contained '\0' characters are converted + * to the character in z. This returned value should be freed with a + * bcstrfree () call, by the calling application. 
+ */ +char * bstr2cstr (const_bstring b, char z) { +int i, l; +char * r; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + l = b->slen; + r = (char *) bstr__alloc ((size_t) (l + 1)); + if (r == NULL) return r; + + for (i=0; i < l; i ++) { + r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i])); + } + + r[l] = (unsigned char) '\0'; + + return r; +} + +/* int bcstrfree (char * s) + * + * Frees a C-string generated by bstr2cstr (). This is normally unnecessary + * since it just wraps a call to bstr__free (), however, if bstr__alloc () + * and bstr__free () have been redefined as a macros within the bstrlib + * module (via defining them in memdbg.h after defining + * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std + * library functions, then this allows a correct way of freeing the memory + * that allows higher level code to be independent from these macro + * redefinitions. + */ +int bcstrfree (char * s) { + if (s) { + bstr__free (s); + return BSTR_OK; + } + return BSTR_ERR; +} + +/* int bconcat (bstring b0, const_bstring b1) + * + * Concatenate the bstring b1 to the bstring b0. + */ +int bconcat (bstring b0, const_bstring b1) { +int len, d; +bstring aux = (bstring) b1; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) + return BSTR_ERR; + + d = b0->slen; + len = b1->slen; + if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; + + if (b0->mlen <= d + len + 1) { + ptrdiff_t pd = b1->data - b0->data; + if (0 <= pd && pd < b0->mlen) { + if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; + } + if (balloc (b0, d + len + 1) != BSTR_OK) { + if (aux != b1) bdestroy (aux); + return BSTR_ERR; + } + } + + bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len); + b0->data[d + len] = (unsigned char) '\0'; + b0->slen = d + len; + if (aux != b1) bdestroy (aux); + return BSTR_OK; +} + +/* int bconchar (bstring b, char c) + * + * Concatenate the single character c to the bstring b. 
+ */ +int bconchar (bstring b, char c) { +int d; + + if (b == NULL) return BSTR_ERR; + d = b->slen; + if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) + return BSTR_ERR; + b->data[d] = (unsigned char) c; + b->data[d + 1] = (unsigned char) '\0'; + b->slen++; + return BSTR_OK; +} + +/* int bcatcstr (bstring b, const char * s) + * + * Concatenate a char * string to a bstring. + */ +int bcatcstr (bstring b, const char * s) { +char * d; +int i, l; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL) return BSTR_ERR; + + /* Optimistically concatenate directly */ + l = b->mlen - b->slen; + d = (char *) &b->data[b->slen]; + for (i=0; i < l; i++) { + if ((*d++ = *s++) == '\0') { + b->slen += i; + return BSTR_OK; + } + } + b->slen += i; + + /* Need to explicitely resize and concatenate tail */ + return bcatblk (b, (const void *) s, (int) strlen (s)); +} + +/* int bcatblk (bstring b, const void * s, int len) + * + * Concatenate a fixed length buffer to a bstring. + */ +int bcatblk (bstring b, const void * s, int len) { +int nl; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; + + if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ + if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR; + + bBlockCopy (&b->data[b->slen], s, (size_t) len); + b->slen = nl; + b->data[nl] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* bstring bstrcpy (const_bstring b) + * + * Create a copy of the bstring b. + */ +bstring bstrcpy (const_bstring b) { +bstring b0; +int i,j; + + /* Attempted to copy an invalid string? 
*/ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + b0 = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b0 == NULL) { + /* Unable to allocate memory for string header */ + return NULL; + } + + i = b->slen; + j = snapUpSize (i + 1); + + b0->data = (unsigned char *) bstr__alloc (j); + if (b0->data == NULL) { + j = i + 1; + b0->data = (unsigned char *) bstr__alloc (j); + if (b0->data == NULL) { + /* Unable to allocate memory for string data */ + bstr__free (b0); + return NULL; + } + } + + b0->mlen = j; + b0->slen = i; + + if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i); + b0->data[b0->slen] = (unsigned char) '\0'; + + return b0; +} + +/* int bassign (bstring a, const_bstring b) + * + * Overwrite the string a with the contents of string b. + */ +int bassign (bstring a, const_bstring b) { + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + if (b->slen != 0) { + if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR; + bstr__memmove (a->data, b->data, b->slen); + } else { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + } + a->data[b->slen] = (unsigned char) '\0'; + a->slen = b->slen; + return BSTR_OK; +} + +/* int bassignmidstr (bstring a, const_bstring b, int left, int len) + * + * Overwrite the string a with the middle of contents of string b + * starting from position left and running for a length len. left and + * len are clamped to the ends of b as with the function bmidstr. 
+ */ +int bassignmidstr (bstring a, const_bstring b, int left, int len) { + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + + if (left < 0) { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + + if (len > 0) { + if (balloc (a, len) != BSTR_OK) return BSTR_ERR; + bstr__memmove (a->data, b->data + left, len); + a->slen = len; + } else { + a->slen = 0; + } + a->data[a->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bassigncstr (bstring a, const char * str) + * + * Overwrite the string a with the contents of char * string str. Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned however a may be partially overwritten. + */ +int bassigncstr (bstring a, const char * str) { +int i; +size_t len; + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == str) + return BSTR_ERR; + + for (i=0; i < a->mlen; i++) { + if ('\0' == (a->data[i] = str[i])) { + a->slen = i; + return BSTR_OK; + } + } + + a->slen = i; + len = strlen (str + i); + if (len + 1 > INT_MAX - i || + 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR; + bBlockCopy (a->data + i, str + i, (size_t) len + 1); + a->slen += (int) len; + return BSTR_OK; +} + +/* int bassignblk (bstring a, const void * s, int len) + * + * Overwrite the string a with the contents of the block (s, len). Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned and a is not overwritten. 
+ */ +int bassignblk (bstring a, const void * s, int len) { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == s || len < 0 || len >= INT_MAX) + return BSTR_ERR; + if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR; + bBlockCopy (a->data, s, (size_t) len); + a->data[len] = (unsigned char) '\0'; + a->slen = len; + return BSTR_OK; +} + +/* int btrunc (bstring b, int n) + * + * Truncate the bstring to at most n characters. + */ +int btrunc (bstring b, int n) { + if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b->slen > n) { + b->slen = n; + b->data[n] = (unsigned char) '\0'; + } + return BSTR_OK; +} + +#define upcase(c) (toupper ((unsigned char) c)) +#define downcase(c) (tolower ((unsigned char) c)) +#define wspace(c) (isspace ((unsigned char) c)) + +/* int btoupper (bstring b) + * + * Convert contents of bstring to upper case. + */ +int btoupper (bstring b) { +int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i=0, len = b->slen; i < len; i++) { + b->data[i] = (unsigned char) upcase (b->data[i]); + } + return BSTR_OK; +} + +/* int btolower (bstring b) + * + * Convert contents of bstring to lower case. + */ +int btolower (bstring b) { +int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i=0, len = b->slen; i < len; i++) { + b->data[i] = (unsigned char) downcase (b->data[i]); + } + return BSTR_OK; +} + +/* int bstricmp (const_bstring b0, const_bstring b1) + * + * Compare two strings without differentiating between case. The return + * value is the difference of the values of the characters where the two + * strings first differ after lower case transformation, otherwise 0 is + * returned indicating that the strings are equal. 
If the lengths are + * different, then a difference from 0 is given, but if the first extra + * character is '\0', then it is taken to be the value UCHAR_MAX+1. + */ +int bstricmp (const_bstring b0, const_bstring b1) { +int i, v, n; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN; + if ((n = b0->slen) > b1->slen) n = b1->slen; + else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; + + for (i = 0; i < n; i ++) { + v = (char) downcase (b0->data[i]) + - (char) downcase (b1->data[i]); + if (0 != v) return v; + } + + if (b0->slen > n) { + v = (char) downcase (b0->data[n]); + if (v) return v; + return UCHAR_MAX + 1; + } + if (b1->slen > n) { + v = - (char) downcase (b1->data[n]); + if (v) return v; + return - (int) (UCHAR_MAX + 1); + } + return BSTR_OK; +} + +/* int bstrnicmp (const_bstring b0, const_bstring b1, int n) + * + * Compare two strings without differentiating between case for at most n + * characters. If the position where the two strings first differ is + * before the nth position, the return value is the difference of the values + * of the characters, otherwise 0 is returned. If the lengths are different + * and less than n characters, then a difference from 0 is given, but if the + * first extra character is '\0', then it is taken to be the value + * UCHAR_MAX+1. 
+ */ +int bstrnicmp (const_bstring b0, const_bstring b1, int n) { +int i, v, m; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) { + for (i = 0; i < m; i ++) { + v = (char) downcase (b0->data[i]); + v -= (char) downcase (b1->data[i]); + if (v != 0) return b0->data[i] - b1->data[i]; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) { + v = (char) downcase (b0->data[m]); + if (v) return v; + return UCHAR_MAX + 1; + } + + v = - (char) downcase (b1->data[m]); + if (v) return v; + return - (int) (UCHAR_MAX + 1); +} + +/* int biseqcaselessblk (const_bstring b, const void * blk, int len) + * + * Compare content of b and the array of bytes in blk for length len for + * equality without differentiating between character case. If the content + * differs other than in case, 0 is returned, if, ignoring case, the content + * is the same, 1 is returned, if there is an error, -1 is returned. If the + * length of the strings are different, this function is O(1). '\0' + * characters are not treated in any special way. + */ +int biseqcaselessblk (const_bstring b, const void * blk, int len) { +int i; + + if (bdata (b) == NULL || b->slen < 0 || + blk == NULL || len < 0) return BSTR_ERR; + if (b->slen != len) return 0; + if (len == 0 || b->data == blk) return 1; + for (i=0; i < len; i++) { + if (b->data[i] != ((unsigned char*)blk)[i]) { + unsigned char c = (unsigned char) downcase (b->data[i]); + if (c != (unsigned char) downcase (((unsigned char*)blk)[i])) + return 0; + } + } + return 1; +} + + +/* int biseqcaseless (const_bstring b0, const_bstring b1) + * + * Compare two strings for equality without differentiating between case. + * If the strings differ other than in case, 0 is returned, if the strings + * are the same, 1 is returned, if there is an error, -1 is returned. 
If + * the length of the strings are different, this function is O(1). '\0' + * termination characters are not treated in any special way. + */ +int biseqcaseless (const_bstring b0, const_bstring b1) { +#if 0 +int i, n; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + for (i=0, n=b0->slen; i < n; i++) { + if (b0->data[i] != b1->data[i]) { + unsigned char c = (unsigned char) downcase (b0->data[i]); + if (c != (unsigned char) downcase (b1->data[i])) return 0; + } + } + return 1; +#else + if (NULL == b1) return BSTR_ERR; + return biseqcaselessblk (b0, b1->data, b1->slen); +#endif +} + +/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len + * without differentiating between case for equality. If the beginning of b0 + * differs from the memory block other than in case (or if b0 is too short), + * 0 is returned, if the strings are the same, 1 is returned, if there is an + * error, -1 is returned. '\0' characters are not treated in any special + * way. + */ +int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) { +int i; + + if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *) blk || len == 0) return 1; + + for (i = 0; i < len; i ++) { + if (b0->data[i] != ((const unsigned char *) blk)[i]) { + if (downcase (b0->data[i]) != + downcase (((const unsigned char *) blk)[i])) return 0; + } + } + return 1; +} + +/* + * int bltrimws (bstring b) + * + * Delete whitespace contiguous from the left end of the string. 
+ */ +int bltrimws (bstring b) { +int i, len; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (len = b->slen, i = 0; i < len; i++) { + if (!wspace (b->data[i])) { + return bdelete (b, 0, i); + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int brtrimws (bstring b) + * + * Delete whitespace contiguous from the right end of the string. + */ +int brtrimws (bstring b) { +int i; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) { + if (!wspace (b->data[i])) { + if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; + b->slen = i + 1; + return BSTR_OK; + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int btrimws (bstring b) + * + * Delete whitespace contiguous from both ends of the string. + */ +int btrimws (bstring b) { +int i, j; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) { + if (!wspace (b->data[i])) { + if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; + b->slen = i + 1; + for (j = 0; wspace (b->data[j]); j++) {} + return bdelete (b, 0, j); + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* int biseqblk (const_bstring b, const void * blk, int len) + * + * Compare the string b with the character block blk of length len. If the + * content differs, 0 is returned, if the content is the same, 1 is returned, + * if there is an error, -1 is returned. If the length of the strings are + * different, this function is O(1). '\0' characters are not treated in any + * special way. 
+ */ +int biseqblk (const_bstring b, const void * blk, int len) { + if (len < 0 || b == NULL || blk == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + if (b->slen != len) return 0; + if (len == 0 || b->data == blk) return 1; + return !bstr__memcmp (b->data, blk, len); +} + +/* int biseq (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. If the strings differ, 0 is returned, if + * the strings are the same, 1 is returned, if there is an error, -1 is + * returned. If the length of the strings are different, this function is + * O(1). '\0' termination characters are not treated in any special way. + */ +int biseq (const_bstring b0, const_bstring b1) { + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + return !bstr__memcmp (b0->data, b1->data, b0->slen); +} + +/* int bisstemeqblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len for + * equality. If the beginning of b0 differs from the memory block (or if b0 + * is too short), 0 is returned, if the strings are the same, 1 is returned, + * if there is an error, -1 is returned. '\0' characters are not treated in + * any special way. + */ +int bisstemeqblk (const_bstring b0, const void * blk, int len) { +int i; + + if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *) blk || len == 0) return 1; + + for (i = 0; i < len; i ++) { + if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK; + } + return 1; +} + +/* int biseqcstr (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. 
The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical with the bstring b with no '\0' + * characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal when comparing them in the same format after converting one or the + * other. If the strings are equal 1 is returned, if they are unequal 0 is + * returned and if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstr (const_bstring b, const char * s) { +int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + for (i=0; i < b->slen; i++) { + if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) + return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int biseqcstrcaseless (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical except for case with the bstring b with + * no '\0' characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal ignoring case when comparing them in the same format after + * converting one or the other. If the strings are equal, except for case, + * 1 is returned, if they are unequal regardless of case 0 is returned and + * if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstrcaseless (const_bstring b, const char * s) { +int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + for (i=0; i < b->slen; i++) { + if (s[i] == '\0' || + (b->data[i] != (unsigned char) s[i] && + downcase (b->data[i]) != (unsigned char) downcase (s[i]))) + return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int bstrcmp (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. 
If there is an error, SHRT_MIN is returned, + * otherwise a value less than or greater than zero, indicating that the + * string pointed to by b0 is lexicographically less than or greater than + * the string pointed to by b1 is returned. If the the string lengths are + * unequal but the characters up until the length of the shorter are equal + * then a value less than, or greater than zero, indicating that the string + * pointed to by b0 is shorter or longer than the string pointed to by b1 is + * returned. 0 is returned if and only if the two strings are the same. If + * the length of the strings are different, this function is O(n). Like its + * standard C library counter part strcmp, the comparison does not proceed + * past any '\0' termination characters encountered. + */ +int bstrcmp (const_bstring b0, const_bstring b1) { +int i, v, n; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + n = b0->slen; if (n > b1->slen) n = b1->slen; + if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) + return BSTR_OK; + + for (i = 0; i < n; i ++) { + v = ((char) b0->data[i]) - ((char) b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; + } + + if (b0->slen > n) return 1; + if (b1->slen > n) return -1; + return BSTR_OK; +} + +/* int bstrncmp (const_bstring b0, const_bstring b1, int n) + * + * Compare the string b0 and b1 for at most n characters. If there is an + * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and + * b1 were first truncated to at most n characters then bstrcmp was called + * with these new strings are paremeters. If the length of the strings are + * different, this function is O(n). Like its standard C library counter + * part strcmp, the comparison does not proceed past any '\0' termination + * characters encountered. 
+ */ +int bstrncmp (const_bstring b0, const_bstring b1, int n) { +int i, v, m; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) { + for (i = 0; i < m; i ++) { + v = ((char) b0->data[i]) - ((char) b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) return 1; + return -1; +} + +/* bstring bmidstr (const_bstring b, int left, int len) + * + * Create a bstring which is the substring of b starting from position left + * and running for a length len (clamped by the end of the bstring b.) If + * b is detectably invalid, then NULL is returned. The section described + * by (left, len) is clamped to the boundaries of b. + */ +bstring bmidstr (const_bstring b, int left, int len) { + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + if (left < 0) { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (len <= 0) return bfromcstr (""); + return blk2bstr (b->data + left, len); +} + +/* int bdelete (bstring b, int pos, int len) + * + * Removes characters from pos to pos+len-1 inclusive and shifts the tail of + * the bstring starting from pos+len to pos. len must be positive for this + * call to have any effect. The section of the string described by (pos, + * len) is clamped to boundaries of the bstring b. 
+ */ +int bdelete (bstring b, int pos, int len) { + /* Clamp to left side of bstring */ + if (pos < 0) { + len += pos; + pos = 0; + } + + if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || + b->mlen < b->slen || b->mlen <= 0) + return BSTR_ERR; + if (len > 0 && pos < b->slen) { + if (pos + len >= b->slen) { + b->slen = pos; + } else { + bBlockCopy ((char *) (b->data + pos), + (char *) (b->data + pos + len), + b->slen - (pos+len)); + b->slen -= len; + } + b->data[b->slen] = (unsigned char) '\0'; + } + return BSTR_OK; +} + +/* int bdestroy (bstring b) + * + * Free up the bstring. Note that if b is detectably invalid or not writable + * then no action is performed and BSTR_ERR is returned. Like a freed memory + * allocation, dereferences, writes or any other action on b after it has + * been bdestroyed is undefined. + */ +int bdestroy (bstring b) { + if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || + b->data == NULL) + return BSTR_ERR; + + bstr__free (b->data); + + /* In case there is any stale usage, there is one more chance to + notice this error. */ + + b->slen = -1; + b->mlen = -__LINE__; + b->data = NULL; + + bstr__free (b); + return BSTR_OK; +} + +/* int binstr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. 
+ */ +int binstr (const_bstring b1, int pos, const_bstring b2) { +int j, ii, ll, lf; +unsigned char * d0; +unsigned char c0; +register unsigned char * d1; +register unsigned char c1; +register int i; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* No space to find such a string? */ + if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return 0; + + i = pos; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + /* Peel off the b2->slen == 1 case */ + c0 = d0[0]; + if (1 == ll) { + for (;i < lf; i++) if (c0 == d1[i]) return i; + return BSTR_ERR; + } + + c1 = c0; + j = 0; + lf = b1->slen - 1; + + ii = -1; + if (i < lf) do { + /* Unrolled current character test */ + if (c1 != d1[i]) { + if (c1 != d1[1+i]) { + i += 2; + continue; + } + i++; + } + + /* Take note if this is the start of a potential match */ + if (0 == j) ii = i; + + /* Shift the test character down by one */ + j++; + i++; + + /* If this isn't past the last character continue */ + if (j < ll) { + c1 = d0[j]; + continue; + } + + N0:; + + /* If no characters mismatched, then we matched */ + if (i == ii+j) return ii; + + /* Shift back to the beginning */ + i -= j; + j = 0; + c1 = c0; + } while (i < lf); + + /* Deal with last case if unrolling caused a misalignment */ + if (i == lf && ll == j+1 && c1 == d1[i]) goto N0; + + return BSTR_ERR; +} + +/* int binstrr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. 
Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. + */ +int binstrr (const_bstring b1, int pos, const_bstring b2) { +int j, i, l; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j]) { + j ++; + if (j >= l) return i; + } else { + i --; + if (i < 0) break; + j=0; + } + } + + return BSTR_ERR; +} + +/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. 
+ */ +int binstrcaseless (const_bstring b1, int pos, const_bstring b2) { +int j, i, l, ll; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + l = b1->slen - b2->slen + 1; + + /* No space to find such a string? */ + if (l <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return BSTR_OK; + + i = pos; + j = 0; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { + j ++; + if (j >= ll) return i; + } else { + i ++; + if (i >= l) break; + j=0; + } + } + + return BSTR_ERR; +} + +/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. 
+ */ +int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) { +int j, i, l; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) + return BSTR_OK; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { + j ++; + if (j >= l) return i; + } else { + i --; + if (i < 0) break; + j=0; + } + } + + return BSTR_ERR; +} + + +/* int bstrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b forwards from the position pos + * (inclusive). + */ +int bstrchrp (const_bstring b, int c, int pos) { +unsigned char * p; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) + return BSTR_ERR; + p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, + (b->slen - pos)); + if (p) return (int) (p - b->data); + return BSTR_ERR; +} + +/* int bstrrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b backwards from the position pos in string + * (inclusive). 
+ */ +int bstrrchrp (const_bstring b, int c, int pos) { +int i; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) + return BSTR_ERR; + for (i=pos; i >= 0; i--) { + if (b->data[i] == (unsigned char) c) return i; + } + return BSTR_ERR; +} + +#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) +#define LONG_LOG_BITS_QTY (3) +#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) +#define LONG_TYPE unsigned char + +#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) +struct charField { LONG_TYPE content[CFCLEN]; }; +#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & \ + (((long)1) << ((c) & (LONG_BITS_QTY-1)))) +#define setInCharField(cf,idx) { \ + unsigned int c = (unsigned int) (idx); \ + (cf)->content[c >> LONG_LOG_BITS_QTY] |= \ + (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ +} + +#else + +#define CFCLEN (1 << CHAR_BIT) +struct charField { unsigned char content[CFCLEN]; }; +#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) +#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0 + +#endif + +/* Convert a bstring to charField */ +static int buildCharField (struct charField * cf, const_bstring b) { +int i; + if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; + memset ((void *) cf->content, 0, sizeof (struct charField)); + for (i=0; i < b->slen; i++) { + setInCharField (cf, b->data[i]); + } + return BSTR_OK; +} + +static void invertCharField (struct charField * cf) { +int i; + for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i]; +} + +/* Inner engine for binchr */ +static int binchrCF (const unsigned char * data, int len, int pos, + const struct charField * cf) { +int i; + for (i=pos; i < len; i++) { + unsigned char c = (unsigned char) data[i]; + if (testInCharField (cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * one of the 
characters in b1 is found and return it. If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int binchr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos); + if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; + return binchrCF (b0->data, b0->slen, pos, &chrs); +} + +/* Inner engine for binchrr */ +static int binchrrCF (const unsigned char * data, int pos, + const struct charField * cf) { +int i; + for (i=pos; i >= 0; i--) { + unsigned int c = (unsigned int) data[i]; + if (testInCharField (cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which one of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int binchrr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos); + if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; + return binchrrCF (b0->data, pos, &chrs); +} + +/* int bninchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * none of the characters in b1 is found and return it. If such a position + * does not exist in b0, then BSTR_ERR is returned. 
+ */ +int bninchr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; + invertCharField (&chrs); + return binchrCF (b0->data, b0->slen, pos, &chrs); +} + +/* int bninchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which none of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int bninchrr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; + invertCharField (&chrs); + return binchrrCF (b0->data, pos, &chrs); +} + +/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) + * + * Overwrite the string b0 starting at position pos with the string b1. If + * the position pos is past the end of b0, then the character "fill" is + * appended as necessary to make up the gap between the end of b0 and pos. + * If b1 is NULL, it behaves as if it were a 0-length string. 
+ */ +int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) { +int d, newlen; +ptrdiff_t pd; +bstring aux = (bstring) b1; + + if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || + b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; + if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; + + d = pos; + + /* Aliasing case */ + if (NULL != aux) { + if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && + pd < (ptrdiff_t) b0->mlen) { + if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; + } + d += aux->slen; + } + + /* Increase memory size if necessary */ + if (balloc (b0, d + 1) != BSTR_OK) { + if (aux != b1) bdestroy (aux); + return BSTR_ERR; + } + + newlen = b0->slen; + + /* Fill in "fill" character as necessary */ + if (pos > newlen) { + bstr__memset (b0->data + b0->slen, (int) fill, + (size_t) (pos - b0->slen)); + newlen = pos; + } + + /* Copy b1 to position pos in b0. */ + if (aux != NULL) { + bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen); + if (aux != b1) bdestroy (aux); + } + + /* Indicate the potentially increased size of b0 */ + if (d > newlen) newlen = d; + + b0->slen = newlen; + b0->data[newlen] = (unsigned char) '\0'; + + return BSTR_OK; +} + +/* int binsertblk (bstring b, int pos, const void * blk, int len, + * unsigned char fill) + * + * Inserts the block of characters at blk with length len into b at position + * pos. If the position pos is past the end of b, then the character "fill" + * is appended as necessary to make up the gap between the end of b1 and pos. + * Unlike bsetstr, binsert does not allow b2 to be NULL. 
+ */ +int binsertblk (bstring b, int pos, const void * blk, int len, + unsigned char fill) { +int d, l; +unsigned char* aux = (unsigned char*) blk; + + if (b == NULL || blk == NULL || pos < 0 || len < 0 || b->slen < 0 || + b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR; + + /* Compute the two possible end pointers */ + d = b->slen + len; + l = pos + len; + if ((d|l) < 0) return BSTR_ERR; /* Integer wrap around. */ + + /* Aliasing case */ + if (((size_t) ((unsigned char*) blk + len)) >= ((size_t) b->data) && + ((size_t) blk) < ((size_t) (b->data + b->mlen))) { + if (NULL == (aux = (unsigned char*) bstr__alloc (len))) + return BSTR_ERR; + bstr__memcpy (aux, blk, len); + } + + if (l > d) { + /* Inserting past the end of the string */ + if (balloc (b, l + 1) != BSTR_OK) { + if (aux != (unsigned char*) blk) bstr__free (aux); + return BSTR_ERR; + } + bstr__memset (b->data + b->slen, (int) fill, + (size_t) (pos - b->slen)); + b->slen = l; + } else { + /* Inserting in the middle of the string */ + if (balloc (b, d + 1) != BSTR_OK) { + if (aux != (unsigned char*) blk) bstr__free (aux); + return BSTR_ERR; + } + bBlockCopy (b->data + l, b->data + pos, d - l); + b->slen = d; + } + bBlockCopy (b->data + pos, aux, len); + b->data[b->slen] = (unsigned char) '\0'; + if (aux != (unsigned char*) blk) bstr__free (aux); + return BSTR_OK; +} + +/* int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) + * + * Inserts the string b2 into b1 at position pos. If the position pos is + * past the end of b1, then the character "fill" is appended as necessary to + * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert + * does not allow b2 to be NULL. 
+ */ +int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) { + if (NULL == b2 || (b2->mlen > 0 && b2->slen > b2->mlen)) return BSTR_ERR; + return binsertblk (b1, pos, b2->data, b2->slen, fill); +} + +/* int breplace (bstring b1, int pos, int len, bstring b2, + * unsigned char fill) + * + * Replace a section of a string from pos for a length len with the string + * b2. fill is used is pos > b1->slen. + */ +int breplace (bstring b1, int pos, int len, const_bstring b2, + unsigned char fill) { +int pl, ret; +ptrdiff_t pd; +bstring aux = (bstring) b2; + + if (pos < 0 || len < 0) return BSTR_ERR; + if (pos > INT_MAX - len) return BSTR_ERR; /* Overflow */ + pl = pos + len; + if (b1 == NULL || b2 == NULL || b1->data == NULL || b2->data == NULL || + b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || + b1->mlen <= 0) return BSTR_ERR; + + /* Straddles the end? */ + if (pl >= b1->slen) { + if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret; + if (pos + b2->slen < b1->slen) { + b1->slen = pos + b2->slen; + b1->data[b1->slen] = (unsigned char) '\0'; + } + return ret; + } + + /* Aliasing case */ + if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && + pd < (ptrdiff_t) b1->slen) { + if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; + } + + if (aux->slen > len) { + if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + } + + if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, + b1->data + pos + len, + b1->slen - (pos + len)); + bstr__memcpy (b1->data + pos, aux->data, aux->slen); + b1->slen += aux->slen - len; + b1->data[b1->slen] = (unsigned char) '\0'; + if (aux != b2) bdestroy (aux); + return BSTR_OK; +} + +/* + * findreplaceengine is used to implement bfindreplace and + * bfindreplacecaseless. It works by breaking the three cases of + * expansion, reduction and replacement, and solving each of these + * in the most efficient way possible. 
+ */ + +typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); + +#define INITIAL_STATIC_FIND_INDEX_COUNT 32 + +static int findreplaceengine (bstring b, const_bstring find, + const_bstring repl, int pos, + instr_fnptr instr) { +int i, ret, slen, mlen, delta, acc; +int * d; +int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is for LINT. */ +ptrdiff_t pd; +bstring auxf = (bstring) find; +bstring auxr = (bstring) repl; + + if (b == NULL || b->data == NULL || find == NULL || + find->data == NULL || repl == NULL || repl->data == NULL || + pos < 0 || find->slen <= 0 || b->mlen <= 0 || b->slen > b->mlen || + b->slen < 0 || repl->slen < 0) return BSTR_ERR; + if (pos > b->slen - find->slen) return BSTR_OK; + + /* Alias with find string */ + pd = (ptrdiff_t) (find->data - b->data); + if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) { + if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR; + } + + /* Alias with repl string */ + pd = (ptrdiff_t) (repl->data - b->data); + if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) { + if (NULL == (auxr = bstrcpy (repl))) { + if (auxf != find) bdestroy (auxf); + return BSTR_ERR; + } + } + + delta = auxf->slen - auxr->slen; + + /* in-place replacement since find and replace strings are of equal + length */ + if (delta == 0) { + while ((pos = instr (b, pos, auxf)) >= 0) { + bstr__memcpy (b->data + pos, auxr->data, auxr->slen); + pos += auxf->slen; + } + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return BSTR_OK; + } + + /* shrinking replacement since auxf->slen > auxr->slen */ + if (delta > 0) { + acc = 0; + + while ((i = instr (b, pos, auxf)) >= 0) { + if (acc && i > pos) + bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); + if (auxr->slen) + bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen); + acc += delta; + pos = i + auxf->slen; + } + + if (acc) { + i = b->slen; + if (i > pos) + bstr__memmove (b->data + pos - acc, b->data 
+ pos, i - pos); + b->slen -= acc; + b->data[b->slen] = (unsigned char) '\0'; + } + + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return BSTR_OK; + } + + /* expanding replacement since find->slen < repl->slen. Its a lot + more complicated. This works by first finding all the matches and + storing them to a growable array, then doing at most one resize of + the destination bstring and then performing the direct memory transfers + of the string segment pieces to form the final result. The growable + array of matches uses a deferred doubling reallocing strategy. What + this means is that it starts as a reasonably fixed sized auto array in + the hopes that many if not most cases will never need to grow this + array. But it switches as soon as the bounds of the array will be + exceeded. An extra find result is always appended to this array that + corresponds to the end of the destination string, so slen is checked + against mlen - 1 rather than mlen before resizing. + */ + + mlen = INITIAL_STATIC_FIND_INDEX_COUNT; + d = (int *) static_d; /* Avoid malloc for trivial/initial cases */ + acc = slen = 0; + + while ((pos = instr (b, pos, auxf)) >= 0) { + if (slen >= mlen - 1) { + int *t; + int sl; + /* Overflow */ + if (mlen > (INT_MAX / sizeof(int *)) / 2) { + ret = BSTR_ERR; + goto done; + } + mlen += mlen; + sl = sizeof (int *) * mlen; + if (static_d == d) d = NULL; /* static_d cannot be realloced */ + if (NULL == (t = (int *) bstr__realloc (d, sl))) { + ret = BSTR_ERR; + goto done; + } + if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); + d = t; + } + d[slen] = pos; + slen++; + acc -= delta; + pos += auxf->slen; + if (pos < 0 || acc < 0) { + ret = BSTR_ERR; + goto done; + } + } + + /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. 
*/ + d[slen] = b->slen; + + if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) { + b->slen += acc; + for (i = slen-1; i >= 0; i--) { + int s, l; + s = d[i] + auxf->slen; + l = d[i+1] - s; /* d[slen] may be accessed here. */ + if (l) { + bstr__memmove (b->data + s + acc, b->data + s, l); + } + if (auxr->slen) { + bstr__memmove (b->data + s + acc - auxr->slen, + auxr->data, auxr->slen); + } + acc += delta; + } + b->data[b->slen] = (unsigned char) '\0'; + } + + done:; + if (static_d != d) bstr__free (d); + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return ret; +} + +/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string with a replace string after a + * given point in a bstring. + */ +int bfindreplace (bstring b, const_bstring find, const_bstring repl, + int pos) { + return findreplaceengine (b, find, repl, pos, binstr); +} + +/* int bfindreplacecaseless (bstring b, const_bstring find, + * const_bstring repl, int pos) + * + * Replace all occurrences of a find string, ignoring case, with a replace + * string after a given point in a bstring. + */ +int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, + int pos) { + return findreplaceengine (b, find, repl, pos, binstrcaseless); +} + +/* int binsertch (bstring b, int pos, int len, unsigned char fill) + * + * Inserts the character fill repeatedly into b at position pos for a + * length len. If the position pos is past the end of b, then the + * character "fill" is appended as necessary to make up the gap between the + * end of b and the position pos + len. 
+ */ +int binsertch (bstring b, int pos, int len, unsigned char fill) { +int d, l, i; + + if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || len < 0) return BSTR_ERR; + + /* Compute the two possible end pointers */ + d = b->slen + len; + l = pos + len; + if ((d|l) < 0) return BSTR_ERR; + + if (l > d) { + /* Inserting past the end of the string */ + if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR; + pos = b->slen; + b->slen = l; + } else { + /* Inserting in the middle of the string */ + if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR; + for (i = d - 1; i >= l; i--) { + b->data[i] = b->data[i - len]; + } + b->slen = d; + } + + for (i=pos; i < l; i++) b->data[i] = fill; + b->data[b->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bpattern (bstring b, int len) + * + * Replicate the bstring, b in place, end to end repeatedly until it + * surpasses len characters, then chop the result to exactly len characters. + * This function operates in-place. The function will return with BSTR_ERR + * if b is NULL or of length 0, otherwise BSTR_OK is returned. + */ +int bpattern (bstring b, int len) { +int i, d; + + d = blength (b); + if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR; + if (len > 0) { + if (d == 1) return bsetstr (b, len, NULL, b->data[0]); + for (i = d; i < len; i++) b->data[i] = b->data[i - d]; + } + b->data[len] = (unsigned char) '\0'; + b->slen = len; + return BSTR_OK; +} + +#define BS_BUFF_SZ (1024) + +/* int breada (bstring b, bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to concatenate to the + * bstring b the entire contents of file-like source data in a roughly + * efficient way. + */ +int breada (bstring b, bNread readPtr, void * parm) { +int i, l, n; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + readPtr == NULL) return BSTR_ERR; + + i = b->slen; + for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? 
n : BS_BUFF_SZ)) { + if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR; + l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm); + i += l; + b->slen = i; + if (i < n) break; + } + + b->data[i] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* bstring bread (bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to create a bstring + * filled with the entire contents of file-like source data in a roughly + * efficient way. + */ +bstring bread (bNread readPtr, void * parm) { +bstring buff; + + if (0 > breada (buff = bfromcstr (""), readPtr, parm)) { + bdestroy (buff); + return NULL; + } + return buff; +} + +/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result in b. If there is an empty partial + * result, 1 is returned. If no characters are read, or there is some other + * detectable error, BSTR_ERR is returned. 
+ */ +int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) { +int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + getcPtr == NULL) return BSTR_ERR; + d = 0; + e = b->mlen - 2; + + while ((c = getcPtr (parm)) >= 0) { + if (d > e) { + b->slen = d; + if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char) c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char) '\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result concatentated to b. If there is + * an empty partial result, 1 is returned. If no characters are read, or + * there is some other detectable error, BSTR_ERR is returned. + */ +int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) { +int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + getcPtr == NULL) return BSTR_ERR; + d = b->slen; + e = b->mlen - 2; + + while ((c = getcPtr (parm)) >= 0) { + if (d > e) { + b->slen = d; + if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char) c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char) '\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated into a bstring. 
+ * The stream read is terminated by the passed in terminator function. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * result obtained thus far is returned. If no characters are read, or + * there is some other detectable error, NULL is returned. + */ +bstring bgets (bNgetc getcPtr, void * parm, char terminator) { +bstring buff; + + if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || + 0 >= buff->slen) { + bdestroy (buff); + buff = NULL; + } + return buff; +} + +struct bStream { + bstring buff; /* Buffer for over-reads */ + void * parm; /* The stream handle for core stream */ + bNread readFnPtr; /* fread compatible fnptr for core stream */ + int isEOF; /* track file's EOF state */ + int maxBuffSz; +}; + +/* struct bStream * bsopen (bNread readPtr, void * parm) + * + * Wrap a given open stream (described by a fread compatible function + * pointer and stream handle) into an open bStream suitable for the bstring + * library streaming functions. + */ +struct bStream * bsopen (bNread readPtr, void * parm) { +struct bStream * s; + + if (readPtr == NULL) return NULL; + s = (struct bStream *) bstr__alloc (sizeof (struct bStream)); + if (s == NULL) return NULL; + s->parm = parm; + s->buff = bfromcstr (""); + s->readFnPtr = readPtr; + s->maxBuffSz = BS_BUFF_SZ; + s->isEOF = 0; + return s; +} + +/* int bsbufflength (struct bStream * s, int sz) + * + * Set the length of the buffer used by the bStream. If sz is zero, the + * length is not set. This function returns with the previous length. 
+ */ +int bsbufflength (struct bStream * s, int sz) { +int oldSz; + if (s == NULL || sz < 0) return BSTR_ERR; + oldSz = s->maxBuffSz; + if (sz > 0) s->maxBuffSz = sz; + return oldSz; +} + +int bseof (const struct bStream * s) { + if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; + return s->isEOF && (s->buff->slen == 0); +} + +/* void * bsclose (struct bStream * s) + * + * Close the bStream, and return the handle to the stream that was originally + * used to open the given stream. + */ +void * bsclose (struct bStream * s) { +void * parm; + if (s == NULL) return NULL; + s->readFnPtr = NULL; + if (s->buff) bdestroy (s->buff); + s->buff = NULL; + parm = s->parm; + s->parm = NULL; + s->isEOF = 1; + bstr__free (s); + return parm; +} + +/* int bsreadlna (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. 
+ */ +int bsreadlna (bstring r, struct bStream * s, char terminator) { +int i, l, ret, rlo; +char * b; +struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || + r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; + l = s->buff->slen; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) s->buff->data; + x.data = (unsigned char *) b; + + /* First check if the current buffer holds the terminator */ + b[l] = terminator; /* Set sentinel */ + for (i=0; b[i] != terminator; i++) ; + if (i < l) { + x.slen = i + 1; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) { + if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) + return BSTR_ERR; + b = (char *) (r->data + r->slen); + l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); + if (l <= 0) { + r->data[r->slen] = (unsigned char) '\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + b[l] = terminator; /* Set sentinel */ + for (i=0; b[i] != terminator; i++) ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy (s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. 
+ * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) { +int i, l, ret, rlo; +unsigned char * b; +struct tagbstring x; +struct charField cf; + + if (s == NULL || s->buff == NULL || r == NULL || term == NULL || + term->data == NULL || r->mlen <= 0 || r->slen < 0 || + r->mlen < r->slen) return BSTR_ERR; + if (term->slen == 1) return bsreadlna (r, s, term->data[0]); + if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR; + + l = s->buff->slen; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *) s->buff->data; + x.data = b; + + /* First check if the current buffer holds the terminator */ + b[l] = term->data[0]; /* Set sentinel */ + for (i=0; !testInCharField (&cf, b[i]); i++) ; + if (i < l) { + x.slen = i + 1; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) { + if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) + return BSTR_ERR; + b = (unsigned char *) (r->data + r->slen); + l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); + if (l <= 0) { + r->data[r->slen] = (unsigned char) '\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + + b[l] = term->data[0]; /* Set sentinel */ + for (i=0; !testInCharField (&cf, b[i]); i++) ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy (s->buff->data, b + i, l - i); + 
r->data[r->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bsreada (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. + */ +int bsreada (bstring r, struct bStream * s, int n) { +int l, ret, orslen; +char * b; +struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; + + if (n > INT_MAX - r->slen) return BSTR_ERR; + n += r->slen; + + l = s->buff->slen; + + orslen = r->slen; + + if (0 == l) { + if (s->isEOF) return BSTR_ERR; + if (r->mlen > n) { + l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, + s->parm); + if (0 >= l || l > n - r->slen) { + s->isEOF = 1; + return BSTR_ERR; + } + r->slen += l; + r->data[r->slen] = (unsigned char) '\0'; + return 0; + } + } + + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) s->buff->data; + x.data = (unsigned char *) b; + + do { + if (l + r->slen >= n) { + x.slen = n - r->slen; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); + return BSTR_ERR & -(r->slen == orslen); + } + + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) break; + + l = n - r->slen; + if (l > s->maxBuffSz) l = s->maxBuffSz; + + l = (int) s->readFnPtr (b, 1, l, s->parm); + + } while (l > 0); + if (l < 0) l = 0; + if (l == 0) s->isEOF = 1; + s->buff->slen = l; + return BSTR_ERR & -(r->slen == orslen); +} + +/* int bsreadln (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. 
This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. + */ +int bsreadln (bstring r, struct bStream * s, char terminator) { + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) + return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlna (r, s, terminator); +} + +/* int bsreadlns (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. + * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlns (bstring r, struct bStream * s, const_bstring term) { + if (s == NULL || s->buff == NULL || r == NULL || term == NULL + || term->data == NULL || r->mlen <= 0) return BSTR_ERR; + if (term->slen == 1) return bsreadln (r, s, term->data[0]); + if (term->slen < 1) return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlnsa (r, s, term); +} + +/* int bsread (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. 
+ */ +int bsread (bstring r, struct bStream * s, int n) { + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || n <= 0) return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreada (r, s, n); +} + +/* int bsunread (struct bStream * s, const_bstring b) + * + * Insert a bstring into the bStream at the current position. These + * characters will be read prior to those that actually come from the core + * stream. + */ +int bsunread (struct bStream * s, const_bstring b) { + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return binsert (s->buff, 0, b, (unsigned char) '?'); +} + +/* int bspeek (bstring r, const struct bStream * s) + * + * Return the currently buffered characters from the bStream that will be + * read prior to reads from the core stream. + */ +int bspeek (bstring r, const struct bStream * s) { + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return bassign (r, s->buff); +} + +/* bstring bjoinblk (const struct bstrList * bl, void * blk, int len); + * + * Join the entries of a bstrList into one bstring by sequentially + * concatenating them with the content from blk for length len in between. + * If there is an error NULL is returned, otherwise a bstring with the + * correct result is returned. 
+ */ +bstring bjoinblk (const struct bstrList * bl, const void * blk, int len) { +bstring b; +unsigned char * p; +int i, c, v; + + if (bl == NULL || bl->qty < 0) return NULL; + if (len < 0) return NULL; + if (len > 0 && blk == NULL) return NULL; + if (bl->qty < 1) return bfromStatic (""); + + for (i = 0, c = 1; i < bl->qty; i++) { + v = bl->entry[i]->slen; + if (v < 0) return NULL; /* Invalid input */ + if (v > INT_MAX - c) return NULL; /* Overflow */ + c += v; + } + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (len == 0) { + p = b->data = (unsigned char *) bstr__alloc (c); + if (p == NULL) { + bstr__free (b); + return NULL; + } + for (i = 0; i < bl->qty; i++) { + v = bl->entry[i]->slen; + bstr__memcpy (p, bl->entry[i]->data, v); + p += v; + } + } else { + v = (bl->qty - 1) * len; + if ((bl->qty > 512 || len > 127) && + v / len != bl->qty - 1) return NULL; /* Overflow */ + if (v > INT_MAX - c) return NULL; /* Overflow */ + c += v; + p = b->data = (unsigned char *) bstr__alloc (c); + if (p == NULL) { + bstr__free (b); + return NULL; + } + v = bl->entry[0]->slen; + bstr__memcpy (p, bl->entry[0]->data, v); + p += v; + for (i = 1; i < bl->qty; i++) { + bstr__memcpy (p, blk, len); + p += len; + v = bl->entry[i]->slen; + if (v) { + bstr__memcpy (p, bl->entry[i]->data, v); + p += v; + } + } + } + b->mlen = c; + b->slen = c-1; + b->data[c-1] = (unsigned char) '\0'; + return b; +} + +/* bstring bjoin (const struct bstrList * bl, const_bstring sep); + * + * Join the entries of a bstrList into one bstring by sequentially + * concatenating them with the sep string in between. If there is an error + * NULL is returned, otherwise a bstring with the correct result is returned. 
+ */ +bstring bjoin (const struct bstrList * bl, const_bstring sep) { + if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; + return bjoinblk (bl, sep->data, sep->slen); +} + +#define BSSSC_BUFF_LEN (256) + +/* int bssplitscb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), + * void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by any of the characters in splitStr. An empty splitStr causes + * the whole stream to be iterated once. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. + * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. 
+ */ +int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { +struct charField chrs; +bstring buff; +int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL || + splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; + + if (splitStr->slen == 0) { + while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; + if ((ret = cb (parm, 0, buff)) > 0) + ret = 0; + } else { + buildCharField (&chrs, splitStr); + ret = p = i = 0; + for (;;) { + if (i >= buff->slen) { + bsreada (buff, s, BSSSC_BUFF_LEN); + if (i >= buff->slen) { + if (0 < (ret = cb (parm, p, buff))) ret = 0; + break; + } + } + if (testInCharField (&chrs, buff->data[i])) { + struct tagbstring t; + unsigned char c; + + blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); + if ((ret = bsunread (s, &t)) < 0) break; + buff->slen = i; + c = buff->data[i]; + buff->data[i] = (unsigned char) '\0'; + if ((ret = cb (parm, p, buff)) < 0) break; + buff->data[i] = c; + buff->slen = 0; + p += i + 1; + i = -1; + } + i++; + } + } + + bdestroy (buff); + return ret; +} + +/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), + * void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by the entire substring splitStr. An empty splitStr causes + * each character of the stream to be iterated. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. 
+ * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. + */ +int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { +bstring buff; +int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm); + + if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; + + if (splitStr->slen == 0) { + for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) { + if ((ret = cb (parm, 0, buff)) < 0) { + bdestroy (buff); + return ret; + } + buff->slen = 0; + } + return BSTR_OK; + } else { + ret = p = i = 0; + for (i=p=0;;) { + if ((ret = binstr (buff, 0, splitStr)) >= 0) { + struct tagbstring t; + blk2tbstr (t, buff->data, ret); + i = ret + splitStr->slen; + if ((ret = cb (parm, p, &t)) < 0) break; + p += i; + bdelete (buff, 0, i); + } else { + bsreada (buff, s, BSSSC_BUFF_LEN); + if (bseof (s)) { + if ((ret = cb (parm, p, buff)) > 0) ret = 0; + break; + } + } + } + } + + bdestroy (buff); + return ret; +} + +/* int bstrListCreate (void) + * + * Create a bstrList. + */ +struct bstrList * bstrListCreate (void) { +struct bstrList * sl = + (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (sl) { + sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring)); + if (!sl->entry) { + bstr__free (sl); + sl = NULL; + } else { + sl->qty = 0; + sl->mlen = 1; + } + } + return sl; +} + +/* int bstrListDestroy (struct bstrList * sl) + * + * Destroy a bstrList that has been created by bsplit, bsplits or + * bstrListCreate. 
+ */ +int bstrListDestroy (struct bstrList * sl) { +int i; + if (sl == NULL || sl->qty < 0) return BSTR_ERR; + for (i=0; i < sl->qty; i++) { + if (sl->entry[i]) { + bdestroy (sl->entry[i]); + sl->entry[i] = NULL; + } + } + sl->qty = -1; + sl->mlen = -1; + bstr__free (sl->entry); + sl->entry = NULL; + bstr__free (sl); + return BSTR_OK; +} + +/* int bstrListAlloc (struct bstrList * sl, int msz) + * + * Ensure that there is memory for at least msz number of entries for the + * list. + */ +int bstrListAlloc (struct bstrList * sl, int msz) { +bstring * l; +int smsz; +size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || + sl->qty > sl->mlen) return BSTR_ERR; + if (sl->mlen >= msz) return BSTR_OK; + smsz = snapUpSize (msz); + nsz = ((size_t) smsz) * sizeof (bstring); + if (nsz < (size_t) smsz) return BSTR_ERR; + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) { + smsz = msz; + nsz = ((size_t) smsz) * sizeof (bstring); + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) return BSTR_ERR; + } + sl->mlen = smsz; + sl->entry = l; + return BSTR_OK; +} + +/* int bstrListAllocMin (struct bstrList * sl, int msz) + * + * Try to allocate the minimum amount of memory for the list to include at + * least msz entries or sl->qty whichever is greater. 
+ */ +int bstrListAllocMin (struct bstrList * sl, int msz) { +bstring * l; +size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || + sl->qty > sl->mlen) return BSTR_ERR; + if (msz < sl->qty) msz = sl->qty; + if (sl->mlen == msz) return BSTR_OK; + nsz = ((size_t) msz) * sizeof (bstring); + if (nsz < (size_t) msz) return BSTR_ERR; + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) return BSTR_ERR; + sl->mlen = msz; + sl->entry = l; + return BSTR_OK; +} + +/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * character in splitChar. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitcb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitcb will continue in an undefined manner. 
+ */ +int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) + return BSTR_ERR; + + p = pos; + do { + for (i=p; i < str->slen; i++) { + if (str->data[i] == splitChar) break; + } + if ((ret = cb (parm, p, i - p)) < 0) return ret; + p = i + 1; + } while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by any + * of the characters in splitStr. An empty splitStr causes the whole str to + * be iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitscb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. 
+ */ +int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +struct charField chrs; +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + if (splitStr->slen == 0) { + if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0; + return ret; + } + + if (splitStr->slen == 1) + return bsplitcb (str, splitStr->data[0], pos, cb, parm); + + buildCharField (&chrs, splitStr); + + p = pos; + do { + for (i=p; i < str->slen; i++) { + if (testInCharField (&chrs, str->data[i])) break; + } + if ((ret = cb (parm, p, i - p)) < 0) return ret; + p = i + 1; + } while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * substring splitStr. An empty splitStr causes the whole str to be + * iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitstrcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. 
+ */ +int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (0 == splitStr->slen) { + for (i=pos; i < str->slen; i++) { + if ((ret = cb (parm, i, 1)) < 0) return ret; + } + return BSTR_OK; + } + + if (splitStr->slen == 1) + return bsplitcb (str, splitStr->data[0], pos, cb, parm); + + for (i=p=pos; i <= str->slen - splitStr->slen; i++) { + if (0 == bstr__memcmp (splitStr->data, str->data + i, + splitStr->slen)) { + if ((ret = cb (parm, p, i - p)) < 0) return ret; + i += splitStr->slen; + p = i; + } + } + if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; + return BSTR_OK; +} + +struct genBstrList { + bstring b; + struct bstrList * bl; +}; + +static int bscb (void * parm, int ofs, int len) { +struct genBstrList * g = (struct genBstrList *) parm; + if (g->bl->qty >= g->bl->mlen) { + int mlen = g->bl->mlen * 2; + bstring * tbl; + + while (g->bl->qty >= mlen) { + if (mlen < g->bl->mlen) return BSTR_ERR; + mlen += mlen; + } + + tbl = (bstring *) bstr__realloc (g->bl->entry, + sizeof (bstring) * mlen); + if (tbl == NULL) return BSTR_ERR; + + g->bl->entry = tbl; + g->bl->mlen = mlen; + } + + g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len); + g->bl->qty++; + return BSTR_OK; +} + +/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) + * + * Create an array of sequential substrings from str divided by the character + * splitChar. 
+ */ +struct bstrList * bsplit (const_bstring str, unsigned char splitChar) { +struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + + g.b = (bstring) str; + g.bl->qty = 0; + if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) + * + * Create an array of sequential substrings from str divided by the entire + * substring splitStr. + */ +struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) { +struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + + g.b = (bstring) str; + g.bl->qty = 0; + if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplits (const_bstring str, bstring splitStr) + * + * Create an array of sequential substrings from str divided by any of the + * characters in splitStr. An empty splitStr causes a single entry bstrList + * containing a copy of str to be returned. 
+ */ +struct bstrList * bsplits (const_bstring str, const_bstring splitStr) { +struct genBstrList g; + + if ( str == NULL || str->slen < 0 || str->data == NULL || + splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) + return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + g.b = (bstring) str; + g.bl->qty = 0; + + if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +#if defined (__TURBOC__) && !defined (__BORLANDC__) +# ifndef BSTRLIB_NOVSNP +# define BSTRLIB_NOVSNP +# endif +#endif + +/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ +#if defined(__WATCOMC__) || defined(_MSC_VER) +#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} +#else +#ifdef BSTRLIB_NOVSNP +/* This is just a hack. If you are using a system without a vsnprintf, it is + not recommended that bformat be used at all. */ +#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} +#define START_VSNBUFF (256) +#else + +#if defined(__GNUC__) && !defined(__APPLE__) +/* Something is making gcc complain about this prototype not being here, so + I've just gone ahead and put it in. */ +extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); +#endif + +#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} +#endif +#endif + +#if !defined (BSTRLIB_NOVSNP) + +#ifndef START_VSNBUFF +#define START_VSNBUFF (16) +#endif + +/* On IRIX vsnprintf returns n-1 when the operation would overflow the target + buffer, WATCOM and MSVC both return -1, while C99 requires that the + returned value be exactly what the length would be if the buffer would be + large enough. 
This leads to the idea that if the return value is larger + than n, then changing n to the return value will reduce the number of + iterations required. */ + +/* int bformata (bstring b, const char * fmt, ...) + * + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it appends the results to + * a bstring which contains what would have been output. Note that if there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. + */ +int bformata (bstring b, const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return BSTR_ERR; + } + } + + r = bconcat (b, buff); + bdestroy (buff); + return r; +} + +/* int bassignformat (bstring b, const char * fmt, ...) + * + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it outputs the results to + * the bstring parameter b. Note that if there is an early generation of a + * '\0' character, the bstring will be truncated to this end point. 
+ */ +int bassignformat (bstring b, const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return BSTR_ERR; + } + } + + r = bassign (b, buff); + bdestroy (buff); + return r; +} + +/* bstring bformat (const char * fmt, ...) + * + * Takes the same parameters as printf (), but rather than outputting results + * to stdio, it forms a bstring which contains what would have been output. + * Note that if there is an early generation of a '\0' character, the + * bstring will be truncated to this end point. + */ +bstring bformat (const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. 
*/ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return NULL; + } + } + + return buff; +} + +/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) + * + * The bvcformata function formats data under control of the format control + * string fmt and attempts to append the result to b. The fmt parameter is + * the same as that of the printf function. The variable argument list is + * replaced with arglist, which has been initialized by the va_start macro. + * The size of the output is upper bounded by count. If the required output + * exceeds count, the string b is not augmented with any contents and a value + * below BSTR_ERR is returned. If a value below -count is returned then it + * is recommended that the negative of this value be used as an update to the + * count in a subsequent pass. On other errors, such as running out of + * memory, parameter errors or numeric wrap around BSTR_ERR is returned. + * BSTR_OK is returned when the output is successfully generated and + * appended to b. + * + * Note: There is no sanity checking of arglist, and this function is + * destructive of the contents of b from the b->slen point onward. If there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. 
+ */ +int bvcformata (bstring b, int count, const char * fmt, va_list arg) { +int n, r, l; + + if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL + || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + if (count > (n = b->slen + count) + 2) return BSTR_ERR; + if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR; + + exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg); + b->data[b->slen + count + 2] = '\0'; + + /* Did the operation complete successfully within bounds? */ + + if (n >= (l = b->slen + (int) (strlen) ((char *) b->data + b->slen))) { + b->slen = l; + return BSTR_OK; + } + + /* Abort, since the buffer was not large enough. The return value + tries to help set what the retry length should be. */ + + b->data[b->slen] = '\0'; + if (r > count+1) { + l = r; + } else { + if (count > INT_MAX / 2) + l = INT_MAX; + else + l = count + count; + } + n = -l; + if (n > BSTR_ERR-1) n = BSTR_ERR-1; + return n; +} + +#endif diff --git a/bstrlib/bstrlib.h b/bstrlib/bstrlib.h new file mode 100644 index 0000000..fd0874c --- /dev/null +++ b/bstrlib/bstrlib.h @@ -0,0 +1,316 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license and the GPL. Refer to the accompanying documentation for details + * on usage and license. + */ + +/* + * bstrlib.h + * + * This file is the interface for the core bstring functions. 
+ */ + +#ifndef BSTRLIB_INCLUDE +#define BSTRLIB_INCLUDE + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) +# if defined (__TURBOC__) && !defined (__BORLANDC__) +# define BSTRLIB_NOVSNP +# endif +#endif + +#define BSTR_ERR (-1) +#define BSTR_OK (0) +#define BSTR_BS_BUFF_LENGTH_GET (0) + +typedef struct tagbstring * bstring; +typedef const struct tagbstring * const_bstring; + +/* Copy functions */ +#define cstr2bstr bfromcstr +extern bstring bfromcstr (const char * str); +extern bstring bfromcstralloc (int mlen, const char * str); +extern bstring bfromcstrrangealloc (int minl, int maxl, const char* str); +extern bstring blk2bstr (const void * blk, int len); +extern char * bstr2cstr (const_bstring s, char z); +extern int bcstrfree (char * s); +extern bstring bstrcpy (const_bstring b1); +extern int bassign (bstring a, const_bstring b); +extern int bassignmidstr (bstring a, const_bstring b, int left, int len); +extern int bassigncstr (bstring a, const char * str); +extern int bassignblk (bstring a, const void * s, int len); + +/* Destroy function */ +extern int bdestroy (bstring b); + +/* Space allocation hinting functions */ +extern int balloc (bstring s, int len); +extern int ballocmin (bstring b, int len); + +/* Substring extraction */ +extern bstring bmidstr (const_bstring b, int left, int len); + +/* Various standard manipulations */ +extern int bconcat (bstring b0, const_bstring b1); +extern int bconchar (bstring b0, char c); +extern int bcatcstr (bstring b, const char * s); +extern int bcatblk (bstring b, const void * s, int len); +extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); +extern int binsertblk (bstring s1, int pos, const void * s2, int len, unsigned char fill); +extern int binsertch (bstring s1, int pos, int len, unsigned char fill); +extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill); 
+extern int bdelete (bstring s1, int pos, int len); +extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); +extern int btrunc (bstring b, int n); + +/* Scan/search functions */ +extern int bstricmp (const_bstring b0, const_bstring b1); +extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); +extern int biseqcaseless (const_bstring b0, const_bstring b1); +extern int biseqcaselessblk (const_bstring b, const void * blk, int len); +extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); +extern int biseq (const_bstring b0, const_bstring b1); +extern int biseqblk (const_bstring b, const void * blk, int len); +extern int bisstemeqblk (const_bstring b0, const void * blk, int len); +extern int biseqcstr (const_bstring b, const char * s); +extern int biseqcstrcaseless (const_bstring b, const char * s); +extern int bstrcmp (const_bstring b0, const_bstring b1); +extern int bstrncmp (const_bstring b0, const_bstring b1, int n); +extern int binstr (const_bstring s1, int pos, const_bstring s2); +extern int binstrr (const_bstring s1, int pos, const_bstring s2); +extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); +extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); +extern int bstrchrp (const_bstring b, int c, int pos); +extern int bstrrchrp (const_bstring b, int c, int pos); +#define bstrchr(b,c) bstrchrp ((b), (c), 0) +#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1) +extern int binchr (const_bstring b0, int pos, const_bstring b1); +extern int binchrr (const_bstring b0, int pos, const_bstring b1); +extern int bninchr (const_bstring b0, int pos, const_bstring b1); +extern int bninchrr (const_bstring b0, int pos, const_bstring b1); +extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos); +extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos); + +/* List of string container functions */ +struct bstrList { 
+ int qty, mlen; + bstring * entry; +}; +extern struct bstrList * bstrListCreate (void); +extern int bstrListDestroy (struct bstrList * sl); +extern int bstrListAlloc (struct bstrList * sl, int msz); +extern int bstrListAllocMin (struct bstrList * sl, int msz); + +/* String split and join functions */ +extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar); +extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr); +extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr); +extern bstring bjoin (const struct bstrList * bl, const_bstring sep); +extern bstring bjoinblk (const struct bstrList * bl, const void * s, int len); +extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); +extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); +extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + +/* Miscellaneous functions */ +extern int bpattern (bstring b, int len); +extern int btoupper (bstring b); +extern int btolower (bstring b); +extern int bltrimws (bstring b); +extern int brtrimws (bstring b); +extern int btrimws (bstring b); + +#if !defined (BSTRLIB_NOVSNP) +extern bstring bformat (const char * fmt, ...); +extern int bformata (bstring b, const char * fmt, ...); +extern int bassignformat (bstring b, const char * fmt, ...); +extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); + +#define bvformata(ret, b, fmt, lastarg) { \ +bstring bstrtmp_b = (b); \ +const char * bstrtmp_fmt = (fmt); \ +int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ + for (;;) { \ + va_list bstrtmp_arglist; \ + va_start (bstrtmp_arglist, lastarg); \ + bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ + va_end (bstrtmp_arglist); \ + if (bstrtmp_r 
>= 0) { /* Everything went ok */ \ + bstrtmp_r = BSTR_OK; \ + break; \ + } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \ + bstrtmp_r = BSTR_ERR; \ + break; \ + } \ + bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ + } \ + ret = bstrtmp_r; \ +} + +#endif + +typedef int (*bNgetc) (void *parm); +typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); + +/* Input functions */ +extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); +extern bstring bread (bNread readPtr, void * parm); +extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int breada (bstring b, bNread readPtr, void * parm); + +/* Stream functions */ +extern struct bStream * bsopen (bNread readPtr, void * parm); +extern void * bsclose (struct bStream * s); +extern int bsbufflength (struct bStream * s, int sz); +extern int bsreadln (bstring b, struct bStream * s, char terminator); +extern int bsreadlns (bstring r, struct bStream * s, const_bstring term); +extern int bsread (bstring b, struct bStream * s, int n); +extern int bsreadlna (bstring b, struct bStream * s, char terminator); +extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term); +extern int bsreada (bstring b, struct bStream * s, int n); +extern int bsunread (struct bStream * s, const_bstring b); +extern int bspeek (bstring r, const struct bStream * s); +extern int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); +extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); +extern int bseof (const struct bStream * s); + +struct tagbstring { + int mlen; + int slen; + unsigned char * data; +}; + +/* Accessor macros */ +#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? 
(int)(e) : ((b)->slen)) +#define blength(b) (blengthe ((b), 0)) +#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o)) +#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0)) +#define bdatae(b, e) (bdataofse (b, 0, e)) +#define bdata(b) (bdataofs (b, 0)) +#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e)) +#define bchar(b, p) bchare ((b), (p), '\0') + +/* Static constant string initialization macro */ +#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} +#if defined(_MSC_VER) +# define bsStatic(q) bsStaticMlen(q,-32) +#endif +#ifndef bsStatic +# define bsStatic(q) bsStaticMlen(q,-__LINE__) +#endif + +/* Static constant block parameter pair */ +#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1) + +#define bcatStatic(b,s) ((bcatblk)((b), bsStaticBlkParms(s))) +#define bfromStatic(s) ((blk2bstr)(bsStaticBlkParms(s))) +#define bassignStatic(b,s) ((bassignblk)((b), bsStaticBlkParms(s))) +#define binsertStatic(b,p,s,f) ((binsertblk)((b), (p), bsStaticBlkParms(s), (f))) +#define bjoinStatic(b,s) ((bjoinblk)((b), bsStaticBlkParms(s))) +#define biseqStatic(b,s) ((biseqblk)((b), bsStaticBlkParms(s))) +#define bisstemeqStatic(b,s) ((bisstemeqblk)((b), bsStaticBlkParms(s))) +#define biseqcaselessStatic(b,s) ((biseqcaselessblk)((b), bsStaticBlkParms(s))) +#define bisstemeqcaselessStatic(b,s) ((bisstemeqcaselessblk)((b), bsStaticBlkParms(s))) + +/* Reference building macros */ +#define cstr2tbstr btfromcstr +#define btfromcstr(t,s) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = ((t).data) ? 
((int) (strlen) ((char *)(t).data)) : 0; \ + (t).mlen = -1; \ +} +#define blk2tbstr(t,s,l) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = l; \ + (t).mlen = -1; \ +} +#define btfromblk(t,s,l) blk2tbstr(t,s,l) +#define bmid2tbstr(t,b,p,l) { \ + const_bstring bstrtmp_s = (b); \ + if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ + int bstrtmp_left = (p); \ + int bstrtmp_len = (l); \ + if (bstrtmp_left < 0) { \ + bstrtmp_len += bstrtmp_left; \ + bstrtmp_left = 0; \ + } \ + if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \ + bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \ + if (bstrtmp_len <= 0) { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } else { \ + (t).data = bstrtmp_s->data + bstrtmp_left; \ + (t).slen = bstrtmp_len; \ + } \ + } else { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } \ + (t).mlen = -__LINE__; \ +} +#define btfromblkltrimws(t,s,l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l); \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} +#define btfromblkrtrimws(t,s,l) { \ + int bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_len >= 0; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s; \ + (t).slen = bstrtmp_len + 1; \ + (t).mlen = -__LINE__; \ +} +#define btfromblktrimws(t,s,l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + 
(t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} + +/* Write protection macros */ +#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; } +#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); } +#define biswriteprotected(t) ((t).mlen <= 0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/bstrlib/bstrlib.txt b/bstrlib/bstrlib.txt new file mode 100644 index 0000000..9d2231b --- /dev/null +++ b/bstrlib/bstrlib.txt @@ -0,0 +1,3512 @@ +Better String library +--------------------- + +by Paul Hsieh + +The bstring library is an attempt to provide improved string processing +functionality to the C and C++ language. At the heart of the bstring library +(Bstrlib for short) is the management of "bstring"s which are a significant +improvement over '\0' terminated char buffers. + +=============================================================================== + +Motivation +---------- + +The standard C string library has serious problems: + + 1) Its use of '\0' to denote the end of the string means knowing a + string's length is O(n) when it could be O(1). + 2) It imposes an interpretation for the character value '\0'. + 3) gets() always exposes the application to a buffer overflow. + 4) strtok() modifies the string its parsing and thus may not be usable in + programs which are re-entrant or multithreaded. + 5) fgets has the unusual semantic of ignoring '\0's that occur before + '\n's are consumed. + 6) There is no memory management, and actions performed such as strcpy, + strcat and sprintf are common places for buffer overflows. + 7) strncpy() doesn't '\0' terminate the destination in some cases. + 8) Passing NULL to C library string functions causes an undefined NULL + pointer access. + 9) Parameter aliasing (overlapping, or self-referencing parameters) + within most C library functions has undefined behavior. 
+ 10) Many C library string function calls take integer parameters with + restricted legal ranges. Parameters passed outside these ranges are + not typically detected and cause undefined behavior. + +So the desire is to create an alternative string library that does not suffer +from the above problems and adds in the following functionality: + + 1) Incorporate string functionality seen from other languages. + a) MID$() - from BASIC + b) split()/join() - from Python + c) string/char x n - from Perl + 2) Implement analogs to functions that combine stream IO and char buffers + without creating a dependency on stream IO functionality. + 3) Implement the basic text editor-style functions insert, delete, find, + and replace. + 4) Implement reference based sub-string access (as a generalization of + pointer arithmetic.) + 5) Implement runtime write protection for strings. + +There is also a desire to avoid "API-bloat". So functionality that can be +implemented trivially in other functionality is omitted. So there is no +left$() or right$() or reverse() or anything like that as part of the core +functionality. + +Explaining Bstrings +------------------- + +A bstring is basically a header which wraps a pointer to a char buffer. Let's +start with the declaration of a struct tagbstring: + + struct tagbstring { + int mlen; + int slen; + unsigned char * data; + }; + +This definition is considered exposed, not opaque (though it is neither +necessary nor recommended that low level maintenance of bstrings be performed +whenever the abstract interfaces are sufficient). The mlen field (usually) +describes a lower bound for the memory allocated for the data field. The +slen field describes the exact length for the bstring. The data field is a +single contiguous buffer of unsigned chars. Note that the existence of a '\0' +character in the unsigned char buffer pointed to by the data field does not +necessarily denote the end of the bstring.
+ +To be a well formed modifiable bstring the mlen field must be at least the +length of the slen field, and slen must be non-negative. Furthermore, the +data field must point to a valid buffer in which access to the first mlen +characters has been acquired. So the minimal check for correctness is: + + (slen >= 0 && mlen >= slen && data != NULL) + +bstrings returned by bstring functions can be assumed to be either NULL or +satisfy the above property. (When bstrings are only readable, the mlen >= +slen restriction is not required; this is discussed later in this section.) +A bstring itself is just a pointer to a struct tagbstring: + + typedef struct tagbstring * bstring; + +Note that use of the prefix "tag" in struct tagbstring is required to work +around the inconsistency between C and C++'s struct namespace usage. This +definition is also considered exposed. + +Bstrlib basically manages bstrings allocated as a header and an associated +data-buffer. Since the implementation is exposed, they can also be +constructed manually. Functions which mutate bstrings assume that the header +and data buffer have been malloced; the bstring library may perform free() or +realloc() on both the header and data buffer of any bstring parameter. +Functions which return bstring's create new bstrings. The string memory is +freed by a bdestroy() call (or using the bstrFree macro). + +The following related typedef is also provided: + + typedef const struct tagbstring * const_bstring; + +which is also considered exposed. These are directly bstring compatible (no +casting required) but are just used for parameters which are meant to be +non-mutable. So in general, bstring parameters which are read as input but +not meant to be modified will be declared as const_bstring, and bstring +parameters which may be modified will be declared as bstring. This convention +is recommended for user written functions as well. 
+ +Since bstrings maintain interoperability with C library char-buffer style +strings, all functions which modify, update or create bstrings also append a +'\0' character into the position slen + 1. This trailing '\0' character is +not required for bstrings input to the bstring functions; this is provided +solely as a convenience for interoperability with standard C char-buffer +functionality. + +Analogs for the ANSI C string library functions have been created when they +are necessary, but have also been left out when they are not. In particular +there are no functions analogous to fwrite, or puts just for the purposes of +bstring. The ->data member of any string is exposed, and therefore can be +used just as easily as char buffers for C functions which read strings. + +For those that wish to hand construct bstrings, the following should be kept +in mind: + + 1) While bstrlib can accept constructed bstrings without terminating + '\0' characters, the rest of the C language string library will not + function properly on such non-terminated strings. This is obvious + but must be kept in mind. + 2) If it is intended that a constructed bstring be written to by the + bstring library functions then the data portion should be allocated + by the malloc function and the slen and mlen fields should be entered + properly. The struct tagbstring header is not reallocated, and only + freed by bdestroy. + 3) Writing arbitrary '\0' characters at various places in the string + will not modify its length as perceived by the bstring library + functions. In fact, '\0' is a legitimate non-terminating character + for a bstring to contain. + 4) For read only parameters, bstring functions do not check the mlen. 
+ I.e., the minimal correctness requirements are reduced to: + + (slen >= 0 && data != NULL) + +Better pointer arithmetic +------------------------- + +One built-in feature of '\0' terminated char * strings, is that it's very easy +and fast to obtain a reference to the tail of any string using pointer +arithmetic. Bstrlib does one better by providing a way to get a reference to +any substring of a bstring (or any other length delimited block of memory.) +So rather than just having pointer arithmetic, with bstrlib one essentially +has segment arithmetic. This is achieved using the macro blk2tbstr() which +builds a reference to a block of memory and the macro bmid2tbstr() which +builds a reference to a segment of a bstring. Bstrlib also includes +functions for direct consumption of memory blocks into bstrings, namely +bcatblk () and blk2bstr (). + +One scenario where this can be extremely useful is when a string contains many +substrings which one would like to pass as read-only reference parameters to +some string consuming function without the need to allocate entire new +containers for the string data. More concretely, imagine parsing a command +line string whose parameters are space delimited. This can only be done for +tails of the string with '\0' terminated char * strings. + +Improved NULL semantics and error handling +------------------------------------------ + +Unless otherwise noted, if a NULL pointer is passed as a bstring or any other +detectably illegal parameter, the called function will return with an error +indicator (either NULL or BSTR_ERR) rather than simply performing a NULL +pointer access, or having undefined behavior. + +To illustrate the value of this, consider the following example: + + strcpy (p = malloc (13 * sizeof (char)), "Hello,"); + strcat (p, " World"); + +This is not correct because malloc may return NULL (due to an out of memory +condition), and the behaviour of strcpy is undefined if either of its +parameters are NULL.
However: + + bstrcat (p = bfromcstr ("Hello,"), q = bfromcstr (" World")); + bdestroy (q); + +is well defined, because if either p or q are assigned NULL (indicating a +failure to allocate memory) both bstrcat and bdestroy will recognize it and +perform no detrimental action. + +Note that it is not necessary to check any of the members of a returned +bstring for internal correctness (in particular the data member does not need +to be checked against NULL when the header is non-NULL), since this is +assured by the bstring library itself. + +bStreams +-------- + +In addition to the bgets and bread functions, bstrlib can abstract streams +with a high performance read only stream called a bStream. In general, the +idea is to open a core stream (with something like fopen) then pass its +handle as well as a bNread function pointer (like fread) to the bsopen +function which will return a handle to an open bStream. Then the functions +bsread, bsreadln or bsreadlns can be called to read portions of the stream. +Finally, the bsclose function is called to close the bStream -- it will +return a handle to the original (core) stream. So bStreams, essentially, +wrap other streams. + +The bStreams have two main advantages over the bgets and bread (as well as +fgets/ungetc) paradigms: + +1) Improved functionality via the bsunread function which allows a stream to + unread characters, giving the bStream stack-like functionality if so + desired. +2) A very high performance bsreadln function. The C library function fgets() + (and the bgets function) can typically be written as a loop on top of + fgetc(), thus paying all of the overhead costs of calling fgetc on a per + character basis. bsreadln will read blocks at a time, thus amortizing the + overhead of fread calls over many characters at once. + +However, clearly bStreams are suboptimal or unusable for certain kinds of +streams (stdin) or certain usage patterns (a few spotty, or non-sequential +reads from a slow stream.)
For those situations, using bgets will be more +appropriate. + +The semantics of bStreams allow practical construction of layerable data +streams. What this means is that by writing a bNread compatible function on +top of a bStream, one can construct a new bStream on top of it. This can be +useful for writing multi-pass parsers that don't actually read the entire +input more than once and don't require the use of intermediate storage. + +Aliasing +-------- + +Aliasing occurs when a function is given two parameters which point to data +structures which overlap in the memory they occupy. While this does not +disturb read only functions, for many libraries this can make functions that +write to these memory locations malfunction. This is a common problem of the +C standard library and especially the string functions in the C standard +library. + +The C standard string library is entirely char by char oriented (as is +bstring) which makes conforming implementations alias safe for some +scenarios. However no actual detection of aliasing is typically performed, +so it is easy to find cases where the aliasing will cause anomalous or +undesirable behaviour (consider: strcat (p, p).) The C99 standard includes +the "restrict" pointer modifier which allows the compiler to document and +assume a no-alias condition on usage. However, only the most trivial cases +can be caught (if at all) by the compiler at compile time, and thus there is +no actual enforcement of non-aliasing. + +Bstrlib, by contrast, permits aliasing and is completely aliasing safe, in +the C99 sense of aliasing. That is to say, under the assumption that +pointers of incompatible types from distinct objects can never alias, bstrlib +is completely aliasing safe. (In practice this means that the data buffer +portion of any bstring and header of any bstring are assumed to never alias.)
+With the exception of the reference building macros, the library behaves as +if all read-only parameters are first copied and replaced by temporary +non-aliased parameters before any writing to any output bstring is performed +(though actual copying is extremely rarely ever done.) + +Besides being a useful safety feature, bstring searching/comparison +functions can improve to O(1) execution when aliasing is detected. + +Note that aliasing detection and handling code in Bstrlib is generally +extremely cheap. There is almost never any appreciable performance penalty +for using aliased parameters. + +Reentrancy +---------- + +Nearly every function in Bstrlib is a leaf function, and is completely +reentrant with the exception of writing to common bstrings. The split +functions which use a callback mechanism require only that the source string +not be destroyed by the callback function unless the callback function returns +with an error status (note that Bstrlib functions which return an error do +not modify the string in any way.) The string can in fact be modified by the +callback and the behaviour is deterministic. See the documentation of the +various split functions for more details. + +Undefined scenarios +------------------- + +One of the basic important premises for Bstrlib is not to increase the +propagation of undefined situations from parameters that are otherwise legal +in and of themselves. In particular, except for extremely marginal cases, usages +of bstrings that use the bstring library functions alone cannot lead to any +undefined action. But due to C/C++ language and library limitations, there +is no way to define a non-trivial library that is completely without +undefined operations. All such possible undefined operations are described +below: + +1) bstrings or struct tagbstrings that are not explicitly initialized cannot + be passed as a parameter to any bstring function. +2) The members of the NULL bstring cannot be accessed directly.
(Though all + APIs and macros detect the NULL bstring.) +3) A bstring whose data member has not been obtained from a malloc or + compatible call and which is write accessible passed as a writable + parameter will lead to undefined results. (i.e., do not writeAllow any + constructed bstrings unless the data portion has been obtained from the + heap.) +4) If the headers of two strings alias but are not identical (which can only + happen via a defective manual construction), then passing them to a + bstring function in which one is writable is not defined. +5) If the mlen member is larger than the actual accessible length of the data + member for a writable bstring, or if the slen member is larger than the + readable length of the data member for a readable bstring, then the + corresponding bstring operations are undefined. +6) Any bstring definition whose header or accessible data portion has been + assigned to inaccessible or otherwise illegal memory clearly cannot be + acted upon by the bstring library in any way. +7) Destroying the source of an incremental split from within the callback + and not returning with a negative value (indicating that it should abort) + will lead to undefined behaviour. (Though *modifying* or adjusting the + state of the source data, even if those modifications fail within the + bstrlib API, has well defined behavior.) +8) Modifying a bstring which is write protected by direct access has + undefined behavior. + +While this may seem like a long list, with the exception of invalid uses of +the writeAllow macro, and source destruction during an iterative split +without an accompanying abort, no usage of the bstring API alone can cause +any undefined scenario to occur. I.e., the policy of restricting usage of +bstrings to the bstring API can significantly reduce the risk of runtime +errors (in practice it should eliminate them) related to string manipulation +due to undefined action.
+ +C++ wrapper +----------- + +A C++ wrapper has been created to enable bstring functionality for C++ in the +most natural (for C++ programmers) way possible. The mandate for the C++ +wrapper is different from the base C bstring library. Since the C++ language +has far more abstracting capabilities, the CBString structure is considered +fully abstracted -- i.e., hand generated CBStrings are not supported (though +conversion from a struct tagbstring is allowed) and all detectable errors are +manifest as thrown exceptions. + +- The C++ class definitions are all under the namespace Bstrlib. bstrwrap.h + enables this namespace (with a using namespace Bstrlib; directive at the + end) unless the macro BSTRLIB_DONT_ASSUME_NAMESPACE has been defined before + it is included. + +- Erroneous accesses result in an exception being thrown. The exception + parameter is of type "struct CBStringException" which is derived from + std::exception if STL is used. A verbose description of the error message + can be obtained from the what() method. + +- CBString is a C++ structure derived from a struct tagbstring. An address + of a CBString cast to a bstring must not be passed to bdestroy. The bstring + C API has been made C++ safe and can be used directly in a C++ project. + +- It includes constructors which can take a char, '\0' terminated char + buffer, tagbstring, (char, repeat-value), a length delimited buffer or a + CBStringList to initialize it. + +- Concatenation is performed with the + and += operators. Comparisons are + done with the ==, !=, <, >, <= and >= operators. Note that == and != use + the biseq call, while <, >, <= and >= use bstrcmp. + +- CBString's can be directly cast to const character buffers. + +- CBString's can be directly cast to double, float, int or unsigned int so + long as the CBString are decimal representations of those types (otherwise + an exception will be thrown). Converting the other way should be done with + the format(a) method(s).
+ +- CBString contains the length, character and [] accessor methods. The + character and [] accessors are aliases of each other. If the bounds for + the string are exceeded, an exception is thrown. To avoid the overhead for + this check, first cast the CBString to a (const char *) and use [] to + dereference the array as normal. Note that the character and [] accessor + methods allow both reading and writing of individual characters. + +- The methods: format, formata, find, reversefind, findcaseless, + reversefindcaseless, midstr, insert, insertchrs, replace, findreplace, + findreplacecaseless, remove, findchr, nfindchr, alloc, toupper, tolower, + gets, read are analogous to the functions that can be found in the C API. + +- The caselessEqual and caselessCmp methods are analogous to biseqcaseless + and bstricmp functions respectively. + +- Note that just like the bformat function, the format and formata methods do + not automatically cast CBStrings into char * strings for "%s"-type + substitutions: + + CBString w("world"); + CBString h("Hello"); + CBString hw; + + /* The casts are necessary */ + hw.format ("%s, %s", (const char *)h, (const char *)w); + +- The methods trunc and repeat have been added instead of using pattern. + +- ltrim, rtrim and trim methods have been added. These remove characters + from a given character string set (defaulting to the whitespace characters) + from either the left, right or both ends of the CBString, respectively. + +- The method setsubstr is also analogous in functionality to bsetstr, except + that it cannot be passed NULL. Instead the method fill and the fill-style + constructor have been supplied to enable this functionality. + +- The writeprotect(), writeallow() and iswriteprotected() methods are + analogous to the bwriteprotect(), bwriteallow() and biswriteprotected() + macros in the C API.
Write protection semantics in CBString are stronger + than with the C API in that indexed character assignment is checked for + write protection. However, unlike with the C API, a write protected + CBString can be destroyed by the destructor. + +- CBStream is a C++ structure which wraps a struct bStream (it's not derived + from it, since destruction is slightly different). It is constructed by + passing in a bNread function pointer and a stream parameter cast to void *. + This structure includes methods for detecting eof, setting the buffer + length, reading the whole stream or reading entries line by line or block + by block, an unread function, and a peek function. + +- If STL is available, the CBStringList structure is derived from a vector of + CBString with various split methods. The split method has been overloaded + to accept either a character or CBString as the second parameter (when the + split parameter is a CBString any character in that CBString is used as a + separator). The splitstr method takes a CBString as a substring separator. + Joins can be performed via a CBString constructor which takes a + CBStringList as a parameter, or just using the CBString::join() method. + +- If there is proper support for std::iostreams, then the >> and << operators + and the getline() function have been added (with semantics the same as + those for std::string). + +Multithreading +-------------- + +A mutable bstring is kind of analogous to a small (two entry) linked list +allocated by malloc, with all aliasing completely under programmer control. +I.e., manipulation of one bstring will never affect any other distinct +bstring unless explicitly constructed to do so by the programmer via hand +construction or via building a reference. Bstrlib also does not use any +static or global storage, so there are no hidden unremovable race conditions. +Bstrings are also clearly not inherently thread local.
So just like +char *'s, bstrings can be passed around from thread to thread and shared and +so on, so long as modifications to a bstring correspond to some kind of +exclusive access lock as should be expected (or if the bstring is read-only, +which can be enforced by bstring write protection) for any sort of shared +object in a multithreaded environment. + +Bsafe module +------------ + +For convenience, a bsafe module has been included. The idea is that if this +module is included, inadvertent usage of the most dangerous C functions will +be overridden and lead to an immediate run time abort. Of course, it should +be emphasized that usage of this module is completely optional. The +intention is essentially to provide an option for creating project safety +rules which can be enforced mechanically rather than socially. This is +useful for larger, or open development projects where it's more difficult to +enforce social rules or "coding conventions". + +Problems not solved +------------------- + +Bstrlib is written for the C and C++ languages, which have inherent weaknesses +that cannot be easily solved: + +1. Memory leaks: Forgetting to call bdestroy on a bstring that is about to be + unreferenced, just as forgetting to call free on a heap buffer that is + about to be unreferenced. Though bstrlib itself is leak free. +2. Read before write usage: In C, declaring an auto bstring does not + automatically fill it with legal/valid contents. This problem has been + somewhat mitigated in C++. (The bstrDeclare and bstrFree macros from + bstraux can be used to help mitigate this problem.) + +Other problems not addressed: + +3. Built-in mutex usage to automatically avoid all bstring internal race + conditions in multitasking environments: The problem with trying to + implement such things at this low a level is that it is typically more + efficient to use locks in higher level primitives. There is also no + platform independent way to implement locks or mutexes.
+ +Note that except for spotty support of wide characters, the default C +standard library does not address any of these problems either. + +Configurable compilation options +-------------------------------- + +The Better String Library is not an application, it is a library. To compile +it, you need to compile bstrlib.c to an object file that is linked to your +application. A Makefile might contain entries such as the following to +accomplish this: + +BSTRDIR = $(CDIR)/bstrlib +INCLUDES = -I$(BSTRDIR) +BSTROBJS = $(ODIR)/bstrlib.o +DEFINES = +CFLAGS = -O3 -Wall -pedantic -ansi -s $(DEFINES) + +application: $(ODIR)/main.o $(BSTROBJS) + echo Linking: $@ + $(CC) $< $(BSTROBJS) -o $@ + +$(ODIR)/%.o : $(BSTRDIR)/%.c + echo Compiling: $< + $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(ODIR)/%.o : %.c + echo Compiling: $< + $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ + +You can configure bstrlib using the standard macro defines passed to +the compiler. All configuration options are meant solely for the purpose of +compiler compatibility. Configuration options are not meant to change the +semantics or capabilities of the library, except where it is unavoidable. + +Since some C++ compilers don't include the Standard Template Library and some +have the options of disabling exception handling, a number of macros can be +used to conditionally compile support for each of these: + +BSTRLIB_CAN_USE_STL + + - defining this will enable the use of the Standard Template Library. + Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro. + +BSTRLIB_CANNOT_USE_STL + + - defining this will disable the use of the Standard Template Library. + Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro. + +BSTRLIB_CAN_USE_IOSTREAM + + - defining this will enable the use of streams from class std. Defining + BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro.
+ +BSTRLIB_CANNOT_USE_IOSTREAM + + - defining this will disable the use of streams from class std. Defining + BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro. + +BSTRLIB_THROWS_EXCEPTIONS + + - defining this will enable the exception handling within bstring. + Defining BSTRLIB_THROWS_EXCEPTIONS overrides the + BSTRLIB_DOESNT_THROW_EXCEPTIONS macro. + +BSTRLIB_DOESNT_THROW_EXCEPTIONS + + - defining this will disable the exception handling within bstring. + Defining BSTRLIB_THROWS_EXCEPTIONS overrides the + BSTRLIB_DOESNT_THROW_EXCEPTIONS macro. + +Note that these macros must be defined consistently throughout all modules +that use CBStrings including bstrwrap.cpp. + +Some older C compilers do not support functions such as vsnprintf. This is +handled by the following macro variables: + +BSTRLIB_NOVSNP + + - defining this indicates that the compiler does not support vsnprintf. + This will cause bformat and bformata to not be declared. Note that + for some compilers, such as Turbo C, this is set automatically. + Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro. + +BSTRLIB_VSNP_OK + + - defining this will disable the autodetection of compilers that do not + support vsnprintf. + Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro. + +Semantic compilation options +---------------------------- + +Bstrlib comes with very few compilation options for changing the semantics +of the library. These are described below. + +BSTRLIB_DONT_ASSUME_NAMESPACE + + - Defining this before including bstrwrap.h will disable the automatic + enabling of the Bstrlib namespace for the C++ declarations. + +BSTRLIB_DONT_USE_VIRTUAL_DESTRUCTOR + + - Defining this will make the CBString destructor non-virtual. + +BSTRLIB_MEMORY_DEBUG + + - Defining this will cause the bstrlib modules bstrlib.c and bstrwrap.cpp + to invoke a #include "memdbg.h". memdbg.h has to be supplied by the user. 
+ +Note that these macros must be defined consistently throughout all modules +that use bstrings or CBStrings including bstrlib.c, bstraux.c and +bstrwrap.cpp. + +=============================================================================== + +Files +----- + +Core C files (required for C and C++): +bstrlib.c - C implementation of bstring functions. +bstrlib.h - C header file for bstring functions. + +Core C++ files (required for C++): +bstrwrap.cpp - C++ implementation of CBString. +bstrwrap.h - C++ header file for CBString. + +Base Unicode support: +utf8util.c - C implementation of generic utf8 parsing functions. +utf8util.h - C header file for generic utf8 parsing functions. +buniutil.c - C implementation of utf8 bstring packing and unpacking functions. +buniutil.h - C header file for utf8 bstring functions. + +Extra utility functions: +bstraux.c - C example that implements trivial additional functions. +bstraux.h - C header for bstraux.c + +Miscellaneous: +bstest.c - C unit/regression test for bstrlib.c +test.cpp - C++ unit/regression test for bstrwrap.cpp +bsafe.c - C runtime stubs to abort usage of unsafe C functions. +bsafe.h - C header file for bsafe.c functions. + +C modules need only include bstrlib.h and compile/link bstrlib.c to use the +basic bstring library. C++ projects need to additionally include bstrwrap.h +and compile/link bstrwrap.cpp. For both, there may be a need to make choices +about feature configuration as described in the "Configurable compilation +options" in the section above. 
+ +Other files that are included in this archive are: + +license.txt - The BSD license for Bstrlib +gpl.txt - The GPL version 2 +security.txt - A security statement useful for auditting Bstrlib +porting.txt - A guide to porting Bstrlib +bstrlib.txt - This file + +=============================================================================== + +The functions +------------- + + extern bstring bfromcstr (const char * str); + + Take a standard C library style '\0' terminated char buffer and generate + a bstring with the same contents as the char buffer. If an error occurs + NULL is returned. + + So for example: + + bstring b = bfromcstr ("Hello"); + if (!b) { + fprintf (stderr, "Out of memory"); + } else { + puts ((char *) b->data); + } + + .......................................................................... + + extern bstring bfromcstralloc (int mlen, const char * str); + + Create a bstring which contains the contents of the '\0' terminated + char * buffer str. The memory buffer backing the bstring is at least + mlen characters in length. The buffer is also at least size required + to hold the string with the '\0' terminator. If an error occurs NULL + is returned. + + So for example: + + bstring b = bfromcstralloc (64, someCstr); + if (b) b->data[63] = 'x'; + + The idea is that this will set the 64th character of b to 'x' if it is at + least 64 characters long otherwise do nothing. And we know this is well + defined so long as b was successfully created, since it will have been + allocated with at least 64 characters. + + .......................................................................... + + extern bstring bfromcstrrangealloc (int minl, int maxl, const char* str); + + Create a bstring which contains the contents of the '\0' terminated + char * buffer str. The memory buffer backing the string is at least + minl characters in length, but an attempt is made to allocate up to + maxl characters. 
The buffer is also at least size required to hold + the string with the '\0' terminator. If an error occurs NULL is + returned. + + So for example: + + bstring b = bfromcstrrangealloc (0, 128, "Hello."); + if (b) b->data[5] = '!'; + + The idea is that this will set the 6th character of b to '!' if it was + allocated otherwise do nothing. And we know this is well defined so + long as b was successfully created, since it will have been allocated + with at least 7 (strlen("Hello.") + 1) characters. + + .......................................................................... + + extern bstring blk2bstr (const void * blk, int len); + + Create a bstring whose contents are described by the contiguous buffer + pointed to by blk with a length of len bytes. Note that this function + creates a copy of the data in blk, rather than simply referencing it. + Compare with the blk2tbstr macro. If an error occurs NULL is returned. + + .......................................................................... + + extern char * bstr2cstr (const_bstring s, char z); + + Create a '\0' terminated char buffer which contains the contents of the + bstring s, except that any contained '\0' characters are converted to the + character in z. This returned value should be freed with bcstrfree(), by + the caller. If an error occurs NULL is returned. + + .......................................................................... + + extern int bcstrfree (char * s); + + Frees a C-string generated by bstr2cstr (). This is normally unnecessary + since it just wraps a call to free (), however, if malloc () and free () + have been redefined as macros within the bstrlib module (via macros in + the memdbg.h backdoor) with some difference in behaviour from the std + library functions, then this allows a correct way of freeing the memory + that allows higher level code to be independent from these macro + redefinitions. + + .......................................................................... 
+ + extern bstring bstrcpy (const_bstring b1); + + Make a copy of the passed in bstring. The copied bstring is returned if + there is no error, otherwise NULL is returned. + + .......................................................................... + + extern int bassign (bstring a, const_bstring b); + + Overwrite the bstring a with the contents of bstring b. Note that the + bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + int bassigncstr (bstring a, const char * str); + + Overwrite the string a with the contents of char * string str. Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a may be partially overwritten. + + .......................................................................... + + int bassignblk (bstring a, const void * s, int len); + + Overwrite the string a with the contents of the block (s, len). Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + extern int bassignmidstr (bstring a, const_bstring b, int left, int len); + + Overwrite the bstring a with the middle of contents of bstring b + starting from position left and running for a length len. left and + len are clamped to the ends of b as with the function bmidstr. Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + extern bstring bmidstr (const_bstring b, int left, int len); + + Create a bstring which is the substring of b starting from position left + and running for a length len (clamped by the end of the bstring b.) 
If + there was no error, the value of this constructed bstring is returned + otherwise NULL is returned. + + .......................................................................... + + extern int bdelete (bstring s1, int pos, int len); + + Removes characters from pos to pos+len-1 and shifts the tail of the + bstring starting from pos+len to pos. len must be positive for this call + to have any effect. The section of the bstring described by (pos, len) + is clamped to boundaries of the bstring b. The value BSTR_OK is returned + if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int bconcat (bstring b0, const_bstring b1); + + Concatenate the bstring b1 to the end of bstring b0. The value BSTR_OK + is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bconchar (bstring b, char c); + + Concatenate the character c to the end of bstring b. The value BSTR_OK + is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bcatcstr (bstring b, const char * s); + + Concatenate the char * string s to the end of bstring b. The value + BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bcatblk (bstring b, const void * s, int len); + + Concatenate a fixed length buffer (s, len) to the end of bstring b. The + value BSTR_OK is returned if the operation is successful, otherwise + BSTR_ERR is returned. + + .......................................................................... + + extern int biseq (const_bstring b0, const_bstring b1); + + Compare the bstring b0 and b1 for equality. 
If the bstrings differ, 0 + is returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. If the length of the bstrings are different, this + function has O(1) complexity. Contained '\0' characters are not treated + as a termination character. + + Note that the semantics of biseq are not completely compatible with + bstrcmp because of its different treatment of the '\0' character. + + .......................................................................... + + extern int bisstemeqblk (const_bstring b, const void * blk, int len); + + Compare beginning of bstring b0 with a block of memory of length len for + equality. If the beginning of b0 differs from the memory block (or if b0 + is too short), 0 is returned, if the bstrings are the same, 1 is returned, + if there is an error, -1 is returned. + + .......................................................................... + + extern int biseqcaseless (const_bstring b0, const_bstring b1); + + Compare two bstrings for equality without differentiating between case. + If the bstrings differ other than in case, 0 is returned, if the bstrings + are the same, 1 is returned, if there is an error, -1 is returned. If + the length of the bstrings are different, this function is O(1). '\0' + termination characters are not treated in any special way. + + .......................................................................... + + extern int biseqcaselessblk (const_bstring b, const void * blk, int len); + + Compare content of b and the array of bytes in blk for length len for + equality without differentiating between character case. If the content + differs other than in case, 0 is returned, if, ignoring case, the content + is the same, 1 is returned, if there is an error, -1 is returned. If the + length of the strings are different, this function is O(1). '\0' + termination characters are not treated in any special way. 
+ + .......................................................................... + + extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); + + Compare beginning of bstring b0 with a block of memory of length len + without differentiating between case for equality. If the beginning of b0 + differs from the memory block other than in case (or if b0 is too short), + 0 is returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. + + .......................................................................... + + int biseqblk (const_bstring b, const void * blk, int len) + + Compare the string b with the character block blk of length len. If the + content differs, 0 is returned, if the content is the same, 1 is returned, + if there is an error, -1 is returned. If the length of the strings are + different, this function is O(1). '\0' characters are not treated in + any special way. + + .......................................................................... + + extern int biseqcstr (const_bstring b, const char *s); + + Compare the bstring b and char * string s. The C string s must be '\0' + terminated at exactly the length of the bstring b, and the contents + between the two must be identical with the bstring b with no '\0' + characters for the two contents to be considered equal. This is + equivalent to the condition that their current contents will always be + equal when comparing them in the same format after converting one or the + other. If they are equal 1 is returned, if they are unequal 0 is + returned and if there is a detectable error BSTR_ERR is returned. + + .......................................................................... + + extern int biseqcstrcaseless (const_bstring b, const char *s); + + Compare the bstring b and char * string s. 
The C string s must be '\0' + terminated at exactly the length of the bstring b, and the contents + between the two must be identical except for case with the bstring b with + no '\0' characters for the two contents to be considered equal. This is + equivalent to the condition that their current contents will always be + equal ignoring case when comparing them in the same format after + converting one or the other. If they are equal, except for case, 1 is + returned, if they are unequal regardless of case 0 is returned and if + there is a detectable error BSTR_ERR is returned. + + .......................................................................... + + extern int bstrcmp (const_bstring b0, const_bstring b1); + + Compare the bstrings b0 and b1 for ordering. If there is an error, + SHRT_MIN is returned, otherwise a value less than or greater than zero, + indicating that the bstring pointed to by b0 is lexicographically less + than or greater than the bstring pointed to by b1 is returned. If the + bstring lengths are unequal but the characters up until the length of the + shorter are equal then a value less than, or greater than zero, + indicating that the bstring pointed to by b0 is shorter or longer than the + bstring pointed to by b1 is returned. 0 is returned if and only if the + two bstrings are the same. If the length of the bstrings are different, + this function is O(n). Like its standard C library counter part, the + comparison does not proceed past any '\0' termination characters + encountered. + + The seemingly odd error return value, merely provides slightly more + granularity than the undefined situation given in the C library function + strcmp. The function otherwise behaves very much like strcmp(). + + Note that the semantics of bstrcmp are not completely compatible with + biseq because of its different treatment of the '\0' termination + character. + + .......................................................................... 
+ + extern int bstrncmp (const_bstring b0, const_bstring b1, int n); + + Compare the bstrings b0 and b1 for ordering for at most n characters. If + there is an error, SHRT_MIN is returned, otherwise a value is returned as + if b0 and b1 were first truncated to at most n characters then bstrcmp + was called with these new bstrings as parameters. If the length of the + bstrings are different, this function is O(n). Like its standard C + library counter part, the comparison does not proceed past any '\0' + termination characters encountered. + + The seemingly odd error return value, merely provides slightly more + granularity than the undefined situation given in the C library function + strncmp. The function otherwise behaves very much like strncmp(). + + .......................................................................... + + extern int bstricmp (const_bstring b0, const_bstring b1); + + Compare two bstrings without differentiating between case. The return + value is the difference of the values of the characters where the two + bstrings first differ, otherwise 0 is returned indicating that the + bstrings are equal. If the lengths are different, then a difference from + 0 is given, but if the first extra character is '\0', then it is taken to + be the value UCHAR_MAX+1. + + .......................................................................... + + extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); + + Compare two bstrings without differentiating between case for at most n + characters. If the position where the two bstrings first differ is + before the nth position, the return value is the difference of the values + of the characters, otherwise 0 is returned. If the lengths are different + and less than n characters, then a difference from 0 is given, but if the + first extra character is '\0', then it is taken to be the value + UCHAR_MAX+1. + + .......................................................................... 
+ + extern int bdestroy (bstring b); + + Deallocate the bstring passed. Passing NULL in as a parameter will have + no effect. Note that both the header and the data portion of the bstring + will be freed. No other bstring function which modifies one of its + parameters will free or reallocate the header. Because of this, in + general, bdestroy cannot be called on any declared struct tagbstring even + if it is not write protected. A bstring which is write protected cannot + be destroyed via the bdestroy call. Any attempt to do so will result in + no action taken, and BSTR_ERR will be returned. + + Note to C++ users: Passing in a CBString cast to a bstring will lead to + undefined behavior (free will be called on the header, rather than the + CBString destructor.) Instead just use the ordinary C++ language + facilities to dealloc a CBString. + + .......................................................................... + + extern int binstr (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + forward (increasing) direction. If it is found then it returns with the + first position after pos where it is found, otherwise it returns BSTR_ERR. + The algorithm used is brute force; O(m*n). + + .......................................................................... + + extern int binstrr (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + backward (decreasing) direction. If it is found then it returns with the + first position after pos where it is found, otherwise return BSTR_ERR. + Note that the current position at pos is tested as well -- so to be + disjoint from a previous forward search it is recommended that the + position be backed up (decremented) by one position. The algorithm used + is brute force; O(m*n). + + .......................................................................... 
+ + extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + forward (increasing) direction but without regard to case. If it is + found then it returns with the first position after pos where it is + found, otherwise it returns BSTR_ERR. The algorithm used is brute force; + O(m*n). + + .......................................................................... + + extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + backward (decreasing) direction but without regard to case. If it is + found then it returns with the first position after pos where it is + found, otherwise return BSTR_ERR. Note that the current position at pos + is tested as well -- so to be disjoint from a previous forward search it + is recommended that the position be backed up (decremented) by one + position. The algorithm used is brute force; O(m*n). + + .......................................................................... + + extern int binchr (const_bstring b0, int pos, const_bstring b1); + + Search for the first position in b0 starting from pos or after, in which + one of the characters in b1 is found. This function has an execution + time of O(b0->slen + b1->slen). If such a position does not exist in b0, + then BSTR_ERR is returned. + + .......................................................................... + + extern int binchrr (const_bstring b0, int pos, const_bstring b1); + + Search for the last position in b0 no greater than pos, in which one of + the characters in b1 is found. This function has an execution time + of O(b0->slen + b1->slen). If such a position does not exist in b0, + then BSTR_ERR is returned. + + .......................................................................... 
+ + extern int bninchr (const_bstring b0, int pos, const_bstring b1); + + Search for the first position in b0 starting from pos or after, in which + none of the characters in b1 is found and return it. This function has + an execution time of O(b0->slen + b1->slen). If such a position does + not exist in b0, then BSTR_ERR is returned. + + .......................................................................... + + extern int bninchrr (const_bstring b0, int pos, const_bstring b1); + + Search for the last position in b0 no greater than pos, in which none of + the characters in b1 is found and return it. This function has an + execution time of O(b0->slen + b1->slen). If such a position does not + exist in b0, then BSTR_ERR is returned. + + .......................................................................... + + extern int bstrchr (const_bstring b, int c); + + Search for the character c in the bstring b forwards from the start of + the bstring. Returns the position of the found character or BSTR_ERR if + it is not found. + + NOTE: This has been implemented as a macro on top of bstrchrp (). + + .......................................................................... + + extern int bstrrchr (const_bstring b, int c); + + Search for the character c in the bstring b backwards from the end of the + bstring. Returns the position of the found character or BSTR_ERR if it is + not found. + + NOTE: This has been implemented as a macro on top of bstrrchrp (). + + .......................................................................... + + extern int bstrchrp (const_bstring b, int c, int pos); + + Search for the character c in b forwards from the position pos + (inclusive). Returns the position of the found character or BSTR_ERR if + it is not found. + + .......................................................................... 
+ + extern int bstrrchrp (const_bstring b, int c, int pos); + + Search for the character c in b backwards from the position pos in bstring + (inclusive). Returns the position of the found character or BSTR_ERR if + it is not found. + + .......................................................................... + + extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); + + Overwrite the bstring b0 starting at position pos with the bstring b1. If + the position pos is past the end of b0, then the character "fill" is + appended as necessary to make up the gap between the end of b0 and pos. + If b1 is NULL, it behaves as if it were a 0-length bstring. The value + BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); + + Inserts the bstring s2 into s1 at position pos. If the position pos is + past the end of s1, then the character "fill" is appended as necessary to + make up the gap between the end of s1 and pos. The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + int binsertblk (bstring b, int pos, const void * blk, int len, + unsigned char fill) + + Inserts the block of characters at blk with length len into b at position + pos. If the position pos is past the end of b, then the character "fill" + is appended as necessary to make up the gap between the end of b1 and pos. + Unlike bsetstr, binsert does not allow b2 to be NULL. + + .......................................................................... + + extern int binsertch (bstring s1, int pos, int len, unsigned char fill); + + Inserts the character fill repeatedly into s1 at position pos for a + length len. 
If the position pos is past the end of s1, then the + character "fill" is appended as necessary to make up the gap between the + end of s1 and the position pos + len (exclusive). The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int breplace (bstring b1, int pos, int len, const_bstring b2, + unsigned char fill); + + Replace a section of a bstring from pos for a length len with the bstring + b2. If the position pos is past the end of b1 then the character "fill" + is appended as necessary to make up the gap between the end of b1 and + pos. + + .......................................................................... + + extern int bfindreplace (bstring b, const_bstring find, + const_bstring replace, int position); + + Replace all occurrences of the find substring with a replace bstring + after a given position in the bstring b. The find bstring must have a + length > 0 otherwise BSTR_ERR is returned. This function does not + perform recursive per character replacement; that is to say successive + searches resume at the position after the last replace. + + So for example: + + bfindreplace (a0 = bfromcstr("aabaAb"), a1 = bfromcstr("a"), + a2 = bfromcstr("aa"), 0); + + Should result in changing a0 to "aaaabaaAb". + + This function performs exactly (b->slen - position) bstring comparisons, + and data movement is bounded above by character volume equivalent to size + of the output bstring. + + .......................................................................... + + extern int bfindreplacecaseless (bstring b, const_bstring find, + const_bstring replace, int position); + + Replace all occurrences of the find substring, ignoring case, with a + replace bstring after a given position in the bstring b. The find bstring + must have a length > 0 otherwise BSTR_ERR is returned. 
This function + does not perform recursive per character replacement; that is to say + successive searches resume at the position after the last replace. + + So for example: + + bfindreplacecaseless (a0 = bfromcstr("AAbaAb"), a1 = bfromcstr("a"), + a2 = bfromcstr("aa"), 0); + + Should result in changing a0 to "aaaabaaaab". + + This function performs exactly (b->slen - position) bstring comparisons, + and data movement is bounded above by character volume equivalent to size + of the output bstring. + + .......................................................................... + + extern int balloc (bstring b, int length); + + Increase the allocated memory backing the data buffer for the bstring b + to a length of at least length. If the memory backing the bstring b is + already large enough, no action is performed. This has no effect on the + bstring b that is visible to the bstring API. Usually this function will + only be used when a minimum buffer size is required coupled with a direct + access to the ->data member of the bstring structure. + + Be warned that like any other bstring function, the bstring must be well + defined upon entry to this function. I.e., doing something like: + + b->slen *= 2; /* ?? Most likely incorrect */ + balloc (b, b->slen); + + is invalid, and should be implemented as: + + int t; + if (BSTR_OK == balloc (b, t = (b->slen * 2))) b->slen = t; + + This function will return with BSTR_ERR if b is not detected as a valid + bstring or length is not greater than 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int ballocmin (bstring b, int length); + + Change the amount of memory backing the bstring b to at least length. + This operation will never truncate the bstring data including the + extra terminating '\0' and thus will not decrease the length to less than + b->slen + 1. 
Note that repeated use of this function may cause + performance problems (realloc may be called on the bstring more than + the O(log(INT_MAX)) times). This function will return with BSTR_ERR if b + is not detected as a valid bstring or length is not greater than 0, + otherwise BSTR_OK is returned. + + So for example: + + if (BSTR_OK == ballocmin (b, 64)) b->data[63] = 'x'; + + The idea is that this will set the 64th character of b to 'x' if it is at + least 64 characters long otherwise do nothing. And we know this is well + defined so long as the ballocmin call was successful, since it will + ensure that b has been allocated with at least 64 characters. + + .......................................................................... + + int btrunc (bstring b, int n); + + Truncate the bstring to at most n characters. This function will return + with BSTR_ERR if b is not detected as a valid bstring or n is less than + 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int bpattern (bstring b, int len); + + Replicate the starting bstring, b, end to end repeatedly until it + surpasses len characters, then chop the result to exactly len characters. + This function operates in-place. This function will return with BSTR_ERR + if b is NULL or of length 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int btoupper (bstring b); + + Convert contents of bstring to upper case. This function will return with + BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int btolower (bstring b); + + Convert contents of bstring to lower case. This function will return with + BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is returned. + + .......................................................................... 
+ + extern int bltrimws (bstring b); + + Delete whitespace contiguous from the left end of the bstring. This + function will return with BSTR_ERR if b is NULL or of length 0, otherwise + BSTR_OK is returned. + + .......................................................................... + + extern int brtrimws (bstring b); + + Delete whitespace contiguous from the right end of the bstring. This + function will return with BSTR_ERR if b is NULL or of length 0, otherwise + BSTR_OK is returned. + + .......................................................................... + + extern int btrimws (bstring b); + + Delete whitespace contiguous from both ends of the bstring. This function + will return with BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK + is returned. + + .......................................................................... + + extern struct bstrList* bstrListCreate (void); + + Create an empty struct bstrList. The struct bstrList output structure is + declared as follows: + + struct bstrList { + int qty, mlen; + bstring * entry; + }; + + The entry field actually is an array with qty number entries. The mlen + record counts the maximum number of bstring's for which there is memory + in the entry record. + + The Bstrlib API does *NOT* include a comprehensive set of functions for + full management of struct bstrList in an abstracted way. The reason for + this is because aliasing semantics of the list are best left to the user + of this function, and performance varies wildly depending on the + assumptions made. For a complete list of bstring data type it is + recommended that the C++ public std::vector be used, since its + semantics and usage are more standard. + + .......................................................................... + + extern int bstrListDestroy (struct bstrList * sl); + + Destroy a struct bstrList structure that was returned by the bsplit + function. Note that this will destroy each bstring in the ->entry array + as well.
See bstrListCreate() above for structure of struct bstrList. + + .......................................................................... + + extern int bstrListAlloc (struct bstrList * sl, int msz); + + Ensure that there is memory for at least msz number of entries for the + list. + + .......................................................................... + + extern int bstrListAllocMin (struct bstrList * sl, int msz); + + Try to allocate the minimum amount of memory for the list to include at + least msz entries or sl->qty whichever is greater. + + .......................................................................... + + extern struct bstrList * bsplit (bstring str, unsigned char splitChar); + + Create an array of sequential substrings from str divided by the + character splitChar. Successive occurrences of the splitChar will be + divided by empty bstring entries, following the semantics from the Python + programming language. To reclaim the memory from this output structure, + bstrListDestroy () should be called. See bstrListCreate() above for + structure of struct bstrList. + + .......................................................................... + + extern struct bstrList * bsplits (bstring str, const_bstring splitStr); + + Create an array of sequential substrings from str divided by any + character contained in splitStr. An empty splitStr causes a single entry + bstrList containing a copy of str to be returned. See bstrListCreate() + above for structure of struct bstrList. + + .......................................................................... + + extern struct bstrList * bsplitstr (bstring str, const_bstring splitStr); + + Create an array of sequential substrings from str divided by the entire + substring splitStr. An empty splitStr causes a single entry bstrList + containing a copy of str to be returned. See bstrListCreate() above for + structure of struct bstrList. 
+ + .......................................................................... + + extern bstring bjoin (const struct bstrList * bl, const_bstring sep); + + Join the entries of a bstrList into one bstring by sequentially + concatenating them with the sep bstring in between. If sep is NULL, it + is treated as if it were the empty bstring. Note that: + + bjoin (l = bsplit (b, s->data[0]), s); + + should result in a copy of b, if s->slen is 1. If there is an error NULL + is returned, otherwise a bstring with the correct result is returned. + See bstrListCreate() above for structure of struct bstrList. + + .......................................................................... + + bstring bjoinblk (const struct bstrList * bl, void * blk, int len); + + Join the entries of a bstrList into one bstring by sequentially + concatenating them with the content from blk for length len in between. + If there is an error NULL is returned, otherwise a bstring with the + correct result is returned. + + .......................................................................... + + extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by the character splitChar. The parm passed to + bsplitcb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitcb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitcb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. 
However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitcb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplit that is + abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by any of the characters in splitStr. An empty + splitStr causes the whole str to be iterated once. The parm passed to + bsplitcb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitscb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitscb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitscb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplits that + is abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by the entire substring splitStr. 
An empty splitStr + causes each character of str to be iterated. The parm passed to bsplitcb + is passed on to cb. If the function cb returns a value < 0, then further + iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitstrcb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitstrcb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitscb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplitstr that + is abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern bstring bformat (const char * fmt, ...); + + Takes the same parameters as printf (), but rather than outputting + results to stdio, it forms a bstring which contains what would have been + output. Note that if there is an early generation of a '\0' character, + the bstring will be truncated to this end point. + + Note that %s format tokens correspond to '\0' terminated char * buffers, + not bstrings. To print a bstring, first dereference data element of the + the bstring: + + /* b1->data needs to be '\0' terminated, so tagbstrings generated + by blk2tbstr () might not be suitable. */ + b0 = bformat ("Hello, %s", b1->data); + + Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been + compiled the bformat function is not present. + + .......................................................................... 
+ + extern int bformata (bstring b, const char * fmt, ...); + + In addition to the initial output buffer b, bformata takes the same + parameters as printf (), but rather than outputting results to stdio, it + appends the results to the initial bstring parameter. Note that if + there is an early generation of a '\0' character, the bstring will be + truncated to this end point. + + Note that %s format tokens correspond to '\0' terminated char * buffers, + not bstrings. To print a bstring, first dereference data element of the + the bstring: + + /* b1->data needs to be '\0' terminated, so tagbstrings generated + by blk2tbstr () might not be suitable. */ + bformata (b0 = bfromcstr ("Hello"), ", %s", b1->data); + + Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been + compiled the bformata function is not present. + + .......................................................................... + + extern int bassignformat (bstring b, const char * fmt, ...); + + After the first parameter, it takes the same parameters as printf (), but + rather than outputting results to stdio, it outputs the results to + the bstring parameter b. Note that if there is an early generation of a + '\0' character, the bstring will be truncated to this end point. + + Note that %s format tokens correspond to '\0' terminated char * buffers, + not bstrings. To print a bstring, first dereference data element of the + the bstring: + + /* b1->data needs to be '\0' terminated, so tagbstrings generated + by blk2tbstr () might not be suitable. */ + bassignformat (b0 = bfromcstr ("Hello"), ", %s", b1->data); + + Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been + compiled the bassignformat function is not present. + + .......................................................................... 
+ + extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); + + The bvcformata function formats data under control of the format control + string fmt and attempts to append the result to b. The fmt parameter is + the same as that of the printf function. The variable argument list is + replaced with arglist, which has been initialized by the va_start macro. + The size of the output is upper bounded by count. If the required output + exceeds count, the string b is not augmented with any contents and a value + below BSTR_ERR is returned. If a value below -count is returned then it + is recommended that the negative of this value be used as an update to the + count in a subsequent pass. On other errors, such as running out of + memory, parameter errors or numeric wrap around BSTR_ERR is returned. + BSTR_OK is returned when the output is successfully generated and + appended to b. + + Note: There is no sanity checking of arglist, and this function is + destructive of the contents of b from the b->slen point onward. If there + is an early generation of a '\0' character, the bstring will be truncated + to this end point. + + Although this function is part of the external API for Bstrlib, the + interface and semantics (length limitations, and unusual return codes) + are fairly atypical. The real purpose for this function is to provide an + engine for the bvformata macro. + + Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been + compiled the bvcformata function is not present. + + .......................................................................... + + extern bstring bread (bNread readPtr, void * parm); + typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, + void *parm); + + Read an entire stream into a bstring, verbatim. The readPtr function + pointer is compatible with fread semantics, except that it need not obtain + the stream data from a file.
The intention is that parm would contain + the stream data context/state required (similar to the role of the FILE* + I/O stream parameter of fread.) + + Abstracting the block read function allows for block devices other than + file streams to be read if desired. Note that there is an ANSI + compatibility issue if "fread" is used directly; see the ANSI issues + section below. + + .......................................................................... + + extern int breada (bstring b, bNread readPtr, void * parm); + + Read an entire stream and append it to a bstring, verbatim. Behaves + like bread, except that it appends its results to the bstring b. + BSTR_ERR is returned on error, otherwise 0 is returned. + + .......................................................................... + + extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); + typedef int (* bNgetc) (void * parm); + + Read a bstring from a stream. As many bytes as is necessary are read + until the terminator is consumed or no more characters are available from + the stream. If read from the stream, the terminator character will be + appended to the end of the returned bstring. The getcPtr function must + have the same semantics as the fgetc C library function (i.e., returning + an integer whose value is negative when there are no more characters + available, otherwise the value of the next available unsigned character + from the stream.) The intention is that parm would contain the stream + data context/state required (similar to the role of the FILE* I/O stream + parameter of fgets.) If no characters are read, or there is some other + detectable error, NULL is returned. + + bgets will never call the getcPtr function more often than necessary to + construct its output (including a single call, if required, to determine + that the stream contains no more characters.)
+ + Abstracting the character stream function and terminator character allows + for different stream devices and string formats other than '\n' + terminated lines in a file if desired (consider \032 terminated email + messages, in a UNIX mailbox for example.) + + For files, this function can be used analogously as fgets as follows: + + fp = fopen ( ... ); + if (fp) b = bgets ((bNgetc) fgetc, fp, '\n'); + + (Note that only one terminator character can be used, and that '\0' is + not assumed to terminate the stream in addition to the terminator + character. This is consistent with the semantics of fgets.) + + .......................................................................... + + extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); + + Read from a stream and concatenate to a bstring. Behaves like bgets, + except that it appends its results to the bstring b. The value 1 is + returned if no characters are read before a negative result is returned + from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned + in other normal cases. + + .......................................................................... + + extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); + + Read from a stream and assign the result to a bstring. Behaves like bgets, + except that it assigns the results to the bstring b. The value 1 is + returned if no characters are read before a negative result is returned + from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned + in other normal cases. + + .......................................................................... + + extern struct bStream * bsopen (bNread readPtr, void * parm); + + Wrap a given open stream (described by a fread compatible function + pointer and stream handle) into an open bStream suitable for the bstring + library streaming functions. + + ..........................................................................
+ + extern void * bsclose (struct bStream * s); + + Close the bStream, and return the handle to the stream that was + originally used to open the given stream. If s is NULL or detectably + invalid, NULL will be returned. + + .......................................................................... + + extern int bsbufflength (struct bStream * s, int sz); + + Set the length of the buffer used by the bStream. If sz is the macro + BSTR_BS_BUFF_LENGTH_GET (which is 0), the length is not set. If s is + NULL or sz is negative, the function will return with BSTR_ERR, otherwise + this function returns with the previous length. + + .......................................................................... + + extern int bsreadln (bstring r, struct bStream * s, char terminator); + + Read a bstring terminated by the terminator character or the end of the + stream from the bStream (s) and return it into the parameter r. The + matched terminator, if found, appears at the end of the line read. If + the stream has been exhausted of all available data, before any can be + read, BSTR_ERR is returned. This function may read additional characters + into the stream buffer from the core stream that are not returned, but + will be retained for subsequent read operations. When reading from high + speed streams, this function can perform significantly faster than bgets. + + .......................................................................... + + extern int bsreadlna (bstring r, struct bStream * s, char terminator); + + Read a bstring terminated by the terminator character or the end of the + stream from the bStream (s) and concatenate it to the parameter r. The + matched terminator, if found, appears at the end of the line read. If + the stream has been exhausted of all available data, before any can be + read, BSTR_ERR is returned. 
This function may read additional characters + into the stream buffer from the core stream that are not returned, but + will be retained for subsequent read operations. When reading from high + speed streams, this function can perform significantly faster than bgets. + + .......................................................................... + + extern int bsreadlns (bstring r, struct bStream * s, bstring terminators); + + Read a bstring terminated by any character in the terminators bstring or + the end of the stream from the bStream (s) and return it into the + parameter r. This function may read additional characters from the core + stream that are not returned, but will be retained for subsequent read + operations. + + .......................................................................... + + extern int bsreadlnsa (bstring r, struct bStream * s, bstring terminators); + + Read a bstring terminated by any character in the terminators bstring or + the end of the stream from the bStream (s) and concatenate it to the + parameter r. If the stream has been exhausted of all available data, + before any can be read, BSTR_ERR is returned. This function may read + additional characters from the core stream that are not returned, but + will be retained for subsequent read operations. + + .......................................................................... + + extern int bsread (bstring r, struct bStream * s, int n); + + Read a bstring of length n (or, if it is fewer, as many bytes as is + remaining) from the bStream. This function will read the minimum + required number of additional characters from the core stream. When the + stream is at the end of the file BSTR_ERR is returned, otherwise BSTR_OK + is returned. + + .......................................................................... 
+ + extern int bsreada (bstring r, struct bStream * s, int n); + + Read a bstring of length n (or, if it is fewer, as many bytes as is + remaining) from the bStream and concatenate it to the parameter r. This + function will read the minimum required number of additional characters + from the core stream. When the stream is at the end of the file BSTR_ERR + is returned, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int bsunread (struct bStream * s, const_bstring b); + + Insert a bstring into the bStream at the current position. These + characters will be read prior to those that actually come from the core + stream. + + .......................................................................... + + extern int bspeek (bstring r, const struct bStream * s); + + Return the number of currently buffered characters from the bStream that + will be read prior to reads from the core stream, and append it to the + the parameter r. + + .......................................................................... + + extern int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); + + Iterate the set of disjoint sequential substrings over the stream s + divided by any character from the bstring splitStr. The parm passed to + bssplitscb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this return value is returned by + bssplitscb. + + Note: At the point of calling the cb function, the bStream pointer is + pointed exactly at the position right after having read the split + character. The cb function can act on the stream by causing the bStream + pointer to move, and bssplitscb will continue by starting the next split + at the position of the pointer after the return from cb. 
+ + However, if the cb causes the bStream s to be destroyed then the cb must + return with a negative value, otherwise bssplitscb will continue in an + undefined manner. + + This function is provided as a way to incrementally parse through a file + or other generic stream that in total size may otherwise exceed the + practical or desired memory available. As with the other split callback + based functions this is abortable and does not impose additional memory + allocation. + + .......................................................................... + + extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); + + Iterate the set of disjoint sequential substrings over the stream s + divided by the entire substring splitStr. The parm passed to + bssplitstrcb is passed on to cb. If the function cb returns a + value < 0, then further iterating is halted and this return value is + returned by bssplitstrcb. + + Note: At the point of calling the cb function, the bStream pointer is + pointed exactly at the position right after having read the split + character. The cb function can act on the stream by causing the bStream + pointer to move, and bssplitstrcb will continue by starting the next + split at the position of the pointer after the return from cb. + + However, if the cb causes the bStream s to be destroyed then the cb must + return with a negative value, otherwise bssplitstrcb will continue in an + undefined manner. + + This function is provided as a way to incrementally parse through a file + or other generic stream that in total size may otherwise exceed the + practical or desired memory available. As with the other split callback + based functions this is abortable and does not impose additional memory + allocation. + + ..........................................................................
+ + extern int bseof (const struct bStream * s); + + Return the de facto "EOF" (end of file) state of a stream (1 if the + bStream is in an EOF state, 0 if not, and BSTR_ERR if stream is closed or + detectably erroneous.) When the readPtr callback returns a value <= 0 + the stream reaches its "EOF" state. Note that bsunread with non-empty + content will essentially turn off this state, and the stream will not be + in its "EOF" state so long as it is possible to read more data out of it. + + Also note that the semantics of bseof() are slightly different from + something like feof(). I.e., reaching the end of the stream does not + necessarily guarantee that bseof() will return with a value indicating + that this has happened. bseof() will only return indicating that it has + reached the "EOF" and an attempt has been made to read past the end of + the bStream. + +The macros +---------- + + The macros described below are shown in a prototype form indicating their + intended usage. Note that the parameters passed to these macros will be + referenced multiple times. As with all macros, programmer care is + required to guard against unintended side effects. + + int blengthe (const_bstring b, int err); + + Returns the length of the bstring. If the bstring is NULL err is + returned. + + .......................................................................... + + int blength (const_bstring b); + + Returns the length of the bstring. If the bstring is NULL, the length + returned is 0. + + .......................................................................... + + int bchare (const_bstring b, int p, int c); + + Returns the p'th character of the bstring b. If the position p refers to + a position that does not exist in the bstring or the bstring is NULL, + then c is returned. + + .......................................................................... + + char bchar (const_bstring b, int p); + + Returns the p'th character of the bstring b.
If the position p refers to + a position that does not exist in the bstring or the bstring is NULL, + then '\0' is returned. + + .......................................................................... + + char * bdatae (bstring b, char * err); + + Returns the char * data portion of the bstring b. If b is NULL, err is + returned. + + .......................................................................... + + char * bdata (bstring b); + + Returns the char * data portion of the bstring b. If b is NULL, NULL is + returned. + + .......................................................................... + + char * bdataofse (bstring b, int ofs, char * err); + + Returns the char * data portion of the bstring b offset by ofs. If b is + NULL, err is returned. + + .......................................................................... + + char * bdataofs (bstring b, int ofs); + + Returns the char * data portion of the bstring b offset by ofs. If b is + NULL, NULL is returned. + + .......................................................................... + + struct tagbstring var = bsStatic ("..."); + + The bsStatic macro allows for static declarations of literal string + constants as struct tagbstring structures. The resulting tagbstring does + not need to be freed or destroyed. Note that this macro is only well + defined for string literal arguments. For more general string pointers, + use the btfromcstr macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... 
+ + <- bsStaticBlkParms ("...") + + The bsStaticBlkParms macro emits a pair of comma separated parameters + corresponding to the block parameters for the block functions in Bstrlib + (i.e., blk2bstr, bcatblk, blk2tbstr, bisstemeqblk, bisstemeqcaselessblk.) + Note that this macro is only well defined for string literal arguments. + + Examples: + + bstring b = blk2bstr (bsStaticBlkParms ("Fast init. ")); + bcatblk (b, bsStaticBlkParms ("No frills fast concatenation.")); + + These are faster than using bfromcstr() and bcatcstr() respectively + because the length of the inline string is known as a compile time + constant. Also note that separate struct tagbstring declarations for + holding the output of a bsStatic() macro are not required. + + .......................................................................... + + void btfromcstr (struct tagbstring& t, const char * s); + + Fill in the tagbstring t with the '\0' terminated char buffer s. This + action is purely reference oriented; no memory management is done. The + data member is just assigned s, and slen is assigned the strlen of s. + The s parameter is accessed exactly once in this macro. + + The resulting struct tagbstring is initially write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoke the + bwriteallow on this struct tagbstring to make it writeable (though this + requires that s be obtained from a function compatible with malloc.) + + .......................................................................... + + void btfromblk (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len. This + action is purely reference oriented; no memory management is done. The + data member of t is just assigned s, and slen is assigned len. Note that + the buffer is not appended with a '\0' character.
The s and len + parameters are accessed exactly once each in this macro. + + The resulting struct tagbstring is initially write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoke the + bwriteallow on this struct tagbstring to make it writeable (though this + requires that s be obtained from a function compatible with malloc.) + + .......................................................................... + + void btfromblkltrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been left trimmed. This action is purely reference oriented; no + memory management is done. The data member of t is just assigned to a + pointer inside the buffer s. Note that the buffer is not appended with a + '\0' character. The s and len parameters are accessed exactly once each + in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void btfromblkrtrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been right trimmed. This action is purely reference oriented; no + memory management is done. The data member of t is just assigned to a + pointer inside the buffer s. Note that the buffer is not appended with a + '\0' character. The s and len parameters are accessed exactly once each + in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. 
Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void btfromblktrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been left and right trimmed. This action is purely reference + oriented; no memory management is done. The data member of t is just + assigned to a pointer inside the buffer s. Note that the buffer is not + appended with a '\0' character. The s and len parameters are accessed + exactly once each in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void bmid2tbstr (struct tagbstring& t, bstring b, int pos, int len); + + Fill the tagbstring t with the substring from b, starting from position + pos with a length len. The segment is clamped by the boundaries of + the bstring b. This action is purely reference oriented; no memory + management is done. Note that the buffer is not appended with a '\0' + character. Note that the t parameter to this macro may be accessed + multiple times. Note that the contents of t will become undefined + if the contents of b change or are destroyed. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoking the + bwriteallow macro on this struct tagbstring will have no effect. + + .......................................................................... + + bstring bfromStatic("..."); + + Allocate a bstring with the contents of a string literal. 
Returns + NULL if an error has occurred (ran out of memory). The string literal + parameter is enforced as literal at compile time. + + .......................................................................... + + int bcatStatic (bstring b, "..."); + + Append a string literal to bstring b. Returns 0 if successful, or + BSTR_ERR if some error has occurred. The string literal parameter is + enforced as literal at compile time. + + .......................................................................... + + int binsertStatic (bstring s1, int pos, " ... ", char fill); + + Inserts the string literal into s1 at position pos. If the position pos + is past the end of s1, then the character "fill" is appended as necessary + to make up the gap between the end of s1 and pos. The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + int bassignStatic (bstring b, " ... "); + + Assign the contents of a string literal to the bstring b. The string + literal parameter is enforced as literal at compile time. + + .......................................................................... + + int biseqStatic (const_bstring b, " ... "); + + Compare the string b with the string literal. If the content differs, 0 + is returned, if the content is the same, 1 is returned, if there is an + error, -1 is returned. If the length of the strings are different, this + function is O(1). '\0' characters are not treated in any special way. + + .......................................................................... + + int biseqcaselessStatic (const_bstring b, " ... "); + + Compare content of b and the string literal for equality without + differentiating between character case. If the content differs other + than in case, 0 is returned, if, ignoring case, the content is the same, + 1 is returned, if there is an error, -1 is returned. 
If the length of + the strings are different, this function is O(1). '\0' characters are + not treated in any special way. + + .......................................................................... + + int bisstemeqStatic (bstring b, " ... "); + + Compare beginning of bstring b with a string literal for equality. If + the beginning of b differs from the memory block (or if b is too short), + 0 is returned, if the bstrings are the same, 1 is returned, if there is + an error, -1 is returned. The string literal parameter is enforced as + literal at compile time. + + .......................................................................... + + int bisstemeqcaselessStatic (bstring b, " ... "); + + Compare beginning of bstring b with a string literal without + differentiating between case for equality. If the beginning of b differs + from the memory block other than in case (or if b is too short), 0 is + returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. The string literal parameter is enforced as + literal at compile time. + + .......................................................................... + + bstring bjoinStatic (const struct bstrList * bl, " ... "); + + Join the entries of a bstrList into one bstring by sequentially + concatenating them with the string literal in between. If there is an + error NULL is returned, otherwise a bstring with the correct result is + returned. See bstrListCreate() above for structure of struct bstrList. + + .......................................................................... + + void bvformata (int& ret, bstring b, const char * format, lastarg); + + Append the bstring b with printf like formatting with the format control + string, and the arguments taken from the ... list of arguments after + lastarg passed to the containing function. If the containing function + does not have ... parameters or lastarg is not the last named parameter + before the ... 
then the results are undefined. If successful, the + results are appended to b and BSTR_OK is assigned to ret. Otherwise + BSTR_ERR is assigned to ret. + + Example: + + void dbgerror (FILE * fp, const char * fmt, ...) { + int ret; + bstring b; + bvformata (ret, b = bfromcstr ("DBG: "), fmt, fmt); + if (BSTR_OK == ret) fputs ((char *) bdata (b), fp); + bdestroy (b); + } + + Note that if the BSTRLIB_NOVSNP macro was set when bstrlib had been + compiled the bvformata macro will not link properly. If the + BSTRLIB_NOVSNP macro has been set, the bvformata macro will not be + available. + + .......................................................................... + + void bwriteprotect (struct tagbstring& t); + + Disallow bstring from being written to via the bstrlib API. Attempts to + write to the resulting tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. + + Note: bstrings which are write protected cannot be destroyed via bdestroy. + + Note to C++ users: Setting a CBString as write protected will not prevent + it from being destroyed by the destructor. + + .......................................................................... + + void bwriteallow (struct tagbstring& t); + + Allow bstring to be written to via the bstrlib API. Note that such an + action makes the bstring both writable and destroyable. If the bstring is + not legitimately writable (as is the case for struct tagbstrings + initialized with a bsStatic value), the results of this are undefined. + + Note that invoking the bwriteallow macro may increase the number of + reallocs by one more than necessary for every call to bwriteallow + interleaved with any bstring API which writes to this bstring. + + .......................................................................... + + int biswriteprotected (struct tagbstring& t); + + Returns 1 if the bstring is write protected, otherwise 0 is returned. 
+ +=============================================================================== + +Unicode functions +----------------- + + The two modules utf8util.c and buniutil.c implement basic functions for + parsing and collecting Unicode data in the UTF8 format. Unicode is + described by a sequence of "code points" which are values between 0 and + 1114111 inclusive mapped to symbol content corresponding to nearly all + the standardized scripts of the world. + + The semantics of Unicode code points is varied and complicated. The + base support of the better string library does not attempt to perform + any interpretation of these code points. The better string library + solely provides support for iterating through unicode code points, + appending and extracting code points to and from bstrings, and parsing + UTF8 and UTF16 from raw data. + + The types cpUcs4 and cpUcs2 respectively are defined as 4 byte and 2 byte + encoding formats corresponding to UCS4 and UCS2 respectively. To test + if a raw code point is valid, the macro isLegalUnicodeCodePoint() has + been defined. The utf8 iterator is defined by struct utf8Iterator. To + test if the iterator has more code points to walk through the macro + utf8IteratorNoMore() has been defined. + + To use these functions compile and link utf8util.c and buniutil.c + + .......................................................................... + + extern void utf8IteratorInit (struct utf8Iterator* iter, + unsigned char* data, int slen); + + Initialize a unicode utf8 iterator to traverse an array of utf8 encoded + code points pointed to by data, with length slen from the start. The + iterator iter is only valid for as long as the array it is pointed to + is valid and not modified. + + .......................................................................... + + extern void utf8IteratorUninit (struct utf8Iterator* iter); + + Invalidate utf8 iterator. 
After calling this, the iterator iter should + yield false when passed to the utf8IteratorNoMore() macro. + + .......................................................................... + + extern cpUcs4 utf8IteratorGetNextCodePoint (struct utf8Iterator* iter, + cpUcs4 errCh); + + Parse code point the iterator is pointing at and advance the iterator to + the next code point. If the iterator was pointing at a valid code point + the code point is returned, otherwise, errCh will be returned. + + .......................................................................... + + extern cpUcs4 utf8IteratorGetCurrCodePoint (struct utf8Iterator* iter, + cpUcs4 errCh); + + Parse code point the iterator is pointing at. If the iterator was + pointing at a valid code point the code point is returned, otherwise, + errCh will be returned. + + .......................................................................... + + extern int utf8ScanBackwardsForCodePoint (unsigned char* msg, int len, + int pos, cpUcs4* out); + + From the position "pos" in the array msg of length len, search for the + last position before or at pos from which a valid Unicode code + point can be parsed. If such an offset is found it is returned otherwise + a negative value is returned. The code point parsed is put into *out if + it is not NULL. + + .......................................................................... + + extern int buIsUTF8Content (const_bstring bu); + + Scan a bstring and determine if it is made entirely of valid unicode + code points. If it is, 1 is returned, otherwise 0 is returned. + + .......................................................................... + + extern int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, + cpUcs4 errCh); + + Append the code points passed in the UCS4 format (raw numbers) in the + array bu of length len. Any unparsable characters are replaced by errCh.
+ If errCh is not a valid Unicode code point, then parsing errors will cause + BSTR_ERR to be returned. + + .......................................................................... + + extern int buGetBlkUTF16 (cpUcs2* ucs2, int len, cpUcs4 errCh, + const_bstring bu, int pos); + + Convert a string of UTF8 codepoints (bu), skipping the first pos, into a + sequence of UTF16 encoded code points. Returns the number of UCS2 16-bit + words written to the output. No more than len words are written to the + target array ucs2. If any code point in bu is unparsable, it will be + translated to errCh. + + .......................................................................... + + extern int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len, + cpUcs2* bom, cpUcs4 errCh); + + Append an array of UCS2 code points (utf16) to UTF8 codepoints (bu). Any + invalid code point is replaced by errCh. If errCh is itself not a + valid code point, then this translation will halt upon the first error + and return BSTR_ERR. Otherwise BSTR_OK is returned. If a byte order mark + has been previously read, it may be passed in as bom, otherwise if *bom is + set to 0, it will be filled in with the BOM as read from the first + character if it is a BOM. + +=============================================================================== + +The bstest module +----------------- + +The bstest module is just a unit test for the bstrlib module. For correct +implementations of bstrlib, it should execute with 0 failures being reported. +This test should be utilized if modifications/customizations to bstrlib have +been performed. It tests each core bstrlib function with bstrings of every +mode (read-only, NULL, static and mutable) and ensures that the expected +semantics are observed (including results that should indicate an error). It +also tests for aliasing support. Passing bstest is a necessary but not a +sufficient condition for ensuring the correctness of the bstrlib module. 
+ + +The test module +--------------- + +The test module is just a unit test for the bstrwrap module. For correct +implementations of bstrwrap, it should execute with 0 failures being +reported. This test should be utilized if modifications/customizations to +bstrwrap have been performed. It tests each core bstrwrap function with +CBStrings write protected or not and ensures that the expected semantics are +observed (including expected exceptions.) Note that exceptions cannot be +disabled to run this test. Passing test is a necessary but not a sufficient +condition for ensuring the correctness of the bstrwrap module. + +=============================================================================== + +Using Bstring and CBString as an alternative to the C library +------------------------------------------------------------- + +First let us give a table of C library functions and the alternative bstring +functions and CBString methods that should be used instead of them. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +gets bgets ::gets +strcpy bassign = operator +strncpy bassignmidstr ::midstr +strcat bconcat += operator +strncat bconcat + btrunc += operator + ::trunc +strtok bsplit, bsplits ::split +sprintf b(assign)format ::format +snprintf b(assign)format + btrunc ::format + ::trunc +vsprintf bvformata bvformata + +vsnprintf bvformata + btrunc bvformata + btrunc +vfprintf bvformata + fputs use bvformata + fputs +strcmp biseq, bstrcmp comparison operators. 
+strncmp bstrncmp, memcmp bstrncmp, memcmp +strlen ->slen, blength ::length +strdup bstrcpy constructor +strset bpattern ::fill +strstr binstr ::find +strpbrk binchr ::findchr +stricmp bstricmp cast & use bstricmp +strlwr btolower cast & use btolower +strupr btoupper cast & use btoupper +strrev bReverse (aux module) cast & use bReverse +strchr bstrchr cast & use bstrchr +strspnp use strspn use strspn +ungetc bsunread bsunread + +The top 9 C functions listed here are troublesome in that they impose memory +management in the calling function. The Bstring and CBstring interfaces have +built-in memory management, so there is far less code with far less potential +for buffer overrun problems. strtok can only be reliably called as a "leaf" +calculation, since it (quite bizarrely) maintains hidden internal state. And +gets is well known to be broken no matter what. The Bstrlib alternatives do +not suffer from those sorts of problems. + +The substitute for strncat can be performed with higher performance by using +the blk2tbstr macro to create a presized second operand for bconcat. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +strspn strspn acceptable strspn acceptable +strcspn strcspn acceptable strcspn acceptable +strnset strnset acceptable strnset acceptable +printf printf acceptable printf acceptable +puts puts acceptable puts acceptable +fprintf fprintf acceptable fprintf acceptable +fputs fputs acceptable fputs acceptable +memcmp memcmp acceptable memcmp acceptable + +Remember that Bstring (and CBstring) functions will automatically append the +'\0' character to the character data buffer. So by simply accessing the data +buffer directly, ordinary C string library functions can be called directly +on them. Note that bstrcmp is not the same as memcmp in exactly the same way +that strcmp is not the same as memcmp. 
+ +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +fread balloc + fread ::alloc + fread +fgets balloc + fgets ::alloc + fgets + +These are odd ones because of the exact sizing of the buffer required. The +Bstring and CBString alternatives require that the buffers are forced to +hold at least the prescribed length, then just use fread or fgets directly. +However, typically the automatic memory management of Bstring and CBString +will make the typical use of fgets and fread to read specifically sized +strings unnecessary. + +Implementation Choices +---------------------- + +Overhead: +......... + +The bstring library has more overhead versus straight char buffers for most +functions. This overhead is essentially just the memory management and +string header allocation. This overhead usually only shows up for small +string manipulations. The performance loss has to be considered in +light of the following: + +1) What would be the performance loss of trying to write this management + code in one's own application? +2) Since the bstring library source code is given, a sufficiently powerful + modern inlining globally optimizing compiler can remove function call + overhead. + +Since the data type is exposed, a developer can replace any unsatisfactory +function with their own inline implementation. And that is besides the main +point of what the better string library is mainly meant to provide. Any +overhead lost has to be compared against the value of the safe abstraction +for coupling memory management and string functionality. + +Performance of the C interface: +............................... + +The algorithms used have performance advantages versus the analogous C +library functions. For example: + +1. bfromcstr/blk2bstr/bstrcpy versus strcpy/strdup.
By using memmove instead + of strcpy, the break condition of the copy loop is based on an independent + counter (that should be allocated in a register) rather than having to + check the results of the load. Modern out-of-order executing CPUs can + parallelize the final branch mis-predict penalty with the loading of the + source string. Some CPUs will also tend to have better built-in hardware + support for counted memory moves than load-compare-store. (This is a + minor, but non-zero gain.) +2. biseq versus strcmp. If the strings are unequal in length, biseq will + return in O(1) time. If the strings are aliased, or have aliased data + buffers, biseq will return in O(1) time. strcmp will always be O(k), + where k is the length of the common prefix or the whole string if they are + identical. +3. ->slen versus strlen. ->slen is obviously always O(1), while strlen is + always O(n) where n is the length of the string. +4. bconcat versus strcat. Both rely on precomputing the length of the + destination string argument, which will favor the bstring library. On + iterated concatenations the performance difference can be enormous. +5. bsreadln versus fgets. The bsreadln function reads large blocks at a time + from the given stream, then parses out lines from the buffers directly. + Some C libraries will implement fgets as a loop over single fgetc calls. + Testing indicates that the bsreadln approach can be several times faster + for fast stream devices (such as a file that has been entirely cached.) +6. bsplits/bsplitscb versus strspn. Accelerators for the set of match + characters are generated only once. +7. binstr versus strstr. The binstr implementation unrolls the loops to + help reduce loop overhead. This will matter if the target string is + long and source string is not found very early in the target string.
+ With strstr, while it is possible to unroll the source contents, it is + not possible to do so with the destination contents in a way that is + effective because every destination character must be tested against + '\0' before proceeding to the next character. +8. bReverse versus strrev. The C function must find the end of the string + first before swapping character pairs. +9. bstrrchr versus no comparable C function. It's not hard to write some C + code to search for a character from the end going backwards. But there + is no way to do this without computing the length of the string with + strlen. + +Practical testing indicates that in general Bstrlib is never significantly +slower than the C library for common operations, while very often having a +performance advantage that ranges from significant to massive. Even for +functions like b(n)inchr versus str(c)spn() (where, in theory, there is no +advantage for the Bstrlib architecture) the performance of Bstrlib is vastly +superior to most tested C library implementations. + +Some of Bstrlib's extra functionality also leads to inevitable performance +advantages over typical C solutions. For example, using the blk2tbstr macro, +one can (in O(1) time) generate an internal substring by reference while not +disturbing the original string. If disturbing the original string is not an +option, typically, a comparable char * solution would have to make a copy of +the substring to provide similar functionality. Another example is reverse +character set scanning -- the str(c)spn functions only scan in a forward +direction which can complicate some parsing algorithms. + +Where high performance char * based algorithms are available, Bstrlib can +still leverage them by accessing the ->data field on bstrings. So +realistically Bstrlib can never be significantly slower than any standard +'\0' terminated char * based solutions. + +Performance of the C++ interface: +.................................
+ +The C++ interface has been designed with an emphasis on abstraction and safety +first. However, since it is substantially a wrapper for the C bstring +functions, for longer strings the performance comments described in the +"Performance of the C interface" section above still apply. Note that the +(CBString *) type can be directly cast to a (bstring) type, and passed as +parameters to the C functions (though a CBString must never be passed to +bdestroy.) + +Probably the most controversial choice is performing full bounds checking on +the [] operator. This decision was made because 1) the fast alternative of +not bounds checking is still available by first casting the CBString to a +(const char *) buffer or to a (struct tagbstring) then dereferencing .data and +2) because the lack of bounds checking is seen as one of the main weaknesses +of C/C++ versus other languages. This check being done on every access leads +to individual character extraction being actually slower than other languages +in this one respect (other languages' compilers will normally dedicate more +resources on hoisting or removing bounds checking as necessary) but otherwise +brings C++ up to the level of other languages in terms of functionality. + +It is common for other C++ libraries to leverage the abstractions provided by +C++ to use reference counting and "copy on write" policies. While these +techniques can speed up some scenarios, they impose a problem with respect to +thread safety. bstrings and CBStrings can be properly protected with +"per-object" mutexes, meaning that two bstrlib calls can be made and execute +simultaneously, so long as the bstrings and CBStrings are distinct. With a +reference count and alias before copy on write policy, global mutexes are +required that prevent multiple calls to the strings library to execute +simultaneously regardless of whether or not the strings represent the same +string.
+ +One interesting trade off in CBString is that the default constructor is not +trivial. I.e., it always prepares a ready to use memory buffer. The purpose +is to ensure that there is a uniform internal composition for any functioning +CBString that is compatible with bstrings. It also means that the other +methods in the class are not forced to perform "late initialization" checks. +In the end it means that construction of CBStrings are slower than other +comparable C++ string classes. Initial testing, however, indicates that +CBString outperforms std::string and MFC's CString, for example, in all other +operations. So to work around this weakness it is recommended that CBString +declarations be pushed outside of inner loops. + +Practical testing indicates that with the exception of the caveats given +above (constructors and safe index character manipulations) the C++ API for +Bstrlib generally outperforms popular standard C++ string classes. Amongst +the standard libraries and compilers, the quality of concatenation operations +varies wildly and very little care has gone into search functions. Bstrlib +dominates those performance benchmarks. + +Memory management: +.................. + +The bstring functions which write and modify bstrings will automatically +reallocate the backing memory for the char buffer whenever it is required to +grow. The algorithm for resizing chosen is to snap up to sizes that are a +power of two which are sufficient to hold the intended new size. Memory +reallocation is not performed when the required size of the buffer is +decreased. This behavior can be relied on, and is necessary to make the +behaviour of balloc deterministic. This trades off additional memory usage +for decreasing the frequency for required reallocations: + +1. For any bstring whose size never exceeds n, its buffer is not ever + reallocated more than log_2(n) times for its lifetime. +2. 
For any bstring whose size never exceeds n, its buffer is never more than + 2*(n+1) in length. (The extra characters beyond 2*n are to allow for the + implicit '\0' which is always added by the bstring modifying functions.) + +Decreasing the buffer size when the string decreases in size would violate 1) +above and in real world case lead to pathological heap thrashing. Similarly, +allocating more tightly than "least power of 2 greater than necessary" would +lead to a violation of 1) and have the same potential for heap thrashing. + +Property 2) needs emphasizing. Although the memory allocated is always a +power of 2, for a bstring that grows linearly in size, its buffer memory also +grows linearly, not exponentially. The reason is that the amount of extra +space increases with each reallocation, which decreases the frequency of +future reallocations. + +Obviously, given that bstring writing functions may reallocate the data +buffer backing the target bstring, one should not attempt to cache the data +buffer address and use it after such bstring functions have been called. +This includes making reference struct tagbstrings which alias to a writable +bstring. + +balloc or bfromcstralloc can be used to preallocate the minimum amount of +space used for a given bstring. This will reduce even further the number of +times the data portion is reallocated. If the length of the string is never +more than one less than the memory length then there will be no further +reallocations. + +Note that invoking the bwriteallow macro may increase the number of reallocs +by one more than necessary for every call to bwriteallow interleaved with any +bstring API which writes to this bstring. + +The library does not use any mechanism for automatic clean up for the C API. +Thus explicit clean up via calls to bdestroy() are required to avoid memory +leaks. + +Constant and static tagbstrings: +................................ 
+ +A struct tagbstring can be write protected from any bstrlib function using +the bwriteprotect macro. A write protected struct tagbstring can then be +reset to being writable via the bwriteallow macro. There is, of course, no +protection from attempts to directly access the bstring members. Modifying a +bstring which is write protected by direct access has undefined behavior. + +static struct tagbstrings can be declared via the bsStatic macro. They are +considered permanently unwritable. Such struct tagbstrings's are declared +such that attempts to write to it are not well defined. Invoking either +bwriteallow or bwriteprotect on static struct tagbstrings has no effect. + +struct tagbstring's initialized via btfromcstr or blk2tbstr are protected by +default but can be made writeable via the bwriteallow macro. If bwriteallow +is called on such struct tagbstring's, it is the programmer's responsibility +to ensure that: + +1) the buffer supplied was allocated from the heap. +2) bdestroy is not called on this tagbstring (unless the header itself has + also been allocated from the heap.) +3) free is called on the buffer to reclaim its memory. + +bwriteallow and bwriteprotect can be invoked on ordinary bstrings (they have +to be dereferenced with the (*) operator to get the levels of indirection +correct) to give them write protection. + +Buffer declaration: +................... + +The memory buffer is actually declared "unsigned char *" instead of "char *". +The reason for this is to trigger compiler warnings whenever uncasted char +buffers are assigned to the data portion of a bstring. This will draw more +diligent programmers into taking a second look at the code where they +have carelessly left off the typically required cast. (Research from +AT&T/Lucent indicates that additional programmer eyeballs is one of the most +effective mechanisms at ferreting out bugs.) + +Function pointers: +.................. 
+ +The bgets, bread and bStream functions use function pointers to obtain +strings from data streams. The function pointer declarations have been +specifically chosen to be compatible with the fgetc and fread functions. +While this may seem to be a convoluted way of implementing fgets and fread +style functionality, it has been specifically designed this way to ensure +that there is no dependency on a single narrowly defined set of device +interfaces, such as just stream I/O. In the embedded world, it's quite +possible to have environments where such interfaces may not exist in the +standard C library form. Furthermore, the generalization that this opens up +allows for more sophisticated uses for these functions (performing an fgets +like function on a socket, for example.) By using function pointers, it also +allows such abstract stream interfaces to be created using the bstring library +itself while not creating a circular dependency. + +Use of int's for sizes: +....................... + +This is just a recognition that 16bit platforms with requirements for strings +that are larger than 64K and 32bit+ platforms with requirements for strings +that are larger than 4GB are pretty marginal. The main focus is for 32bit +platforms, and emerging 64bit platforms with reasonable < 4GB string +requirements. Using ints allows for negative values which have meaning +internally to bstrlib. + +Semantic consideration: +....................... + +Certain care needs to be taken when copying and aliasing bstrings. A bstring +is essentially a pointer type which points to a multipart abstract data +structure. Thus usage, and lifetime of bstrings have semantics that follow +these considerations. For example: + + bstring a, b; + struct tagbstring t; + + a = bfromcstr("Hello"); /* Create new bstring and copy "Hello" into it. */ + b = a; /* Alias b to the contents of a. */ + t = *a; /* Create a current instance pseudo-alias of a. */ + bconcat (a, b); /* Double a and b, t is now undefined.
*/ + bdestroy (a); /* Destroy the contents of both a and b. */ + +Variables of type bstring are really just references that point to real +bstring objects. The equal operator (=) creates aliases, and the asterisk +dereference operator (*) creates a kind of alias to the current instance (which +is generally not useful for any purpose.) Using bstrcpy() is the correct way +of creating duplicate instances. The ampersand operator (&) is useful for +creating aliases to struct tagbstrings (remembering that constructed struct +tagbstrings are not writable by default.) + +CBStrings use complete copy semantics for the equal operator (=), and thus do +not have these sorts of issues. + +Debugging: +.......... + +Bstrings have a simple, exposed definition and construction, and the library +itself is open source. So most debugging is going to be fairly straight- +forward. But the memory for bstrings comes from the heap, which can often be +corrupted indirectly, and it might not be obvious what has happened even from +direct examination of the contents in a debugger or a core dump. There are +some tools such as Purify, Insure++ and Electric Fence which can help solve +such problems, however another common approach is to directly instrument the +calls to malloc, realloc, calloc, free, memcpy, memmove and/or other calls +by overriding them with macro definitions. + +Although the user could hack on the Bstrlib sources directly as necessary to +perform such an instrumentation, Bstrlib comes with a built-in mechanism for +doing this. By defining the macro BSTRLIB_MEMORY_DEBUG and providing an +include file named memdbg.h this will force the core Bstrlib modules to +attempt to include this file. In such a file, macros could be defined which +override Bstrlib's usage of the C standard library. 
+ +Rather than calling malloc, realloc, free, memcpy or memmove directly, Bstrlib +emits the macros bstr__alloc, bstr__realloc, bstr__free, bstr__memcpy and +bstr__memmove in their place respectively. By default these macros are simply +assigned to be equivalent to their corresponding C standard library function +call. However, if they are given earlier macro definitions (via the back +door include file) they will not be given their default definition. In this +way Bstrlib's interface to the standard library can be changed but without +having to directly redefine or link standard library symbols (both of which +are not strictly ANSI C compliant.) + +An example definition might include: + + #define bstr__alloc(sz) X_malloc ((sz), __LINE__, __FILE__) + +which might help contextualize heap entries in a debugging environment. + +The NULL parameter and sanity checking of bstrings is part of the Bstrlib +API, and thus Bstrlib itself does not present any different modes which would +correspond to "Debug" or "Release" modes. Bstrlib always contains mechanisms +which one might think of as debugging features, but retains the performance +and small memory footprint one would normally associate with release mode +code. + +Integration with Microsoft's Visual Studio debugger: +.................................................... + +Microsoft's Visual Studio debugger has a capability of customizable mouse +float over data type descriptions. This is accomplished by editing the +AUTOEXP.DAT file to include the following: + + ; new for CBString + tagbstring =slen=<slen> mlen=<mlen> <data,st> + Bstrlib::CBStringList =count=<size()> + +In Visual C++ 6.0 this file is located in the directory: + + C:\Program Files\Microsoft Visual Studio\Common\MSDev98\Bin + +and in Visual Studio .NET 2003 it's located here: + + C:\Program Files\Microsoft Visual Studio .NET 2003\Common7\Packages\Debugger + +This will improve the ability of debugging with Bstrlib under Visual Studio. 
+ +Security +-------- + +Bstrlib does not come with explicit security features outside of its fairly +comprehensive error detection, coupled with its strict semantic support. +That is to say that certain common security problems, such as buffer overrun, +constant overwrite, arbitrary truncation etc, are far less likely to happen +inadvertently. Where it does help, Bstrlib maximizes its advantage by +providing developers a simple adoption path that lets them leave less secure +string mechanisms behind. The library will not leave developers wanting, so +they will be less likely to add new code using a less secure string library +to add functionality that might be missing from Bstrlib. + +That said there are a number of security ideas not addressed by Bstrlib: + +1. Race condition exploitation (i.e., verifying a string's contents, then +raising the privilege level and execute it as a shell command as two +non-atomic steps) is well beyond the scope of what Bstrlib can provide. It +should be noted that MFC's built-in string mutex actually does not solve this +problem either -- it just removes immediate data corruption as a possible +outcome of such exploit attempts (it can be argued that this is worse, since +it will leave no trace of the exploitation). In general race conditions have +to be dealt with by careful design and implementation; it cannot be assisted +by a string library. + +2. Any kind of access control or security attributes to prevent usage in +dangerous interfaces such as system(). Perl includes a "trust" attribute +which can be endowed upon strings that are intended to be passed to such +dangerous interfaces. However, Perl's solution reflects its own limitations +-- notably that it is not a strongly typed language. In the example code for +Bstrlib, there is a module called taint.cpp. 
It demonstrates how to write a +simple wrapper class for managing "untainted" or trusted strings using the +type system to prevent questionable mixing of ordinary untrusted strings with +untainted ones then passing them to dangerous interfaces. In this way the +security correctness of the code reduces to auditing the direct usages of +dangerous interfaces or promotions of tainted strings to untainted ones. + +3. Encryption of string contents is way beyond the scope of Bstrlib. +Maintaining encrypted string contents in the futile hopes of thwarting things +like using system-level debuggers to examine sensitive string data is likely +to be a wasted effort (imagine a debugger that runs at a higher level than a +virtual processor where the application runs). For more standard encryption +usages, since the bstring contents are simply binary blocks of data, this +should pose no problem for usage with other standard encryption libraries. + +Compatibility +------------- + +The Better String Library is known to compile and function correctly with the +following compilers: + + - Microsoft Visual C++ + - Watcom C/C++ + - Intel's C/C++ compiler (Windows) + - The GNU C/C++ compiler (cygwin and Linux on PPC64) + - Borland C + - Turbo C + +Setting of configuration options should be unnecessary for these compilers +(unless exceptions are being disabled or STLport has been added to WATCOM +C/C++). Bstrlib has been developed with an emphasis on portability. As such +porting it to other compilers should be straight forward. This package +includes a porting guide (called porting.txt) which explains what issues may +exist for porting Bstrlib to different compilers and environments. + +ANSI issues +----------- + +1. The function pointer types bNgetc and bNread have prototypes which are very +similar to, but not exactly the same as fgetc and fread respectively. +Basically the FILE * parameter is replaced by void *. 
The purpose of this +was to allow one to create other functions with fgetc and fread like +semantics without being tied to ANSI C's file streaming mechanism. I.e., one +could very easily adapt it to sockets, or simply reading a block of memory, +or procedurally generated strings (for fractal generation, for example.) + +The problem is that invoking the functions (bNgetc)fgetc and (bNread)fread is +not technically legal in ANSI C. The reason being that the compiler is only +able to coerce the function pointers themselves into the target type, but +is unable to perform any cast (implicit or otherwise) on the parameters +passed once invoked. I.e., if internally void * and FILE * need some kind of +mechanical coercion, the compiler will not properly perform this conversion +and thus lead to undefined behavior. + +Apparently a platform from Data General called "Eclipse" and another from +Tandem called "NonStop" have a different representation for pointers to bytes +and pointers to words, for example, where coercion via casting is necessary. +(Actual confirmation of the existence of such machines is hard to come by, so +it is prudent to be skeptical about this information.) However, this is not +an issue for any known contemporary platforms. One may conclude that such +platforms are effectively apocryphal even if they do exist. + +To correctly work around this problem to the satisfaction of the ANSI +limitations, one needs to create wrapper functions for fgetc and/or +fread with the prototypes of bNgetc and/or bNread respectively which perform +no other action other than to explicitly cast the void * parameter to a +FILE *, and simply pass the remaining parameters straight to the function +pointer call. 
+ +The wrappers themselves are trivial: + + size_t freadWrap (void * buff, size_t esz, size_t eqty, void * parm) { + return fread (buff, esz, eqty, (FILE *) parm); + } + + int fgetcWrap (void * parm) { + return fgetc ((FILE *) parm); + } + +These have not been supplied in bstrlib or bstraux to prevent unnecessary +linking with file I/O functions. + +2. vsnprintf is not available on all compilers. Because of this, the bformat +and bformata functions (and format and formata methods) are not guaranteed to +work properly. For those compilers that don't have vsnprintf, the +BSTRLIB_NOVSNP macro should be set before compiling bstrlib, and the format +functions/method will be disabled. + +The more recent ANSI C standards have specified the required inclusion of a +vsnprintf function. + +3. The bstrlib function names are not unique in the first 6 characters. This +is only an issue for older C compiler environments which do not store more +than 6 characters for function names. + +4. The bsafe module defines macros and function names which are part of the +C library. This simply overrides the definition as expected on all platforms +tested, however it is not sanctioned by the ANSI standard. This module is +clearly optional and should be omitted on platforms which disallow its +undefined semantics. + +In practice the real issue is that some compilers in some modes of operation +can/will inline these standard library functions on a module by module basis +as they appear in each. The linker will thus have no opportunity to override +the implementation of these functions for those cases. This can lead to +inconsistent behaviour of the bsafe module on different platforms and +compilers. + +=============================================================================== + +Comparison with Microsoft's CString class +----------------------------------------- + +Although developed independently, CBStrings have very similar functionality to +Microsoft's CString class. 
However, the bstring library has significant +advantages over CString: + +1. Bstrlib is a C-library as well as a C++ library (using the C++ wrapper). + + - Thus it is compatible with more programming environments and + available to a wider population of programmers. + +2. The internal structure of a bstring is considered exposed. + + - A single contiguous block of data can be cut into read-only pieces by + simply creating headers, without allocating additional memory to create + reference copies of each of these sub-strings. + - In this way, using bstrings in a totally abstracted way becomes a choice + rather than an imposition. Further this choice can be made differently + at different layers of applications that use it. + +3. Static declaration support precludes the need for constructor + invocation. + + - Allows for static declarations of constant strings that has no + additional constructor overhead. + +4. Bstrlib is not attached to another library. + + - Bstrlib is designed to be easily plugged into any other library + collection, without dependencies on other libraries or paradigms (such + as "MFC".) + +The bstring library also comes with a few additional functions that are not +available in the CString class: + + - bsetstr + - bsplit + - bread + - breplace (this is different from CString::Replace()) + - Writable indexed characters (for example a[i]='x') + +Interestingly, although Microsoft did implement mid$(), left$() and right$() +functional analogues (these are functions from GWBASIC) they seem to have +forgotten that mid$() could be also used to write into the middle of a string. +This functionality exists in Bstrlib with the bsetstr() and breplace() +functions. + +Among the disadvantages of Bstrlib is that there is no special support for +localization or wide characters. Such things are considered beyond the scope +of what bstrings are trying to deliver. 
CString essentially supports the +older UCS-2 version of Unicode via widechar_t as an application-wide compile +time switch. + +CString's also use built-in mechanisms for ensuring thread safety under all +situations. While this makes writing thread safe code that much easier, this +built-in safety feature has a price -- the inner loops of each CString method +runs in its own critical section (grabbing and releasing a light weight mutex +on every operation.) The usual way to decrease the impact of a critical +section performance penalty is to amortize more operations per critical +section. But since the implementation of CStrings is fixed as a one critical +section per-operation cost, there is no way to leverage this common +performance enhancing idea. + +The search facilities in Bstrlib are comparable to those in MFC's CString +class, though it is missing locale specific collation. But because Bstrlib +is interoperable with C's char buffers, it will allow programmers to write +their own string searching mechanism (such as Boyer-Moore), or be able to +choose from a variety of available existing string searching libraries (such +as those for regular expressions) without difficulty. + +Microsoft used a very non-ANSI conforming trick in its implementation to +allow printf() to use the "%s" specifier to output a CString correctly. This +can be convenient, but it is inherently not portable. CBString requires an +explicit cast, while bstring requires the data member to be dereferenced. +Microsoft's own documentation recommends casting, instead of relying on this +feature. + +Comparison with C++'s std::string +--------------------------------- + +This is the C++ language's standard STL based string class. + +1. There is no C implementation. +2. The [] operator is not bounds checked. +3. Missing a lot of useful functions like printf-like formatting. +4. Some sub-standard std::string implementations (SGI) are necessarily unsafe + to use with multithreading. +5. 
Limited by STL's std::iostream which in turn is limited by ifstream which + can only take input from files. (Compare to CBStream's API which can take + abstracted input.) +6. Extremely uneven performance across implementations. + +Comparison with ISO C TR 24731 proposal +--------------------------------------- + +Following the ISO C99 standard, Microsoft has proposed a group of C library +extensions which are supposedly "safer and more secure". This proposal is +expected to be adopted by the ISO C standard which follows C99. + +The proposal reveals itself to be very similar to Microsoft's "StrSafe" +library. The functions are basically the same as other standard C library +string functions except that destination parameters are paired with an +additional length parameter of type rsize_t. rsize_t is the same as size_t, +however, the range is checked to make sure it's between 1 and RSIZE_MAX. Like +Bstrlib, the functions perform a "parameter check". Unlike Bstrlib, when a +parameter check fails, rather than simply outputting accumulatable error +statuses, they call a user settable global error function handler, and upon +return of control perform no (additional) detrimental action. The proposal +covers basic string functions as well as a few non-reentrant functions +(asctime, ctime, and strtok). + +1. Still based solely on char * buffers (and therefore strlen() and strcat() + is still O(n), and there are no faster streq() comparison functions.) +2. No growable string semantics. +3. Requires manual buffer length synchronization in the source code. +4. No attempt to enhance functionality of the C library. +5. Introduces a new error scenario (strings exceeding RSIZE_MAX length). + +The hope is that by exposing the buffer length requirements there will be +fewer buffer overrun errors. However, the error modes are really just +transformed, rather than removed. The real problem of buffer overflows is +that they all happen as a result of erroneous programming. 
So forcing +programmers to manually deal with buffer limits, will make them more aware of +the problem but doesn't remove the possibility of erroneous programming. So +a programmer that erroneously mixes up the rsize_t parameters is no better off +from a programmer that introduces potential buffer overflows through other +more typical lapses. So at best this may reduce the rate of erroneous +programming, rather than making any attempt at removing failure modes. + +The error handler can discriminate between types of failures, but does not +take into account any callsite context. So the problem is that the error is +going to be manifest in a piece of code, but there is no pointer to that +code. It would seem that passing in the call site __FILE__, __LINE__ as +parameters would be very useful, but the API clearly doesn't support such a +thing (it would increase code bloat even more than the extra length +parameter does, and would require macro tricks to implement). + +The Bstrlib C API takes the position that error handling needs to be done at +the callsite, and just tries to make it as painless as possible. Furthermore, +error modes are removed by supporting auto-growing strings and aliasing. For +capturing errors in more central code fragments, Bstrlib's C++ API uses +exception handling extensively, which is superior to the leaf-only error +handler approach. + +Comparison with Managed String Library CERT proposal +---------------------------------------------------- + +The main webpage for the managed string library: +http://www.cert.org/secure-coding/managedstring.html + +Robert Seacord at CERT has proposed a C string library that he calls the +"Managed String Library" for C. Like Bstrlib, it introduces a new type +which is called a managed string. The structure of a managed string +(string_m) is like a struct tagbstring but missing the length field. This +internal structure is considered opaque. 
The length is, like the C standard +library, always computed on the fly by searching for a terminating NUL on +every operation that requires it. So it suffers from every performance +problem that the C standard library suffers from. Interoperating with C +string APIs (like printf, fopen, or anything else that takes a string +parameter) requires copying to additionally allocated buffers that have to +be manually freed -- this makes this library probably slower and more +cumbersome than any other string library in existence. + +The library gives a fully populated error status as the return value of every +string function. The hope is to be able to diagnose all problems +specifically from the return code alone. Comparing this to Bstrlib, which +always returns one consistent error message, might make it seem that Bstrlib +would be harder to debug; but this is not true. With Bstrlib, if an error +occurs there is always enough information from just knowing there was an error +and examining the parameters to deduce exactly what kind of error has +happened. The managed string library thus gives up nested function calls +while achieving little benefit, while Bstrlib does not. + +One interesting feature that "managed strings" has is the idea of data +sanitization via character set whitelisting. That is to say, a globally +definable filter that makes any attempt to put invalid characters into strings +lead to an error and not modify the string. 
The author gives the following +example: + + // create valid char set + if (retValue = strcreate_m(&str1, "abc") ) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + if (retValue = setcharset(str1)) { + fprintf( + stderr, + "Error %d from setcharset().\n", + retValue + ); + } + if (retValue = strcreate_m(&str1, "aabbccabc")) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + // create string with invalid char set + if (retValue = strcreate_m(&str1, "abbccdabc")) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + +Which we can compare with a more Bstrlib way of doing things: + + bstring bCreateWithFilter (const char * cstr, const_bstring filter) { + bstring b = bfromcstr (cstr); + if (BSTR_ERR != bninchr (b, filter) && NULL != b) { + fprintf (stderr, "Filter violation.\n"); + bdestroy (b); + b = NULL; + } + return b; + } + + struct tagbstring charFilter = bsStatic ("abc"); + bstring str1 = bCreateWithFilter ("aabbccabc", &charFilter); + bstring str2 = bCreateWithFilter ("aabbccdabc", &charFilter); + +The first thing we should notice is that with the Bstrlib approach you can +have different filters for different strings if necessary. Furthermore, +selecting a charset filter in the Managed String Library is uni-contextual. +That is to say, there can only be one such filter active for the entire +program, which means its usage is not well defined for intermediate library +usage (a library that uses it will interfere with user code that uses it, and +vice versa.) It is also likely to be poorly defined in multi-threading +environments. + +There is also a question as to whether the data sanitization filter is checked +on every operation, or just on creation operations. Since the charset can be +set arbitrarily at run time, it might be set *after* some managed strings have +been created. 
This would seem to imply that all functions should run this +additional check every time if there is an attempt to enforce this. This +would make things tremendously slow. On the other hand, if it is assumed that +only creates and other operations that take char *'s as input need be checked +because the charset was only supposed to be called once at and before any +other managed string was created, then one can see that it's easy to cover +Bstrlib with equivalent functionality via a few wrapper calls such as the +example given above. + +And finally we have to question the value of sanitization in the first place. +For example, for httpd servers, there is generally a requirement that the +URLs parsed have some form that avoids undesirable translation to local file +system filenames or resources. The problem is that the way URLs can be +encoded, it must be completely parsed and translated to know if it is using +certain invalid character combinations. That is to say, merely filtering +each character one at a time is not necessarily the right way to ensure that +a string has safe contents. + +In the article that describes this proposal, it is claimed that it fairly +closely approximates the existing C API semantics. On this point we should +compare this "closeness" with Bstrlib: + + Bstrlib Managed String Library + ------- ---------------------- + +Pointer arithmetic Segment arithmetic N/A + +Use in C Std lib ->data, or bdata{e} getstr_m(x,*) ... free(x) + +String literals bsStatic, bsStaticBlk strcreate_m() + +Transparency Complete None + +It's pretty clear that the semantic mapping from C strings to Bstrlib is fairly +straightforward, and that in general semantic capabilities are the same or +superior in Bstrlib. On the other hand the Managed String Library is either +missing semantics or changes things fairly significantly. 
+ +Comparison with Annexia's c2lib library +--------------------------------------- + +This library is available at: +http://www.annexia.org/freeware/c2lib + +1. Still based solely on char * buffers (and therefore strlen() and strcat() + is still O(n), and there are no faster streq() comparison functions.) + Their suggestion that alternatives which wrap the string data type (such as + bstring does) impose a difficulty in interoperating with the C language's + ordinary C string library is not founded. +2. Introduction of memory (and vector?) abstractions imposes a learning + curve, and some kind of memory usage policy that is outside of the strings + themselves (and therefore must be maintained by the developer.) +3. The API is massive, and filled with all sorts of trivial (pjoin) and + controversial (pmatch -- regular expressions are not sufficiently + standardized, and there is a very large difference in performance between + compiled and non-compiled, REs) functions. Bstrlib takes a decidedly + minimal approach -- none of the functionality in c2lib is difficult or + challenging to implement on top of Bstrlib (except the regex stuff, which + is going to be difficult, and controversial no matter what.) +4. Understanding why c2lib is the way it is pretty much requires a working + knowledge of Perl. bstrlib requires only knowledge of the C string library + while providing just a very select few worthwhile extras. +5. It is attached to a lot of cruft like a matrix math library (that doesn't + include any functions for getting the determinant, eigenvectors, + eigenvalues, the matrix inverse, test for singularity, test for + orthogonality, a Gram-Schmidt orthogonalization, LU decomposition ... I + mean why bother?) + +Convincing a development house to use c2lib is likely quite difficult. It +introduces too much, while not being part of any kind of standards body. The +code must therefore be trusted, or maintained by those that use it. 
While +bstring offers nothing more on this front, since it's so much smaller, covers +far less in terms of scope, and will typically improve string performance, +the barrier to usage should be much smaller. + +Comparison with stralloc/qmail +------------------------------ + +More information about this library can be found here: +http://www.canonical.org/~kragen/stralloc.html or here: +http://cr.yp.to/lib/stralloc.html + +1. Library is very very minimal. A little too minimal. +2. Untargeted source parameters are not declared const. +3. Slightly different expected emphasis (like _cats function which takes an + ordinary C string char buffer as a parameter.) It's clear that the + remainder of the C string library is still required to perform more + useful string operations. + +The struct declaration for their string header is essentially the same as that +for bstring. But it's clear that this was a quickly written hack whose goals +are clearly a subset of what Bstrlib supplies. For anyone who is served by +stralloc, Bstrlib is a complete substitute that just adds more functionality. + +stralloc actually uses the interesting policy that a NULL data pointer +indicates an empty string. In this way, non-static empty strings can be +declared without construction. This advantage is minimal, since static empty +bstrings can be declared inline without construction, and if the string needs +to be written to it should be constructed from an empty string (or its first +initializer) in any event. + +wxString class +-------------- + +This is the string class used in the wxWindows project. A description of +wxString can be found here: +http://www.wxwindows.org/manuals/2.4.2/wx368.htm#wxstring + +This C++ library is similar to CBString. However, it is littered with +trivial functions (IsAscii, UpperCase, RemoveLast etc.) + +1. There is no C implementation. +2. 
The memory management strategy is to allocate a bounded fixed amount of + additional space on each resize, meaning that it does not have the + log_2(n) property that Bstrlib has (it will thrash very easily, cause + massive fragmentation in common heap implementations, and can easily be a + common source of performance problems). +3. The library uses a "copy on write" strategy, meaning that it has to deal + with multithreading problems. + +Vstr +---- + +This is a highly orthogonal C string library with an emphasis on +networking/realtime programming. It can be found here: +http://www.and.org/vstr/ + +1. The convoluted internal structure does not contain a '\0' char * compatible + buffer, so interoperability with the C library is a non-starter. +2. The API and implementation is very large (owing to its orthogonality) and + can lead to difficulty in understanding its exact functionality. +3. An obvious dependency on gnu tools (confusing make configure step) +4. Uses a reference counting system, meaning that it is not likely to be + thread safe. + +The implementation has an extreme emphasis on performance for nontrivial +actions (adds, inserts and deletes are all constant or roughly O(#operations) +time) following the "zero copy" principle. This trades off performance of +trivial functions (character access, char buffer access/coercion, alias +detection) which become significantly slower, as well as incremental +accumulative costs for its searching/parsing functions. Whether or not Vstr +wins any particular performance benchmark will depend a lot on the benchmark, +but it should handily win on some, while losing dreadfully on others. + +The learning curve for Vstr is very steep, and it doesn't come with any +obvious way to build for Windows or other platforms without gnu tools. At +least one mechanism (the iterator) introduces a new undefined scenario +(writing to a Vstr while iterating through it.) 
Vstr has a very large +footprint, and is very ambitious in its total functionality. Vstr has no C++ +API. + +Vstr usage requires context initialization via vstr_init() which must be run +in a thread-local context. Given the totally reference based architecture +this means that sharing Vstrings across threads is not well defined, or at +least not safe from race conditions. This API is clearly geared to the older +standard of fork() style multitasking in UNIX, and is not safely transportable +to modern shared memory multithreading available in Linux and Windows. There +is no portable external solution making the library thread safe (since it +requires a mutex around each Vstr context -- not each string.) + +In the documentation for this library, a big deal is made of its self hosted +s(n)printf-like function. This is an issue for older compilers that don't +include vsnprintf(), but also an issue because Vstr has a slow conversion to +'\0' terminated char * mechanism. That is to say, using "%s" to format data +that originates from Vstr would be slow without some sort of native function +to do so. Bstrlib sidesteps the issue by relying on what snprintf-like +functionality does exist and having a high performance conversion to a char * +compatible string so that "%s" can be used directly. + +Str Library +----------- + +This is a fairly extensive string library, that includes full Unicode support +and is targeted at the goal of outperforming MFC and STL. The architecture, +similarly to MFC's CStrings, is a copy on write reference counting mechanism. + +http://www.utilitycode.com/str/default.aspx + +1. Commercial. +2. C++ only. + +This library, like Vstr, uses a ref counting system. There is only so deeply +I can analyze it, since I don't have a license for it. However, performance +improvements over MFC and STL don't seem like a sufficient reason to +move your source base to it. For example, in the future, Microsoft may +improve the performance of CString. 
+ +It should be pointed out that performance testing of Bstrlib has indicated +that its relative performance advantage versus MFC's CString and STL's +std::string is at least as high as that for the Str library. + +libmib astrings +--------------- + +A handful of functional extensions to the C library that add dynamic string +functionality. +http://www.mibsoftware.com/libmib/astring/ + +This package basically references strings through char ** pointers and assumes +they are pointing to the top of an allocated heap entry (or NULL, in which +case memory will be newly allocated from the heap.) So it's still up to the user +to mix and match the older C string functions with these functions whenever +pointer arithmetic is used (i.e., there is no leveraging of the type system +to assert semantic differences between references and base strings as Bstrlib +does since no new types are introduced.) Unlike Bstrlib, exact string length +meta data is not stored, thus requiring a strlen() call on *every* string +writing operation. The library is very small, covering only a handful of C's +functions. + +While this is better than nothing, it is clearly slower than even the +standard C library, less safe and less functional than Bstrlib. + +To explain the advantage of using libmib, their website shows an example of +how dangerous C code: + + char buf[256]; + char *pszExtraPath = ";/usr/local/bin"; + + strcpy(buf,getenv("PATH")); /* oops! could overrun! */ + strcat(buf,pszExtraPath); /* Could overrun as well! */ + + printf("Checking...%s\n",buf); /* Some printfs overrun too! */ + +is avoided using libmib: + + char *pasz = 0; /* Must initialize to 0 */ + char *paszOut = 0; + char *pszExtraPath = ";/usr/local/bin"; + + if (!astrcpy(&pasz,getenv("PATH"))) /* malloc error */ exit(-1); + if (!astrcat(&pasz,pszExtraPath)) /* malloc error */ exit(-1); + + /* Finally, a "limitless" printf! 
we can use */ + asprintf(&paszOut,"Checking...%s\n",pasz);fputs(paszOut,stdout); + + astrfree(&pasz); /* Can use free(pasz) also. */ + astrfree(&paszOut); + +However, compare this to Bstrlib: + + bstring b, out; + + bcatcstr (b = bfromcstr (getenv ("PATH")), ";/usr/local/bin"); + out = bformat ("Checking...%s\n", bdatae (b, "")); + /* if (out && b) */ fputs (bdatae (out, ""), stdout); + bdestroy (b); + bdestroy (out); + +Besides being shorter, we can see that error handling can be deferred right +to the very end. Also, unlike the above two versions, if getenv() returns +with NULL, the Bstrlib version will not exhibit undefined behavior. +Initialization starts with the relevant content rather than an extra +autoinitialization step. + +libclc +------ + +An attempt to add to the standard C library with a number of common useful +functions, including additional string functions. +http://libclc.sourceforge.net/ + +1. Uses standard char * buffer, and adopts C 99's usage of "restrict" to pass + the responsibility to guard against aliasing to the programmer. +2. Adds no safety or memory management whatsoever. +3. Most of the supplied string functions are completely trivial. + +The goals of libclc and Bstrlib are clearly quite different. + +fireString +---------- + +http://firestuff.org/ + +1. Uses standard char * buffer, and adopts C 99's usage of "restrict" to pass + the responsibility to guard against aliasing to the programmer. +2. Mixes char * and length wrapped buffers (estr) functions, doubling the API + size, with safety limited to only half of the functions. + +Firestring was originally just a wrapper of char * functionality with extra +length parameters. However, it has been augmented with the inclusion of the +estr type which has similar functionality to stralloc. But firestring does +not nearly cover the functional scope of Bstrlib. 
+ +Safe C String Library +--------------------- + +A library written for the purpose of increasing safety and power to C's string +handling capabilities. +http://www.zork.org/safestr/safestr.html + +1. While the safestr_* functions are safe in and of themselves, interoperating + with char * strings has dangerous unsafe modes of operation. +2. The architecture of safestr causes the base pointer to change. Thus, + it's not practical/safe to store a safestr in multiple locations if any + single instance can be manipulated. +3. Dependent on an additional error handling library. +4. Uses reference counting, meaning that it is either not thread safe or + slow and not portable. + +I think the idea of reallocating (and hence potentially changing) the base +pointer is a serious design flaw that is fatal to this architecture. True +safety is obtained by having automatic handling of all common scenarios +without creating implicit constraints on the user. + +Because of its automatic temporary clean up system, it cannot use "const" +semantics on input arguments. Interesting anomalies such as: + + safestr_t s, t; + s = safestr_replace (t = SAFESTR_TEMP ("This is a test"), + SAFESTR_TEMP (" "), SAFESTR_TEMP (".")); + /* t is now undefined. */ + +are possible. If one defines a function which takes a safestr_t as a +parameter, then the function would not know whether or not the safestr_t is +defined after it passes it to a safestr library function. The author +recommended method for working around this problem is to examine the +attributes of the safestr_t within the function which is to modify any of +its parameters and play games with its reference count. I think, therefore, +that the whole SAFESTR_TEMP idea is also fatally broken. + +The library implements immutability, optional non-resizability, and a "trust" +flag. 
This trust flag is interesting, and suggests that applying any +arbitrary sequence of safestr_* function calls on any set of trusted strings +will result in a trusted string. It seems to me, however, that if one wanted +to implement a trusted string semantic, one might do so by actually creating +a different *type* and only implement the subset of string functions that are +deemed safe (i.e., user input would be excluded, for example.) This, in +essence, would allow the compiler to enforce trust propagation at compile +time rather than run time. Non-resizability is also interesting, however, +it seems marginal (i.e., to want a string that cannot be resized, yet can be +modified and yet where a fixed sized buffer is undesirable.) + +Libsrt +------ + +This is a length based string library based on a slightly different strategy. +The string contents are appended to the end of the header directly so strings +only require a single allocation. However, whenever a reallocation occurs, +the header is replicated and the base pointer for the string is changed. +That means references to the string are only valid so long as they are not +resized after any such reference is cached. The internal structure maintains +a lot of state used to accelerate unicode manipulation. This makes +sustainable usage of the library essentially opaque. This also creates a +bottleneck for whatever extensions to the library one desires (write all +extensions on top of the base library, put in a request to the author, or +dedicate an expert to learn the internals of the library). The library is +committed to Unicode representation of its string data, and therefore cannot +be used as a generic buffer library. 
+ +=============================================================================== + +Examples +-------- + + Dumping a line numbered file: + + FILE * fp; + int i, ret; + struct bstrList * lines; + struct tagbstring prefix = bsStatic ("-> "); + + if (NULL != (fp = fopen ("bstrlib.txt", "rb"))) { + bstring b = bread ((bNread) fread, fp); + fclose (fp); + if (NULL != (lines = bsplit (b, '\n'))) { + for (i=0; i < lines->qty; i++) { + binsert (lines->entry[i], 0, &prefix, '?'); + printf ("%04d: %s\n", i, bdatae (lines->entry[i], "NULL")); + } + bstrListDestroy (lines); + } + bdestroy (b); + } + +For numerous other examples, see bstraux.c, bstraux.h and the example archive. + +=============================================================================== + +License +------- + +The Better String Library is available under either the BSD license (see the +accompanying license.txt) or the Gnu Public License version 2 (see the +accompanying gpl.txt) at the option of the user. + +=============================================================================== + +Acknowledgements +---------------- + +The following individuals have made significant contributions to the design +and testing of the Better String Library: + +Bjorn Augestad +Clint Olsen +Darryl Bleau +Fabian Cenedese +Graham Wideman +Ignacio Burgueno +International Business Machines Corporation +Ira Mica +John Kortink +Manuel Woelker +Marcel van Kervinck +Michael Hsieh +Richard A. Smith +Simon Ekstrom +Wayne Scott +Zed A. Shaw + +=============================================================================== diff --git a/bstrlib/bstrwrap.cpp b/bstrlib/bstrwrap.cpp new file mode 100644 index 0000000..ce77f17 --- /dev/null +++ b/bstrlib/bstrwrap.cpp @@ -0,0 +1,1721 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license and the GPL. 
Refer to the accompanying documentation for details + * on usage and license. + */ + +/* + * bstrwrap.c + * + * This file is the C++ wrapper for the bstring functions. + */ + +#if defined (_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <limits.h> +#include "bstrwrap.h" + +#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) +#include "memdbg.h" +#endif + +#ifndef bstr__alloc +#define bstr__alloc(x) malloc (x) +#endif + +#ifndef bstr__free +#define bstr__free(p) free (p) +#endif + +#ifndef bstr__realloc +#define bstr__realloc(p,x) realloc ((p), (x)) +#endif + +#ifndef bstr__memcpy +#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) +#endif + +#ifndef bstr__memmove +#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) +#endif + +#ifndef bstr__memset +#define bstr__memset(d,c,l) memset ((d), (c), (l)) +#endif + +#ifndef bstr__memcmp +#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) +#endif + +#ifndef bstr__memchr +#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) +#endif + +#if defined(BSTRLIB_CAN_USE_IOSTREAM) +#include <iostream> +#endif + +namespace Bstrlib { + +// Constructors. 
+ +CBString::CBString () { + slen = 0; + mlen = 8; + data = (unsigned char *) bstr__alloc (mlen); + if (!data) { + mlen = 0; + bstringThrow ("Failure in default constructor"); + } else { + data[0] = '\0'; + } +} + +CBString::CBString (const void * blk, int len) { + data = NULL; + if (len >= 0) { + mlen = len + 1; + slen = len; + data = (unsigned char *) bstr__alloc (mlen); + } + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in block constructor"); + } else { + if (slen > 0) bstr__memcpy (data, blk, slen); + data[slen] = '\0'; + } +} + +CBString::CBString (char c, int len) { + data = NULL; + if (len >= 0) { + mlen = len + 1; + slen = len; + data = (unsigned char *) bstr__alloc (mlen); + } + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in repeat(char) constructor"); + } else { + if (slen > 0) bstr__memset (data, c, slen); + data[slen] = '\0'; + } +} + +CBString::CBString (char c) { + mlen = 2; + slen = 1; + if (NULL == (data = (unsigned char *) bstr__alloc (mlen))) { + mlen = slen = 0; + bstringThrow ("Failure in (char) constructor"); + } else { + data[0] = (unsigned char) c; + data[1] = '\0'; + } +} + +CBString::CBString (unsigned char c) { + mlen = 2; + slen = 1; + if (NULL == (data = (unsigned char *) bstr__alloc (mlen))) { + mlen = slen = 0; + bstringThrow ("Failure in (char) constructor"); + } else { + data[0] = c; + data[1] = '\0'; + } +} + +CBString::CBString (const char *s) { + if (s) { + size_t sslen = strlen (s); + if (sslen >= INT_MAX) bstringThrow ("Failure in (char *) constructor, string too large") + slen = (int) sslen; + mlen = slen + 1; + if (NULL != (data = (unsigned char *) bstr__alloc (mlen))) { + bstr__memcpy (data, s, mlen); + return; + } + } + data = NULL; + bstringThrow ("Failure in (char *) constructor"); +} + +CBString::CBString (int len, const char *s) { + if (s) { + size_t sslen = strlen (s); + if (sslen >= INT_MAX) bstringThrow ("Failure in (char *) constructor, string too large") + slen = (int) sslen; + mlen = slen 
+ 1; + if (mlen < len) mlen = len; + if (NULL != (data = (unsigned char *) bstr__alloc (mlen))) { + bstr__memcpy (data, s, slen + 1); + return; + } + } + data = NULL; + bstringThrow ("Failure in (int len, char *) constructor"); +} + +CBString::CBString (const CBString& b) { + slen = b.slen; + mlen = slen + 1; + data = NULL; + if (mlen > 0) data = (unsigned char *) bstr__alloc (mlen); + if (!data) { + bstringThrow ("Failure in (CBString) constructor"); + } else { + bstr__memcpy (data, b.data, slen); + data[slen] = '\0'; + } +} + +CBString::CBString (const tagbstring& x) { + slen = x.slen; + mlen = slen + 1; + data = NULL; + if (slen >= 0 && x.data != NULL) data = (unsigned char *) bstr__alloc (mlen); + if (!data) { + bstringThrow ("Failure in (tagbstring) constructor"); + } else { + bstr__memcpy (data, x.data, slen); + data[slen] = '\0'; + } +} + +// Destructor. + +CBString::~CBString () { + if (data != NULL) { + bstr__free (data); + data = NULL; + } + mlen = 0; + slen = -__LINE__; +} + +// = operator. 
+ +const CBString& CBString::operator = (char c) { + if (mlen <= 0) bstringThrow ("Write protection error"); + if (2 >= mlen) alloc (2); + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in =(char) operator"); + } else { + slen = 1; + data[0] = (unsigned char) c; + data[1] = '\0'; + } + return *this; +} + +const CBString& CBString::operator = (unsigned char c) { + if (mlen <= 0) bstringThrow ("Write protection error"); + if (2 >= mlen) alloc (2); + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in =(char) operator"); + } else { + slen = 1; + data[0] = c; + data[1] = '\0'; + } + return *this; +} + +const CBString& CBString::operator = (const char *s) { +size_t tmpSlen; + + if (mlen <= 0) bstringThrow ("Write protection error"); + if (NULL == s) s = ""; + if ((tmpSlen = strlen (s)) >= (size_t) mlen) { + if (tmpSlen >= INT_MAX-1) bstringThrow ("Failure in =(const char *) operator, string too large"); + alloc ((int) tmpSlen); + } + + if (data) { + slen = (int) tmpSlen; + bstr__memcpy (data, s, tmpSlen + 1); + } else { + mlen = slen = 0; + bstringThrow ("Failure in =(const char *) operator"); + } + return *this; +} + +const CBString& CBString::operator = (const CBString& b) { + if (mlen <= 0) bstringThrow ("Write protection error"); + if (b.slen >= mlen) alloc (b.slen); + + slen = b.slen; + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in =(CBString) operator"); + } else { + bstr__memcpy (data, b.data, slen); + data[slen] = '\0'; + } + return *this; +} + +const CBString& CBString::operator = (const tagbstring& x) { + if (mlen <= 0) bstringThrow ("Write protection error"); + if (x.slen < 0) bstringThrow ("Failure in =(tagbstring) operator, badly formed tagbstring"); + if (x.slen >= mlen) alloc (x.slen); + + slen = x.slen; + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in =(tagbstring) operator"); + } else { + bstr__memcpy (data, x.data, slen); + data[slen] = '\0'; + } + return *this; +} + +const CBString& CBString::operator += 
(const CBString& b) { + if (BSTR_ERR == bconcat (this, (bstring) &b)) { + bstringThrow ("Failure in concatenate"); + } + return *this; +} + +const CBString& CBString::operator += (const char *s) { + char * d; + int i, l; + + if (mlen <= 0) bstringThrow ("Write protection error"); + + /* Optimistically concatenate directly */ + l = mlen - slen; + d = (char *) &data[slen]; + for (i=0; i < l; i++) { + if ((*d++ = *s++) == '\0') { + slen += i; + return *this; + } + } + slen += i; + + if (BSTR_ERR == bcatcstr (this, s)) { + bstringThrow ("Failure in concatenate"); + } + return *this; +} + +const CBString& CBString::operator += (char c) { + if (BSTR_ERR == bconchar (this, c)) { + bstringThrow ("Failure in concatenate"); + } + return *this; +} + +const CBString& CBString::operator += (unsigned char c) { + if (BSTR_ERR == bconchar (this, (char) c)) { + bstringThrow ("Failure in concatenate"); + } + return *this; +} + +const CBString& CBString::operator += (const tagbstring& x) { + if (mlen <= 0) bstringThrow ("Write protection error"); + if (x.slen < 0) bstringThrow ("Failure in +=(tagbstring) operator, badly formed tagbstring"); + alloc (x.slen + slen + 1); + + if (!data) { + mlen = slen = 0; + bstringThrow ("Failure in +=(tagbstring) operator"); + } else { + bstr__memcpy (data + slen, x.data, x.slen); + slen += x.slen; + data[slen] = '\0'; + } + return *this; +} + +const CBString CBString::operator + (char c) const { + CBString retval (*this); + retval += c; + return retval; +} + +const CBString CBString::operator + (unsigned char c) const { + CBString retval (*this); + retval += c; + return retval; +} + +const CBString CBString::operator + (const CBString& b) const { + CBString retval (*this); + retval += b; + return retval; +} + +const CBString CBString::operator + (const char *s) const { + if (s == NULL) bstringThrow ("Failure in + (char *) operator, NULL"); + CBString retval (*this); + retval += s; + return retval; +} + +const CBString CBString::operator + (const 
unsigned char *s) const { + if (s == NULL) bstringThrow ("Failure in + (unsigned char *) operator, NULL"); + CBString retval (*this); + retval += (const char *) s; + return retval; +} + +const CBString CBString::operator + (const tagbstring& x) const { + if (x.slen < 0) bstringThrow ("Failure in + (tagbstring) operator, badly formed tagbstring"); + CBString retval (*this); + retval += x; + return retval; +} + +bool CBString::operator == (const CBString& b) const { + int retval; + if (BSTR_ERR == (retval = biseq ((bstring)this, (bstring)&b))) { + bstringThrow ("Failure in compare (==)"); + } + return retval > 0; +} + +bool CBString::operator == (const char * s) const { + int retval; + if (NULL == s) { + bstringThrow ("Failure in compare (== NULL)"); + } + if (BSTR_ERR == (retval = biseqcstr ((bstring) this, s))) { + bstringThrow ("Failure in compare (==)"); + } + return retval > 0; +} + +bool CBString::operator == (const unsigned char * s) const { + int retval; + if (NULL == s) { + bstringThrow ("Failure in compare (== NULL)"); + } + if (BSTR_ERR == (retval = biseqcstr ((bstring) this, (const char *) s))) { + bstringThrow ("Failure in compare (==)"); + } + return retval > 0; +} + +bool CBString::operator != (const CBString& b) const { + return ! ((*this) == b); +} + +bool CBString::operator != (const char * s) const { + return ! ((*this) == s); +} + +bool CBString::operator != (const unsigned char * s) const { + return ! 
((*this) == s); +} + +bool CBString::operator < (const CBString& b) const { + int retval; + if (SHRT_MIN == (retval = bstrcmp ((bstring) this, (bstring)&b))) { + bstringThrow ("Failure in compare (<)"); + } + return retval < 0; +} + +bool CBString::operator < (const char * s) const { + if (s == NULL) { + bstringThrow ("Failure in compare (<)"); + } + return strcmp ((const char *)this->data, s) < 0; +} + +bool CBString::operator < (const unsigned char * s) const { + if (s == NULL) { + bstringThrow ("Failure in compare (<)"); + } + return strcmp ((const char *)this->data, (const char *)s) < 0; +} + +bool CBString::operator <= (const CBString& b) const { + int retval; + if (SHRT_MIN == (retval = bstrcmp ((bstring) this, (bstring)&b))) { + bstringThrow ("Failure in compare (<=)"); + } + return retval <= 0; +} + +bool CBString::operator <= (const char * s) const { + if (s == NULL) { + bstringThrow ("Failure in compare (<=)"); + } + return strcmp ((const char *)this->data, s) <= 0; +} + +bool CBString::operator <= (const unsigned char * s) const { + if (s == NULL) { + bstringThrow ("Failure in compare (<=)"); + } + return strcmp ((const char *)this->data, (const char *)s) <= 0; +} + +bool CBString::operator > (const CBString& b) const { + return ! ((*this) <= b); +} + +bool CBString::operator > (const char * s) const { + return ! ((*this) <= s); +} + +bool CBString::operator > (const unsigned char * s) const { + return ! ((*this) <= s); +} + +bool CBString::operator >= (const CBString& b) const { + return ! ((*this) < b); +} + +bool CBString::operator >= (const char * s) const { + return ! ((*this) < s); +} + +bool CBString::operator >= (const unsigned char * s) const { + return ! 
((*this) < s); +} + +CBString::operator double () const { +double d = 0; + if (1 != sscanf ((const char *)this->data, "%lf", &d)) { + bstringThrow ("Unable to convert to a double"); + } + return d; +} + +CBString::operator float () const { +float d = 0; + if (1 != sscanf ((const char *)this->data, "%f", &d)) { + bstringThrow ("Unable to convert to a float"); + } + return d; +} + +CBString::operator int () const { +int d = 0; + if (1 != sscanf ((const char *)this->data, "%d", &d)) { + bstringThrow ("Unable to convert to an int"); + } + return d; +} + +CBString::operator unsigned int () const { +unsigned int d = 0; + if (1 != sscanf ((const char *)this->data, "%u", &d)) { + bstringThrow ("Unable to convert to an unsigned int"); + } + return d; +} + +#ifdef __TURBOC__ +# ifndef BSTRLIB_NOVSNP +# define BSTRLIB_NOVSNP +# endif +#endif + +/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ +#if defined(__WATCOMC__) || defined(_MSC_VER) +#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} +#else +#ifdef BSTRLIB_NOVSNP +/* This is just a hack. If you are using a system without a vsnprintf, it is + not recommended that bformat be used at all. */ +#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} +#define START_VSNBUFF (256) +#else + +#if defined (__GNUC__) && !defined (__PPC__) +/* Something is making gcc complain about this prototype not being here, so + I've just gone ahead and put it in. */ +extern "C" { +extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); +} +#endif + +#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} +#endif +#endif + +#ifndef START_VSNBUFF +#define START_VSNBUFF (16) +#endif + +/* + * Yeah I'd like to just call a vformat function or something, but because of + * the ANSI specified brokeness of the va_* macros, it is actually not + * possible to do this correctly. + */ + +void CBString::format (const char * fmt, ...) 
{ + bstring b; + va_list arglist; + int r, n; + + if (mlen <= 0) bstringThrow ("Write protection error"); + if (fmt == NULL) { + *this = ""; + bstringThrow ("CBString::format (NULL, ...) is erroneous."); + } else { + + if ((b = bfromcstr ("")) == NULL) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::format out of memory."); +#else + *this = ""; +#endif + } else { + if ((n = (int) (2 * (strlen) (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + for (;;) { + if (BSTR_OK != balloc (b, n + 2)) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::format out of memory."); +#else + b = bformat (""); + break; +#endif + } + + va_start (arglist, fmt); + exvsnprintf (r, (char *) b->data, n + 1, fmt, arglist); + va_end (arglist); + + b->data[n] = '\0'; + b->slen = (int) (strlen) ((char *) b->data); + + if (b->slen < n) break; + if (r > n) n = r; else n += n; + } + *this = *b; + bdestroy (b); + } + } +} + +void CBString::formata (const char * fmt, ...) { + bstring b; + va_list arglist; + int r, n; + + if (mlen <= 0) bstringThrow ("Write protection error"); + if (fmt == NULL) { + *this += ""; + bstringThrow ("CBString::formata (NULL, ...) 
is erroneous."); + } else { + + if ((b = bfromcstr ("")) == NULL) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::format out of memory."); +#else + *this += ""; +#endif + } else { + if ((n = (int) (2 * (strlen) (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + for (;;) { + if (BSTR_OK != balloc (b, n + 2)) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::format out of memory."); +#else + b = bformat (""); + break; +#endif + } + + va_start (arglist, fmt); + exvsnprintf (r, (char *) b->data, n + 1, fmt, arglist); + va_end (arglist); + + b->data[n] = '\0'; + b->slen = (int) (strlen) ((char *) b->data); + + if (b->slen < n) break; + if (r > n) n = r; else n += n; + } + *this += *b; + bdestroy (b); + } + } +} + +int CBString::caselessEqual (const CBString& b) const { +int ret; + if (BSTR_ERR == (ret = biseqcaseless ((bstring) this, (bstring) &b))) { + bstringThrow ("CBString::caselessEqual Unable to compare"); + } + return ret; +} + +int CBString::caselessCmp (const CBString& b) const { +int ret; + if (SHRT_MIN == (ret = bstricmp ((bstring) this, (bstring) &b))) { + bstringThrow ("CBString::caselessCmp Unable to compare"); + } + return ret; +} + +int CBString::find (const CBString& b, int pos) const { + return binstr ((bstring) this, pos, (bstring) &b); +} + +/* + int CBString::find (const char * b, int pos) const; + + Uses and unrolling and sliding paired indexes character matching. Since + the unrolling is the primary real world impact the true purpose of this + algorithm choice is maximize the effectiveness of the unrolling. The + idea is to scan until at least one match of the current indexed character + from each string, and then shift indexes of both down by and repeat until + the last character form b matches. When the last character from b + matches if the were no mismatches in previous strlen(b) characters then + we know we have a full match, otherwise shift both indexes back strlen(b) + characters and continue. 
+ + In general, if there is any character in b that is not at all in this + CBString, then this algorithm is O(slen). The algorithm does not easily + degenerate into O(slen * strlen(b)) performance except in very uncommon + situations. Thus from a real world perspective, the overhead of + precomputing suffix shifts in the Boyer-Moore algorithm is avoided, while + delivering an unrolled matching inner loop most of the time. + */ + +int CBString::find (const char * b, int pos) const { +int ii, j; +unsigned char c0; +register int i, l; +register unsigned char cx; +register unsigned char * pdata; + + if (NULL == b) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::find NULL."); +#else + return BSTR_ERR; +#endif + } + + if ((unsigned int) pos > (unsigned int) slen) return BSTR_ERR; + if ('\0' == b[0]) return pos; + if (pos == slen) return BSTR_ERR; + if ('\0' == b[1]) return find (b[0], pos); + + cx = c0 = (unsigned char) b[0]; + l = slen - 1; + + pdata = data; + for (ii = -1, i = pos, j = 0; i < l;) { + /* Unrolled current character test */ + if (cx != pdata[i]) { + if (cx != pdata[1+i]) { + i += 2; + continue; + } + i++; + } + + /* Take note if this is the start of a potential match */ + if (0 == j) ii = i; + + /* Shift the test character down by one */ + j++; + i++; + + /* If this isn't past the last character continue */ + if ('\0' != (cx = b[j])) continue; + + N0:; + + /* If no characters mismatched, then we matched */ + if (i == ii+j) return ii; + + /* Shift back to the beginning */ + i -= j; + j = 0; + cx = c0; + } + + /* Deal with last case if unrolling caused a misalignment */ + if (i == l && cx == pdata[i] && '\0' == b[j+1]) goto N0; + + return BSTR_ERR; +} + +int CBString::caselessfind (const CBString& b, int pos) const { + return binstrcaseless ((bstring) this, pos, (bstring) &b); +} + +int CBString::caselessfind (const char * b, int pos) const { +struct tagbstring t; + + if (NULL == b) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow 
("CBString::caselessfind NULL."); +#else + return BSTR_ERR; +#endif + } + + if ((unsigned int) pos > (unsigned int) slen) return BSTR_ERR; + if ('\0' == b[0]) return pos; + if (pos == slen) return BSTR_ERR; + + btfromcstr (t, b); + return binstrcaseless ((bstring) this, pos, (bstring) &t); +} + +int CBString::find (char c, int pos) const { + if (pos < 0) return BSTR_ERR; + for (;pos < slen; pos++) { + if (data[pos] == (unsigned char) c) return pos; + } + return BSTR_ERR; +} + +int CBString::reversefind (const CBString& b, int pos) const { + return binstrr ((bstring) this, pos, (bstring) &b); +} + +int CBString::reversefind (const char * b, int pos) const { +struct tagbstring t; + if (NULL == b) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::reversefind NULL."); +#else + return BSTR_ERR; +#endif + } + cstr2tbstr (t, b); + return binstrr ((bstring) this, pos, &t); +} + +int CBString::caselessreversefind (const CBString& b, int pos) const { + return binstrrcaseless ((bstring) this, pos, (bstring) &b); +} + +int CBString::caselessreversefind (const char * b, int pos) const { +struct tagbstring t; + + if (NULL == b) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::caselessreversefind NULL."); +#else + return BSTR_ERR; +#endif + } + + if ((unsigned int) pos > (unsigned int) slen) return BSTR_ERR; + if ('\0' == b[0]) return pos; + if (pos == slen) return BSTR_ERR; + + btfromcstr (t, b); + return binstrrcaseless ((bstring) this, pos, (bstring) &t); +} + +int CBString::reversefind (char c, int pos) const { + if (pos > slen) return BSTR_ERR; + if (pos == slen) pos--; + for (;pos >= 0; pos--) { + if (data[pos] == (unsigned char) c) return pos; + } + return BSTR_ERR; +} + +int CBString::findchr (const CBString& b, int pos) const { + return binchr ((bstring) this, pos, (bstring) &b); +} + +int CBString::findchr (const char * s, int pos) const { +struct tagbstring t; + if (NULL == s) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow 
("CBString::findchr NULL."); +#else + return BSTR_ERR; +#endif + } + cstr2tbstr (t, s); + return binchr ((bstring) this, pos, (bstring) &t); +} + +int CBString::nfindchr (const CBString& b, int pos) const { + return bninchr ((bstring) this, pos, (bstring) &b); +} + +int CBString::nfindchr (const char * s, int pos) const { +struct tagbstring t; + if (NULL == s) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::nfindchr NULL."); +#else + return BSTR_ERR; +#endif + } + cstr2tbstr (t, s); + return bninchr ((bstring) this, pos, &t); +} + +int CBString::reversefindchr (const CBString& b, int pos) const { + return binchrr ((bstring) this, pos, (bstring) &b); +} + +int CBString::reversefindchr (const char * s, int pos) const { +struct tagbstring t; + if (NULL == s) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::reversefindchr NULL."); +#else + return BSTR_ERR; +#endif + } + cstr2tbstr (t, s); + return binchrr ((bstring) this, pos, &t); +} + +int CBString::nreversefindchr (const CBString& b, int pos) const { + return bninchrr ((bstring) this, pos, (bstring) &b); +} + +int CBString::nreversefindchr (const char * s, int pos) const { +struct tagbstring t; + if (NULL == s) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("CBString::nreversefindchr NULL."); +#else + return BSTR_ERR; +#endif + } + cstr2tbstr (t, s); + return bninchrr ((bstring) this, pos, &t); +} + +const CBString CBString::midstr (int left, int len) const { +struct tagbstring t; + if (left < 0) { + len += left; + left = 0; + } + if (len > slen - left) len = slen - left; + if (len <= 0) return CBString (""); + blk2tbstr (t, data + left, len); + return CBString (t); +} + +void CBString::alloc (int len) { + if (BSTR_ERR == balloc ((bstring)this, len)) { + bstringThrow ("Failure in alloc"); + } +} + +void CBString::fill (int len, unsigned char cfill) { + slen = 0; + if (BSTR_ERR == bsetstr (this, len, NULL, cfill)) { + bstringThrow ("Failure in fill"); + } +} + +void 
CBString::setsubstr (int pos, const CBString& b, unsigned char cfill) { + if (BSTR_ERR == bsetstr (this, pos, (bstring) &b, cfill)) { + bstringThrow ("Failure in setsubstr"); + } +} + +void CBString::setsubstr (int pos, const char * s, unsigned char cfill) { +struct tagbstring t; + if (NULL == s) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("setsubstr NULL."); +#else + return; +#endif + } + cstr2tbstr (t, s); + if (BSTR_ERR == bsetstr (this, pos, &t, cfill)) { + bstringThrow ("Failure in setsubstr"); + } +} + +void CBString::insert (int pos, const CBString& b, unsigned char cfill) { + if (BSTR_ERR == binsert (this, pos, (bstring) &b, cfill)) { + bstringThrow ("Failure in insert"); + } +} + +void CBString::insert (int pos, const char * s, unsigned char cfill) { +struct tagbstring t; + if (NULL == s) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("insert NULL."); +#else + return; +#endif + } + cstr2tbstr (t, s); + if (BSTR_ERR == binsert (this, pos, &t, cfill)) { + bstringThrow ("Failure in insert"); + } +} + +void CBString::insertchrs (int pos, int len, unsigned char cfill) { + if (BSTR_ERR == binsertch (this, pos, len, cfill)) { + bstringThrow ("Failure in insertchrs"); + } +} + +void CBString::replace (int pos, int len, const CBString& b, unsigned char cfill) { + if (BSTR_ERR == breplace (this, pos, len, (bstring) &b, cfill)) { + bstringThrow ("Failure in replace"); + } +} + +void CBString::replace (int pos, int len, const char * s, unsigned char cfill) { +struct tagbstring t; +size_t q; + + if (mlen <= 0) bstringThrow ("Write protection error"); + if (NULL == s || (pos|len) < 0) { + bstringThrow ("Failure in replace"); + } else { + if (pos + len >= slen) { + cstr2tbstr (t, s); + if (BSTR_ERR == bsetstr (this, pos, &t, cfill)) { + bstringThrow ("Failure in replace"); + } else if (pos + t.slen < slen) { + slen = pos + t.slen; + data[slen] = '\0'; + } + } else { + + /* Aliasing case */ + if ((unsigned int) (data - (unsigned char *) s) < (unsigned int) 
slen) { + replace (pos, len, CBString(s), cfill); + return; + } + + if ((q = strlen (s)) > (size_t) len || len < 0) { + if (slen + q - len >= INT_MAX) bstringThrow ("Failure in replace, result too long."); + alloc ((int) (slen + q - len)); + if (NULL == data) return; + } + if ((int) q != len) bstr__memmove (data + pos + q, data + pos + len, slen - (pos + len)); + bstr__memcpy (data + pos, s, q); + slen += ((int) q) - len; + data[slen] = '\0'; + } + } +} + +void CBString::findreplace (const CBString& sfind, const CBString& repl, int pos) { + if (BSTR_ERR == bfindreplace (this, (bstring) &sfind, (bstring) &repl, pos)) { + bstringThrow ("Failure in findreplace"); + } +} + +void CBString::findreplace (const CBString& sfind, const char * repl, int pos) { +struct tagbstring t; + if (NULL == repl) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("findreplace NULL."); +#else + return; +#endif + } + cstr2tbstr (t, repl); + if (BSTR_ERR == bfindreplace (this, (bstring) &sfind, (bstring) &t, pos)) { + bstringThrow ("Failure in findreplace"); + } +} + +void CBString::findreplace (const char * sfind, const CBString& repl, int pos) { +struct tagbstring t; + if (NULL == sfind) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("findreplace NULL."); +#else + return; +#endif + } + cstr2tbstr (t, sfind); + if (BSTR_ERR == bfindreplace (this, (bstring) &t, (bstring) &repl, pos)) { + bstringThrow ("Failure in findreplace"); + } +} + +void CBString::findreplace (const char * sfind, const char * repl, int pos) { +struct tagbstring t, u; + if (NULL == repl || NULL == sfind) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("findreplace NULL."); +#else + return; +#endif + } + cstr2tbstr (t, sfind); + cstr2tbstr (u, repl); + if (BSTR_ERR == bfindreplace (this, (bstring) &t, (bstring) &u, pos)) { + bstringThrow ("Failure in findreplace"); + } +} + +void CBString::findreplacecaseless (const CBString& sfind, const CBString& repl, int pos) { + if (BSTR_ERR == bfindreplacecaseless (this, 
(bstring) &sfind, (bstring) &repl, pos)) { + bstringThrow ("Failure in findreplacecaseless"); + } +} + +void CBString::findreplacecaseless (const CBString& sfind, const char * repl, int pos) { +struct tagbstring t; + if (NULL == repl) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("findreplacecaseless NULL."); +#else + return; +#endif + } + cstr2tbstr (t, repl); + if (BSTR_ERR == bfindreplacecaseless (this, (bstring) &sfind, (bstring) &t, pos)) { + bstringThrow ("Failure in findreplacecaseless"); + } +} + +void CBString::findreplacecaseless (const char * sfind, const CBString& repl, int pos) { +struct tagbstring t; + if (NULL == sfind) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("findreplacecaseless NULL."); +#else + return; +#endif + } + cstr2tbstr (t, sfind); + if (BSTR_ERR == bfindreplacecaseless (this, (bstring) &t, (bstring) &repl, pos)) { + bstringThrow ("Failure in findreplacecaseless"); + } +} + +void CBString::findreplacecaseless (const char * sfind, const char * repl, int pos) { +struct tagbstring t, u; + if (NULL == repl || NULL == sfind) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("findreplacecaseless NULL."); +#else + return; +#endif + } + cstr2tbstr (t, sfind); + cstr2tbstr (u, repl); + if (BSTR_ERR == bfindreplacecaseless (this, (bstring) &t, (bstring) &u, pos)) { + bstringThrow ("Failure in findreplacecaseless"); + } +} + +void CBString::remove (int pos, int len) { + if (BSTR_ERR == bdelete (this, pos, len)) { + bstringThrow ("Failure in remove"); + } +} + +void CBString::trunc (int len) { + if (len < 0) { + bstringThrow ("Failure in trunc"); + } + if (len < slen) { + slen = len; + data[len] = '\0'; + } +} + +void CBString::ltrim (const CBString& b) { + int l = nfindchr (b, 0); + if (l == BSTR_ERR) l = slen; + remove (0, l); +} + +void CBString::rtrim (const CBString& b) { + int l = nreversefindchr (b, slen - 1); +#if BSTR_ERR != -1 + if (l == BSTR_ERR) l = -1; +#endif + slen = l + 1; + if (mlen > slen) data[slen] = '\0'; +} + 
+// Convert the string to upper case in place via btoupper().
+void CBString::toupper () {
+    if (BSTR_ERR == btoupper ((bstring) this)) {
+        bstringThrow ("Failure in toupper");
+    }
+}
+
+// Convert the string to lower case in place via btolower().
+void CBString::tolower () {
+    if (BSTR_ERR == btolower ((bstring) this)) {
+        bstringThrow ("Failure in tolower");
+    }
+}
+
+// Replace the string with count copies of itself.  A zero count (or an
+// empty string) yields the empty string; a negative count, or a result
+// longer than INT_MAX, is an error.
+void CBString::repeat (int count) {
+    if (count == 0 || slen == 0) {
+        trunc (0);
+        return;
+    }
+    // Check the range before multiplying: count * slen on overflow is
+    // undefined behaviour, so it cannot be detected after the fact.
+    if (count < 0 || count > INT_MAX / slen ||
+        BSTR_ERR == bpattern (this, count * slen)) {
+        bstringThrow ("Failure in repeat");
+    }
+}
+
+// Read from getcPtr/parm with bgets() and store the result in this string.
+// Returns 0 on success; returns -1 and empties the string when bgets()
+// yields nothing.
+int CBString::gets (bNgetc getcPtr, void * parm, char terminator) {
+    if (mlen <= 0) {
+        bstringThrow ("Write protection error");
+        return -1;  // exceptions disabled: do not touch a protected string
+    }
+    bstring b = bgets (getcPtr, parm, terminator);
+    if (b == NULL) {
+        slen = 0;
+        // Keep the C-string view consistent with the new length.
+        if (data != NULL) data[0] = (unsigned char) '\0';
+        return -1;
+    }
+#ifdef BSTRLIB_THROWS_EXCEPTIONS
+    // The assignment can throw; release the temporary before propagating.
+    try {
+        *this = *b;
+    } catch (...) {
+        bdestroy (b);
+        throw;
+    }
+#else
+    *this = *b;
+#endif
+    bdestroy (b);
+    return 0;
+}
+
+// Read from readPtr/parm with bread() and store the result in this string.
+// Returns 0 on success; returns -1 and empties the string when bread()
+// yields nothing.
+int CBString::read (bNread readPtr, void * parm) {
+    if (mlen <= 0) {
+        bstringThrow ("Write protection error");
+        return -1;  // exceptions disabled: do not touch a protected string
+    }
+    bstring b = bread (readPtr, parm);
+    if (b == NULL) {
+        slen = 0;
+        if (data != NULL) data[0] = (unsigned char) '\0';
+        return -1;
+    }
+#ifdef BSTRLIB_THROWS_EXCEPTIONS
+    try {
+        *this = *b;
+    } catch (...) {
+        bdestroy (b);
+        throw;
+    }
+#else
+    *this = *b;
+#endif
+    bdestroy (b);
+    return 0;
+}
+
+// Free operator + overloads: build a CBString from the left operand and
+// concatenate b onto it.
+const CBString operator + (const char *a, const CBString& b) {
+    return CBString(a) + b;
+}
+
+const CBString operator + (const unsigned char *a, const CBString& b) {
+    return CBString((const char *)a) + b;
+}
+
+const CBString operator + (char c, const CBString& b) {
+    return CBString(c) + b;
+}
+
+const CBString operator + (unsigned char c, const CBString& b) {
+    return CBString(c) + b;
+}
+
+const CBString operator + (const tagbstring& x, const CBString& b) {
+    return CBString(x) + b;
+}
+
+// Mark the string read-only by setting mlen to -1.
+void CBString::writeprotect () {
+    if (mlen >= 0) mlen = -1;
+}
+
+// Undo writeprotect().  Any other negative mlen is treated as a constant
+// that cannot be unprotected.
+void CBString::writeallow () {
+    if (mlen == -1) mlen = slen + (slen == 0);
+    else if (mlen < 0) {
+        bstringThrow ("Cannot unprotect a constant");
+    }
+}
+
+#if defined(BSTRLIB_CAN_USE_STL)
+
+// Constructors.
+
+// Buffer size (payload plus the terminating '\0') needed to concatenate
+// every entry of l, each followed by sepLen separator bytes.  Returns -1 if
+// the total does not fit in an int; the range is checked before each
+// addition so no signed overflow occurs.
+static int cbstrJoinedSize (const CBStringList& l, int sepLen) {
+    int c = 1;
+    for (size_t i = 0; i < l.size(); i++) {
+        int n = l.at(i).slen;
+        if (n < 0 || sepLen < 0 || n > INT_MAX - c) return -1;
+        c += n;
+        if (sepLen > INT_MAX - c) return -1;
+        c += sepLen;
+    }
+    return c;
+}
+
+// Append every entry of l to dst.
+static void cbstrAppendJoined (CBString& dst, const CBStringList& l) {
+    for (size_t i = 0; i < l.size(); i++) {
+        dst += l.at(i);
+    }
+}
+
+// Append every entry of l to dst, with sep between consecutive entries.
+template <typename Sep>
+static void cbstrAppendJoined (CBString& dst, const CBStringList& l, const Sep& sep) {
+    for (size_t i = 0; i < l.size(); i++) {
+        if (i > 0) dst += sep;
+        dst += l.at(i);
+    }
+}
+
+// Allocate a c-byte buffer for a string under construction and make it a
+// valid empty string.  Returns false (with s left as mlen = slen = 0,
+// data = NULL) if c is invalid or the allocation fails.
+static bool cbstrInitJoined (CBString& s, int c) {
+    s.mlen = s.slen = 0;
+    s.data = (c > 0) ? (unsigned char *) bstr__alloc (c) : NULL;
+    if (!s.data) return false;
+    s.mlen = c;
+    // An empty list appends nothing, so terminate explicitly.
+    s.data[0] = (unsigned char) '\0';
+    return true;
+}
+
+// Construct the concatenation of all entries of l.
+CBString::CBString (const CBStringList& l) {
+    if (!cbstrInitJoined (*this, cbstrJoinedSize (l, 0))) {
+        bstringThrow ("Failure in (CBStringList) constructor");
+    } else {
+        cbstrAppendJoined (*this, l);
+    }
+}
+
+// Construct the entries of l joined with the string sep.
+CBString::CBString (const struct CBStringList& l, const CBString& sep) {
+    if (!cbstrInitJoined (*this, cbstrJoinedSize (l, sep.length ()))) {
+        bstringThrow ("Failure in (CBStringList) constructor");
+    } else {
+        cbstrAppendJoined (*this, l, sep);
+    }
+}
+
+// Construct the entries of l joined with the character sep.
+CBString::CBString (const struct CBStringList& l, char sep) {
+    if (!cbstrInitJoined (*this, cbstrJoinedSize (l, 1))) {
+        bstringThrow ("Failure in (CBStringList) constructor");
+    } else {
+        cbstrAppendJoined (*this, l, sep);
+    }
+}
+
+CBString::CBString (const struct CBStringList& l, unsigned char sep) {
+    if (!cbstrInitJoined (*this, cbstrJoinedSize (l, 1))) {
+        bstringThrow ("Failure in (CBStringList) constructor");
+    } else {
+        cbstrAppendJoined (*this, l, sep);
+    }
+}
+
+// Replace the contents of this string with the concatenation of l.
+void CBString::join (const struct CBStringList& l) {
+    if (mlen <= 0) {
+        bstringThrow ("Write protection error");
+        return;     // exceptions disabled: leave a protected string alone
+    }
+
+    int c = cbstrJoinedSize (l, 0);
+    if (c < 0) {
+        bstringThrow ("Failure in join, too long");
+        return;
+    }
+
+    alloc (c);
+    slen = 0;
+    if (!data) {
+        mlen = slen = 0;
+        bstringThrow ("Failure in join");
+    } else {
+        data[0] = (unsigned char) '\0';     // stays valid if l is empty
+        cbstrAppendJoined (*this, l);
+    }
+}
+
+// Replace the contents of this string with l joined by the string sep.
+void CBString::join (const struct CBStringList& l, const CBString& sep) {
+    if (mlen <= 0) {
+        bstringThrow ("Write protection error");
+        return;
+    }
+
+    int c = cbstrJoinedSize (l, sep.length ());
+    if (c < 0) {
+        bstringThrow ("Failure in join, too long");
+        return;
+    }
+
+    alloc (c);
+    slen = 0;
+    if (!data) {
+        mlen = slen = 0;
+        bstringThrow ("Failure in join");
+    } else {
+        data[0] = (unsigned char) '\0';
+        cbstrAppendJoined (*this, l, sep);
+    }
+}
+
+
+// Replace the contents of this string with l joined by the character sep.
+void CBString::join (const struct CBStringList& l, char sep) {
+    if (mlen <= 0) {
+        bstringThrow ("Write protection error");
+        return;
+    }
+
+    int c = cbstrJoinedSize (l, 1);
+    if (c < 0) {
+        bstringThrow ("Failure in join, too long");
+        return;
+    }
+
+    alloc (c);
+    slen = 0;
+    if (!data) {
+        mlen = slen = 0;
+        bstringThrow ("Failure in join");
+    } else {
+        data[0] = (unsigned char) '\0';
+        cbstrAppendJoined (*this, l, sep);
+    }
+}
+
+void CBString::join (const struct CBStringList& l, unsigned char sep) {
+    if (mlen <= 0) {
+        bstringThrow ("Write protection error");
+        return;
+    }
+
+    int c = cbstrJoinedSize (l, 1);
+    if (c < 0) {
+        bstringThrow ("Failure in join, too long");
+        return;
+    }
+
+    alloc (c);
+    slen = 0;
+    if (!data) {
+        mlen = slen = 0;
+        bstringThrow ("Failure in join");
+    } else {
+        data[0] = (unsigned char) '\0';
+        cbstrAppendJoined (*this, l, sep);
+    }
+}
+
+// Split functions.
+ +void CBStringList::split (const CBString& b, unsigned char splitChar) { +int p, i; + + p = 0; + do { + for (i = p; i < b.length (); i++) { + if (b.character (i) == splitChar) break; + } + if (i >= p) this->push_back (CBString (&(b.data[p]), i - p)); + p = i + 1; + } while (p <= b.length ()); +} + +void CBStringList::split (const CBString& b, const CBString& s) { +struct { unsigned long content[(1 << CHAR_BIT) / 32]; } chrs; +unsigned char c; +int p, i; + + if (s.length() == 0) bstringThrow ("Null splitstring failure"); + if (s.length() == 1) { + this->split (b, s.character (0)); + } else { + + for (i=0; i < ((1 << CHAR_BIT) / 32); i++) chrs.content[i] = 0x0; + for (i=0; i < s.length(); i++) { + c = s.character (i); + chrs.content[c >> 5] |= ((long)1) << (c & 31); + } + + p = 0; + do { + for (i = p; i < b.length (); i++) { + c = b.character (i); + if (chrs.content[c >> 5] & ((long)1) << (c & 31)) break; + } + if (i >= p) this->push_back (CBString (&(b.data[p]), i - p)); + p = i + 1; + } while (p <= b.length ()); + } +} + +void CBStringList::splitstr (const CBString& b, const CBString& s) { +int p, i; + + if (s.length() == 1) { + this->split (b, s.character (0)); + } else if (s.length() == 0) { + for (i=0; i < b.length (); i++) { + this->push_back (CBString (b.data[i])); + } + } else { + for (p=0; (i = b.find (s, p)) >= 0; p = i + s.length ()) { + this->push_back (b.midstr (p, i - p)); + } + if (p <= b.length ()) { + this->push_back (b.midstr (p, b.length () - p)); + } + } +} + +static int streamSplitCb (void * parm, int ofs, const_bstring entry) { +CBStringList * r = (CBStringList *) parm; + + ofs = ofs; + r->push_back (CBString (*entry)); + return 0; +} + +void CBStringList::split (const CBStream& b, const CBString& s) { + if (0 > bssplitscb (b.m_s, (bstring) &s, streamSplitCb, + (void *) this)) { + bstringThrow ("Split bstream failure"); + } +} + +void CBStringList::split (const CBStream& b, unsigned char splitChar) { +CBString sc (splitChar); + if (0 > 
bssplitscb (b.m_s, (bstring) &sc, + streamSplitCb, (void *) this)) { + bstringThrow ("Split bstream failure"); + } +} + +void CBStringList::splitstr (const CBStream& b, const CBString& s) { + if (0 > bssplitstrcb (b.m_s, (bstring) &s, streamSplitCb, + (void *) this)) { + bstringThrow ("Split bstream failure"); + } +} + +#endif + +#if defined(BSTRLIB_CAN_USE_IOSTREAM) + +std::ostream& operator << (std::ostream& sout, CBString b) { + return sout.write ((const char *)b, b.length()); +} + +#include + +static int istreamGets (void * parm) { + char c = '\n'; + ((std::istream *)parm)->get(c); + if (isspace (c)) c = '\n'; + return c; +} + +std::istream& operator >> (std::istream& sin, CBString& b) { + do { + b.gets ((bNgetc) istreamGets, &sin, '\n'); + if (b.slen > 0 && b.data[b.slen-1] == '\n') b.slen--; + } while (b.slen == 0 && !sin.eof ()); + return sin; +} + +struct sgetc { + std::istream * sin; + char terminator; +}; + +static int istreamGetc (void * parm) { + char c = ((struct sgetc *)parm)->terminator; + ((struct sgetc *)parm)->sin->get(c); + return c; +} + +std::istream& getline (std::istream& sin, CBString& b, char terminator) { +struct sgetc parm; + parm.sin = &sin; + parm.terminator = terminator; + b.gets ((bNgetc) istreamGetc, &parm, terminator); + if (b.slen > 0 && b.data[b.slen-1] == terminator) b.slen--; + return sin; +} + +#endif + +CBStream::CBStream (bNread readPtr, void * parm) { + m_s = bsopen (readPtr, parm); +} + +CBStream::~CBStream () { + bsclose (m_s); +} + +int CBStream::buffLengthSet (int sz) { + if (sz <= 0) { + bstringThrow ("buffLengthSet parameter failure"); + } + return bsbufflength (m_s, sz); +} + +int CBStream::buffLengthGet () { + return bsbufflength (m_s, 0); +} + +CBString CBStream::readLine (char terminator) { + CBString ret(""); + if (0 > bsreadln ((bstring) &ret, m_s, terminator) && eof () < 0) { + bstringThrow ("Failed readLine"); + } + return ret; +} + +CBString CBStream::readLine (const CBString& terminator) { + CBString ret(""); 
+ if (0 > bsreadlns ((bstring) &ret, m_s, (bstring) &terminator) && eof () < 0) { + bstringThrow ("Failed readLine"); + } + return ret; +} + +void CBStream::readLine (CBString& s, char terminator) { + if (0 > bsreadln ((bstring) &s, m_s, terminator) && eof () < 0) { + bstringThrow ("Failed readLine"); + } +} + +void CBStream::readLine (CBString& s, const CBString& terminator) { + if (0 > bsreadlns ((bstring) &s, m_s, (bstring) &terminator) && eof () < 0) { + bstringThrow ("Failed readLine"); + } +} + +void CBStream::readLineAppend (CBString& s, char terminator) { + if (0 > bsreadlna ((bstring) &s, m_s, terminator) && eof () < 0) { + bstringThrow ("Failed readLineAppend"); + } +} + +void CBStream::readLineAppend (CBString& s, const CBString& terminator) { + if (0 > bsreadlnsa ((bstring) &s, m_s, (bstring) &terminator) && eof () < 0) { + bstringThrow ("Failed readLineAppend"); + } +} + +#define BS_BUFF_SZ (1024) + +CBString CBStream::read () { + CBString ret(""); + while (!bseof (m_s)) { + if (0 > bsreada ((bstring) &ret, m_s, BS_BUFF_SZ) && eof () < 0) { + bstringThrow ("Failed read"); + } + } + return ret; +} + +CBString& CBStream::operator >> (CBString& s) { + while (!bseof (m_s)) { + if (0 > bsreada ((bstring) &s, m_s, BS_BUFF_SZ) && eof () < 0) { + bstringThrow ("Failed read"); + } + } + return s; +} + +CBString CBStream::read (int n) { + CBString ret(""); + if (0 > bsread ((bstring) &ret, m_s, n) && eof () < 0) { + bstringThrow ("Failed read"); + } + return ret; +} + +void CBStream::read (CBString& s) { + s.slen = 0; + while (!bseof (m_s)) { + if (0 > bsreada ((bstring) &s, m_s, BS_BUFF_SZ)) { + bstringThrow ("Failed read"); + } + } +} + +void CBStream::read (CBString& s, int n) { + if (0 > bsread ((bstring) &s, m_s, n)) { + bstringThrow ("Failed read"); + } +} + +void CBStream::readAppend (CBString& s) { + while (!bseof (m_s)) { + if (0 > bsreada ((bstring) &s, m_s, BS_BUFF_SZ)) { + bstringThrow ("Failed readAppend"); + } + } +} + +void CBStream::readAppend 
(CBString& s, int n) { + if (0 > bsreada ((bstring) &s, m_s, n)) { + bstringThrow ("Failed readAppend"); + } +} + +void CBStream::unread (const CBString& s) { + if (0 > bsunread (m_s, (bstring) &s)) { + bstringThrow ("Failed unread"); + } +} + +CBString CBStream::peek () const { + CBString ret (""); + if (0 > bspeek ((bstring) &ret, m_s)) { + bstringThrow ("Failed peek"); + } + return ret; +} + +void CBStream::peek (CBString& s) const { + s.slen = 0; + if (0 > bspeek ((bstring) &s, m_s)) { + bstringThrow ("Failed peek"); + } +} + +void CBStream::peekAppend (CBString& s) const { + if (0 > bspeek ((bstring) &s, m_s)) { + bstringThrow ("Failed peekAppend"); + } +} + +int CBStream::eof () const { + int ret = bseof (m_s); + if (0 > ret) { + bstringThrow ("Failed eof"); + } + return ret; +} + +} // namespace Bstrlib diff --git a/bstrlib/bstrwrap.h b/bstrlib/bstrwrap.h new file mode 100644 index 0000000..dcb0ecc --- /dev/null +++ b/bstrlib/bstrwrap.h @@ -0,0 +1,446 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license and the GPL. Refer to the accompanying documentation for details + * on usage and license. + */ + +/* + * bstrwrap.h + * + * This file is the C++ wrapper for the bstring functions. + */ + +#ifndef BSTRWRAP_INCLUDE +#define BSTRWRAP_INCLUDE + +/////////////////// Configuration defines ////////////////////////////// + +// WATCOM C/C++ has broken STL and std::iostream support. If you have +// ported over STLport, then you can #define BSTRLIB_CAN_USE_STL to use +// the CBStringList class. 
+#if defined(__WATCOMC__) +# if !defined (BSTRLIB_CAN_USE_STL) && !defined (BSTRLIB_CANNOT_USE_STL) +# define BSTRLIB_CANNOT_USE_STL +# endif +# if !defined (BSTRLIB_CAN_USE_IOSTREAM) && !defined (BSTRLIB_CANNOT_USE_IOSTREAM) +# define BSTRLIB_CANNOT_USE_IOSTREAM +# endif +#endif + +// By default it assumed that STL has been installed and works for your +// compiler. If this is not the case, then #define BSTRLIB_CANNOT_USE_STL +#if !defined (BSTRLIB_CANNOT_USE_STL) && !defined (BSTRLIB_CAN_USE_STL) +#define BSTRLIB_CAN_USE_STL +#endif + +// By default it assumed that std::iostream works well with your compiler. +// If this is not the case, then #define BSTRLIB_CAN_USE_IOSTREAM +#if !defined (BSTRLIB_CANNOT_USE_IOSTREAM) && !defined (BSTRLIB_CAN_USE_IOSTREAM) +#define BSTRLIB_CAN_USE_IOSTREAM +#endif + +// By default it is assumed that your compiler can deal with and has enabled +// exception handlling. If this is not the case then you will need to +// #define BSTRLIB_DOESNT_THROW_EXCEPTIONS +#if !defined (BSTRLIB_THROWS_EXCEPTIONS) && !defined (BSTRLIB_DOESNT_THROW_EXCEPTIONS) +#define BSTRLIB_THROWS_EXCEPTIONS +#endif + +//////////////////////////////////////////////////////////////////////// + +#include +#include "bstrlib.h" +#include "../common/ubytearray.h" + +#ifdef __cplusplus + +#if defined(BSTRLIB_CAN_USE_STL) + +#if defined(__WATCOMC__) +#pragma warning 604 10 +#pragma warning 595 10 +#pragma warning 594 10 +#pragma warning 549 10 +#endif + +#include +#include + +#if defined(__WATCOMC__) +#pragma warning 604 9 +#pragma warning 595 9 +#pragma warning 594 9 +#endif + +#endif + +namespace Bstrlib { + +#ifdef BSTRLIB_THROWS_EXCEPTIONS +#if defined(BSTRLIB_CAN_USE_STL) +struct CBStringException : public std::exception { +private: + std::string msg; +public: + CBStringException (const std::string inmsg) : msg(inmsg) {} + virtual ~CBStringException () throw () {} + virtual const char *what () const throw () { return msg.c_str(); } +}; +#else +struct 
CBStringException { +private: + char * msg; + int needToFree; +public: + CBStringException (const char * inmsg) : needToFree(0) { + if (inmsg) { + msg = (char *) malloc (1 + strlen (inmsg)); + if (NULL == msg) msg = "Out of memory"; + else { + strcpy (msg, inmsg); + needToFree = 1; + } + } else { + msg = "NULL exception message"; + } + } + virtual ~CBStringException () throw () { + if (needToFree) { + free (msg); + needToFree = 0; + msg = NULL; + } + } + virtual const char *what () const throw () { return msg; } +}; +#endif +#define bstringThrow(er) {\ + CBStringException bstr__cppwrapper_exception ("CBString::" er "");\ + throw bstr__cppwrapper_exception;\ +} +#else +#define bstringThrow(er) {} +#endif + +struct CBString; + +#ifdef _MSC_VER +#pragma warning(disable:4512) +#endif + +class CBCharWriteProtected { +friend struct CBString; + private: + const struct tagbstring& s; + unsigned int idx; + CBCharWriteProtected (const struct tagbstring& c, int i) : s(c), idx((unsigned int)i) { + if (idx >= (unsigned) s.slen) { + bstringThrow ("character index out of bounds"); + } + } + + public: + + inline char operator = (char c) { + if (s.mlen <= 0) { + bstringThrow ("Write protection error"); + } else { +#ifndef BSTRLIB_THROWS_EXCEPTIONS + if (idx >= (unsigned) s.slen) return '\0'; +#endif + s.data[idx] = (unsigned char) c; + } + return (char) s.data[idx]; + } + inline unsigned char operator = (unsigned char c) { + if (s.mlen <= 0) { + bstringThrow ("Write protection error"); + } else { +#ifndef BSTRLIB_THROWS_EXCEPTIONS + if (idx >= (unsigned) s.slen) return '\0'; +#endif + s.data[idx] = c; + } + return s.data[idx]; + } + inline operator unsigned char () const { +#ifndef BSTRLIB_THROWS_EXCEPTIONS + if (idx >= (unsigned) s.slen) return (unsigned char) '\0'; +#endif + return s.data[idx]; + } +}; + +struct CBString : public tagbstring { + + // Constructors + CBString (); + CBString (char c); + CBString (unsigned char c); + CBString (const char *s); + CBString (int len, 
const char *s); + CBString (const CBString& b); + CBString (const tagbstring& x); + CBString (char c, int len); + CBString (const void * blk, int len); + +#if defined(BSTRLIB_CAN_USE_STL) + CBString (const struct CBStringList& l); + CBString (const struct CBStringList& l, const CBString& sep); + CBString (const struct CBStringList& l, char sep); + CBString (const struct CBStringList& l, unsigned char sep); +#endif + + // Destructor +#if !defined(BSTRLIB_DONT_USE_VIRTUAL_DESTRUCTOR) + virtual +#endif + ~CBString (); + + // = operator + const CBString& operator = (char c); + const CBString& operator = (unsigned char c); + const CBString& operator = (const char *s); + const CBString& operator = (const CBString& b); + const CBString& operator = (const tagbstring& x); + + // += operator + const CBString& operator += (char c); + const CBString& operator += (unsigned char c); + const CBString& operator += (const char *s); + const CBString& operator += (const CBString& b); + const CBString& operator += (const tagbstring& x); + + // *= operator + inline const CBString& operator *= (int count) { + this->repeat (count); + return *this; + } + + // + operator + const CBString operator + (char c) const; + const CBString operator + (unsigned char c) const; + const CBString operator + (const unsigned char *s) const; + const CBString operator + (const char *s) const; + const CBString operator + (const CBString& b) const; + const CBString operator + (const tagbstring& x) const; + + // * operator + inline const CBString operator * (int count) const { + CBString retval (*this); + retval.repeat (count); + return retval; + } + + // Comparison operators + bool operator == (const CBString& b) const; + bool operator == (const char * s) const; + bool operator == (const unsigned char * s) const; + bool operator != (const CBString& b) const; + bool operator != (const char * s) const; + bool operator != (const unsigned char * s) const; + bool operator < (const CBString& b) const; + bool 
operator < (const char * s) const; + bool operator < (const unsigned char * s) const; + bool operator <= (const CBString& b) const; + bool operator <= (const char * s) const; + bool operator <= (const unsigned char * s) const; + bool operator > (const CBString& b) const; + bool operator > (const char * s) const; + bool operator > (const unsigned char * s) const; + bool operator >= (const CBString& b) const; + bool operator >= (const char * s) const; + bool operator >= (const unsigned char * s) const; + + // Casts + inline operator const char* () const { return (const char *)data; } + inline operator const unsigned char* () const { return (const unsigned char *)data; } + operator double () const; + operator float () const; + operator int () const; + operator unsigned int () const; + + // Accessors + inline int length () const {return slen;} + + inline unsigned char character (int i) const { + if (((unsigned) i) >= (unsigned) slen) { +#ifdef BSTRLIB_THROWS_EXCEPTIONS + bstringThrow ("character idx out of bounds"); +#else + return '\0'; +#endif + } + return data[i]; + } + inline unsigned char operator [] (int i) const { return character(i); } + + inline CBCharWriteProtected character (int i) { + return CBCharWriteProtected (*this, i); + } + inline CBCharWriteProtected operator [] (int i) { return character(i); } + + // Space allocation hint method. + void alloc (int length); + + // Search methods. 
+ int caselessEqual (const CBString& b) const; + int caselessCmp (const CBString& b) const; + int find (const CBString& b, int pos = 0) const; + int find (const char * b, int pos = 0) const; + int caselessfind (const CBString& b, int pos = 0) const; + int caselessfind (const char * b, int pos = 0) const; + int find (char c, int pos = 0) const; + int reversefind (const CBString& b, int pos) const; + int reversefind (const char * b, int pos) const; + int caselessreversefind (const CBString& b, int pos) const; + int caselessreversefind (const char * b, int pos) const; + int reversefind (char c, int pos) const; + int findchr (const CBString& b, int pos = 0) const; + int findchr (const char * s, int pos = 0) const; + int reversefindchr (const CBString& b, int pos) const; + int reversefindchr (const char * s, int pos) const; + int nfindchr (const CBString& b, int pos = 0) const; + int nfindchr (const char * b, int pos = 0) const; + int nreversefindchr (const CBString& b, int pos) const; + int nreversefindchr (const char * b, int pos) const; + + // Search and substitute methods. + void findreplace (const CBString& find, const CBString& repl, int pos = 0); + void findreplace (const CBString& find, const char * repl, int pos = 0); + void findreplace (const char * find, const CBString& repl, int pos = 0); + void findreplace (const char * find, const char * repl, int pos = 0); + void findreplacecaseless (const CBString& find, const CBString& repl, int pos = 0); + void findreplacecaseless (const CBString& find, const char * repl, int pos = 0); + void findreplacecaseless (const char * find, const CBString& repl, int pos = 0); + void findreplacecaseless (const char * find, const char * repl, int pos = 0); + + // Extraction method. + const CBString midstr (int left, int len) const; + + // Standard manipulation methods. 
+ void setsubstr (int pos, const CBString& b, unsigned char fill = ' '); + void setsubstr (int pos, const char * b, unsigned char fill = ' '); + void insert (int pos, const CBString& b, unsigned char fill = ' '); + void insert (int pos, const char * b, unsigned char fill = ' '); + void insertchrs (int pos, int len, unsigned char fill = ' '); + void replace (int pos, int len, const CBString& b, unsigned char fill = ' '); + void replace (int pos, int len, const char * s, unsigned char fill = ' '); + void remove (int pos, int len); + void trunc (int len); + + // Miscellaneous methods. + void format (const char * fmt, ...); + void formata (const char * fmt, ...); + void fill (int length, unsigned char fill = ' '); + void repeat (int count); + void ltrim (const CBString& b = CBString (bsStaticBlkParms (" \t\v\f\r\n"))); + void rtrim (const CBString& b = CBString (bsStaticBlkParms (" \t\v\f\r\n"))); + inline void trim (const CBString& b = CBString (bsStaticBlkParms (" \t\v\f\r\n"))) { + rtrim (b); + ltrim (b); + } + void toupper (); + void tolower (); + + // Write protection methods. + void writeprotect (); + void writeallow (); + inline bool iswriteprotected () const { return mlen <= 0; } + + // Join methods. 
+#if defined(BSTRLIB_CAN_USE_STL)
+    void join (const struct CBStringList& l);
+    void join (const struct CBStringList& l, const CBString& sep);
+    void join (const struct CBStringList& l, char sep);
+    void join (const struct CBStringList& l, unsigned char sep);
+#endif
+
+    // CBStream methods
+    int gets (bNgetc getcPtr, void * parm, char terminator = '\n');
+    int read (bNread readPtr, void * parm);
+
+    // QString compatibility methods
+    bool isEmpty() const { return slen == 0; }
+    void clear() { *this = ""; }
+    static CBString fromUtf16(const ushort* str) { // Naive implementation assuming that only ASCII part of UCS2 is used
+        CBString msg; while (*str) { msg += *(char*)str; str++; } return msg;
+    }
+    CBString leftJustified(int length) { if (length > slen) { return *this + CBString(' ', length - slen); } return *this; }
+};
+
+// Concatenation operators whose left operand is not a CBString.
+extern const CBString operator + (const char *a, const CBString& b);
+extern const CBString operator + (const unsigned char *a, const CBString& b);
+extern const CBString operator + (char c, const CBString& b);
+extern const CBString operator + (unsigned char c, const CBString& b);
+extern const CBString operator + (const tagbstring& x, const CBString& b);
+
+// count * b: copies b and calls repeat (count) on the copy.
+inline const CBString operator * (int count, const CBString& b) {
+    CBString retval (b);
+    retval.repeat (count);
+    return retval;
+}
+
+#if defined(BSTRLIB_CAN_USE_IOSTREAM)
+extern std::ostream& operator << (std::ostream& sout, CBString b);
+extern std::istream& operator >> (std::istream& sin, CBString& b);
+extern std::istream& getline (std::istream& sin, CBString& b, char terminator='\n');
+#endif
+
+// C++ handle around a C-level struct bStream, constructed from a bNread
+// callback and an opaque parm pointer.
+// NOTE(review): ownership/lifetime of m_s is decided in the out-of-line
+// constructor and destructor, which are not visible here.
+struct CBStream {
+friend struct CBStringList;
+private:
+    struct bStream * m_s;
+public:
+    CBStream (bNread readPtr, void * parm);
+    ~CBStream ();
+    int buffLengthSet (int sz);
+    int buffLengthGet ();
+    int eof () const;
+
+    // Line oriented reads; the terminator is a single char or a CBString.
+    CBString readLine (char terminator);
+    CBString readLine (const CBString& terminator);
+    void readLine (CBString& s, char terminator);
+    void readLine (CBString& s, const CBString& terminator);
+    void readLineAppend (CBString& s, char terminator);
+    void readLineAppend (CBString& s, const CBString& terminator);
+
+    CBString read ();
+    CBString& operator >> (CBString& s);
+
+    CBString read (int n);
+    void read (CBString& s);
+    void read (CBString& s, int n);
+    void readAppend (CBString& s);
+    void readAppend (CBString& s, int n);
+
+    void unread (const CBString& s);
+    // stream << s is shorthand for unread (s).
+    inline CBStream& operator << (const CBString& s) {
+        this->unread (s);
+        return *this;
+    }
+
+    CBString peek () const;
+    void peek (CBString& s) const;
+    void peekAppend (CBString& s) const;
+};
+
+#if defined(BSTRLIB_CAN_USE_STL)
+// A vector of CBString.  The element type has to be spelled out: a bare
+// "std::vector" is a template name, not a type, and cannot be a base class.
+struct CBStringList : public std::vector<CBString> {
+    // split a string into a vector of strings.
+    void split (const CBString& b, unsigned char splitChar);
+    void split (const CBString& b, const CBString& s);
+    void splitstr (const CBString& b, const CBString& s);
+    void split (const CBStream& b, unsigned char splitChar);
+    void split (const CBStream& b, const CBString& s);
+    void splitstr (const CBStream& b, const CBString& s);
+};
+#endif
+
+} // namespace Bstrlib
+
+#if !defined (BSTRLIB_DONT_ASSUME_NAMESPACE)
+using namespace Bstrlib;
+#endif
+
+#endif
+#endif
diff --git a/bstrlib/buniutil.c b/bstrlib/buniutil.c
new file mode 100644
index 0000000..f8d38e5
--- /dev/null
+++ b/bstrlib/buniutil.c
@@ -0,0 +1,274 @@
+/*
+ * This source file is part of the bstring string library.  This code was
+ * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
+ * license and the GPL. Refer to the accompanying documentation for details
+ * on usage and license.
+ */
+
+/*
+ * buniutil.c
+ *
+ * This file is not necessarily part of the core bstring library itself, but
+ * is just an implementation of basic utf8 processing for bstrlib.
Note that
+ * this module is dependent upon bstrlib.c and utf8util.c
+ */
+
+#include "bstrlib.h"
+#include "buniutil.h"
+
+#define UNICODE__CODE_POINT__REPLACEMENT_CHARACTER (0xFFFDL)
+
+/* int buIsUTF8Content (const_bstring bu)
+ *
+ * Walk bu with a utf8 iterator and report whether every decode succeeds.
+ * Returns 0 if bu has no data, or as soon as a decode (using -1 as the
+ * error value) yields a result that is zero or negative.  Returns 1 once
+ * all bu->slen bytes have been consumed.
+ */
+int buIsUTF8Content (const_bstring bu) {
+struct utf8Iterator cursor;
+
+    if (NULL == bdata (bu)) {
+        return 0;
+    }
+
+    utf8IteratorInit (&cursor, bu->data, bu->slen);
+    while (cursor.next < cursor.slen) {
+        /* -1 is the value handed back for an undecodable sequence. */
+        if (utf8IteratorGetNextCodePoint (&cursor, -1) <= 0) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* int buGetBlkUTF16 (cpUcs2* ucs2, int len, cpUcs4 errCh, const_bstring bu,
+ *                    int pos)
+ *
+ * Convert a string of UTF8 codepoints (bu) skipping the first pos, into a
+ * sequence of UTF16 encoded code points.  Returns the number of UCS2 16-bit
+ * words written to the output.  No more than len words are written to the
+ * target array ucs2.  If any code point in bu is unparsable, it will be
+ * translated to errCh.
+ */
+int buGetBlkUTF16 (/* @out */ cpUcs2* ucs2, int len, cpUcs4 errCh, const_bstring bu, int pos) {
+struct utf8Iterator cursor;
+cpUcs4 cp;
+int skipped, offset, written;
+
+    if (!isLegalUnicodeCodePoint (errCh)) {
+        errCh = UNICODE__CODE_POINT__REPLACEMENT_CHARACTER;
+    }
+    if (NULL == ucs2 || 0 >= len || NULL == bdata (bu) || 0 > pos) {
+        return BSTR_ERR;
+    }
+
+    /* Move offset to the pos-th byte that is not a UTF8 continuation byte
+       (10xxxxxx), i.e. step over the first pos code point starts. */
+    skipped = 0;
+    offset = 0;
+    while (offset < bu->slen) {
+        if (0x80 != (0xC0 & bu->data[offset])) {
+            if (skipped >= pos) break;
+            skipped++;
+        }
+        offset++;
+    }
+
+    utf8IteratorInit (&cursor, bu->data + offset, bu->slen - offset);
+
+    /* cp keeps BSTR_ERR until the first code point has been decoded.
+       NOTE(review): as a result BSTR_ERR is also returned when nothing is
+       left to decode after skipping -- confirm that callers expect this. */
+    cp = BSTR_ERR;
+    written = 0;
+    while (0 < len && cursor.next < cursor.slen) {
+        cp = utf8IteratorGetNextCodePoint (&cursor, errCh);
+        if (cp < 0) break;
+
+        if (cp < 0x10000) {
+            /* Fits in a single 16-bit word. */
+            *ucs2++ = (cpUcs2) cp;
+            len--;
+            written++;
+        } else if (len < 2) {
+            /* Only one word of room left, so a surrogate pair cannot fit. */
+            *ucs2++ = UNICODE__CODE_POINT__REPLACEMENT_CHARACTER;
+            len--;
+            written++;
+        } else {
+            /* Split into a lead (0xD800 based) and trail (0xDC00 based) word. */
+            long y = cp - 0x10000;
+            ucs2[0] = (cpUcs2) (0xD800 | (y >> 10));
+            ucs2[1] = (cpUcs2) (0xDC00 | (y & 0x03FF));
+            ucs2 += 2;
+            len -= 2;
+            written += 2;
+        }
+    }
+
+    /* Zero the part of the output array that was not used. */
+    for (; 0 < len; len--) {
+        *ucs2++ = 0;
+    }
+
+    utf8IteratorUninit (&cursor);
+    return (cp < 0) ? BSTR_ERR : written;
+}
+
+/*
+
+Unicode                      UTF-8
+-------                      -----
+U-00000000 - U-0000007F:     0xxxxxxx
+U-00000080 - U-000007FF:     110xxxxx 10xxxxxx
+U-00000800 - U-0000FFFF:     1110xxxx 10xxxxxx 10xxxxxx
+U-00010000 - U-001FFFFF:     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+U-00200000 - U-03FFFFFF:     111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+U-04000000 - U-7FFFFFFF:     1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+UTF-32: U-000000 - U-10FFFF
+
+*/
+
+/* int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, cpUcs4 errCh)
+ *
+ * Convert an array of UCS4 code points (bu) to UTF8 codepoints b.  Any
+ * invalid code point is replaced by errCh.  If errCh is itself not a
+ * valid code point, then this translation will halt upon the first error
+ * and return BSTR_ERR.  Otherwise BSTR_OK is returned.
+ */
+int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, cpUcs4 errCh) {
+int i, oldSlen;
+
+    if (NULL == bu || NULL == b || 0 > len || 0 > (oldSlen = blengthe (b, -1))) return BSTR_ERR;
+    /* ~0 marks "no usable replacement": the first bad code point aborts. */
+    if (!isLegalUnicodeCodePoint (errCh)) errCh = ~0;
+
+    for (i=0; i < len; i++) {
+        unsigned char c[6];
+        cpUcs4 v = bu[i];
+
+        if (!isLegalUnicodeCodePoint (v)) {
+            if (~0 == errCh) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+            v = errCh;
+        }
+
+        /* Emit 1 to 4 bytes depending on the magnitude of v; every failure
+           path restores the length b had on entry. */
+        if (v < 0x80) {
+            if (BSTR_OK != bconchar (b, (char) v)) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+        } else if (v < 0x800) {
+            c[0] = (unsigned char) ( (v >>  6)         + 0xc0);
+            c[1] = (unsigned char) ((  v        & 0x3f) + 0x80);
+            if (BSTR_OK != bcatblk (b, c, 2)) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+        } else if (v < 0x10000) {
+            c[0] = (unsigned char) ( (v >> 12)         + 0xe0);
+            c[1] = (unsigned char) (((v >>  6) & 0x3f) + 0x80);
+            c[2] = (unsigned char) ((  v        & 0x3f) + 0x80);
+            if (BSTR_OK != bcatblk (b, c, 3)) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+        } else
+#if 0
+        if (v < 0x200000)
+#endif
+        {
+            c[0] = (unsigned char) ( (v >> 18)         + 0xf0);
+            c[1] = (unsigned char) (((v >> 12) & 0x3f) + 0x80);
+            c[2] = (unsigned char) (((v >>  6) & 0x3f) + 0x80);
+            c[3] = (unsigned char) ((  v        & 0x3f) + 0x80);
+            if (BSTR_OK != bcatblk (b, c, 4)) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+        }
+#if 0
+        else if (v < 0x4000000) {
+            c[0] = (unsigned char) ( (v >> 24)         + 0xf8);
+            c[1] = (unsigned char) (((v >> 18) & 0x3f) + 0x80);
+            c[2] = (unsigned char) (((v >> 12) & 0x3f) + 0x80);
+            c[3] = (unsigned char) (((v >>  6) & 0x3f) + 0x80);
+            c[4] = (unsigned char) ((  v        & 0x3f) + 0x80);
+            if (BSTR_OK != bcatblk (b, c, 5)) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+        } else {
+            c[0] = (unsigned char) ( (v >> 30)         + 0xfc);
+            c[1] = (unsigned char) (((v >> 24) & 0x3f) + 0x80);
+            c[2] = (unsigned char) (((v >> 18) & 0x3f) + 0x80);
+            c[3] = (unsigned char) (((v >> 12) & 0x3f) + 0x80);
+            c[4] = (unsigned char) (((v >>  6) & 0x3f) + 0x80);
+            c[5] = (unsigned char) ((  v        & 0x3f) + 0x80);
+            if (BSTR_OK != bcatblk (b, c, 6)) {
+                b->slen = oldSlen;
+                return BSTR_ERR;
+            }
+        }
+#endif
+    }
+    return BSTR_OK;
+}
+
+#define endSwap(cs,mode) ((mode) ? ((((cs) & 0xFF) << 8) | (((cs) >> 8) & 0xFF)) : (cs))
+#define TEMP_UCS4_BUFFER_SIZE (64)
+
+/* int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len,
+ *                       cpUcs2* bom, cpUcs4 errCh)
+ *
+ * Append an array of UCS2 code points (utf16) to UTF8 codepoints (bu).  Any
+ * invalid code point is replaced by errCh.  If errCh is itself not a
+ * valid code point, then this translation will halt upon the first error
+ * and return BSTR_ERR.  Otherwise BSTR_OK is returned.  If a byte order mark
+ * has been previously read, it may be passed in as bom, otherwise if *bom is
+ * set to 0, it will be filled in with the BOM as read from the first
+ * character if it is a BOM.
+ *
+ * A surrogate counts as invalid when it is a trail unit (0xDC00..0xDFFF)
+ * with no lead unit before it, a lead unit (0xD800..0xDBFF) in the last
+ * slot of utf16, or a lead unit whose following unit is not a trail unit.
+ * In the last case the following unit is consumed together with the lead
+ * unit.  No element at or beyond utf16[len] is ever read.  On BSTR_ERR the
+ * length of bu is reset to the value it had on entry.
+ */
+int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len, cpUcs2* bom, cpUcs4 errCh) {
+cpUcs4 buff[TEMP_UCS4_BUFFER_SIZE];
+int cc, i, sm, oldSlen;
+
+    if (NULL == bdata(bu) || NULL == utf16 || len < 0) return BSTR_ERR;
+    if (!isLegalUnicodeCodePoint (errCh)) errCh = ~0;
+    if (len == 0) return BSTR_OK;
+
+    oldSlen = bu->slen;
+    i = 0;
+
+    /* Check for BOM character and select endianess.  Also remove the
+       BOM from the stream, since there is no need for it in a UTF-8 encoding.
+       sm is only ever used as the boolean "swap bytes" argument of endSwap. */
+    if (bom && (cpUcs2) 0xFFFE == *bom) {
+        sm = 8;
+    } else if (bom && (cpUcs2) 0xFEFF == *bom) {
+        sm = 0;
+    } else if (utf16[i] == (cpUcs2) 0xFFFE) {
+        if (bom) *bom = utf16[i];
+        sm = 8;
+        i++;
+    } else if (utf16[i] == (cpUcs2) 0xFEFF) {
+        if (bom) *bom = utf16[i];
+        sm = 0;
+        i++;
+    } else {
+        sm = 0; /* Assume local endianness. */
+    }
+
+    cc = 0;
+    for (;i < len; i++) {
+        cpUcs4 c, v;
+        v = endSwap (utf16[i], sm);
+
+        if ((v | 0x7FF) == 0xDFFF) { /* v lies in the surrogate range 0xD800..0xDFFF */
+            int paired = 0;
+
+            /* Only a lead unit that still has a unit after it can form a
+               pair.  Testing i + 1 < len is what keeps the read below
+               inside the array. */
+            if (v < 0xDC00 && i + 1 < len) {
+                i++;
+                c = endSwap (utf16[i], sm);
+                /* Explicit two-sided range test, so the result does not
+                   depend on whether cpUcs4 is a signed type. */
+                if (c >= 0xDC00 && c <= 0xDFFF) {
+                    v = ((v - 0xD800) << 10) + (c - 0xDC00) + 0x10000;
+                    paired = 1;
+                }
+            }
+            if (!paired) {
+                if (~0 == errCh) goto ErrReturn;
+                v = errCh;
+            }
+        }
+
+        /* Collect code points and flush them in chunks of
+           TEMP_UCS4_BUFFER_SIZE through buAppendBlkUcs4. */
+        buff[cc] = v;
+        cc++;
+        if (cc >= TEMP_UCS4_BUFFER_SIZE) {
+            if (0 > buAppendBlkUcs4 (bu, buff, cc, errCh)) goto ErrReturn;
+            cc = 0;
+        }
+    }
+    if (cc > 0 && 0 > buAppendBlkUcs4 (bu, buff, cc, errCh)) goto ErrReturn;
+
+    return BSTR_OK;
+
+ErrReturn:
+    /* Drop whatever earlier flushes appended. */
+    bu->slen = oldSlen;
+    return BSTR_ERR;
+}
diff --git a/bstrlib/buniutil.h b/bstrlib/buniutil.h
new file mode 100644
index 0000000..1017212
--- /dev/null
+++ b/bstrlib/buniutil.h
@@ -0,0 +1,37 @@
+/*
+ * This source file is part of the bstring string library.  This code was
+ * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
+ * license and the GPL. Refer to the accompanying documentation for details
+ * on usage and license.
+ */
+
+/*
+ * buniutil.h
+ *
+ * This file is the interface for the buniutil basic "Unicode for bstrings"
+ * functions.  Note that there are dependencies on bstrlib.h and utf8util.h .
+ */
+
+#ifndef BSTRLIB_UNICODE_UTILITIES
+#define BSTRLIB_UNICODE_UTILITIES
+
+#include "utf8util.h"
+#include "bstrlib.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int buIsUTF8Content (const_bstring bu);
+extern int buAppendBlkUcs4 (bstring b, const cpUcs4* bu, int len, cpUcs4 errCh);
+
+/* For those unfortunate enough to be stuck supporting UTF16.
*/ +extern int buGetBlkUTF16 (/* @out */ cpUcs2* ucs2, int len, cpUcs4 errCh, const_bstring bu, int pos); +extern int buAppendBlkUTF16 (bstring bu, const cpUcs2* utf16, int len, cpUcs2* bom, cpUcs4 errCh); + +#ifdef __cplusplus +} +#endif + +#endif /* BSTRLIB_UNICODE_UTILITIES */ + diff --git a/bstrlib/gpl.txt b/bstrlib/gpl.txt new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/bstrlib/gpl.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/bstrlib/porting.txt b/bstrlib/porting.txt new file mode 100644 index 0000000..11d8d13 --- /dev/null +++ b/bstrlib/porting.txt @@ -0,0 +1,172 @@ +Better String library Porting Guide +----------------------------------- + +by Paul Hsieh + +The bstring library is an attempt to provide improved string processing +functionality to the C and C++ languages. At the heart of the bstring library +is the management of "bstring"s which are a significant improvement over '\0' +terminated char buffers. See the accompanying documentation file bstrlib.txt +for more information. + +=============================================================================== + +Identifying the Compiler +------------------------ + +Bstrlib has been tested on the following compilers: + + Microsoft Visual C++ + Watcom C/C++ (32 bit flat) + Intel's C/C++ compiler (on Windows) + The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64) + Borland C++ + Turbo C + +There are slight differences in these compilers which require slight +differences in the implementation of Bstrlib. These are accommodated in the +same sources using #ifdef/#if defined() on compiler specific macros. To +port Bstrlib to a new compiler not listed above, it is recommended that the +same strategy be followed. If you are unaware of the compiler specific +identifying preprocessor macro for your compiler you might find it here: + +http://predef.sourceforge.net/precomp.html + +Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER. + +16-bit vs. 32-bit vs. 64-bit Systems +------------------------------------ + +Bstrlib has been architected to deal with strings of length between 0 and +INT_MAX (inclusive). Since the values of int are never higher than size_t +there will be no issue here. Note that on most 64-bit systems int is 32-bit. + +Dependency on The C-Library +--------------------------- + +Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and +vsnprintf. 
Many free standing C compiler implementations that have a mode in +which the C library is not available will typically not include these +functions which will make porting Bstrlib to them onerous. Bstrlib is not +designed for such bare bones compiler environments. This usually includes +compilers that target ROM environments. + +Porting Issues +-------------- + +Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there +are still a few porting issues. These are described below. + +1. The vsnprintf () function. + +Unfortunately, the earlier ANSI/ISO C standards did not include this function. +If the compiler of interest does not support this function then the +BSTRLIB_NOVSNP macro should be defined via something like: + + #if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) + # if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__) + # define BSTRLIB_NOVSNP + # endif + #endif + +which appears at the top of bstrlib.h. Note that the bformat(a) functions +will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If +the compiler has renamed vsnprintf() to some other named function, then +search for the definition of the exvsnprintf macro in bstrlib.c file and be +sure it's defined appropriately: + + #if defined (__COMPILERVENDORSPECIFICMACRO__) + # define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);} + #else + # define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);} + #endif + +Take notice of the return value being captured in the variable r. It is +assumed that r exceeds n if and only if the underlying vsnprintf function has +determined what the true maximal output length would be for output if the +buffer were large enough to hold it. Non-modern implementations must output a +lesser number (the macro can and should be modified to ensure this). + +2. Weak C++ compiler. + +C++ is a much more complicated language to implement than C. This has led +to varying quality of compiler implementations. 
The weaknesses isolated in +the initial ports are inclusion of the Standard Template Library, +std::iostream and exception handling. By default it is assumed that the C++ +compiler supports all of these things correctly. If your compiler does not +support one or more of these, define the corresponding macro: + + BSTRLIB_CANNOT_USE_STL + BSTRLIB_CANNOT_USE_IOSTREAM + BSTRLIB_DOESNT_THROW_EXCEPTIONS + +The compiler specific detected macro should be defined at the top of +bstrwrap.h in the Configuration defines section. Note that these disabling +macros can be overridden with the associated enabling macro if a subsequent +version of the compiler gains support. (For example, it's possible to rig +up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL +can be passed in as a compiler option.) + +3. The bsafe module, and reserved words. + +The bsafe module is in gross violation of the ANSI/ISO C standard in the +sense that it redefines what could be implemented as reserved words on a +given compiler. The typical problem is that a compiler may inline some of the +functions and thus not be properly overridden by the definitions in the bsafe +module. It is also possible that a compiler may prohibit the redefinitions in +the bsafe module. Compiler specific action will be required to deal with +these situations. + +Platform Specific Files +----------------------- + +The makefiles for the examples are basically set up for particular +environments for each platform. In general these makefiles are not portable +and should be constructed as necessary from scratch for each platform. + +Testing a port +-------------- + +To test that a port compiles correctly do the following: + +1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and + bsafe modules. +2. Compile bstest against the bstrlib module. +3. Run bstest and ensure that 0 errors are reported. +4. Compile test against the bstrlib and bstrwrap modules. +5.
Run test and ensure that 0 errors are reported. +6. Compile each of the examples (except for the "re" example, which may be + complicated and is not a real test of bstrlib and except for the mfcbench + example which is Windows specific.) +7. Run each of the examples. + +The builds must have 0 errors, and should have the absolute minimum number of +warnings (in most cases can be reduced to 0.) The result of execution should +be essentially identical on each platform. + +Performance +----------- + +Different CPU and compilers have different capabilities in terms of +performance. It is possible for Bstrlib to assume performance +characteristics that a platform doesn't have (since it was primarily +developed on just one platform). The goal of Bstrlib is to provide very good +performance on all platforms regardless of this but without resorting to +extreme measures (such as using assembly language, or non-portable intrinsics +or library extensions.) + +There are two performance benchmarks that can be found in the example/ +directory. They are: cbench.c and cppbench.cpp. These are variations and +expansions of a benchmark for another string library. They don't cover all +string functionality, but do include the most basic functions which will be +common in most string manipulation kernels. + +............................................................................... 
+ +Feedback +-------- + +In all cases, you may email issues found to the primary author of Bstrlib at +the email address: websnarf@users.sourceforge.net + +=============================================================================== diff --git a/bstrlib/security.txt b/bstrlib/security.txt new file mode 100644 index 0000000..d3f5c90 --- /dev/null +++ b/bstrlib/security.txt @@ -0,0 +1,217 @@ +Better String library Security Statement +---------------------------------------- + +by Paul Hsieh + +=============================================================================== + +Introduction +------------ + +The Better String library (hereafter referred to as Bstrlib) is an attempt to +provide improved string processing functionality to the C and C++ languages. +At the heart of the Bstrlib is the management of "bstring"s which are a +significant improvement over '\0' terminated char buffers. See the +accompanying documentation file bstrlib.txt for more information. + +DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Like any software, there is always a possibility of failure due to a flawed +implementation. Nevertheless a good faith effort has been made to minimize +such flaws in Bstrlib.
Use of Bstrlib by itself will not make an application +secure or free from implementation failures, however, it is the author's +conviction that use of Bstrlib can greatly facilitate the creation of +software meeting the highest possible standards of security. + +Part of the reason why this document has been created, is for the purpose of +security auditing, or the creation of further "Statements on Security" for +software that is created that uses Bstrlib. An auditor may check the claims +below against Bstrlib, and use this as a basis for analysis of software which +uses Bstrlib. + +=============================================================================== + +Statement on Security +--------------------- + +This is a document intended to give consumers of the Better String Library +who are interested in security an idea of where the Better String Library +stands on various security issues. Any deviation observed in the actual +library itself from the descriptions below should be considered an +implementation error, not a design flaw. + +This statement is not an analytical proof of correctness or an outline of one +but rather an assertion similar to a scientific claim or hypothesis. By use, +testing and open independent examination (otherwise known as scientific +falsifiability), the credibility of the claims made below can rise to the +level of an established theory. + +Common security issues: +....................... + +1. Buffer Overflows + +The Bstrlib API allows the programmer a way to deal with strings without +having to deal with the buffers containing them. Ordinary usage of the +Bstrlib API itself makes buffer overflows impossible. + +Furthermore, the Bstrlib API has a superset of basic string functionality as +compared to the C library's char * functions, C++'s std::string class and +Microsoft's MFC based CString class. It also has abstracted mechanisms for +dealing with IO. 
This is important as it gives developers a way of migrating +all their code from a functionality point of view. + +2. Memory size overflow/wrap around attack + +By design, Bstrlib is impervious to memory size overflow attacks. The +reason is that it detects length overflows and leads to a result error before +the operation attempts to proceed. Attempted conversions of char* strings +which may have lengths greater than INT_MAX are detected and the conversion +is aborted. If the memory to hold the string exceeds the available memory +for it, again, the result is aborted without changing the prior state of the +strings. + +3. Constant string protection + +Bstrlib implements runtime enforced constant and read-only string semantics. +I.e., bstrings which are declared as constant via the bsStatic() macro cannot +be modified or deallocated directly through the Bstrlib API, and this cannot +be subverted by casting or other type coercion. This is independent of the +use of the const_bstring data type. + +The Bstrlib C API uses the type const_bstring to specify bstring parameters +whose contents do not change. Although the C language cannot enforce this, +this is nevertheless guaranteed by the implementation of the Bstrlib library +of C functions. The C++ API enforces the const attribute on CBString types +correctly. + +4. Aliased bstring support + +Bstrlib detects and supports aliased parameter management throughout the API. +The kind of aliasing that is allowed is the one where pointers of the same +basic type may be pointing to overlapping objects (this is the assumption the +ANSI C99 specification makes.) Each function behaves as if all read-only +parameters were copied to temporaries which are used in their stead before +the function is enacted (it rarely actually does this). No function in the +Bstrlib uses the "restrict" parameter attribute from the ANSI C99 +specification. + +5. 
Information leaking + +In bstraux.h, using the semantically equivalent macros bSecureDestroy() and +bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively +will ensure that stale data does not linger in the heap's free space after +strings have been released back to memory. Created bstrings or CBStrings +are not linked to anything external to themselves, and thus cannot expose +deterministic data leaking. If a bstring is resized, the preimage may exist +as a copy that is released to the heap. Thus for sensitive data, the bstring +should be sufficiently presized before manipulated so that it is not resized. +bSecureInput() has been supplied in bstraux.c, which can be used to obtain +input securely without any risk of leaving any part of the input image in the +heap except for the allocated bstring that is returned. + +6. Memory leaking + +Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG +macro. User generated definitions for malloc, realloc and free can then be +supplied which can implement special strategies for memory corruption +detection or memory leaking. Otherwise, bstrlib does not do anything out of +the ordinary to attempt to deal with the standard problem of memory leaking +(i.e., losing references to allocated memory) when programming in the C and +C++ languages. However, it does not compound the problem any more than exists +either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib +does not preclude the use of automatic garbage collection mechanisms such as +the Boehm garbage collector. + +7. Encryption + +Bstrlib does not present any built-in encryption mechanism. However, it +supports full binary contents in its data buffers, so any standard block +based encryption mechanism can make direct use of bstrings/CBStrings for +buffer management. + +8. 
Double freeing + +Freeing a pointer that is already free is an extremely rare, but nevertheless +a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to +reboot, by calling free multiple times on already freed data using the WATCOM +CRT.) Bstrlib invalidates the bstring header data before freeing, so that in +many cases a double free will be detected and an error will be reported +(though this behaviour is not guaranteed and should not be relied on). + +Using bstrFree pervasively (instead of bdestroy) can lead to somewhat +improved invalid free avoidance (it is completely safe whenever bstring +instances are only stored in unique variables). For example: + + struct tagbstring hw = bsStatic ("Hello, world"); + bstring cpHw = bstrcpy (&hw); + + #ifdef NOT_QUITE_AS_SAFE + bdestroy (cpHw); /* Never fail */ + bdestroy (cpHw); /* Error sometimes detected at runtime */ + bdestroy (&hw); /* Error detected at run time */ + #else + bstrFree (cpHw); /* Never fail */ + bstrFree (cpHw); /* Will do nothing */ + bstrFree (&hw); /* Will lead to a compile time error */ + #endif + +9. Resource based denial of service + +bSecureInput() has been supplied in bstraux.c. It has an optional upper limit +for input length. But unlike fgets(), it is also easily determined if the +buffer has been truncated early. In this way, a program can set an upper +limit on input sizes while still allowing for implementing context specific +truncation semantics (i.e., does the program consume but dump the extra +input, or does it consume it in later inputs?) + +10. Mixing char *'s and bstrings + +The bstring and char * representations are not identical. So there is a risk +when converting back and forth that data may be lost. Essentially bstrings can +contain '\0' as a valid non-terminating character, while char * strings +cannot and in fact must use the character as a terminator.
The risk of data +loss is very low, since: + + A) the simple method of only using bstrings in a char * semantically + compatible way is both easy to achieve and pervasively supported. + B) obtaining '\0' content in a string is either deliberate or indicative + of another, likely more serious problem in the code. + C) the library comes with various functions which deal with this issue + (namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ()) + +Marginal security issues: +......................... + +11. 8-bit versus 9-bit portability + +Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent +possible to avoid portability problems. However, Bstrlib has not been tested +on any system that does not represent char as 8-bits. So whether or not it +works on 9-bit systems is an open question. It is recommended that Bstrlib be +carefully audited by anyone using a system in which CHAR_BIT is not 8. + +12. EBCDIC/ASCII/UTF-8 data representation attacks. + +Bstrlib uses ctype.h functions to ensure that it remains portable to non- +ASCII systems. It also checks range to make sure it is well defined even for +data that ANSI does not define for the ctype functions. + +Obscure issues: +............... + +13. Data attributes + +There is no support for a Perl-like "taint" attribute, although this is a +fairly straightforward exercise using C++'s type system. + diff --git a/bstrlib/test.cpp b/bstrlib/test.cpp new file mode 100644 index 0000000..9503342 --- /dev/null +++ b/bstrlib/test.cpp @@ -0,0 +1,1725 @@ +// +// This source file is part of the bstring string library. This code was +// written by Paul Hsieh in 2002-2015, and is covered by the BSD open source +// license. Refer to the accompanying documentation for details on usage and +// license.
+// + +// +// test.cpp +// +// This file is the C++ unit test for Bstrlib +// + +#if defined (_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include "bstrlib.h" +#include "bstrwrap.h" + +// Exceptions must be turned on in the compiler to successfully run +// this test. The compiler must also support STL. + +#define dumpOutQty (32) +static bstring dumpOut[dumpOutQty]; +static unsigned int rot = 0; + +const char * dumpBstring (const bstring b) { + rot = (rot + 1) % (unsigned) dumpOutQty; + if (dumpOut[rot] == NULL) { + dumpOut[rot] = bfromcstr (""); + if (dumpOut[rot] == NULL) return "FATAL INTERNAL ERROR"; + } + dumpOut[rot]->slen = 0; + if (b == NULL) { + bcatcstr (dumpOut[rot], "NULL"); + } else { + char msg[32]; + sprintf (msg, "%p", (void *)b); + bcatcstr (dumpOut[rot], msg); + + if (b->slen < 0) { + sprintf (msg, ":[err:slen=%d<0]", b->slen); + bcatcstr (dumpOut[rot], msg); + } else { + if (b->mlen > 0 && b->mlen < b->slen) { + sprintf (msg, ":[err:mlen=%dmlen, b->slen); + bcatcstr (dumpOut[rot], msg); + } else { + if (b->mlen == -1) { + bcatcstr (dumpOut[rot], "[p]"); + } else if (b->mlen < 0) { + bcatcstr (dumpOut[rot], "[c]"); + } + bcatcstr (dumpOut[rot], ":"); + if (b->data == NULL) { + bcatcstr (dumpOut[rot], "[err:data=NULL]"); + } else { + bcatcstr (dumpOut[rot], "\""); + bcatcstr (dumpOut[rot], (const char *) b->data); + bcatcstr (dumpOut[rot], "\""); + } + } + } + } + return (const char *) dumpOut[rot]->data; +} + +int test0 (void) { +int ret = 0; + + printf ("TEST: CBString constructor\n"); + + try { + printf ("\tCBString c;\n"); + CBString c0; + ret += (0 != c0.length()); + ret += '\0' != ((const char *)c0)[c0.length()]; + + printf ("\tCBString c(\"test\");\n"); + CBString c1 ("test"); + ret += (c1 != "test"); + ret += '\0' != ((const char *)c1)[c1.length()]; + + printf ("\tCBString c(25, \"test\");\n"); + CBString c8 (25, "test"); + ret += (c8 != "test"); + ret += c8.mlen < 25; + ret += '\0' != ((const char 
*)c8)[c8.length()]; + + printf ("\tCBString c('t');\n"); + CBString c2 ('t'); + ret += (c2 != "t"); + ret += '\0' != ((const char *)c2)[c2.length()]; + + printf ("\tCBString c('\\0');\n"); + CBString c3 ('\0'); + ret += (1 != c3.length()) || ('\0' != c3[0]); + ret += '\0' != ((const char *)c3)[c3.length()]; + + printf ("\tCBString c(bstr[\"test\"]);\n"); + struct tagbstring t = bsStatic ("test"); + CBString c4 (t); + ret += (c4 != t.data); + ret += '\0' != ((const char *)c4)[c4.length()]; + + printf ("\tCBString c(CBstr[\"test\"]);\n"); + CBString c5 (c1); + ret += (c1 != c5); + ret += '\0' != ((const char *)c5)[c5.length()]; + + printf ("\tCBString c('x',5);\n"); + CBString c6 ('x',5); + ret += (c6 != "xxxxx"); + ret += '\0' != ((const char *)c6)[c6.length()]; + + printf ("\tCBString c(\"123456\",4);\n"); + CBString c7 ((void *)"123456",4); + ret += (c7 != "1234"); + ret += '\0' != ((const char *)c7)[c7.length()]; + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +#define EXCEPTION_EXPECTED(line) \ + try { \ + line; \ + ret++; \ + printf ("\tException was expected\n"); \ + } \ + catch (struct CBStringException) { } + +int test1 (void) { +int ret = 0; + + printf ("TEST: CBString = operator\n"); + + try { + CBString c0; + struct tagbstring t = bsStatic ("test"); + + ret += c0.iswriteprotected(); + c0.writeprotect (); + ret += 1 != c0.iswriteprotected(); + EXCEPTION_EXPECTED (c0 = 'x'); + EXCEPTION_EXPECTED (c0 = (unsigned char) 'x'); + EXCEPTION_EXPECTED (c0 = "test"); + EXCEPTION_EXPECTED (c0 = CBString ("test")); + EXCEPTION_EXPECTED (c0 = t); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1; + struct tagbstring t = bsStatic ("test"); + + printf ("\tc = 'x';\n"); + c0 = 'x'; + ret += (c0 != "x"); + ret += '\0' != ((const char 
*)c0)[c0.length()]; + printf ("\tc = (unsigned char)'x';\n"); + c0 = (unsigned char) 'x'; + ret += (c0 != "x"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc = \"test\";\n"); + c0 = "test"; + ret += (c0 != "test"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc = CBStr[\"test\"];\n"); + c1 = c0; + ret += (c0 != c1); + ret += '\0' != ((const char *)c1)[c1.length()]; + printf ("\tc = tbstr[\"test\"];\n"); + c0 = t; + ret += (c0 != "test"); + ret += '\0' != ((const char *)c0)[c0.length()]; + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test2 (void) { +int ret = 0; + + printf ("TEST: CBString += operator\n"); + + try { + CBString c0; + struct tagbstring t = bsStatic ("test"); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0 += 'x'); + EXCEPTION_EXPECTED (c0 += (unsigned char) 'x'); + EXCEPTION_EXPECTED (c0 += "test"); + EXCEPTION_EXPECTED (c0 += CBString ("test")); + EXCEPTION_EXPECTED (c0 += t); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + struct tagbstring t = bsStatic ("extra"); + + c0 = "test"; + printf ("\tc += 'x';\n"); + c0 += 'x'; + ret += (c0 != "testx"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc += (unsigned char)'x';\n"); + c0 += (unsigned char) 'y'; + ret += (c0 != "testxy"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc += \"test\";\n"); + c0 += "test"; + ret += (c0 != "testxytest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc += CBStr[\"test\"];\n"); + c0 += CBString (c0); + ret += (c0 != "testxytesttestxytest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc += tbstr[\"test\"];\n"); + c0 += t; + ret += (c0 != "testxytesttestxytestextra"); + ret += '\0' != ((const char *)c0)[c0.length()]; + } + + 
catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test3 (void) { +int ret = 0; + + try { + CBString c0, c1; + struct tagbstring t = bsStatic ("extra"); + + printf ("TEST: CBString + operator\n"); + + c1 = "test"; + printf ("\tc + 'x';\n"); + c0 = c1 + 'x'; + ret += (c0 != "testx"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc + (unsigned char)'x';\n"); + c0 = c1 + (unsigned char) 'y'; + ret += (c0 != "testy"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc + \"test\";\n"); + c0 = c1 + (const char *) "stuff"; + ret += (c0 != "teststuff"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc + (unsigned char *) \"test\";\n"); + c0 = c1 + (const unsigned char *) "stuff"; + ret += (c0 != "teststuff"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc + CBStr[\"test\"];\n"); + c0 = c1 + CBString ("other"); + ret += (c0 != "testother"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\tc + tbstr[\"test\"];\n"); + c0 = c1 + t; + ret += (c0 != "testextra"); + ret += '\0' != ((const char *)c0)[c0.length()]; + + printf ("TEST: + CBString operator\n"); + + printf ("\t'x' + c;\n"); + c0 = 'x' + c1; + ret += (c0 != "xtest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\t(unsigned char)'y' + c;\n"); + c0 = (unsigned char) 'y' + c1; + ret += (c0 != "ytest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\t\"test\" + c;\n"); + c0 = (const char *) "stuff" + c1; + ret += (c0 != "stufftest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\t(unsigned char *) \"test\" + c;\n"); + c0 = (const unsigned char *) "stuff" + c1; + ret += (c0 != "stufftest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + printf ("\ttbstr[\"extra\"] + c;\n"); + c0 = t + c1; + ret += (c0 != "extratest"); + ret += '\0' != ((const char *)c0)[c0.length()]; + 
} + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test4 (void) { +int ret = 0; + + try { + printf ("TEST: CBString == operator\n"); + + CBString c0, c1, c2; + + c0 = c1 = "test"; + c2 = "other"; + + printf ("\tc == d;\n"); + ret += !(c0 == c1); + ret += (c0 == c2); + + printf ("\tc == \"test\";\n"); + ret += !(c0 == "test"); + ret += (c2 == "test"); + + printf ("\tc == (unsigned char *) \"test\";\n"); + ret += !(c0 == (unsigned char *) "test"); + ret += (c2 == (unsigned char *) "test"); + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test5 (void) { +int ret = 0; + + try { + printf ("TEST: CBString != operator\n"); + + CBString c0, c1, c2; + + c0 = c1 = "test"; + c2 = "other"; + + printf ("\tc != d;\n"); + ret += (c0 != c1); + ret += !(c0 != c2); + + printf ("\tc != \"test\";\n"); + ret += (c0 != "test"); + ret += !(c2 != "test"); + + printf ("\tc != (unsigned char *) \"test\";\n"); + ret += (c0 != (unsigned char *) "test"); + ret += !(c2 != (unsigned char *) "test"); + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test6 (void) { +int ret = 0; + + try { + printf ("TEST: CBString <, <= operators\n"); + + CBString c0, c1, c2; + + c0 = c1 = "test"; + c2 = "other"; + + printf ("\tc < d;\n"); + ret += (c0 < c1); + ret += (c0 < c2); + ret += (c1 < c0); + ret += !(c2 < c0); + + printf ("\tc <= d;\n"); + ret += !(c0 <= c1); + ret += (c0 <= c2); + ret += !(c1 <= c0); + ret += !(c2 <= c0); + + printf ("\tc < \"test\";\n"); + ret += (c0 < "test"); + ret += (c1 < "test"); + ret += !(c2 < "test"); + ret += (c0 < "other"); + ret += (c1 < "other"); + ret += (c2 < 
"other"); + + printf ("\tc <= \"test\";\n"); + ret += !(c0 <= "test"); + ret += !(c1 <= "test"); + ret += !(c2 <= "test"); + ret += (c0 <= "other"); + ret += (c1 <= "other"); + ret += !(c2 <= "other"); + + printf ("\tc < (unsigned char *) \"test\";\n"); + ret += (c0 < (const char *) "test"); + ret += (c1 < (const char *) "test"); + ret += !(c2 < (const char *) "test"); + ret += (c0 < (const char *) "other"); + ret += (c1 < (const char *) "other"); + ret += (c2 < (const char *) "other"); + + printf ("\tc <= (unsigned char *) \"test\";\n"); + ret += !(c0 <= (const char *) "test"); + ret += !(c1 <= (const char *) "test"); + ret += !(c2 <= (const char *) "test"); + ret += (c0 <= (const char *) "other"); + ret += (c1 <= (const char *) "other"); + ret += !(c2 <= (const char *) "other"); + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test7 (void) { +int ret = 0; + + try { + printf ("TEST: CBString >, >= operators\n"); + + CBString c0, c1, c2; + + c0 = c1 = "test"; + c2 = "other"; + + printf ("\tc >= d;\n"); + ret += !(c0 >= c1); + ret += !(c0 >= c2); + ret += !(c1 >= c0); + ret += (c2 >= c0); + + printf ("\tc > d;\n"); + ret += (c0 > c1); + ret += !(c0 > c2); + ret += (c1 > c0); + ret += (c2 > c0); + + printf ("\tc >= \"test\";\n"); + ret += !(c0 >= "test"); + ret += !(c1 >= "test"); + ret += (c2 >= "test"); + ret += !(c0 >= "other"); + ret += !(c1 >= "other"); + ret += !(c2 >= "other"); + + printf ("\tc > \"test\";\n"); + ret += (c0 > "test"); + ret += (c1 > "test"); + ret += (c2 > "test"); + ret += !(c0 > "other"); + ret += !(c1 > "other"); + ret += (c2 > "other"); + + printf ("\tc >= (unsigned char *) \"test\";\n"); + ret += !(c0 >= (const char *) "test"); + ret += !(c1 >= (const char *) "test"); + ret += (c2 >= (const char *) "test"); + ret += !(c0 >= (const char *) "other"); + ret += !(c1 >= (const char *) "other"); + ret += 
!(c2 >= (const char *) "other"); + + printf ("\tc > (unsigned char *) \"test\";\n"); + ret += (c0 > (const char *) "test"); + ret += (c1 > (const char *) "test"); + ret += (c2 > (const char *) "test"); + ret += !(c0 > (const char *) "other"); + ret += !(c1 > (const char *) "other"); + ret += (c2 > (const char *) "other"); + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test8 (void) { +int ret = 0; + + try { + printf ("TEST: (const char *) CBString operator\n"); + + CBString c0 ("test"), c1 ("other"); + + printf ("\t(const char *) CBString\n"); + ret += 0 != memcmp ((const char *) c0, "test", 5); + ret += 0 != memcmp ((const char *) c1, "other", 6); + + printf ("\t(const unsigned char *) CBString\n"); + ret += 0 != memcmp ((const unsigned char *) c0, "test", 5); + ret += 0 != memcmp ((const unsigned char *) c1, "other", 6); + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test9 (void) { +int ret = 0; + + try { + printf ("TEST: (double), (float), (int) CBString operators\n"); + CBString c0 ("1.2e3"), c1("100"), c2("100.55"); + printf ("\t(double) \"%s\"\n", (const char *) c0); + ret += 1.2e3 != (double) c0; + printf ("\t(float) \"%s\"\n", (const char *) c0); + ret += 1.2e3 != (float) c0; + printf ("\t(int) \"%s\"\n", (const char *) c1); + ret += 100 != (float) c1; + printf ("\t(int) \"%s\"\n", (const char *) c2); + ret += 100 != (int) c2; + printf ("\t(unsigned int) \"%s\"\n", (const char *) c2); + ret += 100 != (unsigned int) c2; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0 ("xxxxx"); + printf ("\t(double) \"%s\"\n", (const char *) c0); + ret += -1.2e3 != (double) c0; + } + catch (struct 
CBStringException err) { + printf ("\tException (%s) correctly thrown\n", err.what()); + } + + try { + CBString c0 ("xxxxx"); + printf ("\t(float) \"%s\"\n", (const char *) c0); + ret += -1.2e3 != (float) c0; + } + catch (struct CBStringException err) { + printf ("\tException (%s) correctly thrown\n", err.what()); + } + + try { + CBString c0 ("xxxxx"); + printf ("\t(int) \"%s\"\n", (const char *) c0); + ret += -100 != (int) c0; + } + catch (struct CBStringException err) { + printf ("\tException (%s) correctly thrown\n", err.what()); + } + + try { + CBString c0 ("xxxxx"); + printf ("\t(unsigned int) \"%s\"\n", (const char *) c0); + ret += 1000 != (unsigned int) c0; + } + catch (struct CBStringException err) { + printf ("\tException (%s) correctly thrown\n", err.what()); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test10 (void) { +int ret = 0; + + try { + printf ("TEST: length() method\n"); + CBString c0, c1("Test"); + + printf ("\t\"%s\".length();\n", (const char *) c0); + ret += 0 != c0.length(); + printf ("\t\"%s\".length();\n", (const char *) c1); + ret += 4 != c1.length(); + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test11 (void) { +int ret = 0; + + printf ("TEST: character() method, [] operator\n"); + + try { + CBString c0("test"); + c0.writeprotect (); + ret += c0[0] != 't'; + ret += (1 + c0[0]) != 'u'; + ret += ((unsigned char) c0[0] + 1) != 'u'; + ret += c0.character(0) != 't'; + EXCEPTION_EXPECTED (c0[0] = 'x'); + EXCEPTION_EXPECTED (c0.character(0) = 'x'); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0("Test"); + + printf ("\t\"%s\".character ();\n", (const char *) c0); + ret += 's' != c0.character (2); + c0.character (2) = 'x'; + ret += c0 != "Text"; + + printf ("\t\"%s\"[];\n", (const char 
*) c0); + ret += 'T' != c0[0]; + c0[0] = 't'; + ret += c0 != "text"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + printf ("\t\"%s\".character ();\n", (const char *) c0); + ret += '?' != c0.character (0); + } + catch (struct CBStringException err) { + printf ("\tException (%s) correctly thrown\n", err.what()); + } + + try { + CBString c0; + printf ("\t\"%s\"[];\n", (const char *) c0); + ret += '?' != c0[0]; + } + catch (struct CBStringException err) { + printf ("\tException (%s) correctly thrown\n", err.what()); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test12 (void) { +int ret = 0; + +#ifndef BSTRLIB_NOVSNP + printf ("TEST: format(), formata() methods\n"); + + try { + CBString c0; + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.format ("%s(%d)", "extra", 4)); + EXCEPTION_EXPECTED (c0.formata ("%s(%d)", "extra", 4)); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("Test"), c2, c3; + + printf ("\tc.format (...);\n"); + c0.format ("%s(%d)", "extra", 4); + ret += c0 != "extra(4)"; + + c2 = c0 + c0 + c0 + c0; + c2 += c2; + c2.insert (0, "x"); + c3.format ("x%s%s%s%s%s%s%s%s", (const char *) c0, (const char *) c0 + , (const char *) c0, (const char *) c0 + , (const char *) c0, (const char *) c0 + , (const char *) c0, (const char *) c0); + ret += c2 != c3; + + printf ("\t\"%s\".formata (...);\n", (const char *) c1); + c1.formata ("%s(%d)", "extra", 4); + ret += c1 != "Testextra(4)"; + + c2 = c0 + c0 + c0 + c0; + c2 += c2; + c2.insert (0, "x"); + c3 = "x"; + c3.formata ("%s%s%s%s%s%s%s%s", (const char *) c0, (const char *) c0 + , (const char *) c0, (const char *) c0 + , (const char *) c0, (const char *) c0 + , (const char *) c0, (const char *) c0); + ret += c2 != c3; + } + catch (struct CBStringException err) { + printf ("Exception thrown 
[%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); +#endif + return ret; +} + +int test13 (void) { +int ret = 0; + + try { + printf ("TEST: find() method\n"); + CBString c0, c1("Test"); + + printf ("\t\"%s\".find (CBString());\n", (const char *) c0); + ret += -1 != c0.find (CBString("x")); + ret += 1 != c1.find (CBString("e")); + + printf ("\t\"%s\".find (char *);\n", (const char *) c0); + ret += -1 != c0.find ("x"); + ret += 1 != c1.find ("e"); + + ret += 8 != CBString ("sssssssssap").find ("sap"); + ret += 9 != CBString ("sssssssssap").find ("ap"); + ret += 9 != CBString ("sssssssssap").find ("ap", 3); + ret += 9 != CBString ("sssssssssap").find ("a"); + ret += 9 != CBString ("sssssssssap").find ("a", 3); + ret += -1 != CBString ("sssssssssap").find ("x"); + ret += -1 != CBString ("sssssssssap").find ("x", 3); + ret += -1 != CBString ("sssssssssap").find ("ax"); + ret += -1 != CBString ("sssssssssap").find ("ax", 3); + ret += -1 != CBString ("sssssssssap").find ("sax"); + ret += -1 != CBString ("sssssssssap").find ("sax", 1); + ret += 8 != CBString ("sssssssssap").find ("sap", 3); + ret += 9 != CBString ("ssssssssssap").find ("sap", 3); + ret += 0 != CBString ("sssssssssap").find ("s"); + ret += 3 != CBString ("sssssssssap").find ("s", 3); + ret += 9 != CBString ("sssssssssap").find ("a"); + ret += 9 != CBString ("sssssssssap").find ("a", 5); + ret += 8 != CBString ("sasasasasap").find ("sap"); + ret += 9 != CBString ("ssasasasasap").find ("sap"); + + printf ("\t\"%s\".find (char);\n", (const char *) c0); + ret += -1 != c0.find ('x'); + ret += 1 != c1.find ('e'); + + printf ("TEST: reversefind () method\n"); + printf ("\t\"%s\".reversefind (CBString());\n", (const char *) c0); + ret += -1 != c0.reversefind (CBString("x"), c0.length()); + ret += 1 != c1.reversefind (CBString("e"), c1.length()); + + printf ("\t\"%s\".reversefind (char *);\n", (const char *) c0); + ret += -1 != c0.reversefind ("x", c0.length()); + ret += 1 != 
c1.reversefind ("e", c1.length()); + + printf ("\t\"%s\".reversefind (char);\n", (const char *) c0); + ret += -1 != c0.reversefind ('x', c0.length()); + ret += 1 != c1.reversefind ('e', c1.length()); + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test14 (void) { +int ret = 0; + + try { + printf ("TEST: findchr(), reversefindchr() methods\n"); + CBString c0, c1("Test"); + + printf ("\t\"%s\".findchr (CBString(\"abcdef\"));\n", (const char *) c0); + ret += -1 != c0.findchr (CBString ("abcdef")); + printf ("\t\"%s\".findchr (CBString(\"abcdef\"));\n", (const char *) c1); + ret += 1 != c1.findchr (CBString ("abcdef")); + printf ("\t\"%s\".findchr (\"abcdef\");\n", (const char *) c0); + ret += -1 != c0.findchr ("abcdef"); + printf ("\t\"%s\".findchr (\"abcdef\");\n", (const char *) c1); + ret += 1 != c1.findchr ("abcdef"); + + printf ("\t\"%s\".reversefindchr (CBString(\"abcdef\"));\n", (const char *) c0); + ret += -1 != c0.reversefindchr (CBString ("abcdef"), c0.length()); + printf ("\t\"%s\".reversefindchr (CBString(\"abcdef\"));\n", (const char *) c1); + ret += 1 != c1.reversefindchr (CBString ("abcdef"), c1.length()); + printf ("\t\"%s\".reversefindchr (\"abcdef\");\n", (const char *) c0); + ret += -1 != c0.reversefindchr ("abcdef", c0.length()); + printf ("\t\"%s\".reversefindchr (\"abcdef\");\n", (const char *) c1); + ret += 1 != c1.reversefindchr ("abcdef", c1.length()); + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test15 (void) { +int ret = 0; + + try { + printf ("TEST: nfindchr(), nreversefindchr() methods\n"); + CBString c0, c1("Test"); + + printf ("\t\"%s\".nfindchr (CBString(\"abcdef\"));\n", (const char *) c0); + ret += -1 != c0.nfindchr (CBString ("abcdef")); + printf 
("\t\"%s\".nfindchr (CBString(\"abcdef\"));\n", (const char *) c1); + ret += 0 != c1.nfindchr (CBString ("abcdef")); + printf ("\t\"%s\".nfindchr (\"abcdef\");\n", (const char *) c0); + ret += -1 != c0.nfindchr ("abcdef"); + printf ("\t\"%s\".nfindchr (\"abcdef\");\n", (const char *) c1); + ret += 0 != c1.nfindchr ("abcdef"); + + printf ("\t\"%s\".nreversefindchr (CBString(\"abcdef\"));\n", (const char *) c0); + ret += -1 != c0.nreversefindchr (CBString ("abcdef"), c0.length()); + printf ("\t\"%s\".nreversefindchr (CBString(\"abcdef\"));\n", (const char *) c1); + ret += 3 != c1.nreversefindchr (CBString ("abcdef"), c1.length()); + printf ("\t\"%s\".nreversefindchr (\"abcdef\");\n", (const char *) c0); + ret += -1 != c0.nreversefindchr ("abcdef", c0.length()); + printf ("\t\"%s\".nreversefindchr (\"abcdef\");\n", (const char *) c1); + ret += 3 != c1.nreversefindchr ("abcdef", c1.length()); + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test16 (void) { +int ret = 0; + + printf ("TEST: midstr() method\n"); + + try { + CBString c0, c1("bogus"), c2; + + printf ("\t\"%s\".midstr (1,3)\n", (const char *) c0); + ret += (c2 = c0.midstr (1,3)) != ""; + ret += '\0' != ((const char *)c2)[c2.length ()]; + + printf ("\t\"%s\".midstr (1,3)\n", (const char *) c1); + ret += (c2 = c1.midstr (1,3)) != "ogu"; + ret += '\0' != ((const char *)c2)[c2.length ()]; + } + + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test17 (void) { +int ret = 0; + + printf ("TEST: fill() method\n"); + + try { + CBString c0; + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.fill (5, 'x')); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, 
c1("Test-test"); + + printf ("\t\"%s\".fill(5,'x')\n", (const char *) c0); + c0.fill (5, 'x'); + ret += c0 != "xxxxx"; + + printf ("\t\"%s\".fill(5,'x')\n", (const char *) c1); + c1.fill (5, 'x'); + ret += c1 != "xxxxx"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test18 (void) { +int ret = 0; + + printf ("TEST: alloc() method\n"); + + try { + CBString c0; + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.alloc (5)); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".alloc(5)\n", (const char *) c0); + c0.alloc (5); + ret += c0 != ""; + + printf ("\t\"%s\".alloc(5)\n", (const char *) c1); + c1.alloc (5); + ret += c1 != "Test-test"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + + printf ("\t\"%s\".alloc(0)\n", (const char *) c0); + c0.alloc (0); + ret += c0 != "Error"; + } + catch (struct CBStringException err) { + printf ("\tException (%s) properly thrown\n", err.what()); + } + + try { + CBString c0; + + printf ("\t\"%s\".alloc(-1)\n", (const char *) c0); + c0.alloc (-1); + ret += c0 != "Error"; + } + catch (struct CBStringException err) { + printf ("\tException (%s) properly thrown\n", err.what()); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test19 (void) { +int ret = 0; + + printf ("TEST: setsubstr() method\n"); + + try { + CBString c0("Test-test"); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.setsubstr (4, "extra")); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".setsubstr (4,\"extra\")\n", (const char *) c0); + 
c0.setsubstr (4, "extra"); + ret += c0 != " extra"; + printf ("\t\"%s\".setsubstr (4,\"extra\")\n", (const char *) c1); + c1.setsubstr (4, "extra"); + ret += c1 != "Testextra"; + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + + printf ("\t\"%s\".setsubstr(-1,\"extra\")\n", (const char *) c0); + c0.setsubstr (-1, "extra"); + ret ++; + } + catch (struct CBStringException err) { + printf ("\tException (%s) properly thrown\n", err.what()); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test20 (void) { +int ret = 0; + + printf ("TEST: insert() method\n"); + + try { + CBString c0("Test-test"); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.insert (4, "extra")); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".insert (4,\"extra\")\n", (const char *) c0); + c0.insert (4, "extra"); + ret += c0 != " extra"; + printf ("\t\"%s\".insert (4,\"extra\")\n", (const char *) c1); + c1.insert (4, "extra"); + ret += c1 != "Testextra-test"; + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + + printf ("\t\"%s\".insert(-1,\"extra\")\n", (const char *) c0); + c0.insert (-1, "extra"); + ret ++; + } + catch (struct CBStringException err) { + printf ("\tException (%s) properly thrown\n", err.what()); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test21 (void) { +int ret = 0; + + printf ("TEST: insertchrs() method\n"); + + try { + CBString c0("Test-test"); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.insertchrs (4, 2, 'x')); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("Test-test"); + + printf 
("\t\"%s\".insertchrs (4,2,'x')\n", (const char *) c0); + c0.insertchrs (4, 2, 'x'); + ret += c0 != "xxxxxx"; + printf ("\t\"%s\".insertchrs (4,2,'x')\n", (const char *) c1); + c1.insertchrs (4, 2, 'x'); + ret += c1 != "Testxx-test"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + + printf ("\t\"%s\".insertchrs (-1,2,'x')\n", (const char *) c0); + c0.insertchrs (-1, 2, 'x'); + ret ++; + } + catch (struct CBStringException err) { + printf ("\tException (%s) properly thrown\n", err.what()); + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test22 (void) { +int ret = 0; + + printf ("TEST: replace() method\n"); + + try { + CBString c0("Test-test"); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.replace (4, 2, "beef")); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".replace (4,2,\"beef\")\n", (const char *) c0); + c0.replace (4, 2, CBString ("beef")); + ret += c0 != " beef"; + c0 = ""; + c0.replace (4, 2, "beef"); + ret += c0 != " beef"; + + printf ("\t\"%s\".replace (4,2,\"beef\")\n", (const char *) c1); + c1.replace (4, 2, CBString ("beef")); + ret += c1 != "Testbeefest"; + c1 = "Test-test"; + c1.replace (4, 2, "beef"); + ret += c1 != "Testbeefest"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test23 (void) { +int ret = 0; + + printf ("TEST: findreplace() method\n"); + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".findreplace (\"est\",\"beef\")\n", (const char *) c0); + c0.findreplace ("est", "beef"); + ret += c0 != ""; + c0 = ""; + c0.findreplace (CBString ("est"), CBString ("beef")); + ret += c0 != ""; + + printf ("\t\"%s\".findreplace 
(\"est\",\"beef\")\n", (const char *) c1); + c1.findreplace ("est", "beef"); + ret += c1 != "Tbeef-tbeef"; + c1 = "Test-test"; + c1.findreplace (CBString ("est"), CBString ("beef")); + ret += c1 != "Tbeef-tbeef"; + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("TeSt-tEsT"); + + printf ("\t\"%s\".findreplacecaseless (\"est\",\"beef\")\n", (const char *) c0); + c0.findreplacecaseless ("est", "beef"); + ret += c0 != ""; + c0 = ""; + c0.findreplacecaseless (CBString ("est"), CBString ("beef")); + ret += c0 != ""; + + printf ("\t\"%s\".findreplacecaseless (\"est\",\"beef\")\n", (const char *) c1); + c1.findreplacecaseless ("est", "beef"); + ret += c1 != "Tbeef-tbeef"; + c1 = "Test-test"; + c1.findreplacecaseless (CBString ("est"), CBString ("beef")); + ret += c1 != "Tbeef-tbeef"; + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test24 (void) { +int ret = 0; + + printf ("TEST: remove() method\n"); + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".remove (4,2)\n", (const char *) c0); + c0.remove (4, 2); + ret += c0 != ""; + + printf ("\t\"%s\".remove (4,2)\n", (const char *) c1); + c1.remove (4, 2); + ret += c1 != "Testest"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test25 (void) { +int ret = 0; + + printf ("TEST: trunc() method\n"); + + try { + CBString c0, c1("Test-test"); + + printf ("\t\"%s\".trunc (4)\n", (const char *) c0); + c0.trunc (4); + ret += c0 != ""; + + printf ("\t\"%s\".trunc (4)\n", (const char *) c1); + c1.trunc (4); + ret += c1 != "Test"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + 
} + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test26 (void) { +int ret = 0; + + printf ("TEST: repeat() method\n"); + + try { + CBString c0, c1("Test"); + + printf ("\t\"%s\".repeat (4)\n", (const char *) c0); + c0.repeat (4); + ret += c0 != ""; + + printf ("\t\"%s\".repeat (4)\n", (const char *) c1); + c1.repeat (4); + ret += c1 != "TestTestTestTest"; + c1 = "Test"; + c1.repeat (4); + ret += c1 != "TestTestTestTest"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test27 (void) { +int ret = 0; + + printf ("TEST: ltrim(), rtrim() methods\n"); + + try { + CBString c0, c1(" Test "), c2(" "); + + printf ("\t\"%s\".ltrim ()\n", (const char *) c0); + c0.ltrim (); + ret += c0 != ""; + c0 = ""; + c0.rtrim (); + ret += c0 != ""; + + printf ("\t\"%s\".ltrim ()\n", (const char *) c1); + c1.ltrim (); + ret += c1 != "Test "; + c1 = " Test "; + c1.rtrim (); + ret += c1 != " Test"; + + printf ("\t\"%s\".ltrim ()\n", (const char *) c2); + c2.ltrim (); + ret += c2 != ""; + c2 = " "; + c2.rtrim (); + ret += c2 != ""; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +#if !defined(BSTRLIB_CANNOT_USE_STL) + +int test28 (void) { +int ret = 0; + + printf ("TEST: split(), join() mechanisms\n"); + + try { + CBString c0, c1("a b c d e f"); + struct CBStringList s; + s.split (c1, ' '); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.join (s)); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0, c1("a b c d e f"); + struct CBStringList s; + + printf ("\t\"%s\".split (' ')\n", (const char *) c1); + + s.split (c1, ' '); + CBString c2(s), c3(s, ','); + + printf ("\tc.join (<...>)\n"); + + ret += c2 != "abcdef"; + 
ret += c3 != "a,b,c,d,e,f"; + c0.join (s); + ret += c0 != "abcdef"; + c0.join (s, ','); + ret += c0 != "a,b,c,d,e,f"; + + CBString strPepe = "valor1@valor2@valor3@@@valor6"; + for (unsigned char c = (unsigned char) '\0';;c++) { + CBStringList sl; + CBString x; + + sl.split (strPepe, c); + x.join (sl, c); + if (x != strPepe) { + printf ("\tfailure[%d] split/join mismatch\n\t\t%s\n\t\t%s\n", __LINE__, (const char *) strPepe, (const char *) x); + ret++; + break; + } + if (UCHAR_MAX == c) break; + } + + { + CBStringList sl; + CBString x; + + sl.splitstr (strPepe, CBString ("or")); + x.join (sl, CBString ("or")); + if (x != strPepe) { + printf ("\tfailure[%d] splitstr/join mismatch\n\t\t%s\n\t\t%s\n", __LINE__, (const char *) strPepe, (const char *) x); + ret++; + } + } + + { + CBStringList sl; + CBString x; + + sl.splitstr (strPepe, CBString ("6")); + x.join (sl, CBString ("6")); + if (x != strPepe) { + printf ("\tfailure[%d] splitstr/join mismatch\n\t\t%s\n\t\t%s\n", __LINE__, (const char *) strPepe, (const char *) x); + ret++; + } + } + + { + CBStringList sl; + CBString x; + + sl.splitstr (strPepe, CBString ("val")); + x.join (sl, CBString ("val")); + if (x != strPepe) { + printf ("\tfailure[%d] splitstr/join mismatch\n\t\t%s\n\t\t%s\n", __LINE__, (const char *) strPepe, (const char *) x); + ret++; + } + } + + { + CBStringList sl; + CBString x; + + sl.splitstr (strPepe, CBString ("@@")); + x.join (sl, CBString ("@@")); + if (x != strPepe) { + printf ("\tfailure[%d] splitstr/join mismatch\n\t\t%s\n\t\t%s\n", __LINE__, (const char *) strPepe, (const char *) x); + ret++; + } + } + + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +#endif + +int test29 (void) { +int ret = 0; + + printf ("TEST: caselessEqual(), caselessCmp() mechanisms\n"); + + try { + CBString c0("Test"), c1("test"), c2("testy"); + + printf ("\t\"%s\".caselessEqual 
(\"%s\")\n", (const char *) c0, (const char *) c1); + ret += 1 != c0.caselessEqual (c1); + ret += 1 != c1.caselessEqual (c0); + printf ("\t\"%s\".caselessEqual (\"%s\")\n", (const char *) c0, (const char *) c2); + ret += 0 != c0.caselessEqual (c2); + ret += 0 != c2.caselessEqual (c0); + + printf ("\t\"%s\".caselessCmp (\"%s\")\n", (const char *) c0, (const char *) c1); + ret += 0 != c0.caselessCmp (c1); + ret += 0 != c1.caselessCmp (c0); + printf ("\t\"%s\".caselessCmp (\"%s\")\n", (const char *) c0, (const char *) c2); + ret += 0 == c0.caselessCmp (c2); + ret += 0 == c2.caselessCmp (c0); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int test30 (void) { +int ret = 0; + + printf ("TEST: toupper(), tolower() mechanisms\n"); + + try { + CBString c0("Test-test"); + + c0.writeprotect (); + EXCEPTION_EXPECTED (c0.toupper()); + EXCEPTION_EXPECTED (c0.tolower()); + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + try { + CBString c0; + + c0 = "Test"; + printf ("\t\"%s\".toupper ()\n", (const char *) c0); + c0.toupper(); + ret += c0 != "TEST"; + + c0 = "Test"; + printf ("\t\"%s\".tolower ()\n", (const char *) c0); + c0.tolower (); + ret += c0 != "test"; + } + catch (struct CBStringException err) { + printf ("Exception thrown [%d]: %s\n", __LINE__, err.what()); + ret ++; + } + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static size_t test31_aux (void *buff, size_t elsize, size_t nelem, void *parm) { + buff = buff; + elsize = elsize; + nelem = nelem; + parm = parm; + return 0; +} + +int test31 (void) { +CBString c; +int ret = 0; + + printf ("TEST: CBStream test\n"); + + CBStream s((bNread) test31_aux, NULL); + s << CBString("Test"); + + ret += ((c = s.read ()) != CBString ("Test")); + ret += !s.eof(); + + printf ("\t\"%s\" through CBStream.read()\n", 
(const char *) c); + + s << CBString("Test"); + + c.trunc (0); + ret += ((s >> c) != CBString ("Test")); + ret += !s.eof(); + + printf ("\t\"%s\" through CBStream.>>\n", (const char *) c); + + return ret; +} + +/* int bMultiConcatNeedNULLAsLastArgument (bstring dst, ...) + * + * Concatenate a null-pointer-terminated sequence of bstring arguments to dst. Stops at the first argument that is malformed or fails to concatenate and returns a negative value; otherwise returns the last bconcat result (0 when there are no arguments). + */ +int bMultiConcatNeedNULLAsLastArgument (bstring dst, ...) { +va_list arglist; +int ret = 0; + va_start (arglist, dst); + do { + bstring parm = va_arg (arglist, bstring); + if (NULL == parm) break; + if (NULL == parm->data || parm->slen > parm->mlen || + parm->mlen < 0 || parm->slen < 0) { + ret = BSTR_ERR; + break; + } + ret = bconcat (dst, parm); + } while (0 <= ret); + va_end (arglist); + return ret; +} + +/* int bMultiCatCstrNeedNULLAsLastArgument (bstring dst, ...) + * + * Concatenate a null-pointer-terminated sequence of char * arguments to dst. Stops at the first bcatcstr failure and returns its negative result; otherwise returns the last bcatcstr result (0 when there are no arguments). + */ +int bMultiCatCstrNeedNULLAsLastArgument (bstring dst, ...) { +va_list arglist; +int ret = 0; + va_start (arglist, dst); + do { + char* parm = va_arg (arglist, char *); + if (NULL == parm) break; + ret = bcatcstr (dst, parm); + } while (0 <= ret); + va_end (arglist); + return ret; +} + +/* + * The following macros are only available on more recent compilers that + * support variable length macro arguments and __VA_ARGS__. These can also + * be dangerous because there is no compile time type checking on the + * arguments. + */ +/* The terminating NULL is cast to the pointer type that va_arg reads: a bare NULL may expand to an integer constant, which is not a valid variadic sentinel. */ + +#define bMultiConcat(dst,...) bMultiConcatNeedNULLAsLastArgument((dst),##__VA_ARGS__,(bstring) NULL) +#define bMultiCatCstr(dst,...) bMultiCatCstrNeedNULLAsLastArgument((dst),##__VA_ARGS__,(char *) NULL) + +#define bGlue3_aux(a,b,c) a ## b ## c +#define bGlue3(a,b,c) bGlue3_aux(a,b,c) + +#if defined(_MSC_VER) +#define _bDeclTbstrIdx(t,n,...) \ + static unsigned char bGlue3(_btmpuc_,t,n)[] = {__VA_ARGS__, '\0'}; \ + struct tagbstring t = { -32, sizeof(bGlue3(_btmpuc_,t,n))-1, bGlue3(_btmpuc_,t,n)} +#define bDeclTbstr(t,...) 
_bDeclTbstrIdx(t,__COUNTER__,__VA_ARGS__) +#else +#define bDeclTbstr(t,...) \ + static unsigned char bGlue3(_btmpuc_,t,__LINE__)[] = {__VA_ARGS__, '\0'}; \ + struct tagbstring t = { -__LINE__, sizeof(bGlue3(_btmpuc_,t,__LINE__))-1, bGlue3(_btmpuc_,t,__LINE__)} +#endif + +static int test32(void) { +bstring b1 = bfromStatic ("a"); +bstring b2 = bfromStatic ("e"); +bstring b3 = bfromStatic ("i"); +bstring b4 = bfromStatic (""); +int ret = 0; + + printf ("TEST: bMultiCatCstr, bMultiConcat\n"); + + bMultiCatCstr(b1, "b", "c", "d"); + bMultiCatCstr(b2, "f", "g", "h"); + bMultiCatCstr(b3, "j", "k", "l"); + bMultiConcat(b4, b1, b2, b3); + + ret += 1 != biseqStatic (b1, "abcd"); + ret += 1 != biseqStatic (b2, "efgh"); + ret += 1 != biseqStatic (b3, "ijkl"); + ret += 1 != biseqStatic (b4, "abcdefghijkl"); + + bdestroy (b1); + bdestroy (b2); + bdestroy (b3); + bdestroy (b4); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +static int test33(void) { + bDeclTbstr (t1, 'H','e','l','l','o'); + bDeclTbstr (t2, 32,'w','o','r','l','d'); + bstring b = bfromStatic("["); + int ret; + + printf ("TEST: bDeclTbstr\n"); + + bconcat (b, &t1); + bconcat (b, &t2); + bcatStatic (b, "]"); + ret = 1 != biseqStatic (b, "[Hello world]"); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + return ret; +} + +int main () { +int ret = 0; + + printf ("Direct case testing of CPP core functions\n"); + + ret += test0 (); + ret += test1 (); + ret += test2 (); + ret += test3 (); + ret += test4 (); + ret += test5 (); + ret += test6 (); + ret += test7 (); + ret += test8 (); + ret += test9 (); + ret += test10 (); + ret += test11 (); + ret += test12 (); + ret += test13 (); + ret += test14 (); + ret += test15 (); + ret += test16 (); + ret += test17 (); + ret += test18 (); + ret += test19 (); + ret += test20 (); + ret += test21 (); + ret += test22 (); + ret += test23 (); + ret += test24 (); + ret += test25 (); + ret += test26 (); + ret += test27 (); +#if !defined(BSTRLIB_CANNOT_USE_STL) + ret += 
test28 (); +#endif + ret += test29 (); + ret += test30 (); + ret += test31 (); + ret += test32 (); + ret += test33 (); + + printf ("# test failures: %d\n", ret); + /* Report failures through the exit status so make/CI can detect them. */ + return ret ? 1 : 0; +} diff --git a/bstrlib/testaux.c b/bstrlib/testaux.c new file mode 100644 index 0000000..99a05df --- /dev/null +++ b/bstrlib/testaux.c @@ -0,0 +1,423 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source + * license. Refer to the accompanying documentation for details on usage and + * license. + */ + +/* + * testaux.c + * + * This file is the C unit test for the bstraux module of Bstrlib. + */ + +#include <stdio.h> +#include "bstrlib.h" +#include "bstraux.h" +/* Write callback handed to bwsOpen: appends nelem elements of elsize bytes each from buf to the bstring passed as parm. Returns the number of elements appended (fewer than nelem if bcatblk fails) or a negative value when an argument is NULL or zero. */ +static int tWrite (const void * buf, size_t elsize, size_t nelem, void * parm) { +bstring b = (bstring) parm; +size_t i; + + if (NULL == b || NULL == buf || 0 == elsize || 0 == nelem) + return -__LINE__; + + for (i=0; i < nelem; i++) { + if (0 > bcatblk (b, buf, elsize)) break; + buf = (const void *) (elsize + (const char *) buf); + } + return (int) i; +} +/* Drives a bwriteStream with an 8 byte buffer length and checks what has reached the sink string after each write and after close. */ +int test0 (void) { +struct bwriteStream * ws; +bstring s; +int ret = 0; + + printf ("TEST: struct bwriteStream functions.\n"); + + ws = bwsOpen ((bNwrite) tWrite, (s = bfromcstr (""))); + bwsBuffLength (ws, 8); + ret += 8 != bwsBuffLength (ws, 0); + bwsWriteBlk (ws, bsStaticBlkParms ("Hello ")); + ret += 0 == biseqcstr (s, ""); + bwsWriteBlk (ws, bsStaticBlkParms ("World\n")); + ret += 0 == biseqcstr (s, "Hello Wo"); + ret += s != bwsClose (ws); + ret += 0 == biseqcstr (s, "Hello World\n"); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test1 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b, c, d; +int ret = 0; + + printf ("TEST: bTail and bHead functions.\n"); + b = bTail (&t, 5); + c = bHead (&t, 5); + ret += 0 >= biseqcstr (b, "world"); + ret += 0 >= biseqcstr (c, "Hello"); + bdestroy (b); + bdestroy (c); + + b = bTail (&t, 0); + c = bHead (&t, 
0); + ret += 0 >= biseqcstr (b, ""); + ret += 0 >= biseqcstr (c, ""); + bdestroy (b); + bdestroy (c); + + d = bstrcpy (&t); + b = bTail (d, 5); + c = bHead (d, 5); + ret += 0 >= biseqcstr (b, "world"); + ret += 0 >= biseqcstr (c, "Hello"); + bdestroy (b); + bdestroy (c); + bdestroy (d); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test2 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b; +int ret = 0, reto; + + printf ("TEST: bSetChar function.\n"); + ret += 0 <= bSetChar (&t, 4, ','); + ret += 0 > bSetChar (b = bstrcpy (&t), 4, ','); + ret += 0 >= biseqcstr (b, "Hell, world"); + ret += 0 <= bSetChar (b, -1, 'x'); + b->slen = 2; + ret += 0 > bSetChar (b, 1, 'i'); + ret += 0 >= biseqcstr (b, "Hi"); + ret += 0 > bSetChar (b, 2, 's'); + ret += 0 >= biseqcstr (b, "His"); + ret += 0 > bSetChar (b, 1, '\0'); + ret += blength (b) != 3; + ret += bchare (b, 0, '?') != 'H'; + ret += bchare (b, 1, '?') != '\0'; + ret += bchare (b, 2, '?') != 's'; + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + reto = ret; + ret = 0; + + printf ("TEST: bSetCstrChar function.\n"); + ret += 0 <= bSetCstrChar (&t, 4, ','); + ret += 0 > bSetCstrChar (b = bstrcpy (&t), 4, ','); + ret += 0 >= biseqcstr (b, "Hell, world"); + ret += 0 <= bSetCstrChar (b, -1, 'x'); + b->slen = 2; + ret += 0 > bSetCstrChar (b, 1, 'i'); + ret += 0 >= biseqcstr (b, "Hi"); + ret += 0 > bSetCstrChar (b, 2, 's'); + ret += 0 >= biseqcstr (b, "His"); + ret += 0 > bSetCstrChar (b, 1, '\0'); + ret += blength (b) != 1; + ret += bchare (b, 0, '?') != 'H'; + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + return reto + ret; +} + +int test3 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b; +int ret = 0; + + printf ("TEST: bFill function.\n"); + ret += 0 <= bFill (&t, 'x', 7); + ret += 0 > bFill (b = bstrcpy (&t), 'x', 7); + ret += 0 >= biseqcstr (b, "xxxxxxx"); + ret += 0 <= bFill (b, 'x', -1); + ret += 0 > bFill (b, 'x', 0); + ret += 0 >= biseqcstr (b, 
""); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test4 (void) { +struct tagbstring t = bsStatic ("foo"); +bstring b; +int ret = 0; + + printf ("TEST: bReplicate function.\n"); + ret += 0 <= bReplicate (&t, 4); + ret += 0 <= bReplicate (b = bstrcpy (&t), -1); + ret += 0 > bReplicate (b, 4); + ret += 0 >= biseqcstr (b, "foofoofoofoo"); + ret += 0 > bReplicate (b, 0); + ret += 0 >= biseqcstr (b, ""); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test5 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b; +int ret = 0; + + printf ("TEST: bReverse function.\n"); + ret += 0 <= bReverse (&t); + ret += 0 > bReverse (b = bstrcpy (&t)); + ret += 0 >= biseqcstr (b, "dlrow olleH"); + b->slen = 0; + ret += 0 > bReverse (b); + ret += 0 >= biseqcstr (b, ""); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test6 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b; +int ret = 0; + + printf ("TEST: bInsertChrs function.\n"); + ret += 0 <= bInsertChrs (&t, 6, 4, 'x', '?'); + ret += 0 > bInsertChrs (b = bstrcpy (&t), 6, 4, 'x', '?'); + ret += 0 >= biseqcstr (b, "Hello xxxxworld"); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test7 (void) { +struct tagbstring t = bsStatic (" i am "); +bstring b; +int ret = 0; + + printf ("TEST: bJustify functions.\n"); + ret += 0 <= bJustifyLeft (&t, ' '); + ret += 0 <= bJustifyRight (&t, 8, ' '); + ret += 0 <= bJustifyMargin (&t, 8, ' '); + ret += 0 <= bJustifyCenter (&t, 8, ' '); + ret += 0 > bJustifyLeft (b = bstrcpy (&t), ' '); + ret += 0 >= biseqcstr (b, "i am"); + ret += 0 > bJustifyRight (b, 8, ' '); + ret += 0 >= biseqcstr (b, " i am"); + ret += 0 > bJustifyMargin (b, 8, ' '); + ret += 0 >= biseqcstr (b, "i am"); + ret += 0 > bJustifyCenter (b, 8, ' '); + ret += 0 >= biseqcstr (b, " i am"); + bdestroy (b); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + 
+int test8 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b; +char * c; +int ret = 0; + + printf ("TEST: NetStr functions.\n"); + c = bStr2NetStr (&t); + ret += 0 != strcmp (c, "11:Hello world,"); + b = bNetStr2Bstr (c); + ret += 0 >= biseq (b, &t); + bdestroy (b); + bcstrfree (c); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test9 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b, c; +int err, ret = 0; + + printf ("TEST: Base 64 codec.\n"); + + b = bBase64Encode (&t); + ret += 0 >= biseqcstr (b, "SGVsbG8gd29ybGQ="); + c = bBase64DecodeEx (b, &err); + ret += 0 != err; + ret += 0 >= biseq (c, &t); + bdestroy (b); + bdestroy (c); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test10 (void) { +struct tagbstring t = bsStatic ("Hello world"); +bstring b, c; +int err, ret = 0; + + printf ("TEST: UU codec.\n"); + + b = bUuEncode (&t); + ret += 0 >= biseqcstr (b, "+2&5L;&\\@=V]R;&0`\r\n"); + c = bUuDecodeEx (b, &err); + ret += 0 != err; + ret += 0 >= biseq (c, &t); + bdestroy (b); + bdestroy (c); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test11 (void) { +struct tagbstring t = bsStatic ("Hello world"); +unsigned char Ytstr[] = {0x72, 0x8f, 0x96, 0x96, 0x99, 0x4a, 0xa1, 0x99, 0x9c, 0x96, 0x8e}; +bstring b, c; +int ret = 0; + + printf ("TEST: Y codec.\n"); + + b = bYEncode (&t); + ret += 11 != b->slen; + ret += 0 >= bisstemeqblk (b, Ytstr, 11); + c = bYDecode (b); + ret += 0 >= biseq (c, &t); + bdestroy (b); + bdestroy (c); + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test12 (void) { +struct tagbstring t = bsStatic ("Hello world"); +struct bStream * s; +bstring b; +int ret = 0; + + printf ("TEST: bsFromBstr.\n"); + + ret = bsread (b = bfromcstr (""), s = bsFromBstr (&t), 6); + ret += 1 != biseqcstr (b, "Hello "); + if (b) b->slen = 0; + ret = bsread (b, s, 6); + ret += 1 != biseqcstr (b, "world"); + + bdestroy (b); + bsclose (s); + + printf ("\t# 
failures: %d\n", ret); + + return ret; +} + +struct vfgetc { + int ofs; + bstring base; +}; + +static int test13_fgetc (void * ctx) { +struct vfgetc * vctx = (struct vfgetc *) ctx; +int c; + + if (NULL == vctx || NULL == vctx->base) return EOF; + if (vctx->ofs >= blength (vctx->base)) return EOF; + c = bchare (vctx->base, vctx->ofs, EOF); + vctx->ofs++; + return c; +} + +int test13 (void) { +struct tagbstring t0 = bsStatic ("Random String, long enough to cause to reallocing"); +struct vfgetc vctx; +bstring b; +int ret = 0; +int i; + + printf ("TEST: bSecureInput, bSecureDestroy.\n"); + + for (i=0; i < 1000; i++) { + unsigned char * h; + + vctx.ofs = 0; + vctx.base = &t0; + + b = bSecureInput (INT_MAX, '\n', (bNgetc) test13_fgetc, &vctx); + ret += 1 != biseq (b, &t0); + h = b->data; + bSecureDestroy (b); + + /* WARNING! Technically undefined code follows (h has been freed): */ + ret += (0 == memcmp (h, t0.data, t0.slen)); + + if (ret) break; + } + + printf ("\t# failures: %d\n", ret); + + return ret; +} + +int test14_aux(bstring b, const char* chkVal) { +int ret = 0; + ret += 0 != bSGMLEncode (b); + ret += 1 != biseqcstr (b, chkVal); + return ret; +} + +int test14 (void) { +bstring b; +int ret = 0; + + printf ("TEST: bSGMLEncode.\n"); + ret += test14_aux (b = bfromStatic ("<\"Hello, you, me, & world\">"), "<"Hello, you, me, & world">"); + printf ("\t# failures: %d\n", ret); + return ret; +} + +int main () { +int ret = 0; + + printf ("Direct case testing of bstraux functions\n"); + + ret += test0 (); + ret += test1 (); + ret += test2 (); + ret += test3 (); + ret += test4 (); + ret += test5 (); + ret += test6 (); + ret += test7 (); + ret += test8 (); + ret += test9 (); + ret += test10 (); + ret += test11 (); + ret += test12 (); + ret += test13 (); + ret += test14 (); + + printf ("# test failures: %d\n", ret); + + return 0; +} diff --git a/bstrlib/utf8util.c b/bstrlib/utf8util.c new file mode 100644 index 0000000..b74606f --- /dev/null +++ b/bstrlib/utf8util.c @@ -0,0 
/* ===== bstrlib/utf8util.h ===== */

/*
 * This source file is part of the bstring string library.  This code was
 * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
 * license and the GPL. Refer to the accompanying documentation for details
 * on usage and license.
 */

/*
 * utf8util.h
 *
 * This file defines the interface to the utf8 utility functions.
 */

#ifndef UTF8_UNICODE_UTILITIES
#define UTF8_UNICODE_UTILITIES

#include <limits.h>

#ifdef __cplusplus
extern "C" {
#endif

/* cpUcs4: a signed integer type with at least 32 bits, holds one code point. */
#if INT_MAX >= 0x7fffffffUL
typedef int cpUcs4;
#elif LONG_MAX >= 0x7fffffffUL
typedef long cpUcs4;
#else
#error This compiler is not supported
#endif

/* cpUcs2: an unsigned integer type whose maximum is exactly 0xFFFF. */
#if UINT_MAX == 0xFFFF
typedef unsigned int cpUcs2;
#elif USHRT_MAX == 0xFFFF
typedef unsigned short cpUcs2;
#elif UCHAR_MAX == 0xFFFF
typedef unsigned char cpUcs2;
#else
#error This compiler is not supported
#endif

/*
 * True when v is acceptable as a code point: it is outside the surrogate
 * range 0xD800..0xDFFF, it does not exceed 0x10FFFF, and its low 16 bits are
 * neither 0xFFFE (the byte-swapped BOM alias) nor 0xFFFF.
 * Note: v is evaluated more than once.
 */
#define isLegalUnicodeCodePoint(v) ((((v) < 0xD800L) || ((v) > 0xDFFFL)) && (((unsigned long)(v)) <= 0x0010FFFFL) && (((v)|0x1F0001) != 0x1FFFFFL))

/* Read cursor over a buffer of UTF-8 encoded text. */
struct utf8Iterator {
	unsigned char* data;   /* buffer being iterated (not owned) */
	int slen;              /* number of bytes in data, or -1 when unusable */
	int start, next;       /* offset of the sequence last read / read next; -1 when unset */
	int error;             /* non-zero when the last read hit an encoding error */
};

/* True when it is NULL or its read position has reached the end of the data. */
#define utf8IteratorNoMore(it) (!(it) || (it)->next >= (it)->slen)

extern void utf8IteratorInit (struct utf8Iterator* iter, unsigned char* data, int slen);
extern void utf8IteratorUninit (struct utf8Iterator* iter);
extern cpUcs4 utf8IteratorGetNextCodePoint (struct utf8Iterator* iter, cpUcs4 errCh);
extern cpUcs4 utf8IteratorGetCurrCodePoint (struct utf8Iterator* iter, cpUcs4 errCh);
extern int utf8ScanBackwardsForCodePoint (unsigned char* msg, int len, int pos, cpUcs4* out);

#ifdef __cplusplus
}
#endif

#endif /* UTF8_UNICODE_UTILITIES */

/* ===== bstrlib/utf8util.c ===== */

/*
 * This source file is part of the bstring string library.  This code was
 * written by Paul Hsieh in 2002-2015, and is covered by the BSD open source
 * license and the GPL. Refer to the accompanying documentation for details
 * on usage and license.
 */

/*
 * utf8util.c
 *
 * This file is not necessarily part of the core bstring library itself, but
 * is just a generic module for implementing utf8 utility functions.
 *
 * The declarations from utf8util.h appear directly above, so no #include of
 * that header is needed at this point.
 */

#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#endif

/*
 * Prepares iter for reading slen bytes starting at data.
 *
 * When data is NULL or slen is negative the iterator is put in a failed
 * state: slen and next are -1 and error is 1.  Otherwise next is 0 and error
 * is 0.  start is always reset to -1.  A NULL iter is ignored.
 */
void utf8IteratorInit (struct utf8Iterator* iter, unsigned char* data, int slen) {
	if (iter) {
		iter->data  = data;
		iter->slen  = (iter->data && slen >= 0) ? slen : -1;
		iter->start = -1;
		iter->next  = (iter->slen >= 0) ? 0 : -1;
		iter->error = (iter->slen >= 0) ? 0 : 1;
	}
}

/*
 * Detaches iter from its buffer: data becomes NULL and slen, start and next
 * become -1.  The error flag is left untouched.  A NULL iter is ignored.
 */
void utf8IteratorUninit (struct utf8Iterator* iter) {
	if (iter) {
		iter->data  = NULL;
		iter->slen  = -1;
		iter->start = iter->next = -1;
	}
}

/*
 * Decodes the code point whose encoding covers msg[pos].
 *
 * msg/len describe the buffer, pos is an index into it that may point at a
 * lead byte or at any continuation byte of a sequence.  The function steps
 * back (at most three bytes) to the lead byte and decodes forward from there.
 *
 * On success the code point is stored in *out (out may be NULL) and the
 * number of bytes stepped back (0..3) is returned.  On failure a negative
 * value is returned; only its sign is meaningful.
 *
 * NOTE(review): the forward decode does not verify that the bytes after the
 * lead byte are continuation bytes (10xxxxxx); confirm whether callers rely
 * on that leniency before tightening it.
 */
int utf8ScanBackwardsForCodePoint (unsigned char* msg, int len, int pos, cpUcs4* out) {
	cpUcs4 v1, v2, v3, v4, x;
	int ret;

	/* The unsigned comparison rejects negative pos as well as pos >= len. */
	if (NULL == msg || len < 0 || (unsigned) pos >= (unsigned) len) {
		return -__LINE__;
	}
	if (!out) out = &x;
	ret = 0;

	if (msg[pos] < 0x80) {
		/* Plain ASCII: nothing to scan. */
		*out = msg[pos];
		return 0;
	} else if (msg[pos] < 0xC0) {
		/* Continuation byte: look for the lead byte 1, 2 or 3 bytes back. */
		if (0 == pos) return -__LINE__;
		if (msg[pos-1] >= 0xC1 && msg[pos-1] < 0xF8) {
			pos--;
			ret = 1;
		} else {
			if (1 == pos) return -__LINE__;
			if ((msg[pos-1] | 0x3F) != 0xBF) return -__LINE__;
			if (msg[pos-2] >= 0xE0 && msg[pos-2] < 0xF8) {
				pos -= 2;
				ret = 2;
			} else {
				if (2 == pos) return -__LINE__;
				if ((msg[pos-2] | 0x3F) != 0xBF) return -__LINE__;
				if ((msg[pos-3]|0x07) == 0xF7) {
					pos -= 3;
					ret = 3;
				} else return -__LINE__;
			}
		}
	}

	/* msg[pos] is now a lead byte (>= 0xC0); decode forward from it. */
	if (msg[pos] < 0xE0) {
		if (pos + 1 >= len) return -__LINE__;
		v1 = msg[pos]   & ~0xE0;
		v2 = msg[pos+1] & ~0xC0;
		v1 = (v1 << 6) + v2;
		if (v1 < 0x80) return -__LINE__;   /* overlong encoding */
		*out = v1;
		return ret;
	}
	if (msg[pos] < 0xF0) {
		if (pos + 2 >= len) return -__LINE__;
		v1 = msg[pos]   & ~0xF0;
		v2 = msg[pos+1] & ~0xC0;
		v3 = msg[pos+2] & ~0xC0;
		v1 = (v1 << 12) + (v2 << 6) + v3;
		if (v1 < 0x800) return -__LINE__;  /* overlong encoding */
		if (!isLegalUnicodeCodePoint(v1)) return -__LINE__;
		*out = v1;
		return ret;
	}

	/* 5 and 6 byte lead bytes (and 0xFE, 0xFF) are invalid. */
	if (msg[pos] >= 0xF8) return -__LINE__;

	if (pos + 3 >= len) return -__LINE__;
	v1 = msg[pos]   & ~0xF8;
	v2 = msg[pos+1] & ~0xC0;
	v3 = msg[pos+2] & ~0xC0;
	v4 = msg[pos+3] & ~0xC0;
	v1 = (v1 << 18) + (v2 << 12) + (v3 << 6) + v4;
	if (v1 < 0x10000) return -__LINE__;    /* overlong encoding */
	if (!isLegalUnicodeCodePoint(v1)) return -__LINE__;
	*out = v1;
	return ret;
}

/*
Code point                UTF-8
----------                -----
U-00000000 - U-0000007F:  0xxxxxxx
U-00000080 - U-000007FF:  110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF:  1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

U-00200000 - U-03FFFFFF:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF:  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/

/*
 * Decodes the single UTF-8 sequence that starts at chrs[0].
 *
 * avail is the number of readable bytes at chrs and must be at least 1; no
 * byte at or beyond chrs[avail] is ever read.
 *
 * Returns the length of the sequence (1..4) and stores the code point in
 * *out, or returns 0 when the sequence is truncated, malformed, overlong or
 * encodes an illegal code point (*out is then left untouched).
 */
static int utf8DecodeSequence (const unsigned char* chrs, int avail, long* out) {
	unsigned char c, d, e, f;
	long v;

	c = chrs[0];
	if (c < 0x80) {
		*out = c;
		return 1;
	}

	/* 0x80..0xBF is a stray continuation byte; 0xF8..0xFF can never start
	   a valid sequence (5 and 6 byte encodings are invalid). */
	if (c < 0xC0 || c >= 0xF8) return 0;

	if (c < 0xE0) {
		if (avail < 2) return 0;
		/* Subtracting 0x80 maps a valid continuation byte to 0..0x3F. */
		d = (unsigned char) (chrs[1] - 0x80);
		v = ((long) (c - 0xC0) << 6) + d;
		if (d >= 0x40 || v < 0x80) return 0;
		*out = v;
		return 2;
	}

	if (c < 0xF0) {
		if (avail < 3) return 0;
		d = (unsigned char) (chrs[1] - 0x80);
		e = (unsigned char) (chrs[2] - 0x80);
		v = ((long) (c - 0xE0) << 12) + ((long) d << 6) + e;
		if ((d|e) >= 0x40 || v < 0x800 || !isLegalUnicodeCodePoint (v)) return 0;
		*out = v;
		return 3;
	}

	if (avail < 4) return 0;
	d = (unsigned char) (chrs[1] - 0x80);
	e = (unsigned char) (chrs[2] - 0x80);
	f = (unsigned char) (chrs[3] - 0x80);
	v = ((long) (c - 0xF0) << 18) + ((long) d << 12) + ((long) e << 6) + f;
	if ((d|e|f) >= 0x40 || v < 0x10000 || !isLegalUnicodeCodePoint (v)) return 0;
	*out = v;
	return 4;
}

/*
 * Returns the next code point and advances the iterator past it.
 *
 * After the call iter->data + iter->start points at the characters just
 * read and iter->data + iter->next points at the characters that will be
 * read next.  iter->error is a boolean indicating whether this read hit an
 * encoding error; in that case errCh is returned and the iterator skips the
 * offending byte together with any continuation bytes that follow it.
 *
 * errCh is also returned, without touching iter->error, when iter is NULL,
 * unusable (next < 0, data NULL) or already at the end of the data; in the
 * last case iter->start is set to iter->slen.
 */
cpUcs4 utf8IteratorGetNextCodePoint (struct utf8Iterator* iter, cpUcs4 errCh) {
	long v = 0;
	int i, ofs;

	if (NULL == iter || iter->next < 0) return errCh;
	if (iter->next >= iter->slen) {
		iter->start = iter->slen;
		return errCh;
	}
	if (NULL == iter->data) return errCh;

	/* Passing the remaining byte count keeps a sequence that is cut off by
	   the end of the buffer from being read past iter->slen. */
	ofs = utf8DecodeSequence (iter->data + iter->next, iter->slen - iter->next, &v);
	if (ofs > 0) {
		iter->error = 0;
	} else {
		iter->error = 1;
		v = errCh;
		/* Resynchronize: skip to the next byte that is not 10xxxxxx. */
		for (i = iter->next + 1; i < iter->slen; i++) {
			if ((iter->data[i] & 0xC0) != 0x80) break;
		}
		ofs = i - iter->next;
	}

	iter->start = iter->next;
	iter->next += ofs;
	return (cpUcs4) v;
}

/*
 * Returns the code point at the current read position without advancing.
 *
 * The sequence at iter->data + iter->next is decoded; iter->next is left
 * unchanged, so a following utf8IteratorGetNextCodePoint call reads the same
 * sequence.  iter->error is a boolean indicating whether this read hit an
 * encoding error, in which case errCh is returned.
 *
 * errCh is also returned, without touching iter->error, when iter is NULL,
 * unusable (next < 0, data NULL) or already at the end of the data; in the
 * last case iter->start is set to iter->slen.
 */
cpUcs4 utf8IteratorGetCurrCodePoint (struct utf8Iterator* iter, cpUcs4 errCh) {
	long v = 0;

	if (NULL == iter || iter->next < 0) return errCh;
	if (iter->next >= iter->slen) {
		iter->start = iter->slen;
		return errCh;
	}
	if (NULL == iter->data) return errCh;

	if (utf8DecodeSequence (iter->data + iter->next, iter->slen - iter->next, &v) > 0) {
		iter->error = 0;
	} else {
		iter->error = 1;
		v = errCh;
	}
	return (cpUcs4) v;
}