11 files changed, 1381 insertions, 0 deletions
diff --git a/src/pdclib/auxiliary/uctype/Makefile b/src/pdclib/auxiliary/uctype/Makefile
new file mode 100644
index 0000000..0d34b98
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/Makefile
@@ -0,0 +1,48 @@
+TARGET := get-uctypes
+# All source files of the project
+SRCFILES := $(wildcard *.c)
+# All header files of the project
+HDRFILES := $(wildcard *.h)
+# All object files in the project
+OBJFILES := $(patsubst %.c,%.o,$(SRCFILES))
+# All test drivers (_t)
+TSTFILES := $(patsubst %.c,%_t,$(SRCFILES))
+# All dependency files (.d)
+DEPFILES := $(patsubst %.c,%.d,$(SRCFILES))
+# All test driver dependency files (_t.d)
+TSTDEPFILES := $(patsubst %,%.d,$(TSTFILES))
+
+# Compiler warnings; additional flags can be handed in through USERFLAGS
+WARNINGS := -Wall -Wextra -pedantic -Wno-unused-parameter -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wuninitialized -Wstrict-prototypes -Wdeclaration-after-statement
+CFLAGS := -g -std=c99 $(WARNINGS) $(USERFLAGS) -I.
+
+.PHONY: all clean tests testdrivers
+
+all: $(TARGET)
+
+$(TARGET): $(OBJFILES)
+	@echo " CC      $@"
+	@$(CC) $^ -o $@
+	@echo
+
+tests: testdrivers
+	-@rc=0; count=0; failed=""; for file in $(TSTFILES); do echo " TST     $$file"; ./$$file; test=$$?; if [ $$test != 0 ]; then rc=`expr $$rc + $$test`; failed="$$failed $$file"; fi; count=`expr $$count + 1`; done; echo; echo "Tests executed: $$count  Tests failed: $$rc"; echo; for file in $$failed; do echo "Failed: $$file"; done; echo
+
+testdrivers: $(TSTFILES)
+	@echo
+
+-include $(DEPFILES) $(TSTDEPFILES)
+
+clean:
+	-@$(RM) $(wildcard $(OBJFILES) $(DEPFILES) $(TSTFILES) $(TSTDEPFILES) $(TARGET) aux.a)
+
+%.o: %.c Makefile
+	@echo " CC      $@"
+	@$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+
+%_t: %.c Makefile aux.a
+	@echo " CC      $@"
+	@$(CC) $(CFLAGS) -MMD -MP -DTEST $< aux.a -o $@
+
+aux.a: $(OBJFILES)
+	@$(AR) rc $@ $^
diff --git a/src/pdclib/auxiliary/uctype/derived_properties.c b/src/pdclib/auxiliary/uctype/derived_properties.c
new file mode 100644
index 0000000..c024efe
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/derived_properties.c
@@ -0,0 +1,300 @@
+/* derived properties
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "text_utilities.h"
+
+#include "derived_properties.h"
+
+#define LINE_BUFFER_SIZE 500u
+
+/* Read a file in the format of DerivedCoreProperties.txt into memory.
+   The file is read twice: the first pass adds up the "# Total code points:"
+   summary comments to size the arrays, the second pass parses the
+   "<code point or range> ; <property>" records themselves.
+   Returns a structure to be passed to release_derived_properties() when no
+   longer needed, or NULL (with a message written to stderr) if the file
+   could not be opened, did not pass check_file(), disagrees with its own
+   summary comments, or memory ran out.
+*/
+struct derived_properties_t * read_derived_properties( const char * filename )
+{
+    FILE * fh;
+    char buffer[ LINE_BUFFER_SIZE ];
+    struct derived_properties_t * dp = NULL;
+    size_t code_points = 0;
+    size_t properties = 0;
+    size_t capacity;
+    char * p;
+    const char * failure = NULL;
+    const char * code_point_count = "# Total code points: ";
+
+    if ( ( fh = fopen( filename, "r" ) ) == NULL )
+    {
+        fprintf( stderr, "Could not open '%s' for reading.\n", filename );
+        return NULL;
+    }
+
+    if ( check_file( fh, LINE_BUFFER_SIZE, ';', sizeof( derived_properties_fields ) / sizeof( int ), derived_properties_fields ) == (size_t)-1 )
+    {
+        fclose( fh );
+        return NULL;
+    }
+
+    /* First pass: size the arrays from the summary comments */
+    while ( fgets( buffer, LINE_BUFFER_SIZE, fh ) )
+    {
+        if ( ( p = strstr( buffer, code_point_count ) ) != NULL )
+        {
+            size_t count = strtoul( p + strlen( code_point_count ), NULL, 10 );
+
+            if ( ( SIZE_MAX - count ) < code_points )
+            {
+                fprintf( stderr, "Summing up total code points in '%s' would overflow.\n", filename );
+                fclose( fh );
+                return NULL;
+            }
+
+            code_points += count;
+            ++properties;
+        }
+    }
+
+    rewind( fh );
+    capacity = code_points;
+
+    if ( ( dp = malloc( sizeof( struct derived_properties_t ) ) ) != NULL )
+    {
+        dp->count = properties;
+        dp->name = calloc( properties, sizeof( char * ) );
+        dp->begin = calloc( properties, sizeof( size_t ) );
+        dp->end = calloc( properties, sizeof( size_t ) );
+        dp->code_points = malloc( code_points * sizeof( size_t ) );
+    }
+
+    if ( ! dp || ! dp->name || ! dp->begin || ! dp->end || ! dp->code_points )
+    {
+        fprintf( stderr, "Memory allocation failure.\n" );
+        if ( dp )
+        {
+            free( dp->name );
+            free( dp->begin );
+            free( dp->end );
+            free( dp->code_points );
+            free( dp );
+        }
+
+        fclose( fh );
+        return NULL;
+    }
+
+    properties = 0; /* Re-using the variable: index of the current property */
+    code_points = 0; /* Re-using the variable: code points stored so far */
+
+    /* Second pass: parse the records */
+    while ( fgets( buffer, LINE_BUFFER_SIZE, fh ) )
+    {
+        char * range;
+        size_t first;
+        size_t last;
+
+        /* Remove comments */
+        if ( ( p = strchr( buffer, '#' ) ) != NULL )
+        {
+            *p = '\0';
+        }
+
+        /* > 1 because of newline */
+        if ( strlen( buffer ) <= 1 )
+        {
+            continue;
+        }
+
+        range = next_token( buffer, ';' );
+        p = next_token( NULL, ';' );
+
+        if ( ! range || ! p )
+        {
+            failure = "Parse error, malformed input.";
+            break;
+        }
+
+        /* If we got to a new property (except the first) */
+        if ( properties < dp->count && dp->name[ properties ] && strcmp( p, dp->name[ properties ] ) )
+        {
+            /* Index into ->code_points where the previous property ends */
+            dp->end[ properties ] = code_points;
+            ++properties;
+        }
+
+        /* More properties than summary comments; the arrays are too small */
+        if ( properties >= dp->count )
+        {
+            failure = "Parse error, malformed input.";
+            break;
+        }
+
+        /* If we got to a new property, even the first */
+        if ( dp->name[ properties ] == NULL )
+        {
+            if ( ( dp->name[ properties ] = malloc( strlen( p ) + 1 ) ) == NULL )
+            {
+                failure = "Memory allocation failure.";
+                break;
+            }
+
+            strcpy( dp->name[ properties ], p );
+
+            /* Index into ->code_points where this property begins */
+            dp->begin[ properties ] = code_points;
+        }
+
+        /* Re-using p, as we have done everything related to the property
+           name at this point.
+        */
+        first = strtoul( range, &p, 16 );
+
+        if ( *p == '\0' )
+        {
+            last = first;
+        }
+        else
+        {
+            while ( *p && ! isxdigit( (unsigned char)*p ) )
+            {
+                ++p;
+            }
+
+            last = strtoul( p, NULL, 16 );
+
+            if ( last <= first )
+            {
+                failure = "Parse error, malformed input.";
+                break;
+            }
+        }
+
+        /* More code points than the summary comments announced */
+        if ( ( last - first ) >= ( capacity - code_points ) )
+        {
+            failure = "Parse error, malformed input.";
+            break;
+        }
+
+        for ( ; first <= last; ++first )
+        {
+            dp->code_points[ code_points++ ] = first;
+        }
+    }
+
+    fclose( fh );
+
+    if ( failure != NULL )
+    {
+        fprintf( stderr, "%s\n", failure );
+        release_derived_properties( dp );
+        return NULL;
+    }
+
+    /* Have to end the last property as well */
+    if ( properties < dp->count && dp->name[ properties ] )
+    {
+        dp->end[ properties ] = code_points;
+        ++properties;
+    }
+
+    /* Only properties actually found have a name; do not advertise others */
+    dp->count = properties;
+    return dp;
+}
+
+/* bsearch() comparison function: ascending order of two size_t values. */
+static int comp( const void * l, const void *  r )
+{
+    size_t const a = *(size_t const *)l;
+    size_t const b = *(size_t const *)r;
+    return ( a > b ) - ( a < b );
+}
+
+/* Tell whether codepoint is listed under the named property. Yields 1 if it
+   is, 0 if it is not (or if dp holds no property of that name).
+*/
+int lookup_property( struct derived_properties_t * dp, const char * property, size_t codepoint )
+{
+    size_t slot = 0;
+
+    /* Look for the requested property */
+    while ( slot < dp->count && strcmp( dp->name[ slot ], property ) != 0 )
+    {
+        ++slot;
+    }
+
+    /* begin / end delimit the property's part of ->code_points */
+    return slot < dp->count
+        && bsearch( &codepoint, dp->code_points + dp->begin[ slot ], dp->end[ slot ] - dp->begin[ slot ], sizeof( size_t ), comp ) != NULL;
+}
+
+/* Free dp along with the names and arrays it owns. dp must not be NULL. */
+void release_derived_properties( struct derived_properties_t * dp )
+{
+    size_t remaining = dp->count;
+    while ( remaining > 0 )
+    {
+        free( dp->name[ --remaining ] );
+    }
+
+    free( dp->code_points );
+    free( dp->end );
+    free( dp->begin );
+    free( dp->name );
+    free( dp );
+}
+
+#ifdef TEST
+
+#include "test.h"
+
+int main( void )
+{
+    FILE * fh = fopen( "test.txt", "wb+" );
+    struct derived_properties_t * dp;
+    /* Two properties; each fprintf() must report the full length of its line */
+    TESTCASE( fh != NULL );
+    TESTCASE( fprintf( fh, "0000..0006 ; Test1 \n" ) == 20 );
+    TESTCASE( fprintf( fh, "# Total code points: 7\n" ) == 23 );
+    TESTCASE( fprintf( fh, "0001;Test2\n" ) == 11 );
+    TESTCASE( fprintf( fh, "# Total code points: 1\n" ) == 23 );
+
+    fclose( fh );
+    dp = read_derived_properties( "test.txt" );
+
+    TESTCASE( dp != NULL );
+    TESTCASE( dp->count == 2 );
+    TESTCASE( ! strcmp( dp->name[0], "Test1" ) );
+    TESTCASE( ! strcmp( dp->name[1], "Test2" ) );
+    /* Test1 was given as the range 0000..0006, both ends included */
+    TESTCASE( lookup_property( dp, "Test1", 0 ) );
+    TESTCASE( lookup_property( dp, "Test1", 6 ) );
+    TESTCASE( ! lookup_property( dp, "Test1", 7 ) );
+    /* Test2 was given as the single code point 0001 */
+    TESTCASE( ! lookup_property( dp, "Test2", 0 ) );
+    TESTCASE( lookup_property( dp, "Test2", 1 ) );
+    TESTCASE( ! lookup_property( dp, "Test2", 2 ) );
+    /* Names that were not in the file, one of them a prefix of those that were */
+    TESTCASE( ! lookup_property( dp, "Test", 0 ) );
+    TESTCASE( ! lookup_property( dp, "Test3", 0 ) );
+
+    release_derived_properties( dp );
+    remove( "test.txt" );
+
+    return TEST_RESULTS;
+}
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/derived_properties.h b/src/pdclib/auxiliary/uctype/derived_properties.h
new file mode 100644
index 0000000..d06ac84
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/derived_properties.h
@@ -0,0 +1,34 @@
+/* derived properties
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef DERIVED_PROPERTIES
+#define DERIVED_PROPERTIES DERIVED_PROPERTIES
+
+#include <stddef.h>
+
+/* https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt */
+
+struct derived_properties_t
+{
+    size_t count;         /* Number of properties, i.e. of entries in name, begin, and end */
+    char * * name;        /* Property names, in the order they appear in the file */
+    size_t * begin;       /* Per property: index into code_points of its first code point */
+    size_t * end;         /* Per property: index into code_points one past its last code point */
+    size_t * code_points; /* The code points of all properties, one property after the other */
+};
+
+static const int derived_properties_fields[] = {
+    -1, /* code point or code point range; negative: check_file() does not check the width */
+    -1  /* property name; negative: check_file() does not check the width */
+};
+
+struct derived_properties_t * read_derived_properties( const char * filename );
+
+int lookup_property( struct derived_properties_t * dp, const char * property, size_t codepoint );
+
+void release_derived_properties( struct derived_properties_t * dp );
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/main.c b/src/pdclib/auxiliary/uctype/main.c
new file mode 100644
index 0000000..ef60bb4
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/main.c
@@ -0,0 +1,300 @@
+/* main
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef TEST
+#include <wctype.h>
+#endif
+
+#include "uctype.h"
+
+/* RLE Compressed Output
+
+   <wctype.h> requires *11* flags:
+   iswupper, iswlower, iswalpha, iswdigit, iswblank, iswspace,
+   iswcntrl, iswxdigit, iswgraph, iswprint, iswpunct.
+   iswalnum (the 12th classification function) is *defined* as
+   iswalpha || iswdigit. And iswdigit and iswxdigit are defined
+   in a rather restrictive way that can be expressed by simple
+   ranges instead of lookup tables. And iswgraph is defined as
+   iswprint && ! iswspace (which is trivial to check that it holds
+   true for all the records provided by get-unicode-ctype, at
+   least up to Unicode 11.0).
+   So we have only 8 flags we actually need in a lookup... nicely
+   reducing the storage requirement to an unsigned char.
+
+   Another trick is to express toupper / tolower as offsets
+   instead of absolute values, which will allow run-length
+   compression of the data.
+*/
+
+struct output_record_t
+{
+    size_t codepoint;    /* Code point the record describes */
+    int toupper_diff;    /* get_towupper() result minus codepoint */
+    int tolower_diff;    /* get_towlower() result minus codepoint */
+    unsigned char flags; /* Most significant bit first: upper, lower, alpha, blank, space, cntrl, print, punct */
+};
+
+#ifdef TEST
+/* Print the name of the last property in age listing codepoint, if any. */
+static void print_codepoint_age( size_t codepoint, struct derived_properties_t * age )
+{
+    size_t remaining;
+
+    /* Back to front, stopping at the first hit */
+    for ( remaining = age->count; remaining > 0; --remaining )
+    {
+        if ( lookup_property( age, age->name[ remaining - 1 ], codepoint ) )
+        {
+            printf( "%s", age->name[ remaining - 1 ] );
+            break;
+        }
+    }
+}
+
+/* Print the UnicodeData fields of ur, followed by what the system library's
+   <wctype.h> classification functions say about codepoint.
+*/
+static void print_additional_codepoint_info( size_t codepoint, struct unicode_record_t * ur )
+{
+    static int ( * const classify[] )( wint_t ) = {
+        iswupper, iswlower, iswalpha, iswdigit, iswblank, iswspace,
+        iswcntrl, iswxdigit, iswgraph, iswprint, iswpunct
+    };
+    size_t i;
+
+    printf( " - %s",      ur->name );
+    printf( " - %s",      ur->general_category );
+    printf( " - %d",      ur->canonical_combining_class );
+    printf( " - %s",      ur->bidi_class );
+    printf( " - %s",      ( ur->decomposition ? ur->decomposition : "NULL" ) );
+    printf( " - %d",      ur->numeric_type );
+    printf( " - %d",      ur->numeric_digit );
+    printf( " - %s",      ( ur->numeric_value ? ur->numeric_value : "NULL" ) );
+    printf( " - %c",      ur->bidi_mirrored );
+    printf( " - U+%06zx", ur->simple_uppercase_mapping );
+    printf( " - U+%06zx", ur->simple_lowercase_mapping );
+    printf( " - U+%06zx", ur->simple_titlecase_mapping );
+    printf( " - " );
+
+    /* Implementations are at liberty to return non-zero values other than 1 for "true". */
+    for ( i = 0; i < sizeof( classify ) / sizeof( classify[ 0 ] ); ++i )
+    {
+        printf( "%d", classify[ i ]( codepoint ) ? 1 : 0 );
+    }
+}
+
+/* Print our results for codepoint, and flag any deviation from the system
+   library; both sides are normalized to 0 / 1 before they are compared.
+*/
+static void print_codepoint_info( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core, struct derived_properties_t * age )
+{
+    int rc;
+    int equal = 1;
+
+    if ( codepoint % 20 == 0 )
+    {
+        printf( "   cp      up       low    UlA0_WCXGP.\n" );
+    }
+
+    printf( "U+%06zX ", codepoint );
+    rc = get_towupper( codepoint, ur );       equal &= ( (unsigned)rc == towupper( codepoint ) ); printf( "U+%06X ", rc );
+    rc = get_towlower( codepoint, ur );       equal &= ( (unsigned)rc == towlower( codepoint ) ); printf( "U+%06X ", rc );
+    rc = get_iswupper( codepoint, ur, core ); equal &= ( ( iswupper( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswlower( codepoint, ur, core ); equal &= ( ( iswlower( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswalpha( codepoint, ur, core ); equal &= ( ( iswalpha( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswdigit( codepoint );           equal &= ( ( iswdigit( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswblank( codepoint, ur );       equal &= ( ( iswblank( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswspace( codepoint, ur );       equal &= ( ( iswspace( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswcntrl( codepoint, ur );       equal &= ( ( iswcntrl( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswxdigit( codepoint );          equal &= ( ( iswxdigit( codepoint ) ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswgraph( codepoint, ur );       equal &= ( ( iswgraph( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswprint( codepoint, ur );       equal &= ( ( iswprint( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+    rc = get_iswpunct( codepoint, ur, core ); equal &= ( ( iswpunct( codepoint )  ? 1 : 0 ) == ( rc ? 1 : 0 ) ); printf( "%d", rc ? 1 : 0 );
+
+    /* codepoint and ur->code_point may only differ for codepoint "ranges",
+       which are signified by "..., First>" / "..., Last>" pairs in
+       UnicodeData. If they differ and it's NOT a range, that is an error
+       of some kind.
+    */
+    if ( codepoint != ur->code_point && ( ! strstr( ur->name, ", Last>" ) || codepoint < ( ur - 1 )->code_point ) )
+    {
+        printf( " ERROR: U+%06zX != U+%06zX outside of First, Last codepoint range.  ", codepoint, ur->code_point );
+    }
+
+    if ( ! equal )
+    {
+        printf( " ERROR: Deviation from SysLib: " );
+        print_codepoint_age( codepoint, age );
+        print_additional_codepoint_info( codepoint, ur );
+    }
+
+    printf( "\n" );
+}
+#else
+/* Assemble the ctype.dat record for codepoint: the case mappings as offsets
+   from codepoint, and the eight classification results packed into one byte
+   (most significant bit first, in the order of the calls below).
+*/
+static struct output_record_t get_output_record( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
+{
+    struct output_record_t rc;
+
+    rc.codepoint = codepoint;
+    rc.toupper_diff = get_towupper( codepoint, ur ) - codepoint;
+    rc.tolower_diff = get_towlower( codepoint, ur ) - codepoint;
+
+    rc.flags = ( get_iswupper( codepoint, ur, core ) ? 0x80 : 0 )
+             | ( get_iswlower( codepoint, ur, core ) ? 0x40 : 0 )
+             | ( get_iswalpha( codepoint, ur, core ) ? 0x20 : 0 )
+             | ( get_iswblank( codepoint, ur )       ? 0x10 : 0 )
+             | ( get_iswspace( codepoint, ur )       ? 0x08 : 0 )
+             | ( get_iswcntrl( codepoint, ur )       ? 0x04 : 0 )
+             | ( get_iswprint( codepoint, ur )       ? 0x02 : 0 )
+             | ( get_iswpunct( codepoint, ur, core ) ? 0x01 : 0 );
+
+    return rc;
+}
+#endif
+
+/* Expects the paths of UnicodeData.txt and DerivedCoreProperties.txt (and of
+   DerivedAge.txt when built with -DTEST) as arguments. */
+int main( int argc, char * argv[] )
+{
+    struct unicode_data_t * ud;
+    struct derived_properties_t * core;
+#ifdef TEST
+    struct derived_properties_t * age;
+    const int expected_argc = 4;
+#else
+    const int expected_argc = 3;
+#endif
+    /* NULL if the locale requested by the environment is not available */
+    char * locale = setlocale( LC_CTYPE, "" );
+
+    if ( ! locale || ! strstr( locale, "UTF-8" ) || strstr( locale, "TR" ) || strstr( locale, "tr" ) )
+    {
+        fprintf( stderr, "Need non-turkish locale to work correctly.\n'%s' will not do.\n", locale ? locale : "(null)" );
+        return EXIT_FAILURE;
+    }
+
+    if ( argc != expected_argc )
+    {
+        printf( "\n"
+                "Usage: get-uctypes <UnicodeData.txt> <DerivedCoreProperties.txt>"
+#ifdef TEST
+                " <DerivedAge.txt>"
+#endif
+                "\n\n"
+                "Generates lookup tables for <wctype.h> from files available from\n"
+                "the Unicode Consortium.\n\n"
+                "The required files can be retrieved from the following URL:\n\n"
+                "http://www.unicode.org/Public/UCD/latest/ucd/\n\n" );
+        return EXIT_FAILURE;
+    }
+
+    if ( ( ud = read_unicode_data( argv[ 1 ] ) ) != NULL )
+    {
+        if ( ( core = read_derived_properties( argv[ 2 ] ) ) != NULL )
+        {
+#ifndef TEST
+            /* Print (to file) RLE compressed data */
+            FILE * fh = fopen( "ctype.dat", "wb" );
+
+            if ( fh )
+            {
+                size_t codepoint = 0;
+                size_t i = 0;
+                struct unicode_record_t * ur = &(ud->records[i]);
+                const char * last = ", Last>"; /* Name substring indicating a code point _range_ */
+
+                struct output_record_t previous = get_output_record( codepoint, ur, core );
+
+                fprintf( fh, "%zx ", previous.codepoint );
+
+                for ( codepoint = 1; codepoint < 0x10fffe; ++codepoint )
+                {
+                    struct output_record_t current;
+
+                    while ( codepoint > ur->code_point )
+                    {
+                        ur = &(ud->records[++i]);
+                    }
+
+                    if ( codepoint != ur->code_point && ( ur->name && ( strstr( ur->name, last ) != ( ur->name + strlen( ur->name ) - strlen( last ) ) ) ) )
+                    {
+                        /* Unregistered Code Point */
+                        continue;
+                    }
+
+                    current = get_output_record( codepoint, ur, core );
+
+                    /* RLE */
+                    if ( current.codepoint != previous.codepoint + 1 ||
+                         current.toupper_diff != previous.toupper_diff ||
+                         current.tolower_diff != previous.tolower_diff ||
+                         current.flags != previous.flags )
+                    {
+                        fprintf( fh, "%zx %d %d %hhx\n", previous.codepoint, previous.toupper_diff, previous.tolower_diff, previous.flags );
+                        fprintf( fh, "%zx ", current.codepoint );
+                    }
+
+                    previous = current;
+                }
+
+                fprintf( fh, "%zx %d %d %hhx\n", previous.codepoint, previous.toupper_diff, previous.tolower_diff, previous.flags );
+                fclose( fh );
+            }
+            else
+            {
+                fprintf( stderr, "Could not open 'ctype.dat' for writing.\n" );
+            }
+#else
+            if ( ( age = read_derived_properties( argv[ 3 ] ) ) != NULL )
+            {
+                /* Print (to screen) raw data comparing our results
+                   to the system library.
+                   Differences are often because the system library
+                   uses older data, which is why we add the age to
+                   the output.
+                */
+                size_t codepoint = 0;
+                size_t i = 0;
+                struct unicode_record_t * ur = &(ud->records[i]);
+                const char * last = ", Last>"; /* Name substring indicating a code point _range_ */
+
+                for ( codepoint = 0; codepoint < 0x10fffe; ++codepoint )
+                {
+                    while ( codepoint > ur->code_point )
+                    {
+                        ur = &(ud->records[++i]);
+                    }
+
+                    if ( codepoint != ur->code_point && ! name_ends_with( ur, last ) )
+                    {
+                        /* Unregistered Code Point */
+                        continue;
+                    }
+
+                    print_codepoint_info( codepoint, ur, core, age );
+                }
+
+                release_derived_properties( age );
+            }
+#endif
+
+            release_derived_properties( core );
+        }
+
+        release_unicode_data( ud );
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/pdclib/auxiliary/uctype/test.h b/src/pdclib/auxiliary/uctype/test.h
new file mode 100644
index 0000000..3cd33a8
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/test.h
@@ -0,0 +1,19 @@
+/* test
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef TEST_H
+#define TEST_H TEST_H
+
+#include <stdio.h>
+
+#define NO_TESTDRIVER 0
+
+static int TEST_RESULTS = 0; /* Number of TESTCASE() checks that failed so far */
+/* Check x; if it does not hold, count the failure and print file, line, and expression, then carry on */
+#define TESTCASE( x ) if ( x ) {} \
+                      else { TEST_RESULTS += 1; printf( "FAILED: " __FILE__ ", line %d - %s\n", __LINE__, #x ); }
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/text_utilities.c b/src/pdclib/auxiliary/uctype/text_utilities.c
new file mode 100644
index 0000000..20973d9
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/text_utilities.c
@@ -0,0 +1,206 @@
+/* text utilities
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#include "text_utilities.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Trim leading and trailing whitespace in place. Returns a pointer to the
+   first non-whitespace character of s (or to the terminating null byte if
+   there is none); trailing whitespace is overwritten with null bytes.
+*/
+char * trim( char * s )
+{
+    char * p;
+
+    /* Skip over leading whitespace */
+    while ( *s && isspace( (unsigned char)*s ) )
+    {
+        ++s;
+    }
+
+    /* Trim trailing whitespace. Stop at s: an empty or all-whitespace string
+       must not make us read (and overwrite) what lies in front of it. */
+    p = s + strlen( s );
+
+    while ( p > s && isspace( (unsigned char)p[ -1 ] ) )
+    {
+        *( --p ) = '\0';
+    }
+
+    return s;
+}
+
+/* See text_utilities.h. The position within the string is kept in a static
+   pointer, which is NULL before the first call and again once a string has
+   been used up; in that state, calls passing NULL for s return NULL.
+*/
+char * next_token( char * s, char delim )
+{
+    static char * cursor = NULL;
+    char * token;
+    char * end;
+
+    /* A non-NULL s starts over with a new string */
+    if ( s != NULL )
+    {
+        cursor = s;
+    }
+
+    /* Never initialized, or the previous string is used up */
+    if ( cursor == NULL )
+    {
+        return NULL;
+    }
+
+    token = cursor;
+    end = strchr( cursor, delim );
+
+    if ( end != NULL )
+    {
+        /* Cut the token off at the delimiter, continue behind it next time */
+        *end = '\0';
+        cursor = end + 1;
+    }
+    else
+    {
+        /* No further delimiter: this is the last token */
+        cursor = NULL;
+    }
+
+    return trim( token );
+}
+
+/* See text_utilities.h. Returns the line count, or (size_t)-1 after reporting the first problem to stderr. */
+size_t check_file( FILE * fh, size_t buffer_size, char delim, size_t fields, int const * widths )
+{
+    char * buffer = malloc( buffer_size );
+    size_t lines = 0;
+    int failed = 0;
+
+    if ( buffer == NULL )
+    {
+        fprintf( stderr, "Memory allocation failure.\n" );
+        return -1;
+    }
+
+    rewind( fh );
+
+    while ( ! failed && fgets( buffer, buffer_size, fh ) )
+    {
+        size_t i;
+        char * p;
+
+        ++lines;
+
+        /* Check line for complete read (a null byte in the file can make it look empty) */
+        if ( buffer[ 0 ] == '\0' || buffer[ strlen( buffer ) - 1 ] != '\n' )
+        {
+            fprintf( stderr, "Line %zu will not fit into a %zu-character buffer.\n", lines, buffer_size );
+            failed = 1;
+            break;
+        }
+
+        /* Remove comments */
+        if ( ( p = strchr( buffer, '#' ) ) != NULL )
+        {
+            *p = '\0';
+        }
+
+        /* > 1 because of newline */
+        if ( strlen( buffer ) > 1 )
+        {
+            /* Check field count and field widths; stop at the first problem */
+            p = next_token( buffer, delim );
+
+            for ( i = 0; ! failed && i < fields; ++i )
+            {
+                if ( ! p )
+                {
+                    fprintf( stderr, "Line %zu contains less than %zu fields.\n", lines, fields );
+                    failed = 1;
+                }
+                else if ( widths[ i ] >= 0 && strlen( p ) >= (unsigned)widths[ i ] )
+                {
+                    fprintf( stderr, "Line %zu: Field %zu '%s' will not fit in a %d character string.\n", lines, i + 1, p, widths[ i ] );
+                    failed = 1;
+                }
+                else
+                {
+                    p = next_token( NULL, delim );
+                }
+            }
+
+            if ( ! failed && p )
+            {
+                fprintf( stderr, "Line %zu contains more than %zu fields.\n", lines, fields );
+                failed = 1;
+            }
+        }
+    }
+
+    rewind( fh );
+    free( buffer );
+    return failed ? (size_t)-1 : lines;
+}
+
+#ifdef TEST
+
+#include "test.h"
+
+int main( void )
+{
+    FILE * fh = fopen( "test.txt", "wb+" );
+    int widths[] = { 4, 4, 4 };
+    char buffer[ 500 ];
+
+    /* check_file() (and, as a dependency, next_token()) */
+    /* check_file() leaves the file rewound, so the first fprintf() after each call overwrites the start of the file */
+    /* All ok */
+    TESTCASE( fprintf( fh, "%s;%s;%s\n", "1", "123", "12" ) == 9 );
+    TESTCASE( fprintf( fh, ";;\n" ) == 3 );
+    TESTCASE( check_file( fh, 10, ';', 3, widths ) == 2 );
+    /* Second field too long: its 4 characters reach the given width of 4 */
+    TESTCASE( fprintf( fh, "%s;%s;%s\n", "", "1234", "1" ) == 8 );
+    TESTCASE( check_file( fh, 10, ';', 3, widths ) == (size_t)-1 );
+    /* Too few fields */
+    TESTCASE( fprintf( fh, "%s;%s\n", "123", "123" ) == 8 );
+    TESTCASE( check_file( fh, 10, ';', 3, widths )== (size_t)-1 );
+    /* Too many fields */
+    TESTCASE( fprintf( fh, "%s;%s;%s;%s\n", "1", "1", "1", "1" ) == 8 );
+    TESTCASE( check_file( fh, 10, ';', 3, widths )== (size_t)-1 );
+    /* Line too long: 10 characters including the newline do not fit a buffer size of 10 */
+    TESTCASE( fprintf( fh, "%s;%s;%s\n", "12", "123", "12" ) == 10 );
+    TESTCASE( check_file( fh, 10, ';', 3, widths )== (size_t)-1 );
+
+    fclose( fh );
+    remove( "test.txt" );
+
+    /* trim(): leading, trailing, and both; then single characters, whitespace only, and empty */
+
+    strcpy( buffer, "  xyz" );
+    TESTCASE( ! strcmp( trim( buffer ), "xyz" ) );
+    strcpy( buffer, "xyz  " );
+    TESTCASE( ! strcmp( trim( buffer ), "xyz" ) );
+    strcpy( buffer, "  xyz  " );
+    TESTCASE( ! strcmp( trim( buffer ), "xyz" ) );
+    strcpy( buffer, "  x" );
+    TESTCASE( ! strcmp( trim( buffer ), "x" ) );
+    strcpy( buffer, "x  " );
+    TESTCASE( ! strcmp( trim( buffer ), "x" ) );
+    strcpy( buffer, " " );
+    TESTCASE( ! strcmp( trim( buffer ), "" ) );
+    strcpy( buffer, "  " );
+    TESTCASE( ! strcmp( trim( buffer ), "" ) );
+    strcpy( buffer, "" );
+    TESTCASE( ! strcmp( trim( buffer ), "" ) );
+
+    return TEST_RESULTS;
+}
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/text_utilities.h b/src/pdclib/auxiliary/uctype/text_utilities.h
new file mode 100644
index 0000000..f961e6b
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/text_utilities.h
@@ -0,0 +1,59 @@
+/* text utilities
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef TEXT_UTILITIES_H
+#define TEXT_UTILITIES_H TEXT_UTILITIES_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+/* Trim leading and trailing whitespace from a given string.
+   Will return a pointer beyond leading whitespace, and overwrite trailing
+   whitespace with null bytes.
+*/
+char * trim( char * s );
+
+/* A function similar to strtok(), that returns the next token in a string,
+   up to the next separator character (which is replaced with a null byte)
+   or up to end-of-string.
+   As opposed to strtok(), which treats consecutive separators as one, this
+   function will work "correctly" for those as well, returning a (pointer
+   to an) empty string in those cases.
+   Pass the string as first parameter IN THE FIRST CALL ONLY, and NULL in
+   subsequent calls. The function holds an internal, static pointer to the
+   string being processed. This, of course, means the function is not thread-
+   safe.
+*/
+char * next_token( char * s, char delim );
+
+/* When processing a file with delimited-values, there are a couple of things
+   you want to be sure about before parsing it:
+   - the number of lines (data records) in the file;
+   - that all lines of the file will fit the intended line buffer size;
+   - that all records in the file indeed have the expected number of fields;
+   - that none of the fields for which you are assuming a given size exceeds
+     that size.
+   (For line buffer size, consider that the buffer must be large enough for
+   the line contents, the newline (to check that the line was actually read
+   in full), and the null terminator.)
+   This function does all that for you in a single pass. The parameters are:
+   - FILE handle to the file (function will rewind the file before checking,
+     and rewind again when it is done);
+   - the intended line buffer size;
+   - the field delimiter;
+   - the expected number of fields;
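+   - a pointer to an array holding the expected maximum width for each field,
+     with a negative value indicating that this field's width need not be
+     checked. (The width is that of the buffer meant to hold the field, i.e.
+     it includes the null terminator: a width of 4 admits at most three
+     characters of field content.)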
+   The function will return the number of lines in the file, or (size_t)-1
+   if one of the checks failed. The reason for the failed check will be
+   written to stderr. (The file will not be rewound in this case.)
+   This requires reading and tokenizing the file twice, but removes lots of
+   error checking from the actual parsing, making for cleaner code.
+*/
+size_t check_file( FILE * fh, size_t max_line_length, char delim, size_t fields, int const * widths );
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/uctype.c b/src/pdclib/auxiliary/uctype/uctype.c
new file mode 100644
index 0000000..ce8d8ef
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/uctype.c
@@ -0,0 +1,85 @@
+/* uctype
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#include "uctype.h"
+
+/* Determine the uppercase equivalent of a code point.
+   Yields the record's simple uppercase mapping if towupper_differs()
+   reports one that differs from the code point, and the code point itself
+   otherwise.
+*/
+size_t get_towupper( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( towupper_differs( ur, codepoint ) )
+    {
+        return ur->simple_uppercase_mapping;
+    }
+
+    return codepoint;
+}
+
+/* Determine the lowercase equivalent of a code point.
+   Yields the record's simple lowercase mapping if towlower_differs()
+   reports one that differs from the code point, and the code point itself
+   otherwise.
+*/
+size_t get_towlower( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( towlower_differs( ur, codepoint ) )
+    {
+        return ur->simple_lowercase_mapping;
+    }
+
+    return codepoint;
+}
+
+/* Uppercase check: returns 1 if the code point has a differing lowercase
+   mapping, or carries the derived property "Uppercase"; 0 otherwise.
+*/
+size_t get_iswupper( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
+{
+    if ( towlower_differs( ur, codepoint ) )
+    {
+        return 1;
+    }
+
+    return lookup_property( core, "Uppercase", codepoint ) ? 1 : 0;
+}
+
+/* Lowercase check: returns 1 if the code point has a differing uppercase
+   mapping, or carries the derived property "Lowercase"; 0 otherwise.
+*/
+size_t get_iswlower( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
+{
+    if ( towupper_differs( ur, codepoint ) )
+    {
+        return 1;
+    }
+
+    return lookup_property( core, "Lowercase", codepoint ) ? 1 : 0;
+}
+
+/* Alphabetic check: returns 1 if the code point carries the derived
+   property "Alphabetic", or if its record is of general category "Nd"
+   without being one of the digits accepted by get_iswdigit(); 0 otherwise.
+*/
+size_t get_iswalpha( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
+{
+    if ( lookup_property( core, "Alphabetic", codepoint ) )
+    {
+        return 1;
+    }
+
+    if ( ! is_general_category( ur, "Nd" ) )
+    {
+        return 0;
+    }
+
+    return ! get_iswdigit( codepoint );
+}
+
+/* Digit check: returns 1 for the code points 0x0030 through 0x0039
+   (inclusive), 0 for everything else.
+*/
+size_t get_iswdigit( size_t codepoint )
+{
+    if ( codepoint < 0x0030 )
+    {
+        return 0;
+    }
+
+    return codepoint <= 0x0039;
+}
+
+/* Hexadecimal digit check: returns 1 for the digits accepted by
+   get_iswdigit() and for the code points 0x0041..0x0046 and 0x0061..0x0066
+   (both ranges inclusive), 0 for everything else.
+*/
+size_t get_iswxdigit( size_t codepoint )
+{
+    if ( get_iswdigit( codepoint ) )
+    {
+        return 1;
+    }
+
+    if ( codepoint >= 0x0041 && codepoint <= 0x0046 )
+    {
+        return 1;
+    }
+
+    return codepoint >= 0x0061 && codepoint <= 0x0066;
+}
+
+/* Blank check: returns 1 for code point 0x0009, and for records of general
+   category "Zs" whose decomposition does not contain "<noBreak>";
+   0 otherwise.
+*/
+size_t get_iswblank( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( codepoint == 0x0009 )
+    {
+        return 1;
+    }
+
+    if ( ! is_general_category( ur, "Zs" ) )
+    {
+        return 0;
+    }
+
+    return ! decomposition_contains( ur, "<noBreak>" );
+}
+
+/* Whitespace check: returns 1 for records of general category "Zl" or "Zp",
+   for records of category "Zs" whose decomposition does not contain
+   "<noBreak>", for code point 0x0020, and for the code points 0x0009
+   through 0x000D (inclusive); 0 otherwise.
+*/
+size_t get_iswspace( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( is_general_category( ur, "Zl" ) || is_general_category( ur, "Zp" ) )
+    {
+        return 1;
+    }
+
+    if ( is_general_category( ur, "Zs" ) && ! decomposition_contains( ur, "<noBreak>" ) )
+    {
+        return 1;
+    }
+
+    if ( codepoint == 0x0020 )
+    {
+        return 1;
+    }
+
+    return codepoint >= 0x0009 && codepoint <= 0x000D;
+}
+
+/* Control character check: returns 1 for records of general category "Zl"
+   or "Zp", and for records named "<control>"; 0 otherwise.
+   The code point itself is not consulted.
+*/
+size_t get_iswcntrl( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( is_general_category( ur, "Zl" ) )
+    {
+        return 1;
+    }
+
+    if ( is_general_category( ur, "Zp" ) )
+    {
+        return 1;
+    }
+
+    return has_name( ur, "<control>" ) ? 1 : 0;
+}
+
+/* Graphic character check: returns 0 for records of general category "Cs",
+   for records named "<control>", and for anything get_iswspace() accepts;
+   1 otherwise.
+*/
+size_t get_iswgraph( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( is_general_category( ur, "Cs" ) )
+    {
+        return 0;
+    }
+
+    if ( has_name( ur, "<control>" ) )
+    {
+        return 0;
+    }
+
+    return ! get_iswspace( codepoint, ur );
+}
+
+/* Printable character check: returns 0 for records of general category
+   "Zp", "Zl" or "Cs", and for records named "<control>"; 1 otherwise.
+   The code point itself is not consulted.
+*/
+size_t get_iswprint( size_t codepoint, struct unicode_record_t * ur )
+{
+    if ( is_general_category( ur, "Zp" ) || is_general_category( ur, "Zl" ) )
+    {
+        return 0;
+    }
+
+    if ( is_general_category( ur, "Cs" ) )
+    {
+        return 0;
+    }
+
+    return ! has_name( ur, "<control>" );
+}
+
+/* Punctuation check: returns 1 for everything that is neither alphabetic
+   (get_iswalpha()), nor a digit (get_iswdigit()), nor named "<control>",
+   nor whitespace (get_iswspace()), nor of general category "Cs";
+   0 otherwise.
+*/
+size_t get_iswpunct( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
+{
+    if ( get_iswalpha( codepoint, ur, core ) || get_iswdigit( codepoint ) )
+    {
+        return 0;
+    }
+
+    if ( has_name( ur, "<control>" ) || get_iswspace( codepoint, ur ) )
+    {
+        return 0;
+    }
+
+    return ! is_general_category( ur, "Cs" );
+}
+
+#ifdef TEST
+
+#include "test.h"
+
+/* Placeholder test driver: this file has no tests of its own; it only
+   evaluates the NO_TESTDRIVER marker and reports the test results.
+*/
+int main( void )
+{
+    TESTCASE( NO_TESTDRIVER );
+
+    return TEST_RESULTS;
+}
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/uctype.h b/src/pdclib/auxiliary/uctype/uctype.h
new file mode 100644
index 0000000..8cdda43
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/uctype.h
@@ -0,0 +1,29 @@
+/* uctype data
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef UCTYPE
+#define UCTYPE
+
+#include "derived_properties.h"
+#include "unicode_data.h"
+
+/* Case mappings: return the simple upper- / lowercase mapping held in the
+   given record if it differs from the code point, else the code point.
+*/
+size_t get_towupper( size_t codepoint, struct unicode_record_t * ur );
+size_t get_towlower( size_t codepoint, struct unicode_record_t * ur );
+
+/* Character class checks: each returns non-zero if the code point belongs
+   to the class, and zero otherwise. ur is the code point's UnicodeData
+   record, core the derived properties that are queried for "Uppercase",
+   "Lowercase" and "Alphabetic".
+*/
+size_t get_iswupper( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
+size_t get_iswlower( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
+size_t get_iswalpha( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
+/* These two are decided by code point range alone. */
+size_t get_iswdigit( size_t codepoint );
+size_t get_iswxdigit( size_t codepoint );
+size_t get_iswblank( size_t codepoint, struct unicode_record_t * ur );
+size_t get_iswspace( size_t codepoint, struct unicode_record_t * ur );
+size_t get_iswcntrl( size_t codepoint, struct unicode_record_t * ur );
+size_t get_iswgraph( size_t codepoint, struct unicode_record_t * ur );
+size_t get_iswprint( size_t codepoint, struct unicode_record_t * ur );
+size_t get_iswpunct( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
+
+
+#endif
+
diff --git a/src/pdclib/auxiliary/uctype/unicode_data.c b/src/pdclib/auxiliary/uctype/unicode_data.c
new file mode 100644
index 0000000..5d92fda
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/unicode_data.c
@@ -0,0 +1,224 @@
+/* unicode data
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "text_utilities.h"
+
+#include "unicode_data.h"
+
+#define LINE_BUFFER_SIZE 500u
+
+/* Duplicate a string into freshly allocated memory.
+   Returns NULL if the allocation failed.
+*/
+static char * duplicate_string( const char * s )
+{
+    char * copy = malloc( strlen( s ) + 1 );
+
+    if ( copy != NULL )
+    {
+        strcpy( copy, s );
+    }
+
+    return copy;
+}
+
+/* Tokenize one ';'-delimited line (which next_token() modifies in place)
+   and store its fields in the given record. The record is expected to be
+   zero-initialized; string fields that are empty in the line are left
+   untouched (i.e. NULL).
+   Returns 0 on success, -1 if memory for a string field could not be
+   allocated.
+*/
+static int parse_unicode_record( char * line, struct unicode_record_t * record )
+{
+    char * p;
+
+    record->code_point = strtoul( next_token( line, ';' ), NULL, 16 );
+
+    p = next_token( NULL, ';' );
+    if ( *p && ( record->name = duplicate_string( p ) ) == NULL )
+    {
+        return -1;
+    }
+
+    /* The fixed-size fields were width-checked by check_file() beforehand. */
+    strcpy( record->general_category, next_token( NULL, ';' ) );
+
+    /* Empty numeric fields are recorded as -1. */
+    p = next_token( NULL, ';' );
+    record->canonical_combining_class = ( *p ) ? strtol( p, NULL, 10 ) : -1l;
+
+    strcpy( record->bidi_class, next_token( NULL, ';' ) );
+
+    p = next_token( NULL, ';' );
+    if ( *p && ( record->decomposition = duplicate_string( p ) ) == NULL )
+    {
+        return -1;
+    }
+
+    p = next_token( NULL, ';' );
+    record->numeric_type = ( *p ) ? strtol( p, NULL, 10 ) : -1l;
+
+    p = next_token( NULL, ';' );
+    record->numeric_digit = ( *p ) ? strtol( p, NULL, 10 ) : -1l;
+
+    p = next_token( NULL, ';' );
+    if ( *p && ( record->numeric_value = duplicate_string( p ) ) == NULL )
+    {
+        return -1;
+    }
+
+    /* First character of the field, or '\0' if the field is empty. */
+    p = next_token( NULL, ';' );
+    record->bidi_mirrored = *p;
+
+    next_token( NULL, ';' ); /* Unicode_1_Name */
+    next_token( NULL, ';' ); /* ISO_Comment */
+
+    /* Empty mapping fields convert to 0. */
+    record->simple_uppercase_mapping = strtoul( next_token( NULL, ';' ), NULL, 16 );
+    record->simple_lowercase_mapping = strtoul( next_token( NULL, ';' ), NULL, 16 );
+    record->simple_titlecase_mapping = strtoul( next_token( NULL, ';' ), NULL, 16 );
+
+    return 0;
+}
+
+/* Read a file in UnicodeData.txt format into memory.
+   The file is first validated with check_file() (line length, number of
+   fields, width of the fixed-size fields), then parsed line by line.
+   Returns a pointer to the data read, to be released with
+   release_unicode_data(), or NULL if the file could not be opened, failed
+   validation, could not be read completely, or memory ran out. A message
+   is written to stderr in the failure cases handled here.
+*/
+struct unicode_data_t * read_unicode_data( const char * filename )
+{
+    FILE * fh;
+    char buffer[ LINE_BUFFER_SIZE ];
+    struct unicode_data_t * ud = NULL;
+    size_t lines;
+
+    if ( ( fh = fopen( filename, "r" ) ) == NULL )
+    {
+        fprintf( stderr, "Could not open '%s' for reading.\n", filename );
+        return NULL;
+    }
+
+    if ( ( lines = check_file( fh, LINE_BUFFER_SIZE, ';', sizeof( unicode_record_fields ) / sizeof( int ), unicode_record_fields ) ) != (size_t)-1 )
+    {
+        if ( ( ud = malloc( sizeof( struct unicode_data_t ) ) ) )
+        {
+            ud->size = lines;
+
+            /* calloc() so that all string pointers start out as NULL, which
+               keeps release_unicode_data() safe on partially filled data.
+            */
+            if ( ( ud->records = calloc( lines, sizeof( struct unicode_record_t ) ) ) )
+            {
+                size_t i;
+
+                for ( i = 0; i < lines; ++i )
+                {
+                    if ( fgets( buffer, LINE_BUFFER_SIZE, fh ) == NULL )
+                    {
+                        fprintf( stderr, "Could not read line %lu of '%s'.\n", (unsigned long)( i + 1 ), filename );
+                        release_unicode_data( ud );
+                        ud = NULL;
+                        break;
+                    }
+
+                    if ( parse_unicode_record( buffer, &ud->records[ i ] ) != 0 )
+                    {
+                        fprintf( stderr, "Memory allocation failure.\n" );
+                        release_unicode_data( ud );
+                        ud = NULL;
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                fprintf( stderr, "Memory allocation failure.\n" );
+                free( ud );
+                ud = NULL;
+            }
+        }
+        else
+        {
+            fprintf( stderr, "Memory allocation failure.\n" );
+        }
+    }
+
+    fclose( fh );
+    return ud;
+}
+
+/* Check whether the record's name equals the given name exactly.
+   Returns non-zero if it does, zero if it does not or if the record has no
+   name (read_unicode_data() leaves the name NULL for an empty name field).
+*/
+int has_name( struct unicode_record_t * ur, const char * name )
+{
+    return ur->name != NULL && strcmp( ur->name, name ) == 0;
+}
+
+/* Check whether the record's name ends with the given string.
+   Returns non-zero if it does, zero if it does not, if the given string is
+   longer than the name, or if the record has no name.
+*/
+int name_ends_with( struct unicode_record_t * ur, const char * name )
+{
+    size_t name_length;
+    size_t suffix_length;
+
+    if ( ur->name == NULL )
+    {
+        return 0;
+    }
+
+    name_length = strlen( ur->name );
+    suffix_length = strlen( name );
+
+    if ( suffix_length > name_length )
+    {
+        return 0;
+    }
+
+    /* Compare the tail directly; searching with strstr() would stop at the
+       first occurrence and miss a suffix that also appears earlier.
+    */
+    return strcmp( ur->name + ( name_length - suffix_length ), name ) == 0;
+}
+
+/* Check whether the record's general category equals the given category
+   string exactly. Returns 1 if it does, 0 otherwise.
+*/
+int is_general_category( struct unicode_record_t * ur, const char * category )
+{
+    return ! strcmp( ur->general_category, category );
+}
+
+/* Check whether the record's decomposition contains the given substring.
+   Returns 1 if it does, 0 if it does not or if the record has no
+   decomposition.
+*/
+int decomposition_contains( struct unicode_record_t * ur, const char * substring )
+{
+    if ( ! ur->decomposition )
+    {
+        return 0;
+    }
+
+    return strstr( ur->decomposition, substring ) != NULL;
+}
+
+/* Check whether the record holds a simple uppercase mapping (non-zero)
+   that is different from the given code point.
+   Returns 1 if so, 0 otherwise.
+*/
+int towupper_differs( struct unicode_record_t * ur, size_t codepoint )
+{
+    if ( ur->simple_uppercase_mapping == 0 )
+    {
+        return 0;
+    }
+
+    return ur->simple_uppercase_mapping != codepoint;
+}
+
+/* Check whether the record holds a simple lowercase mapping (non-zero)
+   that is different from the given code point.
+   Returns 1 if so, 0 otherwise.
+*/
+int towlower_differs( struct unicode_record_t * ur, size_t codepoint )
+{
+    if ( ur->simple_lowercase_mapping == 0 )
+    {
+        return 0;
+    }
+
+    return ur->simple_lowercase_mapping != codepoint;
+}
+
+/* Release all memory held by the given data: the string fields of every
+   record, the record array, and the structure itself.
+   Passing NULL is allowed and does nothing (like free()), so the result of
+   a failed read_unicode_data() may be handed in unchecked.
+*/
+void release_unicode_data( struct unicode_data_t * ud )
+{
+    size_t i;
+
+    if ( ud == NULL )
+    {
+        return;
+    }
+
+    if ( ud->records != NULL )
+    {
+        for ( i = 0; i < ud->size; ++i )
+        {
+            /* Fields that were empty in the file are NULL; free() accepts that. */
+            free( ud->records[i].name );
+            free( ud->records[i].decomposition );
+            free( ud->records[i].numeric_value );
+        }
+    }
+
+    free( ud->records );
+    free( ud );
+}
+
+#ifdef TEST
+
+#include "test.h"
+
+#include <inttypes.h>
+
+/* Test driver for the UnicodeData reader.
+   Writes two records to a scratch file "test.txt", reads them back with
+   read_unicode_data(), and checks every parsed field as well as the record
+   query helpers.
+*/
+int main( void )
+{
+    FILE * fh = fopen( "test.txt", "w" );
+    struct unicode_data_t * ud;
+    /* Receives the character count of the second fprintf(); it is not read
+       anywhere else in this function.
+    */
+    int rc;
+
+    TESTCASE( fh != NULL );
+    /* Record 0: a "<control>" record with most optional fields left empty. */
+    TESTCASE( fprintf( fh, "%04x;%s;%s;%d;%s;;;;;%c;%s;;;;\n", 0, "<control>", "Cc", 0, "BN", 'N', "NULL" ) == 38 );
+    /* Record 1: has a decomposition, a numeric value, and upper- / titlecase
+       mappings, but no lowercase mapping.
+    */
+    TESTCASE( ( rc = fprintf( fh, "%04x;%s;%s;%d;%s;%s;;;%s;%c;;;%04x;;%04x\n", 0x2170, "SMALL ROMAN NUMERAL ONE", "Nl", 0, "L", "<compat> 0069", "1", 'N', 0x2160, 0x2160 ) ) == 69 );
+
+    /* Close before reading back; the scratch file is no longer needed once
+       the data is in memory.
+    */
+    fclose( fh );
+    ud = read_unicode_data( "test.txt" );
+    remove( "test.txt" );
+
+    TESTCASE( ud != NULL );
+    TESTCASE( ud->size == 2 );
+
+    /* Record 0: empty string fields are NULL, empty numeric fields are -1,
+       empty mappings are 0.
+    */
+    TESTCASE( ud->records[0].code_point == 0 );
+    TESTCASE( strcmp( ud->records[0].name, "<control>" ) == 0 );
+    TESTCASE( strcmp( ud->records[0].general_category, "Cc" ) == 0 );
+    TESTCASE( ud->records[0].canonical_combining_class == 0 );
+    TESTCASE( strcmp( ud->records[0].bidi_class, "BN" ) == 0 );
+    TESTCASE( ud->records[0].decomposition == NULL );
+    TESTCASE( ud->records[0].numeric_type == -1 );
+    TESTCASE( ud->records[0].numeric_digit == -1 );
+    TESTCASE( ud->records[0].numeric_value == NULL );
+    TESTCASE( ud->records[0].bidi_mirrored == 'N' );
+    TESTCASE( ud->records[0].simple_uppercase_mapping == 0 );
+    TESTCASE( ud->records[0].simple_lowercase_mapping == 0 );
+    TESTCASE( ud->records[0].simple_titlecase_mapping == 0 );
+
+    /* Record 1 */
+    TESTCASE( ud->records[1].code_point == 0x2170 );
+    TESTCASE( strcmp( ud->records[1].name, "SMALL ROMAN NUMERAL ONE" ) == 0 );
+    TESTCASE( strcmp( ud->records[1].general_category, "Nl" ) == 0 );
+    TESTCASE( ud->records[1].canonical_combining_class == 0 );
+    TESTCASE( strcmp( ud->records[1].bidi_class, "L" ) == 0 );
+    TESTCASE( strcmp( ud->records[1].decomposition, "<compat> 0069" ) == 0 );
+    TESTCASE( ud->records[1].numeric_type == -1 );
+    TESTCASE( ud->records[1].numeric_digit == -1 );
+    TESTCASE( strcmp( ud->records[1].numeric_value, "1" ) == 0 );
+    TESTCASE( ud->records[1].bidi_mirrored == 'N' );
+    TESTCASE( ud->records[1].simple_uppercase_mapping == 0x2160 );
+    TESTCASE( ud->records[1].simple_lowercase_mapping == 0 );
+    TESTCASE( ud->records[1].simple_titlecase_mapping == 0x2160 );
+
+    /* is_general_category() */
+    TESTCASE( is_general_category( &(ud->records[0]), "Cc" ) );
+    TESTCASE( ! is_general_category( &(ud->records[0]), "" ) );
+    TESTCASE( is_general_category( &(ud->records[1]), "Nl" ) );
+    TESTCASE( ! is_general_category( &(ud->records[1]), "Foo" ) );
+
+    /* decomposition_contains() */
+    TESTCASE( decomposition_contains( &(ud->records[1]), "<compat>" ) );
+    TESTCASE( ! decomposition_contains( &(ud->records[1]), "Foo" ) );
+
+    /* towupper_differs() / towlower_differs(): a mapping of 0 (none given)
+       never counts as differing.
+    */
+    TESTCASE( ! towupper_differs( &(ud->records[0]), 0 ) );
+    TESTCASE( ! towlower_differs( &(ud->records[0]), 0 ) );
+    TESTCASE( towupper_differs( &(ud->records[1]), 0x2170 ) );
+    TESTCASE( ! towlower_differs( &(ud->records[1]), 0x2170 ) );
+
+    release_unicode_data( ud );
+
+    return TEST_RESULTS;
+}
+
+#endif
diff --git a/src/pdclib/auxiliary/uctype/unicode_data.h b/src/pdclib/auxiliary/uctype/unicode_data.h
new file mode 100644
index 0000000..8cd4832
--- /dev/null
+++ b/src/pdclib/auxiliary/uctype/unicode_data.h
@@ -0,0 +1,77 @@
+/* unicode data
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef UNICODE_DATA
+#define UNICODE_DATA UNICODE_DATA
+
+#include <stddef.h>
+
+/* https://www.unicode.org/reports/tr44/#UnicodeData.txt */
+
+/* We do not need all these fields at this point, but we read them anyway
+   so we do not need to change much should the need arise later.
+*/
+/* One record (line) of UnicodeData.txt. */
+struct unicode_record_t
+{
+    size_t code_point;               /* parsed from hexadecimal */
+    char * name;                     /* allocated; NULL if the field was empty */
+    char general_category[ 3 ];      /* at most two characters plus terminator */
+    int canonical_combining_class;   /* -1 if the field was empty */
+    char bidi_class[ 4 ];            /* at most three characters plus terminator */
+    char * decomposition;            /* allocated; NULL if the field was empty */
+    int numeric_type;                /* -1 if the field was empty */
+    int numeric_digit;               /* -1 if the field was empty */
+    char * numeric_value;            /* allocated, kept as text; NULL if the field was empty */
+    char bidi_mirrored;              /* first character of the field, '\0' if empty */
+    /*char * unicode_1_name;*/ /* Obsolete as of 6.2.0 */
+    /*char * iso_comment;*/ /* Obsolete as of 5.2.0 */
+    size_t simple_uppercase_mapping; /* parsed from hexadecimal; 0 if the field was empty */
+    size_t simple_lowercase_mapping; /* parsed from hexadecimal; 0 if the field was empty */
+    size_t simple_titlecase_mapping; /* parsed from hexadecimal; 0 if the field was empty */
+};
+
+/* The complete data read from a file by read_unicode_data(). */
+struct unicode_data_t
+{
+    size_t size;                       /* number of elements in records */
+    struct unicode_record_t * records; /* allocated array, one element per line of the file */
+};
+
+/* The assumed field widths, for use with check_file().
+   One entry per field of a line, in file order. A negative value means the
+   width of that field is not checked; the positive values match the fields
+   that read_unicode_data() copies into fixed-size members of
+   struct unicode_record_t (or of which it uses a single character).
+*/
+static const int unicode_record_fields[] = {
+    -1, /* code_point */
+    -1, /* name */
+     3, /* general_category */
+    -1, /* canonical_combining_class */
+     4, /* bidi_class */
+    -1, /* decomposition */
+    -1, /* numeric_type */
+    -1, /* numeric_digit */
+    -1, /* numeric_value */
+     2, /* bidi_mirrored */
+    -1, /* unicode_1_name */
+    -1, /* iso_comment */
+    -1, /* simple_uppercase_mapping */
+    -1, /* simple_lowercase_mapping */
+    -1  /* simple_titlecase_mapping */
+};
+
+/* Read the given file (in UnicodeData.txt format) into memory. Returns a
+   pointer to the data, to be released with release_unicode_data(), or NULL
+   if the file could not be opened, did not pass check_file(), or memory
+   for the data could not be allocated.
+*/
+struct unicode_data_t * read_unicode_data( const char * filename );
+
+/* Returns non-zero if the record's name equals the given name exactly. */
+int has_name( struct unicode_record_t * ur, const char * name );
+
+/* Returns non-zero if the record's name ends with the given string. */
+int name_ends_with( struct unicode_record_t * ur, const char * name );
+
+/* Returns non-zero if the record's general category equals the given
+   category exactly.
+*/
+int is_general_category( struct unicode_record_t * ur, const char * category );
+
+/* Returns non-zero if the record has a decomposition, and that
+   decomposition contains the given substring.
+*/
+int decomposition_contains( struct unicode_record_t * ur, const char * substring );
+
+/* Returns non-zero if the record has a (non-zero) simple uppercase mapping
+   that is different from the given code point.
+*/
+int towupper_differs( struct unicode_record_t * ur, size_t codepoint );
+
+/* Returns non-zero if the record has a (non-zero) simple lowercase mapping
+   that is different from the given code point.
+*/
+int towlower_differs( struct unicode_record_t * ur, size_t codepoint );
+
+/* Release the data returned by read_unicode_data(), including the
+   allocated string fields of every record.
+*/
+void release_unicode_data( struct unicode_data_t * ud );
+
+#endif