aboutsummaryrefslogtreecommitdiffstats
path: root/src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c
diff options
context:
space:
mode:
authortcsullivan <tullivan99@gmail.com>2018-11-17 13:02:57 -0500
committertcsullivan <tullivan99@gmail.com>2018-11-17 13:02:57 -0500
commitc6ef89664b8c0d7aa85bddd5c7014aa6df82cbe7 (patch)
treed1f9d09412a46bdf4344fe30392455070a72993d /src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c
parentdb38c4b9dac461de0ed75bf6d079dacba1b31bc9 (diff)
added pdclib, removed sash
Diffstat (limited to 'src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c')
-rw-r--r--src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c639
1 files changed, 639 insertions, 0 deletions
diff --git a/src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c b/src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c
new file mode 100644
index 0000000..1f8fd71
--- /dev/null
+++ b/src/pdclib/functions/_PDCLIB/_PDCLIB_scan.c
@@ -0,0 +1,639 @@
+/* _PDCLIB_scan( const char *, struct _PDCLIB_status_t * )
+
+ This file is part of the Public Domain C Library (PDCLib).
+ Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <string.h>
+#include <stddef.h>
+#include <limits.h>
+
+#ifndef REGTEST
+
+/* Using an integer's bits as flags for both the conversion flags and length
+ modifiers.
+*/
+#define E_suppressed 1<<0
+#define E_char 1<<6
+#define E_short 1<<7
+#define E_long 1<<8
+#define E_llong 1<<9
+#define E_intmax 1<<10
+#define E_size 1<<11
+#define E_ptrdiff 1<<12
+#define E_pointer 1<<13
+#define E_ldouble 1<<14
+#define E_unsigned 1<<16
+
+
+/* Helper function to get a character from the string or stream, whatever is
+ used for input. When reading from a string, returns EOF on end-of-string
+ so that handling of the return value can be uniform for both streams and
+ strings.
+*/
+static int GET( struct _PDCLIB_status_t * status )
+{
+ int rc = EOF;
+ if ( status->stream != NULL )
+ {
+ rc = getc( status->stream );
+ }
+ else
+ {
+ rc = ( *status->s == '\0' ) ? EOF : (unsigned char)*((status->s)++);
+ }
+ if ( rc != EOF )
+ {
+ ++(status->i);
+ ++(status->current);
+ }
+ return rc;
+}
+
+
+/* Helper function to put a read character back into the string or stream,
+ whatever is used for input.
+*/
+static void UNGET( int c, struct _PDCLIB_status_t * status )
+{
+ if ( status->stream != NULL )
+ {
+ ungetc( c, status->stream ); /* TODO: Error? */
+ }
+ else
+ {
+ --(status->s);
+ }
+ --(status->i);
+ --(status->current);
+}
+
+
+/* Helper function to check if a character is part of a given scanset */
+static int IN_SCANSET( const char * scanlist, const char * end_scanlist, int rc )
+{
+ /* SOLAR */
+ int previous = -1;
+ while ( scanlist != end_scanlist )
+ {
+ if ( ( *scanlist == '-' ) && ( previous != -1 ) )
+ {
+ /* possible scangroup ("a-z") */
+ if ( ++scanlist == end_scanlist )
+ {
+ /* '-' at end of scanlist does not describe a scangroup */
+ return rc == '-';
+ }
+ while ( ++previous <= (unsigned char)*scanlist )
+ {
+ if ( previous == rc )
+ {
+ return 1;
+ }
+ }
+ previous = -1;
+ }
+ else
+ {
+ /* not a scangroup, check verbatim */
+ if ( rc == (unsigned char)*scanlist )
+ {
+ return 1;
+ }
+ previous = (unsigned char)(*scanlist++);
+ }
+ }
+ return 0;
+}
+
+
+const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status )
+{
+ /* generic input character */
+ int rc;
+ const char * prev_spec;
+ const char * orig_spec = spec;
+ int value_parsed;
+ if ( *(++spec) == '%' )
+ {
+ /* %% -> match single '%' */
+ rc = GET( status );
+ switch ( rc )
+ {
+ case EOF:
+ /* input error */
+ if ( status->n == 0 )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ case '%':
+ return ++spec;
+ default:
+ UNGET( rc, status );
+ break;
+ }
+ }
+ /* Initializing status structure */
+ status->flags = 0;
+ status->base = -1;
+ status->current = 0;
+ status->width = 0;
+ status->prec = 0;
+
+ /* '*' suppresses assigning parsed value to variable */
+ if ( *spec == '*' )
+ {
+ status->flags |= E_suppressed;
+ ++spec;
+ }
+
+ /* If a width is given, strtol() will return its value. If not given,
+ strtol() will return zero. In both cases, endptr will point to the
+ rest of the conversion specifier - just what we need.
+ */
+ prev_spec = spec;
+ status->width = (int)strtol( spec, (char**)&spec, 10 );
+ if ( spec == prev_spec )
+ {
+ status->width = SIZE_MAX;
+ }
+
+ /* Optional length modifier
+ We step one character ahead in any case, and step back only if we find
+ there has been no length modifier (or step ahead another character if it
+ has been "hh" or "ll").
+ */
+ switch ( *(spec++) )
+ {
+ case 'h':
+ if ( *spec == 'h' )
+ {
+ /* hh -> char */
+ status->flags |= E_char;
+ ++spec;
+ }
+ else
+ {
+ /* h -> short */
+ status->flags |= E_short;
+ }
+ break;
+ case 'l':
+ if ( *spec == 'l' )
+ {
+ /* ll -> long long */
+ status->flags |= E_llong;
+ ++spec;
+ }
+ else
+ {
+ /* l -> long */
+ status->flags |= E_long;
+ }
+ break;
+ case 'j':
+ /* j -> intmax_t, which might or might not be long long */
+ status->flags |= E_intmax;
+ break;
+ case 'z':
+ /* z -> size_t, which might or might not be unsigned int */
+ status->flags |= E_size;
+ break;
+ case 't':
+ /* t -> ptrdiff_t, which might or might not be long */
+ status->flags |= E_ptrdiff;
+ break;
+ case 'L':
+ /* L -> long double */
+ status->flags |= E_ldouble;
+ break;
+ default:
+ --spec;
+ break;
+ }
+
+ /* Conversion specifier */
+
+ /* whether valid input had been parsed */
+ value_parsed = 0;
+
+ switch ( *spec )
+ {
+ case 'd':
+ status->base = 10;
+ break;
+ case 'i':
+ status->base = 0;
+ break;
+ case 'o':
+ status->base = 8;
+ status->flags |= E_unsigned;
+ break;
+ case 'u':
+ status->base = 10;
+ status->flags |= E_unsigned;
+ break;
+ case 'x':
+ status->base = 16;
+ status->flags |= E_unsigned;
+ break;
+ case 'f':
+ case 'F':
+ case 'e':
+ case 'E':
+ case 'g':
+ case 'G':
+ case 'a':
+ case 'A':
+ break;
+ case 'c':
+ {
+ char * c = va_arg( status->arg, char * );
+ /* for %c, default width is one */
+ if ( status->width == SIZE_MAX )
+ {
+ status->width = 1;
+ }
+ /* reading until width reached or input exhausted */
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ *(c++) = rc;
+ value_parsed = 1;
+ }
+ /* width or input exhausted */
+ if ( value_parsed )
+ {
+ ++status->n;
+ return ++spec;
+ }
+ else
+ {
+ /* input error, no character read */
+ if ( status->n == 0 )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ }
+ case 's':
+ {
+ char * c = va_arg( status->arg, char * );
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( isspace( rc ) )
+ {
+ UNGET( rc, status );
+ if ( value_parsed )
+ {
+ /* matching sequence terminated by whitespace */
+ *c = '\0';
+ ++status->n;
+ return ++spec;
+ }
+ else
+ {
+ /* matching error */
+ return NULL;
+ }
+ }
+ else
+ {
+ /* match */
+ value_parsed = 1;
+ *(c++) = rc;
+ }
+ }
+ /* width or input exhausted */
+ if ( value_parsed )
+ {
+ *c = '\0';
+ ++status->n;
+ return ++spec;
+ }
+ else
+ {
+ /* input error, no character read */
+ if ( status->n == 0 )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ }
+ case '[':
+ {
+ const char * endspec = spec;
+ int negative_scanlist = 0;
+ char * c;
+ if ( *(++endspec) == '^' )
+ {
+ negative_scanlist = 1;
+ ++endspec;
+ }
+ spec = endspec;
+ do
+ {
+ /* TODO: This can run beyond a malformed format string */
+ ++endspec;
+ } while ( *endspec != ']' );
+ /* read according to scanlist, equiv. to %s above */
+ c = va_arg( status->arg, char * );
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( negative_scanlist )
+ {
+ if ( IN_SCANSET( spec, endspec, rc ) )
+ {
+ UNGET( rc, status );
+ break;
+ }
+ }
+ else
+ {
+ if ( ! IN_SCANSET( spec, endspec, rc ) )
+ {
+ UNGET( rc, status );
+ break;
+ }
+ }
+ value_parsed = 1;
+ *(c++) = rc;
+ }
+ if ( value_parsed )
+ {
+ *c = '\0';
+ ++status->n;
+ return ++endspec;
+ }
+ else
+ {
+ if ( rc == EOF )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ }
+ case 'p':
+ status->base = 16;
+ status->flags |= E_pointer;
+ break;
+ case 'n':
+ {
+ int * val = va_arg( status->arg, int * );
+ *val = status->i;
+ return ++spec;
+ }
+ default:
+ /* No conversion specifier. Bad conversion. */
+ return orig_spec;
+ }
+
+ if ( status->base != -1 )
+ {
+ /* integer conversion */
+ uintmax_t value = 0; /* absolute value read */
+ int prefix_parsed = 0;
+ int sign = 0;
+ while ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( isspace( rc ) )
+ {
+ if ( sign )
+ {
+ /* matching sequence terminated by whitespace */
+ UNGET( rc, status );
+ break;
+ }
+ else
+ {
+ /* leading whitespace not counted against width */
+ status->current--;
+ }
+ }
+ else if ( ! sign )
+ {
+ /* no sign parsed yet */
+ switch ( rc )
+ {
+ case '-':
+ sign = -1;
+ break;
+ case '+':
+ sign = 1;
+ break;
+ default:
+ /* not a sign; put back character */
+ sign = 1;
+ UNGET( rc, status );
+ break;
+ }
+ }
+ else if ( ! prefix_parsed )
+ {
+ /* no prefix (0x... for hex, 0... for octal) parsed yet */
+ prefix_parsed = 1;
+ if ( rc != '0' )
+ {
+ /* not a prefix; if base not yet set, set to decimal */
+ if ( status->base == 0 )
+ {
+ status->base = 10;
+ }
+ UNGET( rc, status );
+ }
+ else
+ {
+ /* starts with zero, so it might be a prefix. */
+ /* check what follows next (might be 0x...) */
+ if ( ( status->current < status->width ) &&
+ ( ( rc = GET( status ) ) != EOF ) )
+ {
+ if ( tolower( rc ) == 'x' )
+ {
+ /* 0x... would be prefix for hex base... */
+ if ( ( status->base == 0 ) ||
+ ( status->base == 16 ) )
+ {
+ status->base = 16;
+ }
+ else
+ {
+ /* ...unless already set to other value */
+ UNGET( rc, status );
+ value_parsed = 1;
+ }
+ }
+ else
+ {
+ /* 0... but not 0x.... would be octal prefix */
+ UNGET( rc, status );
+ if ( status->base == 0 )
+ {
+ status->base = 8;
+ }
+ /* in any case we have read a zero */
+ value_parsed = 1;
+ }
+ }
+ else
+ {
+ /* failed to read beyond the initial zero */
+ value_parsed = 1;
+ break;
+ }
+ }
+ }
+ else
+ {
+ char * digitptr = memchr( _PDCLIB_digits, tolower( rc ), status->base );
+ if ( digitptr == NULL )
+ {
+ /* end of input item */
+ UNGET( rc, status );
+ break;
+ }
+ value *= status->base;
+ value += digitptr - _PDCLIB_digits;
+ value_parsed = 1;
+ }
+ }
+ /* width or input exhausted, or non-matching character */
+ if ( ! value_parsed )
+ {
+ /* out of input before anything could be parsed - input error */
+ /* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */
+ if ( ( status->n == 0 ) && ( rc == EOF ) )
+ {
+ status->n = -1;
+ }
+ return NULL;
+ }
+ /* convert value to target type and assign to parameter */
+ if ( ! ( status->flags & E_suppressed ) )
+ {
+ switch ( status->flags & ( E_char | E_short | E_long | E_llong |
+ E_intmax | E_size | E_ptrdiff | E_pointer |
+ E_unsigned ) )
+ {
+ case E_char:
+ *( va_arg( status->arg, char * ) ) = (char)( value * sign );
+ break;
+ case E_char | E_unsigned:
+ *( va_arg( status->arg, unsigned char * ) ) = (unsigned char)( value * sign );
+ break;
+
+ case E_short:
+ *( va_arg( status->arg, short * ) ) = (short)( value * sign );
+ break;
+ case E_short | E_unsigned:
+ *( va_arg( status->arg, unsigned short * ) ) = (unsigned short)( value * sign );
+ break;
+
+ case 0:
+ *( va_arg( status->arg, int * ) ) = (int)( value * sign );
+ break;
+ case E_unsigned:
+ *( va_arg( status->arg, unsigned int * ) ) = (unsigned int)( value * sign );
+ break;
+
+ case E_long:
+ *( va_arg( status->arg, long * ) ) = (long)( value * sign );
+ break;
+ case E_long | E_unsigned:
+ *( va_arg( status->arg, unsigned long * ) ) = (unsigned long)( value * sign );
+ break;
+
+ case E_llong:
+ *( va_arg( status->arg, long long * ) ) = (long long)( value * sign );
+ break;
+ case E_llong | E_unsigned:
+ *( va_arg( status->arg, unsigned long long * ) ) = (unsigned long long)( value * sign );
+ break;
+
+ case E_intmax:
+ *( va_arg( status->arg, intmax_t * ) ) = (intmax_t)( value * sign );
+ break;
+ case E_intmax | E_unsigned:
+ *( va_arg( status->arg, uintmax_t * ) ) = (uintmax_t)( value * sign );
+ break;
+
+ case E_size:
+ /* E_size always implies unsigned */
+ *( va_arg( status->arg, size_t * ) ) = (size_t)( value * sign );
+ break;
+
+ case E_ptrdiff:
+ /* E_ptrdiff always implies signed */
+ *( va_arg( status->arg, ptrdiff_t * ) ) = (ptrdiff_t)( value * sign );
+ break;
+
+ case E_pointer:
+ /* E_pointer always implies unsigned */
+ *( uintptr_t* )( va_arg( status->arg, void * ) ) = (uintptr_t)( value * sign );
+ break;
+
+ default:
+ puts( "UNSUPPORTED SCANF FLAG COMBINATION" );
+ return NULL; /* behaviour unspecified */
+ }
+ ++(status->n);
+ }
+ return ++spec;
+ }
+ /* TODO: Floats. */
+ return NULL;
+}
+
+#endif
+
+#ifdef TEST
+#define _PDCLIB_FILEID "_PDCLIB/scan.c"
+#define _PDCLIB_STRINGIO
+
+#include "_PDCLIB_test.h"
+
+#ifndef REGTEST
+
+static int testscanf( const char * s, const char * format, ... )
+{
+ struct _PDCLIB_status_t status;
+ status.n = 0;
+ status.i = 0;
+ status.s = (char *)s;
+ status.stream = NULL;
+ va_start( status.arg, format );
+ if ( *(_PDCLIB_scan( format, &status )) != '\0' )
+ {
+ printf( "_PDCLIB_scan() did not return end-of-specifier on '%s'.\n", format );
+ ++TEST_RESULTS;
+ }
+ va_end( status.arg );
+ return status.n;
+}
+
+#endif
+
+#define TEST_CONVERSION_ONLY
+
+int main( void )
+{
+#ifndef REGTEST
+ char source[100];
+#include "scanf_testcases.h"
+#endif
+ return TEST_RESULTS;
+}
+
+#endif