// blisp_parser.h – BLISP parsing interface using str views (no modifications) // Depends on arenastr.c and strmanip.c for str, arena, and helper functions. // All functions are static and do not allocate memory. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-function" static str bl_parse_integer_(str blisp, int base, int64_t *i) { unless(blisp.len) return blisp; str s = blisp; int sign=1; byte c = s.data[0]; if('-'==c){ s = str_drop(s, 1); sign = -1; } int64_t N=0; int ndigits=0; while(s.len) { c = s.data[0]; if('_'==c){ s = str_drop(s, 1); continue; } int digit; if(isdigit(c)){ digit = c-'0'; }else if(isalpha(c)){ digit = tolower(c)-'a'+10; }else break; ++ndigits; if(digit >= base) return blisp; N *= base; N += digit; s = str_drop(s, 1); } if(0==ndigits) return blisp; if(i) *i = sign * N; return s; } // helper to check when blisp functions fail and return the same string they received static bool bl_fail(str original, str next){ return original.data == next.data; } // Expect #[bdx]NNNN at the start of string, return remainder of string // pass integer out in [i] parameter // if no integer at start, return [blisp] unchanged static str bl_get_integer(str blisp, int64_t *i) { if(i) *i=0; if (blisp.len < 3) return blisp; // at least #d0 str s = blisp; unless('#'==s.data[0]) return blisp; s = str_drop(s, 1); byte c = s.data[0]; s = str_drop(s, 1); int base=10; switch(c){ case 'b': base= 2; break; case 'd': base=10; break; case 'x': base=16; break; default: return blisp; } str const end = bl_parse_integer_(s, base, i); if(bl_fail(s,end)) return blisp; return end; } // Expect :symbol ([a-zA-Z][0-9a-zA-Z_-]*) at the start of string, return remainder of string // pass symbol out in [sym] parameter // if no symbol at start, return [blisp] unchanged static str bl_get_symbol(str blisp, str *sym) { if(sym) *sym = (str){0}; if (blisp.len < 2) return blisp; // at least :x str s = blisp; unless(':'==s.data[0]) return blisp; s = str_drop(s, 1); str const 
start=s; byte c=s.data[0]; unless(isalpha(c)) return blisp; int nchars=0; while(s.len){ c=s.data[0]; unless(isalnum(c) || '_'==c || '-'==c) break; s = str_drop(s, 1); ++nchars; } if(0==nchars) return blisp; if(sym) *sym = str_take(start, nchars); return s; } // Expect 'simple string' at the start of string, return remainder of string // pass string out in [string] parameter // if no string at start, return [blisp] unchanged static str bl_get_simple_string(str blisp, str *string) { if(string) *string = (str){0}; if (blisp.len < 2) return blisp; // at least '' str s = blisp; unless('\''==s.data[0]) return blisp; s = str_drop(s, 1); str const start=s; for(byte c; s.len;){ c=s.data[0]; if('\''==c){ if(string) *string = str_take(start, start.len-s.len); return str_drop(s, 1); } s = str_drop(s, 1); } return blisp; } // Expect #13'sized string' at the start of string, return remainder of string // pass string out in [string] parameter // if no string at start, return [blisp] unchanged static str bl_get_sized_string(str blisp, str *string) { if(string) *string = (str){0}; if (blisp.len < 4) return blisp; // at least #0'' str s = blisp; unless('#'==s.data[0]) return blisp; s = str_drop(s, 1); int64_t len=0; str end = bl_parse_integer_(s, 10, &len); if(bl_fail(s,end) || len<0) return blisp; s=end; unless(s.len && '\''==s.data[0]) return blisp; s = str_drop(s, 1); unless((len+1) < s.len) return blisp; if(string) *string = str_take(s, len); s = str_drop(s, len); unless('\''==s.data[0]) return blisp; return str_drop(s, 1); } static str bl_get_string(str blisp, str *string) { if (blisp.len < 1) return blisp; // at least #|' str s = blisp; byte const c = s.data[0]; switch(c){ case '\'': return bl_get_simple_string(s, string); case '#': return bl_get_sized_string(s, string); default: return blisp; } } static str bl_skip_element(str blisp); // Expect #(...) at the start of string // if the key is found, return the string starting at the value matching the key (i.e. 
one space/0x20 after the key) // (there is no interpretation or check of the string at the value position, it is simply returned) // otherwise return blisp unchanged static str bl_find_map_key(str blisp, str key) { if (blisp.len < 3) return blisp; // at least #() str s = blisp; unless('#'==s.data[0]) return blisp; s = str_drop(s, 1); unless('('==s.data[0]) return blisp; s = str_drop(s, 1); while(s.len && ':'==s.data[0]){ str sym={0}; str v = bl_get_symbol(s, &sym); if(v.data == s.data || 0==sym.len) return blisp; unless(v.len && ' '==v.data[0]) return blisp; v = str_drop(v, 1); if(str_eq(key, sym)) return v; s = bl_skip_element(v); if(s.data == v.data) return blisp; unless(s.len) return blisp; if(')'==s.data[0]) return blisp; if(' '==s.data[0]) s = str_drop(s, 1); } return blisp; } static str bl_skip_element(str blisp); static str bl_skip_list(str blisp){ unless(blisp.len) return blisp; str s = blisp; unless('('==s.data[0]) return blisp; s = str_drop(s, 1); while(s.len && ')'!=s.data[0]){ str x = bl_skip_element(s); if(x.data == s.data) return blisp; s=x; if(s.len && ' '==s.data[0]) s = str_drop(s, 1); } unless(s.len && ')'==s.data[0]) return blisp; return str_drop(s, 1); } static str bl_skip_map(str blisp){ unless(blisp.len && '#'==blisp.data[0]) return blisp; str x = bl_skip_list(str_drop(blisp, 1)); if(x.data == blisp.data+1) return blisp; return x; } // skip any kind of element return the string just after the element // (before the space or close brace). 
static str bl_skip_element(str blisp)
{
    if(!blisp.len) return blisp;

    // The first byte identifies the element kind.
    byte const lead = blisp.data[0];
    if(':' == lead)  return bl_get_symbol(blisp, NULL);
    if('\'' == lead) return bl_get_simple_string(blisp, NULL);
    if('(' == lead)  return bl_skip_list(blisp);
    if('#' != lead)  return blisp; // not a recognised element: report failure

    // '#' introduces a map, an integer or a sized string;
    // the byte that follows decides which one to try.
    str const after_hash = str_drop(blisp, 1);
    if(!after_hash.len) return blisp;
    byte const kind = after_hash.data[0];
    if('(' == kind)   return bl_skip_map(blisp);
    if(isalpha(kind)) return bl_get_integer(blisp, NULL);
    return bl_get_sized_string(blisp, NULL);
}

// convenience functions for accessing known types in maps

// TODO make all functions return this result type instead of returning the same string?
// Outcome of a typed map lookup: [ok] tells whether the lookup succeeded,
// and on success the member matching the requested type holds the value.
typedef struct bl_result {
    bool ok;
    union {
        int64_t i;
        str s;
    };
} bl_result;

// bl_find_map_string - finds the key and expects and returns the string value
// (.ok is false when the key is missing or its value is not a string)
static bl_result bl_find_map_string(str blisp, str key)
{
    str const value = bl_find_map_key(blisp, key);
    if(!bl_fail(blisp, value)){
        str text = {0};
        str const rest = bl_get_string(value, &text);
        if(!bl_fail(value, rest)){
            return (bl_result){.ok = true, .s = text};
        }
    }
    return (bl_result){.ok = false};
}

// bl_find_map_integer - finds the key and expects and returns the integer value
// (.ok is false when the key is missing or its value is not an integer)
static bl_result bl_find_map_integer(str blisp, str key)
{
    str const value = bl_find_map_key(blisp, key);
    if(!bl_fail(blisp, value)){
        int64_t number = 0;
        str const rest = bl_get_integer(value, &number);
        if(!bl_fail(value, rest)){
            return (bl_result){.ok = true, .i = number};
        }
    }
    return (bl_result){.ok = false};
}

#pragma GCC diagnostic pop