snac2

Fork of https://codeberg.org/grunfink/snac2
git clone https://git.inz.fi/snac2
Log | Files | Refs | README | LICENSE

xs_regex.h (3928B)


      1 /* copyright (c) 2022 - 2025 grunfink et al. / MIT license */
      2 
      3 #ifndef _XS_REGEX_H
      4 
      5 #define _XS_REGEX_H
      6 
      7 int xs_regex_match(const char *str, const char *rx);
      8 xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
      9 #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
     10 xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
     11 #define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)
     12 xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
     13 #define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
     14 #define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
     15 #define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)
     16 
     17 #ifdef XS_IMPLEMENTATION
     18 
     19 #ifdef __TINYC__
     20 /* fix a compilation error in tcc */
     21 #define _REGEX_NELTS(n)
     22 #endif
     23 
     24 #include <regex.h>
     25 
     26 xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
     27 /* splits str using regex as a separator, at most count times.
     28     Always returns a list:
     29     len == 0: regcomp error
     30     len == 1: full string (no matches)
     31     len == odd: first part [ separator / next part ]...
     32 */
     33 {
     34     regex_t re;
     35     regmatch_t rm;
     36     int offset = 0;
     37     xs_list *list = xs_list_new();
     38     const char *p = str;
     39 
     40     if (regcomp(&re, rx, REG_EXTENDED))
     41         return list;
     42 
     43     while (count > 0 && !regexec(&re, (p = str + offset), 1, &rm, offset > 0 ? REG_NOTBOL : 0)) {
     44         /* add first the leading part of the string */
     45         if (xs_is_string(p))
     46             list = xs_list_append_nstr(list, p, rm.rm_so);
     47         else
     48             list = xs_list_append(list, "");
     49 
     50         if (xs_is_string(p + rm.rm_so))
     51             list = xs_list_append_nstr(list, p + rm.rm_so, rm.rm_eo - rm.rm_so);
     52         else
     53             list = xs_list_append(list, "");
     54 
     55         /* move forward */
     56         offset += rm.rm_eo;
     57 
     58         count--;
     59     }
     60 
     61     /* add the rest of the string */
     62     list = xs_list_append(list, p);
     63 
     64     regfree(&re);
     65 
     66     return list;
     67 }
     68 
     69 
     70 xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
     71 /* selects all matches and return them as a list */
     72 {
     73     xs_list *list = xs_list_new();
     74     xs *split = NULL;
     75     const xs_val *v;
     76     int n = 0;
     77 
     78     /* split */
     79     split = xs_regex_split_n(str, rx, count);
     80 
     81     /* now iterate to get only the 'separators' (odd ones) */
     82     xs_list_foreach(split, v) {
     83         if (n & 0x1)
     84             list = xs_list_append(list, v);
     85 
     86         n++;
     87     }
     88 
     89     return list;
     90 }
     91 
     92 
     93 xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
     94 /* replaces all matches with the rep string. If it contains unescaped &,
     95    they are replaced with the match */
     96 {
     97     xs_str_bld b = { 0 };
     98     xs *split = xs_regex_split_n(str, rx, count);
     99     const xs_val *v;
    100     int n = 0;
    101     int pholder = !!strchr(rep, '&');
    102 
    103     xs_list_foreach(split, v) {
    104         if (n & 0x1) {
    105             if (pholder) {
    106                 /* rep has a placeholder; process char by char */
    107                 const char *p = rep;
    108 
    109                 while (*p) {
    110                     if (*p == '&')
    111                         xs_str_bld_cat(&b, v);
    112                     else {
    113                         if (*p == '\\')
    114                             p++;
    115 
    116                         if (!*p)
    117                             break;
    118 
    119                         xs_str_bld_cat(&b, (char[2]){ *p });
    120                     }
    121 
    122                     p++;
    123                 }
    124             }
    125             else
    126                 xs_str_bld_cat(&b, rep);
    127         }
    128         else
    129             xs_str_bld_cat(&b, v);
    130 
    131         n++;
    132     }
    133 
    134     xs_free(str);
    135 
    136     return b.data;
    137 }
    138 
    139 
    140 int xs_regex_match(const char *str, const char *rx)
    141 /* returns if str matches the regex at least once */
    142 {
    143     xs *l = xs_regex_select_n(str, rx, 1);
    144 
    145     return xs_list_len(l) == 1;
    146 }
    147 
    148 
    149 #endif /* XS_IMPLEMENTATION */
    150 
    151 #endif /* _XS_REGEX_H */