[PATCH 2/2] vi: allow regular expressions in ':s' commands (original) (raw)

Andrey Dobrovolsky andrey.dobrovolsky.odessa at gmail.com
Mon Jun 28 12:25:06 UTC 2021


From a8649c2724f6fc2bd921836803b63e5b0c3fc77b Mon Sep 17 00:00:00 2001
From: AndreyDobrovolskyOdessa <andrey.dobrovolsky.odessa at gmail.com>
Date: Mon, 28 Jun 2021 02:43:02 +0300
Subject: [PATCH 2/2] vi: allow regular expressions in ':s' commands

BusyBox vi has never supported the use of regular expressions in search/replace (':s') commands. Implement this using GNU regex when VI_REGEX_SEARCH is enabled.

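The implementation:

[Archive note: the list that followed this heading was lost when the message was archived. As far as the diff in this message shows, the implementation:

- compiles the "find" pattern with regcomp() without REG_EXTENDED (i.e. as a basic regular expression), adding REG_ICASE when 'ignorecase' is set;
- limits each match to the current line by passing REG_STARTEND to regexec();
- expands \0 .. \9 in the "replace" pattern to the corresponding matched subpattern.]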

VI_REGEX_SEARCH isn't enabled in the default build. In that case:

function                                             old     new   delta
colon                                               4024    4021      -3

(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes

When VI_REGEX_SEARCH is enabled:

function                                             old     new   delta
colon                                               4024    4306    +282
do_substitution                                        -     122    +122
.rodata                                           108207  108229     +22

(add/remove: 1/0 grow/shrink: 2/0 up/down: 426/0)             Total: 426 bytes

Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa at gmail.com>
Signed-off-by: Ron Yorston <rmy at pobox.com>

 editors/vi.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 97 insertions(+), 8 deletions(-)

diff --git a/editors/vi.c b/editors/vi.c
index 959362b25..eed51fbad 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2675,6 +2675,49 @@ static char *expand_args(char *args)
 # endif
 #endif /* FEATURE_VI_COLON */
 
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define MAX_SUBPATTERN 10 // subpatterns \0 .. \9
+
+// orig - input string
+// regm - array of subpatterns bounds
+// s - replace pattern
+// result - buffer to place the substitution result
+static size_t do_substitution(const char *orig, regmatch_t *regm,
+		const char *s, char *result)
+{
+	const char *from; // memcpy source pointer
+	size_t len, total_len = 0;
+	regmatch_t *cur_match;
+
+	while (*s) {
+		len = 1; // default is to copy one char from replace pattern
+		from = s;
+		if (*s == '\\') {
+			from = ++s; // skip backslash
+			if (*s >= '0' && *s < '0' + MAX_SUBPATTERN) {
+				cur_match = regm + (*s - '0');
+				if (cur_match->rm_so >= 0) {
+					len = cur_match->rm_eo - cur_match->rm_so;
+					from = orig + cur_match->rm_so;
+				}
+			}
+		}
+		total_len += len;
+		if (result) {
+			memcpy(result, from, len);
+			result += len;
+			*result = '\0';
+		}
+		s++;
+	}
+
+	return total_len;
+}
+
+// do_substitution dry run
+# define get_substituted_size(x, y) do_substitution(NULL, x, y, NULL)
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
 // buf must be no longer than MAX_INPUT_LEN!
 static void colon(char *buf)
 {
@@ -3082,6 +3125,12 @@ static void colon(char *buf)
 # if ENABLE_FEATURE_VI_VERBOSE_STATUS
 		int last_line = 0, lines = 0;
 # endif
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+		regex_t preg;
+		int cflags;
+		regmatch_t regmatch[MAX_SUBPATTERN];
+		char *Rorig;
+# endif
 
 		// F points to the "find" pattern
 		// R points to the "replace" pattern
@@ -3098,7 +3147,6 @@ static void colon(char *buf)
 			*flags++ = '\0'; // terminate "replace"
 			gflag = *flags;
 		}
-		len_R = strlen(R);
 
 		if (len_F) { // save "find" as last search pattern
 			free(last_search_pattern);
@@ -3120,18 +3168,61 @@ static void colon(char *buf)
 			b = e;
 		}
 
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+		Rorig = R;
+		cflags = 0;
+		if (ignorecase)
+			cflags = REG_ICASE;
+		memset(&preg, 0, sizeof(preg));
+		if (regcomp(&preg, F, cflags) != 0) {
+			status_line(":s bad search pattern");
+			goto regex_search_end;
+		}
+# else
+		len_R = strlen(R);
+# endif
+
 		for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0
 			char *ls = q; // orig line start
 			char *found;
  vc4:
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+			found = NULL;
+			regmatch[0].rm_so = 0;
+			regmatch[0].rm_eo = end_line(q) - q;
+			if (!regexec(&preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND)) {
+				found = q + regmatch[0].rm_so;
+				len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+				len_R = get_substituted_size(regmatch, Rorig);
+				R = xmalloc(len_R + 1);
+				do_substitution(q, regmatch, Rorig, R);
+			}
+# else
 			found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find"
+# endif
 			if (found) {
 				uintptr_t bias;
 				// we found the "find" pattern - delete it
 				// For undo support, the first item should not be chained
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+				if (len_F) { // match can be empty, no delete needed
+					text_hole_delete(found, found + len_F - 1,
+								subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+				}
+				// insert the "replace" patern
+				bias = string_insert(found, R,
+							subs || len_F ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+				free(R);
+# else
 				text_hole_delete(found, found + len_F - 1,
 							subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
-				// can't do this above, no undo => no third argument
+				// insert the "replace" patern
+				bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
+# endif
+				found += bias;
+				ls += bias;
+				dot = ls;
+				//q += bias; - recalculated anyway
 				subs++;
 # if ENABLE_FEATURE_VI_VERBOSE_STATUS
 				if (last_line != i) {
@@ -3139,12 +3230,6 @@ static void colon(char *buf)
 					++lines;
 				}
 # endif
-				// insert the "replace" patern
-				bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
-				found += bias;
-				ls += bias;
-				dot = ls;
-				//q += bias; - recalculated anyway
 				// check for "global" :s/foo/bar/g
 				if (gflag == 'g') {
 					if ((found + len_R) < end_line(ls)) {
@@ -3164,6 +3249,10 @@ static void colon(char *buf)
 				status_line("%d substitutions on %d lines", subs, lines);
 # endif
 		}
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+		regfree(&preg);
+# endif
 # endif /* FEATURE_VI_SEARCH */
 	} else if (strncmp(cmd, "version", i) == 0) { // show software version
 		status_line(BB_VER);

2.31.1



More information about the busybox mailing list