diff --git a/package/perl/0001-regcomp-c-regexec-c-fixup-regex-engine-build-under-Uusedl.patch b/package/perl/0001-regcomp-c-regexec-c-fixup-regex-engine-build-under-Uusedl.patch new file mode 100644 index 0000000000..f0837c2d33 --- /dev/null +++ b/package/perl/0001-regcomp-c-regexec-c-fixup-regex-engine-build-under-Uusedl.patch @@ -0,0 +1,554 @@ +From ba6e2c38aafc23cf114f3ba0d0ff3baead34328b Mon Sep 17 00:00:00 2001 +From: Yves Orton +Date: Tue, 1 Aug 2023 23:12:46 +0200 +Subject: [PATCH] regcomp*.c, regexec.c - fixup regex engine build under + -Uusedl + +The regex engine is built a bit different from most of the perl +codebase. It is compiled as part of the main libperl.so and it is +also compiled (with DEBUGGING enabled) as part of the re extension. +When perl itself is compiled with DEBUGGING enabled then the code +in the re.so extension and the code in libperl.so is the same. + +This all works fine and dandy until you have a static build where the +re.so is linked into libperl.so, which results in duplicate symbols +being defined. These symbols come in two flaviours: "auxiliary" and +"debugging" related symbols. + +We have basically three cases: + +1. USE_DYNAMIC_LOADING is defined. In this case we are doing a dynamic + build and re.so will be separate from libperl.so, so it even if this + is a DEBUGGING enabled build debug and auxiliary functions can be + compiled into *both* re.so and libperl.so. This is basically the + "standard build". + +2. USE_DYNAMIC_LOADING is not defined, and DEBUGGING is not defined + either. In this case auxiliary functions should only be compiled in + libperl.so, and the debug functions should only be compiled into + re.so + +3. USE_DYNAMIC_LOADING is not defined, and DEBUGGING *is* defined. In + this case auxiliary functions AND debug functions should only be + compiled into libperl.so + +It is possible to detect the different build options by looking at the +defines 'USE_DYNAMIC_LOADING', 'PERL_EXT_RE_DEBUG' and +'DEBUGGING_RE_ONLY'. 'USE_DYNAMIC_LOADING' is NOT defined when we are +building a static perl. 'PERL_EXT_RE_DEBUG' is defined only when we are +building re.so, and 'DEBUGGING_RE_ONLY' is defined only when we are +building re.so in a perl that is not itself already a DEBUGGING enabled +perl. The file ext/re/re_top.h responsible for setting up +DEBUGGING_RE_ONLY. + +This patch uses 'PERL_EXT_RE_DEBUG', 'DEBUGGING_RE_ONLY' and +'USE_DYNAMIC_LOADING' to define in regcomp.h two further define flags +'PERL_RE_BUILD_DEBUG' and 'PERL_RE_BUILD_AUX'. + +The 'PERL_RE_BUILD_DEBUG' flag determines if the debugging functions +should be compiled into libperl.so or re.so or both. The +'PERL_RE_BUILD_AUX' flag determines if the auxiliary functions should be +compiled into just libperl.so or into it and re.so. We then use these +flags to guard the different types of functions so that we can build in +all three modes without duplicate symbols. + +Upstream: https://github.com/Perl/perl5/commit/ba6e2c38aafc23cf114f3ba0d0ff3baead34328b +Signed-off-by: Fabrice Fontaine +--- + regcomp.c | 13 +- + regcomp.h | 14 ++- + regcomp_debug.c | 308 +++++++++++++++++++++++----------------------- + regcomp_invlist.c | 3 +- + regexec.c | 3 +- + 5 files changed, 181 insertions(+), 160 deletions(-) + +diff --git a/regcomp.c b/regcomp.c +index d3c135fbfad1..6e35d95d2ac6 100644 +--- a/regcomp.c ++++ b/regcomp.c +@@ -290,6 +290,7 @@ S_edit_distance(const UV* src, + /* END of edit_distance() stuff + * ========================================================= */ + ++#ifdef PERL_RE_BUILD_AUX + /* add a data member to the struct reg_data attached to this regex, it should + * always return a non-zero return. the 's' argument is the type of the items + * being added and the n is the number of items. The length of 's' should match +@@ -340,6 +341,7 @@ Perl_reg_add_data(RExC_state_t* const pRExC_state, const char* const s, const U3 + assert(count>0); + return count; + } ++#endif /* PERL_RE_BUILD_AUX */ + + /*XXX: todo make this not included in a non debugging perl, but appears to be + * used anyway there, in 'use re' */ +@@ -7443,6 +7445,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) + } + + ++#ifdef PERL_RE_BUILD_AUX + void + Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr) + { +@@ -7502,6 +7505,7 @@ Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr) + } + } + } ++#endif /* PERL_RE_BUILD_AUX */ + + /* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]]. + Character classes ([:foo:]) can also be negated ([:^foo:]). +@@ -9095,6 +9099,7 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state, + #undef IS_OPERATOR + #undef IS_OPERAND + ++#ifdef PERL_RE_BUILD_AUX + void + Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist) + { +@@ -9182,6 +9187,8 @@ Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** i + } + } + } ++#endif /* PERL_RE_BUILD_AUX */ ++ + + STATIC void + S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings) +@@ -12105,6 +12112,7 @@ S_optimize_regclass(pTHX_ + + #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION + ++#ifdef PERL_RE_BUILD_AUX + void + Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, + regnode* const node, +@@ -12261,6 +12269,7 @@ Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, + RExC_rxi->data->data[n] = (void*)rv; + ARG1u_SET(node, n); + } ++#endif /* PERL_RE_BUILD_AUX */ + + SV * + +@@ -12999,6 +13008,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, + } + #endif + ++ ++#ifdef PERL_RE_BUILD_AUX + SV* + Perl_get_ANYOFM_contents(pTHX_ const regnode * n) { + +@@ -13047,7 +13058,7 @@ Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) { + UTF_CONTINUATION_MARK | 0)); + return cp_list; + } +- ++#endif /* PERL_RE_BUILD_AUX */ + + + SV * +diff --git a/regcomp.h b/regcomp.h +index 31c91e6a68e8..017a9f843514 100644 +--- a/regcomp.h ++++ b/regcomp.h +@@ -1554,7 +1554,19 @@ typedef enum { + #define EVAL_OPTIMISTIC_FLAG 128 + #define EVAL_FLAGS_MASK (EVAL_OPTIMISTIC_FLAG-1) + +- ++/* We define PERL_RE_BUILD_DEBUG if we are NOT compiling the re extension and ++ * we are under DEBUGGING, or if we are ARE compiling the re extension ++ * and this is not a DEBUGGING enabled build (identified by ++ * DEBUGGING_RE_ONLY being defined) ++ */ ++#if ( defined(USE_DYNAMIC_LOADING) && defined(DEBUGGING)) || \ ++ ( defined(PERL_EXT_RE_BUILD) && defined(DEBUGGING_RE_ONLY)) || \ ++ (!defined(PERL_EXT_RE_BUILD) && defined(DEBUGGING)) ++#define PERL_RE_BUILD_DEBUG ++#endif ++#if ( defined(USE_DYNAMIC_LOADING) || !defined(PERL_EXT_RE_BUILD) ) ++#define PERL_RE_BUILD_AUX ++#endif + + #endif /* PERL_REGCOMP_H_ */ + +diff --git a/regcomp_debug.c b/regcomp_debug.c +index 93db7a89cf48..96598c49c0bc 100644 +--- a/regcomp_debug.c ++++ b/regcomp_debug.c +@@ -18,8 +18,7 @@ + #include "unicode_constants.h" + #include "regcomp_internal.h" + +-#ifdef DEBUGGING +- ++#ifdef PERL_RE_BUILD_DEBUG + int + Perl_re_printf(pTHX_ const char *fmt, ...) + { +@@ -159,13 +158,160 @@ Perl_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state, + }); + } + +-#endif /* DEBUGGING */ ++const regnode * ++Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, ++ const regnode *last, const regnode *plast, ++ SV* sv, I32 indent, U32 depth) ++{ ++ const regnode *next; ++ const regnode *optstart= NULL; ++ ++ RXi_GET_DECL(r, ri); ++ DECLARE_AND_GET_RE_DEBUG_FLAGS; ++ ++ PERL_ARGS_ASSERT_DUMPUNTIL; ++ ++#ifdef DEBUG_DUMPUNTIL ++ Perl_re_printf( aTHX_ "--- %d : %d - %d - %d\n", indent, node-start, ++ last ? last-start : 0, plast ? plast-start : 0); ++#endif ++ ++ if (plast && plast < last) ++ last= plast; ++ ++ while (node && (!last || node < last)) { ++ const U8 op = OP(node); ++ ++ if (op == CLOSE || op == SRCLOSE || op == WHILEM) ++ indent--; ++ next = regnext((regnode *)node); ++ const regnode *after = regnode_after((regnode *)node,0); ++ ++ /* Where, what. */ ++ if (op == OPTIMIZED) { ++ if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE)) ++ optstart = node; ++ else ++ goto after_print; ++ } else ++ CLEAR_OPTSTART; ++ ++ regprop(r, sv, node, NULL, NULL); ++ Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start), ++ (int)(2*indent + 1), "", SvPVX_const(sv)); ++ ++ if (op != OPTIMIZED) { ++ if (next == NULL) /* Next ptr. */ ++ Perl_re_printf( aTHX_ " (0)"); ++ else if (REGNODE_TYPE(op) == BRANCH ++ && REGNODE_TYPE(OP(next)) != BRANCH ) ++ Perl_re_printf( aTHX_ " (FAIL)"); ++ else ++ Perl_re_printf( aTHX_ " (%" IVdf ")", (IV)(next - start)); ++ Perl_re_printf( aTHX_ "\n"); ++ } ++ ++ after_print: ++ if (REGNODE_TYPE(op) == BRANCHJ) { ++ assert(next); ++ const regnode *nnode = (OP(next) == LONGJMP ++ ? regnext((regnode *)next) ++ : next); ++ if (last && nnode > last) ++ nnode = last; ++ DUMPUNTIL(after, nnode); ++ } ++ else if (REGNODE_TYPE(op) == BRANCH) { ++ assert(next); ++ DUMPUNTIL(after, next); ++ } ++ else if ( REGNODE_TYPE(op) == TRIE ) { ++ const regnode *this_trie = node; ++ const U32 n = ARG1u(node); ++ const reg_ac_data * const ac = op>=AHOCORASICK ? ++ (reg_ac_data *)ri->data->data[n] : ++ NULL; ++ const reg_trie_data * const trie = ++ (reg_trie_data*)ri->data->data[optrie]; ++#ifdef DEBUGGING ++ AV *const trie_words ++ = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]); ++#endif ++ const regnode *nextbranch= NULL; ++ I32 word_idx; ++ SvPVCLEAR(sv); ++ for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) { ++ SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0); ++ ++ Perl_re_indentf( aTHX_ "%s ", ++ indent+3, ++ elem_ptr ++ ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr), ++ SvCUR(*elem_ptr), PL_dump_re_max_len, ++ PL_colors[0], PL_colors[1], ++ (SvUTF8(*elem_ptr) ++ ? PERL_PV_ESCAPE_UNI ++ : 0) ++ | PERL_PV_PRETTY_ELLIPSES ++ | PERL_PV_PRETTY_LTGT ++ ) ++ : "???" ++ ); ++ if (trie->jump) { ++ U16 dist= trie->jump[word_idx+1]; ++ Perl_re_printf( aTHX_ "(%" UVuf ")\n", ++ (UV)((dist ? this_trie + dist : next) - start)); ++ if (dist) { ++ if (!nextbranch) ++ nextbranch= this_trie + trie->jump[0]; ++ DUMPUNTIL(this_trie + dist, nextbranch); ++ } ++ if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH) ++ nextbranch= regnext((regnode *)nextbranch); ++ } else { ++ Perl_re_printf( aTHX_ "\n"); ++ } ++ } ++ if (last && next > last) ++ node= last; ++ else ++ node= next; ++ } ++ else if ( op == CURLY ) { /* "next" might be very big: optimizer */ ++ DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */ ++ } ++ else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) { ++ assert(next); ++ DUMPUNTIL(after, next); ++ } ++ else if ( op == PLUS || op == STAR) { ++ DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */ ++ } ++ else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) { ++ /* Literal string, where present. */ ++ node = (const regnode *)REGNODE_AFTER_varies(node); ++ } ++ else { ++ node = REGNODE_AFTER_opcode(node,op); ++ } ++ if (op == CURLYX || op == OPEN || op == SROPEN) ++ indent++; ++ if (REGNODE_TYPE(op) == END) ++ break; ++ } ++ CLEAR_OPTSTART; ++#ifdef DEBUG_DUMPUNTIL ++ Perl_re_printf( aTHX_ "--- %d\n", (int)indent); ++#endif ++ return node; ++} ++ ++#endif /* PERL_RE_BUILD_DEBUG */ + + /* + - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form + */ + #ifdef DEBUGGING +- + static void + S_regdump_intflags(pTHX_ const char *lead, const U32 flags) + { +@@ -907,8 +1053,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ + #endif /* DEBUGGING */ + } + +-#ifdef DEBUGGING + ++#ifdef DEBUGGING + STATIC void + S_put_code_point(pTHX_ SV *sv, UV c) + { +@@ -1517,154 +1663,4 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, + + return did_output_something; + } +- +- +-const regnode * +-Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, +- const regnode *last, const regnode *plast, +- SV* sv, I32 indent, U32 depth) +-{ +- const regnode *next; +- const regnode *optstart= NULL; +- +- RXi_GET_DECL(r, ri); +- DECLARE_AND_GET_RE_DEBUG_FLAGS; +- +- PERL_ARGS_ASSERT_DUMPUNTIL; +- +-#ifdef DEBUG_DUMPUNTIL +- Perl_re_printf( aTHX_ "--- %d : %d - %d - %d\n", indent, node-start, +- last ? last-start : 0, plast ? plast-start : 0); +-#endif +- +- if (plast && plast < last) +- last= plast; +- +- while (node && (!last || node < last)) { +- const U8 op = OP(node); +- +- if (op == CLOSE || op == SRCLOSE || op == WHILEM) +- indent--; +- next = regnext((regnode *)node); +- const regnode *after = regnode_after((regnode *)node,0); +- +- /* Where, what. */ +- if (op == OPTIMIZED) { +- if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE)) +- optstart = node; +- else +- goto after_print; +- } else +- CLEAR_OPTSTART; +- +- regprop(r, sv, node, NULL, NULL); +- Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start), +- (int)(2*indent + 1), "", SvPVX_const(sv)); +- +- if (op != OPTIMIZED) { +- if (next == NULL) /* Next ptr. */ +- Perl_re_printf( aTHX_ " (0)"); +- else if (REGNODE_TYPE(op) == BRANCH +- && REGNODE_TYPE(OP(next)) != BRANCH ) +- Perl_re_printf( aTHX_ " (FAIL)"); +- else +- Perl_re_printf( aTHX_ " (%" IVdf ")", (IV)(next - start)); +- Perl_re_printf( aTHX_ "\n"); +- } +- +- after_print: +- if (REGNODE_TYPE(op) == BRANCHJ) { +- assert(next); +- const regnode *nnode = (OP(next) == LONGJMP +- ? regnext((regnode *)next) +- : next); +- if (last && nnode > last) +- nnode = last; +- DUMPUNTIL(after, nnode); +- } +- else if (REGNODE_TYPE(op) == BRANCH) { +- assert(next); +- DUMPUNTIL(after, next); +- } +- else if ( REGNODE_TYPE(op) == TRIE ) { +- const regnode *this_trie = node; +- const U32 n = ARG1u(node); +- const reg_ac_data * const ac = op>=AHOCORASICK ? +- (reg_ac_data *)ri->data->data[n] : +- NULL; +- const reg_trie_data * const trie = +- (reg_trie_data*)ri->data->data[optrie]; +-#ifdef DEBUGGING +- AV *const trie_words +- = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]); +-#endif +- const regnode *nextbranch= NULL; +- I32 word_idx; +- SvPVCLEAR(sv); +- for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) { +- SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0); +- +- Perl_re_indentf( aTHX_ "%s ", +- indent+3, +- elem_ptr +- ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr), +- SvCUR(*elem_ptr), PL_dump_re_max_len, +- PL_colors[0], PL_colors[1], +- (SvUTF8(*elem_ptr) +- ? PERL_PV_ESCAPE_UNI +- : 0) +- | PERL_PV_PRETTY_ELLIPSES +- | PERL_PV_PRETTY_LTGT +- ) +- : "???" +- ); +- if (trie->jump) { +- U16 dist= trie->jump[word_idx+1]; +- Perl_re_printf( aTHX_ "(%" UVuf ")\n", +- (UV)((dist ? this_trie + dist : next) - start)); +- if (dist) { +- if (!nextbranch) +- nextbranch= this_trie + trie->jump[0]; +- DUMPUNTIL(this_trie + dist, nextbranch); +- } +- if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH) +- nextbranch= regnext((regnode *)nextbranch); +- } else { +- Perl_re_printf( aTHX_ "\n"); +- } +- } +- if (last && next > last) +- node= last; +- else +- node= next; +- } +- else if ( op == CURLY ) { /* "next" might be very big: optimizer */ +- DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */ +- } +- else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) { +- assert(next); +- DUMPUNTIL(after, next); +- } +- else if ( op == PLUS || op == STAR) { +- DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */ +- } +- else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) { +- /* Literal string, where present. */ +- node = (const regnode *)REGNODE_AFTER_varies(node); +- } +- else { +- node = REGNODE_AFTER_opcode(node,op); +- } +- if (op == CURLYX || op == OPEN || op == SROPEN) +- indent++; +- if (REGNODE_TYPE(op) == END) +- break; +- } +- CLEAR_OPTSTART; +-#ifdef DEBUG_DUMPUNTIL +- Perl_re_printf( aTHX_ "--- %d\n", (int)indent); +-#endif +- return node; +-} +- +-#endif /* DEBUGGING */ ++#endif /* DEBUGGING */ +diff --git a/regcomp_invlist.c b/regcomp_invlist.c +index 9ea3f431817d..82f82305846a 100644 +--- a/regcomp_invlist.c ++++ b/regcomp_invlist.c +@@ -18,7 +18,7 @@ + #include "unicode_constants.h" + #include "regcomp_internal.h" + +- ++#ifdef PERL_RE_BUILD_AUX + void + Perl_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len) + { +@@ -70,6 +70,7 @@ Perl_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_l + } + } + } ++#endif /* PERL_RE_BUILD_AUX */ + + /* This section of code defines the inversion list object and its methods. The + * interfaces are highly subject to change, so as much as possible is static to +diff --git a/regexec.c b/regexec.c +index c404d9aa3d73..de0b7c461918 100644 +--- a/regexec.c ++++ b/regexec.c +@@ -4428,7 +4428,8 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp) + */ + #define REPORT_CODE_OFF 29 + #define INDENT_CHARS(depth) ((int)(depth) % 20) +-#ifdef DEBUGGING ++ ++#ifdef PERL_RE_BUILD_DEBUG + int + Perl_re_exec_indentf(pTHX_ const char *fmt, U32 depth, ...) + {