diff --git a/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst b/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst new file mode 100644 index 00000000000000..e7e3de7a96cbd3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst @@ -0,0 +1,2 @@ +Multiline regexes starting with a caret, such as ``re.compile("^foo", +re.MULTILINE)``, now run significantly faster. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index df377905bfae0d..739ca5d9211d74 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1854,12 +1854,41 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) state->start = state->ptr = ptr = end; return 0; } - while (status == 0 && ptr < end) { - ptr++; - RESET_CAPTURE_GROUP(); - TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); - state->start = state->ptr = ptr; - status = SRE(match)(state, pattern, 0); + if (pattern[0] == SRE_OP_AT && pattern[1] == SRE_AT_BEGINNING_LINE) { + /* skip to line boundaries */ + while (status == 0 && ptr < end) { + ptr++; + if (!SRE_IS_LINEBREAK((int) ptr[-1])) { +#if SIZEOF_SRE_CHAR == 1 + ptr = (SRE_CHAR *)memchr(ptr, '\n', end - ptr); + if (!ptr) { + break; + } +#else + while (ptr < end && !SRE_IS_LINEBREAK((int) *ptr)) { + ptr++; + } + if (ptr >= end) { + break; + } +#endif + /* advance to after the new line character */ + ptr++; + } + RESET_CAPTURE_GROUP(); + TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); + state->start = state->ptr = ptr; + status = SRE(match)(state, pattern, 0); + } + } + else { + while (status == 0 && ptr < end) { + ptr++; + RESET_CAPTURE_GROUP(); + TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); + state->start = state->ptr = ptr; + status = SRE(match)(state, pattern, 0); + } } }