+++ /dev/null
-From 53748ca8ca3c893025be34dd4f104546fcbd0602 Mon Sep 17 00:00:00 2001
-Date: Sat, 17 Jun 2023 13:20:24 +0200
-Subject: [PATCH] Add pcre2 support
-
-This is currently expected to cause crashes on Windows
-when compiled with GUI support.
-
-Closes bug #935.
-Initial patch submitted by: Gagan Sidhu
----
- acconfig.h | 6 ++
- actions.c | 8 +++
- cgi.c | 9 ++-
- client-tags.c | 5 ++
- configure.in | 64 ++++++++++++++++-
- pcrs.c | 148 +++++++++++++++++++++++++++++----------
- pcrs.h | 40 +++++++----
- project.h | 54 +++++++++-----
- templates/show-status | 5 +-
- urlmatch.c | 159 ++++++++++++++++++++++++++++++++++++++++++
- urlmatch.h | 4 ++
- w32log.c | 3 +
- 12 files changed, 430 insertions(+), 75 deletions(-)
-
---- a/acconfig.h
-+++ b/acconfig.h
-@@ -225,11 +225,17 @@
- /* Define if pcre.h must be included as <pcre/pcre.h>
- */
- #undef PCRE_H_IN_SUBDIR
-+#undef PCRE2_H_IN_SUBDIR
-+
-+#undef HAVE_PCRE2
-+#undef HAVE_PCRE2POSIX
-
- /* Define if pcreposix.h must be included as <pcre/pcreposix.h>
- */
- #undef PCREPOSIX_H_IN_SUBDIR
-
-+#undef PCRE2POSIX_H_IN_SUBDIR
-+
- @BOTTOM@
-
- /*
---- a/actions.c
-+++ b/actions.c
-@@ -828,8 +828,12 @@ int update_action_bits_for_tag(struct cl
- continue;
- }
-
-+#ifdef HAVE_PCRE2
-+ if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag))
-+#else
- /* and check if one of the tag patterns matches the tag, */
- if (0 == regexec(b->url->pattern.tag_regex, tag, 0, NULL, 0))
-+#endif
- {
- /* if it does, update the action bit map, */
- if (merge_current_action(csp->action, b->action))
-@@ -884,7 +888,11 @@ jb_err check_negative_tag_patterns(struc
- }
- for (tag = csp->tags->first; NULL != tag; tag = tag->next)
- {
-+#ifdef HAVE_PCRE2
-+ if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag->str))
-+#else
- if (0 == regexec(b->url->pattern.tag_regex, tag->str, 0, NULL, 0))
-+#endif
- {
- /*
- * The pattern matches at least one tag, thus the action
---- a/cgi.c
-+++ b/cgi.c
-@@ -2023,7 +2023,7 @@ jb_err template_fill(char **template_ptr
- char buf[BUFFER_SIZE];
- char *tmp_out_buffer;
- char *file_buffer;
-- size_t size;
-+ size_t buffer_size, new_size;
- int error;
- const char *flags;
-
-@@ -2032,7 +2032,7 @@ jb_err template_fill(char **template_ptr
- assert(exports);
-
- file_buffer = *template_ptr;
-- size = strlen(file_buffer) + 1;
-+ buffer_size = strlen(file_buffer) + 1;
-
- /*
- * Assemble pcrs joblist from exports map
-@@ -2082,7 +2082,10 @@ jb_err template_fill(char **template_ptr
- }
- else
- {
-- error = pcrs_execute(job, file_buffer, size, &tmp_out_buffer, &size);
-+ error = pcrs_execute(job, file_buffer, buffer_size, &tmp_out_buffer,
-+ &new_size);
-+
-+ buffer_size = new_size;
-
- pcrs_free_job(job);
- if (NULL == tmp_out_buffer)
---- a/client-tags.c
-+++ b/client-tags.c
-@@ -43,6 +43,7 @@
- #include "miscutil.h"
- #include "errlog.h"
- #include "parsers.h"
-+#include "urlmatch.h"
-
- struct client_specific_tag
- {
-@@ -658,7 +659,11 @@ int client_tag_match(const struct patter
-
- for (tag = tags->first; tag != NULL; tag = tag->next)
- {
-+#ifdef HAVE_PCRE2
-+ if (pcre2_pattern_matches(pattern->pattern.tag_regex, tag->str))
-+#else
- if (0 == regexec(pattern->pattern.tag_regex, tag->str, 0, NULL, 0))
-+#endif
- {
- log_error(LOG_LEVEL_TAGGING, "Client tag '%s' matches.", tag->str);
- return 1;
---- a/configure.in
-+++ b/configure.in
-@@ -863,12 +863,47 @@ else
- ])
- fi
-
-+AC_ARG_ENABLE(pcre2,
-+[ --disable-pcre2 Don't try to use pcre2 even if it's available],
-+[enableval2=$enableval],
-+[enableval2=yes])
-+if test $enableval2 = yes; then
-+ try_pcre2=yes
-+else
-+ AC_MSG_WARN([Ignoring pcre2 even if it's available])
-+ try_pcre2=no; have_pcre2=no; have_pcre2posix=no # the library checks below are skipped so record the result here
-+fi
-+
-+if test $try_pcre2 != no; then
- dnl =================================================================
- dnl Checks for libraries.
- dnl =================================================================
- dnl Note: Some systems may have the library but not the system header
- dnl file, so we must check for both.
- dnl Also check for correct version
-+AC_CHECK_LIB(pcre2-8, pcre2_compile_8, [
-+ AC_CHECK_HEADER(pcre2.h, [
-+ AC_EGREP_HEADER(pcre2_pattern_info, pcre2.h,[have_pcre2=yes; AC_DEFINE(HAVE_PCRE2)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no])
-+ ], [
-+ AC_CHECK_HEADER(pcre2/pcre2.h, [
-+ AC_EGREP_HEADER(pcre2_pattern_info, pcre2/pcre2.h, [have_pcre2=yes; AC_DEFINE(HAVE_PCRE2) AC_DEFINE(PCRE2_H_IN_SUBDIR)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no])
-+ ], [have_pcre2=no])
-+ ], [#define PCRE2_CODE_UNIT_WIDTH 8])
-+], [have_pcre2=no])
-+
-+AC_CHECK_LIB(pcre2-posix, regcomp, [
-+ AC_CHECK_HEADER(pcre2posix.h, [
-+ AC_EGREP_HEADER(pcre2_regerror, pcre2posix.h, [have_pcre2posix=yes],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no])
-+ ], [
-+ AC_CHECK_HEADER(pcre2/pcre2posix.h, [
-+ AC_EGREP_HEADER(pcre2_regerror, pcre2/pcre2posix.h, [have_pcre2posix=yes; AC_DEFINE(PCRE2POSIX_H_IN_SUBDIR)],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no])
-+ ], [have_pcre2posix=no])
-+ ])
-+], [have_pcre2posix=no], -lpcre2-8)
-+fi
-+# Fall back to the classic pcre library if pcre2 was disabled or not found.
-+if test "$have_pcre2" = "no"; then
-+
- AC_CHECK_LIB(pcre, pcre_compile, [
- AC_CHECK_HEADER(pcre.h, [
- AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no])
-@@ -889,6 +924,7 @@ AC_CHECK_LIB(pcreposix, regcomp, [
- ])
- ], [have_pcreposix=no], -lpcre)
-
-+fi
- dnl ================================================================
- dnl libpcrs is temporarily disabled.
- dnl
-@@ -1095,6 +1131,31 @@ fi
- # we don't need pcreposix, then link pcre dynamically; else
- # build it and link statically
- #
-+
-+# Check for libpcre2 first, then fall back to regular pcre.
-+
-+if test $have_pcre2 = "yes"; then
-+ echo "using libpcre2"
-+ STATIC_PCRE_ONLY=#
-+ LIBS="$LIBS -lpcre2-8 -lpcre2-posix"
-+ if test "$use_static_pcre" = "yes"; then
-+ pcre_dyn=no
-+ AC_DEFINE(PCRE_STATIC,1,[Define to statically link to pcre library on Windows.])
-+# see /usr/i686-w64-mingw32/sys-root/mingw/include/pcre.h line 54
-+# #if defined(_WIN32) && !defined(PCRE_STATIC)
-+# # ifndef PCRE_EXP_DECL
-+# # define PCRE_EXP_DECL extern __declspec(dllimport)
-+# # endif
-+# If you want to statically link a program against a PCRE library in the form of
-+# a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
-+# pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
-+# be declared __declspec(dllimport), with unwanted results.
-+ else
-+ pcre_dyn=yes
-+ AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.])
-+ fi
-+else
-+
- if test $have_pcre = "yes"; then
- echo "using libpcre"
- STATIC_PCRE_ONLY=#
-@@ -1116,7 +1177,8 @@ if test $have_pcre = "yes"; then
- AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.])
- fi
- else
-- AC_MSG_ERROR(pcre library not detected.)
-+ AC_MSG_ERROR(Detected neither pcre2 nor pcre library.)
-+fi
- fi
-
- AC_DEFINE(FEATURE_CONNECTION_KEEP_ALIVE)
---- a/pcrs.c
-+++ b/pcrs.c
-@@ -57,7 +57,7 @@
- * Internal prototypes
- */
-
--static int pcrs_parse_perl_options(const char *optstring, int *flags);
-+static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags);
- static pcrs_substitute *pcrs_compile_replacement(const char *replacement, int trivialflag,
- int capturecount, int *errptr);
- static int is_hex_sequence(const char *sequence);
-@@ -83,25 +83,25 @@ const char *pcrs_strerror(const int erro
- switch (error)
- {
- /* Passed-through PCRE error: */
-- case PCRE_ERROR_NOMEMORY: return "(pcre:) No memory";
-+ case PCREn(ERROR_NOMEMORY): return "(pcre:) No memory";
-
- /* Shouldn't happen unless PCRE or PCRS bug, or user messed with compiled job: */
-- case PCRE_ERROR_NULL: return "(pcre:) NULL code or subject or ovector";
-- case PCRE_ERROR_BADOPTION: return "(pcre:) Unrecognized option bit";
-- case PCRE_ERROR_BADMAGIC: return "(pcre:) Bad magic number in code";
-+ case PCREn(ERROR_NULL): return "(pcre:) NULL code or subject or ovector";
-+ case PCREn(ERROR_BADOPTION): return "(pcre:) Unrecognized option bit";
-+ case PCREn(ERROR_BADMAGIC): return "(pcre:) Bad magic number in code";
-+#if defined(PCRE_ERROR_UNKNOWN_NODE)
- case PCRE_ERROR_UNKNOWN_NODE: return "(pcre:) Bad node in pattern";
--
-+#endif
- /* Can't happen / not passed: */
-- case PCRE_ERROR_NOSUBSTRING: return "(pcre:) Fire in power supply";
-- case PCRE_ERROR_NOMATCH: return "(pcre:) Water in power supply";
-+ case PCREn(ERROR_NOSUBSTRING): return "(pcre:) Fire in power supply";
-+ case PCREn(ERROR_NOMATCH): return "(pcre:) Water in power supply";
-
- #ifdef PCRE_ERROR_MATCHLIMIT
- /*
- * Only reported by PCRE versions newer than our own.
- */
-- case PCRE_ERROR_MATCHLIMIT: return "(pcre:) Match limit reached";
-+ case PCREn(ERROR_MATCHLIMIT): return "(pcre:) Match limit reached";
- #endif /* def PCRE_ERROR_MATCHLIMIT */
--
- /* PCRS errors: */
- case PCRS_ERR_NOMEM: return "(pcrs:) No memory";
- case PCRS_ERR_CMDSYNTAX: return "(pcrs:) Syntax error while parsing command";
-@@ -111,16 +111,14 @@ const char *pcrs_strerror(const int erro
- case PCRS_WARN_TRUNCATION:
- return "(pcrs:) At least one variable was too big and has been truncated before compilation";
-
-- /*
-- * XXX: With the exception of PCRE_ERROR_MATCHLIMIT we
-- * only catch PCRE errors that can happen with our internal
-- * version. If Privoxy is linked against a newer
-- * PCRE version all bets are off ...
-- */
- default:
-+#ifdef HAVE_PCRE2
-+ pcre2_get_error_message(error, (PCRE2_UCHAR8*)buf, sizeof(buf));
-+#else
- snprintf(buf, sizeof(buf),
- "Error code %d. For details, check the pcre documentation.",
- error);
-+#endif
- return buf;
- }
- }
-@@ -149,7 +147,7 @@ const char *pcrs_strerror(const int erro
- * Returns : option integer suitable for pcre
- *
- *********************************************************************/
--static int pcrs_parse_perl_options(const char *optstring, int *flags)
-+static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags)
- {
- size_t i;
- int rc = 0;
-@@ -163,13 +161,13 @@ static int pcrs_parse_perl_options(const
- {
- case 'e': break; /* ToDo ;-) */
- case 'g': *flags |= PCRS_GLOBAL; break;
-- case 'i': rc |= PCRE_CASELESS; break;
-- case 'm': rc |= PCRE_MULTILINE; break;
-+ case 'i': rc |= PCREn(CASELESS); break;
-+ case 'm': rc |= PCREn(MULTILINE); break;
- case 'o': break;
-- case 's': rc |= PCRE_DOTALL; break;
-- case 'x': rc |= PCRE_EXTENDED; break;
-+ case 's': rc |= PCREn(DOTALL); break;
-+ case 'x': rc |= PCREn(EXTENDED); break;
- case 'D': *flags |= PCRS_DYNAMIC; break;
-- case 'U': rc |= PCRE_UNGREEDY; break;
-+ case 'U': rc |= PCREn(UNGREEDY); break;
- case 'T': *flags |= PCRS_TRIVIAL; break;
- default: break;
- }
-@@ -471,7 +469,15 @@ pcrs_job *pcrs_free_job(pcrs_job *job)
- else
- {
- next = job->next;
-- if (job->pattern != NULL) free(job->pattern);
-+ if (job->pattern != NULL)
-+ {
-+#ifdef HAVE_PCRE2
-+ pcre2_code_free(job->pattern);
-+#else
-+ free(job->pattern);
-+#endif
-+ }
-+#ifndef HAVE_PCRE2
- if (job->hints != NULL)
- {
- #ifdef PCRE_CONFIG_JIT
-@@ -480,6 +486,7 @@ pcrs_job *pcrs_free_job(pcrs_job *job)
- free(job->hints);
- #endif
- }
-+#endif
- if (job->substitute != NULL)
- {
- if (job->substitute->text != NULL) free(job->substitute->text);
-@@ -626,10 +633,14 @@ pcrs_job *pcrs_compile_command(const cha
- pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char *options, int *errptr)
- {
- pcrs_job *newjob;
-- int flags;
-+ unsigned int flags;
- int capturecount;
-- const char *error;
-+#ifdef HAVE_PCRE2
-+ int ret;
-+#else
- int pcre_study_options = 0;
-+ const char *error;
-+#endif
-
- *errptr = 0;
-
-@@ -661,25 +672,43 @@ pcrs_job *pcrs_compile(const char *patte
- /*
- * Compile the pattern
- */
-+#ifdef HAVE_PCRE2
-+ PCRE2_SIZE error_offset;
-+ newjob->pattern = pcre2_compile((const unsigned char *)pattern,
-+ PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, errptr,
-+ &error_offset, NULL);
-+#else
- newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL);
-+#endif
- if (newjob->pattern == NULL)
- {
- pcrs_free_job(newjob);
- return NULL;
- }
-
--
--#ifdef PCRE_STUDY_JIT_COMPILE
-+#if defined(PCRE_STUDY_JIT_COMPILE) || defined(HAVE_PCRE2)
- #ifdef DISABLE_PCRE_JIT_COMPILATION
- #warning PCRE_STUDY_JIT_COMPILE is supported but Privoxy has been configured not to use it
- #else
- if (!(flags & PCRS_DYNAMIC))
- {
-+#ifdef HAVE_PCRE2
-+ /* Try to enable JIT compilation but continue if it's unsupported. */
-+ if ((ret = pcre2_jit_compile(newjob->pattern, PCRE2_JIT_COMPLETE)) &&
-+ (ret != PCRE2_ERROR_JIT_BADOPTION))
-+ {
-+ *errptr = ret;
-+ pcrs_free_job(newjob);
-+ return NULL;
-+ }
-+#else
- pcre_study_options = PCRE_STUDY_JIT_COMPILE;
-+#endif
- }
- #endif
- #endif
-
-+#ifndef HAVE_PCRE2
- /*
- * Generate hints. This has little overhead, since the
- * hints will be NULL for a boring pattern anyway.
-@@ -691,13 +720,17 @@ pcrs_job *pcrs_compile(const char *patte
- pcrs_free_job(newjob);
- return NULL;
- }
--
-+#endif
-
- /*
- * Determine the number of capturing subpatterns.
- * This is needed for handling $+ in the substitute.
- */
-+#ifdef HAVE_PCRE2
-+ if (0 > (*errptr = pcre2_pattern_info(newjob->pattern, PCRE2_INFO_CAPTURECOUNT, &capturecount)))
-+#else
- if (0 > (*errptr = pcre_fullinfo(newjob->pattern, newjob->hints, PCRE_INFO_CAPTURECOUNT, &capturecount)))
-+#endif
- {
- pcrs_free_job(newjob);
- return NULL;
-@@ -809,14 +842,20 @@ int pcrs_execute_list(pcrs_job *joblist,
- *********************************************************************/
- int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char **result, size_t *result_length)
- {
-- int offsets[3 * PCRS_MAX_SUBMATCHES],
-- offset,
-+ int offset,
- i, k,
- matches_found,
- submatches,
- max_matches = PCRS_MAX_MATCH_INIT;
- size_t newsize;
-+#ifdef HAVE_PCRE2
- pcrs_match *matches, *dummy;
-+ pcre2_match_data *pcre2_matches;
-+ size_t *offsets;
-+#else
-+ pcrs_match *matches, *dummy;
-+ int offsets[3 * PCRS_MAX_SUBMATCHES];
-+#endif
- char *result_offset;
-
- offset = i = 0;
-@@ -830,27 +869,38 @@ int pcrs_execute(pcrs_job *job, const ch
- return(PCRS_ERR_BADJOB);
- }
-
-+#ifdef HAVE_PCRE2
-+ if (NULL == (pcre2_matches = pcre2_match_data_create_from_pattern(job->pattern, NULL)))
-+ {
-+ return(PCRS_ERR_NOMEM);
-+ }
-+ offsets = pcre2_get_ovector_pointer(pcre2_matches);
-+#endif
- if (NULL == (matches = (pcrs_match *)malloc((size_t)max_matches * sizeof(pcrs_match))))
- {
- return(PCRS_ERR_NOMEM);
- }
- memset(matches, '\0', (size_t)max_matches * sizeof(pcrs_match));
-
--
- /*
- * Find the pattern and calculate the space
- * requirements for the result
- */
- newsize = subject_length;
-
-+#ifdef HAVE_PCRE2
-+ while ((submatches = pcre2_match(job->pattern, (const unsigned char *)subject,
-+ subject_length, (size_t)offset, 0, pcre2_matches, NULL)) > 0)
-+#else
- while ((submatches = pcre_exec(job->pattern, job->hints, subject, (int)subject_length, offset, 0, offsets, 3 * PCRS_MAX_SUBMATCHES)) > 0)
-+#endif
- {
- job->flags |= PCRS_SUCCESS;
- matches[i].submatches = submatches;
-
- for (k = 0; k < submatches; k++)
- {
-- matches[i].submatch_offset[k] = offsets[2 * k];
-+ matches[i].submatch_offset[k] = (int)offsets[2 * k];
-
- /* Note: Non-found optional submatches have length -1-(-1)==0 */
- matches[i].submatch_length[k] = (size_t)(offsets[2 * k + 1] - offsets[2 * k]);
-@@ -867,7 +917,7 @@ int pcrs_execute(pcrs_job *job, const ch
- newsize += (size_t)offsets[0] * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES];
-
- /* chunk after match */
-- matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = offsets[1];
-+ matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = (int)offsets[1];
- matches[i].submatch_length[PCRS_MAX_SUBMATCHES + 1] = subject_length - (size_t)offsets[1] - 1;
- newsize += (subject_length - (size_t)offsets[1]) * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES + 1];
-
-@@ -894,12 +944,19 @@ int pcrs_execute(pcrs_job *job, const ch
- break;
- /* Go find the next one */
- else
-- offset = offsets[1];
-+ offset = (int)offsets[1];
- }
- /* Pass pcre error through if (bad) failure */
-+#ifdef HAVE_PCRE2
-+ if (submatches < PCRE2_ERROR_NOMATCH)
-+#else
- if (submatches < PCRE_ERROR_NOMATCH)
-+#endif
- {
- free(matches);
-+#ifdef HAVE_PCRE2
-+ pcre2_match_data_free(pcre2_matches);
-+#endif
- return submatches;
- }
- matches_found = i;
-@@ -909,9 +966,19 @@ int pcrs_execute(pcrs_job *job, const ch
- * Get memory for the result (must be freed by caller!)
- * and append terminating null byte.
- */
-- if ((*result = (char *)malloc(newsize + 1)) == NULL)
-+ if ((*result = (char *)malloc(newsize + 1
-+#ifdef HAVE_PCRE2
-+ /*
-+ * Work around to prevent invalid reads in the jit code.
-+ */
-+ + 16
-+#endif
-+ )) == NULL)
- {
- free(matches);
-+#ifdef HAVE_PCRE2
-+ pcre2_match_data_free(pcre2_matches);
-+#endif
- return PCRS_ERR_NOMEM;
- }
- else
-@@ -964,6 +1031,9 @@ int pcrs_execute(pcrs_job *job, const ch
- memcpy(result_offset, subject + offset, subject_length - (size_t)offset);
-
- *result_length = newsize;
-+#ifdef HAVE_PCRE2
-+ pcre2_match_data_free(pcre2_matches);
-+#endif
- free(matches);
- return matches_found;
-
-@@ -1101,7 +1171,7 @@ char pcrs_get_delimiter(const char *stri
- *********************************************************************/
- char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, int *hits)
- {
-- size_t size;
-+ size_t buffer_size, new_size;
- char *result = NULL;
- pcrs_job *job;
-
-@@ -1109,12 +1179,14 @@ char *pcrs_execute_single_command(const
- assert(pcrs_command);
-
- *hits = 0;
-- size = strlen(subject);
-+ buffer_size = strlen(subject);
-
- job = pcrs_compile_command(pcrs_command, hits);
- if (NULL != job)
- {
-- *hits = pcrs_execute(job, subject, size, &result, &size);
-+ *hits = pcrs_execute(job, subject, buffer_size, &result, &new_size);
-+ buffer_size = new_size;
-+
- if (*hits < 0)
- {
- freez(result);
---- a/pcrs.h
-+++ b/pcrs.h
-@@ -33,9 +33,18 @@
- *********************************************************************/
-
-
-+#ifdef HAVE_PCRE2
-+#define PCRE2_CODE_UNIT_WIDTH 8
-+#define PCREn(x) PCRE2_ ## x
-+#ifndef _PCRE2_H
-+#include <pcre2.h>
-+#endif
-+#else
-+#define PCREn(x) PCRE_ ## x
- #ifndef _PCRE_H
- #include <pcre.h>
- #endif
-+#endif
-
- /*
- * Constants:
-@@ -55,22 +64,23 @@
- * They are supposed to be handled together with PCRE error
- * codes and have to start with an offset to prevent overlaps.
- *
-- * PCRE 6.7 uses error codes from -1 to -21, PCRS error codes
-- * below -100 should be safe for a while.
-+ * PCRE 6.7 uses error codes from -1 to -21,
-+ * PCRE2 10.42 uses error codes from -66 to 101.
-+ * PCRS error codes below -300 should be safe for a while.
- */
--#define PCRS_ERR_NOMEM -100 /* Failed to acquire memory. */
--#define PCRS_ERR_CMDSYNTAX -101 /* Syntax of s///-command */
--#define PCRS_ERR_STUDY -102 /* pcre error while studying the pattern */
--#define PCRS_ERR_BADJOB -103 /* NULL job pointer, pattern or substitute */
--#define PCRS_WARN_BADREF -104 /* Backreference out of range */
--#define PCRS_WARN_TRUNCATION -105 /* At least one pcrs variable was too big,
-+#define PCRS_ERR_NOMEM -300 /* Failed to acquire memory. */
-+#define PCRS_ERR_CMDSYNTAX -301 /* Syntax of s///-command */
-+#define PCRS_ERR_STUDY -302 /* pcre error while studying the pattern */
-+#define PCRS_ERR_BADJOB -303 /* NULL job pointer, pattern or substitute */
-+#define PCRS_WARN_BADREF -304 /* Backreference out of range */
-+#define PCRS_WARN_TRUNCATION -305 /* At least one pcrs variable was too big,
- * only the first part was used. */
-
- /* Flags */
--#define PCRS_GLOBAL 1 /* Job should be applied globally, as with perl's g option */
--#define PCRS_TRIVIAL 2 /* Backreferences in the substitute are ignored */
--#define PCRS_SUCCESS 4 /* Job did previously match */
--#define PCRS_DYNAMIC 8 /* Job is dynamic (used to disable JIT compilation) */
-+#define PCRS_GLOBAL 0x08000000u /* Job should be applied globally, as with perl's g option */
-+#define PCRS_TRIVIAL 0x10000000u /* Backreferences in the substitute are ignored */
-+#define PCRS_SUCCESS 0x20000000u /* Job did previously match */
-+#define PCRS_DYNAMIC 0x40000000u /* Job is dynamic (used to disable JIT compilation) */
-
-
- /*
-@@ -107,10 +117,14 @@ typedef struct {
- /* A PCRS job */
-
- typedef struct PCRS_JOB {
-+#ifdef HAVE_PCRE2
-+ pcre2_code *pattern;
-+#else
- pcre *pattern; /* The compiled pcre pattern */
- pcre_extra *hints; /* The pcre hints for the pattern */
-+#endif
- int options; /* The pcre options (numeric) */
-- int flags; /* The pcrs and user flags (see "Flags" above) */
-+ unsigned int flags; /* The pcrs and user flags (see "Flags" above) */
- pcrs_substitute *substitute; /* The compiled pcrs substitute */
- struct PCRS_JOB *next; /* Pointer for chaining jobs to joblists */
- } pcrs_job;
---- a/project.h
-+++ b/project.h
-@@ -94,12 +94,38 @@
- */
-
- #ifdef STATIC_PCRE
-+#ifdef HAVE_PCRE2
-+# include "pcre2.h"
-+# include "pcre2posix.h"
-+#else
- # include "pcre.h"
-+# include "pcreposix.h"
-+#endif
- #else
--# ifdef PCRE_H_IN_SUBDIR
--# include <pcre/pcre.h>
-+# ifdef HAVE_PCRE2
-+# ifdef PCRE2_H_IN_SUBDIR
-+# define PCRE2_CODE_UNIT_WIDTH 8
-+# include <pcre2/pcre2.h>
-+# else
-+# define PCRE2_CODE_UNIT_WIDTH 8
-+# include <pcre2.h>
-+# endif
-+# ifdef PCRE2POSIX_H_IN_SUBDIR
-+# include <pcre2/pcre2posix.h>
-+# else
-+# include <pcre2posix.h>
-+# endif
- # else
--# include <pcre.h>
-+# ifdef PCRE_H_IN_SUBDIR
-+# include <pcre/pcre.h>
-+# else
-+# include <pcre.h>
-+# endif
-+# ifdef PCREPOSIX_H_IN_SUBDIR
-+# include <pcre/pcreposix.h>
-+# else
-+# include <pcreposix.h>
-+# endif
- # endif
- #endif
-
-@@ -109,16 +135,6 @@
- # include <pcrs.h>
- #endif
-
--#ifdef STATIC_PCRE
--# include "pcreposix.h"
--#else
--# ifdef PCRE_H_IN_SUBDIR
--# include <pcre/pcreposix.h>
--# else
--# include <pcreposix.h>
--# endif
--#endif
--
- #ifdef _WIN32
- /*
- * I don't want to have to #include all this just for the declaration
-@@ -404,10 +420,16 @@ struct http_response
- enum crunch_reason crunch_reason; /**< Why the response was generated in the first place. */
- };
-
-+#ifdef HAVE_PCRE2
-+#define REGEX_TYPE pcre2_code
-+#else
-+#define REGEX_TYPE regex_t
-+#endif
-+
- struct url_spec
- {
- #ifdef FEATURE_PCRE_HOST_PATTERNS
-- regex_t *host_regex;/**< Regex for host matching */
-+ REGEX_TYPE *host_regex;/**< Regex for host matching */
- enum host_regex_type { VANILLA_HOST_PATTERN, PCRE_HOST_PATTERN } host_regex_type;
- #endif /* defined FEATURE_PCRE_HOST_PATTERNS */
- int dcount; /**< How many parts to this domain? (length of dvec) */
-@@ -417,7 +439,7 @@ struct url_spec
-
- char *port_list; /**< List of acceptable ports, or NULL to match all ports */
-
-- regex_t *preg; /**< Regex for matching path part */
-+ REGEX_TYPE *preg; /**< Regex for matching path part */
- };
-
- /**
-@@ -432,7 +454,7 @@ struct pattern_spec
- union
- {
- struct url_spec url_spec;
-- regex_t *tag_regex;
-+ REGEX_TYPE *tag_regex;
- } pattern;
-
- unsigned int flags; /**< Bitmap with various pattern properties. */
---- a/templates/show-status
-+++ b/templates/show-status
-@@ -298,10 +298,7 @@
- <tr>
- <td><code>FEATURE_DYNAMIC_PCRE</code></td>
- <td>@if-FEATURE_DYNAMIC_PCRE-then@ Yes @else-not-FEATURE_DYNAMIC_PCRE@ No @endif-FEATURE_DYNAMIC_PCRE@</td>
-- <td>Dynamically link to the PCRE library. This is set automatically
-- by <code>./configure</code> if you do not have libpcre installed.
-- Dynamically linking to an external libpcre is recommended as the one that is distributed
-- with Privoxy itself is outdated and lacks various features and bug-fixes you may be interested in.</td>
-+ <td>Dynamically link to the PCRE(2) library (recommended).</td>
- </tr>
- <tr>
- <td><code>FEATURE_EXTENDED_STATISTICS</code></td>
---- a/urlmatch.c
-+++ b/urlmatch.c
-@@ -604,6 +604,100 @@ jb_err parse_http_request(const char *re
- }
-
-
-+#ifdef HAVE_PCRE2
-+/*********************************************************************
-+ *
-+ * Function : compile_pattern
-+ *
-+ * Description : Compiles a host, domain or TAG pattern with pcre2.
-+ *
-+ * Parameters :
-+ * 1 : pattern = The pattern to compile.
-+ * 2 : anchoring = How the regex should be modified
-+ * before compilation. Can be either
-+ * one of NO_ANCHORING, LEFT_ANCHORED,
-+ * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
-+ * 3 : url = In case of failures, the spec member is
-+ * logged. This function does not free the structure.
-+ * 4 : regex = Where the compiled regex should be stored (NULL for an empty pattern).
-+ *
-+ * Returns : JB_ERR_OK - Success
-+ * JB_ERR_PARSE - Cannot parse regex or unexpected JIT compilation error
-+ *
-+ *********************************************************************/
-+static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
-+ struct pattern_spec *url, pcre2_code **regex)
-+{
-+ int errcode;
-+ const char *fmt = NULL;
-+ char *rebuf;
-+ size_t rebuf_size;
-+ PCRE2_SIZE error_offset;
-+ int ret;
-+
-+ assert(pattern);
-+
-+ if (pattern[0] == '\0')
-+ {
-+ *regex = NULL;
-+ return JB_ERR_OK;
-+ }
-+
-+ switch (anchoring)
-+ {
-+ case NO_ANCHORING:
-+ fmt = "%s";
-+ break;
-+ case RIGHT_ANCHORED:
-+ fmt = "%s$";
-+ break;
-+ case RIGHT_ANCHORED_HOST:
-+ fmt = "%s\\.?$";
-+ break;
-+ case LEFT_ANCHORED:
-+ fmt = "^%s";
-+ break;
-+ default:
-+ log_error(LOG_LEVEL_FATAL,
-+ "Invalid anchoring in compile_pattern %d", anchoring); /* NOTE(review): assumes a fatal log does not return, otherwise fmt is still NULL below */
-+ }
-+ rebuf_size = strlen(pattern) + strlen(fmt); /* the "%s" in fmt is replaced, which leaves room for the terminating NUL */
-+ rebuf = malloc_or_die(rebuf_size);
-+
-+ snprintf(rebuf, rebuf_size, fmt, pattern);
-+ /* Compile the anchored copy, not the raw pattern, so the requested anchoring takes effect. */
-+ *regex = pcre2_compile((const unsigned char *)rebuf,
-+ PCRE2_ZERO_TERMINATED, PCRE2_CASELESS, &errcode,
-+ &error_offset, NULL);
-+ if (*regex == NULL)
-+ {
-+ log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
-+ pattern, url->spec, rebuf);
-+ freez(rebuf);
-+
-+ return JB_ERR_PARSE;
-+ }
-+
-+#ifndef DISABLE_PCRE_JIT_COMPILATION
-+ /* Try to enable JIT compilation but continue if it's unsupported. */
-+ if ((ret = pcre2_jit_compile(*regex, PCRE2_JIT_COMPLETE)) &&
-+ (ret != PCRE2_ERROR_JIT_BADOPTION))
-+ {
-+ log_error(LOG_LEVEL_ERROR,
-+ "Unexpected error enabling JIT compilation for %s from %s: %s",
-+ pattern, url->spec, rebuf);
-+ freez(rebuf);
-+
-+ return JB_ERR_PARSE;
-+ }
-+#endif
-+
-+ freez(rebuf);
-+
-+ return JB_ERR_OK;
-+
-+}
-+#else
- /*********************************************************************
- *
- * Function : compile_pattern
-@@ -686,6 +780,7 @@ static jb_err compile_pattern(const char
- return JB_ERR_OK;
-
- }
-+#endif
-
-
- /*********************************************************************
-@@ -1051,6 +1146,49 @@ static int simplematch(const char *patte
- }
-
-
-+#ifdef HAVE_PCRE2
-+/*********************************************************************
-+ *
-+ * Function : pcre2_pattern_matches
-+ *
-+ * Description : Checks if a compiled pcre2 pattern matches a string, using match data allocated for this call only.
-+ *
-+ * Parameters :
-+ * 1 : pattern = The compiled pattern. Must not be NULL.
-+ * 2 : string = The NUL-terminated string to check. Must not be NULL.
-+ *
-+ * Returns : Nonzero if pcre2_match() returns a non-negative value, zero otherwise (including when match data cannot be allocated).
-+ *
-+ *********************************************************************/
-+int pcre2_pattern_matches(const pcre2_code *pattern, const char *string)
-+{
-+ PCRE2_SIZE offset;
-+ int ret;
-+ pcre2_match_data *pcre2_matches;
-+
-+ assert(pattern != NULL);
-+ assert(string != NULL);
-+
-+ offset = 0; /* always start matching at the beginning of the string */
-+
-+ pcre2_matches = pcre2_match_data_create_from_pattern(pattern, NULL);
-+ if (NULL == pcre2_matches)
-+ {
-+ log_error(LOG_LEVEL_ERROR,
-+ "Out of memory while matching pattern against %s", string);
-+ return FALSE;
-+ }
-+
-+ ret = pcre2_match(pattern, (const unsigned char *)string, strlen(string),
-+ offset, 0, pcre2_matches, NULL);
-+
-+ pcre2_match_data_free(pcre2_matches); /* only the return code is needed, not the captured offsets */
-+
-+ return (ret >= 0); /* negative codes (no match and errors alike) are reported as "no match" */
-+}
-+#endif
-+
-+
- /*********************************************************************
- *
- * Function : simple_domaincmp
-@@ -1268,8 +1406,12 @@ void free_pattern_spec(struct pattern_sp
- {
- if (pattern->pattern.tag_regex)
- {
-+#ifdef HAVE_PCRE2
-+ pcre2_code_free(pattern->pattern.tag_regex);
-+#else
- regfree(pattern->pattern.tag_regex);
- freez(pattern->pattern.tag_regex);
-+#endif
- }
- return;
- }
-@@ -1277,8 +1419,12 @@ void free_pattern_spec(struct pattern_sp
- #ifdef FEATURE_PCRE_HOST_PATTERNS
- if (pattern->pattern.url_spec.host_regex)
- {
-+#ifdef HAVE_PCRE2
-+ pcre2_code_free(pattern->pattern.url_spec.host_regex);
-+#else
- regfree(pattern->pattern.url_spec.host_regex);
- freez(pattern->pattern.url_spec.host_regex);
-+#endif
- }
- #endif /* def FEATURE_PCRE_HOST_PATTERNS */
- freez(pattern->pattern.url_spec.dbuffer);
-@@ -1287,8 +1433,12 @@ void free_pattern_spec(struct pattern_sp
- freez(pattern->pattern.url_spec.port_list);
- if (pattern->pattern.url_spec.preg)
- {
-+#ifdef HAVE_PCRE2
-+ pcre2_code_free(pattern->pattern.url_spec.preg);
-+#else
- regfree(pattern->pattern.url_spec.preg);
- freez(pattern->pattern.url_spec.preg);
-+#endif
- }
- }
-
-@@ -1333,8 +1483,13 @@ static int host_matches(const struct htt
- if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
- {
- return ((NULL == pattern->pattern.url_spec.host_regex)
-+#ifdef HAVE_PCRE2
-+ || pcre2_pattern_matches(pattern->pattern.url_spec.host_regex,
-+ http->host));
-+#else
- || (0 == regexec(pattern->pattern.url_spec.host_regex,
- http->host, 0, NULL, 0)));
-+#endif
- }
- #endif
- return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
-@@ -1357,7 +1512,11 @@ static int host_matches(const struct htt
- static int path_matches(const char *path, const struct pattern_spec *pattern)
- {
- return ((NULL == pattern->pattern.url_spec.preg)
-+#ifdef HAVE_PCRE2
-+ || (pcre2_pattern_matches(pattern->pattern.url_spec.preg, path)));
-+#else
- || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
-+#endif
- }
-
-
---- a/urlmatch.h
-+++ b/urlmatch.h
-@@ -50,6 +50,10 @@ extern int url_requires_percent_encoding
- extern int url_match(const struct pattern_spec *pattern,
- const struct http_request *http);
-
-+#ifdef HAVE_PCRE2
-+extern int pcre2_pattern_matches(const pcre2_code *pattern, const char *string);
-+#endif
-+
- extern jb_err create_pattern_spec(struct pattern_spec *url, char *buf);
- extern void free_pattern_spec(struct pattern_spec *url);
- extern int match_portlist(const char *portlist, int port);
---- a/w32log.c
-+++ b/w32log.c
-@@ -316,6 +316,9 @@ void TermLogWindow(void)
- void LogCreatePatternMatchingBuffers(void)
- {
- int i;
-+#ifdef HAVE_PCRE2
-+#warning The win32 build of Privoxy is expected to crash when compiled with pcre2 support.
-+#endif
- for (i = 0; patterns_to_highlight[i].str != NULL; i++)
- {
- regcomp(&patterns_to_highlight[i].buffer, patterns_to_highlight[i].str, REG_ICASE);