diff --git a/docs/manual/rewrite/flags.html.en b/docs/manual/rewrite/flags.html.en index fbeeb7b..3c575d5 100644 --- a/docs/manual/rewrite/flags.html.en +++ b/docs/manual/rewrite/flags.html.en @@ -85,10 +85,6 @@ of how you might use them.

B (escape backreferences)

The [B] flag instructs RewriteRule to escape non-alphanumeric characters before applying the transformation.

-

In 2.4.26 and later, you can limit the escaping to specific characters -in backreferences by listing them: [B=#?;]. Note: The space -character can be used in the list of characters to escape, but it cannot be -the last character in the list.

mod_rewrite has to unescape URLs before mapping them, so backreferences are unescaped at the time they are applied. @@ -120,6 +116,16 @@ when the backend may break if presented with an unescaped URL.

An alternative to this flag is using a RewriteCond to capture against %{THE_REQUEST} which will capture strings in the encoded form.

+ +

In 2.4.26 and later, you can limit the escaping to specific characters +in backreferences by listing them: [B=#?;]. Note: The space +character can be used in the list of characters to escape, but you must quote +the entire third argument of RewriteRule +and the space must not be the last character in the list.

+ +
# Escape spaces and question marks.
+RewriteRule "^search/(.*)$" "/search.php?term=$1" "[B= ?]"
+
top

BNP|backrefnoplus (don't escape space to +)

diff --git a/modules/mappers/mod_rewrite.c b/modules/mappers/mod_rewrite.c index 80fbc9e..6bd8f02 100644 --- a/modules/mappers/mod_rewrite.c +++ b/modules/mappers/mod_rewrite.c @@ -100,6 +100,8 @@ #include "mod_rewrite.h" #include "ap_expr.h" +#include "test_char.h" + static ap_dbd_t *(*dbd_acquire)(request_rec*) = NULL; static void (*dbd_prepare)(server_rec*, const char*, const char*) = NULL; static const char* really_last_key = "rewrite_really_last"; @@ -167,6 +169,8 @@ static const char* really_last_key = "rewrite_really_last"; #define RULEFLAG_END (1<<17) #define RULEFLAG_ESCAPENOPLUS (1<<18) #define RULEFLAG_QSLAST (1<<19) +#define RULEFLAG_QSNONE (1<<20) /* programattic only */ +#define RULEFLAG_ESCAPECTLS (1<<21) /* return code of the rewrite rule * the result may be escaped - or not @@ -320,7 +324,8 @@ typedef struct { data_item *cookie; /* added cookies */ int skip; /* number of next rules to skip */ int maxrounds; /* limit on number of loops with N flag */ - char *escapes; /* specific backref escapes */ + const char *escapes; /* specific backref escapes */ + const char *noescapes; /* specific backref chars not to escape */ } rewriterule_entry; typedef struct { @@ -420,7 +425,9 @@ static apr_global_mutex_t *rewrite_mapr_lock_acquire = NULL; static const char *rewritemap_mutex_type = "rewrite-map"; /* Optional functions imported from mod_ssl when loaded: */ -static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus); +static char *escape_backref(apr_pool_t *p, const char *path, + const char *escapeme, const char *noescapeme, + int flags); /* * +-------------------------------------------------------+ @@ -647,14 +654,21 @@ static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix, * Escapes a backreference in a similar way as php's urlencode does. * Based on ap_os_escape_path in server/util.c */ -static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus) { - char *copy = apr_palloc(p, 3 * strlen(path) + 3); +static char *escape_backref(apr_pool_t *p, const char *path, + const char *escapeme, const char *noescapeme, + int flags) +{ + char *copy = apr_palloc(p, 3 * strlen(path) + 1); const unsigned char *s = (const unsigned char *)path; unsigned char *d = (unsigned char *)copy; - unsigned c; + int noplus = (flags & RULEFLAG_ESCAPENOPLUS) != 0; + int ctls = (flags & RULEFLAG_ESCAPECTLS) != 0; + unsigned char c; while ((c = *s)) { - if (!escapeme) { + if (((ctls ? !TEST_CHAR(c, T_VCHAR_OBSTEXT) : !escapeme) + || (escapeme && ap_strchr_c(escapeme, c))) + && (!noescapeme || !ap_strchr_c(noescapeme, c))) { if (apr_isalnum(c) || c == '_') { *d++ = c; } @@ -665,23 +679,8 @@ static char *escape_backref(apr_pool_t *p, const char *path, const char *escapem d = c2x(c, '%', d); } } - else { - const char *esc = escapeme; - while (*esc) { - if (c == *esc) { - if (c == ' ' && !noplus) { - *d++ = '+'; - } - else { - d = c2x(c, '%', d); - } - break; - } - ++esc; - } - if (!*esc) { - *d++ = c; - } + else { + *d++ = c; } ++s; } @@ -763,11 +762,19 @@ static char *escape_absolute_uri(apr_pool_t *p, char *uri, unsigned scheme) * split out a QUERY_STRING part from * the current URI string */ -static void splitout_queryargs(request_rec *r, int qsappend, int qsdiscard, - int qslast) +static void splitout_queryargs(request_rec *r, int flags) { char *q; int split, skip; + int qsappend = flags & RULEFLAG_QSAPPEND; + int qsdiscard = flags & RULEFLAG_QSDISCARD; + int qslast = flags & RULEFLAG_QSLAST; + + if (flags & RULEFLAG_QSNONE) { + rewritelog((r, 2, NULL, "discarding query string, no parse from substitution")); + r->args = NULL; + return; + } /* don't touch, unless it's a scheme for which a query string makes sense. * See RFC 1738 and RFC 2368. @@ -792,7 +799,7 @@ static void splitout_queryargs(request_rec *r, int qsappend, int qsdiscard, olduri = apr_pstrdup(r->pool, r->filename); *q++ = '\0'; if (qsappend) { - if (*q) { + if (*q) { r->args = apr_pstrcat(r->pool, q, "&" , r->args, NULL); } } @@ -800,9 +807,9 @@ static void splitout_queryargs(request_rec *r, int qsappend, int qsdiscard, r->args = apr_pstrdup(r->pool, q); } - if (r->args) { + if (r->args) { len = strlen(r->args); - + if (!len) { r->args = NULL; } @@ -2434,7 +2441,8 @@ static char *do_expand(char *input, rewrite_ctx *ctx, rewriterule_entry *entry) /* escape the backreference */ char *tmp2, *tmp; tmp = apr_pstrmemdup(pool, bri->source + bri->regmatch[n].rm_so, span); - tmp2 = escape_backref(pool, tmp, entry->escapes, entry->flags & RULEFLAG_ESCAPENOPLUS); + tmp2 = escape_backref(pool, tmp, entry->escapes, entry->noescapes, + entry->flags); rewritelog((ctx->r, 5, ctx->perdir, "escaping backreference '%s' to '%s'", tmp, tmp2)); @@ -2735,7 +2743,7 @@ static apr_status_t rewritelock_remove(void *data) * XXX: what an inclined parser. Seems we have to leave it so * for backwards compat. *sigh* */ -static int parseargline(char *str, char **a1, char **a2, char **a3) +static int parseargline(char *str, char **a1, char **a2, char **a2_end, char **a3) { char quote; @@ -2786,8 +2794,10 @@ static int parseargline(char *str, char **a1, char **a2, char **a3) if (!*str) { *a3 = NULL; /* 3rd argument is optional */ + *a2_end = str; return 0; } + *a2_end = str; *str++ = '\0'; while (apr_isspace(*str)) { @@ -3327,7 +3337,7 @@ static const char *cmd_rewritecond(cmd_parms *cmd, void *in_dconf, rewrite_server_conf *sconf; rewritecond_entry *newcond; ap_regex_t *regexp; - char *a1 = NULL, *a2 = NULL, *a3 = NULL; + char *a1 = NULL, *a2 = NULL, *a2_end, *a3 = NULL; const char *err; sconf = ap_get_module_config(cmd->server->module_config, &rewrite_module); @@ -3345,7 +3355,7 @@ static const char *cmd_rewritecond(cmd_parms *cmd, void *in_dconf, * of the argument line. So we can use a1 .. a3 without * copying them again. */ - if (parseargline(str, &a1, &a2, &a3)) { + if (parseargline(str, &a1, &a2, &a2_end, &a3)) { return apr_pstrcat(cmd->pool, "RewriteCond: bad argument line '", str, "'", NULL); } @@ -3504,13 +3514,24 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg, case 'B': if (!*key || !strcasecmp(key, "ackrefescaping")) { cfg->flags |= RULEFLAG_ESCAPEBACKREF; - if (val && *val) { + if (val && *val) { cfg->escapes = val; } } + else if (!strcasecmp(key, "NE")) { + if (val && *val) { + cfg->noescapes = val; + } + else { + return "flag 'BNE' wants a list of characters (i.e. [BNE=...])"; + } + } else if (!strcasecmp(key, "NP") || !strcasecmp(key, "ackrefernoplus")) { cfg->flags |= RULEFLAG_ESCAPENOPLUS; } + else if (!strcasecmp(key, "CTLS")) { + cfg->flags |= RULEFLAG_ESCAPECTLS|RULEFLAG_ESCAPEBACKREF; + } else { ++error; } @@ -3753,7 +3774,7 @@ static const char *cmd_rewriterule(cmd_parms *cmd, void *in_dconf, rewrite_server_conf *sconf; rewriterule_entry *newrule; ap_regex_t *regexp; - char *a1 = NULL, *a2 = NULL, *a3 = NULL; + char *a1 = NULL, *a2 = NULL, *a2_end, *a3 = NULL; const char *err; sconf = ap_get_module_config(cmd->server->module_config, &rewrite_module); @@ -3767,12 +3788,11 @@ static const char *cmd_rewriterule(cmd_parms *cmd, void *in_dconf, } /* parse the argument line ourself */ - if (parseargline(str, &a1, &a2, &a3)) { + if (parseargline(str, &a1, &a2, &a2_end, &a3)) { return apr_pstrcat(cmd->pool, "RewriteRule: bad argument line '", str, "'", NULL); } - /* arg3: optional flags field */ newrule->forced_mimetype = NULL; newrule->forced_handler = NULL; newrule->forced_responsecode = HTTP_MOVED_TEMPORARILY; @@ -3781,6 +3801,9 @@ static const char *cmd_rewriterule(cmd_parms *cmd, void *in_dconf, newrule->cookie = NULL; newrule->skip = 0; newrule->maxrounds = REWRITE_MAX_ROUNDS; + newrule->escapes = newrule->noescapes = NULL; + + /* arg3: optional flags field */ if (a3 != NULL) { if ((err = cmd_parseflagfield(cmd->pool, newrule, a3, cmd_rewriterule_setflag)) != NULL) { @@ -3814,6 +3837,17 @@ static const char *cmd_rewriterule(cmd_parms *cmd, void *in_dconf, newrule->flags |= RULEFLAG_NOSUB; } + if (*(a2_end-1) == '?') { + /* a literal ? at the end of the unsubstituted rewrite rule */ + newrule->flags |= RULEFLAG_QSNONE; + *(a2_end-1) = '\0'; /* trailing ? has done its job */ + } + else if (newrule->flags & RULEFLAG_QSDISCARD) { + if (NULL == ap_strchr(newrule->output, '?')) { + newrule->flags |= RULEFLAG_QSNONE; + } + } + /* now, if the server or per-dir config holds an * array of RewriteCond entries, we take it for us * and clear the array @@ -4219,9 +4253,7 @@ static int apply_rewrite_rule(rewriterule_entry *p, rewrite_ctx *ctx) r->path_info = NULL; } - splitout_queryargs(r, p->flags & RULEFLAG_QSAPPEND, - p->flags & RULEFLAG_QSDISCARD, - p->flags & RULEFLAG_QSLAST); + splitout_queryargs(r, p->flags); /* Add the previously stripped per-directory location prefix, unless * (1) it's an absolute URL path and @@ -4700,8 +4732,25 @@ static int hook_uri2file(request_rec *r) } if (rulestatus) { - unsigned skip; - apr_size_t flen; + unsigned skip_absolute = is_absolute_uri(r->filename, NULL); + apr_size_t flen = r->filename ? strlen(r->filename) : 0; + int to_proxyreq = (flen > 6 && strncmp(r->filename, "proxy:", 6) == 0); + int will_escape = skip_absolute && (rulestatus != ACTION_NOESCAPE); + + if (r->args + && !will_escape + && *(ap_scan_vchar_obstext(r->args))) { + /* + * We have a raw control character or a ' ' in r->args. + * Correct encoding was missed. + * Correct encoding was missed and we're not going to escape + * it before returning. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10410) + "Rewritten query string contains control " + "characters or spaces"); + return HTTP_FORBIDDEN; + } if (ACTION_STATUS == rulestatus) { int n = r->status; @@ -4710,8 +4759,7 @@ static int hook_uri2file(request_rec *r) return n; } - flen = r->filename ? strlen(r->filename) : 0; - if (flen > 6 && strncmp(r->filename, "proxy:", 6) == 0) { + if (to_proxyreq) { /* it should be go on as an internal proxy request */ /* check if the proxy module is enabled, so @@ -4753,7 +4801,7 @@ static int hook_uri2file(request_rec *r) r->filename)); return OK; } - else if ((skip = is_absolute_uri(r->filename, NULL)) > 0) { + else if (skip_absolute > 0) { int n; /* it was finally rewritten to a remote URL */ @@ -4761,7 +4809,7 @@ static int hook_uri2file(request_rec *r) if (rulestatus != ACTION_NOESCAPE) { rewritelog((r, 1, NULL, "escaping %s for redirect", r->filename)); - r->filename = escape_absolute_uri(r->pool, r->filename, skip); + r->filename = escape_absolute_uri(r->pool, r->filename, skip_absolute); } /* append the QUERY_STRING part */ @@ -4985,7 +5033,26 @@ static int hook_fixup(request_rec *r) */ rulestatus = apply_rewrite_list(r, dconf->rewriterules, dconf->directory); if (rulestatus) { - unsigned skip; + unsigned skip_absolute = is_absolute_uri(r->filename, NULL); + int to_proxyreq = 0; + int will_escape = 0; + + l = strlen(r->filename); + to_proxyreq = l > 6 && strncmp(r->filename, "proxy:", 6) == 0; + will_escape = skip_absolute && (rulestatus != ACTION_NOESCAPE); + + if (r->args + && !will_escape + && *(ap_scan_vchar_obstext(r->args))) { + /* + * We have a raw control character or a ' ' in r->args. + * Correct encoding was missed. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10411) + "Rewritten query string contains control " + "characters or spaces"); + return HTTP_FORBIDDEN; + } if (ACTION_STATUS == rulestatus) { int n = r->status; @@ -4994,8 +5061,7 @@ static int hook_fixup(request_rec *r) return n; } - l = strlen(r->filename); - if (l > 6 && strncmp(r->filename, "proxy:", 6) == 0) { + if (to_proxyreq) { /* it should go on as an internal proxy request */ /* make sure the QUERY_STRING and @@ -5019,7 +5085,7 @@ static int hook_fixup(request_rec *r) "%s [OK]", r->filename)); return OK; } - else if ((skip = is_absolute_uri(r->filename, NULL)) > 0) { + else if (skip_absolute > 0) { /* it was finally rewritten to a remote URL */ /* because we are in a per-dir context @@ -5028,7 +5094,7 @@ static int hook_fixup(request_rec *r) */ if (dconf->baseurl != NULL) { /* skip 'scheme://' */ - cp = r->filename + skip; + cp = r->filename + skip_absolute; if ((cp = ap_strchr(cp, '/')) != NULL && *(++cp)) { rewritelog((r, 2, dconf->directory, @@ -5073,7 +5139,7 @@ static int hook_fixup(request_rec *r) if (rulestatus != ACTION_NOESCAPE) { rewritelog((r, 1, dconf->directory, "escaping %s for redirect", r->filename)); - r->filename = escape_absolute_uri(r->pool, r->filename, skip); + r->filename = escape_absolute_uri(r->pool, r->filename, skip_absolute); } /* append the QUERY_STRING part */ diff --git a/modules/proxy/mod_proxy.c b/modules/proxy/mod_proxy.c index ff9f81d..b6e11c1 100644 --- a/modules/proxy/mod_proxy.c +++ b/modules/proxy/mod_proxy.c @@ -960,6 +960,8 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_rec *r, struct proxy_alias *ent, } if (found) { + unsigned int encoded = ent->flags & PROXYPASS_MAP_ENCODED; + /* A proxy module is assigned this URL, check whether it's interested * in the request itself (e.g. proxy_wstunnel cares about Upgrade * requests only, and could hand over to proxy_http otherwise). @@ -979,6 +981,9 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_rec *r, struct proxy_alias *ent, if (ent->flags & PROXYPASS_NOQUERY) { apr_table_setn(r->notes, "proxy-noquery", "1"); } + if (encoded) { + apr_table_setn(r->notes, "proxy-noencode", "1"); + } if (servlet_uri) { ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, APLOGNO(10248) @@ -992,13 +997,13 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_rec *r, struct proxy_alias *ent, */ AP_DEBUG_ASSERT(strlen(r->uri) >= strlen(servlet_uri)); strcpy(r->uri, servlet_uri); - return DONE; } - - ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, APLOGNO(03464) - "URI path '%s' matches proxy handler '%s'", r->uri, - found); - return OK; + else { + ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, APLOGNO(03464) + "URI path '%s' matches proxy handler '%s'", r->uri, + found); + } + return (encoded) ? DONE : OK; } return HTTP_CONTINUE; diff --git a/modules/proxy/mod_proxy_ajp.c b/modules/proxy/mod_proxy_ajp.c index e2992fc..fdccb86 100644 --- a/modules/proxy/mod_proxy_ajp.c +++ b/modules/proxy/mod_proxy_ajp.c @@ -65,11 +65,25 @@ static int proxy_ajp_canon(request_rec *r, char *url) if (apr_table_get(r->notes, "proxy-nocanon")) { path = url; /* this is the raw path */ } + else if (apr_table_get(r->notes, "proxy-noencode")) { + path = url; /* this is the encoded path already */ + search = r->args; + } else { path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq); search = r->args; } + if (search && *ap_scan_vchar_obstext(search)) { + /* + * We have a raw control character or a ' ' in r->args. + * Correct encoding was missed. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10406) + "To be forwarded query string contains control " + "characters or spaces"); + return HTTP_FORBIDDEN; + } if (path == NULL) return HTTP_BAD_REQUEST; diff --git a/modules/proxy/mod_proxy_balancer.c b/modules/proxy/mod_proxy_balancer.c index db46952..8927d7b 100644 --- a/modules/proxy/mod_proxy_balancer.c +++ b/modules/proxy/mod_proxy_balancer.c @@ -102,11 +102,25 @@ static int proxy_balancer_canon(request_rec *r, char *url) if (apr_table_get(r->notes, "proxy-nocanon")) { path = url; /* this is the raw path */ } + else if (apr_table_get(r->notes, "proxy-noencode")) { + path = url; /* this is the encoded path already */ + search = r->args; + } else { path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq); search = r->args; } + if (search && *ap_scan_vchar_obstext(search)) { + /* + * We have a raw control character or a ' ' in r->args. + * Correct encoding was missed. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10407) + "To be forwarded query string contains control " + "characters or spaces"); + return HTTP_FORBIDDEN; + } if (path == NULL) return HTTP_BAD_REQUEST; diff --git a/modules/proxy/mod_proxy_fcgi.c b/modules/proxy/mod_proxy_fcgi.c index 3382b9b..a89b9a9 100644 --- a/modules/proxy/mod_proxy_fcgi.c +++ b/modules/proxy/mod_proxy_fcgi.c @@ -92,8 +92,9 @@ static int proxy_fcgi_canon(request_rec *r, char *url) host = apr_pstrcat(r->pool, "[", host, "]", NULL); } - if (apr_table_get(r->notes, "proxy-nocanon")) { - path = url; /* this is the raw path */ + if (apr_table_get(r->notes, "proxy-nocanon") + || apr_table_get(r->notes, "proxy-noencode")) { + path = url; /* this is the raw/encoded path */ } else { path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, diff --git a/modules/proxy/mod_proxy_http.c b/modules/proxy/mod_proxy_http.c index 3e5c056..9da15e4 100644 --- a/modules/proxy/mod_proxy_http.c +++ b/modules/proxy/mod_proxy_http.c @@ -121,11 +121,25 @@ static int proxy_http_canon(request_rec *r, char *url) if (apr_table_get(r->notes, "proxy-nocanon")) { path = url; /* this is the raw path */ } + else if (apr_table_get(r->notes, "proxy-noencode")) { + path = url; /* this is the encoded path already */ + search = r->args; + } else { path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq); search = r->args; } + if (search && *ap_scan_vchar_obstext(search)) { + /* + * We have a raw control character or a ' ' in r->args. + * Correct encoding was missed. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10408) + "To be forwarded query string contains control " + "characters or spaces"); + return HTTP_FORBIDDEN; + } break; case PROXYREQ_PROXY: path = url; diff --git a/modules/proxy/mod_proxy_uwsgi.c b/modules/proxy/mod_proxy_uwsgi.c index 4d7589c..877e7eb 100644 --- a/modules/proxy/mod_proxy_uwsgi.c +++ b/modules/proxy/mod_proxy_uwsgi.c @@ -84,8 +84,14 @@ static int uwsgi_canon(request_rec *r, char *url) host = apr_pstrcat(r->pool, "[", host, "]", NULL); } - path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, - r->proxyreq); + if (apr_table_get(r->notes, "proxy-nocanon") + || apr_table_get(r->notes, "proxy-noencode")) { + path = url; /* this is the raw/encoded path */ + } + else { + path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, + r->proxyreq); + } if (!path) { return HTTP_BAD_REQUEST; } diff --git a/modules/proxy/mod_proxy_wstunnel.c b/modules/proxy/mod_proxy_wstunnel.c index c29ded1..3a68b85 100644 --- a/modules/proxy/mod_proxy_wstunnel.c +++ b/modules/proxy/mod_proxy_wstunnel.c @@ -111,11 +111,25 @@ static int proxy_wstunnel_canon(request_rec *r, char *url) if (apr_table_get(r->notes, "proxy-nocanon")) { path = url; /* this is the raw path */ } + else if (apr_table_get(r->notes, "proxy-noencode")) { + path = url; /* this is the encoded path already */ + search = r->args; + } else { path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq); search = r->args; } + if (search && *ap_scan_vchar_obstext(search)) { + /* + * We have a raw control character or a ' ' in r->args. + * Correct encoding was missed. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10409) + "To be forwarded query string contains control " + "characters or spaces"); + return HTTP_FORBIDDEN; + } if (path == NULL) return HTTP_BAD_REQUEST;