From ee01bf862357c74ac9d33671fcc091f43da11aa1 Mon Sep 17 00:00:00 2001 From: lwthiker Date: Fri, 1 Sep 2023 21:59:31 +0300 Subject: [PATCH] Fix HTTP/2 handling in the Firefox version --- firefox/patches/curl-impersonate.patch | 453 ++++++++----------------- 1 file changed, 134 insertions(+), 319 deletions(-) diff --git a/firefox/patches/curl-impersonate.patch b/firefox/patches/curl-impersonate.patch index 5f9784b..e8984a0 100644 --- a/firefox/patches/curl-impersonate.patch +++ b/firefox/patches/curl-impersonate.patch @@ -211,6 +211,57 @@ index f815170a7..9d9417edc 100644 inet_ntop.c \ inet_pton.c \ krb5.c \ +diff --git a/lib/dynhds.c b/lib/dynhds.c +index b325e0060..4c8a73bab 100644 +--- a/lib/dynhds.c ++++ b/lib/dynhds.c +@@ -52,6 +52,8 @@ entry_new(const char *name, size_t namelen, + e->valuelen = valuelen; + if(opts & DYNHDS_OPT_LOWERCASE) + Curl_strntolower(e->name, e->name, e->namelen); ++ if(opts & DYNHDS_OPT_LOWERCASE_VAL) ++ Curl_strntolower(e->value, e->value, e->valuelen); + return e; + } + +@@ -134,6 +136,16 @@ void Curl_dynhds_set_opts(struct dynhds *dynhds, int opts) + dynhds->opts = opts; + } + ++void Curl_dynhds_set_opt(struct dynhds *dynhds, int opt) ++{ ++ dynhds->opts |= opt; ++} ++ ++void Curl_dynhds_del_opt(struct dynhds *dynhds, int opt) ++{ ++ dynhds->opts &= ~opt; ++} ++ + struct dynhds_entry *Curl_dynhds_getn(struct dynhds *dynhds, size_t n) + { + DEBUGASSERT(dynhds); +diff --git a/lib/dynhds.h b/lib/dynhds.h +index 777baa58a..2d542dfd6 100644 +--- a/lib/dynhds.h ++++ b/lib/dynhds.h +@@ -53,6 +53,7 @@ struct dynhds { + + #define DYNHDS_OPT_NONE (0) + #define DYNHDS_OPT_LOWERCASE (1 << 0) ++#define DYNHDS_OPT_LOWERCASE_VAL (1 << 1) + + /** + * Init for use on first time or after a reset. +@@ -82,6 +83,8 @@ size_t Curl_dynhds_count(struct dynhds *dynhds); + * This will not have an effect on already existing headers. 
+ */ + void Curl_dynhds_set_opts(struct dynhds *dynhds, int opts); ++void Curl_dynhds_set_opt(struct dynhds *dynhds, int opt); ++void Curl_dynhds_del_opt(struct dynhds *dynhds, int opt); + + /** + * Return the n-th header entry or NULL if it does not exist. diff --git a/lib/easy.c b/lib/easy.c index d36cc03d1..c5a21e4e3 100644 --- a/lib/easy.c @@ -400,324 +451,8 @@ index a9c1efd00..e04b42381 100644 {"HTTPHEADER", CURLOPT_HTTPHEADER, CURLOT_SLIST, 0}, {"HTTPPOST", CURLOPT_HTTPPOST, CURLOT_OBJECT, 0}, {"HTTPPROXYTUNNEL", CURLOPT_HTTPPROXYTUNNEL, CURLOT_LONG, 0}, -diff --git a/lib/h2h3.c b/lib/h2h3.c -new file mode 100644 -index 000000000..ba9433cb9 ---- /dev/null -+++ b/lib/h2h3.c -@@ -0,0 +1,310 @@ -+/*************************************************************************** -+ * _ _ ____ _ -+ * Project ___| | | | _ \| | -+ * / __| | | | |_) | | -+ * | (__| |_| | _ <| |___ -+ * \___|\___/|_| \_\_____| -+ * -+ * Copyright (C) 1998 - 2022, Daniel Stenberg, , et al. -+ * -+ * This software is licensed as described in the file COPYING, which -+ * you should have received as part of this distribution. The terms -+ * are also available at https://curl.se/docs/copyright.html. -+ * -+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell -+ * copies of the Software, and permit persons to whom the Software is -+ * furnished to do so, under the terms of the COPYING file. -+ * -+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY -+ * KIND, either express or implied. 
-+ * -+ * SPDX-License-Identifier: curl -+ * -+ ***************************************************************************/ -+ -+#include "curl_setup.h" -+#include "urldata.h" -+#include "h2h3.h" -+#include "transfer.h" -+#include "sendf.h" -+#include "strcase.h" -+ -+/* The last 3 #include files should be in this order */ -+#include "curl_printf.h" -+#include "curl_memory.h" -+#include "memdebug.h" -+ -+/* -+ * Curl_pseudo_headers() creates the array with pseudo headers to be -+ * used in a HTTP/2 or HTTP/3 request. -+ */ -+ -+#if defined(USE_NGHTTP2) || defined(ENABLE_QUIC) -+ -+/* Index where :authority header field will appear in request header -+ field list. */ -+/* curl-impersonate: Put the ":authority" header in the second place. */ -+#define AUTHORITY_DST_IDX 2 -+ -+/* USHRT_MAX is 65535 == 0xffff */ -+#define HEADER_OVERFLOW(x) \ -+ (x.namelen > 0xffff || x.valuelen > 0xffff - x.namelen) -+ -+/* -+ * Check header memory for the token "trailers". -+ * Parse the tokens as separated by comma and surrounded by whitespace. -+ * Returns TRUE if found or FALSE if not. -+ */ -+static bool contains_trailers(const char *p, size_t len) -+{ -+ const char *end = p + len; -+ for(;;) { -+ for(; p != end && (*p == ' ' || *p == '\t'); ++p) -+ ; -+ if(p == end || (size_t)(end - p) < sizeof("trailers") - 1) -+ return FALSE; -+ if(strncasecompare("trailers", p, sizeof("trailers") - 1)) { -+ p += sizeof("trailers") - 1; -+ for(; p != end && (*p == ' ' || *p == '\t'); ++p) -+ ; -+ if(p == end || *p == ',') -+ return TRUE; -+ } -+ /* skip to next token */ -+ for(; p != end && *p != ','; ++p) -+ ; -+ if(p == end) -+ return FALSE; -+ ++p; -+ } -+} -+ -+typedef enum { -+ /* Send header to server */ -+ HEADERINST_FORWARD, -+ /* Don't send header to server */ -+ HEADERINST_IGNORE, -+ /* Discard header, and replace it with "te: trailers" */ -+ HEADERINST_TE_TRAILERS -+} header_instruction; -+ -+/* Decides how to treat given header field. 
*/ -+static header_instruction inspect_header(const char *name, size_t namelen, -+ const char *value, size_t valuelen) { -+ switch(namelen) { -+ case 2: -+ if(!strncasecompare("te", name, namelen)) -+ return HEADERINST_FORWARD; -+ -+ return contains_trailers(value, valuelen) ? -+ HEADERINST_TE_TRAILERS : HEADERINST_IGNORE; -+ case 7: -+ return strncasecompare("upgrade", name, namelen) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ case 10: -+ return (strncasecompare("connection", name, namelen) || -+ strncasecompare("keep-alive", name, namelen)) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ case 16: -+ return strncasecompare("proxy-connection", name, namelen) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ case 17: -+ return strncasecompare("transfer-encoding", name, namelen) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ default: -+ return HEADERINST_FORWARD; -+ } -+} -+ -+CURLcode Curl_pseudo_headers(struct Curl_easy *data, -+ const char *mem, /* the request */ -+ const size_t len /* size of request */, -+ struct h2h3req **hp) -+{ -+ struct connectdata *conn = data->conn; -+ size_t nheader = 0; -+ size_t i; -+ size_t authority_idx; -+ char *hdbuf = (char *)mem; -+ char *end, *line_end; -+ struct h2h3pseudo *nva = NULL; -+ struct h2h3req *hreq = NULL; -+ char *vptr; -+ -+ /* Calculate number of headers contained in [mem, mem + len). Assumes a -+ correctly generated HTTP header field block. */ -+ for(i = 1; i < len; ++i) { -+ if(hdbuf[i] == '\n' && hdbuf[i - 1] == '\r') { -+ ++nheader; -+ ++i; -+ } -+ } -+ if(nheader < 2) { -+ goto fail; -+ } -+ /* We counted additional 2 \r\n in the first and last line. We need 3 -+ new headers: :method, :path and :scheme. Therefore we need one -+ more space. 
*/ -+ nheader += 1; -+ hreq = malloc(sizeof(struct h2h3req) + -+ sizeof(struct h2h3pseudo) * (nheader - 1)); -+ if(!hreq) { -+ goto fail; -+ } -+ -+ nva = &hreq->header[0]; -+ -+ /* Extract :method, :path from request line -+ We do line endings with CRLF so checking for CR is enough */ -+ line_end = memchr(hdbuf, '\r', len); -+ if(!line_end) { -+ goto fail; -+ } -+ -+ /* Method does not contain spaces */ -+ end = memchr(hdbuf, ' ', line_end - hdbuf); -+ if(!end || end == hdbuf) -+ goto fail; -+ nva[0].name = H2H3_PSEUDO_METHOD; -+ nva[0].namelen = sizeof(H2H3_PSEUDO_METHOD) - 1; -+ nva[0].value = hdbuf; -+ nva[0].valuelen = (size_t)(end - hdbuf); -+ -+ hdbuf = end + 1; -+ -+ /* Path may contain spaces so scan backwards */ -+ end = NULL; -+ for(i = (size_t)(line_end - hdbuf); i; --i) { -+ if(hdbuf[i - 1] == ' ') { -+ end = &hdbuf[i - 1]; -+ break; -+ } -+ } -+ if(!end || end == hdbuf) -+ goto fail; -+ nva[1].name = H2H3_PSEUDO_PATH; -+ nva[1].namelen = sizeof(H2H3_PSEUDO_PATH) - 1; -+ nva[1].value = hdbuf; -+ nva[1].valuelen = (end - hdbuf); -+ -+ nva[2].name = H2H3_PSEUDO_SCHEME; -+ nva[2].namelen = sizeof(H2H3_PSEUDO_SCHEME) - 1; -+ vptr = Curl_checkheaders(data, STRCONST(H2H3_PSEUDO_SCHEME)); -+ if(vptr) { -+ vptr += sizeof(H2H3_PSEUDO_SCHEME); -+ while(*vptr && ISSPACE(*vptr)) -+ vptr++; -+ nva[2].value = vptr; -+ infof(data, "set pseudo header %s to %s", H2H3_PSEUDO_SCHEME, vptr); -+ } -+ else { -+ if(conn->handler->flags & PROTOPT_SSL) -+ nva[2].value = "https"; -+ else -+ nva[2].value = "http"; -+ } -+ nva[2].valuelen = strlen((char *)nva[2].value); -+ -+ authority_idx = 0; -+ i = 3; -+ while(i < nheader) { -+ size_t hlen; -+ -+ hdbuf = line_end + 2; -+ -+ /* check for next CR, but only within the piece of data left in the given -+ buffer */ -+ line_end = memchr(hdbuf, '\r', len - (hdbuf - (char *)mem)); -+ if(!line_end || (line_end == hdbuf)) -+ goto fail; -+ -+ /* header continuation lines are not supported */ -+ if(*hdbuf == ' ' || *hdbuf == '\t') -+ goto 
fail; -+ -+ for(end = hdbuf; end < line_end && *end != ':'; ++end) -+ ; -+ if(end == hdbuf || end == line_end) -+ goto fail; -+ hlen = end - hdbuf; -+ -+ if(hlen == 4 && strncasecompare("host", hdbuf, 4)) { -+ authority_idx = i; -+ nva[i].name = H2H3_PSEUDO_AUTHORITY; -+ nva[i].namelen = sizeof(H2H3_PSEUDO_AUTHORITY) - 1; -+ } -+ else { -+ nva[i].namelen = (size_t)(end - hdbuf); -+ /* Lower case the header name for HTTP/3 */ -+ Curl_strntolower((char *)hdbuf, hdbuf, nva[i].namelen); -+ nva[i].name = hdbuf; -+ } -+ hdbuf = end + 1; -+ while(*hdbuf == ' ' || *hdbuf == '\t') -+ ++hdbuf; -+ end = line_end; -+ -+ switch(inspect_header((const char *)nva[i].name, nva[i].namelen, hdbuf, -+ end - hdbuf)) { -+ case HEADERINST_IGNORE: -+ /* skip header fields prohibited by HTTP/2 specification. */ -+ --nheader; -+ continue; -+ case HEADERINST_TE_TRAILERS: -+ nva[i].value = "trailers"; -+ nva[i].valuelen = sizeof("trailers") - 1; -+ break; -+ default: -+ nva[i].value = hdbuf; -+ nva[i].valuelen = (end - hdbuf); -+ } -+ -+ ++i; -+ } -+ -+ /* :authority must come before non-pseudo header fields */ -+ if(authority_idx && authority_idx != AUTHORITY_DST_IDX) { -+ struct h2h3pseudo authority = nva[authority_idx]; -+ for(i = authority_idx; i > AUTHORITY_DST_IDX; --i) { -+ nva[i] = nva[i - 1]; -+ } -+ nva[i] = authority; -+ } -+ -+ /* Warn stream may be rejected if cumulative length of headers is too -+ large. 
*/ -+#define MAX_ACC 60000 /* <64KB to account for some overhead */ -+ { -+ size_t acc = 0; -+ -+ for(i = 0; i < nheader; ++i) { -+ acc += nva[i].namelen + nva[i].valuelen; -+ -+ infof(data, "h2h3 [%.*s: %.*s]", -+ (int)nva[i].namelen, nva[i].name, -+ (int)nva[i].valuelen, nva[i].value); -+ } -+ -+ if(acc > MAX_ACC) { -+ infof(data, "http_request: Warning: The cumulative length of all " -+ "headers exceeds %d bytes and that could cause the " -+ "stream to be rejected.", MAX_ACC); -+ } -+ } -+ -+ hreq->entries = nheader; -+ *hp = hreq; -+ -+ return CURLE_OK; -+ -+ fail: -+ free(hreq); -+ return CURLE_OUT_OF_MEMORY; -+} -+ -+void Curl_pseudo_free(struct h2h3req *hp) -+{ -+ free(hp); -+} -+ -+#endif /* USE_NGHTTP2 or HTTP/3 enabled */ diff --git a/lib/http.c b/lib/http.c -index 219dcc2c0..a04214ff0 100644 +index 219dcc2c0..19ddd1d36 100644 --- a/lib/http.c +++ b/lib/http.c @@ -89,6 +89,7 @@ @@ -893,8 +628,47 @@ index 219dcc2c0..a04214ff0 100644 result = Curl_http_host(data, conn); if(result) return result; +@@ -4818,23 +4935,32 @@ CURLcode Curl_http_req_to_h2(struct dynhds *h2_headers, + Curl_dynhds_set_opts(h2_headers, DYNHDS_OPT_LOWERCASE); + result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_METHOD), + req->method, strlen(req->method)); +- if(!result && scheme) { +- result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_SCHEME), +- scheme, strlen(scheme)); ++ if(!result && req->path) { ++ result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_PATH), ++ req->path, strlen(req->path)); + } + if(!result && authority) { + result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_AUTHORITY), + authority, strlen(authority)); + } +- if(!result && req->path) { +- result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_PATH), +- req->path, strlen(req->path)); ++ if(!result && scheme) { ++ result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_SCHEME), ++ scheme, strlen(scheme)); + } + for(i = 0; !result && i < Curl_dynhds_count(&req->headers); ++i) { + e = 
Curl_dynhds_getn(&req->headers, i); + if(!h2_non_field(e->name, e->namelen)) { ++ /* curl-impersonate: ++ * Some HTTP/2 servers reject 'te' header value that is not lowercase (e.g. 'Trailers'). ++ * Convert to lowercase explicitly. ++ */ ++ if(e->namelen == 2 && strcasecompare(e->name, "te")) ++ Curl_dynhds_set_opt(h2_headers, DYNHDS_OPT_LOWERCASE_VAL); ++ + result = Curl_dynhds_add(h2_headers, e->name, e->namelen, + e->value, e->valuelen); ++ ++ Curl_dynhds_del_opt(h2_headers, DYNHDS_OPT_LOWERCASE_VAL); + } + } + diff --git a/lib/http2.c b/lib/http2.c -index c666192fc..78c329d1b 100644 +index c666192fc..e926eb3f5 100644 --- a/lib/http2.c +++ b/lib/http2.c @@ -63,12 +63,13 @@ @@ -1032,7 +806,35 @@ index c666192fc..78c329d1b 100644 /* all set, traffic will be send on connect */ result = CURLE_OK; -@@ -1636,12 +1714,18 @@ static int sweight_in_effect(const struct Curl_easy *data) +@@ -1616,18 +1694,25 @@ out: + return rv; + } + ++ ++/* ++ * curl-impersonate: Set the HTTP/2 stream weight to the one used by Firefox ++ * by default to fetch html resources. ++ */ ++#define FIREFOX_DEFAULT_STREAM_WEIGHT (42) ++ + static int sweight_wanted(const struct Curl_easy *data) + { + /* 0 weight is not set by user and we take the nghttp2 default one */ + return data->set.priority.weight? +- data->set.priority.weight : NGHTTP2_DEFAULT_WEIGHT; ++ data->set.priority.weight : FIREFOX_DEFAULT_STREAM_WEIGHT; + } + + static int sweight_in_effect(const struct Curl_easy *data) + { + /* 0 weight is not set by user and we take the nghttp2 default one */ + return data->state.priority.weight? +- data->state.priority.weight : NGHTTP2_DEFAULT_WEIGHT; ++ data->state.priority.weight : FIREFOX_DEFAULT_STREAM_WEIGHT; + } + + /* +@@ -1636,12 +1721,18 @@ static int sweight_in_effect(const struct Curl_easy *data) * struct.
*/ @@ -1552,6 +1354,19 @@ index 5e5dbb744..2604fc755 100644 if(!conn_config->verifypeer && conn_config->verifyhost) infof(data, "WARNING: ignoring value of ssl.verifyhost"); +diff --git a/lib/vtls/vtls.c b/lib/vtls/vtls.c +index 32334016b..a9b6299b2 100644 +--- a/lib/vtls/vtls.c ++++ b/lib/vtls/vtls.c +@@ -139,7 +139,7 @@ static const struct alpn_spec ALPN_SPEC_H11 = { + }; + #ifdef USE_HTTP2 + static const struct alpn_spec ALPN_SPEC_H2_H11 = { +- { ALPN_H2, ALPN_HTTP_1_1 }, 2 ++ { ALPN_HTTP_1_1, ALPN_H2 }, 2 + }; + #endif + diff --git a/libcurl.pc.in b/libcurl.pc.in index 9db6b0f89..9e2f19af9 100644 --- a/libcurl.pc.in