From 0c2fc6ce9ba6085c1154d2b3b92198b6df6d882b Mon Sep 17 00:00:00 2001 From: lwthiker Date: Sat, 2 Sep 2023 15:29:53 +0300 Subject: [PATCH] Fix HTTP/2 handling in the Chrome version --- chrome/patches/curl-impersonate.patch | 545 ++++++++------------------ 1 file changed, 163 insertions(+), 382 deletions(-) diff --git a/chrome/patches/curl-impersonate.patch b/chrome/patches/curl-impersonate.patch index 4fbc3a0..6a0f468 100644 --- a/chrome/patches/curl-impersonate.patch +++ b/chrome/patches/curl-impersonate.patch @@ -250,6 +250,57 @@ index f815170a7..9d9417edc 100644 inet_ntop.c \ inet_pton.c \ krb5.c \ +diff --git a/lib/dynhds.c b/lib/dynhds.c +index b325e0060..4c8a73bab 100644 +--- a/lib/dynhds.c ++++ b/lib/dynhds.c +@@ -52,6 +52,8 @@ entry_new(const char *name, size_t namelen, + e->valuelen = valuelen; + if(opts & DYNHDS_OPT_LOWERCASE) + Curl_strntolower(e->name, e->name, e->namelen); ++ if(opts & DYNHDS_OPT_LOWERCASE_VAL) ++ Curl_strntolower(e->value, e->value, e->valuelen); + return e; + } + +@@ -134,6 +136,16 @@ void Curl_dynhds_set_opts(struct dynhds *dynhds, int opts) + dynhds->opts = opts; + } + ++void Curl_dynhds_set_opt(struct dynhds *dynhds, int opt) ++{ ++ dynhds->opts |= opt; ++} ++ ++void Curl_dynhds_del_opt(struct dynhds *dynhds, int opt) ++{ ++ dynhds->opts &= ~opt; ++} ++ + struct dynhds_entry *Curl_dynhds_getn(struct dynhds *dynhds, size_t n) + { + DEBUGASSERT(dynhds); +diff --git a/lib/dynhds.h b/lib/dynhds.h +index 777baa58a..2d542dfd6 100644 +--- a/lib/dynhds.h ++++ b/lib/dynhds.h +@@ -53,6 +53,7 @@ struct dynhds { + + #define DYNHDS_OPT_NONE (0) + #define DYNHDS_OPT_LOWERCASE (1 << 0) ++#define DYNHDS_OPT_LOWERCASE_VAL (1 << 1) + + /** + * Init for use on first time or after a reset. +@@ -82,6 +83,8 @@ size_t Curl_dynhds_count(struct dynhds *dynhds); + * This will not have an effect on already existing headers. 
+ */ + void Curl_dynhds_set_opts(struct dynhds *dynhds, int opts); ++void Curl_dynhds_set_opt(struct dynhds *dynhds, int opt); ++void Curl_dynhds_del_opt(struct dynhds *dynhds, int opt); + + /** + * Return the n-th header entry or NULL if it does not exist. diff --git a/lib/easy.c b/lib/easy.c index d36cc03d1..65e94f1ad 100644 --- a/lib/easy.c @@ -534,389 +585,8 @@ index a9c1efd00..136a43e22 100644 + return ((CURLOPT_LASTENTRY%10000) != (330 + 1)); } #endif -diff --git a/lib/h2h3.c b/lib/h2h3.c -new file mode 100644 -index 000000000..01f8918ea ---- /dev/null -+++ b/lib/h2h3.c -@@ -0,0 +1,375 @@ -+/*************************************************************************** -+ * _ _ ____ _ -+ * Project ___| | | | _ \| | -+ * / __| | | | |_) | | -+ * | (__| |_| | _ <| |___ -+ * \___|\___/|_| \_\_____| -+ * -+ * Copyright (C) 1998 - 2022, Daniel Stenberg, , et al. -+ * -+ * This software is licensed as described in the file COPYING, which -+ * you should have received as part of this distribution. The terms -+ * are also available at https://curl.se/docs/copyright.html. -+ * -+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell -+ * copies of the Software, and permit persons to whom the Software is -+ * furnished to do so, under the terms of the COPYING file. -+ * -+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY -+ * KIND, either express or implied. -+ * -+ * SPDX-License-Identifier: curl -+ * -+ ***************************************************************************/ -+ -+#include "curl_setup.h" -+#include "urldata.h" -+#include "h2h3.h" -+#include "transfer.h" -+#include "sendf.h" -+#include "strcase.h" -+ -+/* The last 3 #include files should be in this order */ -+#include "curl_printf.h" -+#include "curl_memory.h" -+#include "memdebug.h" -+ -+/* -+ * Curl_pseudo_headers() creates the array with pseudo headers to be -+ * used in a HTTP/2 or HTTP/3 request. 
-+ */ -+ -+#if defined(USE_NGHTTP2) || defined(ENABLE_QUIC) -+ -+/* USHRT_MAX is 65535 == 0xffff */ -+#define HEADER_OVERFLOW(x) \ -+ (x.namelen > 0xffff || x.valuelen > 0xffff - x.namelen) -+ -+/* -+ * Check header memory for the token "trailers". -+ * Parse the tokens as separated by comma and surrounded by whitespace. -+ * Returns TRUE if found or FALSE if not. -+ */ -+static bool contains_trailers(const char *p, size_t len) -+{ -+ const char *end = p + len; -+ for(;;) { -+ for(; p != end && (*p == ' ' || *p == '\t'); ++p) -+ ; -+ if(p == end || (size_t)(end - p) < sizeof("trailers") - 1) -+ return FALSE; -+ if(strncasecompare("trailers", p, sizeof("trailers") - 1)) { -+ p += sizeof("trailers") - 1; -+ for(; p != end && (*p == ' ' || *p == '\t'); ++p) -+ ; -+ if(p == end || *p == ',') -+ return TRUE; -+ } -+ /* skip to next token */ -+ for(; p != end && *p != ','; ++p) -+ ; -+ if(p == end) -+ return FALSE; -+ ++p; -+ } -+} -+ -+typedef enum { -+ /* Send header to server */ -+ HEADERINST_FORWARD, -+ /* Don't send header to server */ -+ HEADERINST_IGNORE, -+ /* Discard header, and replace it with "te: trailers" */ -+ HEADERINST_TE_TRAILERS -+} header_instruction; -+ -+/* Decides how to treat given header field. */ -+static header_instruction inspect_header(const char *name, size_t namelen, -+ const char *value, size_t valuelen) { -+ switch(namelen) { -+ case 2: -+ if(!strncasecompare("te", name, namelen)) -+ return HEADERINST_FORWARD; -+ -+ return contains_trailers(value, valuelen) ? -+ HEADERINST_TE_TRAILERS : HEADERINST_IGNORE; -+ case 7: -+ return strncasecompare("upgrade", name, namelen) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ case 10: -+ return (strncasecompare("connection", name, namelen) || -+ strncasecompare("keep-alive", name, namelen)) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ case 16: -+ return strncasecompare("proxy-connection", name, namelen) ? 
-+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ case 17: -+ return strncasecompare("transfer-encoding", name, namelen) ? -+ HEADERINST_IGNORE : HEADERINST_FORWARD; -+ default: -+ return HEADERINST_FORWARD; -+ } -+} -+ -+/* -+ * curl-impersonate: -+ * Determine the position of HTTP/2 pseudo headers. -+ * The pseudo headers ":method", ":path", ":scheme", ":authority" -+ * are sent in different order by different browsers. An important part of the -+ * impersonation is ordering them like the browser does. -+ */ -+static int http2_pseudo_header_index(struct Curl_easy *data, -+ const char *header, -+ size_t *index) -+{ -+ char *off; -+ // Use the Chrome ordering by default: -+ // :method, :authority, :scheme, :path -+ char *order = "masp"; -+ if(data->set.str[STRING_HTTP2_PSEUDO_HEADERS_ORDER]) -+ order = data->set.str[STRING_HTTP2_PSEUDO_HEADERS_ORDER]; -+ -+ if(strlen(order) != 4) -+ return CURLE_BAD_FUNCTION_ARGUMENT; -+ -+ // :method should always be first -+ if(order[0] != 'm') -+ return CURLE_BAD_FUNCTION_ARGUMENT; -+ -+ // All pseudo-headers must be present -+ if(!strchr(order, 'm') || -+ !strchr(order, 'a') || -+ !strchr(order, 's') || -+ !strchr(order, 'p')) -+ return CURLE_BAD_FUNCTION_ARGUMENT; -+ -+ if(strcasecompare(header, ":method")) -+ off = strchr(order, 'm'); -+ else if(strcasecompare(header, ":authority")) -+ off = strchr(order, 'a'); -+ else if(strcasecompare(header, ":scheme")) -+ off = strchr(order, 's'); -+ else if(strcasecompare(header, ":path")) -+ off = strchr(order, 'p'); -+ else -+ return CURLE_BAD_FUNCTION_ARGUMENT; -+ -+ *index = off - order; -+ return CURLE_OK; -+} -+ -+CURLcode Curl_pseudo_headers(struct Curl_easy *data, -+ const char *mem, /* the request */ -+ const size_t len /* size of request */, -+ struct h2h3req **hp) -+{ -+ struct connectdata *conn = data->conn; -+ size_t nheader = 0; -+ size_t i; -+ size_t header_idx; -+ size_t authority_idx; -+ char *hdbuf = (char *)mem; -+ char *end, *line_end; -+ struct h2h3pseudo *nva = NULL; -+ 
struct h2h3req *hreq = NULL; -+ char *vptr; -+ -+ /* Calculate number of headers contained in [mem, mem + len). Assumes a -+ correctly generated HTTP header field block. */ -+ for(i = 1; i < len; ++i) { -+ if(hdbuf[i] == '\n' && hdbuf[i - 1] == '\r') { -+ ++nheader; -+ ++i; -+ } -+ } -+ if(nheader < 2) { -+ goto fail; -+ } -+ /* We counted additional 2 \r\n in the first and last line. We need 3 -+ new headers: :method, :path and :scheme. Therefore we need one -+ more space. */ -+ nheader += 1; -+ hreq = malloc(sizeof(struct h2h3req) + -+ sizeof(struct h2h3pseudo) * (nheader - 1)); -+ if(!hreq) { -+ goto fail; -+ } -+ -+ nva = &hreq->header[0]; -+ -+ /* Extract :method, :path from request line -+ We do line endings with CRLF so checking for CR is enough */ -+ line_end = memchr(hdbuf, '\r', len); -+ if(!line_end) { -+ goto fail; -+ } -+ -+ /* Method does not contain spaces */ -+ end = memchr(hdbuf, ' ', line_end - hdbuf); -+ if(!end || end == hdbuf) -+ goto fail; -+ /* curl-impersonate: Set the index of ":method" based on libcurl option */ -+ if(http2_pseudo_header_index(data, ":authority", &authority_idx)) -+ goto fail; -+ if(http2_pseudo_header_index(data, ":method", &header_idx)) -+ goto fail; -+ /* This is needed to overcome the fact that curl will only move the authority -+ * header into its place after all other headers have been placed. 
*/ -+ if(header_idx > authority_idx) -+ header_idx--; -+ nva[header_idx].name = H2H3_PSEUDO_METHOD; -+ nva[header_idx].namelen = sizeof(H2H3_PSEUDO_METHOD) - 1; -+ nva[header_idx].value = hdbuf; -+ nva[header_idx].valuelen = (size_t)(end - hdbuf); -+ -+ hdbuf = end + 1; -+ -+ /* Path may contain spaces so scan backwards */ -+ end = NULL; -+ for(i = (size_t)(line_end - hdbuf); i; --i) { -+ if(hdbuf[i - 1] == ' ') { -+ end = &hdbuf[i - 1]; -+ break; -+ } -+ } -+ if(!end || end == hdbuf) -+ goto fail; -+ /* curl-impersonate: Set the index of ":path" based on libcurl option */ -+ if(http2_pseudo_header_index(data, ":path", &header_idx)) -+ goto fail; -+ if(header_idx > authority_idx) -+ header_idx--; -+ nva[header_idx].name = H2H3_PSEUDO_PATH; -+ nva[header_idx].namelen = sizeof(H2H3_PSEUDO_PATH) - 1; -+ nva[header_idx].value = hdbuf; -+ nva[header_idx].valuelen = (end - hdbuf); -+ -+ /* curl-impersonate: Set the index of ":scheme" based on libcurl option */ -+ if(http2_pseudo_header_index(data, ":scheme", &header_idx)) -+ goto fail; -+ if(header_idx > authority_idx) -+ header_idx--; -+ nva[header_idx].name = H2H3_PSEUDO_SCHEME; -+ nva[header_idx].namelen = sizeof(H2H3_PSEUDO_SCHEME) - 1; -+ vptr = Curl_checkheaders(data, STRCONST(H2H3_PSEUDO_SCHEME)); -+ if(vptr) { -+ vptr += sizeof(H2H3_PSEUDO_SCHEME); -+ while(*vptr && ISSPACE(*vptr)) -+ vptr++; -+ nva[header_idx].value = vptr; -+ infof(data, "set pseudo header %s to %s", H2H3_PSEUDO_SCHEME, vptr); -+ } -+ else { -+ if(conn->handler->flags & PROTOPT_SSL) -+ nva[header_idx].value = "https"; -+ else -+ nva[header_idx].value = "http"; -+ } -+ nva[header_idx].valuelen = strlen((char *)nva[header_idx].value); -+ -+ authority_idx = 0; -+ i = 3; -+ while(i < nheader) { -+ size_t hlen; -+ -+ hdbuf = line_end + 2; -+ -+ /* check for next CR, but only within the piece of data left in the given -+ buffer */ -+ line_end = memchr(hdbuf, '\r', len - (hdbuf - (char *)mem)); -+ if(!line_end || (line_end == hdbuf)) -+ goto fail; -+ 
-+ /* header continuation lines are not supported */ -+ if(*hdbuf == ' ' || *hdbuf == '\t') -+ goto fail; -+ -+ for(end = hdbuf; end < line_end && *end != ':'; ++end) -+ ; -+ if(end == hdbuf || end == line_end) -+ goto fail; -+ hlen = end - hdbuf; -+ -+ if(hlen == 4 && strncasecompare("host", hdbuf, 4)) { -+ authority_idx = i; -+ nva[i].name = H2H3_PSEUDO_AUTHORITY; -+ nva[i].namelen = sizeof(H2H3_PSEUDO_AUTHORITY) - 1; -+ } -+ else { -+ nva[i].namelen = (size_t)(end - hdbuf); -+ /* Lower case the header name for HTTP/3 */ -+ Curl_strntolower((char *)hdbuf, hdbuf, nva[i].namelen); -+ nva[i].name = hdbuf; -+ } -+ hdbuf = end + 1; -+ while(*hdbuf == ' ' || *hdbuf == '\t') -+ ++hdbuf; -+ end = line_end; -+ -+ switch(inspect_header((const char *)nva[i].name, nva[i].namelen, hdbuf, -+ end - hdbuf)) { -+ case HEADERINST_IGNORE: -+ /* skip header fields prohibited by HTTP/2 specification. */ -+ --nheader; -+ continue; -+ case HEADERINST_TE_TRAILERS: -+ nva[i].value = "trailers"; -+ nva[i].valuelen = sizeof("trailers") - 1; -+ break; -+ default: -+ nva[i].value = hdbuf; -+ nva[i].valuelen = (end - hdbuf); -+ } -+ -+ ++i; -+ } -+ -+ /* curl-impersonate: Set the index of ":authority" based on libcurl option */ -+ if(http2_pseudo_header_index(data, ":authority", &header_idx)) -+ goto fail; -+ /* :authority must come before non-pseudo header fields */ -+ if(authority_idx && authority_idx != header_idx) { -+ struct h2h3pseudo authority = nva[authority_idx]; -+ for(i = authority_idx; i > header_idx; --i) { -+ nva[i] = nva[i - 1]; -+ } -+ nva[i] = authority; -+ } -+ -+ /* Warn stream may be rejected if cumulative length of headers is too -+ large. 
*/ -+#define MAX_ACC 60000 /* <64KB to account for some overhead */ -+ { -+ size_t acc = 0; -+ -+ for(i = 0; i < nheader; ++i) { -+ acc += nva[i].namelen + nva[i].valuelen; -+ -+ infof(data, "h2h3 [%.*s: %.*s]", -+ (int)nva[i].namelen, nva[i].name, -+ (int)nva[i].valuelen, nva[i].value); -+ } -+ -+ if(acc > MAX_ACC) { -+ infof(data, "http_request: Warning: The cumulative length of all " -+ "headers exceeds %d bytes and that could cause the " -+ "stream to be rejected.", MAX_ACC); -+ } -+ } -+ -+ hreq->entries = nheader; -+ *hp = hreq; -+ -+ return CURLE_OK; -+ -+ fail: -+ free(hreq); -+ return CURLE_OUT_OF_MEMORY; -+} -+ -+void Curl_pseudo_free(struct h2h3req *hp) -+{ -+ free(hp); -+} -+ -+#endif /* USE_NGHTTP2 or HTTP/3 enabled */ diff --git a/lib/http.c b/lib/http.c -index 219dcc2c0..b2c169d14 100644 +index 219dcc2c0..7b04c6c36 100644 --- a/lib/http.c +++ b/lib/http.c @@ -90,6 +90,7 @@ @@ -1092,6 +762,117 @@ index 219dcc2c0..b2c169d14 100644 result = Curl_http_host(data, conn); if(result) return result; +@@ -4777,12 +4894,41 @@ static bool h2_non_field(const char *name, size_t namelen) + return FALSE; + } + ++/* ++ * curl-impersonate: ++ * Determine the position of HTTP/2 pseudo headers. ++ * The pseudo headers ":method", ":path", ":scheme", ":authority" ++ * are sent in different order by different browsers. An important part of the ++ * impersonation is ordering them like the browser does. 
++ */ ++static CURLcode h2_check_pseudo_header_order(const char *order) ++{ ++ if(strlen(order) != 4) ++ return CURLE_BAD_FUNCTION_ARGUMENT; ++ ++ // :method should always be first ++ if(order[0] != 'm') ++ return CURLE_BAD_FUNCTION_ARGUMENT; ++ ++ // All pseudo-headers must be present ++ if(!strchr(order, 'm') || ++ !strchr(order, 'a') || ++ !strchr(order, 's') || ++ !strchr(order, 'p')) ++ return CURLE_BAD_FUNCTION_ARGUMENT; ++ ++ return CURLE_OK; ++} ++ + CURLcode Curl_http_req_to_h2(struct dynhds *h2_headers, + struct httpreq *req, struct Curl_easy *data) + { + const char *scheme = NULL, *authority = NULL; + struct dynhds_entry *e; + size_t i; ++ // Use the Chrome ordering by default: ++ // :method, :authority, :scheme, :path ++ char *order = "masp"; + CURLcode result; + + DEBUGASSERT(req); +@@ -4816,25 +4962,56 @@ CURLcode Curl_http_req_to_h2(struct dynhds *h2_headers, + + Curl_dynhds_reset(h2_headers); + Curl_dynhds_set_opts(h2_headers, DYNHDS_OPT_LOWERCASE); +- result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_METHOD), +- req->method, strlen(req->method)); +- if(!result && scheme) { +- result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_SCHEME), +- scheme, strlen(scheme)); +- } +- if(!result && authority) { +- result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_AUTHORITY), +- authority, strlen(authority)); ++ ++ /* curl-impersonate: order of pseudo headers is different from the default */ ++ if(data->set.str[STRING_HTTP2_PSEUDO_HEADERS_ORDER]) { ++ order = data->set.str[STRING_HTTP2_PSEUDO_HEADERS_ORDER]; + } +- if(!result && req->path) { +- result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_PATH), +- req->path, strlen(req->path)); ++ ++ result = h2_check_pseudo_header_order(order); ++ ++ /* curl-impersonate: add http2 pseudo headers according to the specified order. 
 */ ++ for(i = 0; !result && i < strlen(order); ++i) { ++ switch(order[i]) { ++ case 'm': ++ result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_METHOD), ++ req->method, strlen(req->method)); ++ break; ++ case 'a': ++ if(authority) { ++ result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_AUTHORITY), ++ authority, strlen(authority)); ++ } ++ break; ++ case 's': ++ if(scheme) { ++ result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_SCHEME), ++ scheme, strlen(scheme)); ++ } ++ break; ++ case 'p': ++ if(req->path) { ++ result = Curl_dynhds_add(h2_headers, STRCONST(HTTP_PSEUDO_PATH), ++ req->path, strlen(req->path)); ++ } ++ break; ++ } + } ++ + for(i = 0; !result && i < Curl_dynhds_count(&req->headers); ++i) { + e = Curl_dynhds_getn(&req->headers, i); + if(!h2_non_field(e->name, e->namelen)) { ++ /* curl-impersonate: ++ * Some HTTP/2 servers reject a 'te' header value that is not lowercase (e.g. 'Trailers'). ++ * Convert to lowercase explicitly. ++ */ ++ if(e->namelen == 2 && strcasecompare(e->name, "te")) ++ Curl_dynhds_set_opt(h2_headers, DYNHDS_OPT_LOWERCASE_VAL); ++ + result = Curl_dynhds_add(h2_headers, e->name, e->namelen, + e->value, e->valuelen); ++ ++ Curl_dynhds_del_opt(h2_headers, DYNHDS_OPT_LOWERCASE_VAL); + } + } + diff --git a/lib/http2.c b/lib/http2.c index c666192fc..f7a7697e3 100644 --- a/lib/http2.c