From 17aff1501e77af91d86a2b182bb2ee75adb1c9fb Mon Sep 17 00:00:00 2001 From: lwthiker Date: Sat, 30 Jul 2022 15:48:39 +0300 Subject: [PATCH] Don't ignore useragent from CURLOPT_USERAGENT When impersonating with libcurl, the built-in user agent used for impersonation was overriding any useragent the user had set via CURLOPT_USERAGENT. The fix takes care to specifically handle user agent set with CURLOPT_USERAGENT and use it instead of the built-in one when it is supplied. --- chrome/patches/curl-impersonate.patch | 22 +++- firefox/patches/curl-impersonate.patch | 22 +++- tests/minicurl.c | 19 ++- tests/test_impersonate.py | 163 +++++++++++++++++++++++++ 4 files changed, 218 insertions(+), 8 deletions(-) diff --git a/chrome/patches/curl-impersonate.patch b/chrome/patches/curl-impersonate.patch index c20b21d..2f3a571 100644 --- a/chrome/patches/curl-impersonate.patch +++ b/chrome/patches/curl-impersonate.patch @@ -678,7 +678,7 @@ index 9453cf55b..01f8918ea 100644 } nva[i] = authority; diff --git a/lib/http.c b/lib/http.c -index 258722a60..b8990121d 100644 +index 258722a60..9a06e281a 100644 --- a/lib/http.c +++ b/lib/http.c @@ -85,6 +85,7 @@ @@ -733,7 +733,7 @@ index 258722a60..b8990121d 100644 #endif /* loop through one or two lists */ -@@ -2069,6 +2079,92 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn, +@@ -2069,6 +2079,108 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn, *reqp = httpreq; } @@ -753,6 +753,7 @@ index 258722a60..b8990121d 100644 + struct curl_slist *head; + struct curl_slist *dup = NULL; + struct curl_slist *new_list = NULL; ++ char *uagent; + + if (!data->state.base_headers) + return CURLE_OK; @@ -789,6 +790,21 @@ index 258722a60..b8990121d 100644 + } + } + ++ /* If the user agent was set with CURLOPT_USERAGENT, but not with ++ * CURLOPT_HTTPHEADER, take it from there instead. 
*/ ++ if(!found && ++ strncasecompare(head->data, "User-Agent", prefix_len) && ++ data->set.str[STRING_USERAGENT] && ++ *data->set.str[STRING_USERAGENT]) { ++ uagent = aprintf("User-Agent: %s", data->set.str[STRING_USERAGENT]); ++ if(!uagent) { ++ ret = CURLE_OUT_OF_MEMORY; ++ goto fail; ++ } ++ new_list = Curl_slist_append_nodup(new_list, uagent); ++ found = TRUE; ++ } ++ + if (!found) { + new_list = curl_slist_append(new_list, head->data); + } @@ -826,7 +842,7 @@ index 258722a60..b8990121d 100644 CURLcode Curl_http_useragent(struct Curl_easy *data) { /* The User-Agent string might have been allocated in url.c already, because -@@ -3088,6 +3184,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done) +@@ -3088,6 +3200,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done) http = data->req.p.http; DEBUGASSERT(http); diff --git a/firefox/patches/curl-impersonate.patch b/firefox/patches/curl-impersonate.patch index d40e224..88091e3 100644 --- a/firefox/patches/curl-impersonate.patch +++ b/firefox/patches/curl-impersonate.patch @@ -423,7 +423,7 @@ index 9453cf55b..ba9433cb9 100644 } diff --git a/lib/http.c b/lib/http.c -index 258722a60..b8990121d 100644 +index 258722a60..9a06e281a 100644 --- a/lib/http.c +++ b/lib/http.c @@ -85,6 +85,7 @@ @@ -478,7 +478,7 @@ index 258722a60..b8990121d 100644 #endif /* loop through one or two lists */ -@@ -2069,6 +2079,92 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn, +@@ -2069,6 +2079,108 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn, *reqp = httpreq; } @@ -498,6 +498,7 @@ index 258722a60..b8990121d 100644 + struct curl_slist *head; + struct curl_slist *dup = NULL; + struct curl_slist *new_list = NULL; ++ char *uagent; + + if (!data->state.base_headers) + return CURLE_OK; @@ -534,6 +535,21 @@ index 258722a60..b8990121d 100644 + } + } + ++ /* If the user agent was set with CURLOPT_USERAGENT, but not with ++ * CURLOPT_HTTPHEADER, take it from there instead. 
*/ ++ if(!found && ++ strncasecompare(head->data, "User-Agent", prefix_len) && ++ data->set.str[STRING_USERAGENT] && ++ *data->set.str[STRING_USERAGENT]) { ++ uagent = aprintf("User-Agent: %s", data->set.str[STRING_USERAGENT]); ++ if(!uagent) { ++ ret = CURLE_OUT_OF_MEMORY; ++ goto fail; ++ } ++ new_list = Curl_slist_append_nodup(new_list, uagent); ++ found = TRUE; ++ } ++ + if (!found) { + new_list = curl_slist_append(new_list, head->data); + } @@ -571,7 +587,7 @@ index 258722a60..b8990121d 100644 CURLcode Curl_http_useragent(struct Curl_easy *data) { /* The User-Agent string might have been allocated in url.c already, because -@@ -3088,6 +3184,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done) +@@ -3088,6 +3200,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done) http = data->req.p.http; DEBUGASSERT(http); diff --git a/tests/minicurl.c b/tests/minicurl.c index ac87fb9..fdce987 100644 --- a/tests/minicurl.c +++ b/tests/minicurl.c @@ -26,6 +26,7 @@ struct opts { uint16_t local_port_end; bool insecure; char *urls[MAX_URLS]; + char *user_agent; struct curl_slist *headers; }; @@ -76,15 +77,19 @@ int parse_opts(int argc, char **argv, struct opts *opts) static struct option long_options[] = { {"header", required_argument, NULL, 'H'}, {"local-port", required_argument, NULL, 'l'}, + {"user-agent", required_argument, NULL, 'A'}, {0, 0, NULL, 0} }; - c = getopt_long(argc, argv, "o:kH:", long_options, &option_index); + c = getopt_long(argc, argv, "o:kH:A:", long_options, &option_index); if (c == -1) { break; } switch (c) { + case 'A': + opts->user_agent = optarg; + break; case 'l': r = parse_ports_range(optarg, &opts->local_port_start, @@ -185,6 +190,14 @@ int set_opts(CURL *curl, struct opts *opts, FILE *file) } } + if (opts->user_agent) { + c = curl_easy_setopt(curl, CURLOPT_USERAGENT, opts->user_agent); + if (c) { + fprintf(stderr, "curl_easy_setopt(CURLOPT_USERAGENT) failed\n"); + return 1; + } + } + if (opts->headers) { c = curl_easy_setopt(curl, 
CURLOPT_HTTPHEADER, opts->headers); if (c) { @@ -246,7 +259,9 @@ int main(int argc, char *argv[]) c = curl_easy_perform(curl); if (c) { - fprintf(stderr, "curl_easy_perform() failed\n"); + fprintf(stderr, + "curl_easy_perform() failed: %d (%s)\n", + c, curl_easy_strerror(c)); goto out; } diff --git a/tests/test_impersonate.py b/tests/test_impersonate.py index 9e0110e..753b180 100644 --- a/tests/test_impersonate.py +++ b/tests/test_impersonate.py @@ -701,3 +701,166 @@ class TestImpersonation: _, output_headers = self._parse_nghttpd2_output(output) for i, header in enumerate(output_headers): assert header.lower() == headers[i].lower() + + @pytest.mark.parametrize( + "curl_binary, env_vars, ld_preload", + [ + ( + "minicurl", + { + "CURL_IMPERSONATE": "chrome101" + }, + "libcurl-impersonate-chrome", + ), + ( + "minicurl", + { + "CURL_IMPERSONATE": "chrome101", + "CURL_IMPERSONATE_HEADERS": "no" + }, + "libcurl-impersonate-chrome", + ), + ( + "minicurl", + { + "CURL_IMPERSONATE": "ff102" + }, + "libcurl-impersonate-ff", + ), + ( + "minicurl", + { + "CURL_IMPERSONATE": "ff102", + "CURL_IMPERSONATE_HEADERS": "no" + }, + "libcurl-impersonate-ff", + ) + ] + ) + async def test_user_agent( + self, + pytestconfig, + nghttpd, + curl_binary, + env_vars, + ld_preload + ): + """ + Ensure that any user-agent set with CURLOPT_HTTPHEADER will override + the one set by libcurl-impersonate. 
+ """ + curl_binary = os.path.join( + pytestconfig.getoption("install_dir"), "bin", curl_binary + ) + + if not sys.platform.startswith("linux"): + pytest.skip() + + self._set_ld_preload(env_vars, os.path.join( + pytestconfig.getoption("install_dir"), "lib", ld_preload + )) + + user_agent = "My-User-Agent" + + ret = self._run_curl(curl_binary, + env_vars=env_vars, + extra_args=[ + "-k", + "-H", + f"User-Agent: {user_agent}" + ], + urls=["https://localhost:8443"]) + assert ret == 0 + + output = await self._read_proc_output(nghttpd, timeout=2) + + assert len(output) > 0 + + _, headers = self._parse_nghttpd2_output(output) + assert any([ + header.lower().startswith("user-agent:") for header in headers + ]) + + for header in headers: + if header.lower().startswith("user-agent:"): + assert header[len("user-agent:"):].strip() == user_agent + + @pytest.mark.parametrize( + "curl_binary, env_vars, ld_preload", + [ + ( + "minicurl", + { + "CURL_IMPERSONATE": "chrome101" + }, + "libcurl-impersonate-chrome", + ), + ( + "minicurl", + { + "CURL_IMPERSONATE": "chrome101", + "CURL_IMPERSONATE_HEADERS": "no" + }, + "libcurl-impersonate-chrome", + ), + ( + "minicurl", + { + "CURL_IMPERSONATE": "ff102" + }, + "libcurl-impersonate-ff", + ), + ( + "minicurl", + { + "CURL_IMPERSONATE": "ff102", + "CURL_IMPERSONATE_HEADERS": "no" + }, + "libcurl-impersonate-ff", + ) + ] + ) + async def test_user_agent_curlopt_useragent( + self, + pytestconfig, + nghttpd, + curl_binary, + env_vars, + ld_preload + ): + """ + Ensure that any user-agent set with CURLOPT_USERAGENT will override + the one set by libcurl-impersonate. 
See: + https://github.com/lwthiker/curl-impersonate/issues/51 + """ + curl_binary = os.path.join( + pytestconfig.getoption("install_dir"), "bin", curl_binary + ) + + if not sys.platform.startswith("linux"): + pytest.skip() + + self._set_ld_preload(env_vars, os.path.join( + pytestconfig.getoption("install_dir"), "lib", ld_preload + )) + + user_agent = "My-User-Agent" + + ret = self._run_curl(curl_binary, + env_vars=env_vars, + extra_args=["-k", "-A", user_agent], + urls=["https://localhost:8443"]) + assert ret == 0 + + output = await self._read_proc_output(nghttpd, timeout=2) + + assert len(output) > 0 + + _, headers = self._parse_nghttpd2_output(output) + assert any([ + header.lower().startswith("user-agent:") for header in headers + ]) + + for header in headers: + if header.lower().startswith("user-agent:"): + assert header[len("user-agent:"):].strip() == user_agent