Don't ignore useragent from CURLOPT_USERAGENT

When impersonating with libcurl, the built-in user agent used for
impersonation was overriding any user agent the user had set via
CURLOPT_USERAGENT. The fix takes care to specifically handle a user agent
set with CURLOPT_USERAGENT and use it instead of the built-in one when
it is supplied.
This commit is contained in:
lwthiker
2022-07-30 15:48:39 +03:00
parent 52d74d29ea
commit 17aff1501e
4 changed files with 218 additions and 8 deletions

View File

@@ -678,7 +678,7 @@ index 9453cf55b..01f8918ea 100644
}
nva[i] = authority;
diff --git a/lib/http.c b/lib/http.c
index 258722a60..b8990121d 100644
index 258722a60..9a06e281a 100644
--- a/lib/http.c
+++ b/lib/http.c
@@ -85,6 +85,7 @@
@@ -733,7 +733,7 @@ index 258722a60..b8990121d 100644
#endif
/* loop through one or two lists */
@@ -2069,6 +2079,92 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn,
@@ -2069,6 +2079,108 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn,
*reqp = httpreq;
}
@@ -753,6 +753,7 @@ index 258722a60..b8990121d 100644
+ struct curl_slist *head;
+ struct curl_slist *dup = NULL;
+ struct curl_slist *new_list = NULL;
+ char *uagent;
+
+ if (!data->state.base_headers)
+ return CURLE_OK;
@@ -789,6 +790,21 @@ index 258722a60..b8990121d 100644
+ }
+ }
+
+ /* If the user agent was set with CURLOPT_USERAGENT, but not with
+ * CURLOPT_HTTPHEADER, take it from there instead. */
+ if(!found &&
+ strncasecompare(head->data, "User-Agent", prefix_len) &&
+ data->set.str[STRING_USERAGENT] &&
+ *data->set.str[STRING_USERAGENT]) {
+ uagent = aprintf("User-Agent: %s", data->set.str[STRING_USERAGENT]);
+ if(!uagent) {
+ ret = CURLE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ new_list = Curl_slist_append_nodup(new_list, uagent);
+ found = TRUE;
+ }
+
+ if (!found) {
+ new_list = curl_slist_append(new_list, head->data);
+ }
@@ -826,7 +842,7 @@ index 258722a60..b8990121d 100644
CURLcode Curl_http_useragent(struct Curl_easy *data)
{
/* The User-Agent string might have been allocated in url.c already, because
@@ -3088,6 +3184,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done)
@@ -3088,6 +3200,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done)
http = data->req.p.http;
DEBUGASSERT(http);

View File

@@ -423,7 +423,7 @@ index 9453cf55b..ba9433cb9 100644
}
diff --git a/lib/http.c b/lib/http.c
index 258722a60..b8990121d 100644
index 258722a60..9a06e281a 100644
--- a/lib/http.c
+++ b/lib/http.c
@@ -85,6 +85,7 @@
@@ -478,7 +478,7 @@ index 258722a60..b8990121d 100644
#endif
/* loop through one or two lists */
@@ -2069,6 +2079,92 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn,
@@ -2069,6 +2079,108 @@ void Curl_http_method(struct Curl_easy *data, struct connectdata *conn,
*reqp = httpreq;
}
@@ -498,6 +498,7 @@ index 258722a60..b8990121d 100644
+ struct curl_slist *head;
+ struct curl_slist *dup = NULL;
+ struct curl_slist *new_list = NULL;
+ char *uagent;
+
+ if (!data->state.base_headers)
+ return CURLE_OK;
@@ -534,6 +535,21 @@ index 258722a60..b8990121d 100644
+ }
+ }
+
+ /* If the user agent was set with CURLOPT_USERAGENT, but not with
+ * CURLOPT_HTTPHEADER, take it from there instead. */
+ if(!found &&
+ strncasecompare(head->data, "User-Agent", prefix_len) &&
+ data->set.str[STRING_USERAGENT] &&
+ *data->set.str[STRING_USERAGENT]) {
+ uagent = aprintf("User-Agent: %s", data->set.str[STRING_USERAGENT]);
+ if(!uagent) {
+ ret = CURLE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ new_list = Curl_slist_append_nodup(new_list, uagent);
+ found = TRUE;
+ }
+
+ if (!found) {
+ new_list = curl_slist_append(new_list, head->data);
+ }
@@ -571,7 +587,7 @@ index 258722a60..b8990121d 100644
CURLcode Curl_http_useragent(struct Curl_easy *data)
{
/* The User-Agent string might have been allocated in url.c already, because
@@ -3088,6 +3184,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done)
@@ -3088,6 +3200,11 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done)
http = data->req.p.http;
DEBUGASSERT(http);

View File

@@ -26,6 +26,7 @@ struct opts {
uint16_t local_port_end;
bool insecure;
char *urls[MAX_URLS];
char *user_agent;
struct curl_slist *headers;
};
@@ -76,15 +77,19 @@ int parse_opts(int argc, char **argv, struct opts *opts)
static struct option long_options[] = {
{"header", required_argument, NULL, 'H'},
{"local-port", required_argument, NULL, 'l'},
{"user-agent", required_argument, NULL, 'A'},
{0, 0, NULL, 0}
};
c = getopt_long(argc, argv, "o:kH:", long_options, &option_index);
c = getopt_long(argc, argv, "o:kH:A:", long_options, &option_index);
if (c == -1) {
break;
}
switch (c) {
case 'A':
opts->user_agent = optarg;
break;
case 'l':
r = parse_ports_range(optarg,
&opts->local_port_start,
@@ -185,6 +190,14 @@ int set_opts(CURL *curl, struct opts *opts, FILE *file)
}
}
if (opts->user_agent) {
c = curl_easy_setopt(curl, CURLOPT_USERAGENT, opts->user_agent);
if (c) {
fprintf(stderr, "curl_easy_setopt(CURLOPT_USERAGENT) failed\n");
return 1;
}
}
if (opts->headers) {
c = curl_easy_setopt(curl, CURLOPT_HTTPHEADER, opts->headers);
if (c) {
@@ -246,7 +259,9 @@ int main(int argc, char *argv[])
c = curl_easy_perform(curl);
if (c) {
fprintf(stderr, "curl_easy_perform() failed\n");
fprintf(stderr,
"curl_easy_perform() failed: %d (%s)\n",
c, curl_easy_strerror(c));
goto out;
}

View File

@@ -701,3 +701,166 @@ class TestImpersonation:
_, output_headers = self._parse_nghttpd2_output(output)
for i, header in enumerate(output_headers):
assert header.lower() == headers[i].lower()
@pytest.mark.parametrize(
    "curl_binary, env_vars, ld_preload",
    [
        (
            "minicurl",
            {"CURL_IMPERSONATE": "chrome101"},
            "libcurl-impersonate-chrome",
        ),
        (
            "minicurl",
            {
                "CURL_IMPERSONATE": "chrome101",
                "CURL_IMPERSONATE_HEADERS": "no",
            },
            "libcurl-impersonate-chrome",
        ),
        (
            "minicurl",
            {"CURL_IMPERSONATE": "ff102"},
            "libcurl-impersonate-ff",
        ),
        (
            "minicurl",
            {
                "CURL_IMPERSONATE": "ff102",
                "CURL_IMPERSONATE_HEADERS": "no",
            },
            "libcurl-impersonate-ff",
        ),
    ],
)
async def test_user_agent(
    self, pytestconfig, nghttpd, curl_binary, env_vars, ld_preload
):
    """
    A User-Agent header passed explicitly through CURLOPT_HTTPHEADER
    (minicurl's "-H" flag) must replace the user agent that
    libcurl-impersonate would otherwise send.
    """
    binary_path = os.path.join(
        pytestconfig.getoption("install_dir"), "bin", curl_binary
    )

    # The test relies on LD_PRELOAD, so it only runs on Linux.
    if not sys.platform.startswith("linux"):
        pytest.skip()

    preload_path = os.path.join(
        pytestconfig.getoption("install_dir"), "lib", ld_preload
    )
    self._set_ld_preload(env_vars, preload_path)

    user_agent = "My-User-Agent"
    curl_args = ["-k", "-H", f"User-Agent: {user_agent}"]
    exit_code = self._run_curl(
        binary_path,
        env_vars=env_vars,
        extra_args=curl_args,
        urls=["https://localhost:8443"],
    )
    assert exit_code == 0

    server_output = await self._read_proc_output(nghttpd, timeout=2)
    assert len(server_output) > 0

    _, received = self._parse_nghttpd2_output(server_output)

    # At least one User-Agent header must reach the server, and every
    # one that does must carry exactly the value supplied above.
    ua_prefix = "user-agent:"
    ua_headers = [
        line for line in received if line.lower().startswith(ua_prefix)
    ]
    assert ua_headers
    for line in ua_headers:
        assert line[len(ua_prefix):].strip() == user_agent
@pytest.mark.parametrize(
    "curl_binary, env_vars, ld_preload",
    [
        (
            "minicurl",
            {"CURL_IMPERSONATE": "chrome101"},
            "libcurl-impersonate-chrome",
        ),
        (
            "minicurl",
            {
                "CURL_IMPERSONATE": "chrome101",
                "CURL_IMPERSONATE_HEADERS": "no",
            },
            "libcurl-impersonate-chrome",
        ),
        (
            "minicurl",
            {"CURL_IMPERSONATE": "ff102"},
            "libcurl-impersonate-ff",
        ),
        (
            "minicurl",
            {
                "CURL_IMPERSONATE": "ff102",
                "CURL_IMPERSONATE_HEADERS": "no",
            },
            "libcurl-impersonate-ff",
        ),
    ],
)
async def test_user_agent_curlopt_useragent(
    self, pytestconfig, nghttpd, curl_binary, env_vars, ld_preload
):
    """
    A user agent supplied through CURLOPT_USERAGENT (minicurl's "-A"
    flag) must replace the user agent that libcurl-impersonate would
    otherwise send. See:
    https://github.com/lwthiker/curl-impersonate/issues/51
    """
    binary_path = os.path.join(
        pytestconfig.getoption("install_dir"), "bin", curl_binary
    )

    # The test relies on LD_PRELOAD, so it only runs on Linux.
    if not sys.platform.startswith("linux"):
        pytest.skip()

    preload_path = os.path.join(
        pytestconfig.getoption("install_dir"), "lib", ld_preload
    )
    self._set_ld_preload(env_vars, preload_path)

    user_agent = "My-User-Agent"
    exit_code = self._run_curl(
        binary_path,
        env_vars=env_vars,
        extra_args=["-k", "-A", user_agent],
        urls=["https://localhost:8443"],
    )
    assert exit_code == 0

    server_output = await self._read_proc_output(nghttpd, timeout=2)
    assert len(server_output) > 0

    _, received = self._parse_nghttpd2_output(server_output)

    # At least one User-Agent header must reach the server, and every
    # one that does must carry exactly the value supplied above.
    ua_prefix = "user-agent:"
    ua_headers = [
        line for line in received if line.lower().startswith(ua_prefix)
    ]
    assert ua_headers
    for line in ua_headers:
        assert line[len(ua_prefix):].strip() == user_agent