Merge pull request #94 from lwthiker/control_headers_from_env_var

Allow disabling built-in HTTP headers
This commit is contained in:
lwthiker
2022-07-30 11:19:47 +03:00
committed by GitHub
5 changed files with 242 additions and 75 deletions

View File

@@ -102,18 +102,23 @@ AUR packages are available to Archlinux users:
`libcurl-impersonate.so` is libcurl compiled with the same changes as the command line `curl-impersonate`.
It has an additional API function:
```c
CURLcode curl_easy_impersonate(struct Curl_easy *data, const char *target);
CURLcode curl_easy_impersonate(struct Curl_easy *data, const char *target,
int default_headers);
```
You can call it with the target names, e.g. `chrome101`, and it will internally set all the options and headers that are otherwise set by the wrapper scripts. Specifically it sets:
You can call it with the target names, e.g. `chrome101`, and it will internally set all the options and headers that are otherwise set by the wrapper scripts.
If `default_headers` is set to 0, the built-in list of HTTP headers will not be set, and the user is expected to provide them instead using the regular [`CURLOPT_HTTPHEADER`](https://curl.se/libcurl/c/CURLOPT_HTTPHEADER.html) libcurl option.
Calling the above function sets the following libcurl options:
* `CURLOPT_HTTP_VERSION`
* `CURLOPT_SSLVERSION`, `CURLOPT_SSL_CIPHER_LIST`, `CURLOPT_SSL_EC_CURVES`, `CURLOPT_SSL_ENABLE_NPN`, `CURLOPT_SSL_ENABLE_ALPN`
* `CURLOPT_HTTPBASEHEADER`, `CURLOPT_HTTP2_PSEUDO_HEADERS_ORDER` (non-standard HTTP options created for this project).
* `CURLOPT_HTTPBASEHEADER`, if `default_headers` is non-zero (this is a non-standard HTTP option created for this project).
* `CURLOPT_HTTP2_PSEUDO_HEADERS_ORDER` (non-standard HTTP/2 option created for this project).
* `CURLOPT_SSL_ENABLE_ALPS`, `CURLOPT_SSL_SIG_HASH_ALGS`, `CURLOPT_SSL_CERT_COMPRESSION`, `CURLOPT_SSL_ENABLE_TICKET` (non-standard TLS options created for this project).
Note that if you call `curl_easy_setopt()` later with one of the above it will override the options set by `curl_easy_impersonate()`.
### Using CURL_IMPERSONATE env var
*Experimental*: If your application uses `libcurl` already, you can replace the existing library at runtime with `LD_PRELOAD` (Linux only). You can then set the `CURL_IMPERSONATE` env var. For example:
If your application uses `libcurl` already, you can replace the existing library at runtime with `LD_PRELOAD` (Linux only). You can then set the `CURL_IMPERSONATE` env var. For example:
```bash
LD_PRELOAD=/path/to/libcurl-impersonate.so CURL_IMPERSONATE=chrome101 my_app
```
@@ -123,7 +128,12 @@ The `CURL_IMPERSONATE` env var has two effects:
This means that all the options needed for impersonation will be automatically set for any curl handle.
Note that the above will NOT WORK for `curl` itself because the curl tool overrides the TLS settings. Use the wrapper scripts instead.
If you need precise control over the HTTP headers, set `CURL_IMPERSONATE_HEADERS=no` to disable the built-in list of HTTP headers, then set them yourself with `curl_easy_setopt()`. For example:
```bash
LD_PRELOAD=/path/to/libcurl-impersonate.so CURL_IMPERSONATE=chrome101 CURL_IMPERSONATE_HEADERS=no my_app
```
Note that the `LD_PRELOAD` method will NOT WORK for `curl` itself because the curl tool overrides the TLS settings. Use the wrapper scripts instead.
## Contents

View File

@@ -125,10 +125,10 @@ index b00648e79..8f8f19799 100644
} CURLoption;
diff --git a/include/curl/easy.h b/include/curl/easy.h
index 9c7e63ada..d93353c69 100644
index 9c7e63ada..a3c54c4af 100644
--- a/include/curl/easy.h
+++ b/include/curl/easy.h
@@ -43,6 +43,15 @@ CURL_EXTERN CURLcode curl_easy_setopt(CURL *curl, CURLoption option, ...);
@@ -43,6 +43,16 @@ CURL_EXTERN CURLcode curl_easy_setopt(CURL *curl, CURLoption option, ...);
CURL_EXTERN CURLcode curl_easy_perform(CURL *curl);
CURL_EXTERN void curl_easy_cleanup(CURL *curl);
@@ -139,7 +139,8 @@ index 9c7e63ada..d93353c69 100644
+ * created as a separate API function and not just as another option to
+ * curl_easy_setopt().
+ */
+CURL_EXTERN CURLcode curl_easy_impersonate(CURL *curl, const char *target);
+CURL_EXTERN CURLcode curl_easy_impersonate(CURL *curl, const char *target,
+ int default_headers);
+
/*
* NAME curl_easy_getinfo()
@@ -237,7 +238,7 @@ index 9bd8e324b..bfd5e90e2 100644
inet_pton.c \
krb5.c \
diff --git a/lib/easy.c b/lib/easy.c
index 704a59df6..c3ee9ac97 100644
index 704a59df6..9cdbdf808 100644
--- a/lib/easy.c
+++ b/lib/easy.c
@@ -81,6 +81,8 @@
@@ -249,7 +250,7 @@ index 704a59df6..c3ee9ac97 100644
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@@ -332,6 +334,119 @@ CURLsslset curl_global_sslset(curl_sslbackend id, const char *name,
@@ -332,6 +334,122 @@ CURLsslset curl_global_sslset(curl_sslbackend id, const char *name,
return rc;
}
@@ -258,7 +259,8 @@ index 704a59df6..c3ee9ac97 100644
+ * Call curl_easy_setopt() with all the needed options as defined in the
+ * 'impersonations' array.
+ * */
+CURLcode curl_easy_impersonate(struct Curl_easy *data, const char *target)
+CURLcode curl_easy_impersonate(struct Curl_easy *data, const char *target,
+ int default_headers)
+{
+ int i;
+ int ret;
@@ -333,21 +335,23 @@ index 704a59df6..c3ee9ac97 100644
+ return ret;
+ }
+
+ /* Build a linked list out of the static array of headers. */
+ for(i = 0; i < IMPERSONATE_MAX_HEADERS; i++) {
+ if(opts->http_headers[i]) {
+ headers = curl_slist_append(headers, opts->http_headers[i]);
+ if(!headers) {
+ return CURLE_OUT_OF_MEMORY;
+ if(default_headers) {
+ /* Build a linked list out of the static array of headers. */
+ for(i = 0; i < IMPERSONATE_MAX_HEADERS; i++) {
+ if(opts->http_headers[i]) {
+ headers = curl_slist_append(headers, opts->http_headers[i]);
+ if(!headers) {
+ return CURLE_OUT_OF_MEMORY;
+ }
+ }
+ }
+ }
+
+ if(headers) {
+ ret = curl_easy_setopt(data, CURLOPT_HTTPBASEHEADER, headers);
+ curl_slist_free_all(headers);
+ if(ret)
+ return ret;
+ if(headers) {
+ ret = curl_easy_setopt(data, CURLOPT_HTTPBASEHEADER, headers);
+ curl_slist_free_all(headers);
+ if(ret)
+ return ret;
+ }
+ }
+
+ if(opts->http2_pseudo_headers_order) {
@@ -369,15 +373,16 @@ index 704a59df6..c3ee9ac97 100644
/*
* curl_easy_init() is the external interface to alloc, setup and init an
* easy handle that is returned. If anything goes wrong, NULL is returned.
@@ -340,6 +455,7 @@ struct Curl_easy *curl_easy_init(void)
@@ -340,6 +458,8 @@ struct Curl_easy *curl_easy_init(void)
{
CURLcode result;
struct Curl_easy *data;
+ char *target;
+ char *env_target;
+ char *env_headers;
/* Make sure we inited the global SSL stuff */
global_init_lock();
@@ -362,6 +478,22 @@ struct Curl_easy *curl_easy_init(void)
@@ -362,6 +482,29 @@ struct Curl_easy *curl_easy_init(void)
return NULL;
}
@@ -387,10 +392,17 @@ index 704a59df6..c3ee9ac97 100644
+ * This is a bit hacky but allows seamless integration of libcurl-impersonate
+ * without code modifications to the app.
+ */
+ target = curl_getenv("CURL_IMPERSONATE");
+ if(target) {
+ result = curl_easy_impersonate(data, target);
+ free(target);
+ env_target = curl_getenv("CURL_IMPERSONATE");
+ if(env_target) {
+ env_headers = curl_getenv("CURL_IMPERSONATE_HEADERS");
+ if(env_headers) {
+ result = curl_easy_impersonate(data, env_target,
+ !Curl_strcasecompare(env_headers, "no"));
+ free(env_headers);
+ } else {
+ result = curl_easy_impersonate(data, env_target, true);
+ }
+ free(env_target);
+ if(result) {
+ Curl_close(&data);
+ return NULL;
@@ -400,7 +412,7 @@ index 704a59df6..c3ee9ac97 100644
return data;
}
@@ -936,6 +1068,13 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
@@ -936,6 +1079,13 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
outcurl->state.referer_alloc = TRUE;
}
@@ -414,24 +426,36 @@ index 704a59df6..c3ee9ac97 100644
/* Reinitialize an SSL engine for the new handle
* note: the engine name has already been copied by dupset */
if(outcurl->set.str[STRING_SSL_ENGINE]) {
@@ -1025,6 +1164,8 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
@@ -1025,6 +1175,9 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
*/
void curl_easy_reset(struct Curl_easy *data)
{
+ char *target;
+ char *env_target;
+ char *env_headers;
+
Curl_free_request_state(data);
/* zero out UserDefined data: */
@@ -1049,6 +1190,12 @@ void curl_easy_reset(struct Curl_easy *data)
@@ -1049,6 +1202,23 @@ void curl_easy_reset(struct Curl_easy *data)
#if !defined(CURL_DISABLE_HTTP) && !defined(CURL_DISABLE_CRYPTO_AUTH)
Curl_http_auth_cleanup_digest(data);
#endif
+
+ target = curl_getenv("CURL_IMPERSONATE");
+ if(target) {
+ curl_easy_impersonate(data, target);
+ free(target);
+ /*
+ * curl-impersonate: Hook into curl_easy_reset() to set the required options
+ * from an environment variable, just like in curl_easy_init().
+ */
+ env_target = curl_getenv("CURL_IMPERSONATE");
+ if(env_target) {
+ env_headers = curl_getenv("CURL_IMPERSONATE_HEADERS");
+ if(env_headers) {
+ curl_easy_impersonate(data, env_target,
+ !Curl_strcasecompare(env_headers, "no"));
+ free(env_headers);
+ } else {
+ curl_easy_impersonate(data, env_target, true);
+ }
+ free(env_target);
+ }
}

View File

@@ -97,10 +97,10 @@ index b00648e79..eefa36f2e 100644
} CURLoption;
diff --git a/include/curl/easy.h b/include/curl/easy.h
index 9c7e63ada..d93353c69 100644
index 9c7e63ada..a3c54c4af 100644
--- a/include/curl/easy.h
+++ b/include/curl/easy.h
@@ -43,6 +43,15 @@ CURL_EXTERN CURLcode curl_easy_setopt(CURL *curl, CURLoption option, ...);
@@ -43,6 +43,16 @@ CURL_EXTERN CURLcode curl_easy_setopt(CURL *curl, CURLoption option, ...);
CURL_EXTERN CURLcode curl_easy_perform(CURL *curl);
CURL_EXTERN void curl_easy_cleanup(CURL *curl);
@@ -111,7 +111,8 @@ index 9c7e63ada..d93353c69 100644
+ * created as a separate API function and not just as another option to
+ * curl_easy_setopt().
+ */
+CURL_EXTERN CURLcode curl_easy_impersonate(CURL *curl, const char *target);
+CURL_EXTERN CURLcode curl_easy_impersonate(CURL *curl, const char *target,
+ int default_headers);
+
/*
* NAME curl_easy_getinfo()
@@ -209,7 +210,7 @@ index 9bd8e324b..bfd5e90e2 100644
inet_pton.c \
krb5.c \
diff --git a/lib/easy.c b/lib/easy.c
index 704a59df6..03d710757 100644
index 704a59df6..349d03933 100644
--- a/lib/easy.c
+++ b/lib/easy.c
@@ -81,6 +81,8 @@
@@ -221,7 +222,7 @@ index 704a59df6..03d710757 100644
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@@ -332,6 +334,73 @@ CURLsslset curl_global_sslset(curl_sslbackend id, const char *name,
@@ -332,6 +334,76 @@ CURLsslset curl_global_sslset(curl_sslbackend id, const char *name,
return rc;
}
@@ -230,7 +231,8 @@ index 704a59df6..03d710757 100644
+ * Call curl_easy_setopt() with all the needed options as defined in the
+ * 'impersonations' array.
+ * */
+CURLcode curl_easy_impersonate(struct Curl_easy *data, const char *target)
+CURLcode curl_easy_impersonate(struct Curl_easy *data, const char *target,
+ int default_headers)
+{
+ int i;
+ int ret;
@@ -267,21 +269,23 @@ index 704a59df6..03d710757 100644
+ return ret;
+ }
+
+ /* Build a linked list out of the static array of headers. */
+ for(i = 0; i < IMPERSONATE_MAX_HEADERS; i++) {
+ if(opts->http_headers[i]) {
+ headers = curl_slist_append(headers, opts->http_headers[i]);
+ if(!headers) {
+ return CURLE_OUT_OF_MEMORY;
+ if(default_headers) {
+ /* Build a linked list out of the static array of headers. */
+ for(i = 0; i < IMPERSONATE_MAX_HEADERS; i++) {
+ if(opts->http_headers[i]) {
+ headers = curl_slist_append(headers, opts->http_headers[i]);
+ if(!headers) {
+ return CURLE_OUT_OF_MEMORY;
+ }
+ }
+ }
+ }
+
+ if(headers) {
+ ret = curl_easy_setopt(data, CURLOPT_HTTPBASEHEADER, headers);
+ curl_slist_free_all(headers);
+ if(ret)
+ return ret;
+ if(headers) {
+ ret = curl_easy_setopt(data, CURLOPT_HTTPBASEHEADER, headers);
+ curl_slist_free_all(headers);
+ if(ret)
+ return ret;
+ }
+ }
+
+ /* Always enable all supported compressions. */
@@ -295,15 +299,16 @@ index 704a59df6..03d710757 100644
/*
* curl_easy_init() is the external interface to alloc, setup and init an
* easy handle that is returned. If anything goes wrong, NULL is returned.
@@ -340,6 +409,7 @@ struct Curl_easy *curl_easy_init(void)
@@ -340,6 +412,8 @@ struct Curl_easy *curl_easy_init(void)
{
CURLcode result;
struct Curl_easy *data;
+ char *target;
+ char *env_target;
+ char *env_headers;
/* Make sure we inited the global SSL stuff */
global_init_lock();
@@ -362,6 +432,22 @@ struct Curl_easy *curl_easy_init(void)
@@ -362,6 +436,29 @@ struct Curl_easy *curl_easy_init(void)
return NULL;
}
@@ -313,10 +318,17 @@ index 704a59df6..03d710757 100644
+ * This is a bit hacky but allows seamless integration of libcurl-impersonate
+ * without code modifications to the app.
+ */
+ target = curl_getenv("CURL_IMPERSONATE");
+ if(target) {
+ result = curl_easy_impersonate(data, target);
+ free(target);
+ env_target = curl_getenv("CURL_IMPERSONATE");
+ if(env_target) {
+ env_headers = curl_getenv("CURL_IMPERSONATE_HEADERS");
+ if(env_headers) {
+ result = curl_easy_impersonate(data, env_target,
+ !Curl_strcasecompare(env_headers, "no"));
+ free(env_headers);
+ } else {
+ result = curl_easy_impersonate(data, env_target, true);
+ }
+ free(env_target);
+ if(result) {
+ Curl_close(&data);
+ return NULL;
@@ -326,7 +338,7 @@ index 704a59df6..03d710757 100644
return data;
}
@@ -936,6 +1022,13 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
@@ -936,6 +1033,13 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
outcurl->state.referer_alloc = TRUE;
}
@@ -340,24 +352,36 @@ index 704a59df6..03d710757 100644
/* Reinitialize an SSL engine for the new handle
* note: the engine name has already been copied by dupset */
if(outcurl->set.str[STRING_SSL_ENGINE]) {
@@ -1025,6 +1118,8 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
@@ -1025,6 +1129,9 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
*/
void curl_easy_reset(struct Curl_easy *data)
{
+ char *target;
+ char *env_target;
+ char *env_headers;
+
Curl_free_request_state(data);
/* zero out UserDefined data: */
@@ -1049,6 +1144,12 @@ void curl_easy_reset(struct Curl_easy *data)
@@ -1049,6 +1156,23 @@ void curl_easy_reset(struct Curl_easy *data)
#if !defined(CURL_DISABLE_HTTP) && !defined(CURL_DISABLE_CRYPTO_AUTH)
Curl_http_auth_cleanup_digest(data);
#endif
+
+ target = curl_getenv("CURL_IMPERSONATE");
+ if(target) {
+ curl_easy_impersonate(data, target);
+ free(target);
+ /*
+ * curl-impersonate: Hook into curl_easy_reset() to set the required options
+ * from an environment variable, just like in curl_easy_init().
+ */
+ env_target = curl_getenv("CURL_IMPERSONATE");
+ if(env_target) {
+ env_headers = curl_getenv("CURL_IMPERSONATE_HEADERS");
+ if(env_headers) {
+ curl_easy_impersonate(data, env_target,
+ !Curl_strcasecompare(env_headers, "no"));
+ free(env_headers);
+ } else {
+ curl_easy_impersonate(data, env_target, true);
+ }
+ free(env_target);
+ }
}

View File

@@ -18,6 +18,7 @@
/* Support up to 16 URLs */
#define MAX_URLS 16
/* Command line options. */
struct opts {
char *outfile;
@@ -25,6 +26,7 @@ struct opts {
uint16_t local_port_end;
bool insecure;
char *urls[MAX_URLS];
struct curl_slist *headers;
};
int parse_ports_range(char *str, uint16_t *start, uint16_t *end)
@@ -63,6 +65,7 @@ int parse_opts(int argc, char **argv, struct opts *opts)
int c;
int r;
int i;
struct curl_slist *tmp;
memset(opts, 0, sizeof(*opts));
@@ -71,10 +74,12 @@ int parse_opts(int argc, char **argv, struct opts *opts)
while (1) {
int option_index = 0;
static struct option long_options[] = {
{"local-port", required_argument, NULL, 'l'}
{"header", required_argument, NULL, 'H'},
{"local-port", required_argument, NULL, 'l'},
{0, 0, NULL, 0}
};
c = getopt_long(argc, argv, "o:k", long_options, &option_index);
c = getopt_long(argc, argv, "o:kH:", long_options, &option_index);
if (c == -1) {
break;
}
@@ -94,16 +99,29 @@ int parse_opts(int argc, char **argv, struct opts *opts)
case 'k':
opts->insecure = true;
break;
case 'H':
tmp = curl_slist_append(opts->headers, optarg);
if (!tmp) {
fprintf(stderr, "curl_slist_append() failed\n");
if (opts->headers) {
curl_slist_free_all(opts->headers);
}
return 1;
}
opts->headers = tmp;
break;
case '?':
break;
}
}
/* No URL supplied. */
i = 0;
if (optind >= argc) {
return 1;
}
/* The rest of the options are URLs */
i = 0;
while (optind < argc) {
opts->urls[i++] = argv[optind++];
}
@@ -111,6 +129,13 @@ int parse_opts(int argc, char **argv, struct opts *opts)
return 0;
}
/*
 * Release the custom header list accumulated from the -H/--header
 * command line options, if any was built.
 */
void clean_opts(struct opts *opts)
{
    struct curl_slist *list = opts->headers;

    /* Nothing to release when no header option was given. */
    if (!list) {
        return;
    }

    curl_slist_free_all(list);
}
/* Set all options except for the URL. */
int set_opts(CURL *curl, struct opts *opts, FILE *file)
{
@@ -160,6 +185,14 @@ int set_opts(CURL *curl, struct opts *opts, FILE *file)
}
}
if (opts->headers) {
c = curl_easy_setopt(curl, CURLOPT_HTTPHEADER, opts->headers);
if (c) {
fprintf(stderr, "curl_easy_setopt(CURLOPT_HTTPHEADER) failed\n");
return 1;
}
}
return 0;
}
@@ -180,7 +213,8 @@ int main(int argc, char *argv[])
file = fopen(opts.outfile, "w");
if (!file) {
fprintf(stderr, "Failed opening %s for writing\n", opts.outfile);
exit(1);
c = 1;
goto out_clean_opts;
}
} else {
file = stdout;
@@ -231,5 +265,7 @@ out_close:
if (file) {
fclose(file);
}
out_clean_opts:
clean_opts(&opts);
return c;
}

View File

@@ -8,6 +8,7 @@ import logging
import pathlib
import subprocess
import tempfile
import itertools
from typing import List
import yaml
@@ -628,3 +629,75 @@ class TestImpersonation:
"<html>" in body or
"<!doctype html>" in body
)
@pytest.mark.parametrize(
    "curl_binary, env_vars, ld_preload",
    [
        (
            "minicurl",
            {
                "CURL_IMPERSONATE": "chrome101",
                "CURL_IMPERSONATE_HEADERS": "no"
            },
            "libcurl-impersonate-chrome"
        ),
        (
            "minicurl",
            {
                "CURL_IMPERSONATE": "ff102",
                "CURL_IMPERSONATE_HEADERS": "no"
            },
            "libcurl-impersonate-ff",
        )
    ]
)
async def test_no_builtin_headers(self,
                                  pytestconfig,
                                  nghttpd,
                                  curl_binary,
                                  env_vars,
                                  ld_preload):
    """
    Ensure the built-in headers of libcurl-impersonate are not added when
    the CURL_IMPERSONATE_HEADERS environment variable is set to "no".

    The test runs the given binary with libcurl-impersonate preloaded,
    passes a fixed list of custom headers via "-H", and checks that the
    headers reported by the nghttpd fixture match that list entry by
    entry (case-insensitively).
    """
    curl_binary = os.path.join(
        pytestconfig.getoption("install_dir"), "bin", curl_binary
    )

    # The test relies on preloading the library, so it is skipped on
    # anything that is not Linux.
    if not sys.platform.startswith("linux"):
        pytest.skip()

    self._set_ld_preload(env_vars, os.path.join(
        pytestconfig.getoption("install_dir"), "lib", ld_preload
    ))

    # Use some custom headers with a specific order.
    # We will test that the headers are sent in the exact given order, as
    # it is important for users to be able to control the exact headers
    # content and order.
    # NOTE: every entry must end with a comma. Adjacent string literals
    # are concatenated by Python, which would silently merge two headers
    # into a single malformed one.
    headers = [
        "X-Hello: World",
        "Accept: application/json",
        "X-Goodbye: World",
        "Accept-Encoding: deflate, gzip, br",
        "X-Foo: Bar",
        "User-Agent: curl-impersonate",
    ]

    # Flatten into ["-H", header1, "-H", header2, ...].
    header_args = list(itertools.chain.from_iterable(
        ["-H", header] for header in headers
    ))

    ret = self._run_curl(curl_binary,
                         env_vars=env_vars,
                         extra_args=["-k"] + header_args,
                         urls=["https://localhost:8443"])
    assert ret == 0

    output = await self._read_proc_output(nghttpd, timeout=2)

    assert len(output) > 0

    _, output_headers = self._parse_nghttpd2_output(output)
    for i, header in enumerate(output_headers):
        assert header.lower() == headers[i].lower()