Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upload the package zip in chunks for GHA #1043

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
cd70f77
Upload the package zip in chunks for GHA.
quyykk Apr 28, 2023
7fa81d1
Merge remote-tracking branch 'origin/main' into HEAD
BillyONeal Apr 28, 2023
df0a632
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk May 14, 2023
db160c1
Fix merge conflicts.
quyykk May 14, 2023
5f343b1
Address review comments
quyykk May 14, 2023
905209b
Fix formatting
quyykk May 14, 2023
77e5c79
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Aug 30, 2023
68f9a6b
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Aug 30, 2023
41d09b3
Pass the data through stdin instead of spliting the archive on disk.
quyykk Aug 30, 2023
d025512
Fix warning.
quyykk Aug 30, 2023
126232c
Fix formatting.
quyykk Aug 30, 2023
18868f1
Send the correct range in the HTTP request.
quyykk Aug 30, 2023
a1bf8c6
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Sep 7, 2023
e6bf304
Some small cleanups.
quyykk Sep 7, 2023
6faa588
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Sep 8, 2023
8b15edd
Address review comments.
quyykk Sep 9, 2023
1150fed
Format :/
quyykk Sep 9, 2023
dbed226
Update src/vcpkg/base/downloads.cpp
quyykk Sep 9, 2023
ef45cb8
More fixes.
quyykk Sep 9, 2023
e8bd3a0
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Sep 14, 2023
a29b364
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Oct 8, 2023
6e27717
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Nov 11, 2023
710d38f
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Nov 19, 2023
78b6bf3
Merge remote-tracking branch 'upstream/main' into gha-upload-fixes
quyykk Mar 11, 2024
2568af0
Format fixes.
quyykk Mar 11, 2024
d397db3
Apply suggestions from code review. Thanks!
quyykk Mar 12, 2024
608a72b
Format fixes.
quyykk Mar 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions include/vcpkg/base/downloads.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@ namespace vcpkg
StringView url,
const std::vector<std::string>& secrets,
View<std::string> headers,
const Path& file,
StringView method = "PUT");
const Path& file);
ExpectedL<Unit> patch_file(const Filesystem& fs,
StringView url,
View<std::string> headers,
const Path& file,
std::size_t file_size,
std::size_t chunk_size = 450 * 1024 * 1024);

ExpectedL<std::string> invoke_http_request(StringView method,
View<std::string> headers,
Expand Down
68 changes: 63 additions & 5 deletions src/vcpkg/base/downloads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

namespace vcpkg
{
static constexpr StringLiteral guid_marker = "9a1db05f-a65d-419b-aa72-037fb4d0672e";

static std::string replace_secrets(std::string input, View<std::string> secrets)
{
const auto replacement = msg::format(msgSecretBanner);
Expand Down Expand Up @@ -378,7 +380,7 @@
std::vector<int>* out,
View<std::string> secrets)
{
static constexpr StringLiteral guid_marker = "8a1db05f-a65d-419b-aa72-037fb4d0672e";

Check failure on line 383 in src/vcpkg/base/downloads.cpp

View workflow job for this annotation

GitHub Actions / builds / build (windows-2022, windows-ci)

the following warning is treated as an error

Check warning on line 383 in src/vcpkg/base/downloads.cpp

View workflow job for this annotation

GitHub Actions / builds / build (windows-2022, windows-ci)

declaration of 'guid_marker' hides global declaration

const size_t start_size = out->size();

Expand Down Expand Up @@ -451,7 +453,7 @@
for (auto i : {100, 1000, 10000, 0})
{
size_t start_size = out->size();
static constexpr StringLiteral guid_marker = "5ec47b8e-6776-4d70-b9b3-ac2a57bc0a1c";

Check warning on line 456 in src/vcpkg/base/downloads.cpp

View workflow job for this annotation

GitHub Actions / builds / build (windows-2022, windows-ci)

declaration of 'guid_marker' hides global declaration

Command cmd;
cmd.string_arg("curl")
Expand Down Expand Up @@ -525,7 +527,7 @@
const std::string& github_repository,
const Json::Object& snapshot)
{
static constexpr StringLiteral guid_marker = "fcfad8a3-bb68-4a54-ad00-dab1ff671ed2";

Check warning on line 530 in src/vcpkg/base/downloads.cpp

View workflow job for this annotation

GitHub Actions / builds / build (windows-2022, windows-ci)

declaration of 'guid_marker' hides global declaration

Command cmd;
cmd.string_arg("curl");
Expand Down Expand Up @@ -569,11 +571,8 @@
StringView url,
const std::vector<std::string>& secrets,
View<std::string> headers,
const Path& file,
StringView method)
const Path& file)
{
static constexpr StringLiteral guid_marker = "9a1db05f-a65d-419b-aa72-037fb4d0672e";

if (Strings::starts_with(url, "ftp://"))
{
// HTTP headers are ignored for FTP clients
Expand All @@ -599,7 +598,7 @@
}

Command cmd;
cmd.string_arg("curl").string_arg("-X").string_arg(method);
cmd.string_arg("curl").string_arg("-X").string_arg("PUT");

for (auto&& header : headers)
{
Expand Down Expand Up @@ -629,6 +628,65 @@
return res;
}

ExpectedL<Unit> patch_file(const Filesystem& fs,
StringView url,
View<std::string> headers,
const Path& file,
std::size_t file_size,
std::size_t chunk_size)
{
Command base_cmd;
base_cmd.string_arg("curl").string_arg("-X").string_arg("PATCH").string_arg("-w").string_arg(
"\\n" + guid_marker.to_string() + "%{http_code}");
quyykk marked this conversation as resolved.
Show resolved Hide resolved
for (auto&& header : headers)
{
base_cmd.string_arg("-H").string_arg(header);
}
base_cmd.string_arg(url);

auto file_ptr = fs.open_for_read(file, VCPKG_LINE_INFO);
std::vector<char> buffer(chunk_size);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that the default size is 450 MB. And no limits.
Is there an alternative to reading it into a buffer first just to forward it to anothers command stdin?
(Remembering all those Raspi people which struggle to build vcpkg due to low memory...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original way was to split the file on disk, but that's pretty hacky I think.

But I can decrease the buffer size. I'm not sure what you mean with limit.

Are people really running a GitHub Runner server on a Raspi lmao 😄

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are people really running a GitHub Runner server on a Raspi lmao

Well, this is only the tool uploading the artifacts. Caching large artifacts is more important when build machine power is low.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay. What buffer size do you think I should use? I can't make it really small or else the upload will be way slower than it would otherwise be.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. I see the trade-offs and barriers.

  • Can't make curl read chunks directly from (within) a large file.
  • Can't feed (the vcpkg function running) curl with piecewise input. (IO buffer smaller than network chunks.)

Changing curl (tool) is out of scope here.
If the interface remains running curl instead of calling into libcurl, then it would be best to fix the second point.
If this is too intrusive, it might be helpful to have a way for the user to change the buffer size, or at least to turn of the buffering in case of trouble.

std::size_t bytes_read = 0;
for (std::size_t i = 0; i < file_size; i += bytes_read)
{
bytes_read = file_ptr.read(buffer.data(), sizeof(decltype(buffer)::value_type), chunk_size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a whole curl process launch per chunk like this is kind of a problem. I don't see reasonable ways to achieve the effect this PR wants without linking with libcurl.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose it could still be done like this but the chunk sizes would have to be bigger than make sense to denote as a single contiguous memory buffer; there should be more than one read / write per curl launch, etc.......

and that sounds like a lot more work than linking with libcurl.

if (!bytes_read)
{
return msg::format_error(
msgFileReadFailed, msg::path = file, msg::byte_offset = i, msg::count = chunk_size);
}

auto cmd = base_cmd;
cmd.string_arg("-H")
.string_arg(fmt::format("Content-Range: bytes {}-{}/{}", i, i + bytes_read - 1, file_size))
.string_arg("--data-binary")
.string_arg("@-");

int code = 0;
auto res = cmd_execute_and_stream_lines(
cmd,
[&code](StringView line) {
if (Strings::starts_with(line, guid_marker))
{
code = std::strtol(line.data() + guid_marker.size(), nullptr, 10);
}
},
default_working_directory,
default_environment,
Encoding::Utf8,
StringView(buffer.data(), bytes_read));
if (!res.get() || *res.get() != 0 || (code >= 100 && code < 200) || code >= 300)
{
return msg::format_error(msgCurlFailedToPutHttp,
vicroms marked this conversation as resolved.
Show resolved Hide resolved
msg::exit_code = res.has_value() ? *res.get() : -1,
quyykk marked this conversation as resolved.
Show resolved Hide resolved
msg::url = url,
msg::value = code);
}
}

return Unit{};
}

std::string format_url_query(StringView base_url, View<std::string> query_params)
{
auto url = base_url.to_string();
Expand Down
8 changes: 2 additions & 6 deletions src/vcpkg/binarycaching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -915,14 +915,10 @@ namespace
if (auto cacheId = reserve_cache_entry(request.spec.name(), abi, cache_size))
{
std::vector<std::string> custom_headers{
m_token_header,
m_accept_header.to_string(),
"Content-Type: application/octet-stream",
"Content-Range: bytes 0-" + std::to_string(cache_size) + "/*",
};
m_token_header, m_accept_header.to_string(), "Content-Type: application/octet-stream"};
auto url = m_url + "/" + std::to_string(*cacheId.get());

if (put_file(m_fs, url, {}, custom_headers, zip_path, "PATCH"))
if (patch_file(m_fs, url, custom_headers, zip_path, cache_size))
{
Json::Object commit;
commit.insert("size", std::to_string(cache_size));
Expand Down
Loading