Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use the objects API for directory download/upload #82

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 77 additions & 1 deletion runners/s3-benchrunner-rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions runners/s3-benchrunner-rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ edition = "2021"
[dependencies]

# Swap which line is commented-out to use GitHub or local aws-s3-transfer-manager
aws-s3-transfer-manager = { git = "https://github.com/awslabs/aws-s3-transfer-manager-rs.git", rev = "790ead476a104cf0b66fdd00b5b9c3636321b244" }
aws-s3-transfer-manager = { git = "https://github.com/awslabs/aws-s3-transfer-manager-rs.git", rev = "e48ef1b0cb573bd7dbb99d426824587bfc73270a" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are 3 newer commits (latest is 06c087a5d53676bb048f6c512b8eb1fda63f03d5)
use one of those if you can

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like there are some API changes

# aws-s3-transfer-manager = { path = "../../../aws-s3-transfer-manager-rs/aws-s3-transfer-manager" }

tracing-opentelemetry = "0.27"
opentelemetry = { version = "0.26", features = ["trace"] }
opentelemetry_sdk = { version = "0.26", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry_sdk = { version = "0.26", default-features = false, features = [
"trace",
"rt-tokio",
] }
opentelemetry-stdout = { version = "0.26", features = ["trace"] }
opentelemetry-semantic-conventions = "0.26"

Expand Down
5 changes: 4 additions & 1 deletion runners/s3-benchrunner-rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct BenchmarkConfig {
pub bucket: String,
pub region: String,
pub target_throughput_gigabits_per_sec: f64,
pub disable_directory: bool,
}

/// From the workload's JSON file
Expand Down Expand Up @@ -79,6 +80,7 @@ impl BenchmarkConfig {
bucket: &str,
region: &str,
target_throughput_gigabits_per_sec: f64,
disable_directory: bool,
) -> Result<Self> {
let json_file = File::open(workload_path)
.with_context(|| format!("Failed opening '{workload_path}'"))?;
Expand Down Expand Up @@ -110,6 +112,7 @@ impl BenchmarkConfig {
bucket: bucket.to_string(),
region: region.to_string(),
target_throughput_gigabits_per_sec,
disable_directory,
})
}
}
Expand All @@ -135,7 +138,7 @@ pub fn prepare_run(workload: &WorkloadConfig) -> Result<()> {
} else if let Some(dir) = filepath.parent() {
// create directory if necessary
if !dir.exists() {
std::fs::create_dir(dir)
std::fs::create_dir_all(dir)
.with_context(|| format!("failed creating directory: {dir:?}"))?;
}
}
Expand Down
7 changes: 6 additions & 1 deletion runners/s3-benchrunner-rust/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ struct Args {
target_throughput: f64,
#[arg(long, help = "Emit telemetry via OTLP/gRPC to http://localhost:4317")]
telemetry: bool,
#[arg(
long,
help = "Instead of using 1 upload_objects()/download_objects() call for multiple files on disk, use N upload()/download() calls."
)]
disable_directory: bool,
}

#[derive(ValueEnum, Clone, Debug)]
Expand Down Expand Up @@ -120,8 +125,8 @@ async fn new_runner(args: &Args) -> Result<Box<dyn RunBenchmark>> {
&args.bucket,
&args.region,
args.target_throughput,
args.disable_directory,
)?;

match args.s3_client {
S3ClientId::TransferManager => {
let transfer_manager = TransferManagerRunner::new(config).await;
Expand Down
102 changes: 93 additions & 9 deletions runners/s3-benchrunner-rust/src/transfer_manager.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{cmp::min, sync::Arc};
use std::{cmp::min, path::PathBuf, sync::Arc};

use anyhow::Context;
use async_trait::async_trait;
Expand Down Expand Up @@ -26,6 +26,7 @@ struct Handle {
config: BenchmarkConfig,
transfer_manager: aws_s3_transfer_manager::Client,
random_data_for_upload: Bytes,
transfer_path: Option<String>,
}

impl TransferManagerRunner {
Expand Down Expand Up @@ -57,12 +58,13 @@ impl TransferManagerRunner {
.await;

let transfer_manager = aws_s3_transfer_manager::Client::new(tm_config);

let transfer_path = resolve_transfer_path(&config);
TransferManagerRunner {
handle: Arc::new(Handle {
config,
transfer_manager,
random_data_for_upload,
transfer_path,
}),
}
}
Expand All @@ -87,6 +89,37 @@ impl TransferManagerRunner {
}
}
}
/// Download every object under `transfer_path` with a single
/// `download_objects()` call (the Transfer Manager "objects" API),
/// instead of issuing one `download()` call per file.
///
/// # Panics
/// Panics if `handle.transfer_path` is `None`; callers must only invoke
/// this after confirming a common transfer path was resolved.
async fn download_objects(&self) -> Result<()> {
    let path = self
        .handle
        .transfer_path
        .as_ref()
        .expect("transfer_path must be set before calling download_objects()");
    // Objects land in a local directory mirroring the key prefix.
    let dest = PathBuf::from(path);

    let download_objects_handle = self
        .handle
        .transfer_manager
        .download_objects()
        .bucket(&self.config().bucket)
        .key_prefix(path)
        .destination(&dest)
        .send()
        .await?;
    // Wait for every per-object transfer spawned by the call to finish.
    download_objects_handle.join().await?;
    Ok(())
}

/// Upload every file under `transfer_path` with a single
/// `upload_objects()` call (the Transfer Manager "objects" API),
/// instead of issuing one `upload()` call per file.
///
/// # Panics
/// Panics if `handle.transfer_path` is `None`; callers must only invoke
/// this after confirming a common transfer path was resolved.
async fn upload_objects(&self) -> Result<()> {
    let path = self
        .handle
        .transfer_path
        .as_ref()
        .expect("transfer_path must be set before calling upload_objects()");
    // The local source directory doubles as the S3 key prefix.
    let upload_objects_handle = self
        .handle
        .transfer_manager
        .upload_objects()
        .bucket(&self.config().bucket)
        .key_prefix(path)
        .source(path)
        .send()
        .await?;
    // Wait for every per-object transfer spawned by the call to finish.
    upload_objects_handle.join().await?;
    Ok(())
}

async fn download(&self, task_config: &TaskConfig) -> Result<()> {
let key = &task_config.key;
Expand Down Expand Up @@ -179,16 +212,44 @@ impl RunBenchmark for TransferManagerRunner {
// We want the benchmark to fail fast if anything goes wrong,
// so we're using a JoinSet.
let mut task_set: JoinSet<Result<()>> = JoinSet::new();
for i in 0..self.config().workload.tasks.len() {
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
let task = self.clone().run_task(i);
task_set.spawn(task.instrument(tracing::Span::current()));
}

while let Some(join_result) = task_set.join_next().await {
let task_result = join_result.unwrap();
task_result?;
let workload_config = &self.config().workload;

if workload_config.checksum.is_some() {
return Err(SkipBenchmarkError("checksums not yet implemented".to_string()).into());
}
if self.handle.transfer_path != None {
// Use the objects API to download/upload directory directly
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
match workload_config.tasks[0].action {
TaskAction::Download => {
self.download_objects()
.instrument(info_span!(
"download directory",
key = self.handle.transfer_path
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
))
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
.await?
}
TaskAction::Upload => {
self.upload_objects()
.instrument(info_span!(
"download directory",
key = self.handle.transfer_path
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
))
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
.await?
}
}
} else {
// Iterate through all the tasks to download/upload each object.
for i in 0..workload_config.tasks.len() {
let task = self.clone().run_task(i);
task_set.spawn(task.instrument(tracing::Span::current()));
}

while let Some(join_result) = task_set.join_next().await {
let task_result = join_result.unwrap();
task_result?;
}
}
Ok(())
}

Expand All @@ -206,6 +267,29 @@ fn calculate_concurrency(target_throughput_gigabits_per_sec: f64) -> usize {
(concurrency as usize).max(10)
}

/// Resolve the transfer path based on the config.
/// If None returns,
fn resolve_transfer_path(config: &BenchmarkConfig) -> Option<String> {
if config.workload.files_on_disk && !config.disable_directory {
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved
let first_task = &config.workload.tasks[0];
TingDaoK marked this conversation as resolved.
Show resolved Hide resolved

// Find the common parents directory for all the tasks.
// If there is no common parent, we can't use the same directory for downloads.
let mut common_root = std::path::Path::new(&first_task.key).parent()?;
for task in &config.workload.tasks {
let task_path = std::path::Path::new(&task.key);
common_root = common_root.ancestors().find(|ancestor| {
task_path
.ancestors()
.any(|task_ancestor| task_ancestor == *ancestor)
})?;
}
Some(common_root.to_str()?.to_string())
} else {
None
}
}

// Quickly generate a buffer of random data.
// This is fancy because a naive approach can add MINUTES to each debug run,
// and we want devs to iterate quickly.
Expand Down
Loading