diff --git a/Cargo.lock b/Cargo.lock
index 1722a35..173ba26 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1410,6 +1410,7 @@ dependencies = [
  "crc32fast",
  "futures",
  "futures-util",
+ "hex",
  "ic-agent",
  "ic-oss",
  "ic-oss-types",
diff --git a/README.md b/README.md
index c36f2db..935f0b0 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# `ic-oss`
+# `IC-OSS`
 
 🗂 A decentralized Object Storage Service on the Internet Computer.
 
diff --git a/src/ic_oss_bucket/README.md b/src/ic_oss_bucket/README.md
index 46e99c3..f516d33 100644
--- a/src/ic_oss_bucket/README.md
+++ b/src/ic_oss_bucket/README.md
@@ -68,7 +68,7 @@ dfx deploy ic_oss_bucket --argument "(opt variant {Init =
     max_children = 1000;
     visibility = 0;
     max_custom_data_size = 4096;
-    enable_hash_index = false;
+    enable_hash_index = true;
   }
 })"
diff --git a/src/ic_oss_bucket/src/api_query.rs b/src/ic_oss_bucket/src/api_query.rs
index e8f13af..52c949c 100644
--- a/src/ic_oss_bucket/src/api_query.rs
+++ b/src/ic_oss_bucket/src/api_query.rs
@@ -153,7 +153,7 @@ fn get_file_chunks(
                 Err("permission denied".to_string())?;
             }
 
-            Ok(store::fs::get_chunks(id, index, take.unwrap_or(10).min(8)))
+            Ok(store::fs::get_chunks(id, index, take.unwrap_or(8).min(8)))
         }
     }
 }
diff --git a/src/ic_oss_cli/Cargo.toml b/src/ic_oss_cli/Cargo.toml
index 62358cd..f025e37 100644
--- a/src/ic_oss_cli/Cargo.toml
+++ b/src/ic_oss_cli/Cargo.toml
@@ -11,7 +11,7 @@ license.workspace = true
 
 [dependencies]
 bytes = { workspace = true }
-candid = { workspace = true }
+candid = { workspace = true, features = ["value", "printer"] }
 serde = { workspace = true }
 serde_bytes = { workspace = true }
 tokio = { workspace = true }
@@ -21,6 +21,7 @@ futures = { workspace = true }
 futures-util = { workspace = true }
 crc32fast = { workspace = true }
 sha3 = { workspace = true }
+hex = { workspace = true }
 ic-oss = { path = "../ic_oss", version = "0.7" }
 ic-oss-types = { path = "../ic_oss_types", version = "0.7" }
 ic-agent = "0.36"
diff --git a/src/ic_oss_cli/src/file.rs b/src/ic_oss_cli/src/file.rs
index f71e52f..81e75d5 100644
--- a/src/ic_oss_cli/src/file.rs
+++ b/src/ic_oss_cli/src/file.rs
@@ -1,13 +1,16 @@
 use chrono::prelude::*;
-use ic_oss_types::{file::*, format_error};
+use ic_oss_types::{file::*, format_error, ByteN};
+use sha3::{Digest, Sha3_256};
+use tokio::io::AsyncReadExt;
 use tokio::{time, time::Duration};
 
 pub async fn upload_file(
     cli: &ic_oss::bucket::Client,
+    enable_hash_index: bool,
+    parent: u32,
     file: &str,
     retry: u8,
 ) -> Result<(), String> {
-    let start_ts: DateTime<Local> = Local::now();
     let file_path = std::path::Path::new(file);
     let metadata = std::fs::metadata(file_path).map_err(format_error)?;
     if !metadata.is_file() {
@@ -25,12 +28,25 @@ pub async fn upload_file(
         mime_db::lookup(file).unwrap_or("application/octet-stream")
     };
 
+    let hash: Option<ByteN<32>> = if enable_hash_index {
+        let fs = tokio::fs::File::open(&file_path)
+            .await
+            .map_err(format_error)?;
+        Some(pre_sum_hash(fs).await?.into())
+    } else {
+        None
+    };
+
+    let start_ts: DateTime<Local> = Local::now();
     let input = CreateFileInput {
+        parent,
         name: file_path.file_name().unwrap().to_string_lossy().to_string(),
         content_type: content_type.to_string(),
         size: Some(file_size),
+        hash,
         ..Default::default()
     };
+
     let fs = tokio::fs::File::open(&file_path)
         .await
         .map_err(format_error)?;
@@ -94,3 +110,16 @@ pub async fn upload_file(
     );
     Ok(())
 }
+
+async fn pre_sum_hash(mut fs: tokio::fs::File) -> Result<[u8; 32], String> {
+    let mut hasher = Sha3_256::new();
+    let mut buf = vec![0u8; 1024 * 1024 * 2];
+    loop {
+        let n = fs.read(&mut buf).await.map_err(format_error)?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+    Ok(hasher.finalize().into())
+}
diff --git a/src/ic_oss_cli/src/main.rs b/src/ic_oss_cli/src/main.rs
index e9d4d73..7a49f3e 100644
--- a/src/ic_oss_cli/src/main.rs
+++ b/src/ic_oss_cli/src/main.rs
@@ -1,13 +1,20 @@
-use candid::Principal;
+use candid::{pretty::candid::value::pp_value, CandidType, IDLValue, Principal};
 use clap::{Parser, Subcommand};
 use ic_agent::identity::{AnonymousIdentity, BasicIdentity, Identity, Secp256k1Identity};
 use ic_oss::agent::build_agent;
-use ic_oss_types::format_error;
+use ic_oss_types::{
+    file::{MoveInput, CHUNK_SIZE},
+    folder::CreateFolderInput,
+    format_error, ByteN,
+};
 use ring::{rand, signature::Ed25519KeyPair};
+use sha3::{Digest, Sha3_256};
 use std::{
+    io::SeekFrom,
     path::{Path, PathBuf},
     sync::Arc,
 };
+use tokio::io::{AsyncSeekExt, AsyncWriteExt};
 
 mod file;
 
@@ -34,25 +41,63 @@ pub struct Cli {
     command: Option<Commands>,
 }
 
+impl Cli {
+    async fn client(
+        &self,
+        identity: Box<dyn Identity>,
+        ic: &bool,
+        bucket: &str,
+    ) -> Result<ic_oss::bucket::Client, String> {
+        let is_ic = *ic || self.ic;
+        let host = if is_ic { IC_HOST } else { self.host.as_str() };
+        let agent = build_agent(host, identity).await?;
+        let bucket = Principal::from_text(bucket).map_err(format_error)?;
+        Ok(ic_oss::bucket::Client::new(Arc::new(agent), bucket))
+    }
+}
+
 #[derive(Subcommand)]
 pub enum Commands {
     Identity {
-        /// file
+        /// file path
         #[arg(long)]
-        file: Option<String>,
+        path: Option<String>,
 
         /// create an identity
         #[arg(long)]
         new: bool,
     },
-    /// upload file to the ic-oss
-    Upload {
+    /// Add a folder to a bucket
+    Add {
+        /// bucket
+        #[arg(short, long, value_name = "CANISTER")]
+        bucket: String,
+
+        /// parent folder id
+        #[arg(short, long, default_value = "0")]
+        parent: u32,
+
+        /// folder name
+        #[arg(short, long)]
+        name: String,
+
+        /// Use the ic network
+        #[arg(long, default_value = "false")]
+        ic: bool,
+    },
+    /// Uploads a file to a bucket
+    #[command(visible_alias = "upload")]
+    Put {
         /// bucket
         #[arg(short, long, value_name = "CANISTER")]
         bucket: String,
 
-        /// file
+        /// parent folder id
+        #[arg(short, long, default_value = "0")]
+        parent: u32,
+
+        /// file path
         #[arg(long)]
-        file: String,
+        path: String,
 
         /// retry times
         #[arg(long, default_value = "3")]
@@ -61,6 +106,120 @@ pub enum Commands {
 
         /// Use the ic network
         #[arg(long, default_value = "false")]
         ic: bool,
+
+        /// digest algorithm, default is SHA3-256
+        #[arg(long, default_value = "SHA3-256")]
+        digest: String,
+    },
+    /// Downloads a file from a target bucket to the local file system
+    Get {
+        /// bucket
+        #[arg(short, long, value_name = "CANISTER")]
+        bucket: String,
+
+        /// downloads file by id
+        #[arg(long)]
+        id: Option<u32>,
+
+        /// downloads file by hash
+        #[arg(long)]
+        hash: Option<String>,
+
+        /// file path to save
+        #[arg(long, default_value = "./")]
+        path: String,
+
+        /// Use the ic network
+        #[arg(long, default_value = "false")]
+        ic: bool,
+
+        /// digest algorithm to verify the file, default is SHA3-256
+        #[arg(long, default_value = "SHA3-256")]
+        digest: String,
+    },
+    /// Lists files or folders in a folder
+    Ls {
+        /// bucket
+        #[arg(short, long, value_name = "CANISTER")]
+        bucket: String,
+
+        /// parent folder id
+        #[arg(short, long, default_value = "0")]
+        parent: u32,
+
+        /// kind 0: file, 1: folder
+        #[arg(short, long, default_value = "0")]
+        kind: u8,
+
+        /// Use the ic network
+        #[arg(long, default_value = "false")]
+        ic: bool,
+    },
+    /// Displays information on a file, folder, or bucket, including metadata
+    Stat {
+        /// bucket
+        #[arg(short, long, value_name = "CANISTER")]
+        bucket: String,
+
+        /// file or folder id
+        #[arg(long, default_value = "0")]
+        id: u32,
+
+        /// kind 0: file, 1: folder, other: bucket
+        #[arg(short, long, default_value = "0")]
+        kind: u8,
+
+        /// Use the ic network
+        #[arg(long, default_value = "false")]
+        ic: bool,
+
+        /// Displays file information by file hash
+        #[arg(long)]
+        hash: Option<String>,
+    },
+    /// Moves a file or folder from one folder to another in a bucket
+    Mv {
+        /// bucket
+        #[arg(short, long, value_name = "CANISTER")]
+        bucket: String,
+
+        /// file or folder id
+        #[arg(long)]
+        id: u32,
+
+        /// file or folder's parent id
+        #[arg(long)]
+        from: u32,
+
+        /// target folder id
+        #[arg(long)]
+        to: u32,
+
+        /// kind 0: file, 1: folder
+        #[arg(short, long, default_value = "0")]
+        kind: u8,
+
+        /// Use the ic network
+        #[arg(long, default_value = "false")]
+        ic: bool,
+    },
+    /// Removes a file or folder from a bucket
+    Rm {
+        /// bucket
+        #[arg(short, long, value_name = "CANISTER")]
+        bucket: String,
+
+        /// file or folder id
+        #[arg(long)]
+        id: u32,
+
+        /// kind 0: file, 1: folder
+        #[arg(short, long, default_value = "0")]
+        kind: u8,
+
+        /// Use the ic network
+        #[arg(long, default_value = "false")]
+        ic: bool,
+    },
 }
 
@@ -70,7 +229,7 @@ async fn main() -> Result<(), String> {
     let identity = load_identity(&cli.identity).map_err(format_error)?;
 
     match &cli.command {
-        Some(Commands::Identity { new, file }) => {
+        Some(Commands::Identity { new, path }) => {
             if !new {
                 let principal = identity.sender()?;
                 println!("principal: {}", principal);
@@ -85,13 +244,13 @@ async fn main() -> Result<(), String> {
             let id = BasicIdentity::from_pem(doc.as_bytes()).map_err(format_error)?;
             let principal = id.sender()?;
 
-            let file = match file {
-                Some(file) => Path::new(file).to_path_buf(),
+            let file = match path {
+                Some(path) => Path::new(path).to_path_buf(),
                 None => PathBuf::from(format!("{}.pem", principal)),
             };
 
             if file.try_exists().unwrap_or_default() {
-                return Err(format!("file already exists: {:?}", file));
+                Err(format!("file already exists: {:?}", file))?;
             }
 
             std::fs::write(&file, doc.as_bytes()).map_err(format_error)?;
@@ -100,18 +259,237 @@ async fn main() -> Result<(), String> {
             return Ok(());
         }
 
-        Some(Commands::Upload {
+        Some(Commands::Add {
             bucket,
-            file,
+            parent,
+            name,
+            ic,
+        }) => {
+            let cli = cli.client(identity, ic, bucket).await?;
+            let folder = cli
+                .create_folder(CreateFolderInput {
+                    parent: *parent,
+                    name: name.clone(),
+                })
+                .await
+                .map_err(format_error)?;
+            pretty_println(&folder)?;
+            return Ok(());
+        }
+
+        Some(Commands::Put {
+            bucket,
+            parent,
+            path,
             retry,
             ic,
+            digest,
+        }) => {
+            if digest != "SHA3-256" {
+                Err("unsupported digest algorithm".to_string())?;
+            }
+            let cli = cli.client(identity, ic, bucket).await?;
+            let info = cli.get_bucket_info().await.map_err(format_error)?;
+            upload_file(&cli, info.enable_hash_index, *parent, path, *retry).await?;
+
+            return Ok(());
+        }
+
+        Some(Commands::Get {
+            bucket,
+            id,
+            path,
+            ic,
+            digest,
+            hash,
+        }) => {
+            if digest != "SHA3-256" {
+                Err("unsupported digest algorithm".to_string())?;
+            }
+            let cli = cli.client(identity, ic, bucket).await?;
+            let info = if let Some(hash) = hash {
+                let hash = parse_file_hash(hash)?;
+                cli.get_file_info_by_hash(hash)
+                    .await
+                    .map_err(format_error)?
+            } else if let Some(id) = id {
+                cli.get_file_info(*id).await.map_err(format_error)?
+            } else {
+                Err("missing file id or hash".to_string())?
+            };
+
+            if info.size != info.filled {
+                Err("file not fully uploaded".to_string())?;
+            }
+            let mut f = Path::new(path).to_path_buf();
+            if f.is_dir() {
+                f = f.join(info.name);
+            }
+            let mut file = tokio::fs::File::create_new(&f)
+                .await
+                .map_err(format_error)?;
+            file.set_len(info.size as u64).await.map_err(format_error)?;
+            let mut hasher = Sha3_256::new();
+            let mut filled = 0usize;
+            // TODO: support parallel download
+            for index in (0..info.chunks).step_by(6) {
+                let chunks = cli
+                    .get_file_chunks(info.id, index, Some(6))
+                    .await
+                    .map_err(format_error)?;
+                for chunk in chunks.iter() {
+                    file.seek(SeekFrom::Start(chunk.0 as u64 * CHUNK_SIZE as u64))
+                        .await
+                        .map_err(format_error)?;
+                    hasher.update(&chunk.1);
+                    file.write_all(&chunk.1).await.map_err(format_error)?;
+                    filled += chunk.1.len();
+                }
+
+                println!(
+                    "downloaded chunks: {}/{}, {:.2}%",
+                    index as usize + chunks.len(),
+                    info.chunks,
+                    (filled as f32 / info.size as f32) * 100.0,
+                );
+            }
+
+            let hash: [u8; 32] = hasher.finalize().into();
+            if let Some(h) = info.hash {
+                if *h != hash {
+                    Err(format!(
+                        "file hash mismatch, expected {}, got {}",
+                        hex::encode(*h),
+                        hex::encode(hash),
+                    ))?;
+                }
+            }
+
+            println!(
+                "\n{}:\n{}\t{}",
+                digest,
+                hex::encode(hash),
+                f.to_string_lossy(),
+            );
+
+            return Ok(());
+        }
+
+        Some(Commands::Ls {
+            bucket,
+            parent,
+            kind,
+            ic,
+        }) => {
+            let cli = cli.client(identity, ic, bucket).await?;
+            match kind {
+                0 => {
+                    let files = cli
+                        .list_files(*parent, None, None)
+                        .await
+                        .map_err(format_error)?;
+                    pretty_println(&files)?;
+                }
+                1 => {
+                    let folders = cli
+                        .list_folders(*parent, None, None)
+                        .await
+                        .map_err(format_error)?;
+                    pretty_println(&folders)?;
+                }
+                _ => return Err("invalid kind".to_string()),
+            }
+            return Ok(());
+        }
+
+        Some(Commands::Stat {
+            bucket,
+            id,
+            kind,
+            ic,
+            hash,
+        }) => {
+            let cli = cli.client(identity, ic, bucket).await?;
+            match kind {
+                0 => {
+                    let info = if let Some(hash) = hash {
+                        let hash = parse_file_hash(hash)?;
+                        cli.get_file_info_by_hash(hash)
+                            .await
+                            .map_err(format_error)?
+                    } else {
+                        cli.get_file_info(*id).await.map_err(format_error)?
+                    };
+
+                    pretty_println(&info)?;
+                }
+                1 => {
+                    let info = cli.get_folder_info(*id).await.map_err(format_error)?;
+                    pretty_println(&info)?;
+                }
+                _ => {
+                    let info = cli.get_bucket_info().await.map_err(format_error)?;
+                    pretty_println(&info)?;
+                }
+            }
+            return Ok(());
+        }
+
+        Some(Commands::Mv {
+            bucket,
+            id,
+            from,
+            to,
+            kind,
+            ic,
         }) => {
-            let is_ic = *ic || cli.ic;
-            let host = if is_ic { IC_HOST } else { cli.host.as_str() };
-            let agent = build_agent(host, identity).await?;
-            let bucket = Principal::from_text(bucket).map_err(format_error)?;
-            let cli = ic_oss::bucket::Client::new(Arc::new(agent), bucket);
-            upload_file(&cli, file, *retry).await?;
+            let cli = cli.client(identity, ic, bucket).await?;
+            match kind {
+                0 => {
+                    let res = cli
+                        .move_file(MoveInput {
+                            id: *id,
+                            from: *from,
+                            to: *to,
+                        })
+                        .await
+                        .map_err(format_error)?;
+                    pretty_println(&res)?;
+                }
+                1 => {
+                    let res = cli
+                        .move_folder(MoveInput {
+                            id: *id,
+                            from: *from,
+                            to: *to,
+                        })
+                        .await
+                        .map_err(format_error)?;
+                    pretty_println(&res)?;
+                }
+                _ => return Err("invalid kind".to_string()),
+            }
             return Ok(());
         }
 
+        Some(Commands::Rm {
+            bucket,
+            id,
+            kind,
+            ic,
+        }) => {
+            let cli = cli.client(identity, ic, bucket).await?;
+            match kind {
+                0 => {
+                    let res = cli.delete_file(*id).await.map_err(format_error)?;
+                    pretty_println(&res)?;
+                }
+                1 => {
+                    let res = cli.delete_folder(*id).await.map_err(format_error)?;
+                    pretty_println(&res)?;
+                }
+                _ => return Err("invalid kind".to_string()),
+            }
+            return Ok(());
+        }
@@ -135,3 +513,20 @@ fn load_identity(path: &str) -> anyhow::Result<Box<dyn Identity>> {
         },
     }
 }
+
+fn pretty_println<T>(data: &T) -> Result<(), String>
+where
+    T: CandidType,
+{
+    let val = IDLValue::try_from_candid_type(data).map_err(format_error)?;
+    let doc = pp_value(7, &val);
+    println!("{}", doc.pretty(120));
+    Ok(())
+}
+
+fn parse_file_hash(s: &str) -> Result<ByteN<32>, String> {
+    let s = s.replace("\\", "");
+    let data = hex::decode(s.strip_prefix("0x").unwrap_or(&s)).map_err(format_error)?;
+    let hash: [u8; 32] = data.try_into().map_err(format_error)?;
+    Ok(hash.into())
+}
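
Note: both `pre_sum_hash` (upload) and the `Get` handler (download) feed bytes into a SHA3-256 hasher incrementally, so the resulting digest does not depend on how the data is chunked. A minimal standalone sketch of that property, using only the `sha3` and `hex` crates this patch already adds (the sample payload is hypothetical):

use sha3::{Digest, Sha3_256};

fn main() {
    // Hypothetical payload standing in for a file's contents.
    let data = b"example payload for the streaming-hash sketch";

    // One-shot digest of the whole buffer.
    let one_shot: [u8; 32] = Sha3_256::digest(data).into();

    // Incremental digest over arbitrary chunk boundaries, the same
    // update/finalize pattern pre_sum_hash and the Get handler use.
    let mut hasher = Sha3_256::new();
    for chunk in data.chunks(7) {
        hasher.update(chunk);
    }
    let streamed: [u8; 32] = hasher.finalize().into();

    // Chunking must not change the digest.
    assert_eq!(one_shot, streamed);
    println!("SHA3-256: {}", hex::encode(streamed));
}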