Skip to content

Commit

Permalink
Add: infisto as a file storage solution (#1482)
Browse files Browse the repository at this point in the history
To store data on disk, and thereby be more resilient against daemon errors, a
new library `infisto` (indexed file storage) is created and used in
openvasd.

This allows with the configuration of openvasd

```
[storage]
type = "fs"

[storage.fs]
path = "/tmp/openvasd/storage"
key = "see config.example.toml for more details"
```

to store scans and their results encrypted on disk.

Although the key used for encryption should be set via the environment
variable `STORAGE_KEY` of the system user that runs openvasd rather than
in the configuration or as a start parameter, each way is supported.

Co-authored-by: Juan José Nicola <juan.nicola@greenbone.net>
  • Loading branch information
nichtsfrei and jjnicola authored Sep 7, 2023
1 parent 3315881 commit aac7ce8
Show file tree
Hide file tree
Showing 41 changed files with 2,637 additions and 536 deletions.
159 changes: 104 additions & 55 deletions rust/Cargo.lock

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,11 @@ members = [
"osp",
"openvasd",
"scanconfig",
"infisto",
]

[workspace.package]
version = "0.1.0"
edition = "2021"
license = "GPL-2.0-or-later"

13 changes: 13 additions & 0 deletions rust/examples/openvasd/config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,16 @@ address = "127.0.0.1:3000"
[log]
# level of the log messages: TRACE > DEBUG > INFO > WARN > ERROR
level = "INFO"

[storage]
# can be either fs (file system) or inmemory (in memory).
# If it is set to fs, it is highly recommended to set `STORAGE_KEY` as an environment variable.
# WARNING: if the type is set to fs and no encryption key is set then the data is stored unencrypted.
#type = "fs"
type = "inmemory"

[storage.fs]
# Sets the storage root directory if the storage.type is set to `fs`.
path = "/var/lib/openvasd/storage"
# Sets the key used to encrypt the storage data. It is recommended to set it via the `STORAGE_KEY` environment variable.
#key = "changeme"
24 changes: 24 additions & 0 deletions rust/infisto/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[package]
name = "infisto"
description = "A library to store and retrieve serializeable vec data in a file that can be accessed by an index."
version.workspace = true
license.workspace = true
edition.workspace = true

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bincode = { version = "2.0.0-rc.3", features = ["serde"] }
serde = { version = "1", features = ["derive"] }
rand = "0"
chacha20 = "0"
pbkdf2 = { version = "0", features = ["password-hash"] }
sha2 = "0"

[dev-dependencies]
criterion = "0"
uuid = "1.4.1"

[[bench]]
name = "comparison"
harness = false
60 changes: 60 additions & 0 deletions rust/infisto/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# INdexed FIle STOrage

A library to store data on disk and fetch individual elements from it rather than loading the whole file.

## CachedIndexFileStorer

Caches the idx (index) files of the last accessed files in memory.

```
use infisto::base::IndexedByteStorage;
let base = "/tmp/openvasd/storage";
let name = "readme_cached";
let mut store = infisto::base::CachedIndexFileStorer::init(base).unwrap();
store.put(name, "Hello World".as_bytes()).unwrap();
store.append_all(name, &["a".as_bytes(), "b".as_bytes()]).unwrap();
let data: Vec<Vec<u8>> = store.by_range(name, infisto::base::Range::Between(1, 3)).unwrap();
assert_eq!(data.len(), 2);
assert_eq!(&data[0], "a".as_bytes());
assert_eq!(&data[1], "b".as_bytes());
store.remove(name).unwrap();
```

## ChaCha20IndexFileStorer

Encrypts the given data with ChaCha20 before storing it.

```
use infisto::base::IndexedByteStorage;
let base = "/tmp/openvasd/storage";
let name = "readme_crypt";
let key = "changeme";
let store = infisto::base::CachedIndexFileStorer::init(base).unwrap();
let mut store = infisto::crypto::ChaCha20IndexFileStorer::new(store, key);
store.put(name, "Hello World".as_bytes()).unwrap();
store.append_all(name, &["a".as_bytes(), "b".as_bytes()]).unwrap();
let data: Vec<Vec<u8>> = store.by_range(name, infisto::base::Range::Between(1, 3)).unwrap();
assert_eq!(data.len(), 2);
assert_eq!(&data[0], "a".as_bytes());
assert_eq!(&data[1], "b".as_bytes());
store.remove(name).unwrap();
```

## IndexedByteStorageIterator

Instead of loading all elements at once, it allows fetching single elements when required.

```
use infisto::base::IndexedByteStorage;
let base = "/tmp/openvasd/storage";
let name = "readme_iter";
let key = "changeme";
let mut store = infisto::base::CachedIndexFileStorer::init(base).unwrap();
store.put(name, "Hello World".as_bytes()).unwrap();
let mut iter: infisto::base::IndexedByteStorageIterator<_, Vec<u8>> =
infisto::base::IndexedByteStorageIterator::new(name, store.clone()).unwrap();
assert_eq!(iter.next(), Some(Ok("Hello World".as_bytes().to_vec())));
assert_eq!(iter.next(), None);
store.remove(name).unwrap();
```

157 changes: 157 additions & 0 deletions rust/infisto/benches/comparison.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// SPDX-FileCopyrightText: 2023 Greenbone AG
//
// SPDX-License-Identifier: GPL-2.0-or-later

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use infisto::base::{CachedIndexFileStorer, IndexedByteStorage};
use rand::distributions::Alphanumeric;
use rand::Rng;

/// Directory handed to every storer's `init` call in these benchmarks.
const BASE: &str = "/tmp/openvasd";

/// Benchmarks reading a complete data set back from the three storage
/// flavours: cached, uncached and ChaCha20-encrypted.
///
/// The data set (1000000 entries of 1024 random bytes each) is written once
/// per flavour under a randomly suffixed key before measuring. Every benchmark
/// then fetches `Range::All` for its key. All three keys are removed again
/// after the group has finished.
pub fn reading(c: &mut Criterion) {
    use infisto::base::IndexedFileStorer;
    use infisto::crypto::{ChaCha20IndexFileStorer, Key};

    /// Returns a buffer of 1024 random bytes.
    fn random_data() -> Vec<u8> {
        use rand::RngCore;
        let mut buf = vec![0; 1024];
        rand::thread_rng().fill_bytes(&mut buf);
        buf
    }

    /// Returns `pre` followed by seven random alphanumeric characters.
    fn fname(pre: &str) -> String {
        let suffix = rand::thread_rng()
            .sample_iter(&Alphanumeric)
            .take(7)
            .map(char::from)
            .collect::<String>();
        format!("{}{}", pre, suffix)
    }

    let amount: usize = 1000000;
    let data: Vec<Vec<u8>> = (0..amount).map(|_| random_data()).collect();

    // prepare data
    let cached_name = fname("cached");
    let mut cached_writer = CachedIndexFileStorer::init(BASE).unwrap();
    cached_writer.append_all(&cached_name, &data).unwrap();

    let uncached_name = fname("uncached");
    let mut uncached_writer = IndexedFileStorer::init(BASE).unwrap();
    uncached_writer.append_all(&uncached_name, &data).unwrap();

    // to be usable in openvasd we must create a Stream interface to allow polling
    // on ranges, otherwise the user has to wait until the whole file is read
    let crypto_name = fname("crypto");
    let mut crypto_writer =
        ChaCha20IndexFileStorer::new(CachedIndexFileStorer::init(BASE).unwrap(), Key::default());
    crypto_writer.append_all(&crypto_name, &data).unwrap();

    let mut group = c.benchmark_group("reading");
    group.sample_size(10);

    // Each benchmark gets a freshly initialised storer that is moved into its closure.
    let cached_reader = CachedIndexFileStorer::init(BASE).unwrap();
    group.bench_with_input("cached", &cached_name, move |bencher, key| {
        bencher.iter(|| {
            cached_reader
                .by_range::<Vec<u8>>(black_box(key), infisto::base::Range::All)
                .unwrap();
        })
    });

    let uncached_reader = IndexedFileStorer::init(BASE).unwrap();
    group.bench_with_input("uncached", &uncached_name, move |bencher, key| {
        bencher.iter(|| {
            uncached_reader
                .by_range::<Vec<u8>>(black_box(key), infisto::base::Range::All)
                .unwrap();
        })
    });

    let crypto_reader =
        ChaCha20IndexFileStorer::new(CachedIndexFileStorer::init(BASE).unwrap(), Key::default());
    group.bench_with_input("crypto", &crypto_name, move |bencher, key| {
        bencher.iter(|| {
            crypto_reader
                .by_range::<Vec<u8>>(black_box(key), infisto::base::Range::All)
                .unwrap();
        })
    });

    group.finish();

    // Remove everything that was written above, in the same order as before.
    let mut clean_up_store = CachedIndexFileStorer::init(BASE).unwrap();
    for name in [&crypto_name, &uncached_name, &cached_name] {
        clean_up_store.remove(name).unwrap();
    }
}
/// Benchmarks appending a data set to the three storage flavours: cached,
/// uncached and ChaCha20-encrypted.
///
/// The data set consists of 100000 entries of 1024 random bytes (1 KiB) each.
/// Every measured iteration appends the whole data set again to the same
/// randomly suffixed key. After the group has finished the keys are removed
/// and `reading` is run with the same `Criterion` instance.
pub fn storing(c: &mut Criterion) {
    /// Returns a buffer of 1024 random bytes.
    fn random_data() -> Vec<u8> {
        use rand::RngCore;
        let mut rng = rand::thread_rng();
        let mut data = vec![0; 1024];
        rng.fill_bytes(&mut data);
        data
    }

    let amount: usize = 100000;
    let mut data = Vec::with_capacity(amount);
    for _ in 0..amount {
        data.push(random_data());
    }

    // Builds a key that is unique per run: `pre` plus seven random alphanumeric characters.
    let fname = |pre: &str| {
        format!(
            "{}{}",
            pre,
            rand::thread_rng()
                .sample_iter(&Alphanumeric)
                .take(7)
                .map(char::from)
                .collect::<String>()
        )
    };

    let mut group = c.benchmark_group("storing");
    group.sample_size(10);

    let cached_name = fname("cached");
    group.bench_with_input(
        // The label is derived from `amount`; the former "1million times 1MB"
        // did not match the 100000 x 1 KiB data set that is actually written.
        BenchmarkId::new("cached", format!("{} times 1KiB", amount)),
        &(&cached_name, &data),
        move |b, (key, data)| {
            let mut store = CachedIndexFileStorer::init(BASE).unwrap();
            b.iter(|| {
                store.append_all(black_box(key), black_box(data)).unwrap();
            })
        },
    );

    let uncached_name = fname("uncached");
    group.bench_with_input(
        "uncached",
        &(&uncached_name, &data),
        move |b, (key, data)| {
            let mut store = infisto::base::IndexedFileStorer::init(BASE).unwrap();
            b.iter(|| {
                store.append_all(black_box(key), black_box(data)).unwrap();
            })
        },
    );

    let crypto_name = fname("crypto");
    group.bench_with_input("crypto", &(&crypto_name, &data), move |b, (key, data)| {
        let mut store = infisto::crypto::ChaCha20IndexFileStorer::new(
            CachedIndexFileStorer::init(BASE).unwrap(),
            infisto::crypto::Key::default(),
        );
        b.iter(|| {
            store.append_all(black_box(key), black_box(data)).unwrap();
        })
    });
    group.finish();

    // Remove the files written by the benchmarks before the read benchmarks start.
    let mut clean_up_store = CachedIndexFileStorer::init(BASE).unwrap();
    clean_up_store.remove(&crypto_name).unwrap();
    clean_up_store.remove(&uncached_name).unwrap();
    clean_up_store.remove(&cached_name).unwrap();

    // `reading` is not registered as its own criterion target; it is chained here.
    reading(c);
}

// Only `storing` is registered here; it calls `reading` itself as its last step.
criterion_group!(benches, storing);

criterion_main!(benches);
Loading

0 comments on commit aac7ce8

Please sign in to comment.