Skip to content

Commit

Permalink
logcloud
Browse files Browse the repository at this point in the history
  • Loading branch information
marsupialtail committed Sep 10, 2024
1 parent 6d862c1 commit 4853c44
Show file tree
Hide file tree
Showing 8 changed files with 960 additions and 776 deletions.
372 changes: 130 additions & 242 deletions src/lava/build.rs

Large diffs are not rendered by default.

49 changes: 25 additions & 24 deletions src/lava/fm_chunk.rs
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
use std::collections::HashMap;
use zstd::stream::read::Decoder;
use super::error::LavaError;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::io::Read;
use zstd::stream::encode_all;
use super::error::LavaError;
pub(crate) struct FMChunk {
pub counts_so_far : HashMap<u32, u64>,
pub bwt_chunk : Vec<u32>,
use zstd::stream::read::Decoder;

/// One decoded chunk of the FM index: a per-symbol counter table plus the
/// run of symbols stored in the chunk.
///
/// The type itself places no trait bounds on `T`; neither field needs any to
/// be well-formed. The serialization / hashing bounds live on the `impl`
/// blocks that actually decode or query a chunk, so code that merely names or
/// stores an `FMChunk<T>` does not have to repeat them.
pub(crate) struct FMChunk<T> {
    /// Starting count for each symbol; `search` uses this as its base value
    /// and treats a missing symbol as zero.
    /// NOTE(review): presumably the number of occurrences in all preceding
    /// chunks — confirm against the index builder.
    pub counts_so_far: HashMap<T, u64>,
    /// The symbols held by this chunk, in order; `search` scans a prefix of it.
    pub bwt_chunk: Vec<T>,
}

impl FMChunk {
pub fn new(
chunk : Bytes
) -> Result<Self, LavaError> {
let compressed_counts_size = u64::from_le_bytes(chunk[0 .. 8].try_into().unwrap());
let compressed_counts = &chunk[8 .. (compressed_counts_size + 8) as usize];
impl<T> FMChunk<T>
where
T: Serialize + for<'de> Deserialize<'de> + Clone + Eq + std::hash::Hash,
{
/// Decodes a serialized FM chunk.
///
/// Layout read from `chunk`:
/// * bytes `0..8` — little-endian `u64` giving the byte length `n` of the
///   compressed counts section;
/// * bytes `8..8 + n` — a zstd stream whose decompressed payload is a
///   bincode-encoded `HashMap<T, u64>`;
/// * the remaining bytes — a zstd stream whose decompressed payload is a
///   bincode-encoded `Vec<T>`.
///
/// # Errors
///
/// Returns an error when the buffer is shorter than its own header claims
/// (instead of panicking on an out-of-range slice), or when zstd
/// decompression or bincode deserialization fails.
pub fn new(chunk: Bytes) -> Result<Self, LavaError> {
    /// Size of the length prefix that precedes the compressed counts.
    const HEADER_LEN: usize = 8;

    /// Decompresses one zstd section into a freshly allocated buffer.
    fn inflate(compressed: &[u8]) -> std::io::Result<Vec<u8>> {
        let mut raw = Vec::with_capacity(compressed.len());
        Decoder::new(compressed)?.read_to_end(&mut raw)?;
        Ok(raw)
    }

    // Malformed input is reported through the same io::Error -> LavaError
    // conversion that the decompression calls already rely on.
    let truncated = |what: &str| {
        std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            format!("FM chunk truncated: missing {}", what),
        )
    };

    let header: [u8; HEADER_LEN] = chunk
        .get(..HEADER_LEN)
        .and_then(|bytes| bytes.try_into().ok())
        .ok_or_else(|| truncated("length header"))?;

    // Checked arithmetic: a corrupt length must not wrap around or index
    // past the end of the buffer.
    let counts_end = usize::try_from(u64::from_le_bytes(header))
        .ok()
        .and_then(|len| len.checked_add(HEADER_LEN))
        .filter(|&end| end <= chunk.len())
        .ok_or_else(|| truncated("compressed counts section"))?;

    let serialized_counts = inflate(&chunk[HEADER_LEN..counts_end])?;
    let counts: HashMap<T, u64> = bincode::deserialize(&serialized_counts)?;

    let serialized_fm_chunk = inflate(&chunk[counts_end..])?;
    let fm_chunk: Vec<T> = bincode::deserialize(&serialized_fm_chunk)?;

    Ok(Self { counts_so_far: counts, bwt_chunk: fm_chunk })
}

#[allow(dead_code)]
Expand All @@ -44,15 +47,13 @@ impl FMChunk {
Ok(result)
}

pub fn search(& self, token: u32, pos: usize) -> Result<u64, LavaError> {
pub fn search(&self, token: T, pos: usize) -> Result<u64, LavaError> {
let mut result = *self.counts_so_far.get(&token).unwrap_or(&0);
for j in 0 .. pos {
for j in 0..pos {
if self.bwt_chunk[j] == token {
result += 1;
}
}
Ok(result)
}


}
}
Loading

0 comments on commit 4853c44

Please sign in to comment.