From 33b10d7691851a1e1c70e103760a54ba24c14035 Mon Sep 17 00:00:00 2001 From: ucyo Date: Tue, 11 Oct 2022 13:44:51 +0200 Subject: [PATCH 1/5] Implement Rank for order 1 --- src/lib.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 02f731f..fee57ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,6 +34,7 @@ extern crate serde_yaml; use std::borrow::ToOwned; use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::collections::HashMap; +use std::fmt::Debug; use std::fs::File; use std::hash::Hash; use std::io::prelude::*; @@ -56,8 +57,8 @@ use serde::Serialize; use serde_yaml as yaml; /// The definition of all types that can be used in a `Chain`. -pub trait Chainable: Eq + Hash + Clone {} -impl Chainable for T where T: Eq + Hash + Clone {} +pub trait Chainable: Eq + Hash + Clone + Debug {} +impl Chainable for T where T: Eq + Hash + Clone + Debug {} type Token = Option; @@ -106,6 +107,21 @@ where } } + /// Returns a HashMap of current counts of token T + pub fn rank(&self, token: T) -> Vec<(&Token, &usize)> { + // let mut toks = vec![None; self.order]; + // toks.push(Some(token)); + let toks = vec![Some(token)]; + println!("Tokens {:?}", toks); + println!("Map {:?}", self.map); + let result = self.map.get(&toks).unwrap(); + let sorted: Vec<_> = result + .iter() + .sorted_by(|&a, &b| Ord::cmp(a.1, b.1).reverse()) + .collect(); + sorted + } + /// Determines whether or not the chain is empty. A chain is considered empty if nothing has /// been fed into it. 
pub fn is_empty(&self) -> bool { @@ -456,6 +472,42 @@ mod test { chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); } + #[test] + fn rank() { + let mut chain = Chain::new(); + chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); + let vec = chain.rank(3); + let mut iter = vec.iter(); + assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); + assert_eq!(iter.next(), None); + + chain.feed(vec![3, 10, 3, 11, 3, 11, 3, 10, 3, 11]); + let vec = chain.rank(3); + let mut iter = vec.iter(); + assert_eq!(iter.next(), Some(&(&Some(11), &3usize))); + assert_eq!(iter.next(), Some(&(&Some(10), &2usize))); + assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); + assert_eq!(iter.next(), None); + } + + #[test] + fn rank_higher_order() { + let mut chain = Chain::of_order(2); + chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); + let vec = chain.rank(3); + let mut iter = vec.iter(); + assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); + assert_eq!(iter.next(), None); + + chain.feed(vec![3, 10, 3, 11, 3, 11, 3, 10, 3, 11]); + let vec = chain.rank(3); + let mut iter = vec.iter(); + assert_eq!(iter.next(), Some(&(&Some(11), &3usize))); + assert_eq!(iter.next(), Some(&(&Some(10), &2usize))); + assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); + assert_eq!(iter.next(), None); + } + #[test] fn generate() { let mut chain = Chain::new(); From 082b8593ccbc53e5131e609fca26adc1710d1555 Mon Sep 17 00:00:00 2001 From: ucyo Date: Tue, 11 Oct 2022 13:54:28 +0200 Subject: [PATCH 2/5] Implement Rank for order N --- src/lib.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fee57ca..c8fab1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,10 +108,8 @@ where } /// Returns a HashMap of current counts of token T - pub fn rank(&self, token: T) -> Vec<(&Token, &usize)> { - // let mut toks = vec![None; self.order]; - // toks.push(Some(token)); - let toks = vec![Some(token)]; + pub fn rank(&self, token: Vec) -> Vec<(&Token, &usize)> { + let toks: 
Vec<_> = token.into_iter().map(|a| Some(a)).collect(); println!("Tokens {:?}", toks); println!("Map {:?}", self.map); let result = self.map.get(&toks).unwrap(); @@ -476,13 +474,13 @@ mod test { fn rank() { let mut chain = Chain::new(); chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); - let vec = chain.rank(3); + let vec = chain.rank(vec![3]); let mut iter = vec.iter(); assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); assert_eq!(iter.next(), None); chain.feed(vec![3, 10, 3, 11, 3, 11, 3, 10, 3, 11]); - let vec = chain.rank(3); + let vec = chain.rank(vec![3]); let mut iter = vec.iter(); assert_eq!(iter.next(), Some(&(&Some(11), &3usize))); assert_eq!(iter.next(), Some(&(&Some(10), &2usize))); @@ -494,17 +492,15 @@ mod test { fn rank_higher_order() { let mut chain = Chain::of_order(2); chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); - let vec = chain.rank(3); + let vec = chain.rank(vec![3,5]); let mut iter = vec.iter(); - assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); + assert_eq!(iter.next(), Some(&(&Some(10), &1usize))); assert_eq!(iter.next(), None); chain.feed(vec![3, 10, 3, 11, 3, 11, 3, 10, 3, 11]); - let vec = chain.rank(3); + let vec = chain.rank(vec![3,10]); let mut iter = vec.iter(); - assert_eq!(iter.next(), Some(&(&Some(11), &3usize))); - assert_eq!(iter.next(), Some(&(&Some(10), &2usize))); - assert_eq!(iter.next(), Some(&(&Some(5), &1usize))); + assert_eq!(iter.next(), Some(&(&Some(3), &2usize))); assert_eq!(iter.next(), None); } From 140bc54e2b7f36d1c89b1300304240d53207a48c Mon Sep 17 00:00:00 2001 From: ucyo Date: Tue, 11 Oct 2022 14:19:28 +0200 Subject: [PATCH 3/5] Support references for rank operation --- src/lib.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c8fab1c..2dcae1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,8 +108,17 @@ where } /// Returns a HashMap of current counts of token T - pub fn rank(&self, token: Vec) -> Vec<(&Token, &usize)> { - let toks: Vec<_> = 
token.into_iter().map(|a| Some(a)).collect(); + pub fn rank>(&self, tokens: S) -> Vec<(&Token, &usize)> { + let tokens = tokens.as_ref(); + if tokens.is_empty() { + return Vec::new(); + } + println!("Tokens {:?}", tokens); + let mut toks = vec![]; + toks.extend(tokens.iter().map(|token| Some(token.clone()))); + if !self.map.contains_key(&toks) { + return Vec::new() + } println!("Tokens {:?}", toks); println!("Map {:?}", self.map); let result = self.map.get(&toks).unwrap(); From 33352d7ae3a66f276dd50a5a2d32fcc63015147c Mon Sep 17 00:00:00 2001 From: ucyo Date: Tue, 11 Oct 2022 15:12:21 +0200 Subject: [PATCH 4/5] Add iterator over rank --- src/lib.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2dcae1b..ade3ade 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,17 +107,16 @@ where } } - /// Returns a HashMap of current counts of token T - pub fn rank>(&self, tokens: S) -> Vec<(&Token, &usize)> { + /// Returns a vector of current counts of token T + fn rank>(&self, tokens: S) -> Vec<(&Token, &usize)> { let tokens = tokens.as_ref(); if tokens.is_empty() { return Vec::new(); } - println!("Tokens {:?}", tokens); let mut toks = vec![]; toks.extend(tokens.iter().map(|token| Some(token.clone()))); if !self.map.contains_key(&toks) { - return Vec::new() + return Vec::new(); } println!("Tokens {:?}", toks); println!("Map {:?}", self.map); @@ -129,6 +128,12 @@ where sorted } + /// Get iterator over all tokens following a given set of tokens + /// (sorted by count) + pub fn iter_rank>(&self, tokens: S) -> RankIterator { + RankIterator::new(self, tokens) + } + /// Determines whether or not the chain is empty. A chain is considered empty if nothing has /// been fed into it. 
pub fn is_empty(&self) -> bool { @@ -455,6 +460,44 @@ where } } +#[derive(Debug)] +/// Iterator over tokens sorted by rank given a token (sorted by highest probability) +pub struct RankIterator<'a, T> +where + T: Chainable + 'a, +{ + chain: Vec<(&'a Token, &'a usize)>, + count: usize, +} + +impl<'a, T> RankIterator<'a, T> +where + T: Chainable + 'a, +{ + /// Generate rank iterator + pub fn new>(chain: &'a Chain, tokens: S) -> Self { + let m = chain.rank(tokens); + RankIterator { chain: m, count: 0 } + } +} + +impl<'a, T> Iterator for RankIterator<'a, T> +where + T: Chainable + 'a, +{ + type Item = &'a Token; + + fn next(&mut self) -> Option { + if self.count >= self.chain.len() { + None + } else { + let r = Some(self.chain[self.count].0); + self.count += 1; + r + } + } +} + #[cfg(test)] mod test { use super::Chain; @@ -497,17 +540,48 @@ mod test { assert_eq!(iter.next(), None); } + #[test] + fn iter_rank() { + let mut chain = Chain::new(); + chain.feed(vec![3, 5, 10]).feed(vec![5, 3, 12, 3, 5]); + let mut iter = chain.iter_rank(vec![3]); + assert_eq!(iter.next(), Some(&Some(5))); + assert_eq!(iter.next(), Some(&Some(12))); + assert_eq!(iter.next(), None); + + let mut iter = chain.iter_rank(vec![3]).take(1); + assert_eq!(iter.next(), Some(&Some(5))); + assert_eq!(iter.next(), None); + + let mut iter = chain.iter_rank(vec![]).take(1); + assert_eq!(iter.next(), None); + } + + #[test] + fn iter_rank_higher_order() { + let mut chain = Chain::of_order(2); + chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); + let mut iter = chain.iter_rank(vec![3, 5]); + assert_eq!(iter.next(), Some(&Some(10))); + assert_eq!(iter.next(), None); + + chain.feed(vec![3, 10, 3, 11, 3, 11, 3, 10, 3, 11]); + let mut iter = chain.iter_rank(vec![3, 10]); + assert_eq!(iter.next(), Some(&Some(3))); + assert_eq!(iter.next(), None); + } + #[test] fn rank_higher_order() { let mut chain = Chain::of_order(2); chain.feed(vec![3, 5, 10]).feed(vec![5, 12]); - let vec = chain.rank(vec![3,5]); + let vec = 
chain.rank(vec![3, 5]); let mut iter = vec.iter(); assert_eq!(iter.next(), Some(&(&Some(10), &1usize))); assert_eq!(iter.next(), None); chain.feed(vec![3, 10, 3, 11, 3, 11, 3, 10, 3, 11]); - let vec = chain.rank(vec![3,10]); + let vec = chain.rank(vec![3, 10]); let mut iter = vec.iter(); assert_eq!(iter.next(), Some(&(&Some(3), &2usize))); assert_eq!(iter.next(), None); From 292ab1aaed1b4215d4885b8f08657454960ab526 Mon Sep 17 00:00:00 2001 From: ucyo Date: Thu, 20 Oct 2022 11:30:17 +0200 Subject: [PATCH 5/5] Add todo --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ade3ade..7f96f85 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -131,6 +131,9 @@ where /// Get iterator over all tokens following a given set of tokens /// (sorted by count) pub fn iter_rank>(&self, tokens: S) -> RankIterator { + // TODO: + // The iteration order is not stable: elements that share the same + // count may come out in a different order. RankIterator::new(self, tokens) }