From 4ecc82b5f2b38530394ae3b6dbffad808a82b1dc Mon Sep 17 00:00:00 2001 From: _Frky <3105926+Frky@users.noreply.github.com> Date: Fri, 31 Dec 2021 16:06:33 +0100 Subject: [PATCH 1/4] Add test to emphasis bug in smack --- src/smack/smack.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/smack/smack.rs b/src/smack/smack.rs index bb37358..e76b03d 100644 --- a/src/smack/smack.rs +++ b/src/smack/smack.rs @@ -684,6 +684,34 @@ mod tests { assert!(id == 0); } + #[test] + fn test_wildcard_collision() { + let mut smack = Smack::new("test".to_string(), SMACK_CASE_INSENSITIVE); + smack.add_pattern( + b"****abcd", + 0, + SmackFlags::ANCHOR_BEGIN | SmackFlags::WILDCARDS, + ); + smack.add_pattern( + b"******abcd", + 1, + SmackFlags::ANCHOR_BEGIN | SmackFlags::WILDCARDS, + ); + smack.compile(); + let mut state = BASE_STATE; + let mut offset = 0; + let id = smack.search_next(&mut state, &b"xxxxabcd".to_vec(), &mut offset); + assert!(id == 0); + let mut state = BASE_STATE; + let mut offset = 0; + let mut id = smack.search_next(&mut state, &b"xxxxxxabcd".to_vec(), &mut offset); + assert!(id == 1); + let mut state = BASE_STATE; + let mut offset = 0; + let mut id = smack.search_next(&mut state, &b"xxxxaxabcd".to_vec(), &mut offset); + assert!(id == 0); + } + #[test] fn test_multiple_matches() { let mut smack = Smack::new("test".to_string(), SMACK_CASE_INSENSITIVE); From 0b41a6cd0876132ccf587ccbdc7bfe60b1727959 Mon Sep 17 00:00:00 2001 From: _Frky <3105926+Frky@users.noreply.github.com> Date: Fri, 31 Dec 2021 16:18:06 +0100 Subject: [PATCH 2/4] Fix test for Smack wildcard collision --- src/smack/smack.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/smack/smack.rs b/src/smack/smack.rs index e76b03d..a35e76c 100644 --- a/src/smack/smack.rs +++ b/src/smack/smack.rs @@ -708,8 +708,12 @@ mod tests { assert!(id == 1); let mut state = BASE_STATE; let mut offset = 0; + let mut id = smack.search_next(&mut state, &b"xxxxbxabcd".to_vec(), &mut offset); + assert!(id == 1); + let mut state = BASE_STATE; + let mut offset = 0; let mut id = smack.search_next(&mut state, &b"xxxxaxabcd".to_vec(), &mut offset); - assert!(id == 0); + assert!(id == 1); } #[test] From 6ad5491b245633dcdd93aea5853a8b5f1203a235 Mon Sep 17 00:00:00 2001 From: _Frky <3105926+Frky@users.noreply.github.com> Date: Fri, 31 Dec 2021 18:09:50 +0100 Subject: [PATCH 3/4] Add method to dump FSM to dot --- src/smack/smack.rs | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/smack/smack.rs b/src/smack/smack.rs index a35e76c..4ef7f45 100644 --- a/src/smack/smack.rs +++ b/src/smack/smack.rs @@ -1,4 +1,5 @@ use std::mem; +use std::convert::TryFrom; use crate::smack::smack_constants::*; use crate::smack::smack_pattern::SmackPattern; @@ -344,6 +345,50 @@ impl Smack { } (idx - px_start, row) } + fn show(&self) { + println!("digraph D {{"); + for row in 0..self.m_state_count { + for i in 1..self.symbol_count { + let s = self.symbol_to_char[i]; + let from = { + if row == BASE_STATE { + String::from("BASE") + } else if row == UNANCHORED_STATE { + String::from("UNANCHORED") + } else if row >= self.m_match_limit { + format!("\"{}\"", std::str::from_utf8(&self.m_pattern_list[self.m_match[row].m_ids[0]].pattern).unwrap()) + } else { + format!("{}", row).into() + } + }; + let to = { + let dst = self.goto(row, s); + if dst == BASE_STATE { + String::from("BASE") + } else if dst == UNANCHORED_STATE { + String::from("UNANCHORED") + } else if dst >= self.m_match_limit { + format!("\"{}\"", std::str::from_utf8(&self.m_pattern_list[self.m_match[dst].m_ids[0]].pattern).unwrap()) + } else { + format!("{}", dst).into() + } + }; + let c = { + if s == CHAR_ANCHOR_START { + String::from("^") + } else if s == CHAR_ANCHOR_END { + String::from("$") + } else if s == b'*'.into() { + String::from("ANY") + } else { + format!("{}", u8::try_from(s).unwrap() as char) + } + }; + println!(" {} -> {} [label=\"{}\"]", from, to, c); + } + } + println!("}}"); + } fn inner_match_shift7(&self, px: Vec, length: usize, state: usize) -> (usize, usize) { let px_start = 0; let px_end = length; From b460a548f3acc11936ead66c83c86fdd603d1851 Mon Sep 17 00:00:00 2001 From: _Frky <3105926+Frky@users.noreply.github.com> Date: Fri, 31 Dec 2021 18:10:19 +0100 Subject: [PATCH 4/4] Simplify failing test --- src/smack/smack.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/smack/smack.rs b/src/smack/smack.rs index 4ef7f45..4c6296e 100644 --- a/src/smack/smack.rs +++ b/src/smack/smack.rs @@ -733,31 +733,32 @@ mod tests { fn test_wildcard_collision() { let mut smack = Smack::new("test".to_string(), SMACK_CASE_INSENSITIVE); smack.add_pattern( - b"****abcd", + b"ab", 0, SmackFlags::ANCHOR_BEGIN | SmackFlags::WILDCARDS, ); smack.add_pattern( - b"******abcd", + b"*ab", 1, SmackFlags::ANCHOR_BEGIN | SmackFlags::WILDCARDS, ); smack.compile(); + smack.show(); let mut state = BASE_STATE; let mut offset = 0; - let id = smack.search_next(&mut state, &b"xxxxabcd".to_vec(), &mut offset); + let id = smack.search_next(&mut state, &b"ab".to_vec(), &mut offset); assert!(id == 0); let mut state = BASE_STATE; let mut offset = 0; - let mut id = smack.search_next(&mut state, &b"xxxxxxabcd".to_vec(), &mut offset); + let mut id = smack.search_next(&mut state, &b"xab".to_vec(), &mut offset); assert!(id == 1); let mut state = BASE_STATE; let mut offset = 0; - let mut id = smack.search_next(&mut state, &b"xxxxbxabcd".to_vec(), &mut offset); + let mut id = smack.search_next(&mut state, &b"bab".to_vec(), &mut offset); assert!(id == 1); let mut state = BASE_STATE; let mut offset = 0; - let mut id = smack.search_next(&mut state, &b"xxxxaxabcd".to_vec(), &mut offset); + let mut id = smack.search_next(&mut state, &b"aab".to_vec(), &mut offset); assert!(id == 1); } @@ -794,6 +795,10 @@ mod tests { let mut offset = 0; let id = smack.search_next(&mut state, &b"bac".to_vec(), &mut offset); assert!(id == 1); + let mut state = BASE_STATE; + let mut offset = 0; + let id = smack.search_next(&mut state, &b"aac".to_vec(), &mut offset); + assert!(id == 1); } #[test]