pelite/
strings.rs

1/*!
2Analyzing strings in binary data.
3 */
4
/// Heuristic used to decide which bytes may be part of a string.
///
/// Implements `Eq` and `PartialEq` so that a selected heuristic can be compared against a known variant.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Heuristic {
	/// Printable ascii heuristic.
	///
	/// Considers strings valid if all characters are printable ascii characters.
	/// Allows TAB, LF, CR and everything from space to tilde.
	PrintableAscii,
}
13
/// Configure the string finding heuristics.
///
/// Start from `Config::default()`, override the fields of interest and call `enumerate` to scan a byte slice.
#[derive(Clone, Debug)]
// #[non_exhaustive]
pub struct Config {
	/// Minimum string length to accept as a valid string.
	///
	/// Applies to strings which are not nul terminated: those ended by any other rejected byte or by the end of the data.
	/// The length does not include the terminating byte.
	pub min_length: u8,
	/// Minimum string length when there is a nul terminator.
	/// Can have a lower threshold as having a nul terminator increases confidence that this is a c string literal.
	///
	/// The length does not include the nul terminator itself.
	pub min_length_nul: u8,
	/// When true, requires any found string to be terminated by a nul terminator.
	/// A nul terminator increases confidence that this is indeed a c string literal.
	///
	/// When false, strings ended by another rejected byte or by the end of the data are also accepted if they reach `min_length`.
	pub strict_nul: bool,
	/// Heuristic to use to validate sequences.
	pub heuristic: Heuristic,
}
29impl Default for Config {
30	fn default() -> Config {
31		Config {
32			min_length: 6,
33			min_length_nul: 3,
34			strict_nul: true,
35			heuristic: Heuristic::PrintableAscii,
36		}
37	}
38}
39impl Config {
40	/// Constructs the [enumerator](struct.Enumerator.html) with this configuration.
41	///
42	/// Given the `base` argument the relative virtual address of the `bytes` slice.
43	pub fn enumerate(self, base: u32, bytes: &'_ [u8]) -> Enumerator<'_> {
44		Enumerator { base, offset: 0, bytes, config: self }
45	}
46}
47
/// A string found in binary data.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Found<'a> {
	/// The bytes of the string.
	pub string: &'a [u8],
	/// The address of the string.
	pub address: u32,
	/// Whether the string is marked as terminated by a nul byte.
	pub has_nul: bool,
}
impl<'a> Found<'a> {
	/// Constructs a found string which is marked as nul terminated.
	pub fn nul(string: &'a [u8], address: u32) -> Found<'a> {
		Self { has_nul: true, string, address }
	}
	/// Constructs a found string which is marked as not nul terminated.
	pub fn non_nul(string: &'a [u8], address: u32) -> Found<'a> {
		Self { has_nul: false, string, address }
	}
}
62
/// Returns whether `byte` is accepted by the printable ascii heuristic.
///
/// Accepts TAB, LF, CR and every byte from space (`0x20`) through tilde (`0x7e`).
/// Every other control character, DEL (`0x7f`) and all bytes with the high bit set are rejected.
fn is_printable_ascii(byte: u8) -> bool {
	match byte {
		b'\t' | b'\n' | b'\r' => true,
		// The upper bound is tilde: DEL (0x7f) is a control character, not a printable one.
		0x20..=0x7e => true,
		_ => false,
	}
}
71
/// Iterator over the strings in binary data.
///
/// Constructed by `Config::enumerate`, yields a `Found` for every string accepted by the configuration.
#[derive(Clone)]
pub struct Enumerator<'a> {
	/// Address of the first byte of `bytes`, added to the offset of a string to form its reported address.
	base: u32,
	/// Offset in `bytes` where the next call to `next` starts scanning.
	offset: u32,
	/// The binary data being scanned.
	bytes: &'a [u8],
	/// Length thresholds, nul terminator policy and heuristic used to accept strings.
	config: Config,
}
80impl<'a> Iterator for Enumerator<'a> {
81	type Item = Found<'a>;
82	fn next(&mut self) -> Option<Found<'a>> {
83		let mut start = self.offset as usize;
84		let mut i = start;
85		let bytes = self.bytes;
86		match self.config.heuristic {
87			Heuristic::PrintableAscii => {
88				while i < bytes.len() {
89					if is_printable_ascii(bytes[i]) {
90						i += 1;
91						continue;
92					}
93					else if bytes[i as usize] == b'\0' {
94						if i - start >= self.config.min_length_nul as usize {
95							self.offset = (i + 1) as u32;
96							return Some(Found::nul(&bytes[start..i], self.base + start as u32));
97						}
98					}
99					else if !self.config.strict_nul {
100						if i - start >= self.config.min_length as usize {
101							self.offset = (i + 1) as u32;
102							return Some(Found::non_nul(&bytes[start..i], self.base + start as u32));
103						}
104					}
105					i += 1;
106					start = i;
107				}
108				if start != i {
109					if !self.config.strict_nul && i - start >= self.config.min_length as usize {
110						self.offset = i as u32;
111						return Some(Found::non_nul(&bytes[start..i], self.base + start as u32));
112					}
113				}
114			},
115		}
116		None
117	}
118}
119
#[test]
fn testing() {
	// Layout of the input:
	//   offset 0      : 0x1f, rejected byte
	//   offset 1..9   : "C-STRING" followed by a nul terminator at offset 9
	//   offset 10..12 : 0x80 0x81, rejected bytes
	//   offset 12..22 : ten 'A' characters ended by 0xff, which is not a nul terminator
	let data = b"\x1fC-STRING\0\x80\x81AAAAAAAAAA\xff";
	// Turn off strict mode so the string ended by 0xff is reported as well.
	let config = Config { strict_nul: false, ..Config::default() };
	let found = config.enumerate(0x1000, data).collect::<Vec<_>>();
	let expected = vec![
		Found::nul(b"C-STRING", 0x1000 + 1),
		Found::non_nul(b"AAAAAAAAAA", 0x1000 + 12),
	];
	assert_eq!(found, expected);
}