1#![allow(ellipsis_inclusive_range_patterns)]
40
41use std::prelude::v1::*;
42
43use std::{cmp, fmt, mem, str};
44
45#[cfg(feature = "std")]
46use std::error;
47
48pub const STACK_SIZE: usize = 4;
50pub(crate) const PTR_SKIP: u8 = 0;
52
53#[derive(Copy, Clone, Debug, Eq, PartialEq)]
57pub struct ParsePatError {
58 kind: PatError,
59 position: usize,
60}
61impl fmt::Display for ParsePatError {
62 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63 write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str())
64 }
65}
66#[cfg(feature = "std")]
67impl error::Error for ParsePatError {
68 fn description(&self) -> &str {
69 self.kind.to_str()
70 }
71}
72#[derive(Copy, Clone, Debug, Eq, PartialEq)]
73enum PatError {
74 UnpairedHexDigit,
75 UnknownChar,
76 ManyOverflow,
77 ManyRange,
78 ManyInvalid,
79 SaveOverflow,
80 StackError,
81 StackInvalid,
82 UnclosedQuote,
83 AlignedOperand,
84 ReadOperand,
85 SubPattern,
86 SubOverflow,
87}
88impl PatError {
89 fn to_str(self) -> &'static str {
90 match self {
91 PatError::UnpairedHexDigit => "unpaired hex digit",
92 PatError::UnknownChar => "unknown character",
93 PatError::ManyOverflow => "many range exceeded",
94 PatError::ManyRange => "many bounds nonsensical",
95 PatError::ManyInvalid => "many invalid syntax",
96 PatError::SaveOverflow => "save store overflow",
97 PatError::StackError => "stack unbalanced",
98 PatError::StackInvalid => "stack must follow jump",
99 PatError::UnclosedQuote => "string missing end quote",
100 PatError::AlignedOperand => "aligned operand error",
101 PatError::ReadOperand => "read operand error",
102 PatError::SubPattern => "sub pattern error",
103 PatError::SubOverflow => "sub pattern too large",
104 }
105 }
106}
107
108#[derive(Copy, Clone, Debug, Eq, PartialEq)]
114pub enum Atom {
115 Byte(u8),
117 Save(u8),
119 Push(u8),
121 Pop,
123 Fuzzy(u8),
125 Skip(u8),
127 Back(u8),
129 Rangext(u8),
131 Many(u8),
133 Jump1,
137 Jump4,
141 Ptr,
147 Pir(u8),
151 VTypeName,
153 Check(u8),
155 Aligned(u8),
157 ReadI8(u8),
159 ReadU8(u8),
161 ReadI16(u8),
163 ReadU16(u8),
165 ReadI32(u8),
167 ReadU32(u8),
169 Zero(u8),
171 Case(u8),
175 Break(u8),
177 Nop,
179}
180
181pub type Pattern = Vec<Atom>;
183
184pub fn save_len(pat: &[Atom]) -> usize {
186 pat.iter().filter_map(|&atom| {
187 match atom {
188 Atom::Save(slot) | Atom::Pir(slot) | Atom::Check(slot) | Atom::Zero(slot) |
189 Atom::ReadI8(slot) | Atom::ReadI16(slot) | Atom::ReadI32(slot) |
190 Atom::ReadU8(slot) | Atom::ReadU16(slot)| Atom::ReadU32(slot) => Some(slot as usize + 1),
191 _ => None,
192 }
193 }).max().unwrap_or(0)
194}
195
196pub fn parse(pat: &str) -> Result<Pattern, ParsePatError> {
292 let mut result = Vec::with_capacity(pat.len() / 2);
293 let mut pat_end = pat;
294 match parse_helper(&mut pat_end, &mut result) {
295 Ok(()) => Ok(result),
296 Err(kind) => {
297 let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize;
298 Err(ParsePatError { kind, position })
299 },
300 }
301}
302fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
305 result.push(Atom::Save(0));
306 let mut iter = pat.as_bytes().iter();
307 let mut save = 1;
308 let mut depth = 0;
309 #[derive(Default)]
310 struct SubPattern {
311 case: usize,
312 brks: Vec<usize>,
313 save: u8,
314 save_next: u8,
315 depth: u8,
316 }
317 let mut subs = Vec::<SubPattern>::new();
318 while let Some(mut chr) = iter.next().cloned() {
319 match chr {
320 b'%' => result.push(Atom::Jump1),
322 b'$' => result.push(Atom::Jump4),
324 b'*' => result.push(Atom::Ptr),
326 b'{' => {
328 depth += 1;
329 let atom = match result.last_mut() {
331 Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
332 Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
333 Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
334 _ => return Err(PatError::StackInvalid),
335 };
336 result.push(atom);
337 },
338 b'}' => {
340 if depth <= 0 {
342 return Err(PatError::StackError);
343 }
344 depth -= 1;
345 result.push(Atom::Pop);
346 },
347 b'(' => {
349 subs.push(SubPattern::default());
350 let sub = subs.last_mut().unwrap();
351 sub.save = save;
353 sub.depth = depth;
354 sub.case = result.len();
356 result.push(Atom::Case(0));
357 },
358 b'|' => {
360 let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
362 sub.save_next = cmp::max(sub.save_next, save);
364 save = sub.save;
365 depth = sub.depth;
366 sub.brks.push(result.len());
368 result.push(Atom::Break(0));
369 let case_offset = result.len() - sub.case - 1;
371 if case_offset >= 256 {
372 return Err(PatError::SubOverflow);
373 }
374 result[sub.case] = Atom::Case(case_offset as u8);
375 sub.case = result.len();
376 result.push(Atom::Case(0));
377 },
378 b')' => {
380 let sub = subs.pop().ok_or(PatError::SubPattern)?;
382 save = cmp::max(sub.save_next, save);
384 depth = sub.depth;
385 result[sub.case] = Atom::Nop;
387 for &brk in &sub.brks {
389 let brk_offset = result.len() - brk - 1;
390 if brk_offset >= 256 {
391 return Err(PatError::SubOverflow);
392 }
393 result[brk] = Atom::Break(brk_offset as u8);
394 }
395 },
396 b'[' => {
398 let mut lower_bound = 0u32;
400 let mut at_least_one_char = false;
401 loop {
402 chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
403 match chr {
404 b'-' | b']' => break,
405 chr @ b'0'...b'9' => {
406 at_least_one_char = true;
407 lower_bound = lower_bound * 10 + (chr - b'0') as u32;
408 if lower_bound >= 16384 {
409 return Err(PatError::ManyOverflow);
410 }
411 },
412 _ => return Err(PatError::ManyInvalid),
413 }
414 }
415 if !at_least_one_char {
416 return Err(PatError::ManyInvalid);
417 }
418 if lower_bound > 0 {
420 if lower_bound >= 256 {
421 result.push(Atom::Rangext((lower_bound >> 8) as u8));
422 }
423 result.push(Atom::Skip((lower_bound & 0xff) as u8));
424 }
425 if chr == b']' {
427 continue;
428 }
429 let mut upper_bound = 0u32;
431 loop {
432 chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
433 match chr {
434 b']' => break,
435 chr @ b'0'...b'9' => {
436 upper_bound = upper_bound * 10 + (chr - b'0') as u32;
437 if upper_bound >= 16384 {
438 return Err(PatError::ManyOverflow);
439 }
440 },
441 _ => return Err(PatError::ManyInvalid),
442 }
443 }
444 if lower_bound < upper_bound {
446 let many_skip = upper_bound - lower_bound;
447 if many_skip >= 256 {
448 result.push(Atom::Rangext((many_skip >> 8) as u8));
449 }
450 result.push(Atom::Many((many_skip & 0xff) as u8));
451 }
452 else {
453 return Err(PatError::ManyRange);
454 }
455 },
456 b'0'...b'9' | b'A'...b'F' | b'a'...b'f' => {
458 let hi = if chr >= b'a' { chr - b'a' + 10 }
460 else if chr >= b'A' { chr - b'A' + 10 }
461 else { chr - b'0' };
462 chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
463 let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
465 else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
466 else if chr >= b'0' && chr <= b'9' { chr - b'0' }
467 else { return Err(PatError::UnpairedHexDigit); };
468 result.push(Atom::Byte((hi << 4) + lo));
470 },
471 b'"' => {
473 loop {
474 if let Some(chr) = iter.next().cloned() {
475 if chr != b'"' {
476 result.push(Atom::Byte(chr));
477 }
478 else {
479 break;
480 }
481 }
482 else {
483 return Err(PatError::UnclosedQuote);
484 }
485 }
486 },
487 b'\'' => {
489 if save >= u8::max_value() {
491 return Err(PatError::SaveOverflow);
492 }
493 result.push(Atom::Save(save));
494 save += 1;
495 },
496 b'?' => {
498 if let Some(Atom::Skip(skip)) = result.last_mut() {
504 if *skip != PTR_SKIP && *skip < 255u8 {
505 *skip += 1;
506 continue;
507 }
508 }
509 result.push(Atom::Skip(1));
510 },
511 b'@' => {
512 let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
513 let atom = if op >= b'0' && op <= b'9' {
514 Atom::Aligned(op - b'0')
515 }
516 else if op >= b'A' && op <= b'Z' {
517 Atom::Aligned(10 + (op - b'A'))
518 }
519 else if op >= b'a' && op <= b'z' {
520 Atom::Aligned(10 + (op - b'a'))
521 }
522 else {
523 return Err(PatError::AlignedOperand);
524 };
525 result.push(atom);
526 },
527 b'i' => {
528 let atom = match iter.next().cloned() {
529 Some(b'1') => Atom::ReadI8(save),
530 Some(b'2') => Atom::ReadI16(save),
531 Some(b'4') => Atom::ReadI32(save),
532 _ => return Err(PatError::ReadOperand),
533 };
534 if save >= u8::max_value() {
535 return Err(PatError::SaveOverflow);
536 }
537 save += 1;
538 result.push(atom);
539 },
540 b'u' => {
541 let atom = match iter.next().cloned() {
542 Some(b'1') => Atom::ReadU8(save),
543 Some(b'2') => Atom::ReadU16(save),
544 Some(b'4') => Atom::ReadU32(save),
545 _ => return Err(PatError::ReadOperand),
546 };
547 if save >= u8::max_value() {
548 return Err(PatError::SaveOverflow);
549 }
550 save += 1;
551 result.push(atom);
552 },
553 b'z' => {
554 if save >= u8::max_value() {
555 return Err(PatError::SaveOverflow);
556 }
557 result.push(Atom::Zero(save));
558 save += 1;
559 },
560 b' ' | b'\n' | b'\r' | b'\t' => {},
562 _ => {
564 return Err(PatError::UnknownChar);
565 },
566 }
567 *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
569 }
570 if depth != 0 {
572 return Err(PatError::StackError);
573 }
574 if subs.len() != 0 {
576 return Err(PatError::SubPattern);
577 }
578
579 fn is_redundant(atom: &Atom) -> bool {
581 return match atom {
582 | Atom::Skip(_)
583 | Atom::Rangext(_)
584 | Atom::Pop
585 | Atom::Many(_) => true,
586 _ => false,
587 }
588 }
589 while result.last().map(is_redundant).unwrap_or(false) {
590 result.pop();
591 }
592
593 Ok(())
594}
595
596#[cfg(test)]
599mod tests {
600 use super::*;
601
602 const _: [(); 2] = [(); std::mem::size_of::<Atom>()];
603
604 #[test]
605 fn patterns() {
606 use self::Atom::*;
607
608 assert_eq!(parse("12 34 56 ? ?"), Ok(vec![Save(0), Byte(0x12), Byte(0x34), Byte(0x56)]));
609
610 assert_eq!(parse("B9'?? 68???? E8${'} 8B"), Ok(vec![
611 Save(0), Byte(0xB9), Save(1), Skip(2), Byte(0x68), Skip(4), Byte(0xE8), Push(4), Jump4, Save(2), Pop, Byte(0x8B)
612 ]));
613
614 assert_eq!(parse("${%{${%{}}}}"), Ok(vec![
615 Save(0), Push(4), Jump4, Push(1), Jump1, Push(4), Jump4, Push(1), Jump1
616 ]));
617
618 assert_eq!(parse("24 5A9e D0 AFBea3 fCdd"), Ok(vec![
619 Save(0), Byte(0x24), Byte(0x5A), Byte(0x9E), Byte(0xD0), Byte(0xAF), Byte(0xBE), Byte(0xA3), Byte(0xFC), Byte(0xDD)
620 ]));
621
622 assert_eq!(parse("\"string\""), Ok(vec![
623 Save(0), Byte(115), Byte(116), Byte(114), Byte(105), Byte(110), Byte(103)
624 ]));
625
626 assert_eq!(parse("*{FF D8 42}"), Ok(vec![
627 Save(0), Push(PTR_SKIP), Ptr, Byte(0xFF), Byte(0xD8), Byte(0x42)
628 ]));
629 assert_eq!(parse("*{\"hello\"00}"), Ok(vec![
630 Save(0), Push(PTR_SKIP), Ptr, Byte(104), Byte(101), Byte(108), Byte(108), Byte(111), Byte(0)
631 ]));
632
633 assert_eq!(parse("b8 [16] 50 [13-42] ff"), Ok(vec![
634 Save(0), Byte(0xb8), Skip(16), Byte(0x50), Skip(13), Many(29), Byte(0xff)
635 ]));
636
637 assert_eq!(parse("e9 $ @4"), Ok(vec![
638 Save(0), Byte(0xe9), Jump4, Aligned(4)
639 ]));
640
641 assert_eq!(parse("83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8"), Ok(vec![
642 Save(0), Byte(0x83), Byte(0xc0), Byte(0x2a),
643 Case(3), Byte(0x6a), Skip(1), Break(3),
644 Nop, Byte(0x68), Skip(4), Byte(0xe8),
645 ]));
646 }
647
648 #[test]
649 fn errors() {
650 use self::PatError::*;
651 assert_eq!(Err(ParsePatError { kind: StackError, position: 2 }), parse("${"));
652 assert_eq!(Err(ParsePatError { kind: StackError, position: 0 }), parse("}}"));
653 assert_eq!(Err(ParsePatError { kind: StackInvalid, position: 3 }), parse("AB {}"));
654 assert_eq!(Err(ParsePatError { kind: UnpairedHexDigit, position: 2 }), parse("123"));
655 assert_eq!(Err(ParsePatError { kind: UnpairedHexDigit, position: 3 }), parse("EE BZ"));
656 assert_eq!(Err(ParsePatError { kind: UnpairedHexDigit, position: 0 }), parse("A?"));
657 assert_eq!(Err(ParsePatError { kind: UnknownChar, position: 0 }), parse("é"));
658 assert_eq!(Err(ParsePatError { kind: AlignedOperand, position: 0 }), parse("@"));
659 assert_eq!(Err(ParsePatError { kind: UnclosedQuote, position: 0 }), parse("\"unbalanced"));
660 assert_eq!(Err(ParsePatError { kind: ManyInvalid, position: 0 }), parse("[-2]"));
661 assert_eq!(Err(ParsePatError { kind: ManyInvalid, position: 0 }), parse("[-]"));
662 assert_eq!(Err(ParsePatError { kind: ManyInvalid, position: 0 }), parse("[]"));
663 assert_eq!(Err(ParsePatError { kind: ManyRange, position: 0 }), parse("[0-]"));
664 assert_eq!(Err(ParsePatError { kind: ManyRange, position: 0 }), parse("[0-0]"));
665 assert_eq!(Err(ParsePatError { kind: ManyRange, position: 0 }), parse("[20-1]"));
666 assert_eq!(Err(ParsePatError { kind: ManyOverflow, position: 0 }), parse("[20000-40000]"));
667 }
668}