diff --git a/.gitignore b/.gitignore index 450c472..fb6d1e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /grep-0.1.7.crate /grep-0.1.8.crate +/grep-0.1.9.crate diff --git a/0001-grep-upgrade-to-regex-syntax-0.5.patch b/0001-grep-upgrade-to-regex-syntax-0.5.patch deleted file mode 100644 index 53652b6..0000000 --- a/0001-grep-upgrade-to-regex-syntax-0.5.patch +++ /dev/null @@ -1,460 +0,0 @@ -From cd08707c7c82058559bd5557efb3c1d0379dbf1d Mon Sep 17 00:00:00 2001 -From: Andrew Gallant -Date: Tue, 13 Mar 2018 20:38:50 -0400 -Subject: [PATCH 1/2] grep: upgrade to regex-syntax 0.5 - -This update brings with it many bug fixes: - - * Better error messages are printed overall. We also include - explicit call out for unsupported features like backreferences - and look-around. - * Regexes like `\s*{` no longer emit incomprehensible errors. - * Unicode escape sequences, such as `\u{..}` are now supported. - -For the most part, this upgrade was done in a straight-forward way. We -resist the urge to refactor the `grep` crate, in anticipation of it -being rewritten anyway. - -Note that we removed the `--fixed-strings` suggestion whenever a regex -syntax error occurs. In practice, I've found that it results in a lot of -false positives, and I believe that its use is not as paramount now that -regex parse errors are much more readable. - -Closes #268, Closes #395, Closes #702, Closes #853 ---- - src/literals.rs | 119 +++++++++++++++++++++++----------------------- - src/nonl.rs | 85 ++++++++++++++++++--------------- - src/search.rs | 19 ++++---- - src/word_boundary.rs | 31 ++++++------ - 4 files changed, 130 insertions(+), 124 deletions(-) - -diff --git a/src/literals.rs b/src/literals.rs -index eebeac4c7249..3e1c385bcda9 100644 ---- a/src/literals.rs -+++ b/src/literals.rs -@@ -10,10 +10,8 @@ principled. - use std::cmp; - - use regex::bytes::RegexBuilder; --use syntax::{ -- Expr, Literals, Lit, -- ByteClass, ByteRange, CharClass, ClassRange, Repeater, --}; -+use syntax::hir::{self, Hir, HirKind}; -+use syntax::hir::literal::{Literal, Literals}; - - #[derive(Clone, Debug)] - pub struct LiteralSets { -@@ -23,12 +21,12 @@ pub struct LiteralSets { - } - - impl LiteralSets { -- pub fn create(expr: &Expr) -> Self { -+ pub fn create(expr: &Hir) -> Self { - let mut required = Literals::empty(); - union_required(expr, &mut required); - LiteralSets { -- prefixes: expr.prefixes(), -- suffixes: expr.suffixes(), -+ prefixes: Literals::prefixes(expr), -+ suffixes: Literals::suffixes(expr), - required: required, - } - } -@@ -93,60 +91,52 @@ impl LiteralSets { - } - } - --fn union_required(expr: &Expr, lits: &mut Literals) { -- use syntax::Expr::*; -- match *expr { -- Literal { ref chars, casei: false } => { -- let s: String = chars.iter().cloned().collect(); -- lits.cross_add(s.as_bytes()); -+fn union_required(expr: &Hir, lits: &mut Literals) { -+ match *expr.kind() { -+ HirKind::Literal(hir::Literal::Unicode(c)) => { -+ let mut buf = [0u8; 4]; -+ lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); - } -- Literal { ref chars, casei: true } => { -- for &c in chars { -- let cls = CharClass::new(vec![ -- ClassRange { start: c, end: c }, -- ]).case_fold(); -- if !lits.add_char_class(&cls) { -- lits.cut(); -- return; -- } -+ HirKind::Literal(hir::Literal::Byte(b)) => { -+ lits.cross_add(&[b]); -+ } -+ HirKind::Class(hir::Class::Unicode(ref cls)) => { -+ if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) { -+ lits.cut(); -+ } -+ } -+ HirKind::Class(hir::Class::Bytes(ref cls)) => { -+ if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) { -+ lits.cut(); - } - } -- LiteralBytes { ref bytes, casei: false } => { -- lits.cross_add(bytes); -+ HirKind::Group(hir::Group { ref hir, .. }) => { -+ union_required(&**hir, lits); - } -- LiteralBytes { ref bytes, casei: true } => { -- for &b in bytes { -- let cls = ByteClass::new(vec![ -- ByteRange { start: b, end: b }, -- ]).case_fold(); -- if !lits.add_byte_class(&cls) { -+ HirKind::Repetition(ref x) => { -+ match x.kind { -+ hir::RepetitionKind::ZeroOrOne => lits.cut(), -+ hir::RepetitionKind::ZeroOrMore => lits.cut(), -+ hir::RepetitionKind::OneOrMore => { -+ union_required(&x.hir, lits); - lits.cut(); -- return; -+ } -+ hir::RepetitionKind::Range(ref rng) => { -+ let (min, max) = match *rng { -+ hir::RepetitionRange::Exactly(m) => (m, Some(m)), -+ hir::RepetitionRange::AtLeast(m) => (m, None), -+ hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), -+ }; -+ repeat_range_literals( -+ &x.hir, min, max, x.greedy, lits, union_required); - } - } - } -- Class(_) => { -- lits.cut(); -- } -- ClassBytes(_) => { -- lits.cut(); -+ HirKind::Concat(ref es) if es.is_empty() => {} -+ HirKind::Concat(ref es) if es.len() == 1 => { -+ union_required(&es[0], lits) - } -- Group { ref e, .. } => { -- union_required(&**e, lits); -- } -- Repeat { r: Repeater::ZeroOrOne, .. } => lits.cut(), -- Repeat { r: Repeater::ZeroOrMore, .. } => lits.cut(), -- Repeat { ref e, r: Repeater::OneOrMore, .. } => { -- union_required(&**e, lits); -- lits.cut(); -- } -- Repeat { ref e, r: Repeater::Range { min, max }, greedy } => { -- repeat_range_literals( -- &**e, min, max, greedy, lits, union_required); -- } -- Concat(ref es) if es.is_empty() => {} -- Concat(ref es) if es.len() == 1 => union_required(&es[0], lits), -- Concat(ref es) => { -+ HirKind::Concat(ref es) => { - for e in es { - let mut lits2 = lits.to_empty(); - union_required(e, &mut lits2); -@@ -157,7 +147,6 @@ fn union_required(expr: &Expr, lits: &mut Literals) { - if lits2.contains_empty() { - lits.cut(); - } -- // if !lits.union(lits2) { - if !lits.cross_product(&lits2) { - // If this expression couldn't yield any literal that - // could be extended, then we need to quit. Since we're -@@ -167,15 +156,15 @@ fn union_required(expr: &Expr, lits: &mut Literals) { - } - } - } -- Alternate(ref es) => { -+ HirKind::Alternation(ref es) => { - alternate_literals(es, lits, union_required); - } - _ => lits.cut(), - } - } - --fn repeat_range_literals( -- e: &Expr, -+fn repeat_range_literals( -+ e: &Hir, - min: u32, - max: Option, - _greedy: bool, -@@ -204,8 +193,8 @@ fn repeat_range_literals( - } - } - --fn alternate_literals( -- es: &[Expr], -+fn alternate_literals( -+ es: &[Hir], - lits: &mut Literals, - mut f: F, - ) { -@@ -234,11 +223,21 @@ fn alternate_literals( - } - lits.cut(); - if !lcs.is_empty() { -- lits.add(Lit::empty()); -- lits.add(Lit::new(lcs.to_vec())); -+ lits.add(Literal::empty()); -+ lits.add(Literal::new(lcs.to_vec())); - } - } - -+/// Return the number of characters in the given class. -+fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 { -+ cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() -+} -+ -+/// Return the number of bytes in the given class. -+fn count_byte_class(cls: &hir::ClassBytes) -> u32 { -+ cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() -+} -+ - /// Converts an arbitrary sequence of bytes to a literal suitable for building - /// a regular expression. - fn bytes_to_regex(bs: &[u8]) -> String { -diff --git a/src/nonl.rs b/src/nonl.rs -index 361b0b003eb7..3beb5f61ce57 100644 ---- a/src/nonl.rs -+++ b/src/nonl.rs -@@ -1,4 +1,4 @@ --use syntax::Expr; -+use syntax::hir::{self, Hir, HirKind}; - - use {Error, Result}; - -@@ -9,59 +9,66 @@ use {Error, Result}; - /// - /// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this - /// function panics. --pub fn remove(expr: Expr, byte: u8) -> Result { -- // TODO(burntsushi): There is a bug in this routine where only `\n` is -- // handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated -- // to proper character classes instead of the special `AnyCharNoNL` and -- // `AnyByteNoNL` classes. -- use syntax::Expr::*; -+pub fn remove(expr: Hir, byte: u8) -> Result { - assert!(byte <= 0x7F); - let chr = byte as char; - assert!(chr.len_utf8() == 1); - -- Ok(match expr { -- Literal { chars, casei } => { -- if chars.iter().position(|&c| c == chr).is_some() { -+ Ok(match expr.into_kind() { -+ HirKind::Empty => Hir::empty(), -+ HirKind::Literal(hir::Literal::Unicode(c)) => { -+ if c == chr { - return Err(Error::LiteralNotAllowed(chr)); - } -- Literal { chars: chars, casei: casei } -+ Hir::literal(hir::Literal::Unicode(c)) - } -- LiteralBytes { bytes, casei } => { -- if bytes.iter().position(|&b| b == byte).is_some() { -+ HirKind::Literal(hir::Literal::Byte(b)) => { -+ if b as char == chr { - return Err(Error::LiteralNotAllowed(chr)); - } -- LiteralBytes { bytes: bytes, casei: casei } -+ Hir::literal(hir::Literal::Byte(b)) - } -- AnyChar => AnyCharNoNL, -- AnyByte => AnyByteNoNL, -- Class(mut cls) => { -- cls.remove(chr); -- Class(cls) -- } -- ClassBytes(mut cls) => { -- cls.remove(byte); -- ClassBytes(cls) -- } -- Group { e, i, name } => { -- Group { -- e: Box::new(remove(*e, byte)?), -- i: i, -- name: name, -+ HirKind::Class(hir::Class::Unicode(mut cls)) => { -+ let remove = hir::ClassUnicode::new(Some( -+ hir::ClassUnicodeRange::new(chr, chr), -+ )); -+ cls.difference(&remove); -+ if cls.iter().next().is_none() { -+ return Err(Error::LiteralNotAllowed(chr)); - } -+ Hir::class(hir::Class::Unicode(cls)) - } -- Repeat { e, r, greedy } => { -- Repeat { -- e: Box::new(remove(*e, byte)?), -- r: r, -- greedy: greedy, -+ HirKind::Class(hir::Class::Bytes(mut cls)) => { -+ let remove = hir::ClassBytes::new(Some( -+ hir::ClassBytesRange::new(byte, byte), -+ )); -+ cls.difference(&remove); -+ if cls.iter().next().is_none() { -+ return Err(Error::LiteralNotAllowed(chr)); - } -+ Hir::class(hir::Class::Bytes(cls)) -+ } -+ HirKind::Anchor(x) => Hir::anchor(x), -+ HirKind::WordBoundary(x) => Hir::word_boundary(x), -+ HirKind::Repetition(mut x) => { -+ x.hir = Box::new(remove(*x.hir, byte)?); -+ Hir::repetition(x) -+ } -+ HirKind::Group(mut x) => { -+ x.hir = Box::new(remove(*x.hir, byte)?); -+ Hir::group(x) - } -- Concat(exprs) => { -- Concat(exprs.into_iter().map(|e| remove(e, byte)).collect::>>()?) -+ HirKind::Concat(xs) => { -+ let xs = xs.into_iter() -+ .map(|e| remove(e, byte)) -+ .collect::>>()?; -+ Hir::concat(xs) - } -- Alternate(exprs) => { -- Alternate(exprs.into_iter().map(|e| remove(e, byte)).collect::>>()?) -+ HirKind::Alternation(xs) => { -+ let xs = xs.into_iter() -+ .map(|e| remove(e, byte)) -+ .collect::>>()?; -+ Hir::alternation(xs) - } -- e => e, - }) - } -diff --git a/src/search.rs b/src/search.rs -index 8d056796ac14..1d5d7e29cccf 100644 ---- a/src/search.rs -+++ b/src/search.rs -@@ -1,10 +1,10 @@ - use memchr::{memchr, memrchr}; - use regex::bytes::{Regex, RegexBuilder}; --use syntax; - - use literals::LiteralSets; - use nonl; --use syntax::Expr; -+use syntax::ParserBuilder; -+use syntax::hir::Hir; - use word_boundary::strip_unicode_word_boundaries; - use Result; - -@@ -166,7 +166,7 @@ impl GrepBuilder { - - /// Creates a new regex from the given expression with the current - /// configuration. -- fn regex(&self, expr: &Expr) -> Result { -+ fn regex(&self, expr: &Hir) -> Result { - let mut builder = RegexBuilder::new(&expr.to_string()); - builder.unicode(true); - self.regex_build(builder) -@@ -184,15 +184,16 @@ impl GrepBuilder { - - /// Parses the underlying pattern and ensures the pattern can never match - /// the line terminator. -- fn parse(&self) -> Result { -- let expr = -- syntax::ExprBuilder::new() -- .allow_bytes(true) -- .unicode(true) -+ fn parse(&self) -> Result { -+ let expr = ParserBuilder::new() -+ .allow_invalid_utf8(true) - .case_insensitive(self.is_case_insensitive()?) -+ .multi_line(true) -+ .build() - .parse(&self.pattern)?; -+ debug!("original regex HIR pattern:\n{}", expr); - let expr = nonl::remove(expr, self.opts.line_terminator)?; -- debug!("regex ast:\n{:#?}", expr); -+ debug!("transformed regex HIR pattern:\n{}", expr); - Ok(expr) - } - -diff --git a/src/word_boundary.rs b/src/word_boundary.rs -index 6df5c6574933..8e6b86d12df8 100644 ---- a/src/word_boundary.rs -+++ b/src/word_boundary.rs -@@ -1,4 +1,4 @@ --use syntax::Expr; -+use syntax::hir::{self, Hir, HirKind}; - - /// Strips Unicode word boundaries from the given expression. - /// -@@ -8,7 +8,7 @@ use syntax::Expr; - /// false negatives. - /// - /// If no word boundaries could be stripped, then None is returned. --pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { -+pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option { - // The real reason we do this is because Unicode word boundaries are the - // one thing that Rust's regex DFA engine can't handle. When it sees a - // Unicode word boundary among non-ASCII text, it falls back to one of the -@@ -16,23 +16,24 @@ pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { - // a regex to find candidate matches without a Unicode word boundary. We'll - // only then use the full (and slower) regex to confirm a candidate as a - // match or not during search. -- use syntax::Expr::*; -- -- match *expr { -- Concat(ref es) if !es.is_empty() => { -+ // -+ // It looks like we only check the outer edges for `\b`? I guess this is -+ // an attempt to optimize for the `-w/--word-regexp` flag? ---AG -+ match *expr.kind() { -+ HirKind::Concat(ref es) if !es.is_empty() => { - let first = is_unicode_word_boundary(&es[0]); - let last = is_unicode_word_boundary(es.last().unwrap()); - // Be careful not to strip word boundaries if there are no other - // expressions to match. - match (first, last) { - (true, false) if es.len() > 1 => { -- Some(Concat(es[1..].to_vec())) -+ Some(Hir::concat(es[1..].to_vec())) - } - (false, true) if es.len() > 1 => { -- Some(Concat(es[..es.len() - 1].to_vec())) -+ Some(Hir::concat(es[..es.len() - 1].to_vec())) - } - (true, true) if es.len() > 2 => { -- Some(Concat(es[1..es.len() - 1].to_vec())) -+ Some(Hir::concat(es[1..es.len() - 1].to_vec())) - } - _ => None, - } -@@ -42,13 +43,11 @@ pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { - } - - /// Returns true if the given expression is a Unicode word boundary. --fn is_unicode_word_boundary(expr: &Expr) -> bool { -- use syntax::Expr::*; -- -- match *expr { -- WordBoundary => true, -- NotWordBoundary => true, -- Group { ref e, .. } => is_unicode_word_boundary(e), -+fn is_unicode_word_boundary(expr: &Hir) -> bool { -+ match *expr.kind() { -+ HirKind::WordBoundary(hir::WordBoundary::Unicode) => true, -+ HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true, -+ HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir), - _ => false, - } - } --- -2.14.3 - diff --git a/0002-grep-add-perfect-smart-case-detection.patch b/0002-grep-add-perfect-smart-case-detection.patch deleted file mode 100644 index 781a939..0000000 --- a/0002-grep-add-perfect-smart-case-detection.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 42b8132d0ad1918c1c0dc677015d87c12819fa26 Mon Sep 17 00:00:00 2001 -From: Andrew Gallant -Date: Tue, 13 Mar 2018 21:43:23 -0400 -Subject: [PATCH 2/2] grep: add "perfect" smart case detection - -This commit removes the previous smart case detection logic and replaces -it with detection based on the regex AST. This particular AST is a faithful -representation of the concrete syntax, which lets us be very precise in -how we handle it. - -Closes #851 ---- - src/lib.rs | 1 + - src/search.rs | 58 +++------------ - src/smart_case.rs | 191 +++++++++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 201 insertions(+), 49 deletions(-) - create mode 100644 src/smart_case.rs - -diff --git a/src/lib.rs b/src/lib.rs -index 3b2f0ebd65d5..023cd64ac36a 100644 ---- a/src/lib.rs -+++ b/src/lib.rs -@@ -19,6 +19,7 @@ pub use search::{Grep, GrepBuilder, Iter, Match}; - mod literals; - mod nonl; - mod search; -+mod smart_case; - mod word_boundary; - - /// Result is a convenient type alias that fixes the type of the error to -diff --git a/src/search.rs b/src/search.rs -index 1d5d7e29cccf..49ddf1f875c4 100644 ---- a/src/search.rs -+++ b/src/search.rs -@@ -1,10 +1,11 @@ - use memchr::{memchr, memrchr}; -+use syntax::ParserBuilder; -+use syntax::hir::Hir; - use regex::bytes::{Regex, RegexBuilder}; - - use literals::LiteralSets; - use nonl; --use syntax::ParserBuilder; --use syntax::hir::Hir; -+use smart_case::Cased; - use word_boundary::strip_unicode_word_boundaries; - use Result; - -@@ -205,7 +206,11 @@ impl GrepBuilder { - if !self.opts.case_smart { - return Ok(false); - } -- Ok(!has_uppercase_literal(&self.pattern)) -+ let cased = match Cased::from_pattern(&self.pattern) { -+ None => return Ok(false), -+ Some(cased) => cased, -+ }; -+ Ok(cased.any_literal && !cased.any_uppercase) - } - } - -@@ -311,44 +316,15 @@ impl<'b, 's> Iterator for Iter<'b, 's> { - } - } - --/// Determine whether the pattern contains an uppercase character which should --/// negate the effect of the smart-case option. --/// --/// Ideally we would be able to check the AST in order to correctly handle --/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly --/// cased), but we don't currently have that option. For now, our 'good enough' --/// solution is to simply perform a semi-naïve scan of the input pattern and --/// ignore all characters following a '\'. The ExprBuilder will handle any --/// actual errors, and this at least lets us support the most common cases, --/// like 'foo\w' and 'foo\S', in an intuitive manner. --fn has_uppercase_literal(pattern: &str) -> bool { -- let mut chars = pattern.chars(); -- while let Some(c) = chars.next() { -- if c == '\\' { -- chars.next(); -- } else if c.is_uppercase() { -- return true; -- } -- } -- false --} -- - #[cfg(test)] - mod tests { -- #![allow(unused_imports)] -- - use memchr::{memchr, memrchr}; - use regex::bytes::Regex; - -- use super::{GrepBuilder, Match, has_uppercase_literal}; -+ use super::{GrepBuilder, Match}; - - static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt"); - -- #[allow(dead_code)] -- fn s(bytes: &[u8]) -> String { -- String::from_utf8(bytes.to_vec()).unwrap() -- } -- - fn find_lines(pat: &str, haystack: &[u8]) -> Vec { - let re = Regex::new(pat).unwrap(); - let mut lines = vec![]; -@@ -377,20 +353,4 @@ mod tests { - assert_eq!(expected.len(), got.len()); - assert_eq!(expected, got); - } -- -- #[test] -- fn pattern_case() { -- assert_eq!(has_uppercase_literal(&"".to_string()), false); -- assert_eq!(has_uppercase_literal(&"foo".to_string()), false); -- assert_eq!(has_uppercase_literal(&"Foo".to_string()), true); -- assert_eq!(has_uppercase_literal(&"foO".to_string()), true); -- assert_eq!(has_uppercase_literal(&"foo\\\\".to_string()), false); -- assert_eq!(has_uppercase_literal(&"foo\\w".to_string()), false); -- assert_eq!(has_uppercase_literal(&"foo\\S".to_string()), false); -- assert_eq!(has_uppercase_literal(&"foo\\p{Ll}".to_string()), true); -- assert_eq!(has_uppercase_literal(&"foo[a-z]".to_string()), false); -- assert_eq!(has_uppercase_literal(&"foo[A-Z]".to_string()), true); -- assert_eq!(has_uppercase_literal(&"foo[\\S\\t]".to_string()), false); -- assert_eq!(has_uppercase_literal(&"foo\\\\S".to_string()), true); -- } - } -diff --git a/src/smart_case.rs b/src/smart_case.rs -new file mode 100644 -index 000000000000..1379b32620bc ---- /dev/null -+++ b/src/smart_case.rs -@@ -0,0 +1,191 @@ -+use syntax::ast::{self, Ast}; -+use syntax::ast::parse::Parser; -+ -+/// The results of analyzing a regex for cased literals. -+#[derive(Clone, Debug, Default)] -+pub struct Cased { -+ /// True if and only if a literal uppercase character occurs in the regex. -+ /// -+ /// A regex like `\pL` contains no uppercase literals, even though `L` -+ /// is uppercase and the `\pL` class contains uppercase characters. -+ pub any_uppercase: bool, -+ /// True if and only if the regex contains any literal at all. A regex like -+ /// `\pL` has this set to false. -+ pub any_literal: bool, -+} -+ -+impl Cased { -+ /// Returns a `Cased` value by doing analysis on the AST of `pattern`. -+ /// -+ /// If `pattern` is not a valid regular expression, then `None` is -+ /// returned. -+ pub fn from_pattern(pattern: &str) -> Option { -+ Parser::new() -+ .parse(pattern) -+ .map(|ast| Cased::from_ast(&ast)) -+ .ok() -+ } -+ -+ fn from_ast(ast: &Ast) -> Cased { -+ let mut cased = Cased::default(); -+ cased.from_ast_impl(ast); -+ cased -+ } -+ -+ fn from_ast_impl(&mut self, ast: &Ast) { -+ if self.done() { -+ return; -+ } -+ match *ast { -+ Ast::Empty(_) -+ | Ast::Flags(_) -+ | Ast::Dot(_) -+ | Ast::Assertion(_) -+ | Ast::Class(ast::Class::Unicode(_)) -+ | Ast::Class(ast::Class::Perl(_)) => {} -+ Ast::Literal(ref x) => { -+ self.from_ast_literal(x); -+ } -+ Ast::Class(ast::Class::Bracketed(ref x)) => { -+ self.from_ast_class_set(&x.kind); -+ } -+ Ast::Repetition(ref x) => { -+ self.from_ast_impl(&x.ast); -+ } -+ Ast::Group(ref x) => { -+ self.from_ast_impl(&x.ast); -+ } -+ Ast::Alternation(ref alt) => { -+ for x in &alt.asts { -+ self.from_ast_impl(x); -+ } -+ } -+ Ast::Concat(ref alt) => { -+ for x in &alt.asts { -+ self.from_ast_impl(x); -+ } -+ } -+ } -+ } -+ -+ fn from_ast_class_set(&mut self, ast: &ast::ClassSet) { -+ if self.done() { -+ return; -+ } -+ match *ast { -+ ast::ClassSet::Item(ref item) => { -+ self.from_ast_class_set_item(item); -+ } -+ ast::ClassSet::BinaryOp(ref x) => { -+ self.from_ast_class_set(&x.lhs); -+ self.from_ast_class_set(&x.rhs); -+ } -+ } -+ } -+ -+ fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) { -+ if self.done() { -+ return; -+ } -+ match *ast { -+ ast::ClassSetItem::Empty(_) -+ | ast::ClassSetItem::Ascii(_) -+ | ast::ClassSetItem::Unicode(_) -+ | ast::ClassSetItem::Perl(_) => {} -+ ast::ClassSetItem::Literal(ref x) => { -+ self.from_ast_literal(x); -+ } -+ ast::ClassSetItem::Range(ref x) => { -+ self.from_ast_literal(&x.start); -+ self.from_ast_literal(&x.end); -+ } -+ ast::ClassSetItem::Bracketed(ref x) => { -+ self.from_ast_class_set(&x.kind); -+ } -+ ast::ClassSetItem::Union(ref union) => { -+ for x in &union.items { -+ self.from_ast_class_set_item(x); -+ } -+ } -+ } -+ } -+ -+ fn from_ast_literal(&mut self, ast: &ast::Literal) { -+ self.any_literal = true; -+ self.any_uppercase = self.any_uppercase || ast.c.is_uppercase(); -+ } -+ -+ /// Returns true if and only if the attributes can never change no matter -+ /// what other AST it might see. -+ fn done(&self) -> bool { -+ self.any_uppercase && self.any_literal -+ } -+} -+ -+#[cfg(test)] -+mod tests { -+ use super::*; -+ -+ fn cased(pattern: &str) -> Cased { -+ Cased::from_pattern(pattern).unwrap() -+ } -+ -+ #[test] -+ fn various() { -+ let x = cased(""); -+ assert!(!x.any_uppercase); -+ assert!(!x.any_literal); -+ -+ let x = cased("foo"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased("Foo"); -+ assert!(x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased("foO"); -+ assert!(x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo\\"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo\w"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo\S"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo\p{Ll}"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo[a-z]"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo[A-Z]"); -+ assert!(x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo[\S\t]"); -+ assert!(!x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"foo\\S"); -+ assert!(x.any_uppercase); -+ assert!(x.any_literal); -+ -+ let x = cased(r"\p{Ll}"); -+ assert!(!x.any_uppercase); -+ assert!(!x.any_literal); -+ -+ let x = cased(r"aBc\w"); -+ assert!(x.any_uppercase); -+ assert!(x.any_literal); -+ } -+} --- -2.14.3 - diff --git a/grep-0.1.8-fix-metadata.diff b/grep-0.1.8-fix-metadata.diff deleted file mode 100644 index a431127..0000000 --- a/grep-0.1.8-fix-metadata.diff +++ /dev/null @@ -1,12 +0,0 @@ ---- grep-0.1.8/Cargo.toml 1970-01-01T01:00:00+01:00 -+++ grep-0.1.8/Cargo.toml 2018-06-12T09:25:27.056693+02:00 -@@ -28,7 +28,7 @@ - version = "2" - - [dependencies.regex] --version = "0.2.1" -+version = "1" - - [dependencies.regex-syntax] --version = "0.4.0" -+version = "0.6" diff --git a/rust-grep.spec b/rust-grep.spec index 4740511..c220706 100644 --- a/rust-grep.spec +++ b/rust-grep.spec @@ -5,19 +5,13 @@ %global crate grep Name: rust-%{crate} -Version: 0.1.8 -Release: 4%{?dist} +Version: 0.1.9 +Release: 1%{?dist} Summary: Fast line oriented regex searching as a library License: Unlicense or MIT URL: https://crates.io/crates/grep Source0: https://crates.io/api/v1/crates/%{crate}/%{version}/download#/%{crate}-%{version}.crate -# Initial patched metadata -# * bump regex and regex-syntax -Patch0: grep-0.1.8-fix-metadata.diff -# https://github.com/BurntSushi/ripgrep/pull/858/ -Patch1: 0001-grep-upgrade-to-regex-syntax-0.5.patch -Patch2: 0002-grep-add-perfect-smart-case-detection.patch ExclusiveArch: %{rust_arches} @@ -62,6 +56,9 @@ which use %{crate} from crates.io. %{cargo_registry}/%{crate}-%{version}/ %changelog +* Sat Aug 04 2018 Josh Stone - 0.1.9-1 +- Update to 0.1.9 + * Sat Jul 14 2018 Fedora Release Engineering - 0.1.8-4 - Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild diff --git a/sources b/sources index 1e381d4..e7acb5a 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (grep-0.1.8.crate) = d0a4e9dd51402ed8036aa5d2cfcdb184538d2bf0c81baac75c73e80d05945a63cb08a931d87fa526695af93271cecaed6650a045904ceaa05ed6e06fd23e537b +SHA512 (grep-0.1.9.crate) = 1ce3c1c1faf2d001bdafc6e021ec3523e08cfaec611bbaf70492cf0038578c2e738ead0df826229fbc0f5e54ff3f9f9cf928be9223bb53907f12b6f468545ef3