From cb2ed4f60bf3068a87675874aabded90cb097af0 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 13 Mar 2018 22:45:27 -0700 Subject: [PATCH] bump regex and regex-syntax --- 0001-grep-upgrade-to-regex-syntax-0.5.patch | 460 ++++++++++++++++++ ...rep-add-perfect-smart-case-detection.patch | 328 +++++++++++++ grep-0.1.8-fix-metadata.diff | 12 + rust-grep.spec | 15 +- 4 files changed, 812 insertions(+), 3 deletions(-) create mode 100644 0001-grep-upgrade-to-regex-syntax-0.5.patch create mode 100644 0002-grep-add-perfect-smart-case-detection.patch create mode 100644 grep-0.1.8-fix-metadata.diff diff --git a/0001-grep-upgrade-to-regex-syntax-0.5.patch b/0001-grep-upgrade-to-regex-syntax-0.5.patch new file mode 100644 index 0000000..53652b6 --- /dev/null +++ b/0001-grep-upgrade-to-regex-syntax-0.5.patch @@ -0,0 +1,460 @@ +From cd08707c7c82058559bd5557efb3c1d0379dbf1d Mon Sep 17 00:00:00 2001 +From: Andrew Gallant +Date: Tue, 13 Mar 2018 20:38:50 -0400 +Subject: [PATCH 1/2] grep: upgrade to regex-syntax 0.5 + +This update brings with it many bug fixes: + + * Better error messages are printed overall. We also include + explicit call out for unsupported features like backreferences + and look-around. + * Regexes like `\s*{` no longer emit incomprehensible errors. + * Unicode escape sequences, such as `\u{..}` are now supported. + +For the most part, this upgrade was done in a straight-forward way. We +resist the urge to refactor the `grep` crate, in anticipation of it +being rewritten anyway. + +Note that we removed the `--fixed-strings` suggestion whenever a regex +syntax error occurs. In practice, I've found that it results in a lot of +false positives, and I believe that its use is not as paramount now that +regex parse errors are much more readable. + +Closes #268, Closes #395, Closes #702, Closes #853 +--- + src/literals.rs | 119 +++++++++++++++++++++++----------------------- + src/nonl.rs | 85 ++++++++++++++++++--------------- + src/search.rs | 19 ++++---- + src/word_boundary.rs | 31 ++++++------ + 4 files changed, 130 insertions(+), 124 deletions(-) + +diff --git a/src/literals.rs b/src/literals.rs +index eebeac4c7249..3e1c385bcda9 100644 +--- a/src/literals.rs ++++ b/src/literals.rs +@@ -10,10 +10,8 @@ principled. + use std::cmp; + + use regex::bytes::RegexBuilder; +-use syntax::{ +- Expr, Literals, Lit, +- ByteClass, ByteRange, CharClass, ClassRange, Repeater, +-}; ++use syntax::hir::{self, Hir, HirKind}; ++use syntax::hir::literal::{Literal, Literals}; + + #[derive(Clone, Debug)] + pub struct LiteralSets { +@@ -23,12 +21,12 @@ pub struct LiteralSets { + } + + impl LiteralSets { +- pub fn create(expr: &Expr) -> Self { ++ pub fn create(expr: &Hir) -> Self { + let mut required = Literals::empty(); + union_required(expr, &mut required); + LiteralSets { +- prefixes: expr.prefixes(), +- suffixes: expr.suffixes(), ++ prefixes: Literals::prefixes(expr), ++ suffixes: Literals::suffixes(expr), + required: required, + } + } +@@ -93,60 +91,52 @@ impl LiteralSets { + } + } + +-fn union_required(expr: &Expr, lits: &mut Literals) { +- use syntax::Expr::*; +- match *expr { +- Literal { ref chars, casei: false } => { +- let s: String = chars.iter().cloned().collect(); +- lits.cross_add(s.as_bytes()); ++fn union_required(expr: &Hir, lits: &mut Literals) { ++ match *expr.kind() { ++ HirKind::Literal(hir::Literal::Unicode(c)) => { ++ let mut buf = [0u8; 4]; ++ lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); + } +- Literal { ref chars, casei: true } => { +- for &c in chars { +- let cls = CharClass::new(vec![ +- ClassRange { start: c, end: c }, +- ]).case_fold(); +- if !lits.add_char_class(&cls) { +- lits.cut(); +- return; +- } ++ HirKind::Literal(hir::Literal::Byte(b)) => { ++ lits.cross_add(&[b]); ++ } ++ HirKind::Class(hir::Class::Unicode(ref cls)) => { ++ if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) { ++ lits.cut(); ++ } ++ } ++ HirKind::Class(hir::Class::Bytes(ref cls)) => { ++ if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) { ++ lits.cut(); + } + } +- LiteralBytes { ref bytes, casei: false } => { +- lits.cross_add(bytes); ++ HirKind::Group(hir::Group { ref hir, .. }) => { ++ union_required(&**hir, lits); + } +- LiteralBytes { ref bytes, casei: true } => { +- for &b in bytes { +- let cls = ByteClass::new(vec![ +- ByteRange { start: b, end: b }, +- ]).case_fold(); +- if !lits.add_byte_class(&cls) { ++ HirKind::Repetition(ref x) => { ++ match x.kind { ++ hir::RepetitionKind::ZeroOrOne => lits.cut(), ++ hir::RepetitionKind::ZeroOrMore => lits.cut(), ++ hir::RepetitionKind::OneOrMore => { ++ union_required(&x.hir, lits); + lits.cut(); +- return; ++ } ++ hir::RepetitionKind::Range(ref rng) => { ++ let (min, max) = match *rng { ++ hir::RepetitionRange::Exactly(m) => (m, Some(m)), ++ hir::RepetitionRange::AtLeast(m) => (m, None), ++ hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), ++ }; ++ repeat_range_literals( ++ &x.hir, min, max, x.greedy, lits, union_required); + } + } + } +- Class(_) => { +- lits.cut(); +- } +- ClassBytes(_) => { +- lits.cut(); ++ HirKind::Concat(ref es) if es.is_empty() => {} ++ HirKind::Concat(ref es) if es.len() == 1 => { ++ union_required(&es[0], lits) + } +- Group { ref e, .. } => { +- union_required(&**e, lits); +- } +- Repeat { r: Repeater::ZeroOrOne, .. } => lits.cut(), +- Repeat { r: Repeater::ZeroOrMore, .. } => lits.cut(), +- Repeat { ref e, r: Repeater::OneOrMore, .. } => { +- union_required(&**e, lits); +- lits.cut(); +- } +- Repeat { ref e, r: Repeater::Range { min, max }, greedy } => { +- repeat_range_literals( +- &**e, min, max, greedy, lits, union_required); +- } +- Concat(ref es) if es.is_empty() => {} +- Concat(ref es) if es.len() == 1 => union_required(&es[0], lits), +- Concat(ref es) => { ++ HirKind::Concat(ref es) => { + for e in es { + let mut lits2 = lits.to_empty(); + union_required(e, &mut lits2); +@@ -157,7 +147,6 @@ fn union_required(expr: &Expr, lits: &mut Literals) { + if lits2.contains_empty() { + lits.cut(); + } +- // if !lits.union(lits2) { + if !lits.cross_product(&lits2) { + // If this expression couldn't yield any literal that + // could be extended, then we need to quit. Since we're +@@ -167,15 +156,15 @@ fn union_required(expr: &Expr, lits: &mut Literals) { + } + } + } +- Alternate(ref es) => { ++ HirKind::Alternation(ref es) => { + alternate_literals(es, lits, union_required); + } + _ => lits.cut(), + } + } + +-fn repeat_range_literals( +- e: &Expr, ++fn repeat_range_literals( ++ e: &Hir, + min: u32, + max: Option, + _greedy: bool, +@@ -204,8 +193,8 @@ fn repeat_range_literals( + } + } + +-fn alternate_literals( +- es: &[Expr], ++fn alternate_literals( ++ es: &[Hir], + lits: &mut Literals, + mut f: F, + ) { +@@ -234,11 +223,21 @@ fn alternate_literals( + } + lits.cut(); + if !lcs.is_empty() { +- lits.add(Lit::empty()); +- lits.add(Lit::new(lcs.to_vec())); ++ lits.add(Literal::empty()); ++ lits.add(Literal::new(lcs.to_vec())); + } + } + ++/// Return the number of characters in the given class. ++fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 { ++ cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() ++} ++ ++/// Return the number of bytes in the given class. ++fn count_byte_class(cls: &hir::ClassBytes) -> u32 { ++ cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() ++} ++ + /// Converts an arbitrary sequence of bytes to a literal suitable for building + /// a regular expression. + fn bytes_to_regex(bs: &[u8]) -> String { +diff --git a/src/nonl.rs b/src/nonl.rs +index 361b0b003eb7..3beb5f61ce57 100644 +--- a/src/nonl.rs ++++ b/src/nonl.rs +@@ -1,4 +1,4 @@ +-use syntax::Expr; ++use syntax::hir::{self, Hir, HirKind}; + + use {Error, Result}; + +@@ -9,59 +9,66 @@ use {Error, Result}; + /// + /// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this + /// function panics. +-pub fn remove(expr: Expr, byte: u8) -> Result { +- // TODO(burntsushi): There is a bug in this routine where only `\n` is +- // handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated +- // to proper character classes instead of the special `AnyCharNoNL` and +- // `AnyByteNoNL` classes. +- use syntax::Expr::*; ++pub fn remove(expr: Hir, byte: u8) -> Result { + assert!(byte <= 0x7F); + let chr = byte as char; + assert!(chr.len_utf8() == 1); + +- Ok(match expr { +- Literal { chars, casei } => { +- if chars.iter().position(|&c| c == chr).is_some() { ++ Ok(match expr.into_kind() { ++ HirKind::Empty => Hir::empty(), ++ HirKind::Literal(hir::Literal::Unicode(c)) => { ++ if c == chr { + return Err(Error::LiteralNotAllowed(chr)); + } +- Literal { chars: chars, casei: casei } ++ Hir::literal(hir::Literal::Unicode(c)) + } +- LiteralBytes { bytes, casei } => { +- if bytes.iter().position(|&b| b == byte).is_some() { ++ HirKind::Literal(hir::Literal::Byte(b)) => { ++ if b as char == chr { + return Err(Error::LiteralNotAllowed(chr)); + } +- LiteralBytes { bytes: bytes, casei: casei } ++ Hir::literal(hir::Literal::Byte(b)) + } +- AnyChar => AnyCharNoNL, +- AnyByte => AnyByteNoNL, +- Class(mut cls) => { +- cls.remove(chr); +- Class(cls) +- } +- ClassBytes(mut cls) => { +- cls.remove(byte); +- ClassBytes(cls) +- } +- Group { e, i, name } => { +- Group { +- e: Box::new(remove(*e, byte)?), +- i: i, +- name: name, ++ HirKind::Class(hir::Class::Unicode(mut cls)) => { ++ let remove = hir::ClassUnicode::new(Some( ++ hir::ClassUnicodeRange::new(chr, chr), ++ )); ++ cls.difference(&remove); ++ if cls.iter().next().is_none() { ++ return Err(Error::LiteralNotAllowed(chr)); + } ++ Hir::class(hir::Class::Unicode(cls)) + } +- Repeat { e, r, greedy } => { +- Repeat { +- e: Box::new(remove(*e, byte)?), +- r: r, +- greedy: greedy, ++ HirKind::Class(hir::Class::Bytes(mut cls)) => { ++ let remove = hir::ClassBytes::new(Some( ++ hir::ClassBytesRange::new(byte, byte), ++ )); ++ cls.difference(&remove); ++ if cls.iter().next().is_none() { ++ return Err(Error::LiteralNotAllowed(chr)); + } ++ Hir::class(hir::Class::Bytes(cls)) ++ } ++ HirKind::Anchor(x) => Hir::anchor(x), ++ HirKind::WordBoundary(x) => Hir::word_boundary(x), ++ HirKind::Repetition(mut x) => { ++ x.hir = Box::new(remove(*x.hir, byte)?); ++ Hir::repetition(x) ++ } ++ HirKind::Group(mut x) => { ++ x.hir = Box::new(remove(*x.hir, byte)?); ++ Hir::group(x) + } +- Concat(exprs) => { +- Concat(exprs.into_iter().map(|e| remove(e, byte)).collect::>>()?) ++ HirKind::Concat(xs) => { ++ let xs = xs.into_iter() ++ .map(|e| remove(e, byte)) ++ .collect::>>()?; ++ Hir::concat(xs) + } +- Alternate(exprs) => { +- Alternate(exprs.into_iter().map(|e| remove(e, byte)).collect::>>()?) ++ HirKind::Alternation(xs) => { ++ let xs = xs.into_iter() ++ .map(|e| remove(e, byte)) ++ .collect::>>()?; ++ Hir::alternation(xs) + } +- e => e, + }) + } +diff --git a/src/search.rs b/src/search.rs +index 8d056796ac14..1d5d7e29cccf 100644 +--- a/src/search.rs ++++ b/src/search.rs +@@ -1,10 +1,10 @@ + use memchr::{memchr, memrchr}; + use regex::bytes::{Regex, RegexBuilder}; +-use syntax; + + use literals::LiteralSets; + use nonl; +-use syntax::Expr; ++use syntax::ParserBuilder; ++use syntax::hir::Hir; + use word_boundary::strip_unicode_word_boundaries; + use Result; + +@@ -166,7 +166,7 @@ impl GrepBuilder { + + /// Creates a new regex from the given expression with the current + /// configuration. +- fn regex(&self, expr: &Expr) -> Result { ++ fn regex(&self, expr: &Hir) -> Result { + let mut builder = RegexBuilder::new(&expr.to_string()); + builder.unicode(true); + self.regex_build(builder) +@@ -184,15 +184,16 @@ impl GrepBuilder { + + /// Parses the underlying pattern and ensures the pattern can never match + /// the line terminator. +- fn parse(&self) -> Result { +- let expr = +- syntax::ExprBuilder::new() +- .allow_bytes(true) +- .unicode(true) ++ fn parse(&self) -> Result { ++ let expr = ParserBuilder::new() ++ .allow_invalid_utf8(true) + .case_insensitive(self.is_case_insensitive()?) ++ .multi_line(true) ++ .build() + .parse(&self.pattern)?; ++ debug!("original regex HIR pattern:\n{}", expr); + let expr = nonl::remove(expr, self.opts.line_terminator)?; +- debug!("regex ast:\n{:#?}", expr); ++ debug!("transformed regex HIR pattern:\n{}", expr); + Ok(expr) + } + +diff --git a/src/word_boundary.rs b/src/word_boundary.rs +index 6df5c6574933..8e6b86d12df8 100644 +--- a/src/word_boundary.rs ++++ b/src/word_boundary.rs +@@ -1,4 +1,4 @@ +-use syntax::Expr; ++use syntax::hir::{self, Hir, HirKind}; + + /// Strips Unicode word boundaries from the given expression. + /// +@@ -8,7 +8,7 @@ use syntax::Expr; + /// false negatives. + /// + /// If no word boundaries could be stripped, then None is returned. +-pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { ++pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option { + // The real reason we do this is because Unicode word boundaries are the + // one thing that Rust's regex DFA engine can't handle. When it sees a + // Unicode word boundary among non-ASCII text, it falls back to one of the +@@ -16,23 +16,24 @@ pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { + // a regex to find candidate matches without a Unicode word boundary. We'll + // only then use the full (and slower) regex to confirm a candidate as a + // match or not during search. +- use syntax::Expr::*; +- +- match *expr { +- Concat(ref es) if !es.is_empty() => { ++ // ++ // It looks like we only check the outer edges for `\b`? I guess this is ++ // an attempt to optimize for the `-w/--word-regexp` flag? ---AG ++ match *expr.kind() { ++ HirKind::Concat(ref es) if !es.is_empty() => { + let first = is_unicode_word_boundary(&es[0]); + let last = is_unicode_word_boundary(es.last().unwrap()); + // Be careful not to strip word boundaries if there are no other + // expressions to match. + match (first, last) { + (true, false) if es.len() > 1 => { +- Some(Concat(es[1..].to_vec())) ++ Some(Hir::concat(es[1..].to_vec())) + } + (false, true) if es.len() > 1 => { +- Some(Concat(es[..es.len() - 1].to_vec())) ++ Some(Hir::concat(es[..es.len() - 1].to_vec())) + } + (true, true) if es.len() > 2 => { +- Some(Concat(es[1..es.len() - 1].to_vec())) ++ Some(Hir::concat(es[1..es.len() - 1].to_vec())) + } + _ => None, + } +@@ -42,13 +43,11 @@ pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option { + } + + /// Returns true if the given expression is a Unicode word boundary. +-fn is_unicode_word_boundary(expr: &Expr) -> bool { +- use syntax::Expr::*; +- +- match *expr { +- WordBoundary => true, +- NotWordBoundary => true, +- Group { ref e, .. } => is_unicode_word_boundary(e), ++fn is_unicode_word_boundary(expr: &Hir) -> bool { ++ match *expr.kind() { ++ HirKind::WordBoundary(hir::WordBoundary::Unicode) => true, ++ HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true, ++ HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir), + _ => false, + } + } +-- +2.14.3 + diff --git a/0002-grep-add-perfect-smart-case-detection.patch b/0002-grep-add-perfect-smart-case-detection.patch new file mode 100644 index 0000000..781a939 --- /dev/null +++ b/0002-grep-add-perfect-smart-case-detection.patch @@ -0,0 +1,328 @@ +From 42b8132d0ad1918c1c0dc677015d87c12819fa26 Mon Sep 17 00:00:00 2001 +From: Andrew Gallant +Date: Tue, 13 Mar 2018 21:43:23 -0400 +Subject: [PATCH 2/2] grep: add "perfect" smart case detection + +This commit removes the previous smart case detection logic and replaces +it with detection based on the regex AST. This particular AST is a faithful +representation of the concrete syntax, which lets us be very precise in +how we handle it. + +Closes #851 +--- + src/lib.rs | 1 + + src/search.rs | 58 +++------------ + src/smart_case.rs | 191 +++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 201 insertions(+), 49 deletions(-) + create mode 100644 src/smart_case.rs + +diff --git a/src/lib.rs b/src/lib.rs +index 3b2f0ebd65d5..023cd64ac36a 100644 +--- a/src/lib.rs ++++ b/src/lib.rs +@@ -19,6 +19,7 @@ pub use search::{Grep, GrepBuilder, Iter, Match}; + mod literals; + mod nonl; + mod search; ++mod smart_case; + mod word_boundary; + + /// Result is a convenient type alias that fixes the type of the error to +diff --git a/src/search.rs b/src/search.rs +index 1d5d7e29cccf..49ddf1f875c4 100644 +--- a/src/search.rs ++++ b/src/search.rs +@@ -1,10 +1,11 @@ + use memchr::{memchr, memrchr}; ++use syntax::ParserBuilder; ++use syntax::hir::Hir; + use regex::bytes::{Regex, RegexBuilder}; + + use literals::LiteralSets; + use nonl; +-use syntax::ParserBuilder; +-use syntax::hir::Hir; ++use smart_case::Cased; + use word_boundary::strip_unicode_word_boundaries; + use Result; + +@@ -205,7 +206,11 @@ impl GrepBuilder { + if !self.opts.case_smart { + return Ok(false); + } +- Ok(!has_uppercase_literal(&self.pattern)) ++ let cased = match Cased::from_pattern(&self.pattern) { ++ None => return Ok(false), ++ Some(cased) => cased, ++ }; ++ Ok(cased.any_literal && !cased.any_uppercase) + } + } + +@@ -311,44 +316,15 @@ impl<'b, 's> Iterator for Iter<'b, 's> { + } + } + +-/// Determine whether the pattern contains an uppercase character which should +-/// negate the effect of the smart-case option. +-/// +-/// Ideally we would be able to check the AST in order to correctly handle +-/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly +-/// cased), but we don't currently have that option. For now, our 'good enough' +-/// solution is to simply perform a semi-naïve scan of the input pattern and +-/// ignore all characters following a '\'. The ExprBuilder will handle any +-/// actual errors, and this at least lets us support the most common cases, +-/// like 'foo\w' and 'foo\S', in an intuitive manner. +-fn has_uppercase_literal(pattern: &str) -> bool { +- let mut chars = pattern.chars(); +- while let Some(c) = chars.next() { +- if c == '\\' { +- chars.next(); +- } else if c.is_uppercase() { +- return true; +- } +- } +- false +-} +- + #[cfg(test)] + mod tests { +- #![allow(unused_imports)] +- + use memchr::{memchr, memrchr}; + use regex::bytes::Regex; + +- use super::{GrepBuilder, Match, has_uppercase_literal}; ++ use super::{GrepBuilder, Match}; + + static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt"); + +- #[allow(dead_code)] +- fn s(bytes: &[u8]) -> String { +- String::from_utf8(bytes.to_vec()).unwrap() +- } +- + fn find_lines(pat: &str, haystack: &[u8]) -> Vec { + let re = Regex::new(pat).unwrap(); + let mut lines = vec![]; +@@ -377,20 +353,4 @@ mod tests { + assert_eq!(expected.len(), got.len()); + assert_eq!(expected, got); + } +- +- #[test] +- fn pattern_case() { +- assert_eq!(has_uppercase_literal(&"".to_string()), false); +- assert_eq!(has_uppercase_literal(&"foo".to_string()), false); +- assert_eq!(has_uppercase_literal(&"Foo".to_string()), true); +- assert_eq!(has_uppercase_literal(&"foO".to_string()), true); +- assert_eq!(has_uppercase_literal(&"foo\\\\".to_string()), false); +- assert_eq!(has_uppercase_literal(&"foo\\w".to_string()), false); +- assert_eq!(has_uppercase_literal(&"foo\\S".to_string()), false); +- assert_eq!(has_uppercase_literal(&"foo\\p{Ll}".to_string()), true); +- assert_eq!(has_uppercase_literal(&"foo[a-z]".to_string()), false); +- assert_eq!(has_uppercase_literal(&"foo[A-Z]".to_string()), true); +- assert_eq!(has_uppercase_literal(&"foo[\\S\\t]".to_string()), false); +- assert_eq!(has_uppercase_literal(&"foo\\\\S".to_string()), true); +- } + } +diff --git a/src/smart_case.rs b/src/smart_case.rs +new file mode 100644 +index 000000000000..1379b32620bc +--- /dev/null ++++ b/src/smart_case.rs +@@ -0,0 +1,191 @@ ++use syntax::ast::{self, Ast}; ++use syntax::ast::parse::Parser; ++ ++/// The results of analyzing a regex for cased literals. ++#[derive(Clone, Debug, Default)] ++pub struct Cased { ++ /// True if and only if a literal uppercase character occurs in the regex. ++ /// ++ /// A regex like `\pL` contains no uppercase literals, even though `L` ++ /// is uppercase and the `\pL` class contains uppercase characters. ++ pub any_uppercase: bool, ++ /// True if and only if the regex contains any literal at all. A regex like ++ /// `\pL` has this set to false. ++ pub any_literal: bool, ++} ++ ++impl Cased { ++ /// Returns a `Cased` value by doing analysis on the AST of `pattern`. ++ /// ++ /// If `pattern` is not a valid regular expression, then `None` is ++ /// returned. ++ pub fn from_pattern(pattern: &str) -> Option { ++ Parser::new() ++ .parse(pattern) ++ .map(|ast| Cased::from_ast(&ast)) ++ .ok() ++ } ++ ++ fn from_ast(ast: &Ast) -> Cased { ++ let mut cased = Cased::default(); ++ cased.from_ast_impl(ast); ++ cased ++ } ++ ++ fn from_ast_impl(&mut self, ast: &Ast) { ++ if self.done() { ++ return; ++ } ++ match *ast { ++ Ast::Empty(_) ++ | Ast::Flags(_) ++ | Ast::Dot(_) ++ | Ast::Assertion(_) ++ | Ast::Class(ast::Class::Unicode(_)) ++ | Ast::Class(ast::Class::Perl(_)) => {} ++ Ast::Literal(ref x) => { ++ self.from_ast_literal(x); ++ } ++ Ast::Class(ast::Class::Bracketed(ref x)) => { ++ self.from_ast_class_set(&x.kind); ++ } ++ Ast::Repetition(ref x) => { ++ self.from_ast_impl(&x.ast); ++ } ++ Ast::Group(ref x) => { ++ self.from_ast_impl(&x.ast); ++ } ++ Ast::Alternation(ref alt) => { ++ for x in &alt.asts { ++ self.from_ast_impl(x); ++ } ++ } ++ Ast::Concat(ref alt) => { ++ for x in &alt.asts { ++ self.from_ast_impl(x); ++ } ++ } ++ } ++ } ++ ++ fn from_ast_class_set(&mut self, ast: &ast::ClassSet) { ++ if self.done() { ++ return; ++ } ++ match *ast { ++ ast::ClassSet::Item(ref item) => { ++ self.from_ast_class_set_item(item); ++ } ++ ast::ClassSet::BinaryOp(ref x) => { ++ self.from_ast_class_set(&x.lhs); ++ self.from_ast_class_set(&x.rhs); ++ } ++ } ++ } ++ ++ fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) { ++ if self.done() { ++ return; ++ } ++ match *ast { ++ ast::ClassSetItem::Empty(_) ++ | ast::ClassSetItem::Ascii(_) ++ | ast::ClassSetItem::Unicode(_) ++ | ast::ClassSetItem::Perl(_) => {} ++ ast::ClassSetItem::Literal(ref x) => { ++ self.from_ast_literal(x); ++ } ++ ast::ClassSetItem::Range(ref x) => { ++ self.from_ast_literal(&x.start); ++ self.from_ast_literal(&x.end); ++ } ++ ast::ClassSetItem::Bracketed(ref x) => { ++ self.from_ast_class_set(&x.kind); ++ } ++ ast::ClassSetItem::Union(ref union) => { ++ for x in &union.items { ++ self.from_ast_class_set_item(x); ++ } ++ } ++ } ++ } ++ ++ fn from_ast_literal(&mut self, ast: &ast::Literal) { ++ self.any_literal = true; ++ self.any_uppercase = self.any_uppercase || ast.c.is_uppercase(); ++ } ++ ++ /// Returns true if and only if the attributes can never change no matter ++ /// what other AST it might see. ++ fn done(&self) -> bool { ++ self.any_uppercase && self.any_literal ++ } ++} ++ ++#[cfg(test)] ++mod tests { ++ use super::*; ++ ++ fn cased(pattern: &str) -> Cased { ++ Cased::from_pattern(pattern).unwrap() ++ } ++ ++ #[test] ++ fn various() { ++ let x = cased(""); ++ assert!(!x.any_uppercase); ++ assert!(!x.any_literal); ++ ++ let x = cased("foo"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased("Foo"); ++ assert!(x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased("foO"); ++ assert!(x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo\\"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo\w"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo\S"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo\p{Ll}"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo[a-z]"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo[A-Z]"); ++ assert!(x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo[\S\t]"); ++ assert!(!x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"foo\\S"); ++ assert!(x.any_uppercase); ++ assert!(x.any_literal); ++ ++ let x = cased(r"\p{Ll}"); ++ assert!(!x.any_uppercase); ++ assert!(!x.any_literal); ++ ++ let x = cased(r"aBc\w"); ++ assert!(x.any_uppercase); ++ assert!(x.any_literal); ++ } ++} +-- +2.14.3 + diff --git a/grep-0.1.8-fix-metadata.diff b/grep-0.1.8-fix-metadata.diff new file mode 100644 index 0000000..729e452 --- /dev/null +++ b/grep-0.1.8-fix-metadata.diff @@ -0,0 +1,12 @@ +--- grep-0.1.8/Cargo.toml 1969-12-31T16:00:00-08:00 ++++ grep-0.1.8/Cargo.toml 2018-03-13T21:48:49.760072-07:00 +@@ -28,7 +28,7 @@ + version = "2" + + [dependencies.regex] +-version = "0.2.1" ++version = "0.2.9" + + [dependencies.regex-syntax] +-version = "0.4.0" ++version = "0.5.3" diff --git a/rust-grep.spec b/rust-grep.spec index 99328bb..4206416 100644 --- a/rust-grep.spec +++ b/rust-grep.spec @@ -6,12 +6,18 @@ Name: rust-%{crate} Version: 0.1.8 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Fast line oriented regex searching as a library License: Unlicense or MIT URL: https://crates.io/crates/grep Source0: https://crates.io/api/v1/crates/%{crate}/%{version}/download#/%{crate}-%{version}.crate +# Initial patched metadata +# * bump regex and regex-syntax +Patch0: grep-0.1.8-fix-metadata.diff +# https://github.com/BurntSushi/ripgrep/pull/858/ +Patch1: 0001-grep-upgrade-to-regex-syntax-0.5.patch +Patch2: 0002-grep-add-perfect-smart-case-detection.patch ExclusiveArch: %{rust_arches} @@ -19,8 +25,8 @@ BuildRequires: rust-packaging # [dependencies] BuildRequires: (crate(log) >= 0.4.0 with crate(log) < 0.5.0) BuildRequires: (crate(memchr) >= 2.0.0 with crate(memchr) < 3.0.0) -BuildRequires: (crate(regex) >= 0.2.1 with crate(regex) < 0.3.0) -BuildRequires: (crate(regex-syntax) >= 0.4.0 with crate(regex-syntax) < 0.5.0) +BuildRequires: (crate(regex) >= 0.2.9 with crate(regex) < 0.3.0) +BuildRequires: (crate(regex-syntax) >= 0.5.3 with crate(regex-syntax) < 0.6.0) %description %{summary}. @@ -56,6 +62,9 @@ which use %{crate} from crates.io. %{cargo_registry}/%{crate}-%{version}/ %changelog +* Wed Mar 14 2018 Josh Stone - 0.1.8-2 +- bump regex and regex-syntax + * Mon Feb 12 2018 Igor Gnatenko - 0.1.8-1 - Update to 0.1.8