Update to 0.1.2

Signed-off-by: Igor Gnatenko <ignatenkobrain@fedoraproject.org>
6 years ago · 00bd1f2f1d
parent bf84d0ad86
commit 00bd1f2f1d
5 changed files with 8 additions and 407 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 /grep-regex-0.1.1.crate
+/grep-regex-0.1.2.crate
--- a/0001-regex-make-CRLF-hack-more-robust.patch
+++ b/0001-regex-make-CRLF-hack-more-robust.patch
@ -1,387 +0,0 @@
-From 9d703110cfe01782d2d0b03a340f5983da215e68 Mon Sep 17 00:00:00 2001
-From: Andrew Gallant <jamslam@gmail.com>
-Date: Sat, 26 Jan 2019 12:25:21 -0500
-Subject: [PATCH] regex: make CRLF hack more robust
-
-This commit improves the CRLF hack to be more robust. In particular, in
-addition to rewriting `$` as `(?:\r??$)`, we now strip `\r` from the end
-of a match if and only if the regex has an ending line anchor required for
-a match. This doesn't quite make the hack 100% correct, but should fix most
-use cases in practice. An example of a regex that will still be incorrect
-is `foo|bar$`, since the analysis isn't quite sophisticated enough to
-determine that a `\r` can be safely stripped from any match. Even if we
-fix that, regexes like `foo\r|bar$` still won't be handled correctly. Alas,
-more work on this front should really be focused on enabling this in the
-regex engine itself.
-
-The specific cause of this bug was that grep-searcher was sneakily
-stripping CRLF from matching lines when it really shouldn't have. We remove
-that code now, and instead rely on better match semantics provided at a
-lower level.
-
-Fixes #1095
---
- src/config.rs  |   8 ++++
- src/crlf.rs    | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
- src/matcher.rs |  57 ++++++++++++++++++++++++++--
- 3 files changed, 162 insertions(+), 3 deletions(-)
-
-diff --git a/src/config.rs b/src/config.rs
-index f3d1f1c..efed9d4 100644
--- a/src/config.rs
-+++ b/src/config.rs
-@@ -160,6 +160,14 @@ impl ConfiguredHIR {
-         non_matching_bytes(&self.expr)
-     }
- 
-+    /// Returns true if and only if this regex needs to have its match offsets
-+    /// tweaked because of CRLF support. Specifically, this occurs when the
-+    /// CRLF hack is enabled and the regex is line anchored at the end. In
-+    /// this case, matches that end with a `\r` have the `\r` stripped.
-+    pub fn needs_crlf_stripped(&self) -> bool {
-+        self.config.crlf && self.expr.is_line_anchored_end()
-+    }
-+
-     /// Builds a regular expression from this HIR expression.
-     pub fn regex(&self) -> Result<Regex, Error> {
-         self.pattern_to_regex(&self.expr.to_string())
-diff --git a/src/crlf.rs b/src/crlf.rs
-index ff6b15b..838a28f 100644
--- a/src/crlf.rs
-+++ b/src/crlf.rs
-@@ -1,5 +1,105 @@
-+use std::collections::HashMap;
-+
-+use grep_matcher::{Match, Matcher, NoError};
-+use regex::bytes::Regex;
- use regex_syntax::hir::{self, Hir, HirKind};
- 
-+use config::ConfiguredHIR;
-+use error::Error;
-+use matcher::RegexCaptures;
-+
-+/// A matcher for implementing "word match" semantics.
-+#[derive(Clone, Debug)]
-+pub struct CRLFMatcher {
-+    /// The regex.
-+    regex: Regex,
-+    /// A map from capture group name to capture group index.
-+    names: HashMap<String, usize>,
-+}
-+
-+impl CRLFMatcher {
-+    /// Create a new matcher from the given pattern that strips `\r` from the
-+    /// end of every match.
-+    ///
-+    /// This panics if the given expression doesn't need its CRLF stripped.
-+    pub fn new(expr: &ConfiguredHIR) -> Result<CRLFMatcher, Error> {
-+        assert!(expr.needs_crlf_stripped());
-+
-+        let regex = expr.regex()?;
-+        let mut names = HashMap::new();
-+        for (i, optional_name) in regex.capture_names().enumerate() {
-+            if let Some(name) = optional_name {
-+                names.insert(name.to_string(), i.checked_sub(1).unwrap());
-+            }
-+        }
-+        Ok(CRLFMatcher { regex, names })
-+    }
-+}
-+
-+impl Matcher for CRLFMatcher {
-+    type Captures = RegexCaptures;
-+    type Error = NoError;
-+
-+    fn find_at(
-+        &self,
-+        haystack: &[u8],
-+        at: usize,
-+    ) -> Result<Option<Match>, NoError> {
-+        let m = match self.regex.find_at(haystack, at) {
-+            None => return Ok(None),
-+            Some(m) => Match::new(m.start(), m.end()),
-+        };
-+        Ok(Some(adjust_match(haystack, m)))
-+    }
-+
-+    fn new_captures(&self) -> Result<RegexCaptures, NoError> {
-+        Ok(RegexCaptures::new(self.regex.capture_locations()))
-+    }
-+
-+    fn capture_count(&self) -> usize {
-+        self.regex.captures_len().checked_sub(1).unwrap()
-+    }
-+
-+    fn capture_index(&self, name: &str) -> Option<usize> {
-+        self.names.get(name).map(|i| *i)
-+    }
-+
-+    fn captures_at(
-+        &self,
-+        haystack: &[u8],
-+        at: usize,
-+        caps: &mut RegexCaptures,
-+    ) -> Result<bool, NoError> {
-+        caps.strip_crlf(false);
-+        let r = self.regex.captures_read_at(caps.locations(), haystack, at);
-+        if !r.is_some() {
-+            return Ok(false);
-+        }
-+
-+        // If the end of our match includes a `\r`, then strip it from all
-+        // capture groups ending at the same location.
-+        let end = caps.locations().get(0).unwrap().1;
-+        if end > 0 && haystack.get(end - 1) == Some(&b'\r') {
-+            caps.strip_crlf(true);
-+        }
-+        Ok(true)
-+    }
-+
-+    // We specifically do not implement other methods like find_iter or
-+    // captures_iter. Namely, the iter methods are guaranteed to be correct
-+    // by virtue of implementing find_at and captures_at above.
-+}
-+
-+/// If the given match ends with a `\r`, then return a new match that ends
-+/// immediately before the `\r`.
-+pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
-+    if m.end() > 0 && haystack.get(m.end() - 1) == Some(&b'\r') {
-+        m.with_end(m.end() - 1)
-+    } else {
-+        m
-+    }
-+}
-+
- /// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`.
- ///
- /// This does not preserve the exact semantics of the given expression,
-diff --git a/src/matcher.rs b/src/matcher.rs
-index 831d573..b15d824 100644
--- a/src/matcher.rs
-+++ b/src/matcher.rs
-@@ -6,6 +6,7 @@ use grep_matcher::{
- use regex::bytes::{CaptureLocations, Regex};
- 
- use config::{Config, ConfiguredHIR};
-+use crlf::CRLFMatcher;
- use error::Error;
- use word::WordMatcher;
- 
-@@ -344,6 +345,11 @@ impl RegexMatcher {
- enum RegexMatcherImpl {
-     /// The standard matcher used for all regular expressions.
-     Standard(StandardMatcher),
-+    /// A matcher that strips `\r` from the end of matches.
-+    ///
-+    /// This is only used when the CRLF hack is enabled and the regex is line
-+    /// anchored at the end.
-+    CRLF(CRLFMatcher),
-     /// A matcher that only matches at word boundaries. This transforms the
-     /// regex to `(^|\W)(...)($|\W)` instead of the more intuitive `\b(...)\b`.
-     /// Because of this, the WordMatcher provides its own implementation of
-@@ -358,6 +364,8 @@ impl RegexMatcherImpl {
-     fn new(expr: &ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
-         if expr.config().word {
-             Ok(RegexMatcherImpl::Word(WordMatcher::new(expr)?))
-+        } else if expr.needs_crlf_stripped() {
-+            Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
-         } else {
-             Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
-         }
-@@ -379,6 +387,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.find_at(haystack, at),
-+            CRLF(ref m) => m.find_at(haystack, at),
-             Word(ref m) => m.find_at(haystack, at),
-         }
-     }
-@@ -387,6 +396,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.new_captures(),
-+            CRLF(ref m) => m.new_captures(),
-             Word(ref m) => m.new_captures(),
-         }
-     }
-@@ -395,6 +405,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.capture_count(),
-+            CRLF(ref m) => m.capture_count(),
-             Word(ref m) => m.capture_count(),
-         }
-     }
-@@ -403,6 +414,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.capture_index(name),
-+            CRLF(ref m) => m.capture_index(name),
-             Word(ref m) => m.capture_index(name),
-         }
-     }
-@@ -411,6 +423,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.find(haystack),
-+            CRLF(ref m) => m.find(haystack),
-             Word(ref m) => m.find(haystack),
-         }
-     }
-@@ -425,6 +438,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.find_iter(haystack, matched),
-+            CRLF(ref m) => m.find_iter(haystack, matched),
-             Word(ref m) => m.find_iter(haystack, matched),
-         }
-     }
-@@ -439,6 +453,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.try_find_iter(haystack, matched),
-+            CRLF(ref m) => m.try_find_iter(haystack, matched),
-             Word(ref m) => m.try_find_iter(haystack, matched),
-         }
-     }
-@@ -451,6 +466,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.captures(haystack, caps),
-+            CRLF(ref m) => m.captures(haystack, caps),
-             Word(ref m) => m.captures(haystack, caps),
-         }
-     }
-@@ -466,6 +482,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.captures_iter(haystack, caps, matched),
-+            CRLF(ref m) => m.captures_iter(haystack, caps, matched),
-             Word(ref m) => m.captures_iter(haystack, caps, matched),
-         }
-     }
-@@ -481,6 +498,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
-+            CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
-             Word(ref m) => m.try_captures_iter(haystack, caps, matched),
-         }
-     }
-@@ -494,6 +512,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.captures_at(haystack, at, caps),
-+            CRLF(ref m) => m.captures_at(haystack, at, caps),
-             Word(ref m) => m.captures_at(haystack, at, caps),
-         }
-     }
-@@ -509,6 +528,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.replace(haystack, dst, append),
-+            CRLF(ref m) => m.replace(haystack, dst, append),
-             Word(ref m) => m.replace(haystack, dst, append),
-         }
-     }
-@@ -527,6 +547,9 @@ impl Matcher for RegexMatcher {
-             Standard(ref m) => {
-                 m.replace_with_captures(haystack, caps, dst, append)
-             }
-+            CRLF(ref m) => {
-+                m.replace_with_captures(haystack, caps, dst, append)
-+            }
-             Word(ref m) => {
-                 m.replace_with_captures(haystack, caps, dst, append)
-             }
-@@ -537,6 +560,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.is_match(haystack),
-+            CRLF(ref m) => m.is_match(haystack),
-             Word(ref m) => m.is_match(haystack),
-         }
-     }
-@@ -549,6 +573,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.is_match_at(haystack, at),
-+            CRLF(ref m) => m.is_match_at(haystack, at),
-             Word(ref m) => m.is_match_at(haystack, at),
-         }
-     }
-@@ -560,6 +585,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.shortest_match(haystack),
-+            CRLF(ref m) => m.shortest_match(haystack),
-             Word(ref m) => m.shortest_match(haystack),
-         }
-     }
-@@ -572,6 +598,7 @@ impl Matcher for RegexMatcher {
-         use self::RegexMatcherImpl::*;
-         match self.matcher {
-             Standard(ref m) => m.shortest_match_at(haystack, at),
-+            CRLF(ref m) => m.shortest_match_at(haystack, at),
-             Word(ref m) => m.shortest_match_at(haystack, at),
-         }
-     }
-@@ -712,6 +739,9 @@ pub struct RegexCaptures {
-     /// and the capturing groups must behave as if `(re)` is the `0`th capture
-     /// group.
-     offset: usize,
-+    /// When enable, the end of a match has `\r` stripped from it, if one
-+    /// exists.
-+    strip_crlf: bool,
- }
- 
- impl Captures for RegexCaptures {
-@@ -720,8 +750,25 @@ impl Captures for RegexCaptures {
-     }
- 
-     fn get(&self, i: usize) -> Option<Match> {
-        let actual = i.checked_add(self.offset).unwrap();
-        self.locs.pos(actual).map(|(s, e)| Match::new(s, e))
-+        if !self.strip_crlf {
-+            let actual = i.checked_add(self.offset).unwrap();
-+            return self.locs.pos(actual).map(|(s, e)| Match::new(s, e));
-+        }
-+
-+        // currently don't support capture offsetting with CRLF stripping
-+        assert_eq!(self.offset, 0);
-+        let m = match self.locs.pos(i).map(|(s, e)| Match::new(s, e)) {
-+            None => return None,
-+            Some(m) => m,
-+        };
-+        // If the end position of this match corresponds to the end position
-+        // of the overall match, then we apply our CRLF stripping. Otherwise,
-+        // we cannot assume stripping is correct.
-+        if i == 0 || m.end() == self.locs.pos(0).unwrap().1 {
-+            Some(m.with_end(m.end() - 1))
-+        } else {
-+            Some(m)
-+        }
-     }
- }
- 
-@@ -734,12 +781,16 @@ impl RegexCaptures {
-         locs: CaptureLocations,
-         offset: usize,
-     ) -> RegexCaptures {
-        RegexCaptures { locs, offset }
-+        RegexCaptures { locs, offset, strip_crlf: false }
-     }
- 
-     pub(crate) fn locations(&mut self) -> &mut CaptureLocations {
-         &mut self.locs
-     }
-+
-+    pub(crate) fn strip_crlf(&mut self, yes: bool) {
-+        self.strip_crlf = yes;
-+    }
- }
- 
- #[cfg(test)]
-- 
-2.20.1
-
--- a/grep-regex-fix-metadata.diff
+++ b/grep-regex-fix-metadata.diff
@ -1,11 +0,0 @@
--- grep-regex-0.1.1/Cargo.toml	1970-01-01T01:00:00+01:00
-+++ grep-regex-0.1.1/Cargo.toml	2019-02-14T09:25:19.720854+01:00
-@@ -31,7 +31,7 @@
- version = "1.0.5"
- 
- [dependencies.regex-syntax]
-version = "0.6.2"
-+version = "0.6.5"
- 
- [dependencies.thread_local]
- version = "0.3.6"
--- a/rust-grep-regex.spec
+++ b/rust-grep-regex.spec
@ -6,19 +6,14 @@
 %global crate grep-regex

 Name:           rust-%{crate}
-Version:        0.1.1
-Release:        5%{?dist}
+Version:        0.1.2
+Release:        1%{?dist}
 Summary:        Use Rust's regex library with the 'grep' crate

 # Upstream license specification: Unlicense/MIT
 License:        Unlicense or MIT
 URL:            https://crates.io/crates/grep-regex
 Source:         %{crates_source}
-# Initial patched metadata
-# * Bump regex-syntax to 0.6.5 (for patch0001)
-Patch0:         grep-regex-fix-metadata.diff
-# https://github.com/BurntSushi/ripgrep/commit/9d703110cfe01782d2d0b03a340f5983da215e68
-Patch0001:      0001-regex-make-CRLF-hack-more-robust.patch

 ExclusiveArch:  %{rust_arches}

@ -26,7 +21,7 @@ BuildRequires:  rust-packaging
 BuildRequires:  (crate(grep-matcher/default) >= 0.1.1 with crate(grep-matcher/default) < 0.2.0)
 BuildRequires:  (crate(log/default) >= 0.4.5 with crate(log/default) < 0.5.0)
 BuildRequires:  (crate(regex-syntax/default) >= 0.6.5 with crate(regex-syntax/default) < 0.7.0)
-BuildRequires:  (crate(regex/default) >= 1.0.5 with crate(regex/default) < 2.0.0)
+BuildRequires:  (crate(regex/default) >= 1.1.0 with crate(regex/default) < 2.0.0)
 BuildRequires:  (crate(thread_local/default) >= 0.3.6 with crate(thread_local/default) < 0.4.0)
 BuildRequires:  (crate(utf8-ranges/default) >= 1.0.1 with crate(utf8-ranges/default) < 2.0.0)

@ -77,6 +72,9 @@ which use "default" feature of "%{crate}" crate.
 %endif

 %changelog
+* Sat Feb 16 2019 Igor Gnatenko <ignatenkobrain@fedoraproject.org> - 0.1.2-1
+- Update to 0.1.2
+
 * Thu Feb 14 2019 Igor Gnatenko <ignatenkobrain@fedoraproject.org> - 0.1.1-5
 - Backport fix for CRLF handling

--- a/2
+++ b/2
@ -1 +1 @@
-SHA512 (grep-regex-0.1.1.crate) = 281585dca0d8e42778bda7a23780ddef38d31145c36e3ea831b1edb98be66c87bba553303fb973af1211aef21d1deb11d2f9466947fb0622b2d1bf180af01a1f
+SHA512 (grep-regex-0.1.2.crate) = 4ccaeda895b554969398dd1875e486b1994c7ef539277e5f4f30549b8d65a18228a48a5375ade1638ad7c26fd79d57ef0273e6d1b14dc285d9690bd63572872d