import coreutils-8.32-34.el9

i9c-beta changed/i9c-beta/coreutils-8.32-34.el9
MSVSphere Packaging Team 2 years ago
commit 93300dd2bf

@ -0,0 +1 @@
b2b12195e276c64c8e850cf40ea2cff9b3aa53f6 SOURCES/coreutils-8.32.tar.xz

1
.gitignore vendored

@ -0,0 +1 @@
SOURCES/coreutils-8.32.tar.xz

@ -0,0 +1,20 @@
diff --git a/src/date.c b/src/date.c
index ddb011e..619a72b 100644
--- a/src/date.c
+++ b/src/date.c
@@ -490,14 +490,7 @@ main (int argc, char **argv)
format = DATE_FMT_LANGINFO ();
if (! *format)
{
- /* Do not wrap the following literal format string with _(...).
- For example, suppose LC_ALL is unset, LC_TIME=POSIX,
- and LANG="ko_KR". In that case, POSIX says that LC_TIME
- determines the format and contents of date and time strings
- written by date, which means "date" must generate output
- using the POSIX locale; but adding _() would cause "date"
- to use a Korean translation of the format. */
- format = "%a %b %e %H:%M:%S %Z %Y";
+ format = dcgettext(NULL, N_("%a %b %e %H:%M:%S %Z %Y"), LC_TIME);
}
}

@ -0,0 +1,49 @@
diff --git a/src/uname.c b/src/uname.c
index 6371ca2..1ad8fd7 100644
--- a/src/uname.c
+++ b/src/uname.c
@@ -300,13 +300,19 @@ main (int argc, char **argv)
if (toprint & PRINT_PROCESSOR)
{
- char const *element = unknown;
+ char *element = unknown;
#if HAVE_SYSINFO && defined SI_ARCHITECTURE
{
static char processor[257];
if (0 <= sysinfo (SI_ARCHITECTURE, processor, sizeof processor))
element = processor;
}
+#else
+ {
+ static struct utsname u;
+ uname(&u);
+ element = u.machine;
+ }
#endif
#ifdef UNAME_PROCESSOR
if (element == unknown)
@@ -344,7 +350,7 @@ main (int argc, char **argv)
if (toprint & PRINT_HARDWARE_PLATFORM)
{
- char const *element = unknown;
+ char *element = unknown;
#if HAVE_SYSINFO && defined SI_PLATFORM
{
static char hardware_platform[257];
@@ -352,6 +358,14 @@ main (int argc, char **argv)
hardware_platform, sizeof hardware_platform))
element = hardware_platform;
}
+#else
+ {
+ static struct utsname u;
+ uname(&u);
+ element = u.machine;
+ if(strlen(element)==4 && element[0]=='i' && element[2]=='8' && element[3]=='6')
+ element[1]='3';
+ }
#endif
#ifdef UNAME_HARDWARE_PLATFORM
if (element == unknown)

@ -0,0 +1,26 @@
From 6880c3dc9098b3337612850d1500b474aeb944ca Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Tue, 29 Aug 2017 17:33:51 +0200
Subject: [PATCH] require_selinux_(): use selinuxenabled(8) if available
---
init.cfg | 3 +++
1 file changed, 3 insertions(+)
diff --git a/init.cfg b/init.cfg
index af6b581..f887b3a 100644
--- a/init.cfg
+++ b/init.cfg
@@ -114,6 +114,9 @@ require_selinux_()
grep 'selinuxfs$' /proc/filesystems > /dev/null \
|| skip_ "this system lacks SELinux support"
+ # use the 'selinuxenabled' utility if available
+ selinuxenabled; [ $? = 1 ] && skip_ "SELinux is disabled"
+
# Independent of whether SELinux is enabled system-wide,
# the current file system may lack SELinux support.
# Also the current build may have SELinux support disabled.
--
2.9.5

@ -0,0 +1,29 @@
From 0d04ee8ddedb2bf33d64f148f246a3b7ec4fef21 Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Mon, 23 Jan 2017 12:35:41 +0100
Subject: [PATCH] test-lock: disable the rwlock test
It hangs indefinitely if the system rwlock implementation does not
prevent writer starvation (and glibc does not implement it).
Bug: http://www.mail-archive.com/bug-gnulib@gnu.org/msg33017.html
---
gnulib-tests/test-lock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gnulib-tests/test-lock.c b/gnulib-tests/test-lock.c
index aa6de27..5af0a6c 100644
--- a/gnulib-tests/test-lock.c
+++ b/gnulib-tests/test-lock.c
@@ -42,7 +42,7 @@
Uncomment some of these, to verify that all tests crash if no locking
is enabled. */
#define DO_TEST_LOCK 1
-#define DO_TEST_RWLOCK 1
+#define DO_TEST_RWLOCK 0
#define DO_TEST_RECURSIVE_LOCK 1
#define DO_TEST_ONCE 1
--
2.7.4

@ -0,0 +1,100 @@
From 81e25c8521937ecf7f444bab11fddaaf81cc3efd Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Fri, 17 Jun 2016 16:58:18 +0200
Subject: [PATCH] downstream changes to default DIR_COLORS
---
DIR_COLORS | 9 ++++++++-
DIR_COLORS.lightbgcolor | 21 +++++++++++++++------
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/DIR_COLORS b/DIR_COLORS
index bd5df23..84f2417 100644
--- a/DIR_COLORS
+++ b/DIR_COLORS
@@ -1,3 +1,7 @@
+# This file goes in the /etc directory, and must be world readable.
+# You can copy this file to .dir_colors in your $HOME directory to override
+# the system defaults.
+
# Configuration file for dircolors, a utility to help you set the
# LS_COLORS environment variable used by GNU ls with the --color option.
@@ -8,6 +12,9 @@
# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the
# slackware version of dircolors) are recognized but ignored.
+# For compatibility, the pattern "^COLOR.*none" is recognized as a way to
+# disable colorization. See https://bugzilla.redhat.com/1349579 for details.
+
# Below are TERM entries, which can be a glob patterns, to match
# against the TERM environment variable to determine if it is colorizable.
TERM Eterm
@@ -58,7 +65,7 @@ DOOR 01;35 # door
BLK 40;33;01 # block device driver
CHR 40;33;01 # character device driver
ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ...
-MISSING 00 # ... and the files they point to
+MISSING 01;37;41 # ... and the files they point to
SETUID 37;41 # file that is setuid (u+s)
SETGID 30;43 # file that is setgid (g+s)
CAPABILITY 30;41 # file with capability
diff --git a/DIR_COLORS.lightbgcolor b/DIR_COLORS.lightbgcolor
index 4316832..6402854 100644
--- a/DIR_COLORS.lightbgcolor
+++ b/DIR_COLORS.lightbgcolor
@@ -1,3 +1,9 @@
+# Configuration file for the color ls utility - modified for lighter backgrounds
+
+# This file goes in the /etc directory, and must be world readable.
+# You can copy this file to .dir_colors in your $HOME directory to override
+# the system defaults.
+
# Configuration file for dircolors, a utility to help you set the
# LS_COLORS environment variable used by GNU ls with the --color option.
@@ -8,6 +14,9 @@
# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the
# slackware version of dircolors) are recognized but ignored.
+# For compatibility, the pattern "^COLOR.*none" is recognized as a way to
+# disable colorization. See https://bugzilla.redhat.com/1349579 for details.
+
# Below are TERM entries, which can be a glob patterns, to match
# against the TERM environment variable to determine if it is colorizable.
TERM Eterm
@@ -48,17 +57,17 @@ TERM xterm*
#NORMAL 00 # no color code at all
#FILE 00 # regular file: use no color at all
RESET 0 # reset to "normal" color
-DIR 01;34 # directory
-LINK 01;36 # symbolic link. (If you set this to 'target' instead of a
+DIR 00;34 # directory
+LINK 00;36 # symbolic link. (If you set this to 'target' instead of a
# numerical value, the color is as for the file pointed to.)
MULTIHARDLINK 00 # regular file with more than one link
FIFO 40;33 # pipe
-SOCK 01;35 # socket
-DOOR 01;35 # door
+SOCK 00;35 # socket
+DOOR 00;35 # door
BLK 40;33;01 # block device driver
CHR 40;33;01 # character device driver
ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ...
-MISSING 00 # ... and the files they point to
+MISSING 01;37;41 # ... and the files they point to
SETUID 37;41 # file that is setuid (u+s)
SETGID 30;43 # file that is setgid (g+s)
CAPABILITY 30;41 # file with capability
@@ -67,7 +76,7 @@ OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky
STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable
# This is for files with execute permission:
-EXEC 01;32
+EXEC 00;32
# List any file extensions like '.gz' or '.tar' that you would like ls
# to colorize below. Put the extension, a space, and the color init string.
--
2.21.1

@ -0,0 +1,52 @@
From 51b9a8ba0974d262e0b0f81a2078b3c7907b25ed Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 7 Apr 2021 17:29:59 -0700
Subject: [PATCH] utimens: fix confusing arg type in internal func
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Although the old code was technically correct, this was accidental
and it understandably confused Coverity. Reported by Ondrej Dubaj in:
https://lists.gnu.org/r/bug-tar/2021-04/msg00000.html
* lib/utimens.c (update_timespec): Change arg type from struct
timespec *[2] (pointer to array of 2 pointers to timespecs) to
struct timespec ** (pointer to pointer to the first timespec in
an array of 2 timespecs). Although the old code happened to be
technically correct, it was misleading and confused Coverity.
And though the type struct timespec (**)[2] (pointer to pointer
to array of 2 timespecs) would perhaps be more technically
correct, it would be almost as confusing and would require changes
elsewhere in this file; lets quit while were ahead.
Upstream-commit: a3a946f670718d0dee5a7425ad5ac0a29fb46ea1
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
lib/utimens.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/utimens.c b/lib/utimens.c
index 3f53942..ea8c672 100644
--- a/lib/utimens.c
+++ b/lib/utimens.c
@@ -123,14 +123,14 @@ validate_timespec (struct timespec timespec[2])
return result + (utime_omit_count == 1);
}
-/* Normalize any UTIME_NOW or UTIME_OMIT values in *TS, using stat
- buffer STATBUF to obtain the current timestamps of the file. If
+/* Normalize any UTIME_NOW or UTIME_OMIT values in (*TS)[0] and (*TS)[1],
+ using STATBUF to obtain the current timestamps of the file. If
both times are UTIME_NOW, set *TS to NULL (as this can avoid some
permissions issues). If both times are UTIME_OMIT, return true
(nothing further beyond the prior collection of STATBUF is
necessary); otherwise return false. */
static bool
-update_timespec (struct stat const *statbuf, struct timespec *ts[2])
+update_timespec (struct stat const *statbuf, struct timespec **ts)
{
struct timespec *timespec = *ts;
if (timespec[0].tv_nsec == UTIME_OMIT
--
2.26.3

File diff suppressed because it is too large Load Diff

@ -0,0 +1,119 @@
From 76126e2831580d0df20530f4d6f72189bd4f0b9a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 18 Jun 2020 22:16:24 -0700
Subject: [PATCH] cp: default to COW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Likewise for install. Proposed in Bug#24400, and long past due.
* NEWS:
* doc/coreutils.texi (cp invocation):
* src/copy.h (enum Reflink_type): Document this.
* src/cp.c (cp_option_init):
* src/install.c (cp_option_init): Implement this.
Upstream-commit: 25725f9d41735d176d73a757430739fb71c7d043
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
doc/coreutils.texi | 19 ++++++++++++-------
src/copy.h | 4 ++--
src/cp.c | 2 +-
src/install.c | 2 +-
4 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 02e0c1c..2382a16 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -8854,12 +8854,14 @@ The @var{when} value can be one of the following:
@table @samp
@item always
-The default behavior: if the copy-on-write operation is not supported
+If the copy-on-write operation is not supported
then report the failure for each file and exit with a failure status.
+Plain @option{--reflink} is equivalent to @option{--reflink=when}.
@item auto
If the copy-on-write operation is not supported then fall back
to the standard copy behavior.
+This is the default if no @option{--reflink} option is given.
@item never
Disable copy-on-write operation and use the standard copy behavior.
@@ -8868,12 +8870,6 @@ Disable copy-on-write operation and use the standard copy behavior.
This option is overridden by the @option{--link}, @option{--symbolic-link}
and @option{--attributes-only} options, thus allowing it to be used
to configure the default data copying behavior for @command{cp}.
-For example, with the following alias, @command{cp} will use the
-minimum amount of space supported by the file system.
-
-@example
-alias cp='cp --reflink=auto --sparse=always'
-@end example
@item --remove-destination
@opindex --remove-destination
@@ -8918,6 +8914,15 @@ This is useful in creating a file for use with the @command{mkswap} command,
since such a file must not have any holes.
@end table
+For example, with the following alias, @command{cp} will use the
+minimum amount of space supported by the file system.
+(Older versions of @command{cp} can also benefit from
+@option{--reflink=auto} here.)
+
+@example
+alias cp='cp --sparse=always'
+@end example
+
@optStripTrailingSlashes
@item -s
diff --git a/src/copy.h b/src/copy.h
index 874d6f7..a0ad494 100644
--- a/src/copy.h
+++ b/src/copy.h
@@ -46,10 +46,10 @@ enum Sparse_type
/* Control creation of COW files. */
enum Reflink_type
{
- /* Default to a standard copy. */
+ /* Do a standard copy. */
REFLINK_NEVER,
- /* Try a COW copy and fall back to a standard copy. */
+ /* Try a COW copy and fall back to a standard copy; this is the default. */
REFLINK_AUTO,
/* Require a COW copy and fail if not available. */
diff --git a/src/cp.c b/src/cp.c
index 0193df8..9e7ad14 100644
--- a/src/cp.c
+++ b/src/cp.c
@@ -796,7 +796,7 @@ cp_option_init (struct cp_options *x)
x->move_mode = false;
x->install_mode = false;
x->one_file_system = false;
- x->reflink_mode = REFLINK_NEVER;
+ x->reflink_mode = REFLINK_AUTO;
x->preserve_ownership = false;
x->preserve_links = false;
diff --git a/src/install.c b/src/install.c
index 4ab44a6..aef16ca 100644
--- a/src/install.c
+++ b/src/install.c
@@ -264,7 +264,7 @@ cp_option_init (struct cp_options *x)
{
cp_options_default (x);
x->copy_as_regular = true;
- x->reflink_mode = REFLINK_NEVER;
+ x->reflink_mode = REFLINK_AUTO;
x->dereference = DEREF_ALWAYS;
x->unlink_dest_before_opening = true;
x->unlink_dest_after_failed_open = false;
--
2.25.4

@ -0,0 +1,72 @@
From 0f053de4bc3ca0cfd88a42d236881dfdddb10ee9 Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Wed, 30 Jun 2021 17:53:22 +0200
Subject: [PATCH] df: fix duplicated remote entries due to bind mounts
As originally reported in <https://bugzilla.redhat.com/1962515>,
df invoked without -a printed duplicated entries for NFS mounts
of bind mounts. This is a regression from commit v8.25-54-g1c17f61ef99,
which introduced the use of a hash table.
The proposed patch makes sure that the devlist entry seen the last time
is used for comparison when eliminating duplicated mount entries. This
way it worked before introducing the hash table.
Patch co-authored by Roberto Bergantinos.
* src/ls.c (struct devlist): Introduce the seen_last pointer.
(devlist_for_dev): Return the devlist entry seen the last time if found.
(filter_mount_list): Remember the devlist entry seen the last time for
each hashed item.
Fixes https://bugs.gnu.org/49298
Upstream-commit: d6125af095c9553f38cba0696f15158f5abe4ecc
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/df.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/src/df.c b/src/df.c
index 7e01839..3e9247f 100644
--- a/src/df.c
+++ b/src/df.c
@@ -54,6 +54,7 @@ struct devlist
dev_t dev_num;
struct mount_entry *me;
struct devlist *next;
+ struct devlist *seen_last; /* valid for hashed devlist entries only */
};
/* Filled with device numbers of examined file systems to avoid
@@ -689,7 +690,13 @@ devlist_for_dev (dev_t dev)
return NULL;
struct devlist dev_entry;
dev_entry.dev_num = dev;
- return hash_lookup (devlist_table, &dev_entry);
+
+ struct devlist *found = hash_lookup (devlist_table, &dev_entry);
+ if (found == NULL)
+ return NULL;
+
+ /* Return the last devlist entry we have seen with this dev_num */
+ return found->seen_last;
}
static void
@@ -807,8 +814,12 @@ filter_mount_list (bool devices_only)
devlist->dev_num = buf.st_dev;
devlist->next = device_list;
device_list = devlist;
- if (hash_insert (devlist_table, devlist) == NULL)
+
+ struct devlist *hash_entry = hash_insert (devlist_table, devlist);
+ if (hash_entry == NULL)
xalloc_die ();
+ /* Ensure lookups use this latest devlist. */
+ hash_entry->seen_last = devlist;
me = me->me_next;
}
--
2.31.1

@ -0,0 +1,81 @@
From 9618fb718b75920f37e5be2049ad1d0bb5c4a28c Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 26 Jan 2021 09:23:54 -0800
Subject: [PATCH] expr: fix bug with unmatched \(...\)
Problem reported by Qiuhao Li.
* doc/coreutils.texi (String expressions):
Document the correct behavior, which POSIX requires.
* src/expr.c (docolon): Treat unmatched \(...\) as empty.
* tests/misc/expr.pl: New test.
Upstream-commit: 735083ba24878075235007b4417982ad5700436d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
doc/coreutils.texi | 14 ++++++++------
src/expr.c | 9 +++++++--
tests/misc/expr.pl | 3 +++
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 2382a16..5b2bb2c 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -13529,12 +13529,14 @@ second is considered to be a (basic, a la GNU @code{grep}) regular
expression, with a @code{^} implicitly prepended. The first argument is
then matched against this regular expression.
-If the match succeeds and @var{regex} uses @samp{\(} and @samp{\)}, the
-@code{:} expression returns the part of @var{string} that matched the
-subexpression; otherwise, it returns the number of characters matched.
-
-If the match fails, the @code{:} operator returns the null string if
-@samp{\(} and @samp{\)} are used in @var{regex}, otherwise 0.
+If @var{regex} does not use @samp{\(} and @samp{\)}, the @code{:}
+expression returns the number of characters matched, or 0 if the match
+fails.
+
+If @var{regex} uses @samp{\(} and @samp{\)}, the @code{:} expression
+returns the part of @var{string} that matched the subexpression, or
+the null string if the match failed or the subexpression did not
+contribute to the match.
@kindex \( @r{regexp operator}
Only the first @samp{\( @dots{} \)} pair is relevant to the return
diff --git a/src/expr.c b/src/expr.c
index e134872..0616a42 100644
--- a/src/expr.c
+++ b/src/expr.c
@@ -721,8 +721,13 @@ docolon (VALUE *sv, VALUE *pv)
/* Were \(...\) used? */
if (re_buffer.re_nsub > 0)
{
- sv->u.s[re_regs.end[1]] = '\0';
- v = str_value (sv->u.s + re_regs.start[1]);
+ if (re_regs.end[1] < 0)
+ v = str_value ("");
+ else
+ {
+ sv->u.s[re_regs.end[1]] = '\0';
+ v = str_value (sv->u.s + re_regs.start[1]);
+ }
}
else
{
diff --git a/tests/misc/expr.pl b/tests/misc/expr.pl
index e45f8e7..e57f79d 100755
--- a/tests/misc/expr.pl
+++ b/tests/misc/expr.pl
@@ -84,6 +84,9 @@ my @Tests =
# In 5.94 and earlier, anchors incorrectly matched newlines.
['anchor', "'a\nb' : 'a\$'", {OUT => '0'}, {EXIT => 1}],
+ # In 8.32, \( ... \) that did not match caused memory errors.
+ ['emptysub', '"a" : "\\(b\\)*"', {OUT => ''}, {EXIT => 1}],
+
# These tests are taken from grep/tests/bre.tests.
['bre1', '"abc" : "a\\(b\\)c"', {OUT => 'b'}],
['bre2', '"a(" : "a("', {OUT => '2'}],
--
2.26.2

@ -0,0 +1,38 @@
From 602fb566468d3837b7871c17a0fab1a20228d119 Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Mon, 7 Jun 2021 14:43:03 +0200
Subject: [PATCH] mountlist: recognize fuse.portal as dummy file system
This was originally proposed at:
https://lists.gnu.org/archive/html/bug-gnulib/2021-02/msg00053.html
As the full review might take some time, would it be possible to apply
at least the part related to fuse.portal file systems? They started to
cause problems recently:
https://bugs.launchpad.net/ubuntu/+source/xdg-desktop-portal/+bug/1905623
https://github.com/muesli/duf/issues/35
https://bugzilla.redhat.com/1913358
Upstream-commit: 9a38d499ca16f2f4304992eb1ab0894cd0b478e1
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
lib/mountlist.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/mountlist.c b/lib/mountlist.c
index e0227b7..e5f6b07 100644
--- a/lib/mountlist.c
+++ b/lib/mountlist.c
@@ -170,6 +170,7 @@
|| strcmp (Fs_type, "debugfs") == 0 \
|| strcmp (Fs_type, "devpts") == 0 \
|| strcmp (Fs_type, "fusectl") == 0 \
+ || strcmp (Fs_type, "fuse.portal") == 0 \
|| strcmp (Fs_type, "mqueue") == 0 \
|| strcmp (Fs_type, "rpc_pipefs") == 0 \
|| strcmp (Fs_type, "sysfs") == 0 \
--
2.31.1

@ -0,0 +1,48 @@
From f61085aaa37f169365c56e44f5129d0491913b6a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 27 Aug 2020 17:52:58 -0700
Subject: [PATCH] perror, strerror_r: remove unportable tests
Problem reported by Florian Weimer in:
https://lists.gnu.org/r/bug-gnulib/2020-08/msg00220.html
* tests/test-perror2.c (main):
* tests/test-strerror_r.c (main): Omit unportable tests.
Upstream-commit: 175e0bc72808d564074c4adcc72aeadb74adfcc6
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
gnulib-tests/test-perror2.c | 3 ---
gnulib-tests/test-strerror_r.c | 3 ---
2 files changed, 6 deletions(-)
diff --git a/gnulib-tests/test-perror2.c b/gnulib-tests/test-perror2.c
index 1d14eda..c6214dd 100644
--- a/gnulib-tests/test-perror2.c
+++ b/gnulib-tests/test-perror2.c
@@ -79,9 +79,6 @@ main (void)
errno = -5;
perror ("");
ASSERT (!ferror (stderr));
- ASSERT (msg1 == msg2 || msg1 == msg4 || STREQ (msg1, str1));
- ASSERT (msg2 == msg4 || STREQ (msg2, str2));
- ASSERT (msg3 == msg4 || STREQ (msg3, str3));
ASSERT (STREQ (msg4, str4));
free (str1);
diff --git a/gnulib-tests/test-strerror_r.c b/gnulib-tests/test-strerror_r.c
index b11d6fd..c1dbcf8 100644
--- a/gnulib-tests/test-strerror_r.c
+++ b/gnulib-tests/test-strerror_r.c
@@ -165,9 +165,6 @@ main (void)
strerror_r (EACCES, buf, sizeof buf);
strerror_r (-5, buf, sizeof buf);
- ASSERT (msg1 == msg2 || msg1 == msg4 || STREQ (msg1, str1));
- ASSERT (msg2 == msg4 || STREQ (msg2, str2));
- ASSERT (msg3 == msg4 || STREQ (msg3, str3));
ASSERT (STREQ (msg4, str4));
free (str1);
--
2.25.4

@ -0,0 +1,164 @@
From b9f9ed14bda93ecb407129b69e6476813c250046 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 15 Apr 2020 20:50:32 -0700
Subject: [PATCH] fts: remove NOSTAT_LEAF_OPTIMIZATION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It caused find and du to dump core, and it was useful
only for obsolescent Linux filesystems anyway. Problem reported in:
https://lists.gnu.org/r/bug-gnulib/2020-04/msg00068.html
Quite possibly there is still a serious underlying fts bug with
tight-loop-check and mutating file systems, but if so this patch
should cause the bug to be triggered less often.
* lib/fts.c (enum leaf_optimization): Remove
NOSTAT_LEAF_OPTIMIZATION, as its problematic.
(S_MAGIC_REISERFS, S_MAGIC_XFS): Remove; no longer needed.
(leaf_optimization): Remove special cases for ReiserFS and XFS.
(fts_read): Remove NOSTAT_LEAF_OPTIMIZATION code.
* lib/fts_.h (struct _ftsent.fts_n_dirs_remaining):
Remove. All uses removed.
Upstream-commit: 47bf2cf3184027c1eb9c1dfeea5c5b8b2d69710d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
lib/fts.c | 56 ++++++++-------------------------------------------
lib/fts_.h | 5 -----
2 files changed, 8 insertions(+), 53 deletions(-)
diff --git a/lib/fts.c b/lib/fts.c
index 1093ce5..dfe3fef 100644
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -445,7 +445,6 @@ fts_open (char * const *argv,
if ((parent = fts_alloc(sp, "", 0)) == NULL)
goto mem2;
parent->fts_level = FTS_ROOTPARENTLEVEL;
- parent->fts_n_dirs_remaining = -1;
}
/* The classic fts implementation would call fts_stat with
@@ -634,9 +633,8 @@ fts_close (FTS *sp)
}
/* Minimum link count of a traditional Unix directory. When leaf
- optimization is OK and MIN_DIR_NLINK <= st_nlink, then st_nlink is
- an upper bound on the number of subdirectories (counting "." and
- ".."). */
+ optimization is OK and a directory's st_nlink == MIN_DIR_NLINK,
+ then the directory has no subdirectories. */
enum { MIN_DIR_NLINK = 2 };
/* Whether leaf optimization is OK for a directory. */
@@ -645,12 +643,8 @@ enum leaf_optimization
/* st_nlink is not reliable for this directory's subdirectories. */
NO_LEAF_OPTIMIZATION,
- /* Leaf optimization is OK, but is not useful for avoiding stat calls. */
- OK_LEAF_OPTIMIZATION,
-
- /* Leaf optimization is not only OK: it is useful for avoiding
- stat calls, because dirent.d_type does not work. */
- NOSTAT_LEAF_OPTIMIZATION
+ /* st_nlink == 2 means the directory lacks subdirectories. */
+ OK_LEAF_OPTIMIZATION
};
#if (defined __linux__ || defined __ANDROID__) \
@@ -663,9 +657,7 @@ enum leaf_optimization
# define S_MAGIC_CIFS 0xFF534D42
# define S_MAGIC_NFS 0x6969
# define S_MAGIC_PROC 0x9FA0
-# define S_MAGIC_REISERFS 0x52654973
# define S_MAGIC_TMPFS 0x1021994
-# define S_MAGIC_XFS 0x58465342
# ifdef HAVE___FSWORD_T
typedef __fsword_t fsword;
@@ -786,23 +778,15 @@ dirent_inode_sort_may_be_useful (FTSENT const *p, int dir_fd)
}
/* Given an FTS entry P for a directory with descriptor DIR_FD,
- return true if it is both useful and valid to apply leaf optimization.
- The optimization is useful only for file systems that lack usable
- dirent.d_type info. The optimization is valid if an st_nlink value
- of at least MIN_DIR_NLINK is an upper bound on the number of
- subdirectories of D, counting "." and ".." as subdirectories.
+ return whether it is valid to apply leaf optimization.
+ The optimization is valid if a directory's st_nlink value equal
+ to MIN_DIR_NLINK means the directory has no subdirectories.
DIR_FD is negative if unavailable. */
static enum leaf_optimization
leaf_optimization (FTSENT const *p, int dir_fd)
{
switch (filesystem_type (p, dir_fd))
{
- /* List here the file system types that may lack usable dirent.d_type
- info, yet for which the optimization does apply. */
- case S_MAGIC_REISERFS:
- case S_MAGIC_XFS: /* XFS lacked it until 2013-08-22 commit. */
- return NOSTAT_LEAF_OPTIMIZATION;
-
case 0:
/* Leaf optimization is unsafe if the file system type is unknown. */
FALLTHROUGH;
@@ -1027,26 +1011,7 @@ check_for_dir:
if (p->fts_info == FTS_NSOK)
{
if (p->fts_statp->st_size == FTS_STAT_REQUIRED)
- {
- FTSENT *parent = p->fts_parent;
- if (parent->fts_n_dirs_remaining == 0
- && ISSET(FTS_NOSTAT)
- && ISSET(FTS_PHYSICAL)
- && (leaf_optimization (parent, sp->fts_cwd_fd)
- == NOSTAT_LEAF_OPTIMIZATION))
- {
- /* nothing more needed */
- }
- else
- {
- p->fts_info = fts_stat(sp, p, false);
- if (S_ISDIR(p->fts_statp->st_mode)
- && p->fts_level != FTS_ROOTLEVEL
- && 0 < parent->fts_n_dirs_remaining
- && parent->fts_n_dirs_remaining != (nlink_t) -1)
- parent->fts_n_dirs_remaining--;
- }
- }
+ p->fts_info = fts_stat(sp, p, false);
else
fts_assert (p->fts_statp->st_size == FTS_NO_STAT_REQUIRED);
}
@@ -1830,11 +1795,6 @@ err: memset(sbp, 0, sizeof(struct stat));
}
if (S_ISDIR(sbp->st_mode)) {
- p->fts_n_dirs_remaining
- = ((sbp->st_nlink < MIN_DIR_NLINK
- || p->fts_level <= FTS_ROOTLEVEL)
- ? -1
- : sbp->st_nlink - (ISSET (FTS_SEEDOT) ? 0 : MIN_DIR_NLINK));
if (ISDOT(p->fts_name)) {
/* Command-line "." and ".." are real directories. */
return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT);
diff --git a/lib/fts_.h b/lib/fts_.h
index d40a116..2e76cc4 100644
--- a/lib/fts_.h
+++ b/lib/fts_.h
@@ -227,11 +227,6 @@ typedef struct _ftsent {
size_t fts_namelen; /* strlen(fts_name) */
- /* If not (nlink_t) -1, an upper bound on the number of
- remaining subdirectories of interest. If this becomes
- zero, some work can be avoided. */
- nlink_t fts_n_dirs_remaining;
-
# define FTS_D 1 /* preorder directory */
# define FTS_DC 2 /* directory that causes cycles */
# define FTS_DEFAULT 3 /* none of the above */
--
2.21.1

@ -0,0 +1,153 @@
From 8c022656320592dbad146f5d3a3ae1875f419446 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 5 Mar 2020 17:25:29 -0800
Subject: [PATCH 1/2] ls: restore 8.31 behavior on removed directories
* NEWS: Mention this.
* src/ls.c: Do not include <sys/sycall.h>
(print_dir): Don't worry about whether the directory is removed.
* tests/ls/removed-directory.sh: Adjust to match new (i.e., old)
behavior.
Upstream-commit: 10fcb97bd728f09d4a027eddf8ad2900f0819b0a
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/ls.c | 22 ----------------------
tests/ls/removed-directory.sh | 10 ++--------
2 files changed, 2 insertions(+), 30 deletions(-)
diff --git a/src/ls.c b/src/ls.c
index 9d25f62..850ecc2 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -49,10 +49,6 @@
# include <sys/ptem.h>
#endif
-#ifdef __linux__
-# include <sys/syscall.h>
-#endif
-
#include <stdio.h>
#include <assert.h>
#include <setjmp.h>
@@ -2896,7 +2892,6 @@ print_dir (char const *name, char const *realname, bool command_line_arg)
struct dirent *next;
uintmax_t total_blocks = 0;
static bool first = true;
- bool found_any_entries = false;
errno = 0;
dirp = opendir (name);
@@ -2972,7 +2967,6 @@ print_dir (char const *name, char const *realname, bool command_line_arg)
next = readdir (dirp);
if (next)
{
- found_any_entries = true;
if (! file_ignored (next->d_name))
{
enum filetype type = unknown;
@@ -3018,22 +3012,6 @@ print_dir (char const *name, char const *realname, bool command_line_arg)
if (errno != EOVERFLOW)
break;
}
-#ifdef __linux__
- else if (! found_any_entries)
- {
- /* If readdir finds no directory entries at all, not even "." or
- "..", then double check that the directory exists. */
- if (syscall (SYS_getdents, dirfd (dirp), NULL, 0) == -1
- && errno != EINVAL)
- {
- /* We exclude EINVAL as that pertains to buffer handling,
- and we've passed NULL as the buffer for simplicity.
- ENOENT is returned if appropriate before buffer handling. */
- file_failure (command_line_arg, _("reading directory %s"), name);
- }
- break;
- }
-#endif
else
break;
diff --git a/tests/ls/removed-directory.sh b/tests/ls/removed-directory.sh
index e8c835d..fe8f929 100755
--- a/tests/ls/removed-directory.sh
+++ b/tests/ls/removed-directory.sh
@@ -26,20 +26,14 @@ case $host_triplet in
*) skip_ 'non linux kernel' ;;
esac
-LS_FAILURE=2
-
-cat <<\EOF >exp-err || framework_failure_
-ls: reading directory '.': No such file or directory
-EOF
-
cwd=$(pwd)
mkdir d || framework_failure_
cd d || framework_failure_
rmdir ../d || framework_failure_
-returns_ $LS_FAILURE ls >../out 2>../err || fail=1
+ls >../out 2>../err || fail=1
cd "$cwd" || framework_failure_
compare /dev/null out || fail=1
-compare exp-err err || fail=1
+compare /dev/null err || fail=1
Exit $fail
--
2.21.1
From 847324a0debd9d12062c79e7a7a9d3d8ce76390d Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sat, 7 Mar 2020 10:29:51 -0800
Subject: [PATCH 2/2] ls: improve removed-directory test
* tests/ls/removed-directory.sh: Remove host_triplet test.
Skip this test if one cannot remove the working directory.
From a suggestion by Bernhard Voelker (Bug#39929).
Upstream-commit: 672819c73f2e94e61386dc0584bddf9da860cc26
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
tests/ls/removed-directory.sh | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/tests/ls/removed-directory.sh b/tests/ls/removed-directory.sh
index fe8f929..63b209d 100755
--- a/tests/ls/removed-directory.sh
+++ b/tests/ls/removed-directory.sh
@@ -1,7 +1,7 @@
#!/bin/sh
-# If ls is asked to list a removed directory (e.g. the parent process's
-# current working directory that has been removed by another process), it
-# emits an error message.
+# If ls is asked to list a removed directory (e.g., the parent process's
+# current working directory has been removed by another process), it
+# should not emit an error message merely because the directory is removed.
# Copyright (C) 2020 Free Software Foundation, Inc.
@@ -21,15 +21,10 @@
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ ls
-case $host_triplet in
- *linux*) ;;
- *) skip_ 'non linux kernel' ;;
-esac
-
cwd=$(pwd)
mkdir d || framework_failure_
cd d || framework_failure_
-rmdir ../d || framework_failure_
+rmdir ../d || skip_ "can't remove working directory on this platform"
ls >../out 2>../err || fail=1
cd "$cwd" || framework_failure_
--
2.21.1

@ -0,0 +1,85 @@
From 53c6b01e8e3fd338d7f53e5ff817ef86f9efa852 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Wed, 11 Nov 2020 17:22:33 +0000
Subject: [PATCH] ls: fix crash printing SELinux context for unstatable files
This crash was identified by Cyber Independent Testing Lab:
https://cyber-itl.org/2020/10/28/citl-7000-defects.html
and was introduced with commit v6.9.90-11-g4245876e2
* src/ls.c (gobble_file): Ensure scontext is initialized
in the case where files are not statable.
* tests/ls/selinux-segfault.sh: Renamed from proc-selinux-segfault.sh,
and added test case for broken symlinks.
* tests/local.mk: Adjust for the renamed test.
Upstream-commit: 6fc695cb4a26f09dfeef8b1c24895a707055334e
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/ls.c | 3 +++
tests/local.mk | 2 +-
.../{proc-selinux-segfault.sh => selinux-segfault.sh} | 10 ++++++++--
3 files changed, 12 insertions(+), 3 deletions(-)
rename tests/ls/{proc-selinux-segfault.sh => selinux-segfault.sh} (77%)
diff --git a/src/ls.c b/src/ls.c
index 4acf5f4..8eb483d 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -3412,6 +3412,9 @@ gobble_file (char const *name, enum filetype type, ino_t inode,
provokes an exit status of 1. */
file_failure (command_line_arg,
_("cannot access %s"), full_name);
+
+ f->scontext = UNKNOWN_SECURITY_CONTEXT;
+
if (command_line_arg)
return 0;
diff --git a/tests/local.mk b/tests/local.mk
index 2aeff2b..2441fdc 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -616,7 +616,7 @@ all_tests = \
tests/ls/multihardlink.sh \
tests/ls/no-arg.sh \
tests/ls/no-cap.sh \
- tests/ls/proc-selinux-segfault.sh \
+ tests/ls/selinux-segfault.sh \
tests/ls/quote-align.sh \
tests/ls/readdir-mountpoint-inode.sh \
tests/ls/recursive.sh \
diff --git a/tests/ls/proc-selinux-segfault.sh b/tests/ls/selinux-segfault.sh
similarity index 77%
rename from tests/ls/proc-selinux-segfault.sh
rename to tests/ls/selinux-segfault.sh
index 831a00e..e2b7ef6 100755
--- a/tests/ls/proc-selinux-segfault.sh
+++ b/tests/ls/selinux-segfault.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# ls -l /proc/sys would segfault when built against libselinux1 2.0.15-2+b1
+# Ensure we don't segfault in selinux handling
# Copyright (C) 2008-2020 Free Software Foundation, Inc.
@@ -19,9 +19,15 @@
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ ls
+# ls -l /proc/sys would segfault when built against libselinux1 2.0.15-2+b1
f=/proc/sys
test -r $f || f=.
-
ls -l $f > out || fail=1
+# ls <= 8.32 would segfault when printing
+# the security context of broken symlink targets
+mkdir sedir || framework_failure_
+ln -sf missing sedir/broken || framework_failure_
+returns_ 1 ls -L -R -Z -m sedir > out || fail=1
+
Exit $fail
--
2.26.2

@ -0,0 +1,186 @@
From be77b4ab7cb68fd2daf9de90bd75d844392788ac Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Thu, 25 Mar 2021 11:57:56 +0100
Subject: [PATCH 1/4] ln: fix memory leaks in do_link
* src/ln.c (do_link): Free memory allocated by convert_abs_rel
on all code paths (Bug#47373).
Upstream-commit: 6e98f67758260579d7d44ea5f2df4c82d28c9f58
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/ln.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/src/ln.c b/src/ln.c
index ffa278e..9b52602 100644
--- a/src/ln.c
+++ b/src/ln.c
@@ -229,14 +229,14 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
if (errno != ENOENT)
{
error (0, errno, _("failed to access %s"), quoteaf (dest));
- return false;
+ goto fail;
}
force = false;
}
else if (S_ISDIR (dest_stats.st_mode))
{
error (0, 0, _("%s: cannot overwrite directory"), quotef (dest));
- return false;
+ goto fail;
}
else if (seen_file (dest_set, dest, &dest_stats))
{
@@ -245,7 +245,7 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
error (0, 0,
_("will not overwrite just-created %s with %s"),
quoteaf_n (0, dest), quoteaf_n (1, source));
- return false;
+ goto fail;
}
else
{
@@ -274,7 +274,7 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
{
error (0, 0, _("%s and %s are the same file"),
quoteaf_n (0, source), quoteaf_n (1, dest));
- return false;
+ goto fail;
}
}
@@ -285,7 +285,10 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
fprintf (stderr, _("%s: replace %s? "),
program_name, quoteaf (dest));
if (!yesno ())
- return true;
+ {
+ free(rel_source);
+ return true;
+ }
}
if (backup_type != no_backups)
@@ -304,7 +307,7 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
{
error (0, rename_errno, _("cannot backup %s"),
quoteaf (dest));
- return false;
+ goto fail;
}
force = false;
}
@@ -397,6 +400,10 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
free (backup_base);
free (rel_source);
return link_errno <= 0;
+
+fail:
+ free (rel_source);
+ return false;
}
void
--
2.26.3
From c051578e69bd8acf8f8a469566ae34e855345532 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 25 Mar 2021 09:15:50 -0700
Subject: [PATCH 2/4] maint: indenting
* src/ln.c: Fix indenting.
Upstream-commit: 8980b7c898046d899646da01c296fd15f0cced21
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/ln.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/ln.c b/src/ln.c
index 9b52602..8881d6a 100644
--- a/src/ln.c
+++ b/src/ln.c
@@ -286,7 +286,7 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
program_name, quoteaf (dest));
if (!yesno ())
{
- free(rel_source);
+ free (rel_source);
return true;
}
}
@@ -304,7 +304,7 @@ do_link (char const *source, int destdir_fd, char const *dest_base,
free (backup_base);
backup_base = NULL;
if (rename_errno != ENOENT)
- {
+ {
error (0, rename_errno, _("cannot backup %s"),
quoteaf (dest));
goto fail;
--
2.26.3
From 0d6a4afe5bee0e397fb2fc3b205a29b32a69af9d Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 25 Mar 2021 09:16:36 -0700
Subject: [PATCH 3/4] hostname: use puts
* src/hostname.c (main): Prefer puts to printf "%s\n".
Upstream-commit: c7a588ac3632aae21642d4d568497177950d36bf
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/hostname.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/hostname.c b/src/hostname.c
index 0b5c0cf..62cc98c 100644
--- a/src/hostname.c
+++ b/src/hostname.c
@@ -103,7 +103,7 @@ main (int argc, char **argv)
hostname = xgethostname ();
if (hostname == NULL)
die (EXIT_FAILURE, errno, _("cannot determine hostname"));
- printf ("%s\n", hostname);
+ puts (hostname);
}
if (optind + 1 < argc)
--
2.26.3
From 19c98d2080251edbaad9fb271aa10ad34f953500 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 25 Mar 2021 11:20:18 -0700
Subject: [PATCH 4/4] hostname: pacify valgrind
* src/hostname.c (main) [IF_LINT]: Free hostname (Bug#47384).
Upstream-commit: 4698e284f37844bc9b9f63f00eb556ccaaed5030
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/hostname.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/hostname.c b/src/hostname.c
index 62cc98c..7210248 100644
--- a/src/hostname.c
+++ b/src/hostname.c
@@ -104,6 +104,7 @@ main (int argc, char **argv)
if (hostname == NULL)
die (EXIT_FAILURE, errno, _("cannot determine hostname"));
puts (hostname);
+ IF_LINT (free (hostname));
}
if (optind + 1 < argc)
--
2.26.3

@ -0,0 +1,104 @@
From 09400b7f7f48d8eedc0df55de8073a43bc0aac96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Tue, 27 Oct 2020 20:15:43 +0000
Subject: [PATCH 1/2] stat,tail: sync file system constants from the linux
kernel
* src/stat.c: Add magic constants for "devmem", and
"zonefs" file systems.
* NEWS: Mention the improvement.
Upstream-commit: ff80b6b0a0507e24f39cc1aad09d147f5187430b
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/stat.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/stat.c b/src/stat.c
index 5012622..8cd69da 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -347,6 +347,8 @@ human_fstype (STRUCT_STATVFS const *statfsbuf)
return "debugfs";
case S_MAGIC_DEVFS: /* 0x1373 local */
return "devfs";
+ case S_MAGIC_DEVMEM: /* 0x454D444D local */
+ return "devmem";
case S_MAGIC_DEVPTS: /* 0x1CD1 local */
return "devpts";
case S_MAGIC_DMA_BUF: /* 0x444D4142 local */
@@ -549,6 +551,8 @@ human_fstype (STRUCT_STATVFS const *statfsbuf)
return "z3fold";
case S_MAGIC_ZFS: /* 0x2FC12FC1 local */
return "zfs";
+ case S_MAGIC_ZONEFS: /* 0x5A4F4653 local */
+ return "zonefs";
case S_MAGIC_ZSMALLOC: /* 0x58295829 local */
return "zsmallocfs";
--
2.25.4
From d5948fd41013dfe4d2d10083111821667977c6d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Tue, 27 Oct 2020 21:04:14 +0000
Subject: [PATCH 2/2] mountlist: recognize more file system types as remote
Sync "remote" file systems from stat.c in coreutils.
Note we only consider file systems that do not use host:resource
mount source. I.e. those that don't generally use a colon when
mounting, as that case is already considered. Searching for
"<fstype> /etc/fstab" was informative for identifying these.
The full list of "remote" file systems in coreutils is currently:
acfs afs ceph cifs coda fhgfs fuseblk fusectl
gfs gfs2 gpfs ibrix k-afs lustre novell nfs nfsd
ocfs2 panfs prl_fs smb smb2 snfs vboxsf vmhgfs vxfs
Note also we do not include virtual machine file systems,
as even though they're remote to the current kernel,
they are generally not distributed to separate hosts.
* lib/mountlist.c (ME_REMOTE): Sync previously unconsidered
"remote" file systems from stat.c in coreutils.
Upstream-commit: dd1fc46be12d671c1a9d9dc5a6fa8c766e99aa2f
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
lib/mountlist.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/mountlist.c b/lib/mountlist.c
index 7abe024..e0227b7 100644
--- a/lib/mountlist.c
+++ b/lib/mountlist.c
@@ -221,8 +221,9 @@ me_remote (char const *fs_name, char const *fs_type _GL_UNUSED)
#ifndef ME_REMOTE
/* A file system is "remote" if its Fs_name contains a ':'
or if (it is of type (smbfs or cifs) and its Fs_name starts with '//')
- or if it is of type (afs or auristorfs)
- or Fs_name is equal to "-hosts" (used by autofs to mount remote fs). */
+ or if it is of any other of the listed types
+ or Fs_name is equal to "-hosts" (used by autofs to mount remote fs).
+ "VM" file systems like prl_fs or vboxsf are not considered remote here. */
# define ME_REMOTE(Fs_name, Fs_type) \
(strchr (Fs_name, ':') != NULL \
|| ((Fs_name)[0] == '/' \
@@ -230,8 +231,15 @@ me_remote (char const *fs_name, char const *fs_type _GL_UNUSED)
&& (strcmp (Fs_type, "smbfs") == 0 \
|| strcmp (Fs_type, "smb3") == 0 \
|| strcmp (Fs_type, "cifs") == 0)) \
+ || strcmp (Fs_type, "acfs") == 0 \
|| strcmp (Fs_type, "afs") == 0 \
+ || strcmp (Fs_type, "coda") == 0 \
|| strcmp (Fs_type, "auristorfs") == 0 \
+ || strcmp (Fs_type, "fhgfs") == 0 \
+ || strcmp (Fs_type, "gpfs") == 0 \
+ || strcmp (Fs_type, "ibrix") == 0 \
+ || strcmp (Fs_type, "ocfs2") == 0 \
+ || strcmp (Fs_type, "vxfs") == 0 \
|| strcmp ("-hosts", Fs_name) == 0)
#endif
--
2.25.4

@ -0,0 +1,109 @@
From 11b37b65d08c2a8b6d967fd866ebbdbe7e864949 Mon Sep 17 00:00:00 2001
From: Nishant Nayan <nishant.nayan@oracle.com>
Date: Thu, 26 Nov 2020 14:35:17 +0000
Subject: [PATCH] rm: do not skip files upon failure to remove an empty dir
When removing a directory fails for some reason, and that directory
is empty, the rm_fts code gets the return value of the excise call
confused with the return value of its earlier call to prompt,
causing fts_skip_tree to be called again and the next file
that rm would otherwise have deleted to survive.
* src/remove.c (rm_fts): Ensure we only skip a single fts entry,
when processing empty dirs. I.e. only skip the entry
having successfully removed it.
* tests/rm/empty-immutable-skip.sh: New root-only test.
* tests/local.mk: Add it.
* NEWS: Mention the bug fix.
Fixes https://bugs.gnu.org/44883
Upstream-commit: 6bf108358a6104ec1c694c9530b3cd56b95f4b57
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/remove.c | 3 ++-
tests/local.mk | 1 +
tests/rm/empty-immutable-skip.sh | 46 ++++++++++++++++++++++++++++++++
3 files changed, 49 insertions(+), 1 deletion(-)
create mode 100755 tests/rm/empty-immutable-skip.sh
diff --git a/src/remove.c b/src/remove.c
index 2d40c55..adf9489 100644
--- a/src/remove.c
+++ b/src/remove.c
@@ -506,7 +506,8 @@ rm_fts (FTS *fts, FTSENT *ent, struct rm_options const *x)
/* When we know (from prompt when in interactive mode)
that this is an empty directory, don't prompt twice. */
s = excise (fts, ent, x, true);
- fts_skip_tree (fts, ent);
+ if (s == RM_OK)
+ fts_skip_tree (fts, ent);
}
if (s != RM_OK)
diff --git a/tests/local.mk b/tests/local.mk
index 5f7f775..2aeff2b 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -136,6 +136,7 @@ all_root_tests = \
tests/rm/no-give-up.sh \
tests/rm/one-file-system.sh \
tests/rm/read-only.sh \
+ tests/rm/empty-immutable-skip.sh \
tests/tail-2/append-only.sh \
tests/tail-2/end-of-device.sh \
tests/touch/now-owned-by-other.sh
diff --git a/tests/rm/empty-immutable-skip.sh b/tests/rm/empty-immutable-skip.sh
new file mode 100755
index 0000000..c91d8d4
--- /dev/null
+++ b/tests/rm/empty-immutable-skip.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# Ensure that rm does not skip extra files after hitting an empty immutable dir.
+# Requires root access to do chattr +i, as well as an ext[23] or xfs file system
+
+# Copyright (C) 2020 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ rm
+require_root_
+
+# These simple one-file operations are expected to work even in the
+# presence of this bug, and we need them to set up the rest of the test.
+chattr_i_works=1
+touch f
+chattr +i f 2>/dev/null || chattr_i_works=0
+rm f 2>/dev/null
+test -f f || chattr_i_works=0
+chattr -i f 2>/dev/null || chattr_i_works=0
+rm f 2>/dev/null || chattr_i_works=0
+test -f f && chattr_i_works=0
+
+if test $chattr_i_works = 0; then
+ skip_ "chattr +i doesn't work on this file system"
+fi
+
+mkdir empty || framework_failure_
+touch x y || framework_failure_
+chattr +i empty || framework_failure_
+rm -rf empty x y
+{ test -f x || test -f y; } && fail=1
+chattr -i empty
+
+Exit $fail
--
2.26.2

@ -0,0 +1,100 @@
From bb0e7fabcaed9a7e71e30f05e638e9f243cdb13e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Mon, 25 Jan 2021 14:12:48 +0000
Subject: [PATCH] split: fix --number=K/N to output correct part of file
This functionality regressed with the adjustments
in commit v8.25-4-g62e7af032
* src/split.c (bytes_chunk_extract): Account for already read data
when seeking into the file.
* tests/split/b-chunk.sh: Use the hidden ---io-blksize option,
to test this functionality.
Fixes https://bugs.gnu.org/46048
Upstream-commit: bb21daa125aeb4e32546309d370918ca47e612db
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/split.c | 2 +-
tests/split/b-chunk.sh | 45 ++++++++++++++++++++++++------------------
2 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/src/split.c b/src/split.c
index 09e610b..19248f6 100644
--- a/src/split.c
+++ b/src/split.c
@@ -1001,7 +1001,7 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
}
else
{
- if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+ if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
die (EXIT_FAILURE, errno, "%s", quotef (infile));
initial_read = SIZE_MAX;
}
diff --git a/tests/split/b-chunk.sh b/tests/split/b-chunk.sh
index 864ce55..39a6799 100755
--- a/tests/split/b-chunk.sh
+++ b/tests/split/b-chunk.sh
@@ -35,32 +35,39 @@ split -e -n 10 /dev/null || fail=1
returns_ 1 stat x?? 2>/dev/null || fail=1
printf '1\n2\n3\n4\n5\n' > input || framework_failure_
+printf '1\n2' > exp-1 || framework_failure_
+printf '\n3\n' > exp-2 || framework_failure_
+printf '4\n5\n' > exp-3 || framework_failure_
for file in input /proc/version /sys/kernel/profiling; do
test -f $file || continue
- split -n 3 $file > out || fail=1
- split -n 1/3 $file > b1 || fail=1
- split -n 2/3 $file > b2 || fail=1
- split -n 3/3 $file > b3 || fail=1
+ for blksize in 1 2 4096; do
+ if ! test "$file" = 'input'; then
+ # For /proc like files we must be able to read all
+ # into the internal buffer to be able to determine size.
+ test "$blksize" = 4096 || continue
+ fi
- case $file in
- input)
- printf '1\n2' > exp-1
- printf '\n3\n' > exp-2
- printf '4\n5\n' > exp-3
+ split -n 3 ---io-blksize=$blksize $file > out || fail=1
+ split -n 1/3 ---io-blksize=$blksize $file > b1 || fail=1
+ split -n 2/3 ---io-blksize=$blksize $file > b2 || fail=1
+ split -n 3/3 ---io-blksize=$blksize $file > b3 || fail=1
- compare exp-1 xaa || fail=1
- compare exp-2 xab || fail=1
- compare exp-3 xac || fail=1
- ;;
- esac
+ case $file in
+ input)
+ compare exp-1 xaa || fail=1
+ compare exp-2 xab || fail=1
+ compare exp-3 xac || fail=1
+ ;;
+ esac
- compare xaa b1 || fail=1
- compare xab b2 || fail=1
- compare xac b3 || fail=1
- cat xaa xab xac | compare - $file || fail=1
- test -f xad && fail=1
+ compare xaa b1 || fail=1
+ compare xab b2 || fail=1
+ compare xac b3 || fail=1
+ cat xaa xab xac | compare - $file || fail=1
+ test -f xad && fail=1
+ done
done
Exit $fail
--
2.26.2

@ -0,0 +1,32 @@
From b87f944c87ffe04db6e5476b007a8e4979de933d Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Thu, 18 Feb 2021 11:18:04 +0100
Subject: [PATCH] stat,tail: add support for the exfat file system
Bug: https://bugzilla.redhat.com/1921427
* src/stat.c (human_fstype): Add case for the 'exfat' file system type.
Fixes https://bugs.gnu.org/46613
Upstream-commit: a5e0d8f387e81e854427addbbaf2504541bbf4b9
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/stat.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/stat.c b/src/stat.c
index 8cd69da..4e1c8e3 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -361,6 +361,8 @@ human_fstype (STRUCT_STATVFS const *statfsbuf)
return "efs";
case S_MAGIC_EROFS_V1: /* 0xE0F5E1E2 local */
return "erofs";
+ case S_MAGIC_EXFAT: /* 0x2011BAB0 local */
+ return "exfat";
case S_MAGIC_EXFS: /* 0x45584653 local */
return "exfs";
case S_MAGIC_EXOFS: /* 0x5DF5 local */
--
2.26.2

@ -0,0 +1,181 @@
From c7a04cef4075da864a3468e63a5bb79334d8f556 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sat, 26 Jun 2021 18:23:52 -0700
Subject: [PATCH] tail: use poll, not select
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This fixes an unlikely stack out-of-bounds write reported by
Stepan Broz via Kamil Dudka (Bug#49209).
* src/tail.c: Do not include <sys/select.h>.
[!_AIX]: Include poll.h.
(check_output_alive) [!_AIX]: Use poll instead of select.
(tail_forever_inotify): Likewise. Simplify logic, as there is no
need for a while (len <= evbuf_off) loop.
Upstream-commit: da0d448bca62c6305fc432f67e2c5ccc2da75346
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/tail.c | 100 +++++++++++++++++++++--------------------------------
1 file changed, 39 insertions(+), 61 deletions(-)
diff --git a/src/tail.c b/src/tail.c
index 1c88723..5b4f21a 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -28,12 +28,9 @@
#include <stdio.h>
#include <assert.h>
#include <getopt.h>
-#include <sys/select.h>
+#include <poll.h>
#include <sys/types.h>
#include <signal.h>
-#ifdef _AIX
-# include <poll.h>
-#endif
#include "system.h"
#include "argmatch.h"
@@ -351,27 +348,12 @@ check_output_alive (void)
if (! monitor_output)
return;
-#ifdef _AIX
- /* select on AIX was seen to give a readable event immediately. */
struct pollfd pfd;
pfd.fd = STDOUT_FILENO;
pfd.events = POLLERR;
if (poll (&pfd, 1, 0) >= 0 && (pfd.revents & POLLERR))
die_pipe ();
-#else
- struct timeval delay;
- delay.tv_sec = delay.tv_usec = 0;
-
- fd_set rfd;
- FD_ZERO (&rfd);
- FD_SET (STDOUT_FILENO, &rfd);
-
- /* readable event on STDOUT is equivalent to POLLERR,
- and implies an error condition on output like broken pipe. */
- if (select (STDOUT_FILENO + 1, &rfd, NULL, NULL, &delay) == 1)
- die_pipe ();
-#endif
}
static bool
@@ -1612,7 +1594,7 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
/* Wait for inotify events and handle them. Events on directories
ensure that watched files can be re-added when following by name.
This loop blocks on the 'safe_read' call until a new event is notified.
- But when --pid=P is specified, tail usually waits via the select. */
+ But when --pid=P is specified, tail usually waits via poll. */
while (1)
{
struct File_spec *fspec;
@@ -1629,54 +1611,51 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
return false;
}
- /* When watching a PID, ensure that a read from WD will not block
- indefinitely. */
- while (len <= evbuf_off)
+ if (len <= evbuf_off)
{
- struct timeval delay; /* how long to wait for file changes. */
+ /* Poll for inotify events. When watching a PID, ensure
+ that a read from WD will not block indefinitely.
+ If MONITOR_OUTPUT, also poll for a broken output pipe. */
- if (pid)
+ int file_change;
+ struct pollfd pfd[2];
+ do
{
- if (writer_is_dead)
- exit (EXIT_SUCCESS);
+ /* How many ms to wait for changes. -1 means wait forever. */
+ int delay = -1;
- writer_is_dead = (kill (pid, 0) != 0 && errno != EPERM);
-
- if (writer_is_dead)
- delay.tv_sec = delay.tv_usec = 0;
- else
+ if (pid)
{
- delay.tv_sec = (time_t) sleep_interval;
- delay.tv_usec = 1000000 * (sleep_interval - delay.tv_sec);
+ if (writer_is_dead)
+ exit (EXIT_SUCCESS);
+
+ writer_is_dead = (kill (pid, 0) != 0 && errno != EPERM);
+
+ if (writer_is_dead || sleep_interval <= 0)
+ delay = 0;
+ else if (sleep_interval < INT_MAX / 1000 - 1)
+ {
+ /* delay = ceil (sleep_interval * 1000), sans libm. */
+ double ddelay = sleep_interval * 1000;
+ delay = ddelay;
+ delay += delay < ddelay;
+ }
}
+
+ pfd[0].fd = wd;
+ pfd[0].events = POLLIN;
+ pfd[1].fd = STDOUT_FILENO;
+ pfd[1].events = pfd[1].revents = 0;
+ file_change = poll (pfd, monitor_output + 1, delay);
}
+ while (file_change == 0);
- fd_set rfd;
- FD_ZERO (&rfd);
- FD_SET (wd, &rfd);
- if (monitor_output)
- FD_SET (STDOUT_FILENO, &rfd);
-
- int file_change = select (MAX (wd, STDOUT_FILENO) + 1,
- &rfd, NULL, NULL, pid ? &delay: NULL);
-
- if (file_change == 0)
- continue;
- else if (file_change == -1)
- die (EXIT_FAILURE, errno,
- _("error waiting for inotify and output events"));
- else if (FD_ISSET (STDOUT_FILENO, &rfd))
- {
- /* readable event on STDOUT is equivalent to POLLERR,
- and implies an error on output like broken pipe. */
- die_pipe ();
- }
- else
- break;
- }
+ if (file_change < 0)
+ die (EXIT_FAILURE, errno,
+ _("error waiting for inotify and output events"));
+ if (pfd[1].revents)
+ die_pipe ();
- if (len <= evbuf_off)
- {
len = safe_read (wd, evbuf, evlen);
evbuf_off = 0;
@@ -2437,8 +2416,7 @@ main (int argc, char **argv)
if (forever && ignore_fifo_and_pipe (F, n_files))
{
/* If stdout is a fifo or pipe, then monitor it
- so that we exit if the reader goes away.
- Note select() on a regular file is always readable. */
+ so that we exit if the reader goes away. */
struct stat out_stat;
if (fstat (STDOUT_FILENO, &out_stat) < 0)
die (EXIT_FAILURE, errno, _("standard output"));
--
2.31.1

@ -0,0 +1,99 @@
From fc6318841f008dadc1e7c93e539f10d24aa83e90 Mon Sep 17 00:00:00 2001
From: Bernhard Voelker <mail@bernhard-voelker.de>
Date: Wed, 21 Apr 2021 00:12:00 +0200
Subject: [PATCH 1/2] tests: fix FP in ls/stat-free-color.sh
On newer systems like Fedora 34 and openSUSE Tumbleweed, ls(1) calls
newfstatat(STDOUT_FILENO, ...), but only when there is something to
output.
* tests/ls/stat-free-color.sh: Add -a option to the reference invocation
of ls, thus enforcing something gets output.
Upstream-commit: b7091093bb6505c33279f9bc940b2e94763a6e5d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
tests/ls/stat-free-color.sh | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/tests/ls/stat-free-color.sh b/tests/ls/stat-free-color.sh
index 00942f7..87bed1c 100755
--- a/tests/ls/stat-free-color.sh
+++ b/tests/ls/stat-free-color.sh
@@ -56,12 +56,14 @@ eval $(dircolors -b color-without-stat)
# The system may perform additional stat-like calls before main.
# Furthermore, underlying library functions may also implicitly
# add an extra stat call, e.g. opendir since glibc-2.21-360-g46f894d.
-# To avoid counting those, first get a baseline count for running
-# ls with one empty directory argument. Then, compare that with the
-# invocation under test.
+# Finally, ls(1) makes a stat call for stdout, but only in the case
+# when there is something to output.
+# To get the comparison right, first get a baseline count for running
+# 'ls -a' with one empty directory argument. Then, compare that with
+# the invocation under test.
mkdir d || framework_failure_
-strace -q -o log1 -e $stats ls --color=always d || fail=1
+strace -q -o log1 -e $stats ls -a --color=always d || fail=1
n_stat1=$(grep -vF '+++' log1 | wc -l) || framework_failure_
test $n_stat1 = 0 \
--
2.31.1
From c16ca58f17a088e925c0d1c4015c48332c380a00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Sun, 9 May 2021 23:41:00 +0100
Subject: [PATCH 2/2] tests: fix tests/cp/sparse-2.sh false failure on some
systems
* tests/cp/sparse-2.sh: Double check cp --sparse=always,
with dd conv=sparse, in the case where the former didn't
create a sparse file. Now that this test is being newly run
on macos, we're seeing a failure due to seek() not creating
holes on apfs unless the size is >= 16MiB.
Upstream-commit: 6b499720fecae935dc00e236d6aefe94d9010482
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
tests/cp/fiemap-2.sh | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/tests/cp/fiemap-2.sh b/tests/cp/fiemap-2.sh
index 548a376..e20ce54 100755
--- a/tests/cp/fiemap-2.sh
+++ b/tests/cp/fiemap-2.sh
@@ -17,7 +17,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
-print_ver_ cp
+print_ver_ cp stat dd
# Require a fiemap-enabled FS.
touch fiemap_chk # check a file rather than current dir for best coverage
@@ -46,10 +46,17 @@ dd bs=1k seek=1 of=k count=255 < /dev/zero || framework_failure_
# cp should detect the all-zero blocks and convert some of them to holes.
# How many it detects/converts currently depends on io_blksize.
-# Currently, on my F14/ext4 desktop, this K starts off with size 256KiB,
+# Currently, on my F14/ext4 desktop, this K file starts off with size 256KiB,
# (note that the K in the preceding test starts off with size 4KiB).
# cp from coreutils-8.9 with --sparse=always reduces the size to 32KiB.
cp --sparse=always k k2 || fail=1
-test $(stat -c %b k2) -lt $(stat -c %b k) || fail=1
+if test $(stat -c %b k2) -ge $(stat -c %b k); then
+ # If not sparse, then double check by creating with dd
+ # as we're not guaranteed that seek will create a hole.
+ # apfs on darwin 19.2.0 for example was seen to not to create holes < 16MiB.
+ hole_size=$(stat -c %o k2) || framework_failure_
+ dd if=k of=k2.dd bs=$hole_size conv=sparse || framework_failure_
+ test $(stat -c %b k2) -eq $(stat -c %b k2.dd) || fail=1
+fi
Exit $fail
--
2.31.1

@ -0,0 +1,13 @@
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 400e135..47e4480 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -10074,6 +10074,8 @@ incorrect. @xref{Directory Setuid and Setgid}, for how the
set-user-ID and set-group-ID bits of directories are inherited unless
overridden in this way.
+Note: The @option{--mode},@option{-m} option only applies to the right-most directories listed on the command line. When combined with @option{--parents}, @option{-p} option, any parent directories are created with @samp{u+wx} modified by umask.
+
@item -p
@itemx --parents
@opindex -p

@ -0,0 +1,87 @@
From f4422844dbcd839ce486bcbc15b7bd5b72c9198d Mon Sep 17 00:00:00 2001
From: Rohan Sable <rsable@redhat.com>
Date: Mon, 7 Mar 2022 14:14:13 +0000
Subject: [PATCH 1/2] ls: avoid triggering automounts
statx() has different defaults wrt automounting
compared to stat() or lstat(), so explicitly
set the AT_NO_AUTOMOUNT flag to suppress that behavior,
and avoid unintended operations or potential errors.
* src/ls.c (do_statx): Pass AT_NO_AUTOMOUNT to avoid this behavior.
Fixes https://bugs.gnu.org/54286
Signed-off-by: Rohan Sable <rsable@redhat.com>
Upstream-commit: 85c975df2c25bd799370b04bb294e568e001102f
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/ls.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/ls.c b/src/ls.c
index 1047801..fe0e9f8 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -1175,7 +1175,7 @@ do_statx (int fd, char const *name, struct stat *st, int flags,
{
struct statx stx;
bool want_btime = mask & STATX_BTIME;
- int ret = statx (fd, name, flags, mask, &stx);
+ int ret = statx (fd, name, flags | AT_NO_AUTOMOUNT, mask, &stx);
if (ret >= 0)
{
statx_to_stat (&stx, st);
--
2.34.1
From 3d227f9e4f3fe806064721e4b9451ee06526bc80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Mon, 7 Mar 2022 23:29:20 +0000
Subject: [PATCH 2/2] stat: only automount with --cached=never
Revert to the default behavior before the introduction of statx().
* src/stat.c (do_stat): Set AT_NO_AUTOMOUNT without --cached=never.
* doc/coreutils.texi (stat invocation): Mention the automount
behavior with --cached=never.
Fixes https://bugs.gnu.org/54287
Upstream-commit: 92cb8427c537f37edd43c5cef1909585201372ab
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
doc/coreutils.texi | 1 +
src/stat.c | 3 +++
2 files changed, 4 insertions(+)
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 19b535c..0f5c16a 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -12564,6 +12564,7 @@ Always read the already cached attributes if available.
@item never
Always sychronize with the latest file system attributes.
+This also mounts automounted files.
@item default
Leave the caching behavior to the underlying file system.
diff --git a/src/stat.c b/src/stat.c
index 0c34501..803340a 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -1381,6 +1381,9 @@ do_stat (char const *filename, char const *format, char const *format2)
else if (force_sync)
flags |= AT_STATX_FORCE_SYNC;
+ if (! force_sync)
+ flags |= AT_NO_AUTOMOUNT;
+
fd = statx (fd, pathname, flags, format_to_mask (format), &stx);
if (fd < 0)
{
--
2.34.1

@ -0,0 +1,63 @@
# skip everything for non-interactive shells
if (! $?prompt) exit
# color-ls initialization
if ( $?USER_LS_COLORS ) then
if ( "$USER_LS_COLORS" != "" ) then
#when USER_LS_COLORS defined do not override user
#specified LS_COLORS and use them
goto finish
endif
endif
alias ll 'ls -l'
alias l. 'ls -d .*'
set COLORS=/etc/DIR_COLORS
if ($?TERM) then
if ( -e "/etc/DIR_COLORS.$TERM" ) then
set COLORS="/etc/DIR_COLORS.$TERM"
endif
endif
if ( -f ~/.dircolors ) set COLORS=~/.dircolors
if ( -f ~/.dir_colors ) set COLORS=~/.dir_colors
if ($?TERM) then
if ( -f ~/.dircolors."$TERM" ) set COLORS=~/.dircolors."$TERM"
if ( -f ~/.dir_colors."$TERM" ) set COLORS=~/.dir_colors."$TERM"
endif
set INCLUDE="`/usr/bin/cat "$COLORS" | /usr/bin/grep '^INCLUDE' | /usr/bin/cut -d ' ' -f2-`"
if ( ! -e "$COLORS" ) exit
set _tmp="`/usr/bin/mktemp .colorlsXXX -q --tmpdir=/tmp`"
#if mktemp fails, exit when include was active, otherwise use $COLORS file
if ( "$_tmp" == '' ) then
if ( "$INCLUDE" == '' ) then
eval "`/usr/bin/dircolors -c $COLORS`"
endif
goto cleanup
endif
if ( "$INCLUDE" != '' ) /usr/bin/cat "$INCLUDE" >> $_tmp
/usr/bin/grep -v '^INCLUDE' "$COLORS" >> $_tmp
eval "`/usr/bin/dircolors -c $_tmp`"
/usr/bin/rm -f $_tmp
if ( "$LS_COLORS" == '' ) exit
cleanup:
set color_none=`/usr/bin/sed -n '/^COLOR.*none/Ip' < $COLORS`
if ( "$color_none" != '' ) then
unset color_none
exit
endif
unset color_none
unset _tmp
unset INCLUDE
unset COLORS
finish:
alias ll 'ls -l --color=auto'
alias l. 'ls -d .* --color=auto'
alias ls 'ls --color=auto'

@ -0,0 +1,53 @@
# color-ls initialization
# Skip all for noninteractive shells.
[ ! -t 0 ] && return
#when USER_LS_COLORS defined do not override user LS_COLORS, but use them.
if [ -z "$USER_LS_COLORS" ]; then
alias ll='ls -l' 2>/dev/null
alias l.='ls -d .*' 2>/dev/null
INCLUDE=
COLORS=
for colors in "$HOME/.dir_colors.$TERM" "$HOME/.dircolors.$TERM" \
"$HOME/.dir_colors" "$HOME/.dircolors"; do
[ -e "$colors" ] && COLORS="$colors" && \
INCLUDE="`/usr/bin/cat "$COLORS" | /usr/bin/grep '^INCLUDE' | /usr/bin/cut -d ' ' -f2-`" && \
break
done
[ -z "$COLORS" ] && [ -e "/etc/DIR_COLORS.$TERM" ] && \
COLORS="/etc/DIR_COLORS.$TERM"
[ -z "$COLORS" ] && [ -e "/etc/DIR_COLORS" ] && \
COLORS="/etc/DIR_COLORS"
# Existence of $COLORS already checked above.
[ -n "$COLORS" ] || return
if [ -e "$INCLUDE" ];
then
TMP="`/usr/bin/mktemp .colorlsXXX -q --tmpdir=/tmp`"
[ -z "$TMP" ] && return
/usr/bin/cat "$INCLUDE" >> $TMP
/usr/bin/grep -v '^INCLUDE' "$COLORS" >> $TMP
eval "`/usr/bin/dircolors --sh $TMP 2>/dev/null`"
/usr/bin/rm -f $TMP
else
eval "`/usr/bin/dircolors --sh $COLORS 2>/dev/null`"
fi
[ -z "$LS_COLORS" ] && return
/usr/bin/grep -qi "^COLOR.*none" $COLORS >/dev/null 2>/dev/null && return
fi
unset TMP COLORS INCLUDE
alias ll='ls -l --color=auto' 2>/dev/null
alias l.='ls -d .* --color=auto' 2>/dev/null
alias ls='ls --color=auto' 2>/dev/null

@ -0,0 +1,187 @@
From 6e36198f10a2f63b89c89ebb5d5c185b20fb3a63 Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Mon, 29 Mar 2010 17:20:34 +0000
Subject: [PATCH] coreutils-df-direct.patch
---
doc/coreutils.texi | 7 ++++++
src/df.c | 34 ++++++++++++++++++++++++++--
tests/df/direct.sh | 55 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 94 insertions(+), 2 deletions(-)
create mode 100755 tests/df/direct.sh
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 5b9a597..6810c15 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -11898,6 +11898,13 @@ some systems (notably SunOS), doing this yields more up to date results,
but in general this option makes @command{df} much slower, especially when
there are many or very busy file systems.
+@item --direct
+@opindex --direct
+@cindex direct statfs for a file
+Do not resolve mount point and show statistics directly for a file. It can be
+especially useful for NFS mount points if there is a boundary between two
+storage policies behind the mount point.
+
@item --total
@opindex --total
@cindex grand total of disk size, usage and available space
diff --git a/src/df.c b/src/df.c
index 48025b9..c8efa5b 100644
--- a/src/df.c
+++ b/src/df.c
@@ -125,6 +125,9 @@ static bool print_type;
/* If true, print a grand total at the end. */
static bool print_grand_total;
+/* If true, show statistics for a file instead of mount point. */
+static bool direct_statfs;
+
/* Grand total data. */
static struct fs_usage grand_fsu;
@@ -252,13 +255,15 @@ enum
NO_SYNC_OPTION = CHAR_MAX + 1,
SYNC_OPTION,
TOTAL_OPTION,
- OUTPUT_OPTION
+ OUTPUT_OPTION,
+ DIRECT_OPTION
};
static struct option const long_options[] =
{
{"all", no_argument, NULL, 'a'},
{"block-size", required_argument, NULL, 'B'},
+ {"direct", no_argument, NULL, DIRECT_OPTION},
{"inodes", no_argument, NULL, 'i'},
{"human-readable", no_argument, NULL, 'h'},
{"si", no_argument, NULL, 'H'},
@@ -561,7 +566,10 @@ get_header (void)
for (col = 0; col < ncolumns; col++)
{
char *cell = NULL;
- char const *header = _(columns[col]->caption);
+ char const *header = (columns[col]->field == TARGET_FIELD
+ && direct_statfs)?
+ _("File") :
+ _(columns[col]->caption);
if (columns[col]->field == SIZE_FIELD
&& (header_mode == DEFAULT_MODE
@@ -1464,6 +1472,17 @@ get_point (const char *point, const struct stat *statp)
static void
get_entry (char const *name, struct stat const *statp)
{
+ if (direct_statfs)
+ {
+ char *resolved = canonicalize_file_name (name);
+ if (resolved)
+ {
+ get_dev (NULL, resolved, name, NULL, NULL, false, false, NULL, false);
+ free (resolved);
+ return;
+ }
+ }
+
if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode))
&& get_disk (name))
return;
@@ -1534,6 +1553,7 @@ or all file systems by default.\n\
-B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\
'-BM' prints sizes in units of 1,048,576 bytes;\n\
see SIZE format below\n\
+ --direct show statistics for a file instead of mount point\n\
-h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\
-H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\
"), stdout);
@@ -1624,6 +1644,9 @@ main (int argc, char **argv)
xstrtol_fatal (e, oi, c, long_options, optarg);
}
break;
+ case DIRECT_OPTION:
+ direct_statfs = true;
+ break;
case 'i':
if (header_mode == OUTPUT_MODE)
{
@@ -1720,6 +1743,13 @@ main (int argc, char **argv)
}
}
+ if (direct_statfs && show_local_fs)
+ {
+ error (0, 0, _("options --direct and --local (-l) are mutually "
+ "exclusive"));
+ usage (EXIT_FAILURE);
+ }
+
if (human_output_opts == -1)
{
if (posix_format)
diff --git a/tests/df/direct.sh b/tests/df/direct.sh
new file mode 100755
index 0000000..8e4cfb8
--- /dev/null
+++ b/tests/df/direct.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# Ensure "df --direct" works as documented
+
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ df
+
+df || skip_ "df fails"
+
+DIR=`pwd` || framework_failure
+FILE="$DIR/file"
+touch "$FILE" || framework_failure
+echo "$FILE" > file_exp || framework_failure
+echo "Mounted on" > header_mounted_exp || framework_failure
+echo "File" > header_file_exp || framework_failure
+
+fail=0
+
+df --portability "$FILE" > df_out || fail=1
+df --portability --direct "$FILE" > df_direct_out || fail=1
+df --portability --direct --local "$FILE" > /dev/null 2>&1 && fail=1
+
+# check df header
+$AWK '{ if (NR==1) print $6 " " $7; }' df_out > header_mounted_out \
+ || framework_failure
+$AWK '{ if (NR==1) print $6; }' df_direct_out > header_file_out \
+ || framework_failure
+compare header_mounted_out header_mounted_exp || fail=1
+compare header_file_out header_file_exp || fail=1
+
+# check df output (without --direct)
+$AWK '{ if (NR==2) print $6; }' df_out > file_out \
+ || framework_failure
+compare file_out file_exp && fail=1
+
+# check df output (with --direct)
+$AWK '{ if (NR==2) print $6; }' df_direct_out > file_out \
+ || framework_failure
+compare file_out file_exp || fail=1
+
+Exit $fail
--
2.31.1

@ -0,0 +1,94 @@
diff --git a/lib/getugroups.c b/lib/getugroups.c
index 299bae6..8ece29b 100644
--- a/lib/getugroups.c
+++ b/lib/getugroups.c
@@ -19,6 +19,9 @@
#include <config.h>
+/* We do not need this code if getgrouplist(3) is available. */
+#ifndef HAVE_GETGROUPLIST
+
#include "getugroups.h"
#include <errno.h>
@@ -126,3 +129,4 @@ getugroups (int maxcount, gid_t *grouplist, char const *username,
}
#endif /* HAVE_GRP_H */
+#endif /* have getgrouplist */
diff --git a/lib/mgetgroups.c b/lib/mgetgroups.c
index 76474c2..0a9d221 100644
--- a/lib/mgetgroups.c
+++ b/lib/mgetgroups.c
@@ -31,6 +31,7 @@
#endif
#include "getugroups.h"
+#include "xalloc.h"
#include "xalloc-oversized.h"
/* Work around an incompatibility of OS X 10.11: getgrouplist
@@ -121,9 +122,17 @@ mgetgroups (char const *username, gid_t gid, gid_t **groups)
/* else no username, so fall through and use getgroups. */
#endif
- max_n_groups = (username
- ? getugroups (0, NULL, username, gid)
- : getgroups (0, NULL));
+ if (!username)
+ max_n_groups = getgroups(0, NULL);
+ else
+ {
+#ifdef HAVE_GETGROUPLIST
+ max_n_groups = 0;
+ getgrouplist (username, gid, NULL, &max_n_groups);
+#else
+ max_n_groups = getugroups (0, NULL, username, gid);
+#endif
+ }
/* If we failed to count groups because there is no supplemental
group support, then return an array containing just GID.
@@ -145,10 +154,25 @@ mgetgroups (char const *username, gid_t gid, gid_t **groups)
if (g == NULL)
return -1;
- ng = (username
- ? getugroups (max_n_groups, g, username, gid)
- : getgroups (max_n_groups - (gid != (gid_t) -1),
- g + (gid != (gid_t) -1)));
+ if (!username)
+ ng = getgroups (max_n_groups - (gid != (gid_t)-1), g + (gid != (gid_t)-1));
+ else
+ {
+#ifdef HAVE_GETGROUPLIST
+ int e;
+ ng = max_n_groups;
+ while ((e = getgrouplist (username, gid, g, &ng)) == -1
+ && ng > max_n_groups)
+ {
+ max_n_groups = ng;
+ g = xrealloc (g, max_n_groups * sizeof (GETGROUPS_T));
+ }
+ if (e == -1)
+ ng = -1;
+#else
+ ng = getugroups (max_n_groups, g, username, gid);
+#endif
+ }
if (ng < 0)
{
diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4
index 62777c7..5180243 100644
--- a/m4/jm-macros.m4
+++ b/m4/jm-macros.m4
@@ -82,6 +82,7 @@ AC_DEFUN([coreutils_MACROS],
fchown
fchmod
ftruncate
+ getgrouplist
iswspace
mkfifo
mbrlen

@ -0,0 +1,565 @@
diff --git a/src/cut.c b/src/cut.c
index 7ab6be4..022d0ad 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -28,6 +28,11 @@
#include <assert.h>
#include <getopt.h>
#include <sys/types.h>
+
+/* Get mbstate_t, mbrtowc(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
#include "system.h"
#include "error.h"
@@ -38,6 +43,18 @@
#include "set-fields.h"
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+ installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# undef MB_LEN_MAX
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "cut"
@@ -54,6 +71,52 @@
} \
while (0)
+/* Refill the buffer BUF to get a multibyte character. */
+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
+ do \
+ { \
+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
+ { \
+ memmove (BUF, BUFPOS, BUFLEN); \
+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
+ BUFPOS = BUF; \
+ } \
+ } \
+ while (0)
+
+/* Get wide character on BUFPOS. BUFPOS is not included after that.
+ If byte sequence is not valid as a character, CONVFAIL is true. Otherwise false. */
+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
+ do \
+ { \
+ mbstate_t state_bak; \
+ \
+ if (BUFLEN < 1) \
+ { \
+ WC = WEOF; \
+ break; \
+ } \
+ \
+ /* Get a wide character. */ \
+ CONVFAIL = false; \
+ state_bak = STATE; \
+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
+ \
+ switch (MBLENGTH) \
+ { \
+ case (size_t)-1: \
+ case (size_t)-2: \
+ CONVFAIL = true; \
+ STATE = state_bak; \
+ /* Fall througn. */ \
+ \
+ case 0: \
+ MBLENGTH = 1; \
+ break; \
+ } \
+ } \
+ while (0)
+
/* Pointer inside RP. When checking if a byte or field is selected
by a finite range, we check if it is between CURRENT_RP.LO
@@ -61,6 +124,9 @@
CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */
static struct field_range_pair *current_rp;
+/* Length of the delimiter given as argument to -d. */
+size_t delimlen;
+
/* This buffer is used to support the semantics of the -s option
(or lack of same) when the specified field list includes (does
not include) the first field. In both of those cases, the entire
@@ -77,15 +143,25 @@ enum operating_mode
{
undefined_mode,
- /* Output characters that are in the given bytes. */
+ /* Output bytes that are at the given positions. */
byte_mode,
+ /* Output characters that are at the given positions. */
+ character_mode,
+
/* Output the given delimiter-separated fields. */
field_mode
};
static enum operating_mode operating_mode;
+/* If nonzero, when in byte mode, don't split multibyte characters. */
+static int byte_mode_character_aware;
+
+/* If nonzero, the function for single byte locale is work
+ if this program runs on multibyte locale. */
+static int force_singlebyte_mode;
+
/* If true do not output lines containing no delimiter characters.
Otherwise, all such lines are printed. This option is valid only
with field mode. */
@@ -97,6 +173,9 @@ static bool complement;
/* The delimiter character for field mode. */
static unsigned char delim;
+#if HAVE_WCHAR_H
+static wchar_t wcdelim;
+#endif
/* The delimiter for each line/record. */
static unsigned char line_delim = '\n';
@@ -164,7 +243,7 @@ Print selected parts of lines from each FILE to standard output.\n\
-f, --fields=LIST select only these fields; also print any line\n\
that contains no delimiter character, unless\n\
the -s option is specified\n\
- -n (ignored)\n\
+ -n with -b: don't split multibyte characters\n\
"), stdout);
fputs (_("\
--complement complement the set of selected bytes, characters\n\
@@ -280,6 +359,82 @@ cut_bytes (FILE *stream)
}
}
+#if HAVE_MBRTOWC
+/* This function is in use for the following case.
+
+ 1. Read from the stream STREAM, printing to standard output any selected
+ characters.
+
+ 2. Read from stream STREAM, printing to standard output any selected bytes,
+ without splitting multibyte characters. */
+
+static void
+cut_characters_or_cut_bytes_no_split (FILE *stream)
+{
+ uintmax_t idx; /* number of bytes or characters in the line so far. */
+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
+ char *bufpos; /* Next read position of BUF. */
+ size_t buflen; /* The length of the byte sequence in buf. */
+ wint_t wc; /* A gotten wide character. */
+ size_t mblength; /* The byte size of a multibyte character which shows
+ as same character as WC. */
+ mbstate_t state; /* State of the stream. */
+ bool convfail = false; /* true, when conversion failed. Otherwise false. */
+ /* Whether to begin printing delimiters between ranges for the current line.
+ Set after we've begun printing data corresponding to the first range. */
+ bool print_delimiter = false;
+
+ idx = 0;
+ buflen = 0;
+ bufpos = buf;
+ memset (&state, '\0', sizeof(mbstate_t));
+
+ current_rp = frp;
+
+ while (1)
+ {
+ REFILL_BUFFER (buf, bufpos, buflen, stream);
+
+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
+ (void) convfail; /* ignore unused */
+
+ if (wc == WEOF)
+ {
+ if (idx > 0)
+ putchar (line_delim);
+ break;
+ }
+ else if (wc == line_delim)
+ {
+ putchar (line_delim);
+ idx = 0;
+ print_delimiter = false;
+ current_rp = frp;
+ }
+ else
+ {
+ next_item (&idx);
+ if (print_kth (idx))
+ {
+ if (output_delimiter_specified)
+ {
+ if (print_delimiter && is_range_start_index (idx))
+ {
+ fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout);
+ }
+ print_delimiter = true;
+ }
+ fwrite (bufpos, mblength, sizeof(char), stdout);
+ }
+ }
+
+ buflen -= mblength;
+ bufpos += mblength;
+ }
+}
+#endif
+
/* Read from stream STREAM, printing to standard output any selected fields. */
static void
@@ -425,13 +580,211 @@ cut_fields (FILE *stream)
}
}
+#if HAVE_MBRTOWC
+static void
+cut_fields_mb (FILE *stream)
+{
+ int c;
+ uintmax_t field_idx;
+ int found_any_selected_field;
+ int buffer_first_field;
+ int empty_input;
+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
+ char *bufpos; /* Next read position of BUF. */
+ size_t buflen; /* The length of the byte sequence in buf. */
+ wint_t wc = 0; /* A gotten wide character. */
+ size_t mblength; /* The byte size of a multibyte character which shows
+ as same character as WC. */
+ mbstate_t state; /* State of the stream. */
+ bool convfail = false; /* true, when conversion failed. Otherwise false. */
+
+ current_rp = frp;
+
+ found_any_selected_field = 0;
+ field_idx = 1;
+ bufpos = buf;
+ buflen = 0;
+ memset (&state, '\0', sizeof(mbstate_t));
+
+ c = getc (stream);
+ empty_input = (c == EOF);
+ if (c != EOF)
+ {
+ ungetc (c, stream);
+ wc = 0;
+ }
+ else
+ wc = WEOF;
+
+ /* To support the semantics of the -s flag, we may have to buffer
+ all of the first field to determine whether it is `delimited.'
+ But that is unnecessary if all non-delimited lines must be printed
+ and the first field has been selected, or if non-delimited lines
+ must be suppressed and the first field has *not* been selected.
+ That is because a non-delimited line has exactly one field. */
+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
+
+ while (1)
+ {
+ if (field_idx == 1 && buffer_first_field)
+ {
+ int len = 0;
+
+ while (1)
+ {
+ REFILL_BUFFER (buf, bufpos, buflen, stream);
+
+ GET_NEXT_WC_FROM_BUFFER
+ (wc, bufpos, buflen, mblength, state, convfail);
+
+ if (wc == WEOF)
+ break;
+
+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
+ memcpy (field_1_buffer + len, bufpos, mblength);
+ len += mblength;
+ buflen -= mblength;
+ bufpos += mblength;
+
+ if (!convfail && (wc == line_delim || wc == wcdelim))
+ break;
+ }
+
+ if (len <= 0 && wc == WEOF)
+ break;
+
+ /* If the first field extends to the end of line (it is not
+ delimited) and we are printing all non-delimited lines,
+ print this one. */
+ if (convfail || (!convfail && wc != wcdelim))
+ {
+ if (suppress_non_delimited)
+ {
+ /* Empty. */
+ }
+ else
+ {
+ fwrite (field_1_buffer, sizeof (char), len, stdout);
+ /* Make sure the output line is newline terminated. */
+ if (convfail || (!convfail && wc != line_delim))
+ putchar (line_delim);
+ }
+ continue;
+ }
+
+ if (print_kth (1))
+ {
+ /* Print the field, but not the trailing delimiter. */
+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
+ found_any_selected_field = 1;
+ }
+ next_item (&field_idx);
+ }
+
+ if (wc != WEOF)
+ {
+ if (print_kth (field_idx))
+ {
+ if (found_any_selected_field)
+ {
+ fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout);
+ }
+ found_any_selected_field = 1;
+ }
+
+ while (1)
+ {
+ REFILL_BUFFER (buf, bufpos, buflen, stream);
+
+ GET_NEXT_WC_FROM_BUFFER
+ (wc, bufpos, buflen, mblength, state, convfail);
+
+ if (wc == WEOF)
+ break;
+ else if (!convfail && (wc == wcdelim || wc == line_delim))
+ {
+ buflen -= mblength;
+ bufpos += mblength;
+ break;
+ }
+
+ if (print_kth (field_idx))
+ fwrite (bufpos, mblength, sizeof(char), stdout);
+
+ buflen -= mblength;
+ bufpos += mblength;
+ }
+ }
+
+ if ((!convfail || wc == line_delim) && buflen < 1)
+ wc = WEOF;
+
+ if (!convfail && wc == wcdelim)
+ next_item (&field_idx);
+ else if (wc == WEOF || (!convfail && wc == line_delim))
+ {
+ if (found_any_selected_field
+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
+ putchar (line_delim);
+ if (wc == WEOF)
+ break;
+ field_idx = 1;
+ current_rp = frp;
+ found_any_selected_field = 0;
+ }
+ }
+}
+#endif
+
static void
cut_stream (FILE *stream)
{
- if (operating_mode == byte_mode)
- cut_bytes (stream);
+#if HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
+ {
+ switch (operating_mode)
+ {
+ case byte_mode:
+ if (byte_mode_character_aware)
+ cut_characters_or_cut_bytes_no_split (stream);
+ else
+ cut_bytes (stream);
+ break;
+
+ case character_mode:
+ cut_characters_or_cut_bytes_no_split (stream);
+ break;
+
+ case field_mode:
+ if (delimlen == 1)
+ {
+ /* Check if we have utf8 multibyte locale, so we can use this
+ optimization because of uniqueness of characters, which is
+ not true for e.g. SJIS */
+ char * loc = setlocale(LC_CTYPE, NULL);
+ if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") ||
+ strstr (loc, "UTF8") || strstr (loc, "utf8")))
+ {
+ cut_fields (stream);
+ break;
+ }
+ }
+ cut_fields_mb (stream);
+ break;
+
+ default:
+ abort ();
+ }
+ }
else
- cut_fields (stream);
+#endif
+ {
+ if (operating_mode == field_mode)
+ cut_fields (stream);
+ else
+ cut_bytes (stream);
+ }
}
/* Process file FILE to standard output.
@@ -483,6 +836,7 @@ main (int argc, char **argv)
bool ok;
bool delim_specified = false;
char *spec_list_string IF_LINT ( = NULL);
+ char mbdelim[MB_LEN_MAX + 1];
initialize_main (&argc, &argv);
set_program_name (argv[0]);
@@ -505,7 +859,6 @@ main (int argc, char **argv)
switch (optc)
{
case 'b':
- case 'c':
/* Build the byte list. */
if (operating_mode != undefined_mode)
FATAL_ERROR (_("only one type of list may be specified"));
@@ -513,6 +866,14 @@ main (int argc, char **argv)
spec_list_string = optarg;
break;
+ case 'c':
+ /* Build the character list. */
+ if (operating_mode != undefined_mode)
+ FATAL_ERROR (_("only one type of list may be specified"));
+ operating_mode = character_mode;
+ spec_list_string = optarg;
+ break;
+
case 'f':
/* Build the field list. */
if (operating_mode != undefined_mode)
@@ -524,10 +885,38 @@ main (int argc, char **argv)
case 'd':
/* New delimiter. */
/* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
- if (optarg[0] != '\0' && optarg[1] != '\0')
- FATAL_ERROR (_("the delimiter must be a single character"));
- delim = optarg[0];
- delim_specified = true;
+ {
+#if HAVE_MBRTOWC
+ if(MB_CUR_MAX > 1)
+ {
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof(mbstate_t));
+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
+
+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
+ ++force_singlebyte_mode;
+ else
+ {
+ delimlen = (delimlen < 1) ? 1 : delimlen;
+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
+ FATAL_ERROR (_("the delimiter must be a single character"));
+ memcpy (mbdelim, optarg, delimlen);
+ mbdelim[delimlen] = '\0';
+ if (delimlen == 1)
+ delim = *optarg;
+ }
+ }
+
+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
+#endif
+ {
+ if (optarg[0] != '\0' && optarg[1] != '\0')
+ FATAL_ERROR (_("the delimiter must be a single character"));
+ delim = (unsigned char) optarg[0];
+ }
+ delim_specified = true;
+ }
break;
case OUTPUT_DELIMITER_OPTION:
@@ -540,6 +929,7 @@ main (int argc, char **argv)
break;
case 'n':
+ byte_mode_character_aware = 1;
break;
case 's':
@@ -579,15 +969,34 @@ main (int argc, char **argv)
| (complement ? SETFLD_COMPLEMENT : 0) );
if (!delim_specified)
- delim = '\t';
+ {
+ delim = '\t';
+#ifdef HAVE_MBRTOWC
+ wcdelim = L'\t';
+ mbdelim[0] = '\t';
+ mbdelim[1] = '\0';
+ delimlen = 1;
+#endif
+ }
if (output_delimiter_string == NULL)
{
- static char dummy[2];
- dummy[0] = delim;
- dummy[1] = '\0';
- output_delimiter_string = dummy;
- output_delimiter_length = 1;
+#ifdef HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
+ {
+ output_delimiter_string = xstrdup(mbdelim);
+ output_delimiter_length = delimlen;
+ }
+
+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
+#endif
+ {
+ static char dummy[2];
+ dummy[0] = delim;
+ dummy[1] = '\0';
+ output_delimiter_string = dummy;
+ output_delimiter_length = 1;
+ }
}
if (optind == argc)

@ -0,0 +1,866 @@
From e87ab5b991b08092a7e07af82b3ec822a8604151 Mon Sep 17 00:00:00 2001
From: Ondrej Oprala <ooprala@redhat.com>
Date: Wed, 5 Aug 2015 09:15:09 +0200
Subject: [PATCH] expand,unexpand: add multibyte support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* NEWS: Mention the changes.
* bootstrap.conf: Add mbfile to the list of modules.
* configure.ac: Properly initialize mbfile.
* src/expand.c (expand): Iterate over multibyte characters properly.
* src/unexpand.c (unexpand): Iterate over multibyte characters
properly.
* tests/local.mk: Add new tests.
* tests/{expand,unexpand}/mb.sh: New tests.
Co-authored-by: Pádraig Brady <pbrady@redhat.com>
---
bootstrap.conf | 1 +
configure.ac | 2 +
lib/mbfile.c | 3 +
lib/mbfile.h | 255 +++++++++++++++++++++++++++++++++++++++++++++++++++
m4/mbfile.m4 | 14 +++
src/expand.c | 43 +++++----
src/local.mk | 4 +-
src/unexpand.c | 54 +++++++----
tests/expand/mb.sh | 98 ++++++++++++++++++++
tests/local.mk | 2 +
tests/unexpand/mb.sh | 97 ++++++++++++++++++++
10 files changed, 535 insertions(+), 34 deletions(-)
create mode 100644 lib/mbfile.c
create mode 100644 lib/mbfile.h
create mode 100644 m4/mbfile.m4
create mode 100755 tests/expand/mb.sh
create mode 100755 tests/unexpand/mb.sh
diff --git a/bootstrap.conf b/bootstrap.conf
index 8a0ff31..a1c78b2 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -152,6 +152,7 @@ gnulib_modules="
maintainer-makefile
malloc-gnu
manywarnings
+ mbfile
mbrlen
mbrtowc
mbsalign
diff --git a/configure.ac b/configure.ac
index 1e74b36..24c9725 100644
--- a/configure.ac
+++ b/configure.ac
@@ -427,6 +427,8 @@ fi
# I'm leaving it here for now. This whole thing needs to be modernized...
gl_WINSIZE_IN_PTEM
+gl_MBFILE
+
gl_HEADER_TIOCGWINSZ_IN_TERMIOS_H
if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \
diff --git a/lib/mbfile.c b/lib/mbfile.c
new file mode 100644
index 0000000..b0a468e
--- /dev/null
+++ b/lib/mbfile.c
@@ -0,0 +1,3 @@
+#include <config.h>
+#define MBFILE_INLINE _GL_EXTERN_INLINE
+#include "mbfile.h"
diff --git a/lib/mbfile.h b/lib/mbfile.h
new file mode 100644
index 0000000..11f1b12
--- /dev/null
+++ b/lib/mbfile.h
@@ -0,0 +1,255 @@
+/* Multibyte character I/O: macros for multi-byte encodings.
+ Copyright (C) 2001, 2005, 2009-2015 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Mitsuru Chinen <mchinen@yamato.ibm.com>
+ and Bruno Haible <bruno@clisp.org>. */
+
+/* The macros in this file implement multi-byte character input from a
+ stream.
+
+ mb_file_t
+ is the type for multibyte character input stream, usable for variable
+ declarations.
+
+ mbf_char_t
+ is the type for multibyte character or EOF, usable for variable
+ declarations.
+
+ mbf_init (mbf, stream)
+ initializes the MB_FILE for reading from stream.
+
+ mbf_getc (mbc, mbf)
+ reads the next multibyte character from mbf and stores it in mbc.
+
+ mb_iseof (mbc)
+ returns true if mbc represents the EOF value.
+
+ Here are the function prototypes of the macros.
+
+ extern void mbf_init (mb_file_t mbf, FILE *stream);
+ extern void mbf_getc (mbf_char_t mbc, mb_file_t mbf);
+ extern bool mb_iseof (const mbf_char_t mbc);
+ */
+
+#ifndef _MBFILE_H
+#define _MBFILE_H 1
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
+ <wchar.h>. */
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+
+#include "mbchar.h"
+
+#ifndef _GL_INLINE_HEADER_BEGIN
+ #error "Please include config.h first."
+#endif
+_GL_INLINE_HEADER_BEGIN
+#ifndef MBFILE_INLINE
+# define MBFILE_INLINE _GL_INLINE
+#endif
+
+struct mbfile_multi {
+ FILE *fp;
+ bool eof_seen;
+ bool have_pushback;
+ mbstate_t state;
+ unsigned int bufcount;
+ char buf[MBCHAR_BUF_SIZE];
+ struct mbchar pushback;
+};
+
+MBFILE_INLINE void
+mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf)
+{
+ size_t bytes;
+
+ /* If EOF has already been seen, don't use getc. This matters if
+ mbf->fp is connected to an interactive tty. */
+ if (mbf->eof_seen)
+ goto eof;
+
+ /* Return character pushed back, if there is one. */
+ if (mbf->have_pushback)
+ {
+ mb_copy (mbc, &mbf->pushback);
+ mbf->have_pushback = false;
+ return;
+ }
+
+ /* Before using mbrtowc, we need at least one byte. */
+ if (mbf->bufcount == 0)
+ {
+ int c = getc (mbf->fp);
+ if (c == EOF)
+ {
+ mbf->eof_seen = true;
+ goto eof;
+ }
+ mbf->buf[0] = (unsigned char) c;
+ mbf->bufcount++;
+ }
+
+ /* Handle most ASCII characters quickly, without calling mbrtowc(). */
+ if (mbf->bufcount == 1 && mbsinit (&mbf->state) && is_basic (mbf->buf[0]))
+ {
+ /* These characters are part of the basic character set. ISO C 99
+ guarantees that their wide character code is identical to their
+ char code. */
+ mbc->wc = mbc->buf[0] = mbf->buf[0];
+ mbc->wc_valid = true;
+ mbc->ptr = &mbc->buf[0];
+ mbc->bytes = 1;
+ mbf->bufcount = 0;
+ return;
+ }
+
+ /* Use mbrtowc on an increasing number of bytes. Read only as many bytes
+ from mbf->fp as needed. This is needed to give reasonable interactive
+ behaviour when mbf->fp is connected to an interactive tty. */
+ for (;;)
+ {
+ /* We don't know whether the 'mbrtowc' function updates the state when
+ it returns -2, - this is the ISO C 99 and glibc-2.2 behaviour - or
+ not - amended ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We
+ don't have an autoconf test for this, yet.
+ The new behaviour would allow us to feed the bytes one by one into
+ mbrtowc. But the old behaviour forces us to feed all bytes since
+ the end of the last character into mbrtowc. Since we want to retry
+ with more bytes when mbrtowc returns -2, we must backup the state
+ before calling mbrtowc, because implementations with the new
+ behaviour will clobber it. */
+ mbstate_t backup_state = mbf->state;
+
+ bytes = mbrtowc (&mbc->wc, &mbf->buf[0], mbf->bufcount, &mbf->state);
+
+ if (bytes == (size_t) -1)
+ {
+ /* An invalid multibyte sequence was encountered. */
+ /* Return a single byte. */
+ bytes = 1;
+ mbc->wc_valid = false;
+ break;
+ }
+ else if (bytes == (size_t) -2)
+ {
+ /* An incomplete multibyte character. */
+ mbf->state = backup_state;
+ if (mbf->bufcount == MBCHAR_BUF_SIZE)
+ {
+ /* An overlong incomplete multibyte sequence was encountered. */
+ /* Return a single byte. */
+ bytes = 1;
+ mbc->wc_valid = false;
+ break;
+ }
+ else
+ {
+ /* Read one more byte and retry mbrtowc. */
+ int c = getc (mbf->fp);
+ if (c == EOF)
+ {
+ /* An incomplete multibyte character at the end. */
+ mbf->eof_seen = true;
+ bytes = mbf->bufcount;
+ mbc->wc_valid = false;
+ break;
+ }
+ mbf->buf[mbf->bufcount] = (unsigned char) c;
+ mbf->bufcount++;
+ }
+ }
+ else
+ {
+ if (bytes == 0)
+ {
+ /* A null wide character was encountered. */
+ bytes = 1;
+ assert (mbf->buf[0] == '\0');
+ assert (mbc->wc == 0);
+ }
+ mbc->wc_valid = true;
+ break;
+ }
+ }
+
+ /* Return the multibyte sequence mbf->buf[0..bytes-1]. */
+ mbc->ptr = &mbc->buf[0];
+ memcpy (&mbc->buf[0], &mbf->buf[0], bytes);
+ mbc->bytes = bytes;
+
+ mbf->bufcount -= bytes;
+ if (mbf->bufcount > 0)
+ {
+ /* It's not worth calling memmove() for so few bytes. */
+ unsigned int count = mbf->bufcount;
+ char *p = &mbf->buf[0];
+
+ do
+ {
+ *p = *(p + bytes);
+ p++;
+ }
+ while (--count > 0);
+ }
+ return;
+
+eof:
+ /* An mbchar_t with bytes == 0 is used to indicate EOF. */
+ mbc->ptr = NULL;
+ mbc->bytes = 0;
+ mbc->wc_valid = false;
+ return;
+}
+
+MBFILE_INLINE void
+mbfile_multi_ungetc (const struct mbchar *mbc, struct mbfile_multi *mbf)
+{
+ mb_copy (&mbf->pushback, mbc);
+ mbf->have_pushback = true;
+}
+
+typedef struct mbfile_multi mb_file_t;
+
+typedef mbchar_t mbf_char_t;
+
+#define mbf_init(mbf, stream) \
+ ((mbf).fp = (stream), \
+ (mbf).eof_seen = false, \
+ (mbf).have_pushback = false, \
+ memset (&(mbf).state, '\0', sizeof (mbstate_t)), \
+ (mbf).bufcount = 0)
+
+#define mbf_getc(mbc, mbf) mbfile_multi_getc (&(mbc), &(mbf))
+
+#define mbf_ungetc(mbc, mbf) mbfile_multi_ungetc (&(mbc), &(mbf))
+
+#define mb_iseof(mbc) ((mbc).bytes == 0)
+
+#ifndef _GL_INLINE_HEADER_BEGIN
+ #error "Please include config.h first."
+#endif
+_GL_INLINE_HEADER_BEGIN
+
+#endif /* _MBFILE_H */
diff --git a/m4/mbfile.m4 b/m4/mbfile.m4
new file mode 100644
index 0000000..8589902
--- /dev/null
+++ b/m4/mbfile.m4
@@ -0,0 +1,14 @@
+# mbfile.m4 serial 7
+dnl Copyright (C) 2005, 2008-2015 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl autoconf tests required for use of mbfile.h
+dnl From Bruno Haible.
+
+AC_DEFUN([gl_MBFILE],
+[
+ AC_REQUIRE([AC_TYPE_MBSTATE_T])
+ :
+])
diff --git a/src/expand.c b/src/expand.c
index 9fa2e10..380e020 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -37,6 +37,9 @@
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
+
+#include <mbfile.h>
+
#include "system.h"
#include "die.h"
#include "xstrndup.h"
@@ -100,19 +103,19 @@ expand (void)
{
/* Input stream. */
FILE *fp = next_file (NULL);
+ mb_file_t mbf;
+ mbf_char_t c;
if (!fp)
return;
+ mbf_init (mbf, fp);
+
while (true)
{
- /* Input character, or EOF. */
- int c;
-
/* If true, perform translations. */
bool convert = true;
-
/* The following variables have valid values only when CONVERT
is true: */
@@ -122,17 +125,23 @@ expand (void)
/* Index in TAB_LIST of next tab stop to examine. */
size_t tab_index = 0;
-
/* Convert a line of text. */
do
{
- while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
- continue;
+ do {
+ mbf_getc (c, mbf);
+ if (mb_iseof (c))
+ {
+ mbf_init (mbf, fp = next_file (fp));
+ continue;
+ }
+ }
+ while (false);
if (convert)
{
- if (c == '\t')
+ if (mb_iseq (c, '\t'))
{
/* Column the next input tab stop is on. */
uintmax_t next_tab_column;
@@ -151,32 +160,34 @@ expand (void)
if (putchar (' ') < 0)
die (EXIT_FAILURE, errno, _("write error"));
- c = ' ';
+ mb_setascii (&c, ' ');
}
- else if (c == '\b')
+ else if (mb_iseq (c, '\b'))
{
/* Go back one column, and force recalculation of the
next tab stop. */
column -= !!column;
tab_index -= !!tab_index;
}
- else
+ /* A leading control character could make us trip over. */
+ else if (!mb_iscntrl (c))
{
- column++;
+ column += mb_width (c);
if (!column)
die (EXIT_FAILURE, 0, _("input line is too long"));
}
- convert &= convert_entire_line || !! isblank (c);
+ convert &= convert_entire_line || mb_isblank (c);
}
- if (c < 0)
+ if (mb_iseof (c))
return;
- if (putchar (c) < 0)
+ mb_putc (c, stdout);
+ if (ferror (stdout))
die (EXIT_FAILURE, errno, _("write error"));
}
- while (c != '\n');
+ while (!mb_iseq (c, '\n'));
}
}
diff --git a/src/local.mk b/src/local.mk
index 72db9c704..ef3bfa469 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -415,8 +415,8 @@ src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS)
src_ginstall_CPPFLAGS = -DENABLE_MATCHPATHCON=1 $(AM_CPPFLAGS)
-src_expand_SOURCES = src/expand.c src/expand-common.c
-src_unexpand_SOURCES = src/unexpand.c src/expand-common.c
+src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c
+src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c
# Ensure we don't link against libcoreutils.a as that lib is
# not compiled with -fPIC which causes issues on 64 bit at least
diff --git a/src/unexpand.c b/src/unexpand.c
index 7801274..569a7ee 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -38,6 +38,9 @@
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
+
+#include <mbfile.h>
+
#include "system.h"
#include "die.h"
#include "xstrndup.h"
@@ -107,11 +110,12 @@ unexpand (void)
{
/* Input stream. */
FILE *fp = next_file (NULL);
+ mb_file_t mbf;
/* The array of pending blanks. In non-POSIX locales, blanks can
include characters other than spaces, so the blanks must be
stored, not merely counted. */
- char *pending_blank;
+ mbf_char_t *pending_blank;
if (!fp)
return;
@@ -119,12 +123,14 @@ unexpand (void)
/* The worst case is a non-blank character, then one blank, then a
tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
- pending_blank = xmalloc (max_column_width);
+ pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t));
+
+ mbf_init (mbf, fp);
while (true)
{
/* Input character, or EOF. */
- int c;
+ mbf_char_t c;
/* If true, perform translations. */
bool convert = true;
@@ -158,12 +164,19 @@ unexpand (void)
do
{
- while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
- continue;
+ do {
+ mbf_getc (c, mbf);
+ if (mb_iseof (c))
+ {
+ mbf_init (mbf, fp = next_file (fp));
+ continue;
+ }
+ }
+ while (false);
if (convert)
{
- bool blank = !! isblank (c);
+ bool blank = mb_isblank (c);
if (blank)
{
@@ -180,16 +193,16 @@ unexpand (void)
if (next_tab_column < column)
die (EXIT_FAILURE, 0, _("input line is too long"));
- if (c == '\t')
+ if (mb_iseq (c, '\t'))
{
column = next_tab_column;
if (pending)
- pending_blank[0] = '\t';
+ mb_setascii (&pending_blank[0], '\t');
}
else
{
- column++;
+ column += mb_width (c);
if (! (prev_blank && column == next_tab_column))
{
@@ -197,13 +210,14 @@ unexpand (void)
will be replaced by tabs. */
if (column == next_tab_column)
one_blank_before_tab_stop = true;
- pending_blank[pending++] = c;
+ mb_copy (&pending_blank[pending++], &c);
prev_blank = true;
continue;
}
/* Replace the pending blanks by a tab or two. */
- pending_blank[0] = c = '\t';
+ mb_setascii (&c, '\t');
+ mb_setascii (&pending_blank[0], '\t');
}
/* Discard pending blanks, unless it was a single
@@ -211,7 +225,7 @@ unexpand (void)
pending = one_blank_before_tab_stop;
}
}
- else if (c == '\b')
+ else if (mb_iseq (c, '\b'))
{
/* Go back one column, and force recalculation of the
next tab stop. */
@@ -221,16 +235,20 @@ unexpand (void)
}
else
{
- column++;
- if (!column)
+ const uintmax_t orig_column = column;
+ column += mb_width (c);
+ if (column < orig_column)
die (EXIT_FAILURE, 0, _("input line is too long"));
}
if (pending)
{
if (pending > 1 && one_blank_before_tab_stop)
- pending_blank[0] = '\t';
- if (fwrite (pending_blank, 1, pending, stdout) != pending)
+ mb_setascii (&pending_blank[0], '\t');
+
+ for (int n = 0; n < pending; ++n)
+ mb_putc (pending_blank[n], stdout);
+ if (ferror (stdout))
die (EXIT_FAILURE, errno, _("write error"));
pending = 0;
one_blank_before_tab_stop = false;
@@ -240,16 +258,17 @@ unexpand (void)
convert &= convert_entire_line || blank;
}
- if (c < 0)
+ if (mb_iseof (c))
{
free (pending_blank);
return;
}
- if (putchar (c) < 0)
+ mb_putc (c, stdout);
+ if (ferror (stdout))
die (EXIT_FAILURE, errno, _("write error"));
}
- while (c != '\n');
+ while (!mb_iseq (c, '\n'));
}
}
diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh
new file mode 100755
index 0000000..7971e18
--- /dev/null
+++ b/tests/expand/mb.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+
+# Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ expand
+
+export LC_ALL=en_US.UTF-8
+
+#input containing multibyte characters
+cat <<\EOF > in || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+EOF
+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_
+
+cat <<\EOF > exp || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+expand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+#test characters with display widths != 1
+env printf '12345678
+e\t|ascii(1)
+\u00E9\t|composed(1)
+e\u0301\t|decomposed(1)
+\u3000\t|ideo-space(2)
+\uFF0D\t|full-hypen(2)
+' > in || framework_failure_
+
+env printf '12345678
+e |ascii(1)
+\u00E9 |composed(1)
+e\u0301 |decomposed(1)
+\u3000 |ideo-space(2)
+\uFF0D |full-hypen(2)
+' > exp || framework_failure_
+
+expand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+#shouldn't fail with "input line too long"
+#when a line starts with a control character
+env printf '\n' > in || framework_failure_
+
+expand < in > out || fail=1
+compare in out > /dev/null 2>&1 || fail=1
+
+#non-Unicode characters interspersed between Unicode ones
+env printf '12345678
+\t\xFF|
+\xFF\t|
+\t\xFFä|
+ä\xFF\t|
+\tä\xFF|
+\xFF\tä|
+äbcdef\xFF\t|
+' > in || framework_failure_
+
+env printf '12345678
+ \xFF|
+\xFF |
+ \xFFä|
+ä\xFF |
+ ä\xFF|
+\xFF ä|
+äbcdef\xFF |
+' > exp || framework_failure_
+
+expand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+exit $fail
diff --git a/tests/local.mk b/tests/local.mk
index 192f776..8053397 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -544,6 +544,7 @@ all_tests = \
tests/du/threshold.sh \
tests/du/trailing-slash.sh \
tests/du/two-args.sh \
+ tests/expand/mb.sh \
tests/id/gnu-zero-uids.sh \
tests/id/no-context.sh \
tests/id/context.sh \
@@ -684,6 +685,7 @@ all_tests = \
tests/touch/read-only.sh \
tests/touch/relative.sh \
tests/touch/trailing-slash.sh \
+ tests/unexpand/mb.sh \
$(all_root_tests)
# See tests/factor/create-test.sh.
diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh
new file mode 100755
index 0000000..60d4c1a
--- /dev/null
+++ b/tests/unexpand/mb.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+# Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ unexpand
+
+export LC_ALL=en_US.UTF-8
+
+#input containing multibyte characters
+cat > in <<\EOF
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+cat > exp <<\EOF
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+unexpand -a < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+#test characters with a display width larger than 1
+
+env printf '12345678
+e |ascii(1)
+\u00E9 |composed(1)
+e\u0301 |decomposed(1)
+\u3000 |ideo-space(2)
+\uFF0D |full-hypen(2)
+' > in || framework_failure_
+
+env printf '12345678
+e\t|ascii(1)
+\u00E9\t|composed(1)
+e\u0301\t|decomposed(1)
+\u3000\t|ideo-space(2)
+\uFF0D\t|full-hypen(2)
+' > exp || framework_failure_
+
+unexpand -a < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+#test input where a blank of width > 1 is not being substituted
+in="$(LC_ALL=en_US.UTF-8 printf ' \u3000 ö ü ß')"
+exp='   ö ü ß'
+
+unexpand -a < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+#non-Unicode characters interspersed between Unicode ones
+env printf '12345678
+ \xFF|
+\xFF |
+ \xFFä|
+ä\xFF |
+ ä\xFF|
+\xFF ä|
+äbcdef\xFF |
+' > in || framework_failure_
+
+env printf '12345678
+\t\xFF|
+\xFF\t|
+\t\xFFä|
+ä\xFF\t|
+\tä\xFF|
+\xFF\tä|
+äbcdef\xFF\t|
+' > exp || framework_failure_
+
+unexpand -a < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
--
2.7.4

@ -0,0 +1,108 @@
diff --git a/src/expand.c b/src/expand.c
index 380e020..310b349 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -129,15 +129,19 @@ expand (void)
do
{
- do {
+ while (true) {
mbf_getc (c, mbf);
- if (mb_iseof (c))
+ if ((mb_iseof (c)) && (fp = next_file (fp)))
{
- mbf_init (mbf, fp = next_file (fp));
+ mbf_init (mbf, fp);
continue;
}
+ else
+ {
+ break;
+ }
}
- while (false);
+
if (convert)
{
diff --git a/src/unexpand.c b/src/unexpand.c
index 3bbbd66..863a90a 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -164,15 +164,19 @@ unexpand (void)
do
{
- do {
+ while (true) {
mbf_getc (c, mbf);
- if (mb_iseof (c))
+ if ((mb_iseof (c)) && (fp = next_file (fp)))
{
- mbf_init (mbf, fp = next_file (fp));
+ mbf_init (mbf, fp);
continue;
}
+ else
+ {
+ break;
+ }
}
- while (false);
+
if (convert)
{
diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh
index 7971e18..031be7a 100755
--- a/tests/expand/mb.sh
+++ b/tests/expand/mb.sh
@@ -44,6 +44,20 @@ EOF
expand < in > out || fail=1
compare exp out > /dev/null 2>&1 || fail=1
+#multiple files as an input
+cat <<\EOF >> exp || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+expand ./in ./in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
#test characters with display widths != 1
env printf '12345678
e\t|ascii(1)
diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh
index 60d4c1a..8d75652 100755
--- a/tests/unexpand/mb.sh
+++ b/tests/unexpand/mb.sh
@@ -44,6 +44,22 @@ EOF
unexpand -a < in > out || fail=1
compare exp out > /dev/null 2>&1 || fail=1
+
+#multiple files as an input
+cat >> exp <<\EOF
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+
+unexpand -a ./in ./in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
#test characters with a display width larger than 1
env printf '12345678

@ -0,0 +1,80 @@
From ff424639fe863cbd6963add1a79b97290c1606c6 Mon Sep 17 00:00:00 2001
From: rpm-build <rpm-build>
Date: Fri, 3 Feb 2017 12:26:53 +0100
Subject: [PATCH] fold.c: preserve new-lines in mutlibyte text
---
src/fold.c | 49 ++++++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 25 deletions(-)
diff --git a/src/fold.c b/src/fold.c
index d23edd5..8c232a7 100644
--- a/src/fold.c
+++ b/src/fold.c
@@ -342,39 +342,38 @@ fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
}
rescan:
- if (operating_mode == byte_mode) /* byte mode */
+ if (convfail)
+ increment = 1;
+ else if (wc == L'\n')
+ {
+ /* preserve newline */
+ fwrite (line_out, sizeof(char), offset_out, stdout);
+ START_NEW_LINE;
+ continue;
+ }
+ else if (operating_mode == byte_mode) /* byte mode */
increment = mblength;
else if (operating_mode == character_mode) /* character mode */
increment = 1;
- else /* column mode */
+ else /* column mode */
{
- if (convfail)
- increment = 1;
- else
+ switch (wc)
{
- switch (wc)
- {
- case L'\n':
- fwrite (line_out, sizeof(char), offset_out, stdout);
- START_NEW_LINE;
- continue;
+ case L'\b':
+ increment = (column > 0) ? -1 : 0;
+ break;
- case L'\b':
- increment = (column > 0) ? -1 : 0;
- break;
+ case L'\r':
+ increment = -1 * column;
+ break;
- case L'\r':
- increment = -1 * column;
- break;
+ case L'\t':
+ increment = 8 - column % 8;
+ break;
- case L'\t':
- increment = 8 - column % 8;
- break;
-
- default:
- increment = wcwidth (wc);
- increment = (increment < 0) ? 0 : increment;
- }
+ default:
+ increment = wcwidth (wc);
+ increment = (increment < 0) ? 0 : increment;
}
}
--
2.7.4

@ -0,0 +1,35 @@
From 3976ef5a20369d8b490907ab2cba2d617305a5e0 Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Mon, 30 May 2016 16:19:20 +0200
Subject: [PATCH] sort: do not use static array 'blanks' in human_numcompare()
... because the array is not initialized with MB locales. Note this is
rather a conservative fix. I plan to do more cleanup of the i18n patch
in Fedora to prevent mistakes like this in future updates of coreutils.
---
src/sort.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/sort.c b/src/sort.c
index 9e07ad8..e47b039 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2304,12 +2304,10 @@ find_unit_order (char const *number)
<none/unknown> < K/k < M < G < T < P < E < Z < Y */
static int
-human_numcompare (char const *a, char const *b)
+human_numcompare (char *a, char *b)
{
- while (blanks[to_uchar (*a)])
- a++;
- while (blanks[to_uchar (*b)])
- b++;
+ skipblanks(&a, a + strlen(a));
+ skipblanks(&b, b + strlen(b));
int diff = find_unit_order (a) - find_unit_order (b);
return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));
--
2.5.5

@ -0,0 +1,456 @@
From 7a7c776a4e228d180e74614fd8c8afcad5d4bdf7 Mon Sep 17 00:00:00 2001
From: Jakub Martisko <jamartis@redhat.com>
Date: Thu, 7 Jul 2016 12:53:26 +0200
Subject: [PATCH] coreutils-i18n-un-expand-BOM.patch
---
src/expand-common.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/expand-common.h | 12 ++++++
src/expand.c | 45 +++++++++++++++++++-
src/unexpand.c | 43 ++++++++++++++++++-
tests/expand/mb.sh | 71 ++++++++++++++++++++++++++++++++
tests/unexpand/mb.sh | 59 ++++++++++++++++++++++++++
6 files changed, 342 insertions(+), 2 deletions(-)
diff --git a/src/expand-common.c b/src/expand-common.c
index 4657e46..97cbb09 100644
--- a/src/expand-common.c
+++ b/src/expand-common.c
@@ -19,6 +19,7 @@
#include <assert.h>
#include <stdio.h>
#include <sys/types.h>
+#include <mbfile.h>
#include "system.h"
#include "die.h"
#include "error.h"
@@ -126,6 +127,119 @@ set_increment_size (uintmax_t tabval)
return ok;
}
+extern int
+set_utf_locale (void)
+{
+ /*try using some predefined locale */
+ const char* predef_locales[] = {"C.UTF8","en_US.UTF8","en_GB.UTF8"};
+
+ const int predef_locales_count=3;
+ for (int i=0;i<predef_locales_count;i++)
+ {
+ if (setlocale(LC_ALL,predef_locales[i])!=NULL)
+ {
+ break;
+ }
+ else if (i==predef_locales_count-1)
+ {
+ return 1;
+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
+ }
+ }
+ return 0;
+}
+
+extern bool
+check_utf_locale(void)
+{
+ char* locale = setlocale (LC_CTYPE , NULL);
+ if (locale == NULL)
+ {
+ return false;
+ }
+ else if (strcasestr(locale, "utf8") == NULL && strcasestr(locale, "utf-8") == NULL)
+ {
+ return false;
+ }
+ return true;
+}
+
+extern bool
+check_bom(FILE* fp, mb_file_t *mbf)
+{
+ int c;
+
+
+ c=fgetc(fp);
+
+ /*test BOM header of the first file */
+ mbf->bufcount=0;
+ if (c == 0xEF)
+ {
+ c=fgetc(fp);
+ }
+ else
+ {
+ if (c != EOF)
+ {
+ ungetc(c,fp);
+ }
+ return false;
+ }
+
+ if (c == 0xBB)
+ {
+ c=fgetc(fp);
+ }
+ else
+ {
+ if ( c!= EOF )
+ {
+ mbf->buf[0]=(unsigned char) 0xEF;
+ mbf->bufcount=1;
+ ungetc(c,fp);
+ return false;
+ }
+ else
+ {
+ ungetc(0xEF,fp);
+ return false;
+ }
+ }
+ if (c == 0xBF)
+ {
+ mbf->bufcount=0;
+ return true;
+ }
+ else
+ {
+ if (c != EOF)
+ {
+ mbf->buf[0]=(unsigned char) 0xEF;
+ mbf->buf[1]=(unsigned char) 0xBB;
+ mbf->bufcount=2;
+ ungetc(c,fp);
+ return false;
+ }
+ else
+ {
+ mbf->buf[0]=(unsigned char) 0xEF;
+ mbf->bufcount=1;
+ ungetc(0xBB,fp);
+ return false;
+ }
+ }
+ return false;
+}
+
+extern void
+print_bom(void)
+{
+ putc (0xEF, stdout);
+ putc (0xBB, stdout);
+ putc (0xBF, stdout);
+}
+
/* Add the comma or blank separated list of tab stops STOPS
to the list of tab stops. */
extern void
diff --git a/src/expand-common.h b/src/expand-common.h
index 8cb2079..763bfda 100644
--- a/src/expand-common.h
+++ b/src/expand-common.h
@@ -34,6 +34,18 @@ extern size_t max_column_width;
/* The desired exit status. */
extern int exit_status;
+extern int
+set_utf_locale (void);
+
+extern bool
+check_utf_locale(void);
+
+extern bool
+check_bom(FILE* fp, mb_file_t *mbf);
+
+extern void
+print_bom(void);
+
/* Add tab stop TABVAL to the end of 'tab_list'. */
extern void
add_tab_stop (uintmax_t tabval);
diff --git a/src/expand.c b/src/expand.c
index 310b349..4136824 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -103,11 +103,33 @@ expand (void)
FILE *fp = next_file (NULL);
mb_file_t mbf;
mbf_char_t c;
+ /* True if the starting locale is utf8. */
+ bool using_utf_locale;
+
+ /* True if the first file contains BOM header. */
+ bool found_bom;
+ using_utf_locale=check_utf_locale();
if (!fp)
return;
-
mbf_init (mbf, fp);
+ found_bom=check_bom(fp,&mbf);
+
+ if (using_utf_locale == false && found_bom == true)
+ {
+ /*try using some predefined locale */
+
+ if (set_utf_locale () != 0)
+ {
+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
+ }
+ }
+
+
+ if (found_bom == true)
+ {
+ print_bom();
+ }
while (true)
{
@@ -132,6 +154,27 @@ expand (void)
if ((mb_iseof (c)) && (fp = next_file (fp)))
{
mbf_init (mbf, fp);
+ if (fp!=NULL)
+ {
+ if (check_bom(fp,&mbf)==true)
+ {
+ /*Not the first file - check BOM header*/
+ if (using_utf_locale==false && found_bom==false)
+ {
+ /*BOM header in subsequent file but not in the first one. */
+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
+ }
+ }
+ else
+ {
+ if(using_utf_locale==false && found_bom==true)
+ {
+ /*First file conatined BOM header - locale was switched to UTF
+ *all subsequent files should contain BOM. */
+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
+ }
+ }
+ }
continue;
}
else
diff --git a/src/unexpand.c b/src/unexpand.c
index 863a90a..5681b58 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -116,16 +116,36 @@ unexpand (void)
include characters other than spaces, so the blanks must be
stored, not merely counted. */
mbf_char_t *pending_blank;
+ /* True if the starting locale is utf8. */
+ bool using_utf_locale;
+
+ /* True if the first file contains BOM header. */
+ bool found_bom;
+ using_utf_locale=check_utf_locale();
if (!fp)
return;
+ mbf_init (mbf, fp);
+ found_bom=check_bom(fp,&mbf);
+
+ if (using_utf_locale == false && found_bom == true)
+ {
+ /*try using some predefined locale */
+ if (set_utf_locale () != 0)
+ {
+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
+ }
+ }
/* The worst case is a non-blank character, then one blank, then a
tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t));
- mbf_init (mbf, fp);
+ if (found_bom == true)
+ {
+ print_bom();
+ }
while (true)
{
@@ -169,6 +189,27 @@ unexpand (void)
if ((mb_iseof (c)) && (fp = next_file (fp)))
{
mbf_init (mbf, fp);
+ if (fp!=NULL)
+ {
+ if (check_bom(fp,&mbf)==true)
+ {
+ /*Not the first file - check BOM header*/
+ if (using_utf_locale==false && found_bom==false)
+ {
+ /*BOM header in subsequent file but not in the first one. */
+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
+ }
+ }
+ else
+ {
+ if(using_utf_locale==false && found_bom==true)
+ {
+ /*First file conatined BOM header - locale was switched to UTF
+ *all subsequent files should contain BOM. */
+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
+ }
+ }
+ }
continue;
}
else
diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh
index 031be7a..1621c84 100755
--- a/tests/expand/mb.sh
+++ b/tests/expand/mb.sh
@@ -109,4 +109,75 @@ env printf '12345678
expand < in > out || fail=1
compare exp out > /dev/null 2>&1 || fail=1
+
+
+#BOM header test 1
+printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+EOF
+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_
+
+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+
+expand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LANG=C expand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LC_ALL=C expand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+
+printf '\xEF\xBB\xBF' > in1; cat <<\EOF >> in1 || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+EOF
+env printf ' äöü\t. öüä. \tä xx\n' >> in1 || framework_failure_
+
+
+printf '\xEF\xBB\xBF' > exp; cat <<\EOF >> exp || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+expand in1 in1 > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LANG=C expand in1 in1 > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LC_ALL=C expand in1 in1 > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
exit $fail
diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh
index 8d75652..9d4ee3e 100755
--- a/tests/unexpand/mb.sh
+++ b/tests/unexpand/mb.sh
@@ -111,3 +111,62 @@ env printf '12345678
unexpand -a < in > out || fail=1
compare exp out > /dev/null 2>&1 || fail=1
+
+#BOM header test 1
+printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_
+
+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+unexpand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LANG=C unexpand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LC_ALL=C unexpand < in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+
+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+1234567812345678123456781
+. . . .
+a b c d
+. . . .
+ä ö ü ß
+. . . .
+ äöü . öüä. ä xx
+EOF
+
+
+unexpand in in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LANG=C unexpand in in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
+
+LC_ALL=C unexpand in in > out || fail=1
+compare exp out > /dev/null 2>&1 || fail=1
--
2.9.3

File diff suppressed because it is too large Load Diff

@ -0,0 +1,625 @@
From 5a6af47c3db45b6303bac4dcd6da186fd5cd178c Mon Sep 17 00:00:00 2001
From: Ondrej Valousek <ondrej.valousek.xm@renesas.com>
Date: Fri, 2 Dec 2022 13:40:19 +0100
Subject: [PATCH 1/3] file-has-acl: Basic support for checking NFSv4 ACLs in
Linux.
* lib/acl-internal.h (acl_nfs4_nontrivial): New declaration.
* lib/acl-internal.c (acl_nfs4_nontrivial): New function.
* lib/file-has-acl.c: Include <arpa/inet.h>.
(XATTR_NAME_NFSV4_ACL, TRIVIAL_NFS4_ACL_MAX_LENGTH): New macros.
(file_has_acl): Test for NFSv4 ACLs.
* doc/acl-nfsv4.txt: New file.
Upstream-commit: b0604a8e134dbcc307c0ffdd5ebd3693e9de7081
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
doc/acl-nfsv4.txt | 17 ++++++++
lib/acl-internal.c | 100 +++++++++++++++++++++++++++++++++++++++++++++
lib/acl-internal.h | 3 ++
lib/file-has-acl.c | 21 ++++++++++
4 files changed, 141 insertions(+)
create mode 100644 doc/acl-nfsv4.txt
diff --git a/doc/acl-nfsv4.txt b/doc/acl-nfsv4.txt
new file mode 100644
index 0000000..71352f5
--- /dev/null
+++ b/doc/acl-nfsv4.txt
@@ -0,0 +1,17 @@
+General introduction:
+ https://linux.die.net/man/5/nfs4_acl
+
+The NFSv4 acls are defined in RFC7530 and as such, every NFSv4 server supporting ACLs
+will support this kind of ACLs (note the difference from POSIX draft ACLs)
+
+The ACLs can be obtained via the nfsv4-acl-tools, i.e.
+
+$ nfs4_getfacl <file>
+
+# file: <file>
+A::OWNER@:rwaDxtTnNcCy
+A::GROUP@:rwaDxtTnNcy
+A::EVERYONE@:rwaDxtTnNcy
+
+Gnulib is aiming to only provide a basic support of these, i.e. recognize trivial
+and non-trivial ACLs
diff --git a/lib/acl-internal.c b/lib/acl-internal.c
index be244c6..4c65dff 100644
--- a/lib/acl-internal.c
+++ b/lib/acl-internal.c
@@ -25,6 +25,9 @@
#if USE_ACL && HAVE_ACL_GET_FILE /* Linux, FreeBSD, Mac OS X, IRIX, Tru64, Cygwin >= 2.5 */
+# include <string.h>
+# include <arpa/inet.h>
+
# if HAVE_ACL_TYPE_EXTENDED /* Mac OS X */
/* ACL is an ACL, from a file, stored as type ACL_TYPE_EXTENDED.
@@ -122,6 +125,103 @@ acl_default_nontrivial (acl_t acl)
return (acl_entries (acl) > 0);
}
+# define ACE4_WHO_OWNER "OWNER@"
+# define ACE4_WHO_GROUP "GROUP@"
+# define ACE4_WHO_EVERYONE "EVERYONE@"
+
+# define ACE4_ACCESS_ALLOWED_ACE_TYPE 0
+# define ACE4_ACCESS_DENIED_ACE_TYPE 1
+
+/* ACE flag values */
+# define ACE4_IDENTIFIER_GROUP 0x00000040
+# define ROUNDUP(x, y) (((x) + (y) - 1) & - (y))
+
+int
+acl_nfs4_nontrivial (char *xattr, int len)
+{
+ int bufs = len;
+ uint32_t num_aces = ntohl (*((uint32_t*)(xattr))), /* Grab the number of aces in the acl */
+ num_a_aces = 0,
+ num_d_aces = 0;
+ char *bufp = xattr;
+
+ bufp += 4; /* sizeof(uint32_t); */
+ bufs -= 4;
+
+ for (uint32_t ace_n = 0; num_aces > ace_n ; ace_n++)
+ {
+ int d_ptr;
+ uint32_t flag,
+ wholen,
+ type;
+
+ /* Get the acl type */
+ if (bufs <= 0)
+ return -1;
+
+ type = ntohl (*((uint32_t*)bufp));
+
+ bufp += 4;
+ bufs -= 4;
+ if (bufs <= 0)
+ return -1;
+
+ flag = ntohl (*((uint32_t*)bufp));
+ /* As per RFC 7530, the flag should be 0, but we are just generous to Netapp
+ * and also accept the Group flag
+ */
+ if (flag & ~ACE4_IDENTIFIER_GROUP)
+ return 1;
+
+ /* we skip mask -
+ * it's too risky to test it and it does not seem to be actually needed */
+ bufp += 2*4;
+ bufs -= 2*4;
+
+ if (bufs <= 0)
+ return -1;
+
+ wholen = ntohl (*((uint32_t*)bufp));
+
+ bufp += 4;
+ bufs -= 4;
+
+ /* Get the who string */
+ if (bufs <= 0)
+ return -1;
+
+ /* for trivial ACL, we expect max 5 (typically 3) ACES, 3 Allow, 2 deny */
+ if (((strncmp (bufp, ACE4_WHO_OWNER, wholen) == 0)
+ || (strncmp (bufp, ACE4_WHO_GROUP, wholen) == 0))
+ && wholen == 6)
+ {
+ if (type == ACE4_ACCESS_ALLOWED_ACE_TYPE)
+ num_a_aces++;
+ if (type == ACE4_ACCESS_DENIED_ACE_TYPE)
+ num_d_aces++;
+ }
+ else
+ if ((strncmp (bufp, ACE4_WHO_EVERYONE, wholen) == 0)
+ && (type == ACE4_ACCESS_ALLOWED_ACE_TYPE)
+ && (wholen == 9))
+ num_a_aces++;
+ else
+ return 1;
+
+ d_ptr = ROUNDUP (wholen, 4);
+ bufp += d_ptr;
+ bufs -= d_ptr;
+
+ /* Make sure we aren't outside our domain */
+ if (bufs < 0)
+ return -1;
+
+ }
+ return !((num_a_aces <= 3) && (num_d_aces <= 2)
+ && (num_a_aces + num_d_aces == num_aces));
+
+}
+
# endif
#elif USE_ACL && HAVE_FACL && defined GETACL /* Solaris, Cygwin < 2.5, not HP-UX */
diff --git a/lib/acl-internal.h b/lib/acl-internal.h
index 9353376..2a249ff 100644
--- a/lib/acl-internal.h
+++ b/lib/acl-internal.h
@@ -147,6 +147,9 @@ rpl_acl_set_fd (int fd, acl_t acl)
# define acl_entries rpl_acl_entries
extern int acl_entries (acl_t);
# endif
+/* Return 1 if given ACL in XDR format is non-trivial
+ * Return 0 if it is trivial */
+extern int acl_nfs4_nontrivial (char *, int);
# if HAVE_ACL_TYPE_EXTENDED /* Mac OS X */
/* ACL is an ACL, from a file, stored as type ACL_TYPE_EXTENDED.
diff --git a/lib/file-has-acl.c b/lib/file-has-acl.c
index e02f062..1710234 100644
--- a/lib/file-has-acl.c
+++ b/lib/file-has-acl.c
@@ -32,6 +32,11 @@
#if GETXATTR_WITH_POSIX_ACLS
# include <sys/xattr.h>
# include <linux/xattr.h>
+# include <arpa/inet.h>
+# ifndef XATTR_NAME_NFSV4_ACL
+# define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+# endif
+# define TRIVIAL_NFS4_ACL_MAX_LENGTH 128
#endif
/* Return 1 if NAME has a nontrivial access control list,
@@ -67,6 +72,22 @@ file_has_acl (char const *name, struct stat const *sb)
return 1;
}
+ if (ret < 0)
+ { /* we might be on NFS, so try to check NFSv4 ACLs too */
+ char xattr[TRIVIAL_NFS4_ACL_MAX_LENGTH];
+
+ errno = 0; /* we need to reset errno set by the previous getxattr() */
+ ret = getxattr (name, XATTR_NAME_NFSV4_ACL, xattr, TRIVIAL_NFS4_ACL_MAX_LENGTH);
+ if (ret < 0 && errno == ENODATA)
+ ret = 0;
+ else
+ if (ret < 0 && errno == ERANGE)
+ return 1; /* we won't fit into the buffer, so non-trivial ACL is presented */
+ else
+ if (ret > 0)
+ /* looks like trivial ACL, but we need to investigate further */
+ return acl_nfs4_nontrivial (xattr, ret);
+ }
if (ret < 0)
return - acl_errno_valid (errno);
return ret;
--
2.38.1
From c5266d204a446bea619fa18da8520dceb0a54192 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 23 Dec 2022 15:18:29 -0800
Subject: [PATCH 2/3] file-has-acl: improve recent NFSv4 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This fixes a link failure with emacsclient on GNU/Linux. This
program wants file_has_acl but none of the other ACL primitives,
so it doesnt link acl-internal.o; this way it doesnt need to
link with -lacl. While I was at it I reviewed the recent changes,
fixed some unlikely overflow bugs, and adjusted to GNU style.
* doc/acl-nfsv4.txt: Remove. Its contents are now in a
comment in lib/file-has-acl.c.
* lib/acl-internal.c, lib/acl-internal.h: Move recent changes
relating to acl_nfs4_nontrivial to lib/file-has-acl.c, so that
there is no trouble linking programs that need only file_has_acl.
* lib/file-has-acl.c (acl_nfs4_nontrivial): Move here from
lib/acl-internal.c, so that we needn't link -lacl in
programs that want only file_has_acl, such as emacsclient.
Do not assume a char buffer is aligned for uint32_t.
Check more carefully for buffer read overrun.
Allow up to 6 ACEs, since other code does; but check
that theyre distinct. Avoid integer overflow.
Use memcmp rather than strncmp to compare memory blocks.
(file_has_acl): Preserve initial errno instead of setting to 0.
Allocate a bit more room for trivial ACL buffer.
Use EINVAL for botchedk NFSv4 ACLs (which shouldnt happen).
Upstream-commit: 35bd46f0c816948dc1a0430c8ba8b10a01167320
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
doc/acl-nfsv4.txt | 17 ------
lib/acl-internal.c | 100 -----------------------------------
lib/acl-internal.h | 3 --
lib/file-has-acl.c | 129 +++++++++++++++++++++++++++++++++++++++------
4 files changed, 113 insertions(+), 136 deletions(-)
delete mode 100644 doc/acl-nfsv4.txt
diff --git a/doc/acl-nfsv4.txt b/doc/acl-nfsv4.txt
deleted file mode 100644
index 71352f5..0000000
--- a/doc/acl-nfsv4.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-General introduction:
- https://linux.die.net/man/5/nfs4_acl
-
-The NFSv4 acls are defined in RFC7530 and as such, every NFSv4 server supporting ACLs
-will support this kind of ACLs (note the difference from POSIX draft ACLs)
-
-The ACLs can be obtained via the nfsv4-acl-tools, i.e.
-
-$ nfs4_getfacl <file>
-
-# file: <file>
-A::OWNER@:rwaDxtTnNcCy
-A::GROUP@:rwaDxtTnNcy
-A::EVERYONE@:rwaDxtTnNcy
-
-Gnulib is aiming to only provide a basic support of these, i.e. recognize trivial
-and non-trivial ACLs
diff --git a/lib/acl-internal.c b/lib/acl-internal.c
index 4c65dff..be244c6 100644
--- a/lib/acl-internal.c
+++ b/lib/acl-internal.c
@@ -25,9 +25,6 @@
#if USE_ACL && HAVE_ACL_GET_FILE /* Linux, FreeBSD, Mac OS X, IRIX, Tru64, Cygwin >= 2.5 */
-# include <string.h>
-# include <arpa/inet.h>
-
# if HAVE_ACL_TYPE_EXTENDED /* Mac OS X */
/* ACL is an ACL, from a file, stored as type ACL_TYPE_EXTENDED.
@@ -125,103 +122,6 @@ acl_default_nontrivial (acl_t acl)
return (acl_entries (acl) > 0);
}
-# define ACE4_WHO_OWNER "OWNER@"
-# define ACE4_WHO_GROUP "GROUP@"
-# define ACE4_WHO_EVERYONE "EVERYONE@"
-
-# define ACE4_ACCESS_ALLOWED_ACE_TYPE 0
-# define ACE4_ACCESS_DENIED_ACE_TYPE 1
-
-/* ACE flag values */
-# define ACE4_IDENTIFIER_GROUP 0x00000040
-# define ROUNDUP(x, y) (((x) + (y) - 1) & - (y))
-
-int
-acl_nfs4_nontrivial (char *xattr, int len)
-{
- int bufs = len;
- uint32_t num_aces = ntohl (*((uint32_t*)(xattr))), /* Grab the number of aces in the acl */
- num_a_aces = 0,
- num_d_aces = 0;
- char *bufp = xattr;
-
- bufp += 4; /* sizeof(uint32_t); */
- bufs -= 4;
-
- for (uint32_t ace_n = 0; num_aces > ace_n ; ace_n++)
- {
- int d_ptr;
- uint32_t flag,
- wholen,
- type;
-
- /* Get the acl type */
- if (bufs <= 0)
- return -1;
-
- type = ntohl (*((uint32_t*)bufp));
-
- bufp += 4;
- bufs -= 4;
- if (bufs <= 0)
- return -1;
-
- flag = ntohl (*((uint32_t*)bufp));
- /* As per RFC 7530, the flag should be 0, but we are just generous to Netapp
- * and also accept the Group flag
- */
- if (flag & ~ACE4_IDENTIFIER_GROUP)
- return 1;
-
- /* we skip mask -
- * it's too risky to test it and it does not seem to be actually needed */
- bufp += 2*4;
- bufs -= 2*4;
-
- if (bufs <= 0)
- return -1;
-
- wholen = ntohl (*((uint32_t*)bufp));
-
- bufp += 4;
- bufs -= 4;
-
- /* Get the who string */
- if (bufs <= 0)
- return -1;
-
- /* for trivial ACL, we expect max 5 (typically 3) ACES, 3 Allow, 2 deny */
- if (((strncmp (bufp, ACE4_WHO_OWNER, wholen) == 0)
- || (strncmp (bufp, ACE4_WHO_GROUP, wholen) == 0))
- && wholen == 6)
- {
- if (type == ACE4_ACCESS_ALLOWED_ACE_TYPE)
- num_a_aces++;
- if (type == ACE4_ACCESS_DENIED_ACE_TYPE)
- num_d_aces++;
- }
- else
- if ((strncmp (bufp, ACE4_WHO_EVERYONE, wholen) == 0)
- && (type == ACE4_ACCESS_ALLOWED_ACE_TYPE)
- && (wholen == 9))
- num_a_aces++;
- else
- return 1;
-
- d_ptr = ROUNDUP (wholen, 4);
- bufp += d_ptr;
- bufs -= d_ptr;
-
- /* Make sure we aren't outside our domain */
- if (bufs < 0)
- return -1;
-
- }
- return !((num_a_aces <= 3) && (num_d_aces <= 2)
- && (num_a_aces + num_d_aces == num_aces));
-
-}
-
# endif
#elif USE_ACL && HAVE_FACL && defined GETACL /* Solaris, Cygwin < 2.5, not HP-UX */
diff --git a/lib/acl-internal.h b/lib/acl-internal.h
index 2a249ff..9353376 100644
--- a/lib/acl-internal.h
+++ b/lib/acl-internal.h
@@ -147,9 +147,6 @@ rpl_acl_set_fd (int fd, acl_t acl)
# define acl_entries rpl_acl_entries
extern int acl_entries (acl_t);
# endif
-/* Return 1 if given ACL in XDR format is non-trivial
- * Return 0 if it is trivial */
-extern int acl_nfs4_nontrivial (char *, int);
# if HAVE_ACL_TYPE_EXTENDED /* Mac OS X */
/* ACL is an ACL, from a file, stored as type ACL_TYPE_EXTENDED.
diff --git a/lib/file-has-acl.c b/lib/file-has-acl.c
index 1710234..676523b 100644
--- a/lib/file-has-acl.c
+++ b/lib/file-has-acl.c
@@ -29,14 +29,97 @@
#include "acl-internal.h"
-#if GETXATTR_WITH_POSIX_ACLS
+#if USE_ACL && GETXATTR_WITH_POSIX_ACLS
+# include <string.h>
+# include <arpa/inet.h>
# include <sys/xattr.h>
# include <linux/xattr.h>
-# include <arpa/inet.h>
# ifndef XATTR_NAME_NFSV4_ACL
# define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
# endif
-# define TRIVIAL_NFS4_ACL_MAX_LENGTH 128
+
+enum {
+ /* ACE4_ACCESS_ALLOWED_ACE_TYPE = 0x00000000, */
+ ACE4_ACCESS_DENIED_ACE_TYPE = 0x00000001,
+ ACE4_IDENTIFIER_GROUP = 0x00000040
+};
+
+/* Return 1 if given ACL in XDR format is non-trivial, 0 if it is trivial.
+ -1 upon failure to determine it. Possibly change errno. Assume that
+ the ACL is valid, except avoid undefined behavior even if invalid.
+
+ See <https://linux.die.net/man/5/nfs4_acl>. The NFSv4 acls are
+ defined in Internet RFC 7530 and as such, every NFSv4 server
+ supporting ACLs should support NFSv4 ACLs (they differ from from
+ POSIX draft ACLs). The ACLs can be obtained via the
+ nfsv4-acl-tools, e.g., the nfs4_getfacl command. Gnulib provides
+ only basic support of NFSv4 ACLs, i.e., recognize trivial vs
+ nontrivial ACLs. */
+
+static int
+acl_nfs4_nontrivial (uint32_t *xattr, ssize_t nbytes)
+{
+ enum { BYTES_PER_NETWORK_UINT = 4};
+
+ /* Grab the number of aces in the acl. */
+ nbytes -= BYTES_PER_NETWORK_UINT;
+ if (nbytes < 0)
+ return -1;
+ uint32_t num_aces = ntohl (*xattr++);
+ if (6 < num_aces)
+ return 1;
+ int ace_found = 0;
+
+ for (int ace_n = 0; ace_n < num_aces; ace_n++)
+ {
+ /* Get the acl type and flag. Skip the mask; it's too risky to
+ test it and it does not seem to be needed. Get the wholen. */
+ nbytes -= 4 * BYTES_PER_NETWORK_UINT;
+ if (nbytes < 0)
+ return -1;
+ uint32_t type = ntohl (xattr[0]);
+ uint32_t flag = ntohl (xattr[1]);
+ uint32_t wholen = ntohl (xattr[3]);
+ xattr += 4;
+ int64_t wholen4 = wholen;
+ wholen4 = ((wholen4 + (BYTES_PER_NETWORK_UINT))
+ & ~ (BYTES_PER_NETWORK_UINT - 1));
+
+ /* Trivial ACLs have only ACE4_ACCESS_ALLOWED_ACE_TYPE or
+ ACE4_ACCESS_DENIED_ACE_TYPE. */
+ if (ACE4_ACCESS_DENIED_ACE_TYPE < type)
+ return 1;
+
+ /* RFC 7530 says FLAG should be 0, but be generous to NetApp and
+ also accept the group flag. */
+ if (flag & ~ACE4_IDENTIFIER_GROUP)
+ return 1;
+
+ /* Get the who string. Check NBYTES - WHOLEN4 before storing
+ into NBYTES, to avoid truncation on conversion. */
+ if (nbytes - wholen4 < 0)
+ return -1;
+ nbytes -= wholen4;
+
+ /* For a trivial ACL, max 6 (typically 3) ACEs, 3 allow, 3 deny.
+ Check that there is at most one ACE of each TYPE and WHO. */
+ int who2
+ = (wholen == 6 && memcmp (xattr, "OWNER@", 6) == 0 ? 0
+ : wholen == 6 && memcmp (xattr, "GROUP@", 6) == 0 ? 2
+ : wholen == 9 && memcmp (xattr, "EVERYONE@", 9) == 0 ? 4
+ : -1);
+ if (who2 < 0)
+ return 1;
+ int ace_found_bit = 1 << (who2 | type);
+ if (ace_found & ace_found_bit)
+ return 1;
+ ace_found |= ace_found_bit;
+
+ xattr = (uint32_t *) ((char *) xattr + wholen4);
+ }
+
+ return 0;
+}
#endif
/* Return 1 if NAME has a nontrivial access control list,
@@ -56,6 +139,7 @@ file_has_acl (char const *name, struct stat const *sb)
# if GETXATTR_WITH_POSIX_ACLS
ssize_t ret;
+ int initial_errno = errno;
ret = getxattr (name, XATTR_NAME_POSIX_ACL_ACCESS, NULL, 0);
if (ret < 0 && errno == ENODATA)
@@ -73,20 +157,33 @@ file_has_acl (char const *name, struct stat const *sb)
}
if (ret < 0)
- { /* we might be on NFS, so try to check NFSv4 ACLs too */
- char xattr[TRIVIAL_NFS4_ACL_MAX_LENGTH];
-
- errno = 0; /* we need to reset errno set by the previous getxattr() */
- ret = getxattr (name, XATTR_NAME_NFSV4_ACL, xattr, TRIVIAL_NFS4_ACL_MAX_LENGTH);
- if (ret < 0 && errno == ENODATA)
- ret = 0;
+ {
+ /* Check for NFSv4 ACLs. The max length of a trivial
+ ACL is 6 words for owner, 6 for group, 7 for everyone,
+ all times 2 because there are both allow and deny ACEs.
+ There are 6 words for owner because of type, flag, mask,
+ wholen, "OWNER@"+pad and similarly for group; everyone is
+ another word to hold "EVERYONE@". */
+ uint32_t xattr[2 * (6 + 6 + 7)];
+
+ ret = getxattr (name, XATTR_NAME_NFSV4_ACL, xattr, sizeof xattr);
+ if (ret < 0)
+ switch (errno)
+ {
+ case ENODATA: return 0;
+ case ERANGE : return 1; /* ACL must be nontrivial. */
+ }
else
- if (ret < 0 && errno == ERANGE)
- return 1; /* we won't fit into the buffer, so non-trivial ACL is presented */
- else
- if (ret > 0)
- /* looks like trivial ACL, but we need to investigate further */
- return acl_nfs4_nontrivial (xattr, ret);
+ {
+ /* It looks like a trivial ACL, but investigate further. */
+ ret = acl_nfs4_nontrivial (xattr, ret);
+ if (ret < 0)
+ {
+ errno = EINVAL;
+ return ret;
+ }
+ errno = initial_errno;
+ }
}
if (ret < 0)
return - acl_errno_valid (errno);
--
2.38.1
From faf965110372c82cd99e9f44f0c64f03cdabb2c1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 27 Dec 2022 20:00:58 -0800
Subject: [PATCH 3/3] file-has-acl: fix recently-introduced NFSv4 bug
* lib/file-has-acl.c (acl_nfs4_nontrivial): Fix off-by-one
error when rounding WHOLEN up to next multiple of 4.
Pacify GCC 12.2.1 -Wcast-align.
Upstream-commit: d65e5a8ba77595a598c9ddb8dfa09c4aea732659
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
lib/file-has-acl.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/lib/file-has-acl.c b/lib/file-has-acl.c
index 676523b..7876edc 100644
--- a/lib/file-has-acl.c
+++ b/lib/file-has-acl.c
@@ -81,9 +81,10 @@ acl_nfs4_nontrivial (uint32_t *xattr, ssize_t nbytes)
uint32_t flag = ntohl (xattr[1]);
uint32_t wholen = ntohl (xattr[3]);
xattr += 4;
- int64_t wholen4 = wholen;
- wholen4 = ((wholen4 + (BYTES_PER_NETWORK_UINT))
- & ~ (BYTES_PER_NETWORK_UINT - 1));
+ int whowords = (wholen / BYTES_PER_NETWORK_UINT
+ + (wholen % BYTES_PER_NETWORK_UINT != 0));
+ int64_t wholen4 = whowords;
+ wholen4 *= BYTES_PER_NETWORK_UINT;
/* Trivial ACLs have only ACE4_ACCESS_ALLOWED_ACE_TYPE or
ACE4_ACCESS_DENIED_ACE_TYPE. */
@@ -115,7 +116,7 @@ acl_nfs4_nontrivial (uint32_t *xattr, ssize_t nbytes)
return 1;
ace_found |= ace_found_bit;
- xattr = (uint32_t *) ((char *) xattr + wholen4);
+ xattr += whowords;
}
return 0;
--
2.38.1

@ -0,0 +1,9 @@
Provides: bundled(gnulib)
# make it possible to install the latest available Adobe Reader RPM for Linux
Provides: /bin/cat
Provides: /bin/chmod
Provides: /bin/echo
Provides: /bin/ln
Provides: /bin/rm
Provides: /bin/touch

@ -0,0 +1,120 @@
diff --git a/man/chcon.x b/man/chcon.x
index 8c1ff6f..c84fb96 100644
--- a/man/chcon.x
+++ b/man/chcon.x
@@ -1,4 +1,4 @@
[NAME]
-chcon \- change file security context
+chcon \- change file SELinux security context
[DESCRIPTION]
.\" Add any additional description here
diff --git a/man/runcon.x b/man/runcon.x
index d2df13e..5c5f5d8 100644
--- a/man/runcon.x
+++ b/man/runcon.x
@@ -1,5 +1,5 @@
[NAME]
-runcon \- run command with specified security context
+runcon \- run command with specified SELinux security context
[DESCRIPTION]
Run COMMAND with completely-specified CONTEXT, or with current or
transitioned security context modified by one or more of LEVEL,
diff --git a/src/cp.c b/src/cp.c
index 1b528c6..25dbb88 100644
--- a/src/cp.c
+++ b/src/cp.c
@@ -203,6 +203,9 @@ Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.\n\
all\n\
"), stdout);
fputs (_("\
+ -c deprecated, same as --preserve=context\n\
+"), stdout);
+ fputs (_("\
--no-preserve=ATTR_LIST don't preserve the specified attributes\n\
--parents use full source file name under DIRECTORY\n\
"), stdout);
@@ -929,7 +932,7 @@ main (int argc, char **argv)
selinux_enabled = (0 < is_selinux_enabled ());
cp_option_init (&x);
- while ((c = getopt_long (argc, argv, "abdfHilLnprst:uvxPRS:TZ",
+ while ((c = getopt_long (argc, argv, "abcdfHilLnprst:uvxPRS:TZ",
long_opts, NULL))
!= -1)
{
@@ -977,6 +980,17 @@ main (int argc, char **argv)
copy_contents = true;
break;
+ case 'c':
+ fprintf (stderr, "%s: warning: option '-c' is deprecated, please use '--preserve=context' instead\n", argv[0]);
+ if ( x.set_security_context ) {
+ (void) fprintf(stderr, "%s: cannot force target context and preserve it\n", argv[0]);
+ exit( 1 );
+ }
+ else if (selinux_enabled) {
+ x.preserve_security_context = true;
+ x.require_preserve_context = true;
+ }
+ break;
case 'd':
x.preserve_links = true;
x.dereference = DEREF_NEVER;
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 47e4480..cff2ead 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -8083,6 +8083,11 @@ done
exit $fail
@end example
+@item -c
+@cindex SELinux security context information, preserving
+Preserve SELinux security context of the original files if possible.
+Some file systems don't support storing of SELinux security context.
+
@item --copy-contents
@cindex directories, copying recursively
@cindex copying directories recursively
diff --git a/src/install.c b/src/install.c
index d79d597..437889a 100644
--- a/src/install.c
+++ b/src/install.c
@@ -673,7 +673,7 @@ In the 4th form, create all components of the given DIRECTORY(ies).\n\
-v, --verbose print the name of each directory as it is created\n\
"), stdout);
fputs (_("\
- --preserve-context preserve SELinux security context\n\
+ -P, --preserve-context preserve SELinux security context (-P deprecated)\n\
-Z set SELinux security context of destination\n\
file and each created directory to default type\n\
--context[=CTX] like -Z, or if CTX is specified then set the\n\
@@ -824,7 +824,7 @@ main (int argc, char **argv)
dir_arg = false;
umask (0);
- while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pt:TvS:Z", long_options,
+ while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pPt:TvS:Z", long_options,
NULL)) != -1)
{
switch (optc)
@@ -885,6 +885,8 @@ main (int argc, char **argv)
no_target_directory = true;
break;
+ case 'P':
+ fprintf (stderr, "%s: warning: option '-P' is deprecated, please use '--preserve-context' instead\n", argv[0]);
case PRESERVE_CONTEXT_OPTION:
if (! selinux_enabled)
{
@@ -892,6 +894,10 @@ main (int argc, char **argv)
"this kernel is not SELinux-enabled"));
break;
}
+ if ( x.set_security_context ) {
+ (void) fprintf(stderr, "%s: cannot force target context and preserve it\n", argv[0]);
+ exit( 1 );
+ }
x.preserve_security_context = true;
use_default_selinux_context = false;
break;

@ -0,0 +1,12 @@
diff -urNp coreutils-5.97-orig/man/date.x coreutils-5.97/man/date.x
--- coreutils-5.97-orig/man/date.x 1999-11-02 15:07:36.000000000 +0100
+++ coreutils-5.97/man/date.x 2008-10-15 10:13:31.000000000 +0200
@@ -11,3 +11,8 @@ calendar date, time of day, time zone, day of week, relative time,
relative date, and numbers. An empty string indicates the beginning
of the day. The date string format is more complex than is easily
documented here but is fully described in the info documentation.
+[ENVIRONMENT]
+.TP
+TZ
+Specifies the timezone, unless overridden by command line parameters.
+If neither is specified, the setting from /etc/localtime is used.

@ -0,0 +1,105 @@
%{_bindir}/arch
%{_bindir}/b2sum
%{_bindir}/basename
%{_bindir}/basenc
%{_bindir}/cat
%{_bindir}/chgrp
%{_bindir}/chmod
%{_bindir}/chown
%{_bindir}/cp
%{_bindir}/cut
%{_bindir}/date
%{_bindir}/dd
%{_bindir}/df
%{_bindir}/echo
%{_bindir}/env
%{_bindir}/false
%{_bindir}/link
%{_bindir}/ln
%{_bindir}/ls
%{_bindir}/mkdir
%{_bindir}/mknod
%{_bindir}/mv
%{_bindir}/nice
%{_bindir}/pwd
%{_bindir}/readlink
%{_bindir}/rm
%{_bindir}/rmdir
%{_bindir}/sleep
%{_bindir}/sort
%{_bindir}/stty
%{_bindir}/sync
%{_bindir}/mktemp
%{_bindir}/touch
%{_bindir}/true
%{_bindir}/uname
%{_bindir}/unlink
%{_bindir}/[
%{_bindir}/base32
%{_bindir}/base64
%{_bindir}/chcon
%{_bindir}/cksum
%{_bindir}/comm
%{_bindir}/csplit
%{_bindir}/dir
%{_bindir}/dircolors
%{_bindir}/dirname
%{_bindir}/du
%{_bindir}/expand
%{_bindir}/expr
%{_bindir}/factor
%{_bindir}/fmt
%{_bindir}/fold
%{_bindir}/groups
%{_bindir}/head
%{_bindir}/hostid
%{_bindir}/id
%{_bindir}/install
%{_bindir}/join
%{_bindir}/logname
%{_bindir}/md5sum
%{_bindir}/mkfifo
%{_bindir}/nl
%{_bindir}/nohup
%{_bindir}/nproc
%{_bindir}/numfmt
%{_bindir}/od
%{_bindir}/paste
%{_bindir}/pathchk
%{_bindir}/pinky
%{_bindir}/pr
%{_bindir}/printenv
%{_bindir}/printf
%{_bindir}/ptx
%{_bindir}/realpath
%{_bindir}/runcon
%{_bindir}/seq
%{_bindir}/sha1sum
%{_bindir}/sha224sum
%{_bindir}/sha256sum
%{_bindir}/sha384sum
%{_bindir}/sha512sum
%{_bindir}/shred
%{_bindir}/shuf
%{_bindir}/split
%{_bindir}/stat
%{_bindir}/stdbuf
%{_bindir}/sum
%{_bindir}/tac
%{_bindir}/tail
%{_bindir}/tee
%{_bindir}/test
%{_bindir}/timeout
%{_bindir}/tr
%{_bindir}/truncate
%{_bindir}/tsort
%{_bindir}/tty
%{_bindir}/unexpand
%{_bindir}/uniq
%{_bindir}/users
%{_bindir}/vdir
%{_bindir}/wc
%{_bindir}/who
%{_bindir}/whoami
%{_bindir}/yes
%{_sbindir}/chroot

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save