diff --git a/coreutils-8.25-sort-thousands-sep.patch b/coreutils-8.25-sort-thousands-sep.patch new file mode 100644 index 0000000..b9d2b5c --- /dev/null +++ b/coreutils-8.25-sort-thousands-sep.patch @@ -0,0 +1,332 @@ +From c479153d77b419a6cae4551b63d2b73096c1130e Mon Sep 17 00:00:00 2001 +From: Kamil Dudka +Date: Mon, 18 Jul 2016 19:04:43 +0200 +Subject: [PATCH 1/3] maint: sort.c: deduplicate code for traversing numbers + +* src/sort.c (traverse_raw_number): New function for traversing numbers. +(find_unit_order): Use traverse_raw_number() instead of open-coding it. +(debug_key): Likewise. +--- + src/sort.c | 63 ++++++++++++++++++++++++++++++++++---------------------------- + 1 file changed, 35 insertions(+), 28 deletions(-) + +diff --git a/src/sort.c b/src/sort.c +index 5b02343..e28bb6c 100644 +--- a/src/sort.c ++++ b/src/sort.c +@@ -2231,18 +2231,16 @@ static char const unit_order[UCHAR_LIM] = + #endif + }; + +-/* Return an integer that represents the order of magnitude of the +- unit following the number. The number may contain thousands +- separators and a decimal point, but it may not contain leading blanks. +- Negative numbers get negative orders; zero numbers have a zero order. */ +- +-static int _GL_ATTRIBUTE_PURE +-find_unit_order (char const *number) ++/* Traverse number given as *number consisting of digits, thousands_sep, and ++ decimal_point chars only. Returns the highest digit found in the number, ++ or '\0' if no digit has been found. Upon return *number points at the ++ character that immediately follows after the given number. */ ++static unsigned char ++traverse_raw_number (char const **number) + { +- bool minus_sign = (*number == '-'); +- char const *p = number + minus_sign; +- int nonzero = 0; ++ char const *p = *number; + unsigned char ch; ++ unsigned char max_digit = '\0'; + + /* Scan to end of number. + Decimals or separators not followed by digits stop the scan. +@@ -2253,16 +2251,34 @@ find_unit_order (char const *number) + do + { + while (ISDIGIT (ch = *p++)) +- nonzero |= ch - '0'; ++ if (max_digit < ch) ++ max_digit = ch; + } + while (ch == thousands_sep); + + if (ch == decimal_point) + while (ISDIGIT (ch = *p++)) +- nonzero |= ch - '0'; ++ if (max_digit < ch) ++ max_digit = ch; ++ ++ *number = p - 1; ++ return max_digit; ++} ++ ++/* Return an integer that represents the order of magnitude of the ++ unit following the number. The number may contain thousands ++ separators and a decimal point, but it may not contain leading blanks. ++ Negative numbers get negative orders; zero numbers have a zero order. */ + +- if (nonzero) ++static int _GL_ATTRIBUTE_PURE ++find_unit_order (char const *number) ++{ ++ bool minus_sign = (*number == '-'); ++ char const *p = number + minus_sign; ++ unsigned char max_digit = traverse_raw_number (&p); ++ if ('0' < max_digit) + { ++ unsigned char ch = *p; + int order = unit_order[ch]; + return (minus_sign ? -order : order); + } +@@ -2655,23 +2671,14 @@ debug_key (struct line const *line, struct keyfield const *key) + ignore_value (strtold (beg, &tighter_lim)); + else if (key->numeric || key->human_numeric) + { +- char *p = beg + (beg < lim && *beg == '-'); +- bool found_digit = false; +- unsigned char ch; +- +- do ++ char const *p = beg + (beg < lim && *beg == '-'); ++ unsigned char max_digit = traverse_raw_number (&p); ++ if ('0' <= max_digit) + { +- while (ISDIGIT (ch = *p++)) +- found_digit = true; ++ unsigned char ch = *p; ++ tighter_lim = (char *) p ++ + (key->human_numeric && unit_order[ch]); + } +- while (ch == thousands_sep); +- +- if (ch == decimal_point) +- while (ISDIGIT (ch = *p++)) +- found_digit = true; +- +- if (found_digit) +- tighter_lim = p - ! (key->human_numeric && unit_order[ch]); + } + else + tighter_lim = lim; +-- +2.5.5 + + +From 8c39465a5b0343ff7a21286dd69ed5430685d2f7 Mon Sep 17 00:00:00 2001 +From: Kamil Dudka +Date: Mon, 18 Jul 2016 19:04:44 +0200 +Subject: [PATCH 2/3] sort: make -h work with -k and blank used as thousands + separator + +* src/sort.c (traverse_raw_number): Allow to skip only one occurrence +of thousands_sep to avoid finding the unit in the next column in case +thousands_sep matches as blank and is used as column delimiter. +* tests/misc/sort-h-thousands-sep.sh: Add regression test for this bug. +* tests/local.mk: Reference the test. +* NEWS: Mention the bug fix. +Reported at https://bugzilla.redhat.com/1355780 +Fixes http://bugs.gnu.org/24015 +--- + src/sort.c | 14 ++++++++---- + tests/local.mk | 1 + + tests/misc/sort-h-thousands-sep.sh | 47 ++++++++++++++++++++++++++++++++++++++ + 3 files changed, 57 insertions(+), 5 deletions(-) + create mode 100755 tests/misc/sort-h-thousands-sep.sh + +diff --git a/src/sort.c b/src/sort.c +index e28bb6c..dd3ba58 100644 +--- a/src/sort.c ++++ b/src/sort.c +@@ -2248,13 +2248,17 @@ traverse_raw_number (char const **number) + to be lacking in units. + FIXME: add support for multibyte thousands_sep and decimal_point. */ + +- do ++ while (ISDIGIT (ch = *p++)) + { +- while (ISDIGIT (ch = *p++)) +- if (max_digit < ch) +- max_digit = ch; ++ if (max_digit < ch) ++ max_digit = ch; ++ ++ /* Allow to skip only one occurrence of thousands_sep to avoid finding ++ the unit in the next column in case thousands_sep matches as blank ++ and is used as column delimiter. */ ++ if (*p == thousands_sep) ++ ++p; + } +- while (ch == thousands_sep); + + if (ch == decimal_point) + while (ISDIGIT (ch = *p++)) +diff --git a/tests/local.mk b/tests/local.mk +index 42d39f2..dccff8d 100644 +--- a/tests/local.mk ++++ b/tests/local.mk +@@ -344,6 +344,7 @@ all_tests = \ + tests/misc/sort-discrim.sh \ + tests/misc/sort-files0-from.pl \ + tests/misc/sort-float.sh \ ++ tests/misc/sort-h-thousands-sep.sh \ + tests/misc/sort-mb-tests.sh \ + tests/i18n/sort.sh \ + tests/misc/sort-merge.pl \ +diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh +new file mode 100755 +index 0000000..17f1b6c +--- /dev/null ++++ b/tests/misc/sort-h-thousands-sep.sh +@@ -0,0 +1,47 @@ ++#!/bin/sh ++# exercise 'sort -h' in locales where thousands separator is blank ++ ++# Copyright (C) 2016 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src ++print_ver_ sort ++test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \ ++ || skip_ 'The Swedish locale with blank thousands separator is unavailable.' ++ ++tee exp1 > in << _EOF_ ++1 1k 4 003 1M ++2k 2M 4 002 2 ++3M 3 4 001 3k ++_EOF_ ++ ++cat > exp2 << _EOF_ ++3M 3 4 001 3k ++1 1k 4 003 1M ++2k 2M 4 002 2 ++_EOF_ ++ ++cat > exp3 << _EOF_ ++3M 3 4 001 3k ++2k 2M 4 002 2 ++1 1k 4 003 1M ++_EOF_ ++ ++for i in 1 2 3; do ++ LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1 ++ compare "exp${i}" "out${i}" || fail=1 ++done ++ ++Exit $fail +-- +2.5.5 + + +From 46ef53f558e7bc1c0bc0abd62a86b40b4141e058 Mon Sep 17 00:00:00 2001 +From: Kamil Dudka +Date: Mon, 18 Jul 2016 19:04:45 +0200 +Subject: [PATCH 3/3] sort: with -h, disallow thousands separator between + number and unit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +* src/sort.c (traverse_raw_number): Accept thousands separator only +if it is immediately followed by a digit. +* tests/misc/sort-h-thousands-sep.sh: Cover the fix for this bug. + +Suggested by Pádraig Brady in http://bugs.gnu.org/24015 +--- + src/sort.c | 11 ++++++++++- + tests/misc/sort-h-thousands-sep.sh | 25 +++++++++++++------------ + 2 files changed, 23 insertions(+), 13 deletions(-) + +diff --git a/src/sort.c b/src/sort.c +index dd3ba58..69ef75f 100644 +--- a/src/sort.c ++++ b/src/sort.c +@@ -2241,6 +2241,7 @@ traverse_raw_number (char const **number) + char const *p = *number; + unsigned char ch; + unsigned char max_digit = '\0'; ++ bool ends_with_thousands_sep = false; + + /* Scan to end of number. + Decimals or separators not followed by digits stop the scan. +@@ -2256,10 +2257,18 @@ traverse_raw_number (char const **number) + /* Allow to skip only one occurrence of thousands_sep to avoid finding + the unit in the next column in case thousands_sep matches as blank + and is used as column delimiter. */ +- if (*p == thousands_sep) ++ ends_with_thousands_sep = (*p == thousands_sep); ++ if (ends_with_thousands_sep) + ++p; + } + ++ if (ends_with_thousands_sep) ++ { ++ /* thousands_sep not followed by digit is not allowed. */ ++ *number = p - 2; ++ return max_digit; ++ } ++ + if (ch == decimal_point) + while (ISDIGIT (ch = *p++)) + if (max_digit < ch) +diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh +index 17f1b6c..3ffa89e 100755 +--- a/tests/misc/sort-h-thousands-sep.sh ++++ b/tests/misc/sort-h-thousands-sep.sh +@@ -18,28 +18,29 @@ + + . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src + print_ver_ sort ++ + test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \ + || skip_ 'The Swedish locale with blank thousands separator is unavailable.' + +-tee exp1 > in << _EOF_ +-1 1k 4 003 1M +-2k 2M 4 002 2 +-3M 3 4 001 3k ++tee exp1 exp3 > in << _EOF_ ++1 1k 1 M 4 003 1M ++2k 2M 2 k 4 002 2 ++3M 3 3 G 4 001 3k + _EOF_ + + cat > exp2 << _EOF_ +-3M 3 4 001 3k +-1 1k 4 003 1M +-2k 2M 4 002 2 ++3M 3 3 G 4 001 3k ++1 1k 1 M 4 003 1M ++2k 2M 2 k 4 002 2 + _EOF_ + +-cat > exp3 << _EOF_ +-3M 3 4 001 3k +-2k 2M 4 002 2 +-1 1k 4 003 1M ++cat > exp5 << _EOF_ ++3M 3 3 G 4 001 3k ++2k 2M 2 k 4 002 2 ++1 1k 1 M 4 003 1M + _EOF_ + +-for i in 1 2 3; do ++for i in 1 2 3 5; do + LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1 + compare "exp${i}" "out${i}" || fail=1 + done +-- +2.5.5 + diff --git a/coreutils.spec b/coreutils.spec index 010fe77..934e7a3 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils Version: 8.25 -Release: 13%{?dist} +Release: 14%{?dist} License: GPLv3+ Group: System Environment/Base Url: http://www.gnu.org/software/coreutils/ @@ -19,6 +19,8 @@ Source10: coreutils-find-requires.sh # From upstream Patch952: coreutils-8.25-intall-Z-selinux.patch +# fix 'sort -h -k' in locales that use blank as thousands separator (#1355780) +Patch953: coreutils-8.25-sort-thousands-sep.patch # Our patches #general patch to workaround koji build system issues @@ -209,9 +211,13 @@ tee DIR_COLORS{,.256color,.lightbgcolor} /dev/null %patch951 -p1 -b .selinuxman %patch952 -p1 +# upstream patches +%patch953 -p1 + chmod a+x \ tests/df/direct.sh \ tests/install/install-Z-selinux.sh \ + tests/misc/sort-h-thousands-sep.sh \ tests/misc/sort-mb-tests.sh \ || : @@ -352,6 +358,9 @@ fi %license COPYING %changelog +* Tue Jul 19 2016 Kamil Dudka - 8.25-14 +- fix 'sort -h -k' in locales that use blank as thousands separator (#1355780) + * Thu Jul 14 2016 Kamil Dudka - 8.25-13 - make 'sort -h' work for arbitrary column even when using UTF-8 locales