From 06b6b564ae2436c853a8a360243e4044f0b64db9 Mon Sep 17 00:00:00 2001 From: Apertis package maintainers <packagers@lists.apertis.org> Date: Fri, 11 Jun 2021 14:37:30 +0200 Subject: [PATCH] d/patches: backport improvements to the `sort` command The post-installation script of at least one packages makes use of the `-k` option for `sort`, which isn't implemented in release 0.0.6 of `rust-coreutils`. As this has since been implemented upstream, this commit picks the corresponding patches so we can benefit of those improvements. Signed-off-by: Arnaud Ferraris <arnaud.ferraris@collabora.com> --- debian/patches/Ignore-a-test.patch | 60 + ...table-sort-ignore-non-printing-month.patch | 2303 +++++++++++++++++ ...s-fixes-and-performance-improvements.patch | 502 ++++ debian/patches/series | 6 + .../sort-implement-k-and-t-support.patch | 1333 ++++++++++ 5 files changed, 4204 insertions(+) create mode 100644 debian/patches/Ignore-a-test.patch create mode 100644 debian/patches/Sort-Implement-stable-sort-ignore-non-printing-month.patch create mode 100644 debian/patches/Sort-Various-fixes-and-performance-improvements.patch create mode 100644 debian/patches/sort-implement-k-and-t-support.patch diff --git a/debian/patches/Ignore-a-test.patch b/debian/patches/Ignore-a-test.patch new file mode 100644 index 0000000..1584ee6 --- /dev/null +++ b/debian/patches/Ignore-a-test.patch @@ -0,0 +1,60 @@ +From: Sylvestre Ledru <sylvestre@debian.org> +Date: Fri, 9 Apr 2021 10:14:41 +0200 +Subject: Ignore a test (#2053) + +* Disable chksum: test_arg_overrides_stdin +fails often with: + +---- test_cksum::test_arg_overrides_stdin stdout ---- +current_directory_resolved: +touch: /tmp/.tmpv9hydc/a +run: /target/x86_64-unknown-linux-gnu/debug/coreutils cksum a +thread 'test_cksum::test_arg_overrides_stdin' panicked at 'Broken pipe (os error 32)', tests/common/util.rs:742:37 +note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace + +* rustfmt the recent change + +Origin: 
upstream, https://github.com/uutils/coreutils/commit/9ae4928b7b4f43495921fe8ac6128ea226d20dbd +--- + src/uu/sort/src/sort.rs | 6 ++---- + src/uu/stdbuf/src/stdbuf.rs | 3 +-- + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs +index 08936ff..cf6c258 100644 +--- a/src/uu/sort/src/sort.rs ++++ b/src/uu/sort/src/sort.rs +@@ -665,9 +665,7 @@ fn get_leading_gen(a: &str) -> String { + for c in p_iter.to_owned() { + let next_char_numeric = p_iter.peek().unwrap_or(&'\0').is_numeric(); + // Only general numeric recognizes e notation and, see block below, the '+' sign +- if (c.eq(&'e') && !next_char_numeric) +- || (c.eq(&'E') && !next_char_numeric) +- { ++ if (c.eq(&'e') && !next_char_numeric) || (c.eq(&'E') && !next_char_numeric) { + r = a.split(c).next().unwrap_or("").to_owned(); + break; + // If positive sign and next char is not numeric, split at postive sign at keep trailing numbers +@@ -813,7 +811,7 @@ fn human_numeric_convert(a: &str) -> f64 { + 'E' => 1E18, + 'Z' => 1E21, + 'Y' => 1E24, +- _ => 1f64, ++ _ => 1f64, + }; + num_part * suffix + } +diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs +index a61ba96..67ed9a8 100644 +--- a/src/uu/stdbuf/src/stdbuf.rs ++++ b/src/uu/stdbuf/src/stdbuf.rs +@@ -80,8 +80,7 @@ fn print_version() { + fn print_usage(opts: &Options) { + let brief = "Run COMMAND, with modified buffering operations for its standard streams\n \ + Mandatory arguments to long options are mandatory for short options too."; +- let explanation = +- "If MODE is 'L' the corresponding stream will be line buffered.\n \ ++ let explanation = "If MODE is 'L' the corresponding stream will be line buffered.\n \ + This option is invalid with standard input.\n\n \ + If MODE is '0' the corresponding stream will be unbuffered.\n\n \ + Otherwise MODE is a number which may be followed by one of the following:\n\n \ diff --git 
a/debian/patches/Sort-Implement-stable-sort-ignore-non-printing-month.patch b/debian/patches/Sort-Implement-stable-sort-ignore-non-printing-month.patch new file mode 100644 index 0000000..f023d25 --- /dev/null +++ b/debian/patches/Sort-Implement-stable-sort-ignore-non-printing-month.patch @@ -0,0 +1,2303 @@ +From: electricboogie <32370782+electricboogie@users.noreply.github.com> +Date: Thu, 8 Apr 2021 15:07:09 -0500 +Subject: Sort: Implement stable sort, ignore non-printing, month sort dedup, + auto parallel sort through rayon, zero terminated sort, check silent (#2008) + +Origin: upstream, https://github.com/uutils/coreutils/commit/8474249e5f301068565c2d62f04b04d40b0b5817 +--- + Cargo.lock | 20 +- + src/uu/sort/Cargo.toml | 3 +- + src/uu/sort/src/sort.rs | 558 ++++++++++++++++----- + tests/by-util/test_sort.rs | 252 ++++++++-- + .../sort/exponents-positive-general.expected | 12 + + tests/fixtures/sort/exponents-positive-general.txt | 12 + + .../sort/exponents-positive-numeric.expected | 12 + + tests/fixtures/sort/exponents-positive-numeric.txt | 12 + + .../sort/human-mixed-inputs-reverse.expected | 37 ++ + tests/fixtures/sort/human-mixed-inputs-reverse.txt | 37 ++ + .../sort/human-mixed-inputs-stable.expected | 37 ++ + tests/fixtures/sort/human-mixed-inputs-stable.txt | 37 ++ + .../sort/human-mixed-inputs-unique.expected | 13 + + tests/fixtures/sort/human-mixed-inputs-unique.txt | 37 ++ + tests/fixtures/sort/human-mixed-inputs.expected | 37 ++ + tests/fixtures/sort/human-mixed-inputs.txt | 46 ++ + .../sort/mixed_floats_ints_chars_numeric.expected | 30 ++ + .../sort/mixed_floats_ints_chars_numeric.txt | 30 ++ + ...ixed_floats_ints_chars_numeric_reverse.expected | 30 ++ + ...oats_ints_chars_numeric_reverse_stable.expected | 30 ++ + ...ed_floats_ints_chars_numeric_reverse_stable.txt | 30 ++ + ...mixed_floats_ints_chars_numeric_stable.expected | 30 ++ + .../mixed_floats_ints_chars_numeric_stable.txt | 30 ++ + ...mixed_floats_ints_chars_numeric_unique.expected | 20 + + 
.../mixed_floats_ints_chars_numeric_unique.txt | 30 ++ + ...oats_ints_chars_numeric_unique_reverse.expected | 20 + + ...ed_floats_ints_chars_numeric_unique_reverse.txt | 30 ++ + ...loats_ints_chars_numeric_unique_stable.expected | 20 + + ...xed_floats_ints_chars_numeric_unique_stable.txt | 30 ++ + tests/fixtures/sort/months-dedup.expected | 6 + + tests/fixtures/sort/months-dedup.txt | 37 ++ + .../sort/numeric-floats-with-nan2.expected | 23 + + tests/fixtures/sort/numeric-floats-with-nan2.txt | 23 + + tests/fixtures/sort/zero-terminated.expected | 1 + + tests/fixtures/sort/zero-terminated.txt | 1 + + 35 files changed, 1422 insertions(+), 191 deletions(-) + create mode 100644 tests/fixtures/sort/exponents-positive-general.expected + create mode 100644 tests/fixtures/sort/exponents-positive-general.txt + create mode 100644 tests/fixtures/sort/exponents-positive-numeric.expected + create mode 100644 tests/fixtures/sort/exponents-positive-numeric.txt + create mode 100644 tests/fixtures/sort/human-mixed-inputs-reverse.expected + create mode 100644 tests/fixtures/sort/human-mixed-inputs-reverse.txt + create mode 100644 tests/fixtures/sort/human-mixed-inputs-stable.expected + create mode 100644 tests/fixtures/sort/human-mixed-inputs-stable.txt + create mode 100644 tests/fixtures/sort/human-mixed-inputs-unique.expected + create mode 100644 tests/fixtures/sort/human-mixed-inputs-unique.txt + create mode 100644 tests/fixtures/sort/human-mixed-inputs.expected + create mode 100644 tests/fixtures/sort/human-mixed-inputs.txt + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt + create mode 100644 
tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt + create mode 100644 tests/fixtures/sort/months-dedup.expected + create mode 100644 tests/fixtures/sort/months-dedup.txt + create mode 100644 tests/fixtures/sort/numeric-floats-with-nan2.expected + create mode 100644 tests/fixtures/sort/numeric-floats-with-nan2.txt + create mode 100644 tests/fixtures/sort/zero-terminated.expected + create mode 100644 tests/fixtures/sort/zero-terminated.txt + +diff --git a/Cargo.lock b/Cargo.lock +index 88ac18c..53eb8b4 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -994,12 +994,6 @@ dependencies = [ + "maybe-uninit", + ] + +-[[package]] +-name = "static_assertions" +-version = "1.1.0" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +- + [[package]] + name = "strsim" + version = "0.8.0" +@@ -1126,17 +1120,6 @@ dependencies = [ + "winapi 0.3.9", + ] + +-[[package]] +-name = "twox-hash" +-version = "1.6.0" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "04f8ab788026715fa63b31960869617cba39117e520eb415b0139543e325ab59" +-dependencies = [ +- "cfg-if 0.1.10", +- "rand 0.7.3", +- "static_assertions", +-] +- + [[package]] + name = "typenum" + version = "1.13.0" +@@ -2028,10 +2011,11 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" + checksum = "81feaf23fcf9fd3df43afd736c6785292d6f8ce9d202ac0dc7585c2a49879762" + dependencies = [ + "clap", ++ "fnv", + "itertools", + "rand 0.7.3", ++ "rayon", + "semver", +- "twox-hash", + "uucore", + "uucore_procs", + ] +diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml +index 16ae0cb..cfcd74d 100644 +--- a/src/uu/sort/Cargo.toml ++++ b/src/uu/sort/Cargo.toml +@@ -15,9 +15,10 @@ edition = "2018" + path = "src/sort.rs" + + [dependencies] ++rayon = "1.5" + rand = "0.7" + clap = "2.33" +-twox-hash = "1.6.0" ++fnv = "1.0.7" + itertools = "0.9" + semver = "0.9.0" + uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } +diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs +index 6c29ad9..08936ff 100644 +--- a/src/uu/sort/src/sort.rs ++++ b/src/uu/sort/src/sort.rs +@@ -7,23 +7,29 @@ + // * file that was distributed with this source code. + #![allow(dead_code)] + ++// Although these links don't always seem to describe reality, check out the POSIX and GNU specs: ++// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html ++// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html ++ + // spell-checker:ignore (ToDO) outfile nondictionary + #[macro_use] + extern crate uucore; + + use clap::{App, Arg}; ++use fnv::FnvHasher; + use itertools::Itertools; + use rand::distributions::Alphanumeric; + use rand::{thread_rng, Rng}; ++use rayon::prelude::*; + use semver::Version; + use std::cmp::Ordering; + use std::collections::BinaryHeap; ++use std::env; + use std::fs::File; + use std::hash::{Hash, Hasher}; + use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write}; + use std::mem::replace; + use std::path::Path; +-use twox_hash::XxHash64; + use uucore::fs::is_stdin_interactive; // for Iterator::dedup() + + static NAME: &str = "sort"; +@@ -33,27 +39,37 @@ static VERSION: &str = env!("CARGO_PKG_VERSION"); + static 
OPT_HUMAN_NUMERIC_SORT: &str = "human-numeric-sort"; + static OPT_MONTH_SORT: &str = "month-sort"; + static OPT_NUMERIC_SORT: &str = "numeric-sort"; ++static OPT_GENERAL_NUMERIC_SORT: &str = "general-numeric-sort"; + static OPT_VERSION_SORT: &str = "version-sort"; + + static OPT_DICTIONARY_ORDER: &str = "dictionary-order"; + static OPT_MERGE: &str = "merge"; + static OPT_CHECK: &str = "check"; ++static OPT_CHECK_SILENT: &str = "check-silent"; + static OPT_IGNORE_CASE: &str = "ignore-case"; + static OPT_IGNORE_BLANKS: &str = "ignore-blanks"; ++static OPT_IGNORE_NONPRINTING: &str = "ignore-nonprinting"; + static OPT_OUTPUT: &str = "output"; + static OPT_REVERSE: &str = "reverse"; + static OPT_STABLE: &str = "stable"; + static OPT_UNIQUE: &str = "unique"; + static OPT_RANDOM: &str = "random-sort"; ++static OPT_ZERO_TERMINATED: &str = "zero-terminated"; ++static OPT_PARALLEL: &str = "parallel"; ++static OPT_FILES0_FROM: &str = "files0-from"; + + static ARG_FILES: &str = "files"; + + static DECIMAL_PT: char = '.'; + static THOUSANDS_SEP: char = ','; ++static NEGATIVE: char = '-'; ++static POSITIVE: char = '+'; ++ + #[derive(Eq, Ord, PartialEq, PartialOrd)] + enum SortMode { + Numeric, + HumanNumeric, ++ GeneralNumeric, + Month, + Version, + Default, +@@ -67,10 +83,13 @@ struct Settings { + stable: bool, + unique: bool, + check: bool, ++ check_silent: bool, + random: bool, +- compare_fns: Vec<fn(&str, &str) -> Ordering>, ++ compare_fn: fn(&str, &str) -> Ordering, + transform_fns: Vec<fn(&str) -> String>, ++ threads: String, + salt: String, ++ zero_terminated: bool, + } + + impl Default for Settings { +@@ -83,10 +102,13 @@ impl Default for Settings { + stable: false, + unique: false, + check: false, ++ check_silent: false, + random: false, +- compare_fns: Vec::new(), ++ compare_fn: default_compare, + transform_fns: Vec::new(), ++ threads: String::new(), + salt: String::new(), ++ zero_terminated: false, + } + } + } +@@ -206,6 +228,12 @@ pub fn uumain(args: impl 
uucore::Args) -> i32 { + .long(OPT_NUMERIC_SORT) + .help("compare according to string numerical value"), + ) ++ .arg( ++ Arg::with_name(OPT_GENERAL_NUMERIC_SORT) ++ .short("g") ++ .long(OPT_GENERAL_NUMERIC_SORT) ++ .help("compare according to string general numerical value"), ++ ) + .arg( + Arg::with_name(OPT_VERSION_SORT) + .short("V") +@@ -230,12 +258,24 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + .long(OPT_CHECK) + .help("check for sorted input; do not sort"), + ) ++ .arg( ++ Arg::with_name(OPT_CHECK_SILENT) ++ .short("C") ++ .long(OPT_CHECK_SILENT) ++ .help("exit successfully if the given file is already sorted, and exit with status 1 otherwise. "), ++ ) + .arg( + Arg::with_name(OPT_IGNORE_CASE) + .short("f") + .long(OPT_IGNORE_CASE) + .help("fold lower case to upper case characters"), + ) ++ .arg( ++ Arg::with_name(OPT_IGNORE_NONPRINTING) ++ .short("-i") ++ .long(OPT_IGNORE_NONPRINTING) ++ .help("ignore nonprinting characters"), ++ ) + .arg( + Arg::with_name(OPT_IGNORE_BLANKS) + .short("b") +@@ -274,18 +314,65 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + .long(OPT_UNIQUE) + .help("output only the first of an equal run"), + ) ++ .arg( ++ Arg::with_name(OPT_ZERO_TERMINATED) ++ .short("z") ++ .long(OPT_ZERO_TERMINATED) ++ .help("line delimiter is NUL, not newline"), ++ ) ++ .arg( ++ Arg::with_name(OPT_PARALLEL) ++ .long(OPT_PARALLEL) ++ .help("change the number of threads running concurrently to N") ++ .takes_value(true) ++ .value_name("NUM_THREADS"), ++ ) ++ .arg( ++ Arg::with_name(OPT_FILES0_FROM) ++ .long(OPT_FILES0_FROM) ++ .help("read input from the files specified by NUL-terminated NUL_FILES") ++ .takes_value(true) ++ .value_name("NUL_FILES") ++ .multiple(true), ++ ) + .arg(Arg::with_name(ARG_FILES).multiple(true).takes_value(true)) + .get_matches_from(args); + +- let mut files: Vec<String> = matches +- .values_of(ARG_FILES) +- .map(|v| v.map(ToString::to_string).collect()) +- .unwrap_or_default(); ++ // check whether user specified a zero 
terminated list of files for input, otherwise read files from args ++ let mut files: Vec<String> = if matches.is_present(OPT_FILES0_FROM) { ++ let files0_from: Vec<String> = matches ++ .values_of(OPT_FILES0_FROM) ++ .map(|v| v.map(ToString::to_string).collect()) ++ .unwrap_or_default(); ++ ++ let mut files = Vec::new(); ++ for path in &files0_from { ++ let (reader, _) = open(path.as_str()).expect("Could not read from file specified."); ++ let buf_reader = BufReader::new(reader); ++ for line in buf_reader.split(b'\0') { ++ if let Ok(n) = line { ++ files.push( ++ std::str::from_utf8(&n) ++ .expect("Could not parse zero terminated string from input.") ++ .to_string(), ++ ); ++ } ++ } ++ } ++ files ++ } else { ++ matches ++ .values_of(ARG_FILES) ++ .map(|v| v.map(ToString::to_string).collect()) ++ .unwrap_or_default() ++ }; + + settings.mode = if matches.is_present(OPT_HUMAN_NUMERIC_SORT) { + SortMode::HumanNumeric + } else if matches.is_present(OPT_MONTH_SORT) { + SortMode::Month ++ } else if matches.is_present(OPT_GENERAL_NUMERIC_SORT) { ++ SortMode::GeneralNumeric + } else if matches.is_present(OPT_NUMERIC_SORT) { + SortMode::Numeric + } else if matches.is_present(OPT_VERSION_SORT) { +@@ -294,12 +381,29 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + SortMode::Default + }; + ++ if matches.is_present(OPT_PARALLEL) { ++ // "0" is default - threads = num of cores ++ settings.threads = matches ++ .value_of(OPT_PARALLEL) ++ .map(String::from) ++ .unwrap_or("0".to_string()); ++ env::set_var("RAYON_NUM_THREADS", &settings.threads); ++ } ++ + if matches.is_present(OPT_DICTIONARY_ORDER) { + settings.transform_fns.push(remove_nondictionary_chars); ++ } else if matches.is_present(OPT_IGNORE_NONPRINTING) { ++ settings.transform_fns.push(remove_nonprinting_chars); + } + ++ settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); + settings.merge = matches.is_present(OPT_MERGE); ++ + settings.check = matches.is_present(OPT_CHECK); ++ if 
matches.is_present(OPT_CHECK_SILENT) { ++ settings.check_silent = matches.is_present(OPT_CHECK_SILENT); ++ settings.check = true; ++ }; + + if matches.is_present(OPT_IGNORE_CASE) { + settings.transform_fns.push(|s| s.to_uppercase()); +@@ -327,20 +431,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + crash!(1, "sort: extra operand `{}' not allowed with -c", files[1]) + } + +- settings.compare_fns.push(match settings.mode { ++ settings.compare_fn = match settings.mode { + SortMode::Numeric => numeric_compare, ++ SortMode::GeneralNumeric => general_numeric_compare, + SortMode::HumanNumeric => human_numeric_size_compare, + SortMode::Month => month_compare, + SortMode::Version => version_compare, + SortMode::Default => default_compare, +- }); +- +- if !settings.stable { +- match settings.mode { +- SortMode::Default => {} +- _ => settings.compare_fns.push(default_compare), +- } +- } ++ }; + + exec(files, &mut settings) + } +@@ -359,67 +457,79 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { + + if settings.merge { + file_merger.push_file(buf_reader.lines()); +- } else if settings.check { +- return exec_check_file(buf_reader.lines(), &settings); ++ } else if settings.zero_terminated { ++ for line in buf_reader.split(b'\0') { ++ if let Ok(n) = line { ++ lines.push( ++ std::str::from_utf8(&n) ++ .expect("Could not parse string from zero terminated input.") ++ .to_string(), ++ ); ++ } ++ } + } else { + for line in buf_reader.lines() { + if let Ok(n) = line { + lines.push(n); +- } else { +- break; + } + } + } + } + +- sort_by(&mut lines, &settings); ++ if settings.check { ++ return exec_check_file(lines, &settings); ++ } else { ++ sort_by(&mut lines, &settings); ++ } + + if settings.merge { + if settings.unique { +- print_sorted(file_merger.dedup(), &settings.outfile) ++ print_sorted(file_merger.dedup(), &settings) + } else { +- print_sorted(file_merger, &settings.outfile) ++ print_sorted(file_merger, &settings) + } +- } else if settings.unique && 
settings.mode == SortMode::Numeric { ++ } else if settings.mode == SortMode::Month && settings.unique { + print_sorted( + lines + .iter() +- .dedup_by(|a, b| num_sort_dedup(a) == num_sort_dedup(b)), +- &settings.outfile, ++ .dedup_by(|a, b| get_months_dedup(a) == get_months_dedup(b)), ++ &settings, + ) + } else if settings.unique { +- print_sorted(lines.iter().dedup(), &settings.outfile) ++ print_sorted( ++ lines ++ .iter() ++ .dedup_by(|a, b| get_nums_dedup(a) == get_nums_dedup(b)), ++ &settings, ++ ) + } else { +- print_sorted(lines.iter(), &settings.outfile) ++ print_sorted(lines.iter(), &settings) + } + + 0 + } + +-fn exec_check_file(lines: Lines<BufReader<Box<dyn Read>>>, settings: &Settings) -> i32 { ++fn exec_check_file(unwrapped_lines: Vec<String>, settings: &Settings) -> i32 { + // errors yields the line before each disorder, + // plus the last line (quirk of .coalesce()) +- let unwrapped_lines = lines.filter_map(|maybe_line| { +- if let Ok(line) = maybe_line { +- Some(line) +- } else { +- None +- } +- }); +- let mut errors = unwrapped_lines +- .enumerate() +- .coalesce(|(last_i, last_line), (i, line)| { +- if compare_by(&last_line, &line, &settings) == Ordering::Greater { +- Err(((last_i, last_line), (i, line))) +- } else { +- Ok((i, line)) +- } +- }); ++ let mut errors = ++ unwrapped_lines ++ .iter() ++ .enumerate() ++ .coalesce(|(last_i, last_line), (i, line)| { ++ if compare_by(&last_line, &line, &settings) == Ordering::Greater { ++ Err(((last_i, last_line), (i, line))) ++ } else { ++ Ok((i, line)) ++ } ++ }); + if let Some((first_error_index, _line)) = errors.next() { + // Check for a second "error", as .coalesce() always returns the last + // line, no matter what our merging function does. 
+ if let Some(_last_line_or_next_error) = errors.next() { +- println!("sort: disorder in line {}", first_error_index); ++ if !settings.check_silent { ++ println!("sort: disorder in line {}", first_error_index); ++ }; + 1 + } else { + // first "error" was actually the last line. +@@ -431,8 +541,9 @@ fn exec_check_file(lines: Lines<BufReader<Box<dyn Read>>>, settings: &Settings) + } + } + ++#[inline(always)] + fn transform(line: &str, settings: &Settings) -> String { +- let mut transformed = line.to_string(); ++ let mut transformed = line.to_owned(); + for transform_fn in &settings.transform_fns { + transformed = transform_fn(&transformed); + } +@@ -440,8 +551,9 @@ fn transform(line: &str, settings: &Settings) -> String { + transformed + } + ++#[inline(always)] + fn sort_by(lines: &mut Vec<String>, settings: &Settings) { +- lines.sort_by(|a, b| compare_by(a, b, &settings)) ++ lines.par_sort_by(|a, b| compare_by(a, b, &settings)) + } + + fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering { +@@ -454,72 +566,198 @@ fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering { + (a, b) + }; + +- for compare_fn in &settings.compare_fns { +- let cmp: Ordering = if settings.random { +- random_shuffle(a, b, settings.salt.clone()) ++ // 1st Compare ++ let mut cmp: Ordering = if settings.random { ++ random_shuffle(a, b, settings.salt.clone()) ++ } else { ++ (settings.compare_fn)(a, b) ++ }; ++ ++ // Call "last resort compare" on any equal ++ if cmp == Ordering::Equal { ++ if settings.random || settings.stable || settings.unique { ++ cmp = Ordering::Equal + } else { +- compare_fn(a, b) ++ cmp = default_compare(a, b) + }; +- if cmp != Ordering::Equal { +- if settings.reverse { +- return cmp.reverse(); +- } else { +- return cmp; +- } +- } ++ }; ++ ++ if settings.reverse { ++ return cmp.reverse(); ++ } else { ++ return cmp; + } +- Ordering::Equal + } + ++// Test output against BSDs and GNU with their locale ++// env var set to lc_ctype=utf-8 to enjoy the exact 
same output. ++#[inline(always)] + fn default_compare(a: &str, b: &str) -> Ordering { + a.cmp(b) + } + +-fn get_leading_number(a: &str) -> &str { ++// This function does the initial detection of numeric lines. ++// Lines starting with a number or positive or negative sign. ++// It also strips the string of any thing that could never ++// be a number for the purposes of any type of numeric comparison. ++#[inline(always)] ++fn leading_num_common(a: &str) -> &str { + let mut s = ""; +- for c in a.chars() { +- if !c.is_numeric() && !c.eq(&'-') && !c.eq(&' ') && !c.eq(&'.') && !c.eq(&',') { +- s = a.trim().split(c).next().unwrap(); ++ for (idx, c) in a.char_indices() { ++ // check whether char is numeric, whitespace or decimal point or thousand seperator ++ if !c.is_numeric() ++ && !c.is_whitespace() ++ && !c.eq(&DECIMAL_PT) ++ && !c.eq(&THOUSANDS_SEP) ++ // check for e notation ++ && !c.eq(&'e') ++ && !c.eq(&'E') ++ // check whether first char is + or - ++ && !a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) ++ && !a.chars().nth(0).unwrap_or('\0').eq(&NEGATIVE) ++ { ++ // Strip string of non-numeric trailing chars ++ s = &a[..idx]; + break; + } +- s = a.trim(); ++ // If line is not a number line, return the line as is ++ s = a; + } +- return s; ++ s + } + +-// Matches GNU behavior, see: +-// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html +-// Specifically *not* the same as sort -n | uniq +-fn num_sort_dedup(a: &str) -> &str { +- // Empty lines are dumped +- if a.is_empty() { +- return "0"; +- // And lines that don't begin numerically are dumped +- } else if !a.trim().chars().nth(0).unwrap_or('\0').is_numeric() { +- return "0"; ++// This function cleans up the initial comparison done by leading_num_common for a numeric compare. ++// GNU sort does its numeric comparison through strnumcmp. However, we don't have or ++// may not want to use libc. 
Instead we emulate the GNU sort numeric compare by ignoring ++// those leading number lines GNU sort would not recognize. GNU numeric compare would ++// not recognize a positive sign or scientific/E notation so we strip those elements here. ++fn get_leading_num(a: &str) -> &str { ++ let mut s = ""; ++ let b = leading_num_common(a); ++ ++ // GNU numeric sort doesn't recognize '+' or 'e' notation so we strip ++ for (idx, c) in b.char_indices() { ++ if c.eq(&'e') || c.eq(&'E') || b.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) { ++ s = &b[..idx]; ++ break; ++ } ++ // If no further processing needed to be done, return the line as-is to be sorted ++ s = b; ++ } ++ ++ // And empty number or non-number lines are to be treated as ‘0’ but only for numeric sort ++ // All '0'-ed lines will be sorted later, but only amongst themselves, during the so-called 'last resort comparison.' ++ if s.is_empty() { ++ s = "0"; ++ }; ++ s ++} ++ ++// This function cleans up the initial comparison done by leading_num_common for a general numeric compare. ++// In contrast to numeric compare, GNU general numeric/FP sort *should* recognize positive signs and ++// scientific notation, so we strip those lines only after the end of the following numeric string. ++// For example, 5e10KFD would be 5e10 or 5x10^10 and +10000HFKJFK would become 10000. 
++fn get_leading_gen(a: &str) -> String { ++ // Make this iter peekable to see if next char is numeric ++ let mut p_iter = leading_num_common(a).chars().peekable(); ++ let mut r = String::new(); ++ // Cleanup raw stripped strings ++ for c in p_iter.to_owned() { ++ let next_char_numeric = p_iter.peek().unwrap_or(&'\0').is_numeric(); ++ // Only general numeric recognizes e notation and, see block below, the '+' sign ++ if (c.eq(&'e') && !next_char_numeric) ++ || (c.eq(&'E') && !next_char_numeric) ++ { ++ r = a.split(c).next().unwrap_or("").to_owned(); ++ break; ++ // If positive sign and next char is not numeric, split at postive sign at keep trailing numbers ++ // There is a more elegant way to do this in Rust 1.45, std::str::strip_prefix ++ } else if c.eq(&POSITIVE) && !next_char_numeric { ++ let mut v: Vec<&str> = a.split(c).collect(); ++ let x = v.split_off(1); ++ r = x.join(""); ++ break; ++ // If no further processing needed to be done, return the line as-is to be sorted ++ } else { ++ r = a.to_owned(); ++ } ++ } ++ r ++} ++ ++fn get_months_dedup(a: &str) -> String { ++ let pattern = if a.trim().len().ge(&3) { ++ // Split at 3rd char and get first element of tuple ".0" ++ a.split_at(3).0 + } else { +- // Prepare lines for comparison of only the numerical leading numbers +- return get_leading_number(a); ++ "" + }; ++ ++ let month = match pattern.to_uppercase().as_ref() { ++ "JAN" => Month::January, ++ "FEB" => Month::February, ++ "MAR" => Month::March, ++ "APR" => Month::April, ++ "MAY" => Month::May, ++ "JUN" => Month::June, ++ "JUL" => Month::July, ++ "AUG" => Month::August, ++ "SEP" => Month::September, ++ "OCT" => Month::October, ++ "NOV" => Month::November, ++ "DEC" => Month::December, ++ _ => Month::Unknown, ++ }; ++ ++ if month == Month::Unknown { ++ "".to_owned() ++ } else { ++ pattern.to_uppercase() ++ } ++} ++ ++// *For all dedups/uniques we must compare leading numbers* ++// Also note numeric compare and unique output is specifically *not* the same as 
a "sort | uniq" ++// See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html ++fn get_nums_dedup(a: &str) -> &str { ++ // Trim and remove any leading zeros ++ let s = a.trim().trim_start_matches('0'); ++ ++ // Get first char ++ let c = s.chars().nth(0).unwrap_or('\0'); ++ ++ // Empty lines and non-number lines are treated as the same for dedup ++ if s.is_empty() { ++ "" ++ } else if !c.eq(&NEGATIVE) && !c.is_numeric() { ++ "" ++ // Prepare lines for comparison of only the numerical leading numbers ++ } else { ++ get_leading_num(s) ++ } + } + + /// Parse the beginning string into an f64, returning -inf instead of NaN on errors. ++#[inline(always)] + fn permissive_f64_parse(a: &str) -> f64 { ++ // Remove thousands seperators ++ let a = a.replace(THOUSANDS_SEP, ""); ++ + // GNU sort treats "NaN" as non-number in numeric, so it needs special care. +- match a.parse::<f64>() { ++ // *Keep this trim before parse* despite what POSIX may say about -b and -n ++ // because GNU and BSD both seem to require it to match their behavior ++ match a.trim().parse::<f64>() { + Ok(a) if a.is_nan() => std::f64::NEG_INFINITY, + Ok(a) => a, + Err(_) => std::f64::NEG_INFINITY, + } + } + +-/// Compares two floats, with errors and non-numerics assumed to be -inf. +-/// Stops coercing at the first non-numeric char. + fn numeric_compare(a: &str, b: &str) -> Ordering { + #![allow(clippy::comparison_chain)] + +- let sa = get_leading_number(a); +- let sb = get_leading_number(b); ++ let sa = get_leading_num(a); ++ let sb = get_leading_num(b); + + let fa = permissive_f64_parse(sa); + let fb = permissive_f64_parse(sb); +@@ -534,19 +772,50 @@ fn numeric_compare(a: &str, b: &str) -> Ordering { + } + } + ++/// Compares two floats, with errors and non-numerics assumed to be -inf. ++/// Stops coercing at the first non-numeric char. 
++fn general_numeric_compare(a: &str, b: &str) -> Ordering { ++ #![allow(clippy::comparison_chain)] ++ ++ let sa = get_leading_gen(a); ++ let sb = get_leading_gen(b); ++ ++ let fa = permissive_f64_parse(&sa); ++ let fb = permissive_f64_parse(&sb); ++ ++ // f64::cmp isn't implemented (due to NaN issues); implement directly instead ++ if fa > fb { ++ Ordering::Greater ++ } else if fa < fb { ++ Ordering::Less ++ } else { ++ Ordering::Equal ++ } ++} ++ ++// GNU/BSD does not handle converting numbers to an equal scale ++// properly. GNU/BSD simply recognize that there is a human scale and sorts ++// those numbers ahead of other number inputs. There are perhaps limits ++// to the type of behavior we should emulate, and this might be such a limit. ++// Properly handling these units seems like a value add to me. And when sorting ++// these types of numbers, we rarely care about pure performance. + fn human_numeric_convert(a: &str) -> f64 { +- let int_str = get_leading_number(a); +- let (_, s) = a.split_at(int_str.len()); +- let int_part = permissive_f64_parse(int_str); +- let suffix: f64 = match s.parse().unwrap_or('\0') { +- 'K' => 1000f64, ++ let num_str = get_leading_num(a); ++ let suffix = a.trim_start_matches(num_str); ++ let num_part = permissive_f64_parse(num_str); ++ let suffix: f64 = match suffix.parse().unwrap_or('\0') { ++ // SI Units ++ 'K' => 1E3, + 'M' => 1E6, + 'G' => 1E9, + 'T' => 1E12, + 'P' => 1E15, +- _ => 1f64, ++ 'E' => 1E18, ++ 'Z' => 1E21, ++ 'Y' => 1E24, ++ _ => 1f64, + }; +- int_part * suffix ++ num_part * suffix + } + + /// Compare two strings as if they are human readable sizes. 
+@@ -555,6 +824,7 @@ fn human_numeric_size_compare(a: &str, b: &str) -> Ordering { + #![allow(clippy::comparison_chain)] + let fa = human_numeric_convert(a); + let fb = human_numeric_convert(b); ++ + // f64::cmp isn't implemented (due to NaN issues); implement directly instead + if fa > fb { + Ordering::Greater +@@ -565,16 +835,6 @@ fn human_numeric_size_compare(a: &str, b: &str) -> Ordering { + } + } + +-fn random_shuffle(a: &str, b: &str, salt: String) -> Ordering { +- #![allow(clippy::comparison_chain)] +- let salt_slice = salt.as_str(); +- +- let da = hash(&[a, salt_slice].concat()); +- let db = hash(&[b, salt_slice].concat()); +- +- da.cmp(&db) +-} +- + fn get_rand_string() -> String { + thread_rng() + .sample_iter(&Alphanumeric) +@@ -583,12 +843,22 @@ fn get_rand_string() -> String { + .collect::<String>() + } + +-fn hash<T: Hash>(t: &T) -> u64 { +- let mut s: XxHash64 = Default::default(); ++fn get_hash<T: Hash>(t: &T) -> u64 { ++ let mut s: FnvHasher = Default::default(); + t.hash(&mut s); + s.finish() + } + ++fn random_shuffle(a: &str, b: &str, x: String) -> Ordering { ++ #![allow(clippy::comparison_chain)] ++ let salt_slice = x.as_str(); ++ ++ let da = get_hash(&[a, salt_slice].concat()); ++ let db = get_hash(&[b, salt_slice].concat()); ++ ++ da.cmp(&db) ++} ++ + #[derive(Eq, Ord, PartialEq, PartialOrd)] + enum Month { + Unknown, +@@ -608,13 +878,15 @@ enum Month { + + /// Parse the beginning string into a Month, returning Month::Unknown on errors. 
+ fn month_parse(line: &str) -> Month { +- match line +- .split_whitespace() +- .next() +- .unwrap() +- .to_uppercase() +- .as_ref() +- { ++ // GNU splits at any 3 letter match "JUNNNN" is JUN ++ let pattern = if line.trim().len().ge(&3) { ++ // Split a 3 and get first element of tuple ".0" ++ line.split_at(3).0 ++ } else { ++ "" ++ }; ++ ++ match pattern.to_uppercase().as_ref() { + "JAN" => Month::January, + "FEB" => Month::February, + "MAR" => Month::March, +@@ -632,7 +904,16 @@ fn month_parse(line: &str) -> Month { + } + + fn month_compare(a: &str, b: &str) -> Ordering { +- month_parse(a).cmp(&month_parse(b)) ++ let ma = month_parse(a); ++ let mb = month_parse(b); ++ ++ if ma > mb { ++ Ordering::Greater ++ } else if ma < mb { ++ Ordering::Less ++ } else { ++ Ordering::Equal ++ } + } + + fn version_compare(a: &str, b: &str) -> Ordering { +@@ -650,19 +931,26 @@ fn version_compare(a: &str, b: &str) -> Ordering { + } + + fn remove_nondictionary_chars(s: &str) -> String { +- // Using 'is_ascii_whitespace()' instead of 'is_whitespace()', because it +- // uses only symbols compatible with UNIX sort (space, tab, newline). +- // 'is_whitespace()' uses more symbols as whitespace (e.g. vertical tab). ++ // According to GNU, dictionary chars are those of ASCII ++ // and a blank is a space or a tab ++ s.chars() ++ .filter(|c| c.is_ascii_alphanumeric() || c.is_ascii_whitespace()) ++ .collect::<String>() ++} ++ ++fn remove_nonprinting_chars(s: &str) -> String { ++ // However, GNU says nonprinting chars are more permissive. 
++ // All of ASCII except control chars ie, escape, newline + s.chars() +- .filter(|c| c.is_alphanumeric() || c.is_ascii_whitespace()) ++ .filter(|c| c.is_ascii() && !c.is_ascii_control()) + .collect::<String>() + } + +-fn print_sorted<S, T: Iterator<Item = S>>(iter: T, outfile: &Option<String>) ++fn print_sorted<S, T: Iterator<Item = S>>(iter: T, settings: &Settings) + where + S: std::fmt::Display, + { +- let mut file: Box<dyn Write> = match *outfile { ++ let mut file: Box<dyn Write> = match settings.outfile { + Some(ref filename) => match File::create(Path::new(&filename)) { + Ok(f) => Box::new(BufWriter::new(f)) as Box<dyn Write>, + Err(e) => { +@@ -673,9 +961,16 @@ where + None => Box::new(stdout()) as Box<dyn Write>, + }; + +- for line in iter { +- let str = format!("{}\n", line); +- crash_if_err!(1, file.write_all(str.as_bytes())) ++ if settings.zero_terminated { ++ for line in iter { ++ let str = format!("{}\0", line); ++ crash_if_err!(1, file.write_all(str.as_bytes())); ++ } ++ } else { ++ for line in iter { ++ let str = format!("{}\n", line); ++ crash_if_err!(1, file.write_all(str.as_bytes())); ++ } + } + } + +@@ -700,6 +995,22 @@ mod tests { + + use super::*; + ++ #[test] ++ fn test_get_hash() { ++ let a = "Ted".to_string(); ++ ++ assert_eq!(2646829031758483623, get_hash(&a)); ++ } ++ ++ #[test] ++ fn test_random_shuffle() { ++ let a = "Ted"; ++ let b = "Ted"; ++ let c = get_rand_string(); ++ ++ assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); ++ } ++ + #[test] + fn test_default_compare() { + let a = "your own"; +@@ -746,13 +1057,4 @@ mod tests { + + assert_eq!(Ordering::Less, version_compare(a, b)); + } +- +- #[test] +- fn test_random_compare() { +- let a = "9"; +- let b = "9"; +- let c = get_rand_string(); +- +- assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); +- } + } +diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs +index 2bac71d..43aaf1d 100644 +--- a/tests/by-util/test_sort.rs ++++ b/tests/by-util/test_sort.rs +@@ 
-1,58 +1,228 @@ + use crate::common::util::*; + ++#[test] ++fn test_check_zero_terminated_failure() { ++ new_ucmd!() ++ .arg("-z") ++ .arg("-c") ++ .arg("zero-terminated.txt") ++ .fails() ++ .stdout_is("sort: disorder in line 0\n"); ++} ++ ++#[test] ++fn test_check_zero_terminated_success() { ++ new_ucmd!() ++ .arg("-z") ++ .arg("-c") ++ .arg("zero-terminated.expected") ++ .succeeds(); ++} ++ ++#[test] ++fn test_random_shuffle_len() { ++ // check whether output is the same length as the input ++ const FILE: &'static str = "default_unsorted_ints.expected"; ++ let (at, _ucmd) = at_and_ucmd!(); ++ let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; ++ let expected = at.read(FILE); ++ ++ assert_ne!(result, expected); ++ assert_eq!(result.len(), expected.len()); ++} ++ ++#[test] ++fn test_random_shuffle_contains_all_lines() { ++ // check whether lines of input are all in output ++ const FILE: &'static str = "default_unsorted_ints.expected"; ++ let (at, _ucmd) = at_and_ucmd!(); ++ let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; ++ let expected = at.read(FILE); ++ let result_sorted = new_ucmd!().pipe_in(result.clone()).run().stdout; ++ ++ assert_ne!(result, expected); ++ assert_eq!(result_sorted, expected); ++} ++ ++#[test] ++fn test_random_shuffle_contains_two_runs_not_the_same() { ++ // check to verify that two random shuffles are not equal; this has the ++ // potential to fail in the unlikely event that random order is the same ++ // as the starting order, or if both random sorts end up having the same order. 
++ const FILE: &'static str = "default_unsorted_ints.expected"; ++ let (at, _ucmd) = at_and_ucmd!(); ++ let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; ++ let expected = at.read(FILE); ++ let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout; ++ ++ assert_ne!(result, expected); ++ assert_ne!(result, unexpected); ++} ++ + #[test] + fn test_numeric_floats_and_ints() { +- for numeric_sort_param in vec!["-n", "--numeric-sort"] { +- let input = "1.444\n8.013\n1\n-8\n1.04\n-1"; ++ test_helper("numeric_floats_and_ints", "-n"); ++} ++ ++#[test] ++fn test_numeric_floats() { ++ test_helper("numeric_floats", "-n"); ++} ++ ++#[test] ++fn test_numeric_floats_with_nan() { ++ test_helper("numeric_floats_with_nan", "-n"); ++} ++ ++#[test] ++fn test_numeric_unfixed_floats() { ++ test_helper("numeric_unfixed_floats", "-n"); ++} ++ ++#[test] ++fn test_numeric_fixed_floats() { ++ test_helper("numeric_fixed_floats", "-n"); ++} ++ ++#[test] ++fn test_numeric_unsorted_ints() { ++ test_helper("numeric_unsorted_ints", "-n"); ++} ++ ++#[test] ++fn test_human_block_sizes() { ++ test_helper("human_block_sizes", "-h"); ++} ++ ++#[test] ++fn test_month_default() { ++ test_helper("month_default", "-M"); ++} ++ ++#[test] ++fn test_month_stable() { ++ test_helper("month_stable", "-Ms"); ++} ++ ++#[test] ++fn test_default_unsorted_ints() { ++ test_helper("default_unsorted_ints", ""); ++} ++ ++#[test] ++fn test_numeric_unique_ints() { ++ test_helper("numeric_unsorted_ints_unique", "-nu"); ++} ++ ++#[test] ++fn test_version() { ++ test_helper("version", "-V"); ++} ++ ++#[test] ++fn test_ignore_case() { ++ test_helper("ignore_case", "-f"); ++} ++ ++#[test] ++fn test_dictionary_order() { ++ test_helper("dictionary_order", "-d"); ++} ++ ++#[test] ++fn test_dictionary_order2() { ++ for non_dictionary_order2_param in vec!["-d"] { + new_ucmd!() +- .arg(numeric_sort_param) +- .pipe_in(input) ++ .pipe_in("a👦ðŸ»aa b\naaaa b") ++ .arg(non_dictionary_order2_param) + .succeeds() +- 
.stdout_only("-8\n-1\n1\n1.04\n1.444\n8.013\n"); ++ .stdout_only("a👦ðŸ»aa b\naaaa b\n"); + } + } + + #[test] +-fn test_numeric_floats() { +- for numeric_sort_param in vec!["-n", "--numeric-sort"] { +- let input = "1.444\n8.013\n1.58590\n-8.90880\n1.040000000\n-.05"; ++fn test_non_printing_chars() { ++ for non_printing_chars_param in vec!["-i"] { + new_ucmd!() +- .arg(numeric_sort_param) +- .pipe_in(input) ++ .pipe_in("a👦ðŸ»aa b\naaaa b") ++ .arg(non_printing_chars_param) + .succeeds() +- .stdout_only("-8.90880\n-.05\n1.040000000\n1.444\n1.58590\n8.013\n"); ++ .stdout_only("aaaa b\na👦ðŸ»aa b\n"); + } + } + + #[test] +-fn test_numeric_floats_with_nan() { +- for numeric_sort_param in vec!["-n", "--numeric-sort"] { +- let input = "1.444\n1.0/0.0\n1.58590\n-8.90880\n1.040000000\n-.05"; ++fn test_exponents_positive_general_fixed() { ++ for exponents_positive_general_param in vec!["-g"] { + new_ucmd!() +- .arg(numeric_sort_param) +- .pipe_in(input) ++ .pipe_in("100E6\n\n50e10\n+100000\n\n10000K78\n10E\n\n\n1000EDKLD\n\n\n100E6\n\n50e10\n+100000\n\n") ++ .arg(exponents_positive_general_param) + .succeeds() +- .stdout_only("-8.90880\n-.05\n1.0/0.0\n1.040000000\n1.444\n1.58590\n"); ++ .stdout_only("\n\n\n\n\n\n\n\n10000K78\n1000EDKLD\n10E\n+100000\n+100000\n100E6\n100E6\n50e10\n50e10\n"); + } + } + + #[test] +-fn test_numeric_unfixed_floats() { +- test_helper("numeric_fixed_floats", "-n"); ++fn test_exponents_positive_numeric() { ++ test_helper("exponents-positive-numeric", "-n"); + } + + #[test] +-fn test_numeric_fixed_floats() { +- test_helper("numeric_fixed_floats", "-n"); ++fn test_months_dedup() { ++ test_helper("months-dedup", "-Mu"); + } + + #[test] +-fn test_numeric_unsorted_ints() { +- test_helper("numeric_unsorted_ints", "-n"); ++fn test_mixed_floats_ints_chars_numeric() { ++ test_helper("mixed_floats_ints_chars_numeric", "-n"); + } + + #[test] +-fn test_human_block_sizes() { ++fn test_mixed_floats_ints_chars_numeric_unique() { ++ 
test_helper("mixed_floats_ints_chars_numeric_unique", "-nu"); ++} ++ ++#[test] ++fn test_mixed_floats_ints_chars_numeric_reverse() { ++ test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur"); ++} ++ ++#[test] ++fn test_mixed_floats_ints_chars_numeric_stable() { ++ test_helper("mixed_floats_ints_chars_numeric_stable", "-ns"); ++} ++ ++#[test] ++fn test_numeric_floats_and_ints2() { ++ for numeric_sort_param in vec!["-n", "--numeric-sort"] { ++ let input = "1.444\n8.013\n1\n-8\n1.04\n-1"; ++ new_ucmd!() ++ .arg(numeric_sort_param) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("-8\n-1\n1\n1.04\n1.444\n8.013\n"); ++ } ++} ++ ++#[test] ++fn test_numeric_floats2() { ++ for numeric_sort_param in vec!["-n", "--numeric-sort"] { ++ let input = "1.444\n8.013\n1.58590\n-8.90880\n1.040000000\n-.05"; ++ new_ucmd!() ++ .arg(numeric_sort_param) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("-8.90880\n-.05\n1.040000000\n1.444\n1.58590\n8.013\n"); ++ } ++} ++ ++#[test] ++fn test_numeric_floats_with_nan2() { ++ test_helper("numeric-floats-with-nan2", "-n"); ++} ++ ++#[test] ++fn test_human_block_sizes2() { + for human_numeric_sort_param in vec!["-h", "--human-numeric-sort"] { + let input = "8981K\n909991M\n-8T\n21G\n0.8M"; + new_ucmd!() +@@ -64,7 +234,7 @@ fn test_human_block_sizes() { + } + + #[test] +-fn test_month_default() { ++fn test_month_default2() { + for month_sort_param in vec!["-M", "--month-sort"] { + let input = "JAn\nMAY\n000may\nJun\nFeb"; + new_ucmd!() +@@ -76,12 +246,7 @@ fn test_month_default() { + } + + #[test] +-fn test_month_stable() { +- test_helper("month_stable", "-Ms"); +-} +- +-#[test] +-fn test_default_unsorted_ints() { ++fn test_default_unsorted_ints2() { + let input = "9\n1909888\n000\n1\n2"; + new_ucmd!() + .pipe_in(input) +@@ -90,7 +255,7 @@ fn test_default_unsorted_ints() { + } + + #[test] +-fn test_numeric_unique_ints() { ++fn test_numeric_unique_ints2() { + for numeric_unique_sort_param in vec!["-nu"] { + let input = 
"9\n9\n8\n1\n"; + new_ucmd!() +@@ -102,18 +267,8 @@ fn test_numeric_unique_ints() { + } + + #[test] +-fn test_version() { +- test_helper("version", "-V"); +-} +- +-#[test] +-fn test_ignore_case() { +- test_helper("ignore_case", "-f"); +-} +- +-#[test] +-fn test_dictionary_order() { +- test_helper("dictionary_order", "-d"); ++fn test_zero_terminated() { ++ test_helper("zero-terminated", "-z"); + } + + #[test] +@@ -192,6 +347,15 @@ fn test_check() { + .stdout_is(""); + } + ++#[test] ++fn test_check_silent() { ++ new_ucmd!() ++ .arg("-C") ++ .arg("check_fail.txt") ++ .fails() ++ .stdout_is(""); ++} ++ + fn test_helper(file_name: &str, args: &str) { + new_ucmd!() + .arg(args) +diff --git a/tests/fixtures/sort/exponents-positive-general.expected b/tests/fixtures/sort/exponents-positive-general.expected +new file mode 100644 +index 0000000..3dbc92f +--- /dev/null ++++ b/tests/fixtures/sort/exponents-positive-general.expected +@@ -0,0 +1,12 @@ ++ ++ ++ ++ ++ ++ ++10E ++1000EDKLD ++10000K78 +++100000 ++100E6 ++50e10 +diff --git a/tests/fixtures/sort/exponents-positive-general.txt b/tests/fixtures/sort/exponents-positive-general.txt +new file mode 100644 +index 0000000..23ea527 +--- /dev/null ++++ b/tests/fixtures/sort/exponents-positive-general.txt +@@ -0,0 +1,12 @@ ++10000K78 ++10E ++ ++ ++1000EDKLD ++ ++ ++100E6 ++ ++50e10 +++100000 ++ +diff --git a/tests/fixtures/sort/exponents-positive-numeric.expected b/tests/fixtures/sort/exponents-positive-numeric.expected +new file mode 100644 +index 0000000..174088f +--- /dev/null ++++ b/tests/fixtures/sort/exponents-positive-numeric.expected +@@ -0,0 +1,12 @@ ++ ++ ++ ++ ++ ++ +++100000 ++10E ++50e10 ++100E6 ++1000EDKLD ++10000K78 +diff --git a/tests/fixtures/sort/exponents-positive-numeric.txt b/tests/fixtures/sort/exponents-positive-numeric.txt +new file mode 100644 +index 0000000..23ea527 +--- /dev/null ++++ b/tests/fixtures/sort/exponents-positive-numeric.txt +@@ -0,0 +1,12 @@ ++10000K78 ++10E ++ ++ ++1000EDKLD ++ ++ ++100E6 
++ ++50e10 +++100000 ++ +diff --git a/tests/fixtures/sort/human-mixed-inputs-reverse.expected b/tests/fixtures/sort/human-mixed-inputs-reverse.expected +new file mode 100644 +index 0000000..37e1762 +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs-reverse.expected +@@ -0,0 +1,37 @@ ++.2T ++2G ++100M ++7800900K ++51887300- ++1890777 ++56908-90078 ++6780.0009866 ++6780.000986 ++789----009999 90-0 90-0 ++1 ++0001 ++apr ++MAY ++JUNNNN ++JAN ++AUG ++APR ++0000000 ++00 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++-1.4 +diff --git a/tests/fixtures/sort/human-mixed-inputs-reverse.txt b/tests/fixtures/sort/human-mixed-inputs-reverse.txt +new file mode 100644 +index 0000000..d8b5124 +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs-reverse.txt +@@ -0,0 +1,37 @@ ++JAN ++ ++0000000 ++ ++00 ++ ++0001 ++ ++1 ++ ++-1.4 ++ ++JUNNNN ++AUG ++ ++apr ++ ++APR ++ ++ ++MAY ++1890777 ++ ++56908-90078 ++ ++51887300- ++ ++6780.0009866 ++ ++789----009999 90-0 90-0 ++ ++6780.000986 ++ ++100M ++7800900K ++2G ++.2T +diff --git a/tests/fixtures/sort/human-mixed-inputs-stable.expected b/tests/fixtures/sort/human-mixed-inputs-stable.expected +new file mode 100644 +index 0000000..0fcdcdb +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs-stable.expected +@@ -0,0 +1,37 @@ ++-1.4 ++JAN ++ ++0000000 ++ ++00 ++ ++ ++ ++ ++JUNNNN ++AUG ++ ++apr ++ ++APR ++ ++ ++MAY ++ ++ ++ ++ ++ ++ ++0001 ++1 ++789----009999 90-0 90-0 ++6780.000986 ++6780.0009866 ++56908-90078 ++1890777 ++51887300- ++7800900K ++100M ++2G ++.2T +diff --git a/tests/fixtures/sort/human-mixed-inputs-stable.txt b/tests/fixtures/sort/human-mixed-inputs-stable.txt +new file mode 100644 +index 0000000..d8b5124 +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs-stable.txt +@@ -0,0 +1,37 @@ ++JAN ++ ++0000000 ++ ++00 ++ ++0001 ++ ++1 ++ ++-1.4 ++ ++JUNNNN ++AUG ++ ++apr ++ ++APR ++ ++ ++MAY ++1890777 ++ ++56908-90078 ++ ++51887300- ++ ++6780.0009866 ++ ++789----009999 90-0 90-0 ++ ++6780.000986 ++ 
++100M ++7800900K ++2G ++.2T +diff --git a/tests/fixtures/sort/human-mixed-inputs-unique.expected b/tests/fixtures/sort/human-mixed-inputs-unique.expected +new file mode 100644 +index 0000000..50f53b6 +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs-unique.expected +@@ -0,0 +1,13 @@ ++-1.4 ++JAN ++0001 ++789----009999 90-0 90-0 ++6780.000986 ++6780.0009866 ++56908-90078 ++1890777 ++51887300- ++7800900K ++100M ++2G ++.2T +diff --git a/tests/fixtures/sort/human-mixed-inputs-unique.txt b/tests/fixtures/sort/human-mixed-inputs-unique.txt +new file mode 100644 +index 0000000..d8b5124 +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs-unique.txt +@@ -0,0 +1,37 @@ ++JAN ++ ++0000000 ++ ++00 ++ ++0001 ++ ++1 ++ ++-1.4 ++ ++JUNNNN ++AUG ++ ++apr ++ ++APR ++ ++ ++MAY ++1890777 ++ ++56908-90078 ++ ++51887300- ++ ++6780.0009866 ++ ++789----009999 90-0 90-0 ++ ++6780.000986 ++ ++100M ++7800900K ++2G ++.2T +diff --git a/tests/fixtures/sort/human-mixed-inputs.expected b/tests/fixtures/sort/human-mixed-inputs.expected +new file mode 100644 +index 0000000..1f900a8 +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs.expected +@@ -0,0 +1,37 @@ ++-1.4 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++00 ++0000000 ++APR ++AUG ++JAN ++JUNNNN ++MAY ++apr ++0001 ++1 ++789----009999 90-0 90-0 ++6780.000986 ++6780.0009866 ++56908-90078 ++1890777 ++51887300- ++7800900K ++100M ++2G ++.2T +diff --git a/tests/fixtures/sort/human-mixed-inputs.txt b/tests/fixtures/sort/human-mixed-inputs.txt +new file mode 100644 +index 0000000..cfd8a9d +--- /dev/null ++++ b/tests/fixtures/sort/human-mixed-inputs.txt +@@ -0,0 +1,46 @@ ++JAN ++ ++0000000 ++ ++00 ++ ++0001 ++ ++1 ++ ++-1.4 ++ ++JUNNNN ++AUG ++ ++apr ++ ++APR ++ ++ ++MAY ++1890777 ++ ++56908-90078 ++ ++51887300- ++ ++6780.0009866 ++ ++789----009999 90-0 90-0 ++ ++6780.000986 ++ ++1M ++10M ++100M ++1000M ++10000M ++ ++7800900K ++780090K ++78009K ++7800K ++780K ++2G ++.2T +diff --git 
a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected +new file mode 100644 +index 0000000..a781a36 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected +@@ -0,0 +1,30 @@ ++-2028789030 ++-896689 ++-8.90880 ++-1 ++-.05 ++ ++ ++ ++ ++ ++ ++ ++ ++000 ++CARAvan ++00000001 ++1 ++1.040000000 ++1.444 ++1.58590 ++8.013 ++45 ++46.89 ++ 4567. ++ 37800 ++576,446.88800000 ++576,446.890 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt +new file mode 100644 +index 0000000..a5813ea +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt +@@ -0,0 +1,30 @@ ++576,446.890 ++576,446.88800000 ++ ++ ++ 4567. ++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected +new file mode 100644 +index 0000000..6b02421 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected +@@ -0,0 +1,30 @@ ++4798908.8909800 ++4798908.45 ++4798908.340000000000 ++576,446.890 ++576,446.88800000 ++ 37800 ++ 4567. 
++46.89 ++45 ++8.013 ++1.58590 ++1.444 ++1.040000000 ++1 ++00000001 ++CARAvan ++000 ++ ++ ++ ++ ++ ++ ++ ++ ++-.05 ++-1 ++-8.90880 ++-896689 ++-2028789030 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected +new file mode 100644 +index 0000000..cb1028f +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected +@@ -0,0 +1,30 @@ ++4798908.8909800 ++4798908.45 ++4798908.340000000000 ++576,446.890 ++576,446.88800000 ++ 37800 ++ 4567. ++46.89 ++45 ++8.013 ++1.58590 ++1.444 ++1.040000000 ++1 ++00000001 ++ ++ ++ ++ ++ ++CARAvan ++ ++ ++ ++000 ++-.05 ++-1 ++-8.90880 ++-896689 ++-2028789030 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt +new file mode 100644 +index 0000000..a5813ea +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt +@@ -0,0 +1,30 @@ ++576,446.890 ++576,446.88800000 ++ ++ ++ 4567. ++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected +new file mode 100644 +index 0000000..63a3e64 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected +@@ -0,0 +1,30 @@ ++-2028789030 ++-896689 ++-8.90880 ++-1 ++-.05 ++ ++ ++ ++ ++ ++CARAvan ++ ++ ++ ++000 ++1 ++00000001 ++1.040000000 ++1.444 ++1.58590 ++8.013 ++45 ++46.89 ++ 4567. 
++ 37800 ++576,446.88800000 ++576,446.890 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt +new file mode 100644 +index 0000000..a5813ea +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt +@@ -0,0 +1,30 @@ ++576,446.890 ++576,446.88800000 ++ ++ ++ 4567. ++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected +new file mode 100644 +index 0000000..cb27c66 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected +@@ -0,0 +1,20 @@ ++-2028789030 ++-896689 ++-8.90880 ++-1 ++-.05 ++ ++1 ++1.040000000 ++1.444 ++1.58590 ++8.013 ++45 ++46.89 ++ 4567. ++ 37800 ++576,446.88800000 ++576,446.890 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt +new file mode 100644 +index 0000000..a5813ea +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt +@@ -0,0 +1,30 @@ ++576,446.890 ++576,446.88800000 ++ ++ ++ 4567. 
++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected +new file mode 100644 +index 0000000..bbce169 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected +@@ -0,0 +1,20 @@ ++4798908.8909800 ++4798908.45 ++4798908.340000000000 ++576,446.890 ++576,446.88800000 ++ 37800 ++ 4567. ++46.89 ++45 ++8.013 ++1.58590 ++1.444 ++1.040000000 ++1 ++ ++-.05 ++-1 ++-8.90880 ++-896689 ++-2028789030 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt +new file mode 100644 +index 0000000..a5813ea +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt +@@ -0,0 +1,30 @@ ++576,446.890 ++576,446.88800000 ++ ++ ++ 4567. ++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected +new file mode 100644 +index 0000000..bbce169 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected +@@ -0,0 +1,20 @@ ++4798908.8909800 ++4798908.45 ++4798908.340000000000 ++576,446.890 ++576,446.88800000 ++ 37800 ++ 4567. 
++46.89 ++45 ++8.013 ++1.58590 ++1.444 ++1.040000000 ++1 ++ ++-.05 ++-1 ++-8.90880 ++-896689 ++-2028789030 +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt +new file mode 100644 +index 0000000..a5813ea +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt +@@ -0,0 +1,30 @@ ++576,446.890 ++576,446.88800000 ++ ++ ++ 4567. ++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 +diff --git a/tests/fixtures/sort/months-dedup.expected b/tests/fixtures/sort/months-dedup.expected +new file mode 100644 +index 0000000..a2821f2 +--- /dev/null ++++ b/tests/fixtures/sort/months-dedup.expected +@@ -0,0 +1,6 @@ ++ ++JAN ++apr ++MAY ++JUNNNN ++AUG +diff --git a/tests/fixtures/sort/months-dedup.txt b/tests/fixtures/sort/months-dedup.txt +new file mode 100644 +index 0000000..d8b5124 +--- /dev/null ++++ b/tests/fixtures/sort/months-dedup.txt +@@ -0,0 +1,37 @@ ++JAN ++ ++0000000 ++ ++00 ++ ++0001 ++ ++1 ++ ++-1.4 ++ ++JUNNNN ++AUG ++ ++apr ++ ++APR ++ ++ ++MAY ++1890777 ++ ++56908-90078 ++ ++51887300- ++ ++6780.0009866 ++ ++789----009999 90-0 90-0 ++ ++6780.000986 ++ ++100M ++7800900K ++2G ++.2T +diff --git a/tests/fixtures/sort/numeric-floats-with-nan2.expected b/tests/fixtures/sort/numeric-floats-with-nan2.expected +new file mode 100644 +index 0000000..51c9985 +--- /dev/null ++++ b/tests/fixtures/sort/numeric-floats-with-nan2.expected +@@ -0,0 +1,23 @@ ++-8.90880 ++-.05 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++Karma ++1 ++1.0/0.0 ++1.040000000 ++1.2 ++1.444 ++1.58590 +diff --git a/tests/fixtures/sort/numeric-floats-with-nan2.txt b/tests/fixtures/sort/numeric-floats-with-nan2.txt +new file mode 100644 +index 0000000..9b78741 +--- /dev/null ++++ 
b/tests/fixtures/sort/numeric-floats-with-nan2.txt +@@ -0,0 +1,23 @@ ++Karma ++ ++1.0/0.0 ++ ++ ++-8.90880 ++ ++ ++-.05 ++ ++ ++1.040000000 ++ ++1.444 ++ ++ ++1.58590 ++ ++ ++1 ++ ++1.2 ++ +diff --git a/tests/fixtures/sort/zero-terminated.expected b/tests/fixtures/sort/zero-terminated.expected +new file mode 100644 +index 0000000..4e53b30 +--- /dev/null ++++ b/tests/fixtures/sort/zero-terminated.expected +@@ -0,0 +1 @@ ++../..�../../by-util�../../common�../../fixtures�../../fixtures/cat�../../fixtures/cksum�../../fixtures/comm�../../fixtures/cp�../../fixtures/cp/dir_with_mount�../../fixtures/cp/dir_with_mount/copy_me�../../fixtures/cp/hello_dir�../../fixtures/cp/hello_dir_with_file�../../fixtures/csplit�../../fixtures/cut�../../fixtures/cut/sequences�../../fixtures/dircolors�../../fixtures/du�../../fixtures/du/subdir�../../fixtures/du/subdir/deeper�../../fixtures/du/subdir/links�../../fixtures/env�../../fixtures/expand�../../fixtures/fmt�../../fixtures/fold�../../fixtures/hashsum�../../fixtures/head�../../fixtures/join�../../fixtures/mv�../../fixtures/nl�../../fixtures/numfmt�../../fixtures/od�../../fixtures/paste�../../fixtures/ptx�../../fixtures/shuf�../../fixtures/sort�../../fixtures/sum�../../fixtures/tac�../../fixtures/tail�../../fixtures/tsort�../../fixtures/unexpand�../../fixtures/uniq�../../fixtures/wc� +\ No newline at end of file +diff --git a/tests/fixtures/sort/zero-terminated.txt b/tests/fixtures/sort/zero-terminated.txt +new file mode 100644 +index 0000000..5c547c8 +--- /dev/null ++++ b/tests/fixtures/sort/zero-terminated.txt +@@ -0,0 +1 @@ 
++../../fixtures/paste�../../fixtures/du�../../fixtures/fold�../../fixtures�../../fixtures/cp/dir_with_mount/copy_me�../../fixtures/sum�../../fixtures/expand�../../fixtures/mv�../../fixtures/shuf�../../fixtures/od�../../fixtures/env�../../fixtures/cut�../../fixtures/cp/hello_dir�../../fixtures/hashsum�../../common�../../fixtures/du/subdir/links�../../fixtures/dircolors�../../fixtures/nl�../../fixtures/wc�../../fixtures/cut/sequences�../../fixtures/numfmt�../../fixtures/comm�../../fixtures/du/subdir�../../fixtures/cp/hello_dir_with_file�../../fixtures/ptx�../../fixtures/cp/dir_with_mount�../../fixtures/cat�../../fixtures/cp�../..�../../fixtures/tail�../../fixtures/du/subdir/deeper�../../fixtures/head�../../fixtures/join�../../by-util�../../fixtures/csplit�../../fixtures/cksum�../../fixtures/fmt�../../fixtures/tsort�../../fixtures/tac�../../fixtures/unexpand�../../fixtures/uniq�../../fixtures/sort� +\ No newline at end of file diff --git a/debian/patches/Sort-Various-fixes-and-performance-improvements.patch b/debian/patches/Sort-Various-fixes-and-performance-improvements.patch new file mode 100644 index 0000000..6023dbe --- /dev/null +++ b/debian/patches/Sort-Various-fixes-and-performance-improvements.patch @@ -0,0 +1,502 @@ +From: electricboogie <32370782+electricboogie@users.noreply.github.com> +Date: Sat, 10 Apr 2021 04:56:20 -0500 +Subject: Sort: Various fixes and performance improvements (#2057) + +* Various fixes and performance improvements + +* fix a typo + +Co-authored-by: Michael Debertol <michael.debertol@gmail.com> + +Co-authored-by: Sylvestre Ledru <sledru@mozilla.com> +Co-authored-by: Michael Debertol <michael.debertol@gmail.com> + +Origin: upstream, https://github.com/uutils/coreutils/commit/e5113ad00ef76032cc15b0052877c23a48783d4a +--- + src/uu/sort/src/sort.rs | 114 ++++++++++++++------- + tests/.DS_Store | Bin 0 -> 6160 bytes + tests/by-util/test_sort.rs | 49 +++++++-- + tests/fixtures/.DS_Store | 1 + + 
...ts_chars_numeric_unique_reverse_stable.expected | 20 ++++ + tests/fixtures/sort/multiple_decimals.expected | 33 ++++++ + tests/fixtures/sort/multiple_decimals_general.txt | 35 +++++++ + tests/fixtures/sort/multiple_decimals_numeric.txt | 35 +++++++ + 8 files changed, 243 insertions(+), 44 deletions(-) + create mode 100644 tests/.DS_Store + create mode 100644 tests/fixtures/.DS_Store + create mode 100644 tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse_stable.expected + create mode 100644 tests/fixtures/sort/multiple_decimals.expected + create mode 100644 tests/fixtures/sort/multiple_decimals_general.txt + create mode 100644 tests/fixtures/sort/multiple_decimals_numeric.txt + +diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs +index cf6c258..88f7bc7 100644 +--- a/src/uu/sort/src/sort.rs ++++ b/src/uu/sort/src/sort.rs +@@ -22,6 +22,7 @@ use rand::distributions::Alphanumeric; + use rand::{thread_rng, Rng}; + use rayon::prelude::*; + use semver::Version; ++use std::borrow::Cow; + use std::cmp::Ordering; + use std::collections::BinaryHeap; + use std::env; +@@ -262,7 +263,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + Arg::with_name(OPT_CHECK_SILENT) + .short("C") + .long(OPT_CHECK_SILENT) +- .help("exit successfully if the given file is already sorted, and exit with status 1 otherwise. 
"), ++ .help("exit successfully if the given file is already sorted, and exit with status 1 otherwise."), + ) + .arg( + Arg::with_name(OPT_IGNORE_CASE) +@@ -353,7 +354,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + if let Ok(n) = line { + files.push( + std::str::from_utf8(&n) +- .expect("Could not parse zero terminated string from input.") ++ .expect("Could not parse string from zero terminated input.") + .to_string(), + ); + } +@@ -488,6 +489,8 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { + } else { + print_sorted(file_merger, &settings) + } ++ } else if settings.mode == SortMode::Default && settings.unique { ++ print_sorted(lines.iter().dedup(), &settings) + } else if settings.mode == SortMode::Month && settings.unique { + print_sorted( + lines +@@ -499,7 +502,7 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { + print_sorted( + lines + .iter() +- .dedup_by(|a, b| get_nums_dedup(a) == get_nums_dedup(b)), ++ .dedup_by(|a, b| get_num_dedup(a, &settings) == get_num_dedup(b, &settings)), + &settings, + ) + } else { +@@ -603,12 +606,13 @@ fn default_compare(a: &str, b: &str) -> Ordering { + #[inline(always)] + fn leading_num_common(a: &str) -> &str { + let mut s = ""; ++ ++ // check whether char is numeric, whitespace or decimal point or thousand separator + for (idx, c) in a.char_indices() { +- // check whether char is numeric, whitespace or decimal point or thousand seperator + if !c.is_numeric() + && !c.is_whitespace() +- && !c.eq(&DECIMAL_PT) + && !c.eq(&THOUSANDS_SEP) ++ && !c.eq(&DECIMAL_PT) + // check for e notation + && !c.eq(&'e') + && !c.eq(&'E') +@@ -621,7 +625,7 @@ fn leading_num_common(a: &str) -> &str { + break; + } + // If line is not a number line, return the line as is +- s = a; ++ s = &a; + } + s + } +@@ -633,16 +637,17 @@ fn leading_num_common(a: &str) -> &str { + // not recognize a positive sign or scientific/E notation so we strip those elements here. 
+ fn get_leading_num(a: &str) -> &str { + let mut s = ""; +- let b = leading_num_common(a); + +- // GNU numeric sort doesn't recognize '+' or 'e' notation so we strip +- for (idx, c) in b.char_indices() { +- if c.eq(&'e') || c.eq(&'E') || b.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) { +- s = &b[..idx]; ++ let a = leading_num_common(a); ++ ++ // GNU numeric sort doesn't recognize '+' or 'e' notation so we strip trailing chars ++ for (idx, c) in a.char_indices() { ++ if c.eq(&'e') || c.eq(&'E') || a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) { ++ s = &a[..idx]; + break; + } + // If no further processing needed to be done, return the line as-is to be sorted +- s = b; ++ s = &a; + } + + // And empty number or non-number lines are to be treated as ‘0’ but only for numeric sort +@@ -657,30 +662,32 @@ fn get_leading_num(a: &str) -> &str { + // In contrast to numeric compare, GNU general numeric/FP sort *should* recognize positive signs and + // scientific notation, so we strip those lines only after the end of the following numeric string. + // For example, 5e10KFD would be 5e10 or 5x10^10 and +10000HFKJFK would become 10000. 
+-fn get_leading_gen(a: &str) -> String { ++fn get_leading_gen(a: &str) -> &str { + // Make this iter peekable to see if next char is numeric +- let mut p_iter = leading_num_common(a).chars().peekable(); +- let mut r = String::new(); ++ let raw_leading_num = leading_num_common(a); ++ let mut p_iter = raw_leading_num.chars().peekable(); ++ let mut result = ""; + // Cleanup raw stripped strings + for c in p_iter.to_owned() { + let next_char_numeric = p_iter.peek().unwrap_or(&'\0').is_numeric(); +- // Only general numeric recognizes e notation and, see block below, the '+' sign +- if (c.eq(&'e') && !next_char_numeric) || (c.eq(&'E') && !next_char_numeric) { +- r = a.split(c).next().unwrap_or("").to_owned(); ++ // Only general numeric recognizes e notation and the '+' sign ++ if (c.eq(&'e') && !next_char_numeric) ++ || (c.eq(&'E') && !next_char_numeric) ++ // Only GNU (non-general) numeric recognize thousands seperators, takes only leading # ++ || c.eq(&THOUSANDS_SEP) ++ { ++ result = a.split(c).next().unwrap_or(""); + break; + // If positive sign and next char is not numeric, split at postive sign at keep trailing numbers + // There is a more elegant way to do this in Rust 1.45, std::str::strip_prefix + } else if c.eq(&POSITIVE) && !next_char_numeric { +- let mut v: Vec<&str> = a.split(c).collect(); +- let x = v.split_off(1); +- r = x.join(""); ++ result = a.trim().trim_start_matches('+'); + break; +- // If no further processing needed to be done, return the line as-is to be sorted +- } else { +- r = a.to_owned(); + } ++ // If no further processing needed to be done, return the line as-is to be sorted ++ result = a; + } +- r ++ result + } + + fn get_months_dedup(a: &str) -> String { +@@ -714,10 +721,10 @@ fn get_months_dedup(a: &str) -> String { + } + } + +-// *For all dedups/uniques we must compare leading numbers* ++// *For all dedups/uniques expect default we must compare leading numbers* + // Also note numeric compare and unique output is specifically *not* the 
same as a "sort | uniq" + // See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html +-fn get_nums_dedup(a: &str) -> &str { ++fn get_num_dedup<'a>(a: &'a str, settings: &&mut Settings) -> &'a str { + // Trim and remove any leading zeros + let s = a.trim().trim_start_matches('0'); + +@@ -731,20 +738,50 @@ fn get_nums_dedup(a: &str) -> &str { + "" + // Prepare lines for comparison of only the numerical leading numbers + } else { +- get_leading_num(s) ++ let result = match settings.mode { ++ SortMode::Numeric => get_leading_num(s), ++ SortMode::GeneralNumeric => get_leading_gen(s), ++ SortMode::HumanNumeric => get_leading_num(s), ++ SortMode::Version => get_leading_num(s), ++ _ => s, ++ }; ++ result ++ } ++} ++ ++#[inline(always)] ++fn remove_thousands_sep<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> { ++ let input = input.into(); ++ if input.contains(THOUSANDS_SEP) { ++ let output = input.replace(THOUSANDS_SEP, ""); ++ Cow::Owned(output) ++ } else { ++ input ++ } ++} ++ ++#[inline(always)] ++fn remove_trailing_dec<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> { ++ let input = input.into(); ++ if let Some(s) = input.find(DECIMAL_PT) { ++ let (leading, trailing) = input.split_at(s); ++ let output = [leading, ".", trailing.replace(DECIMAL_PT, "").as_str()].concat(); ++ Cow::Owned(output) ++ } else { ++ input + } + } + + /// Parse the beginning string into an f64, returning -inf instead of NaN on errors. + #[inline(always)] + fn permissive_f64_parse(a: &str) -> f64 { +- // Remove thousands seperators +- let a = a.replace(THOUSANDS_SEP, ""); +- + // GNU sort treats "NaN" as non-number in numeric, so it needs special care. + // *Keep this trim before parse* despite what POSIX may say about -b and -n + // because GNU and BSD both seem to require it to match their behavior +- match a.trim().parse::<f64>() { ++ // ++ // Remove any trailing decimals, ie 4568..890... 
becomes 4568.890 ++ // Then, we trim whitespace and parse ++ match remove_trailing_dec(a).trim().parse::<f64>() { + Ok(a) if a.is_nan() => std::f64::NEG_INFINITY, + Ok(a) => a, + Err(_) => std::f64::NEG_INFINITY, +@@ -757,8 +794,13 @@ fn numeric_compare(a: &str, b: &str) -> Ordering { + let sa = get_leading_num(a); + let sb = get_leading_num(b); + +- let fa = permissive_f64_parse(sa); +- let fb = permissive_f64_parse(sb); ++ // Avoids a string alloc for every line to remove thousands seperators here ++ // instead of inside the get_leading_num function, which is a HUGE performance benefit ++ let ta = remove_thousands_sep(sa); ++ let tb = remove_thousands_sep(sb); ++ ++ let fa = permissive_f64_parse(&ta); ++ let fb = permissive_f64_parse(&tb); + + // f64::cmp isn't implemented (due to NaN issues); implement directly instead + if fa > fb { +@@ -799,8 +841,8 @@ fn general_numeric_compare(a: &str, b: &str) -> Ordering { + // these types of numbers, we rarely care about pure performance. + fn human_numeric_convert(a: &str) -> f64 { + let num_str = get_leading_num(a); +- let suffix = a.trim_start_matches(num_str); +- let num_part = permissive_f64_parse(num_str); ++ let suffix = a.trim_start_matches(&num_str); ++ let num_part = permissive_f64_parse(&num_str); + let suffix: f64 = match suffix.parse().unwrap_or('\0') { + // SI Units + 'K' => 1E3, +diff --git a/tests/.DS_Store b/tests/.DS_Store +new file mode 100644 +index 0000000..3cbbb78 +--- /dev/null ++++ b/tests/.DS_Store +@@ -0,0 +1 @@ 
++���Bud1������������%����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ ������@���������������������������������������� ������@������������������������������������������ ������@������������������������������������������ 
������@�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������E���%�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������DSDB�����������������������������`����������������������������������������������� ������@������������������������������������������ 
������@������������������������������������������ ������@�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� +\ No newline at end of file +diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs +index 43aaf1d..6455d83 100644 +--- a/tests/by-util/test_sort.rs ++++ b/tests/by-util/test_sort.rs +@@ -1,5 +1,31 @@ + use crate::common::util::*; + ++fn test_helper(file_name: &str, args: &str) { ++ new_ucmd!() ++ .arg(args) ++ .arg(format!("{}.txt", file_name)) ++ .succeeds() ++ .stdout_is_fixture(format!("{}.expected", file_name)); ++} ++ ++#[test] ++fn test_multiple_decimals_general() { ++ new_ucmd!() ++ .arg("-g") ++ .arg("multiple_decimals_general.txt") ++ .succeeds() ++ .stdout_is("\n\n\n\n\n\n\n\nCARAvan\n-2028789030\n-896689\n-8.90880\n-1\n-.05\n000\n00000001\n1\n1.040000000\n1.444\n1.58590\n8.013\n45\n46.89\n576,446.88800000\n576,446.890\n 4567.\n4567.1\n4567.34\n\t\t\t\t\t\t\t\t\t\t4567..457\n\t\t\t\t37800\n\t\t\t\t\t\t45670.89079.098\n\t\t\t\t\t\t45670.89079.1\n4798908.340000000000\n4798908.45\n4798908.8909800\n"); ++} ++ ++#[test] ++fn test_multiple_decimals_numeric() { ++ new_ucmd!() ++ .arg("-n") ++ .arg("multiple_decimals_numeric.txt") ++ .succeeds() ++ 
.stdout_is("-2028789030\n-896689\n-8.90880\n-1\n-.05\n\n\n\n\n\n\n\n\n000\nCARAvan\n00000001\n1\n1.040000000\n1.444\n1.58590\n8.013\n45\n46.89\n 4567.\n4567.1\n4567.34\n\t\t\t\t\t\t\t\t\t\t4567..457\n\t\t\t\t37800\n\t\t\t\t\t\t45670.89079.098\n\t\t\t\t\t\t45670.89079.1\n576,446.88800000\n576,446.890\n4798908.340000000000\n4798908.45\n4798908.8909800\n"); ++} ++ + #[test] + fn test_check_zero_terminated_failure() { + new_ucmd!() +@@ -44,6 +70,21 @@ fn test_random_shuffle_contains_all_lines() { + assert_eq!(result_sorted, expected); + } + ++#[test] ++fn test_random_shuffle_two_runs_not_the_same() { ++ // check to verify that two random shuffles are not equal; this has the ++ // potential to fail in the very unlikely event that the random order is the same ++ // as the starting order, or if both random sorts end up having the same order. ++ const FILE: &'static str = "default_unsorted_ints.expected"; ++ let (at, _ucmd) = at_and_ucmd!(); ++ let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; ++ let expected = at.read(FILE); ++ let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout; ++ ++ assert_ne!(result, expected); ++ assert_ne!(result, unexpected); ++} ++ + #[test] + fn test_random_shuffle_contains_two_runs_not_the_same() { + // check to verify that two random shuffles are not equal; this has the +@@ -355,11 +396,3 @@ fn test_check_silent() { + .fails() + .stdout_is(""); + } +- +-fn test_helper(file_name: &str, args: &str) { +- new_ucmd!() +- .arg(args) +- .arg(format!("{}{}", file_name, ".txt")) +- .succeeds() +- .stdout_is_fixture(format!("{}{}", file_name, ".expected")); +-} +diff --git a/tests/fixtures/.DS_Store b/tests/fixtures/.DS_Store +new file mode 100644 +index 0000000..05adc2c +--- /dev/null ++++ b/tests/fixtures/.DS_Store +@@ -0,0 +1 @@ ++���Bud1����������� 
����������������������������������������������������������bwspblob�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������s�o�r�tbwspblob����bplist00� +diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse_stable.expected +new file mode 100644 +index 0000000..bbce169 +--- /dev/null ++++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse_stable.expected +@@ -0,0 +1,20 @@ ++4798908.8909800 ++4798908.45 ++4798908.340000000000 ++576,446.890 ++576,446.88800000 ++ 37800 ++ 4567. ++46.89 ++45 ++8.013 ++1.58590 ++1.444 ++1.040000000 ++1 ++ ++-.05 ++-1 ++-8.90880 ++-896689 ++-2028789030 +diff --git a/tests/fixtures/sort/multiple_decimals.expected b/tests/fixtures/sort/multiple_decimals.expected +new file mode 100644 +index 0000000..6afbdca +--- /dev/null ++++ b/tests/fixtures/sort/multiple_decimals.expected +@@ -0,0 +1,33 @@ ++-2028789030 ++-896689 ++-8.90880 ++-1 ++-.05 ++ ++ ++ ++ ++ ++ ++ ++ ++000 ++CARAvan ++00000001 ++1 ++1.040000000 ++1.444 ++1.58590 ++8.013 ++45 ++46.89 ++ 4567..457 ++ 4567. ++4567.1 ++4567.34 ++ 37800 ++576,446.88800000 ++576,446.890 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 +diff --git a/tests/fixtures/sort/multiple_decimals_general.txt b/tests/fixtures/sort/multiple_decimals_general.txt +new file mode 100644 +index 0000000..4e65ecf +--- /dev/null ++++ b/tests/fixtures/sort/multiple_decimals_general.txt +@@ -0,0 +1,35 @@ ++576,446.890 ++576,446.88800000 ++ ++4567.1 ++ 4567..457 ++ 45670.89079.1 ++ 45670.89079.098 ++4567.34 ++ 4567. 
++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 ++ +diff --git a/tests/fixtures/sort/multiple_decimals_numeric.txt b/tests/fixtures/sort/multiple_decimals_numeric.txt +new file mode 100644 +index 0000000..4e65ecf +--- /dev/null ++++ b/tests/fixtures/sort/multiple_decimals_numeric.txt +@@ -0,0 +1,35 @@ ++576,446.890 ++576,446.88800000 ++ ++4567.1 ++ 4567..457 ++ 45670.89079.1 ++ 45670.89079.098 ++4567.34 ++ 4567. ++45 ++46.89 ++-1 ++1 ++00000001 ++4798908.340000000000 ++4798908.45 ++4798908.8909800 ++ ++ ++ 37800 ++ ++-2028789030 ++-896689 ++CARAvan ++ ++-8.90880 ++-.05 ++1.444 ++1.58590 ++1.040000000 ++ ++8.013 ++ ++000 ++ diff --git a/debian/patches/series b/debian/patches/series index 79e2c98..d0b7b0f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -10,3 +10,9 @@ revert-file-diff-0.diff revert-file-diff.diff revert-twox-hash.dif revert-retain-mut.diff + +# Backported from upstream +Sort-Implement-stable-sort-ignore-non-printing-month.patch +Ignore-a-test.patch +Sort-Various-fixes-and-performance-improvements.patch +sort-implement-k-and-t-support.patch diff --git a/debian/patches/sort-implement-k-and-t-support.patch b/debian/patches/sort-implement-k-and-t-support.patch new file mode 100644 index 0000000..85f0d6d --- /dev/null +++ b/debian/patches/sort-implement-k-and-t-support.patch @@ -0,0 +1,1333 @@ +From: Michael Debertol <michael.debertol@gmail.com> +Date: Sat, 10 Apr 2021 14:54:58 +0200 +Subject: sort: implement -k and -t support (#1996) + +* sort: implement basic -k and -t support + +This allows to specify keys after the -k flag and a custom field +separator using -t. + +Support for options for specific keys is still missing, and the -b flag +is not passed down correctly. 
+ +* sort: implement support for key options + +* remove unstable feature use + +* don't pipe in input when we expect a failure + +* only tokenize when needed, remove a clone() + +* improve comments + +* fix clippy lints + +* re-add test + +* buffer writes to stdout + +* fix ignore_non_printing + +and make the test fail in case it is broken :) + +* move attribute to the right position + +* add more tests + +* add my name to the copyright section + +* disallow dead code + +* move a comment + +* re-add a loc + +* use smallvec for a perf improvement in the common case + +* add BENCHMARKING.md + +* add ignore_case to benchmarks + +Origin: upstream, https://github.com/uutils/coreutils/commit/49c9d8c9018eeeb2ae68a43f3763e087c44b1653 +--- + Cargo.lock | 10 + + src/uu/sort/BENCHMARKING.md | 33 +++ + src/uu/sort/Cargo.toml | 1 + + src/uu/sort/src/sort.rs | 672 +++++++++++++++++++++++++++++++++++--------- + tests/by-util/test_sort.rs | 164 ++++++++++- + 5 files changed, 741 insertions(+), 139 deletions(-) + create mode 100644 src/uu/sort/BENCHMARKING.md + +diff --git a/Cargo.lock b/Cargo.lock +index 53eb8b4..6c311d9 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -1,5 +1,7 @@ + # This file is automatically @generated by Cargo. + # It is not intended for manual editing. 
++version = 3 ++ + [[package]] + name = "advapi32-sys" + version = "0.2.0" +@@ -994,6 +996,12 @@ dependencies = [ + "maybe-uninit", + ] + ++[[package]] ++name = "smallvec" ++version = "1.6.1" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" ++ + [[package]] + name = "strsim" + version = "0.8.0" +@@ -1456,6 +1464,7 @@ checksum = "f5220db7016a5929da5cbb7eaca38a282ec1c98681c0a08cba3dfd8a83cd4053" + dependencies = [ + "num-traits", + "rand 0.7.3", ++ "rand_chacha", + "smallvec", + "uucore", + "uucore_procs", +@@ -2016,6 +2025,7 @@ dependencies = [ + "rand 0.7.3", + "rayon", + "semver", ++ "smallvec 1.6.1", + "uucore", + "uucore_procs", + ] +diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md +new file mode 100644 +index 0000000..b20db01 +--- /dev/null ++++ b/src/uu/sort/BENCHMARKING.md +@@ -0,0 +1,33 @@ ++# Benchmarking sort ++ ++Most of the time when sorting is spent comparing lines. The comparison functions however differ based ++on which arguments are passed to `sort`, therefore it is important to always benchmark multiple scenarios. ++This is an overview of what was benchmarked, and if you make changes to `sort`, you are encouraged to check ++how performance was affected for the workloads listed below. Feel free to add other workloads to the ++list that we should improve / make sure not to regress. ++ ++Run `cargo build --release` before benchmarking after you make a change! ++ ++## Sorting a wordlist ++- Get a wordlist, for example with [words](https://en.wikipedia.org/wiki/Words_(Unix)) on Linux. The exact wordlist ++ doesn't matter for performance comparisons. In this example I'm using `/usr/share/dict/american-english` as the wordlist. ++- Shuffle the wordlist by running `sort -R /usr/share/dict/american-english > shuffled_wordlist.txt`.
++- Benchmark sorting the wordlist with hyperfine: `hyperfine "target/release/coreutils sort shuffled_wordlist.txt -o output.txt"`. ++ ++## Sorting a wordlist with ignore_case ++- Same wordlist as above ++- Benchmark sorting the wordlist ignoring the case with hyperfine: `hyperfine "target/release/coreutils sort shuffled_wordlist.txt -f -o output.txt"`. ++ ++## Sorting numbers ++- Generate a list of numbers: `seq 0 100000 | sort -R > shuffled_numbers.txt`. ++- Benchmark numeric sorting with hyperfine: `hyperfine "target/release/coreutils sort shuffled_numbers.txt -n -o output.txt"`. ++ ++## Stdout and stdin performance ++Try to run the above benchmarks by piping the input through stdin (standard input) and redirect the ++output through stdout (standard output): ++- Remove the input file from the arguments and add `cat [inputfile] | ` at the beginning. ++- Remove `-o output.txt` and add `> output.txt` at the end. ++ ++Example: `hyperfine "target/release/coreutils sort shuffled_numbers.txt -n -o output.txt"` becomes ++`hyperfine "cat shuffled_numbers.txt | target/release/coreutils sort -n > output.txt"` ++- Check that performance is similar to the original benchmark.
+\ No newline at end of file +diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml +index cfcd74d..ddea900 100644 +--- a/src/uu/sort/Cargo.toml ++++ b/src/uu/sort/Cargo.toml +@@ -21,6 +21,7 @@ clap = "2.33" + fnv = "1.0.7" + itertools = "0.9" + semver = "0.9.0" ++smallvec = "1.6.1" + uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } + uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } + +diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs +index 88f7bc7..7b1fe07 100644 +--- a/src/uu/sort/src/sort.rs ++++ b/src/uu/sort/src/sort.rs +@@ -2,10 +2,10 @@ + // * + // * (c) Michael Yin <mikeyin@mikeyin.org> + // * (c) Robert Swinford <robert.swinford..AT..gmail.com> ++// * (c) Michael Debertol <michael.debertol..AT..gmail.com> + // * + // * For the full copyright and license information, please view the LICENSE + // * file that was distributed with this source code. +-#![allow(dead_code)] + + // Although these links don't always seem to describe reality, check out the POSIX and GNU specs: + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html +@@ -22,6 +22,7 @@ use rand::distributions::Alphanumeric; + use rand::{thread_rng, Rng}; + use rayon::prelude::*; + use semver::Version; ++use smallvec::SmallVec; + use std::borrow::Cow; + use std::cmp::Ordering; + use std::collections::BinaryHeap; +@@ -30,6 +31,7 @@ use std::fs::File; + use std::hash::{Hash, Hasher}; + use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write}; + use std::mem::replace; ++use std::ops::{Range, RangeInclusive}; + use std::path::Path; + use uucore::fs::is_stdin_interactive; // for Iterator::dedup() + +@@ -37,6 +39,16 @@ static NAME: &str = "sort"; + static ABOUT: &str = "Display sorted concatenation of all FILE(s)."; + static VERSION: &str = env!("CARGO_PKG_VERSION"); + ++const LONG_HELP_KEYS: &str = "The key format is FIELD[.CHAR][OPTIONS][,FIELD[.CHAR]][OPTIONS]. 
++ ++Fields by default are separated by the first whitespace after a non-whitespace character. Use -t to specify a custom separator. ++In the default case, whitespace is appended at the beginning of each field. Custom separators however are not included in fields. ++ ++FIELD and CHAR both start at 1 (i.e. they are 1-indexed). If there is no end specified after a comma, the end will be the end of the line. ++If CHAR is set 0, it means the end of the field. CHAR defaults to 1 for the start position and to 0 for the end position. ++ ++Valid options are: MbdfhnRrV. They override the global options for this key."; ++ + static OPT_HUMAN_NUMERIC_SORT: &str = "human-numeric-sort"; + static OPT_MONTH_SORT: &str = "month-sort"; + static OPT_NUMERIC_SORT: &str = "numeric-sort"; +@@ -54,6 +66,8 @@ static OPT_OUTPUT: &str = "output"; + static OPT_REVERSE: &str = "reverse"; + static OPT_STABLE: &str = "stable"; + static OPT_UNIQUE: &str = "unique"; ++static OPT_KEY: &str = "key"; ++static OPT_SEPARATOR: &str = "field-separator"; + static OPT_RANDOM: &str = "random-sort"; + static OPT_ZERO_TERMINATED: &str = "zero-terminated"; + static OPT_PARALLEL: &str = "parallel"; +@@ -63,10 +77,11 @@ static ARG_FILES: &str = "files"; + + static DECIMAL_PT: char = '.'; + static THOUSANDS_SEP: char = ','; ++ + static NEGATIVE: char = '-'; + static POSITIVE: char = '+'; + +-#[derive(Eq, Ord, PartialEq, PartialOrd)] ++#[derive(Eq, Ord, PartialEq, PartialOrd, Clone)] + enum SortMode { + Numeric, + HumanNumeric, +@@ -76,8 +91,12 @@ enum SortMode { + Default, + } + +-struct Settings { ++struct GlobalSettings { + mode: SortMode, ++ ignore_blanks: bool, ++ ignore_case: bool, ++ dictionary_order: bool, ++ ignore_non_printing: bool, + merge: bool, + reverse: bool, + outfile: Option<String>, +@@ -86,17 +105,21 @@ struct Settings { + check: bool, + check_silent: bool, + random: bool, +- compare_fn: fn(&str, &str) -> Ordering, +- transform_fns: Vec<fn(&str) -> String>, +- threads: String, + salt: String, 
++ selectors: Vec<FieldSelector>, ++ separator: Option<char>, ++ threads: String, + zero_terminated: bool, + } + +-impl Default for Settings { +- fn default() -> Settings { +- Settings { ++impl Default for GlobalSettings { ++ fn default() -> GlobalSettings { ++ GlobalSettings { + mode: SortMode::Default, ++ ignore_blanks: false, ++ ignore_case: false, ++ dictionary_order: false, ++ ignore_non_printing: false, + merge: false, + reverse: false, + outfile: None, +@@ -105,19 +128,330 @@ impl Default for Settings { + check: false, + check_silent: false, + random: false, +- compare_fn: default_compare, +- transform_fns: Vec::new(), +- threads: String::new(), + salt: String::new(), ++ selectors: vec![], ++ separator: None, ++ threads: String::new(), + zero_terminated: false, + } + } + } + ++struct KeySettings { ++ mode: SortMode, ++ ignore_blanks: bool, ++ ignore_case: bool, ++ dictionary_order: bool, ++ ignore_non_printing: bool, ++ random: bool, ++ reverse: bool, ++} ++ ++impl From<&GlobalSettings> for KeySettings { ++ fn from(settings: &GlobalSettings) -> Self { ++ Self { ++ mode: settings.mode.clone(), ++ ignore_blanks: settings.ignore_blanks, ++ ignore_case: settings.ignore_case, ++ ignore_non_printing: settings.ignore_non_printing, ++ random: settings.random, ++ reverse: settings.reverse, ++ dictionary_order: settings.dictionary_order, ++ } ++ } ++} ++ ++/// Represents the string selected by a FieldSelector. ++enum Selection { ++ /// If we had to transform this selection, we have to store a new string. ++ String(String), ++ /// If there was no transformation, we can store an index into the line. ++ ByIndex(Range<usize>), ++} ++ ++impl Selection { ++ /// Gets the actual string slice represented by this Selection. 
++ fn get_str<'a>(&'a self, line: &'a Line) -> &'a str { ++ match self { ++ Selection::String(string) => string.as_str(), ++ Selection::ByIndex(range) => &line.line[range.to_owned()], ++ } ++ } ++} ++ ++type Field = Range<usize>; ++ ++struct Line { ++ line: String, ++ // The common case is not to specify fields. Let's make this fast. ++ selections: SmallVec<[Selection; 1]>, ++} ++ ++impl Line { ++ fn new(line: String, settings: &GlobalSettings) -> Self { ++ let fields = if settings ++ .selectors ++ .iter() ++ .any(|selector| selector.needs_tokens()) ++ { ++ // Only tokenize if we will need tokens. ++ Some(tokenize(&line, settings.separator)) ++ } else { ++ None ++ }; ++ ++ let selections = settings ++ .selectors ++ .iter() ++ .map(|selector| { ++ if let Some(range) = selector.get_selection(&line, fields.as_deref()) { ++ if let Some(transformed) = ++ transform(&line[range.to_owned()], &selector.settings) ++ { ++ Selection::String(transformed) ++ } else { ++ Selection::ByIndex(range.start().to_owned()..range.end() + 1) ++ } ++ } else { ++ // If there is no match, match the empty string. ++ Selection::ByIndex(0..0) ++ } ++ }) ++ .collect(); ++ Self { line, selections } ++ } ++} ++ ++/// Transform this line. Returns None if there's no need to transform. ++fn transform(line: &str, settings: &KeySettings) -> Option<String> { ++ let mut transformed = None; ++ if settings.ignore_case { ++ transformed = Some(line.to_uppercase()); ++ } ++ if settings.ignore_blanks { ++ transformed = Some( ++ transformed ++ .as_deref() ++ .unwrap_or(line) ++ .trim_start() ++ .to_string(), ++ ); ++ } ++ if settings.dictionary_order { ++ transformed = Some(remove_nondictionary_chars( ++ transformed.as_deref().unwrap_or(line), ++ )); ++ } ++ if settings.ignore_non_printing { ++ transformed = Some(remove_nonprinting_chars( ++ transformed.as_deref().unwrap_or(line), ++ )); ++ } ++ transformed ++} ++ ++/// Tokenize a line into fields. 
++fn tokenize(line: &str, separator: Option<char>) -> Vec<Field> { ++ if let Some(separator) = separator { ++ tokenize_with_separator(line, separator) ++ } else { ++ tokenize_default(line) ++ } ++} ++ ++/// By default fields are separated by the first whitespace after non-whitespace. ++/// Whitespace is included in fields at the start. ++fn tokenize_default(line: &str) -> Vec<Field> { ++ let mut tokens = vec![0..0]; ++ // pretend that there was whitespace in front of the line ++ let mut previous_was_whitespace = true; ++ for (idx, char) in line.char_indices() { ++ if char.is_whitespace() { ++ if !previous_was_whitespace { ++ tokens.last_mut().unwrap().end = idx; ++ tokens.push(idx..0); ++ } ++ previous_was_whitespace = true; ++ } else { ++ previous_was_whitespace = false; ++ } ++ } ++ tokens.last_mut().unwrap().end = line.len(); ++ tokens ++} ++ ++/// Split between separators. These separators are not included in fields. ++fn tokenize_with_separator(line: &str, separator: char) -> Vec<Field> { ++ let mut tokens = vec![0..0]; ++ let mut previous_was_separator = false; ++ for (idx, char) in line.char_indices() { ++ if previous_was_separator { ++ tokens.push(idx..0); ++ } ++ if char == separator { ++ tokens.last_mut().unwrap().end = idx; ++ previous_was_separator = true; ++ } else { ++ previous_was_separator = false; ++ } ++ } ++ tokens.last_mut().unwrap().end = line.len(); ++ tokens ++} ++ ++struct KeyPosition { ++ /// 1-indexed, 0 is invalid. ++ field: usize, ++ /// 1-indexed, 0 is end of field. ++ char: usize, ++ ignore_blanks: bool, ++} ++ ++impl KeyPosition { ++ fn parse(key: &str, default_char_index: usize, settings: &mut KeySettings) -> Self { ++ let mut field_and_char = key.split('.'); ++ let mut field = field_and_char ++ .next() ++ .unwrap_or_else(|| crash!(1, "invalid key `{}`", key)); ++ let mut char = field_and_char.next(); ++ ++ // If there is a char index, we expect options to appear after it. Otherwise we expect them after the field index. 
++ let value_with_options = char.as_mut().unwrap_or(&mut field); ++ ++ let mut ignore_blanks = settings.ignore_blanks; ++ if let Some(options_start) = value_with_options.chars().position(char::is_alphabetic) { ++ for option in value_with_options[options_start..].chars() { ++ // valid options: MbdfghinRrV ++ match option { ++ 'M' => settings.mode = SortMode::Month, ++ 'b' => ignore_blanks = true, ++ 'd' => settings.dictionary_order = true, ++ 'f' => settings.ignore_case = true, ++ 'g' => settings.mode = SortMode::GeneralNumeric, ++ 'h' => settings.mode = SortMode::HumanNumeric, ++ 'i' => settings.ignore_non_printing = true, ++ 'n' => settings.mode = SortMode::Numeric, ++ 'R' => settings.random = true, ++ 'r' => settings.reverse = true, ++ 'V' => settings.mode = SortMode::Version, ++ c => { ++ crash!(1, "invalid option for key: `{}`", c) ++ } ++ } ++ } ++ // Strip away option characters from the original value so we can parse it later ++ *value_with_options = &value_with_options[..options_start]; ++ } ++ ++ let field = field ++ .parse() ++ .unwrap_or_else(|e| crash!(1, "failed to parse field index for key `{}`: {}", key, e)); ++ if field == 0 { ++ crash!(1, "field index was 0"); ++ } ++ let char = char.map_or(default_char_index, |char| { ++ char.parse().unwrap_or_else(|e| { ++ crash!( ++ 1, ++ "failed to parse character index for key `{}`: {}", ++ key, ++ e ++ ) ++ }) ++ }); ++ Self { ++ field, ++ char, ++ ignore_blanks, ++ } ++ } ++} ++ ++struct FieldSelector { ++ from: KeyPosition, ++ to: Option<KeyPosition>, ++ settings: KeySettings, ++} ++ ++impl FieldSelector { ++ fn needs_tokens(&self) -> bool { ++ self.from.field != 1 || self.from.char == 0 || self.to.is_some() ++ } ++ ++ /// Look up the slice that corresponds to this selector for the given line. ++ /// If needs_fields returned false, fields may be None. 
++ fn get_selection<'a>( ++ &self, ++ line: &'a str, ++ tokens: Option<&[Field]>, ++ ) -> Option<RangeInclusive<usize>> { ++ enum ResolutionErr { ++ TooLow, ++ TooHigh, ++ } ++ ++ // Get the index for this line given the KeyPosition ++ fn resolve_index( ++ line: &str, ++ tokens: Option<&[Field]>, ++ position: &KeyPosition, ++ ) -> Result<usize, ResolutionErr> { ++ if tokens.map_or(false, |fields| fields.len() < position.field) { ++ Err(ResolutionErr::TooHigh) ++ } else if position.char == 0 { ++ let end = tokens.unwrap()[position.field - 1].end; ++ if end == 0 { ++ Err(ResolutionErr::TooLow) ++ } else { ++ Ok(end - 1) ++ } ++ } else { ++ let mut idx = if position.field == 1 { ++ // The first field always starts at 0. ++ // We don't need tokens for this case. ++ 0 ++ } else { ++ tokens.unwrap()[position.field - 1].start ++ } + position.char ++ - 1; ++ if idx >= line.len() { ++ Err(ResolutionErr::TooHigh) ++ } else { ++ if position.ignore_blanks { ++ if let Some(not_whitespace) = ++ line[idx..].chars().position(|c| !c.is_whitespace()) ++ { ++ idx += not_whitespace; ++ } else { ++ return Err(ResolutionErr::TooHigh); ++ } ++ } ++ Ok(idx) ++ } ++ } ++ } ++ ++ if let Ok(from) = resolve_index(line, tokens, &self.from) { ++ let to = self.to.as_ref().map(|to| resolve_index(line, tokens, &to)); ++ match to { ++ Some(Ok(to)) => Some(from..=to), ++ // If `to` was not given or the match would be after the end of the line, ++ // match everything until the end of the line. ++ None | Some(Err(ResolutionErr::TooHigh)) => Some(from..=line.len() - 1), ++ // If `to` is before the start of the line, report no match. ++ // This can happen if the line starts with a separator. ++ Some(Err(ResolutionErr::TooLow)) => None, ++ } ++ } else { ++ None ++ } ++ } ++} ++ + struct MergeableFile<'a> { + lines: Lines<BufReader<Box<dyn Read>>>, +- current_line: String, +- settings: &'a Settings, ++ current_line: Line, ++ settings: &'a GlobalSettings, + } + + // BinaryHeap depends on `Ord`. 
Note that we want to pop smallest items +@@ -125,7 +459,7 @@ struct MergeableFile<'a> { + // trick it into the right order by calling reverse() here. + impl<'a> Ord for MergeableFile<'a> { + fn cmp(&self, other: &MergeableFile) -> Ordering { +- compare_by(&self.current_line, &other.current_line, &self.settings).reverse() ++ compare_by(&self.current_line, &other.current_line, self.settings).reverse() + } + } + +@@ -137,7 +471,7 @@ impl<'a> PartialOrd for MergeableFile<'a> { + + impl<'a> PartialEq for MergeableFile<'a> { + fn eq(&self, other: &MergeableFile) -> bool { +- Ordering::Equal == compare_by(&self.current_line, &other.current_line, &self.settings) ++ Ordering::Equal == compare_by(&self.current_line, &other.current_line, self.settings) + } + } + +@@ -145,11 +479,11 @@ impl<'a> Eq for MergeableFile<'a> {} + + struct FileMerger<'a> { + heap: BinaryHeap<MergeableFile<'a>>, +- settings: &'a Settings, ++ settings: &'a GlobalSettings, + } + + impl<'a> FileMerger<'a> { +- fn new(settings: &'a Settings) -> FileMerger<'a> { ++ fn new(settings: &'a GlobalSettings) -> FileMerger<'a> { + FileMerger { + heap: BinaryHeap::new(), + settings, +@@ -159,7 +493,7 @@ impl<'a> FileMerger<'a> { + if let Some(Ok(next_line)) = lines.next() { + let mergeable_file = MergeableFile { + lines, +- current_line: next_line, ++ current_line: Line::new(next_line, &self.settings), + settings: &self.settings, + }; + self.heap.push(mergeable_file); +@@ -174,14 +508,17 @@ impl<'a> Iterator for FileMerger<'a> { + Some(mut current) => { + match current.lines.next() { + Some(Ok(next_line)) => { +- let ret = replace(&mut current.current_line, next_line); ++ let ret = replace( ++ &mut current.current_line, ++ Line::new(next_line, &self.settings), ++ ); + self.heap.push(current); +- Some(ret) ++ Some(ret.line) + } + _ => { + // Don't put it back in the heap (it's empty/erroring) + // but its first line is still valid. 
+- Some(current.current_line) ++ Some(current.current_line.line) + } + } + } +@@ -205,7 +542,7 @@ With no FILE, or when FILE is -, read standard input.", + pub fn uumain(args: impl uucore::Args) -> i32 { + let args = args.collect_str(); + let usage = get_usage(); +- let mut settings: Settings = Default::default(); ++ let mut settings: GlobalSettings = Default::default(); + + let matches = App::new(executable!()) + .version(VERSION) +@@ -316,7 +653,21 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + .help("output only the first of an equal run"), + ) + .arg( +- Arg::with_name(OPT_ZERO_TERMINATED) ++ Arg::with_name(OPT_KEY) ++ .short("k") ++ .long(OPT_KEY) ++ .help("sort by a key") ++ .long_help(LONG_HELP_KEYS) ++ .multiple(true) ++ .takes_value(true), ++ ) ++ .arg( ++ Arg::with_name(OPT_SEPARATOR) ++ .short("t") ++ .long(OPT_SEPARATOR) ++ .help("custom separator for -k") ++ .takes_value(true)) ++ .arg(Arg::with_name(OPT_ZERO_TERMINATED) + .short("z") + .long(OPT_ZERO_TERMINATED) + .help("line delimiter is NUL, not newline"), +@@ -350,14 +701,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + for path in &files0_from { + let (reader, _) = open(path.as_str()).expect("Could not read from file specified."); + let buf_reader = BufReader::new(reader); +- for line in buf_reader.split(b'\0') { +- if let Ok(n) = line { +- files.push( +- std::str::from_utf8(&n) +- .expect("Could not parse string from zero terminated input.") +- .to_string(), +- ); +- } ++ for line in buf_reader.split(b'\0').flatten() { ++ files.push( ++ std::str::from_utf8(&line) ++ .expect("Could not parse string from zero terminated input.") ++ .to_string(), ++ ); + } + } + files +@@ -382,21 +731,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + SortMode::Default + }; + ++ settings.dictionary_order = matches.is_present(OPT_DICTIONARY_ORDER); ++ settings.ignore_non_printing = matches.is_present(OPT_IGNORE_NONPRINTING); + if matches.is_present(OPT_PARALLEL) { + // "0" is default - threads = num 
of cores + settings.threads = matches + .value_of(OPT_PARALLEL) + .map(String::from) +- .unwrap_or("0".to_string()); ++ .unwrap_or_else(|| "0".to_string()); + env::set_var("RAYON_NUM_THREADS", &settings.threads); + } + +- if matches.is_present(OPT_DICTIONARY_ORDER) { +- settings.transform_fns.push(remove_nondictionary_chars); +- } else if matches.is_present(OPT_IGNORE_NONPRINTING) { +- settings.transform_fns.push(remove_nonprinting_chars); +- } +- + settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); + settings.merge = matches.is_present(OPT_MERGE); + +@@ -406,13 +751,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + settings.check = true; + }; + +- if matches.is_present(OPT_IGNORE_CASE) { +- settings.transform_fns.push(|s| s.to_uppercase()); +- } ++ settings.ignore_case = matches.is_present(OPT_IGNORE_CASE); + +- if matches.is_present(OPT_IGNORE_BLANKS) { +- settings.transform_fns.push(|s| s.trim_start().to_string()); +- } ++ settings.ignore_blanks = matches.is_present(OPT_IGNORE_BLANKS); + + settings.outfile = matches.value_of(OPT_OUTPUT).map(String::from); + settings.reverse = matches.is_present(OPT_REVERSE); +@@ -424,27 +765,64 @@ pub fn uumain(args: impl uucore::Args) -> i32 { + settings.salt = get_rand_string(); + } + +- //let mut files = matches.free; + if files.is_empty() { + /* if no file, default to stdin */ + files.push("-".to_owned()); + } else if settings.check && files.len() != 1 { +- crash!(1, "sort: extra operand `{}' not allowed with -c", files[1]) ++ crash!(1, "extra operand `{}' not allowed with -c", files[1]) + } + +- settings.compare_fn = match settings.mode { +- SortMode::Numeric => numeric_compare, +- SortMode::GeneralNumeric => general_numeric_compare, +- SortMode::HumanNumeric => human_numeric_size_compare, +- SortMode::Month => month_compare, +- SortMode::Version => version_compare, +- SortMode::Default => default_compare, +- }; ++ if let Some(arg) = matches.args.get(OPT_SEPARATOR) { ++ let separator = 
arg.vals[0].to_string_lossy(); ++ let separator = separator; ++ if separator.len() != 1 { ++ crash!(1, "separator must be exactly one character long"); ++ } ++ settings.separator = Some(separator.chars().next().unwrap()) ++ } + +- exec(files, &mut settings) ++ if matches.is_present(OPT_KEY) { ++ for key in &matches.args[OPT_KEY].vals { ++ let key = key.to_string_lossy(); ++ let mut from_to = key.split(','); ++ let mut key_settings = KeySettings::from(&settings); ++ let from = KeyPosition::parse( ++ from_to ++ .next() ++ .unwrap_or_else(|| crash!(1, "invalid key `{}`", key)), ++ 1, ++ &mut key_settings, ++ ); ++ let to = from_to ++ .next() ++ .map(|to| KeyPosition::parse(to, 0, &mut key_settings)); ++ let field_selector = FieldSelector { ++ from, ++ to, ++ settings: key_settings, ++ }; ++ settings.selectors.push(field_selector); ++ } ++ } ++ ++ if !settings.stable || !matches.is_present(OPT_KEY) { ++ // add a default selector matching the whole line ++ let key_settings = KeySettings::from(&settings); ++ settings.selectors.push(FieldSelector { ++ from: KeyPosition { ++ field: 1, ++ char: 1, ++ ignore_blanks: key_settings.ignore_blanks, ++ }, ++ to: None, ++ settings: key_settings, ++ }); ++ } ++ ++ exec(files, &settings) + } + +-fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { ++fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 { + let mut lines = Vec::new(); + let mut file_merger = FileMerger::new(&settings); + +@@ -459,26 +837,27 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { + if settings.merge { + file_merger.push_file(buf_reader.lines()); + } else if settings.zero_terminated { +- for line in buf_reader.split(b'\0') { +- if let Ok(n) = line { +- lines.push( +- std::str::from_utf8(&n) +- .expect("Could not parse string from zero terminated input.") +- .to_string(), +- ); +- } ++ for line in buf_reader.split(b'\0').flatten() { ++ lines.push(Line::new( ++ std::str::from_utf8(&line) ++ .expect("Could not parse string from 
zero terminated input.") ++ .to_string(), ++ &settings, ++ )); + } + } else { + for line in buf_reader.lines() { + if let Ok(n) = line { +- lines.push(n); ++ lines.push(Line::new(n, &settings)); ++ } else { ++ break; + } + } + } + } + + if settings.check { +- return exec_check_file(lines, &settings); ++ return exec_check_file(&lines, &settings); + } else { + sort_by(&mut lines, &settings); + } +@@ -490,29 +869,31 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { + print_sorted(file_merger, &settings) + } + } else if settings.mode == SortMode::Default && settings.unique { +- print_sorted(lines.iter().dedup(), &settings) ++ print_sorted(lines.into_iter().map(|line| line.line).dedup(), &settings) + } else if settings.mode == SortMode::Month && settings.unique { + print_sorted( + lines +- .iter() ++ .into_iter() ++ .map(|line| line.line) + .dedup_by(|a, b| get_months_dedup(a) == get_months_dedup(b)), + &settings, + ) + } else if settings.unique { + print_sorted( + lines +- .iter() +- .dedup_by(|a, b| get_num_dedup(a, &settings) == get_num_dedup(b, &settings)), ++ .into_iter() ++ .map(|line| line.line) ++ .dedup_by(|a, b| get_num_dedup(a, settings) == get_num_dedup(b, settings)), + &settings, + ) + } else { +- print_sorted(lines.iter(), &settings) ++ print_sorted(lines.into_iter().map(|line| line.line), &settings) + } + + 0 + } + +-fn exec_check_file(unwrapped_lines: Vec<String>, settings: &Settings) -> i32 { ++fn exec_check_file(unwrapped_lines: &[Line], settings: &GlobalSettings) -> i32 { + // errors yields the line before each disorder, + // plus the last line (quirk of .coalesce()) + let mut errors = +@@ -544,51 +925,45 @@ fn exec_check_file(unwrapped_lines: Vec<String>, settings: &Settings) -> i32 { + } + } + +-#[inline(always)] +-fn transform(line: &str, settings: &Settings) -> String { +- let mut transformed = line.to_owned(); +- for transform_fn in &settings.transform_fns { +- transformed = transform_fn(&transformed); +- } +- +- transformed +-} +- 
+-#[inline(always)] +-fn sort_by(lines: &mut Vec<String>, settings: &Settings) { ++fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) { + lines.par_sort_by(|a, b| compare_by(a, b, &settings)) + } + +-fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering { +- let (a_transformed, b_transformed): (String, String); +- let (a, b) = if !settings.transform_fns.is_empty() { +- a_transformed = transform(&a, &settings); +- b_transformed = transform(&b, &settings); +- (a_transformed.as_str(), b_transformed.as_str()) +- } else { +- (a, b) +- }; +- +- // 1st Compare +- let mut cmp: Ordering = if settings.random { +- random_shuffle(a, b, settings.salt.clone()) +- } else { +- (settings.compare_fn)(a, b) +- }; ++fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering { ++ for (idx, selector) in global_settings.selectors.iter().enumerate() { ++ let a = a.selections[idx].get_str(a); ++ let b = b.selections[idx].get_str(b); ++ let settings = &selector.settings; + +- // Call "last resort compare" on any equal +- if cmp == Ordering::Equal { +- if settings.random || settings.stable || settings.unique { +- cmp = Ordering::Equal ++ let cmp: Ordering = if settings.random { ++ random_shuffle(a, b, global_settings.salt.clone()) + } else { +- cmp = default_compare(a, b) ++ (match settings.mode { ++ SortMode::Numeric => numeric_compare, ++ SortMode::GeneralNumeric => general_numeric_compare, ++ SortMode::HumanNumeric => human_numeric_size_compare, ++ SortMode::Month => month_compare, ++ SortMode::Version => version_compare, ++ SortMode::Default => default_compare, ++ })(a, b) + }; ++ if cmp != Ordering::Equal { ++ return if settings.reverse { cmp.reverse() } else { cmp }; ++ } ++ } ++ ++ // Call "last resort compare" if all selectors returned Equal ++ ++ let cmp = if global_settings.random || global_settings.stable || global_settings.unique { ++ Ordering::Equal ++ } else { ++ default_compare(&a.line, &b.line) + }; + +- if settings.reverse { +- 
return cmp.reverse(); ++ if global_settings.reverse { ++ cmp.reverse() + } else { +- return cmp; ++ cmp + } + } + +@@ -617,8 +992,8 @@ fn leading_num_common(a: &str) -> &str { + && !c.eq(&'e') + && !c.eq(&'E') + // check whether first char is + or - +- && !a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) +- && !a.chars().nth(0).unwrap_or('\0').eq(&NEGATIVE) ++ && !a.chars().next().unwrap_or('\0').eq(&POSITIVE) ++ && !a.chars().next().unwrap_or('\0').eq(&NEGATIVE) + { + // Strip string of non-numeric trailing chars + s = &a[..idx]; +@@ -640,9 +1015,9 @@ fn get_leading_num(a: &str) -> &str { + + let a = leading_num_common(a); + +- // GNU numeric sort doesn't recognize '+' or 'e' notation so we strip trailing chars ++ // GNU numeric sort doesn't recognize '+' or 'e' notation so we strip + for (idx, c) in a.char_indices() { +- if c.eq(&'e') || c.eq(&'E') || a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) { ++ if c.eq(&'e') || c.eq(&'E') || a.chars().next().unwrap_or('\0').eq(&POSITIVE) { + s = &a[..idx]; + break; + } +@@ -670,12 +1045,9 @@ fn get_leading_gen(a: &str) -> &str { + // Cleanup raw stripped strings + for c in p_iter.to_owned() { + let next_char_numeric = p_iter.peek().unwrap_or(&'\0').is_numeric(); +- // Only general numeric recognizes e notation and the '+' sign +- if (c.eq(&'e') && !next_char_numeric) +- || (c.eq(&'E') && !next_char_numeric) +- // Only GNU (non-general) numeric recognize thousands seperators, takes only leading # +- || c.eq(&THOUSANDS_SEP) +- { ++ // Only general numeric recognizes e notation and, see block below, the '+' sign ++ // Only GNU (non-general) numeric recognize thousands seperators, takes only leading # ++ if (c.eq(&'e') || c.eq(&'E')) && !next_char_numeric || c.eq(&THOUSANDS_SEP) { + result = a.split(c).next().unwrap_or(""); + break; + // If positive sign and next char is not numeric, split at postive sign at keep trailing numbers +@@ -724,19 +1096,17 @@ fn get_months_dedup(a: &str) -> String { + // *For all dedups/uniques 
expect default we must compare leading numbers* + // Also note numeric compare and unique output is specifically *not* the same as a "sort | uniq" + // See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html +-fn get_num_dedup<'a>(a: &'a str, settings: &&mut Settings) -> &'a str { ++fn get_num_dedup<'a>(a: &'a str, settings: &GlobalSettings) -> &'a str { + // Trim and remove any leading zeros + let s = a.trim().trim_start_matches('0'); + + // Get first char +- let c = s.chars().nth(0).unwrap_or('\0'); ++ let c = s.chars().next().unwrap_or('\0'); + + // Empty lines and non-number lines are treated as the same for dedup +- if s.is_empty() { +- "" +- } else if !c.eq(&NEGATIVE) && !c.is_numeric() { +- "" + // Prepare lines for comparison of only the numerical leading numbers ++ if s.is_empty() || (!c.eq(&NEGATIVE) && !c.is_numeric()) { ++ "" + } else { + let result = match settings.mode { + SortMode::Numeric => get_leading_num(s), +@@ -944,6 +1314,7 @@ fn month_parse(line: &str) -> Month { + } + + fn month_compare(a: &str, b: &str) -> Ordering { ++ #![allow(clippy::comparison_chain)] + let ma = month_parse(a); + let mb = month_parse(b); + +@@ -986,32 +1357,29 @@ fn remove_nonprinting_chars(s: &str) -> String { + .collect::<String>() + } + +-fn print_sorted<S, T: Iterator<Item = S>>(iter: T, settings: &Settings) +-where +- S: std::fmt::Display, +-{ ++fn print_sorted<T: Iterator<Item = String>>(iter: T, settings: &GlobalSettings) { + let mut file: Box<dyn Write> = match settings.outfile { + Some(ref filename) => match File::create(Path::new(&filename)) { + Ok(f) => Box::new(BufWriter::new(f)) as Box<dyn Write>, + Err(e) => { +- show_error!("sort: {0}: {1}", filename, e.to_string()); ++ show_error!("{0}: {1}", filename, e.to_string()); + panic!("Could not open output file"); + } + }, +- None => Box::new(stdout()) as Box<dyn Write>, ++ None => Box::new(BufWriter::new(stdout())) as Box<dyn Write>, + }; +- + if settings.zero_terminated { + for line 
in iter { +- let str = format!("{}\0", line); +- crash_if_err!(1, file.write_all(str.as_bytes())); ++ crash_if_err!(1, file.write_all(line.as_bytes())); ++ crash_if_err!(1, file.write_all("\0".as_bytes())); + } + } else { + for line in iter { +- let str = format!("{}\n", line); +- crash_if_err!(1, file.write_all(str.as_bytes())); ++ crash_if_err!(1, file.write_all(line.as_bytes())); ++ crash_if_err!(1, file.write_all("\n".as_bytes())); + } + } ++ crash_if_err!(1, file.flush()); + } + + // from cat.rs +@@ -1024,7 +1392,7 @@ fn open(path: &str) -> Option<(Box<dyn Read>, bool)> { + match File::open(Path::new(path)) { + Ok(f) => Some((Box::new(f) as Box<dyn Read>, false)), + Err(e) => { +- show_error!("sort: {0}: {1}", path, e.to_string()); ++ show_error!("{0}: {1}", path, e.to_string()); + None + } + } +@@ -1097,4 +1465,34 @@ mod tests { + + assert_eq!(Ordering::Less, version_compare(a, b)); + } ++ ++ #[test] ++ fn test_random_compare() { ++ let a = "9"; ++ let b = "9"; ++ let c = get_rand_string(); ++ ++ assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); ++ } ++ ++ #[test] ++ fn test_tokenize_fields() { ++ let line = "foo bar b x"; ++ assert_eq!(tokenize(line, None), vec![0..3, 3..7, 7..9, 9..14,],); ++ } ++ ++ #[test] ++ fn test_tokenize_fields_leading_whitespace() { ++ let line = " foo bar b x"; ++ assert_eq!(tokenize(line, None), vec![0..7, 7..11, 11..13, 13..18,]); ++ } ++ ++ #[test] ++ fn test_tokenize_fields_custom_separator() { ++ let line = "aaa foo bar b x"; ++ assert_eq!( ++ tokenize(line, Some('a')), ++ vec![0..0, 1..1, 2..2, 3..9, 10..18,] ++ ); ++ } + } +diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs +index 6455d83..668e783 100644 +--- a/tests/by-util/test_sort.rs ++++ b/tests/by-util/test_sort.rs +@@ -185,10 +185,10 @@ fn test_dictionary_order2() { + fn test_non_printing_chars() { + for non_printing_chars_param in vec!["-i"] { + new_ucmd!() +- .pipe_in("a👦ðŸ»aa b\naaaa b") ++ .pipe_in("a👦ðŸ»aa\naaaa") + 
.arg(non_printing_chars_param) + .succeeds() +- .stdout_only("aaaa b\na👦ðŸ»aa b\n"); ++ .stdout_only("a👦ðŸ»aa\naaaa\n"); + } + } + +@@ -307,6 +307,166 @@ fn test_numeric_unique_ints2() { + } + } + ++#[test] ++fn test_keys_open_ended() { ++ let input = "aa bb cc\ndd aa ff\ngg aa cc\n"; ++ new_ucmd!() ++ .args(&["-k", "2.2"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("gg aa cc\ndd aa ff\naa bb cc\n"); ++} ++ ++#[test] ++fn test_keys_closed_range() { ++ let input = "aa bb cc\ndd aa ff\ngg aa cc\n"; ++ new_ucmd!() ++ .args(&["-k", "2.2,2.2"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("dd aa ff\ngg aa cc\naa bb cc\n"); ++} ++ ++#[test] ++fn test_keys_multiple_ranges() { ++ let input = "aa bb cc\ndd aa ff\ngg aa cc\n"; ++ new_ucmd!() ++ .args(&["-k", "2,2", "-k", "3,3"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("gg aa cc\ndd aa ff\naa bb cc\n"); ++} ++ ++#[test] ++fn test_keys_no_field_match() { ++ let input = "aa aa aa aa\naa bb cc\ndd aa ff\n"; ++ new_ucmd!() ++ .args(&["-k", "4,4"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("aa bb cc\ndd aa ff\naa aa aa aa\n"); ++} ++ ++#[test] ++fn test_keys_no_char_match() { ++ let input = "aaa\nba\nc\n"; ++ new_ucmd!() ++ .args(&["-k", "1.2"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("c\nba\naaa\n"); ++} ++ ++#[test] ++fn test_keys_custom_separator() { ++ let input = "aaxbbxcc\nddxaaxff\nggxaaxcc\n"; ++ new_ucmd!() ++ .args(&["-k", "2.2,2.2", "-t", "x"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("ddxaaxff\nggxaaxcc\naaxbbxcc\n"); ++} ++ ++#[test] ++fn test_keys_invalid_field() { ++ new_ucmd!() ++ .args(&["-k", "1."]) ++ .fails() ++ .stderr_only("sort: error: failed to parse character index for key `1.`: cannot parse integer from empty string"); ++} ++ ++#[test] ++fn test_keys_invalid_field_option() { ++ new_ucmd!() ++ .args(&["-k", "1.1x"]) ++ .fails() ++ .stderr_only("sort: error: invalid option for key: `x`"); ++} ++ ++#[test] ++fn test_keys_invalid_field_zero() { ++ new_ucmd!() 
++ .args(&["-k", "0.1"]) ++ .fails() ++ .stderr_only("sort: error: field index was 0"); ++} ++ ++#[test] ++fn test_keys_with_options() { ++ let input = "aa 3 cc\ndd 1 ff\ngg 2 cc\n"; ++ for param in &[ ++ &["-k", "2,2n"][..], ++ &["-k", "2n,2"][..], ++ &["-k", "2,2", "-n"][..], ++ ] { ++ new_ucmd!() ++ .args(param) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("dd 1 ff\ngg 2 cc\naa 3 cc\n"); ++ } ++} ++ ++#[test] ++fn test_keys_with_options_blanks_start() { ++ let input = "aa 3 cc\ndd 1 ff\ngg 2 cc\n"; ++ for param in &[&["-k", "2b,2"][..], &["-k", "2,2", "-b"][..]] { ++ new_ucmd!() ++ .args(param) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only("dd 1 ff\ngg 2 cc\naa 3 cc\n"); ++ } ++} ++ ++#[test] ++fn test_keys_with_options_blanks_end() { ++ let input = "a b ++a b ++a b ++"; ++ new_ucmd!() ++ .args(&["-k", "1,2.1b", "-s"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only( ++ "a b ++a b ++a b ++", ++ ); ++} ++ ++#[test] ++fn test_keys_stable() { ++ let input = "a b ++a b ++a b ++"; ++ new_ucmd!() ++ .args(&["-k", "1,2.1", "-s"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only( ++ "a b ++a b ++a b ++", ++ ); ++} ++ ++#[test] ++fn test_keys_empty_match() { ++ let input = "a a a a ++aaaa ++"; ++ new_ucmd!() ++ .args(&["-k", "1,1", "-t", "a"]) ++ .pipe_in(input) ++ .succeeds() ++ .stdout_only(input); ++} ++ + #[test] + fn test_zero_terminated() { + test_helper("zero-terminated", "-z"); -- GitLab