diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index 66989c948995377f9359618c47aac586ae6e94d9..0000000000000000000000000000000000000000 --- a/.appveyor.yml +++ /dev/null @@ -1,32 +0,0 @@ -os: Visual Studio 2015 - -environment: - matrix: - # Stable 64-bit MSVC - - channel: stable - target: x86_64-pc-windows-msvc - # Nightly 32-bit MSVC - - channel: nightly - target: i686-pc-windows-msvc - # Beta 32-bit GNU - - channel: beta - target: i686-pc-windows-gnu - -matrix: - allow_failures: - - channel: nightly - - channel: beta - -install: - - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - rustup-init -yv --default-toolchain %channel% --default-host %target% - - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - - rustc -vV - - cargo -vV - -# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs -# directly or perform other testing commands. Rust will automatically be placed in the PATH -# environment variable. -build: false -test_script: - - cargo test --all --verbose diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 2e372ed9553e6a0145c9b5f96e8f21bf0000b2a4..9448ae4f6e9984b06e4c5a1616ea6f28713d27c1 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,6 @@ { "git": { - "sha1": "bb3ffd6bf97436c0363a7f63fb00cc1620272ee9" - } -} + "sha1": "38b6cd950ff359724f4f36685bb4b5465cda06b4" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/.gitignore b/.gitignore deleted file mode 100755 index 6aa106405a4b4a00d779beab77c90c9473a6d203..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/target/ -**/*.rs.bk -Cargo.lock diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 200d884cb64a9cbe4fff03aa442365ac96f7ecb0..0000000000000000000000000000000000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,10 +0,0 @@ -# This file is a template, and might need editing before it works on your project. 
-# Official language image. Look for the different tagged releases at: -# https://hub.docker.com/r/library/rust/tags/ -image: "rust:latest" - -# Use cargo to test the project -test:cargo: - script: - - rustc --version && cargo --version # Print version info for debugging - - cargo test --verbose diff --git a/Cargo.toml b/Cargo.toml index 8a610afe56b887d9ea5645acb501bd65a30a2988..6a920c9e786b68753a56440a12b648116b42d4d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,31 +3,60 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
[package] +edition = "2018" name = "wild" -version = "2.0.2" +version = "2.1.0" authors = ["Kornel <kornel@geekhood.net>"] +include = [ + "/src/*.rs", + "/Cargo.toml", + "/LICENSE", + "/README.md", +] description = "Glob (wildcard) expanded command-line arguments on Windows" -homepage = "https://crates.rs/crates/wild" +homepage = "https://lib.rs/crates/wild" documentation = "https://docs.rs/wild" readme = "README.md" -keywords = ["wildcards", "glob", "windows", "shell", "CommandLineToArgvW"] -categories = ["command-line-interface"] -license = "MIT" +keywords = [ + "wildcards", + "glob", + "windows", + "shell", + "CommandLineToArgvW", +] +categories = [ + "command-line-interface", + "os::windows-apis", +] +license = "Apache-2.0 OR MIT" repository = "https://gitlab.com/kornelski/wild" + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + [dev-dependencies.glob] version = "0.3" + [target."cfg(windows)".dependencies.glob] version = "0.3" + [badges.appveyor] repository = "pornel/wild" [badges.gitlab] repository = "kornelski/wild" + +[badges.maintenance] +status = "passively-maintained" diff --git a/Cargo.toml.orig b/Cargo.toml.orig index eb384f661126b3ded0b631323d671a36890b9c7a..3915a4140f209a614a03865d840482c3624a42bd 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,33 +1,30 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies -# -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. 
If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) - [package] -name = "wild" -version = "2.0.2" authors = ["Kornel <kornel@geekhood.net>"] +categories = ["command-line-interface", "os::windows-apis"] description = "Glob (wildcard) expanded command-line arguments on Windows" -homepage = "https://crates.rs/crates/wild" documentation = "https://docs.rs/wild" -readme = "README.md" +homepage = "https://lib.rs/crates/wild" keywords = ["wildcards", "glob", "windows", "shell", "CommandLineToArgvW"] -categories = ["command-line-interface"] -license = "MIT" +license = "Apache-2.0 OR MIT" +name = "wild" +readme = "README.md" repository = "https://gitlab.com/kornelski/wild" -[dev-dependencies.glob] -version = "0.3" -[target."cfg(windows)".dependencies.glob] -version = "0.3" -[badges.appveyor] -repository = "pornel/wild" +version = "2.1.0" +edition = "2018" +include = ["/src/*.rs", "/Cargo.toml", "/LICENSE", "/README.md"] + +[badges] +gitlab = { repository = "kornelski/wild" } +appveyor = { repository = "pornel/wild" } +maintenance = { status = "passively-maintained" } + +[target.'cfg(windows)'.dependencies] +glob = "0.3" + +[dev-dependencies] +glob = "0.3" -[badges.gitlab] -repository = "kornelski/wild" +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] diff --git a/README.md b/README.md index 663113573e58eb0ce545b2f3c4fc47d9f74dccce..85ea5806fc4af083bfba477d77326d57b0c97609 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# [`Wild::args`](https://crates.rs/crates/wild) for [Rust](https://www.rust-lang.org) +# [`Wild::args`](https://lib.rs/crates/wild) for [Rust](https://www.rust-lang.org) Allows Rust applications support wildcard arguments (`*foo*`, `file.???`, `*.log.[0-9]`, etc.) on command-line, uniformly on all platforms, including Windows. 
Unix shells automatically interpret wildcard arguments and pass them expanded (already converted to file names) to applications, but Windows' `cmd.exe` doesn't do that. For consistent cross-platform behavior, this crate emulates Unix-like expansion on Windows. You only need to use `wild::args()` instead of `std::env::args()`. -It is more robust than using [`glob()`](https://crates.rs/crates/glob) on values from `std::env::args()`, because this crate is aware of argument quoting, and special characteres in quotes (`"*"`) are intentionally not expanded. +It is more robust than using [`glob()`](https://lib.rs/crates/glob) on values from `std::env::args()`, because this crate is aware of argument quoting, and special characters in quotes (`"*"`) are intentionally not expanded. The glob syntax on Windows is limited to `*`, `?`, and `[a-z]`/`[!a-z]` ranges, as supported by the glob crate. Parsing of quoted arguments precisely follows Windows' native syntax ([`CommandLineToArgvW`][1], specifically). @@ -14,16 +14,19 @@ The glob syntax on Windows is limited to `*`, `?`, and `[a-z]`/`[!a-z]` ranges, `wild::args()` is a drop-in replacement for `std::env::args()`. 
-```rust -extern crate wild; +```toml +[dependencies] +wild = "2" +``` +```rust fn main() { let args = wild::args(); println!("The args are: {:?}", args.collect::<Vec<_>>()); } ``` -## Usage with [Clap](https://crates.rs/crates/clap) +## Usage with [Clap](https://lib.rs/crates/clap) ```rust let matches = clap::App::new("your_app") diff --git a/debian/cargo-checksum.json b/debian/cargo-checksum.json index c08973a457a505e44f50155a0a88e2fd676180fa..60d032f7651ebab3db6ee8f0b6b92dbfc75872af 100644 --- a/debian/cargo-checksum.json +++ b/debian/cargo-checksum.json @@ -1 +1 @@ -{"package":"97d34fecce28871e5c0e059deae21ef7f7d13b98a5964b24c58b3735c8052fc8","files":{}} +{"package":"05b116685a6be0c52f5a103334cbff26db643826c7b3735fc0a3ba9871310a74","files":{}} diff --git a/debian/changelog b/debian/changelog index 311b1a4fbc114ff82a5af94d58f95995b7fd4cee..c78c92f9746ffe9906a079e4b5ced05d276d53d5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +rust-wild (2.1.0-1) unstable; urgency=medium + + * Team upload. + * Package wild 2.1.0 from crates.io using debcargo 2.5.0 + + -- Sylvestre Ledru <sylvestre@debian.org> Wed, 14 Sep 2022 09:24:42 +0200 + rust-wild (2.0.2-1) unstable; urgency=medium * Team upload. 
diff --git a/debian/compat b/debian/compat index b4de3947675361a7770d29b8982c407b0ec6b2a0..48082f72f087ce7e6fa75b9c41d7387daecd447b 100644 --- a/debian/compat +++ b/debian/compat @@ -1 +1 @@ -11 +12 diff --git a/debian/control b/debian/control index 1527a196156287f99106e955a3456acfddb61802..fedc36f8396c252f40501e409b4b1bb2e8f26a34 100644 --- a/debian/control +++ b/debian/control @@ -1,8 +1,8 @@ Source: rust-wild Section: rust Priority: optional -Build-Depends: debhelper (>= 11), - dh-cargo (>= 15), +Build-Depends: debhelper (>= 12), + dh-cargo (>= 25), cargo:native <!nocheck>, rustc:native <!nocheck>, libstd-rust-dev <!nocheck>, @@ -10,10 +10,11 @@ Build-Depends: debhelper (>= 11), Maintainer: Debian Rust Maintainers <pkg-rust-maintainers@alioth-lists.debian.net> Uploaders: Helen Koike <helen@koikeco.de> -Standards-Version: 4.2.0 +Standards-Version: 4.5.1 Vcs-Git: https://salsa.debian.org/rust-team/debcargo-conf.git [src/wild] Vcs-Browser: https://salsa.debian.org/rust-team/debcargo-conf/tree/master/src/wild -Homepage: https://crates.rs/crates/wild +Homepage: https://lib.rs/crates/wild +Rules-Requires-Root: no Package: librust-wild-dev Architecture: any @@ -25,10 +26,10 @@ Provides: librust-wild+default-dev (= ${binary:Version}), librust-wild-2-dev (= ${binary:Version}), librust-wild-2+default-dev (= ${binary:Version}), - librust-wild-2.0-dev (= ${binary:Version}), - librust-wild-2.0+default-dev (= ${binary:Version}), - librust-wild-2.0.2-dev (= ${binary:Version}), - librust-wild-2.0.2+default-dev (= ${binary:Version}) + librust-wild-2.1-dev (= ${binary:Version}), + librust-wild-2.1+default-dev (= ${binary:Version}), + librust-wild-2.1.0-dev (= ${binary:Version}), + librust-wild-2.1.0+default-dev (= ${binary:Version}) Description: Glob (wildcard) expanded command-line arguments on Windows - Rust source code This package contains the source for the Rust wild crate, packaged by debcargo for use with cargo and dh-cargo. 
diff --git a/debian/copyright.debcargo.hint b/debian/copyright.debcargo.hint index f09c43763439118c520c6a92e3a149899c615100..16392124e57d99ad11f0f4b85dd3cc9c10ffd35c 100644 --- a/debian/copyright.debcargo.hint +++ b/debian/copyright.debcargo.hint @@ -5,7 +5,7 @@ Source: https://gitlab.com/kornelski/wild Files: * Copyright: FIXME (overlay) UNKNOWN-YEARS Kornel <kornel@geekhood.net> -License: MIT +License: Apache-2.0 or MIT Comment: FIXME (overlay): Since upstream copyright years are not available in Cargo.toml, they were extracted from the upstream Git repository. This may not @@ -21,9 +21,13 @@ Comment: Files: debian/* Copyright: - 2019 Debian Rust Maintainers <pkg-rust-maintainers@alioth-lists.debian.net> - 2019 Helen Koike <helen@koikeco.de> -License: MIT + 2019-2022 Debian Rust Maintainers <pkg-rust-maintainers@alioth-lists.debian.net> + 2019-2022 Helen Koike <helen@koikeco.de> +License: Apache-2.0 or MIT + +License: Apache-2.0 + Debian systems provide the Apache 2.0 license in + /usr/share/common-licenses/Apache-2.0 License: MIT Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000000000000000000000000000000000000..d6d84dadb7109f321eb8a989bc53f6f4c1a45ac9 --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,14 @@ +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.1.0 --all-targets --all-features +Features: test-name=rust-wild:@ +Depends: dh-cargo (>= 18), librust-glob-0.3+default-dev, @ +Restrictions: allow-stderr, skip-not-installable + +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.1.0 --all-targets +Features: test-name=librust-wild-dev:default +Depends: dh-cargo (>= 18), librust-glob-0.3+default-dev, @ +Restrictions: allow-stderr, skip-not-installable + +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.1.0 --all-targets --no-default-features +Features: test-name=librust-wild-dev: +Depends: dh-cargo (>= 18), 
librust-glob-0.3+default-dev, @ +Restrictions: allow-stderr, skip-not-installable diff --git a/debian/watch b/debian/watch index d75195bd28c291ca9679638983970ad1ab45286f..ae23bf0cd9c04166b8143e7659909f34837603e8 100644 --- a/debian/watch +++ b/debian/watch @@ -2,4 +2,3 @@ version=4 opts=filenamemangle=s/.*\/(.*)\/download/wild-$1\.tar\.gz/g,\ uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/ \ https://qa.debian.org/cgi-bin/fakeupstream.cgi?upstream=crates.io/wild .*/crates/wild/@ANY_VERSION@/download - diff --git a/src/argsiter.rs b/src/argsiter.rs index bbe17ed5b87a4aee6b3f2f55e9d36c5b5dd163c0..e9539335c7061112f0ee8cfb379acc63cfca08d0 100644 --- a/src/argsiter.rs +++ b/src/argsiter.rs @@ -1,6 +1,5 @@ -use globiter::*; +use crate::globiter::GlobArgs; use std::ffi::OsString; -use glob; use std::fmt; /// Windows replacement for `std::env::ArgsOs` @@ -75,13 +74,13 @@ impl Iterator for ArgsOs { } impl fmt::Debug for Args { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.iter.fmt(f) } } impl fmt::Debug for ArgsOs { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.args.as_ref().map(|args| args.fmt(f)) .unwrap_or_else(|| "".fmt(f)) } diff --git a/src/globiter.rs b/src/globiter.rs index 037cb29cdf70978e009af72644ebd413a6e1587e..8a0f055137c1ec0a1a342c3f494c8bac103e2374 100644 --- a/src/globiter.rs +++ b/src/globiter.rs @@ -1,5 +1,6 @@ +use crate::parser::CommandLineWParser; +use crate::parser::CharCode; use std::ffi::OsString; -use parser; use std::fmt; pub(crate) struct ArgOs { @@ -10,13 +11,13 @@ pub(crate) struct ArgOs { /// Iterator retuning glob-escaped arguments. Call `args()` to obtain it. 
#[must_use] -pub(crate) struct GlobArgs<'a> { - line: &'a [u16], +pub(crate) struct GlobArgs<'argsline> { + parser: CommandLineWParser<'argsline>, } impl<'a> fmt::Debug for GlobArgs<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - String::from_utf16_lossy(self.line).fmt(f) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.parser.fmt(f) } } @@ -37,37 +38,53 @@ impl LossyOsStringExt for OsString {} impl<'a> Iterator for GlobArgs<'a> { type Item = ArgOs; fn next(&mut self) -> Option<Self::Item> { - let state = (vec![], vec![], false); - let (state, rest) = parser::next_arg(self.line, state, |&mut (ref mut arg, ref mut text, ref mut contains_glob), c, quoted| { + let mut pattern = vec![]; + let mut text = vec![]; + let mut contains_glob = false; + let has_arg = self.parser.accumulate_next(|c| { + let (quoted, c) = match c { + CharCode::Quoted(c) => (true, c), + CharCode::Unquoted(c) => (false, c), + }; text.push(c); - match c as u8 { - b'?' | b'*' | b'[' | b']' if c < 256 => { + const Q: u16 = b'?' 
as u16; + const A: u16 = b'*' as u16; + const L: u16 = b'[' as u16; + const R: u16 = b']' as u16; + match c { + Q | A | L | R => { if quoted { - arg.push(u16::from(b'[')); - arg.push(c); - arg.push(u16::from(b']')); + pattern.extend([ + u16::from(b'['), + c, + u16::from(b']'), + ].iter().copied()); } else { - arg.push(c); - *contains_glob = true; + pattern.push(c); + contains_glob = true; } }, - _ => arg.push(c), + _ => pattern.push(c), }; }); - self.line = rest; - state.map(|(pattern, text, contains_glob)| ArgOs { - pattern: OsString::from_wide(&pattern), - text: OsString::from_wide(&text), - contains_glob, - }) + if has_arg { + Some(ArgOs { + pattern: OsString::from_wide(&pattern), + text: OsString::from_wide(&text), + contains_glob, + }) + } else { + None + } } } -impl<'a> GlobArgs<'a> { +impl<'argsline> GlobArgs<'argsline> { /// UTF-16/UCS2 string from `GetCommandLineW` #[allow(dead_code)] - pub(crate) fn new(line: &'a [u16]) -> Self { - Self { line } + pub(crate) fn new(command_line_args_ucs2: &'argsline [u16]) -> Self { + Self { + parser: CommandLineWParser::new(command_line_args_ucs2), + } } } - diff --git a/src/lib.rs b/src/lib.rs index ffd980a441da83cc4980b6fd37c485ee8ebe6406..7d6be438a01d3eb595b6e57256d1f91a54a35a0a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg_attr(docsrs, feature(doc_cfg))] //! Emulates glob (wildcard) argument expansion on Windows. No-op on other platforms. //! //! Unix shells expand command-line arguments like `a*`, `file.???` and pass them expanded to applications. @@ -14,29 +15,28 @@ //! //! Use `wild::args()` instead of `std::env::args()` (or `wild::args_os()` instead of `std::env::args_os()`). //! -//! If you use [Clap](https://crates.rs/crates/clap), use `.get_matches_from(wild::args())` instead of `.get_matches()`. +//! If you use [Clap](https://lib.rs/crates/clap), use `.get_matches_from(wild::args())` instead of `.get_matches()`. 
-#[cfg(any(test,windows))] -extern crate glob; +/// An optional, experimental low-level interface for parsing command-line strings from other sources. In most cases [`args`] and [`args_os`] are more appropriate. +#[cfg(any(test, windows))] +pub mod parser; -#[cfg(any(test,windows))] -mod parser; +#[cfg(any(test, windows))] +mod globiter; -#[cfg(any(test,windows))] +#[cfg(any(test, windows))] mod argsiter; #[cfg(windows)] -pub use argsiter::*; - -#[cfg(any(test,windows))] -mod globiter; +pub use crate::argsiter::*; -/// Iterator of arguments. Equivalent to `std::env::Args`. See `args()` for details. +/// Iterator of arguments. Equivalent to `std::env::Args`. See [`args`] for details. /// /// On unix it's an alias for `std::env::Args`. /// On Windows it's a custom iterator that implements glog expansion. #[cfg(not(windows))] pub type Args = std::env::Args; +/// Same as [`Args`], but keeps invalid Unicode intact. #[cfg(not(windows))] pub type ArgsOs = std::env::ArgsOs; @@ -50,11 +50,13 @@ pub type ArgsOs = std::env::ArgsOs; /// the file system as it parses. This allows reading potentially huge lists of /// filenames, but it's not an atomic snapshot (use `.collect()` if you need that). #[cfg(not(windows))] +#[must_use] pub fn args() -> Args { std::env::args() } #[cfg(not(windows))] +#[must_use] pub fn args_os() -> ArgsOs { std::env::args_os() } @@ -69,14 +71,14 @@ pub fn args_os() -> ArgsOs { /// On non-Windows platforms it returns `env::args()` as-is, /// assuming expansion has already been done by the shell. 
#[cfg(windows)] +#[must_use] pub fn args() -> Args { - Args { - iter: args_os(), - } + Args { iter: args_os() } } -/// Same as `args()`, but returns `OsString` +/// Same as [`args`], but returns `OsString` #[cfg(windows)] +#[must_use] pub fn args_os() -> ArgsOs { ArgsOs { args: globs(), @@ -107,7 +109,7 @@ fn raw_command_line() -> Option<&'static [u16]> { return None; } let mut len = 0; - while *line_ptr.offset(len as isize) != 0 { + while *line_ptr.add(len) != 0 { len += 1; } Some(std::slice::from_raw_parts(line_ptr, len)) diff --git a/src/parser.rs b/src/parser.rs index fb82cd7ac4caeba98be0bf41ea745079aeed7b91..aa41c266e73771c5778d0b6ddb2a859b7f16a39a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,3 +1,27 @@ +use std::fmt; + +/// An experimental, low-level access to each individual character of raw arguments. +#[must_use] +pub struct CommandLineWParser<'argsline> { + line: &'argsline [u16], +} + +impl<'argsline> CommandLineWParser<'argsline> { + #[inline] + #[must_use] + pub fn new(command_line_args_ucs2: &'argsline [u16]) -> Self { + Self { + line: command_line_args_ucs2, + } + } +} + +impl<'a> fmt::Debug for CommandLineWParser<'a> { + #[cold] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + String::from_utf16_lossy(self.line).fmt(f) + } +} #[derive(Debug)] enum State { @@ -7,101 +31,117 @@ enum State { Backslashes(usize, bool), } +/// A single code unit, which may be UCS-2 or half-broken UTF-16. Not a character. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CharCode { + /// This code unit was found inside quotes (it's just text) + Quoted(u16), + /// This code unit was found outside quotes (you could interpret it as a glob) + Unquoted(u16), +} + /// Given UCS2/potentially-broken-UTF-16 string parses one argument, following /// the absolutely bizarre quoting rules of `CommandLineToArgvW`, and returns -/// parsed argument as well as a slice of the remaining arguments. +/// a bool indicating whether there's anything more left. 
/// -/// Calling this repeatedly until rest is empty will parse all arguments. +/// Calling this repeatedly until it returns false will parse all arguments. /// -/// `arg` is an empty pre-allocated argument to be returned, and the callback adds a new code unit to it. -/// The last callback argument is whether the unit was quoted or not. +/// The callback is expected to accumulate code units itself. /// /// This parses u16 code units, rather than code points. /// This allows supporting unpaired surrogates and ensures they won't "eat" any control characters. -pub fn next_arg<AddC, ArgVec>(line: &[u16], mut arg: ArgVec, push: AddC) -> (Option<ArgVec>, &[u16]) -where - AddC: Fn(&mut ArgVec, u16, bool), -{ - use self::State::*; - let mut state = BetweenArgs; - for (i, &cu) in line.iter().enumerate() { - state = match state { - BetweenArgs => match cu { - c if c == u16::from(b' ') => BetweenArgs, - c if c == u16::from(b'"') => InArg(true), - c if c == u16::from(b'\\') => Backslashes(1, false), - c => { - push(&mut arg, c, false); - InArg(false) - }, - }, - InArg(quoted) => match cu { - c if c == u16::from(b'\\') => Backslashes(1, quoted), - c if quoted && c == u16::from(b'"') => OnQuote, - c if !quoted && c == u16::from(b'"') => InArg(true), - c if !quoted && c == u16::from(b' ') => { - return (Some(arg), &line[i+1..]); +impl<'argsline> CommandLineWParser<'argsline> { + pub fn accumulate_next<CharacterAccumulator>(&mut self, mut push: CharacterAccumulator) -> bool + where CharacterAccumulator: FnMut(CharCode) + { + use self::State::*; + let mut state = BetweenArgs; + for (i, &cu) in self.line.iter().enumerate() { + state = match state { + BetweenArgs => match cu { + c if c == u16::from(b' ') => BetweenArgs, + c if c == u16::from(b'"') => InArg(true), + c if c == u16::from(b'\\') => Backslashes(1, false), + c => { + push(CharCode::Unquoted(c)); + InArg(false) + }, }, - c => { - push(&mut arg, c, quoted); - InArg(quoted) - }, - }, - OnQuote => match cu { - c if c == 
u16::from(b'"') => { - // In quoted arg "" means literal quote and the end of the quoted string (but not arg) - push(&mut arg, u16::from(b'"'), true); - InArg(false) - }, - c if c == u16::from(b' ') => { - return (Some(arg), &line[i+1..]); + InArg(quoted) => match cu { + c if c == u16::from(b'\\') => Backslashes(1, quoted), + c if quoted && c == u16::from(b'"') => OnQuote, + c if !quoted && c == u16::from(b'"') => InArg(true), + c if !quoted && c == u16::from(b' ') => { + self.line = &self.line[i+1..]; + return true; + }, + c => { + push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); + InArg(quoted) + }, }, - c => { - push(&mut arg, c, false); - InArg(false) + OnQuote => match cu { + c if c == u16::from(b'"') => { + // In quoted arg "" means literal quote and the end of the quoted string (but not arg) + push(CharCode::Quoted(u16::from(b'"'))); + InArg(false) + }, + c if c == u16::from(b' ') => { + self.line = &self.line[i+1..]; + return true; + }, + c => { + push(CharCode::Unquoted(c)); + InArg(false) + }, }, - }, - Backslashes(count, quoted) => match cu { - c if c == u16::from(b'\\') => Backslashes(count + 1, quoted), - c if c == u16::from(b'"') => { - // backslashes followed by a quotation mark are treated as pairs of protected backslashes - for _ in 0..count/2 { - push(&mut arg, u16::from(b'\\'), quoted); - } + Backslashes(count, quoted) => match cu { + c if c == u16::from(b'\\') => Backslashes(count + 1, quoted), + c if c == u16::from(b'"') => { + // backslashes followed by a quotation mark are treated as pairs of protected backslashes + let b = if quoted { CharCode::Quoted(u16::from(b'\\')) } else { CharCode::Unquoted(u16::from(b'\\')) }; + for _ in 0..count/2 { + push(b); + } - if count & 1 != 0 { - // An odd number of backslashes is treated as followed by a protected quotation mark. 
- push(&mut arg, u16::from(b'"'), quoted); - InArg(quoted) - } else if quoted { - // An even number of backslashes is treated as followed by a word terminator. - return (Some(arg), &line[i+1..]); - } else { + if count & 1 != 0 { + // An odd number of backslashes is treated as followed by a protected quotation mark. + let c = u16::from(b'"'); + push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); + InArg(quoted) + } else if quoted { + // An even number of backslashes is treated as followed by a word terminator. + self.line = &self.line[i+1..]; + return true; + } else { + InArg(quoted) + } + }, + c => { + // A string of backslashes not followed by a quotation mark has no special meaning. + let b = if quoted { CharCode::Quoted(u16::from(b'\\')) } else { CharCode::Unquoted(u16::from(b'\\')) }; + for _ in 0..count { + push(b); + } + push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); InArg(quoted) - } + }, }, - c => { - // A string of backslashes not followed by a quotation mark has no special meaning. - for _ in 0..count { - push(&mut arg, u16::from(b'\\'), quoted); - } - push(&mut arg, c, quoted); - InArg(quoted) - }, - }, + } } + let arg = match state { + BetweenArgs => false, + OnQuote | InArg(..) => true, + Backslashes(count, quoted) => { + // A string of backslashes not followed by a quotation mark has no special meaning. + let b = if quoted { CharCode::Quoted(u16::from(b'\\')) } else { CharCode::Unquoted(u16::from(b'\\')) }; + for _ in 0..count { + push(b); + } + true + }, + }; + self.line = &self.line[..0]; + arg } - let arg = match state { - BetweenArgs => None, - OnQuote | InArg(..) => Some(arg), - Backslashes(count, quoted) => { - // A string of backslashes not followed by a quotation mark has no special meaning. - for _ in 0..count { - push(&mut arg, u16::from(b'\\'), quoted); - } - - Some(arg) - }, - }; - (arg, &line[..0]) }