diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 9448ae4f6e9984b06e4c5a1616ea6f28713d27c1..945620e4dbac8d7052b8b930b82969cab9f26b92 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "38b6cd950ff359724f4f36685bb4b5465cda06b4" + "sha1": "fb5b2a9facb12da0413d0dcf819b73bce1ad0c01" }, "path_in_vcs": "" } \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 6a920c9e786b68753a56440a12b648116b42d4d8..bdb7e495e82c682bea7f26d87b567b4f9fc68b6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,9 +10,9 @@ # See Cargo.toml.orig for the original contents. [package] -edition = "2018" +edition = "2021" name = "wild" -version = "2.1.0" +version = "2.2.1" authors = ["Kornel <kornel@geekhood.net>"] include = [ "/src/*.rs", @@ -39,18 +39,22 @@ license = "Apache-2.0 OR MIT" repository = "https://gitlab.com/kornelski/wild" [package.metadata.docs.rs] -targets = ["x86_64-unknown-linux-gnu"] all-features = true rustdoc-args = [ "--cfg", "docsrs", + "--generate-link-to-definition", ] +targets = ["x86_64-unknown-linux-gnu"] [dev-dependencies.glob] -version = "0.3" +version = "0.3.1" + +[features] +glob-quoted-on-windows = [] [target."cfg(windows)".dependencies.glob] -version = "0.3" +version = "0.3.1" [badges.appveyor] repository = "pornel/wild" diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 3915a4140f209a614a03865d840482c3624a42bd..8e91fd212df0615cbe601e3321b0aebf05cdbc59 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -9,8 +9,8 @@ license = "Apache-2.0 OR MIT" name = "wild" readme = "README.md" repository = "https://gitlab.com/kornelski/wild" -version = "2.1.0" -edition = "2018" +version = "2.2.1" +edition = "2021" include = ["/src/*.rs", "/Cargo.toml", "/LICENSE", "/README.md"] [badges] @@ -19,12 +19,24 @@ appveyor = { repository = "pornel/wild" } maintenance = { status = "passively-maintained" } [target.'cfg(windows)'.dependencies] -glob = "0.3" +glob = "0.3.1" [dev-dependencies] -glob = "0.3" +glob = "0.3.1" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] all-features = true -rustdoc-args = ["--cfg", "docsrs"] +rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"] + +[features] +# Give up on trying to accurately emulate Unix-like argument quoting semantics, +# and always interpret `*` (etc.) as file path patterns, even in quoted strings. +# +# This affects only Windows. +# +# This creates possibility of non-file arguments that contain `*` to be interpreted as file paths. +# OTOH it makes it easier for users to use globs on paths with spaces, and to call executables via tools/APIs that always quote args. +# +# Don't enable this feature from libraries. This decision should be left to binaries. +glob-quoted-on-windows = [] diff --git a/debian/cargo-checksum.json b/debian/cargo-checksum.json index 60d032f7651ebab3db6ee8f0b6b92dbfc75872af..e2d9ce2b354f3b9b2fc315f65d54e50a76fb001a 100644 --- a/debian/cargo-checksum.json +++ b/debian/cargo-checksum.json @@ -1 +1 @@ -{"package":"05b116685a6be0c52f5a103334cbff26db643826c7b3735fc0a3ba9871310a74","files":{}} +{"package":"a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1","files":{}} diff --git a/debian/changelog b/debian/changelog index c78c92f9746ffe9906a079e4b5ced05d276d53d5..85862d9bf939b0dd1db1fed658f57db42ba7ff04 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +rust-wild (2.2.1-1) unstable; urgency=medium + + * Team upload. + * Package wild 2.2.1 from crates.io using debcargo 2.7.6 + + -- Blair Noctis <ncts@debian.org> Sat, 11 Jan 2025 19:39:27 +0000 + rust-wild (2.1.0-1) unstable; urgency=medium * Team upload. diff --git a/debian/compat b/debian/compat deleted file mode 100644 index 48082f72f087ce7e6fa75b9c41d7387daecd447b..0000000000000000000000000000000000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -12 diff --git a/debian/control b/debian/control index fedc36f8396c252f40501e409b4b1bb2e8f26a34..8e612f903cd1ea2f2853301d0372224a773ce8e1 100644 --- a/debian/control +++ b/debian/control @@ -1,19 +1,20 @@ Source: rust-wild Section: rust Priority: optional -Build-Depends: debhelper (>= 12), - dh-cargo (>= 25), +Build-Depends: debhelper-compat (= 13), + dh-sequence-cargo, cargo:native <!nocheck>, rustc:native <!nocheck>, libstd-rust-dev <!nocheck>, - librust-glob-0.3+default-dev <!nocheck> + librust-glob-0.3+default-dev (>= 0.3.1-~~) <!nocheck> Maintainer: Debian Rust Maintainers <pkg-rust-maintainers@alioth-lists.debian.net> Uploaders: Helen Koike <helen@koikeco.de> -Standards-Version: 4.5.1 +Standards-Version: 4.7.0 Vcs-Git: https://salsa.debian.org/rust-team/debcargo-conf.git [src/wild] Vcs-Browser: https://salsa.debian.org/rust-team/debcargo-conf/tree/master/src/wild Homepage: https://lib.rs/crates/wild +X-Cargo-Crate: wild Rules-Requires-Root: no Package: librust-wild-dev @@ -21,15 +22,18 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - librust-glob-0.3+default-dev + librust-glob-0.3+default-dev (>= 0.3.1-~~) Provides: librust-wild+default-dev (= ${binary:Version}), + librust-wild+glob-quoted-on-windows-dev (= ${binary:Version}), librust-wild-2-dev (= ${binary:Version}), librust-wild-2+default-dev (= ${binary:Version}), - librust-wild-2.1-dev (= ${binary:Version}), - librust-wild-2.1+default-dev (= ${binary:Version}), - librust-wild-2.1.0-dev (= ${binary:Version}), - librust-wild-2.1.0+default-dev (= ${binary:Version}) + librust-wild-2+glob-quoted-on-windows-dev (= ${binary:Version}), + librust-wild-2.2-dev (= ${binary:Version}), + librust-wild-2.2+default-dev (= ${binary:Version}), + librust-wild-2.2+glob-quoted-on-windows-dev (= ${binary:Version}), + librust-wild-2.2.1-dev (= ${binary:Version}), + librust-wild-2.2.1+default-dev (= ${binary:Version}), + librust-wild-2.2.1+glob-quoted-on-windows-dev (= ${binary:Version}) Description: Glob (wildcard) expanded command-line arguments on Windows - Rust source code - This package contains the source for the Rust wild crate, packaged by debcargo - for use with cargo and dh-cargo. + Source code for Debianized Rust crate "wild" diff --git a/debian/copyright.debcargo.hint b/debian/copyright.debcargo.hint index 16392124e57d99ad11f0f4b85dd3cc9c10ffd35c..bfb03a6874586d32ce2c1e852bd97cea550415e1 100644 --- a/debian/copyright.debcargo.hint +++ b/debian/copyright.debcargo.hint @@ -12,7 +12,7 @@ Comment: be correct information so you should review and fix this before uploading to the archive. -Files: ./LICENSE +Files: LICENSE Copyright: 2018 Kornel LesiÅ„ski License: UNKNOWN-LICENSE; FIXME (overlay) Comment: @@ -21,8 +21,8 @@ Comment: Files: debian/* Copyright: - 2019-2022 Debian Rust Maintainers <pkg-rust-maintainers@alioth-lists.debian.net> - 2019-2022 Helen Koike <helen@koikeco.de> + 2019-2025 Debian Rust Maintainers <pkg-rust-maintainers@alioth-lists.debian.net> + 2019-2025 Helen Koike <helen@koikeco.de> License: Apache-2.0 or MIT License: Apache-2.0 diff --git a/debian/debcargo.toml b/debian/debcargo.toml index 642220996c8041ced336e8199b5415a042bab062..e4f3f4911c05f43320094f315191c8e202ca0258 100644 --- a/debian/debcargo.toml +++ b/debian/debcargo.toml @@ -1,2 +1,4 @@ overlay = "." uploaders = ["Helen Koike <helen@koikeco.de>"] + +collapse_features = true \ No newline at end of file diff --git a/debian/tests/control b/debian/tests/control index d6d84dadb7109f321eb8a989bc53f6f4c1a45ac9..3d62825fc7d0a714f7312254ee0f08aa95b2d024 100644 --- a/debian/tests/control +++ b/debian/tests/control @@ -1,14 +1,19 @@ -Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.1.0 --all-targets --all-features +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.2.1 --all-targets --all-features Features: test-name=rust-wild:@ -Depends: dh-cargo (>= 18), librust-glob-0.3+default-dev, @ +Depends: dh-cargo (>= 31), rustc, librust-glob-0.3+default-dev (>= 0.3.1-~~), @ Restrictions: allow-stderr, skip-not-installable -Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.1.0 --all-targets +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.2.1 --all-targets Features: test-name=librust-wild-dev:default -Depends: dh-cargo (>= 18), librust-glob-0.3+default-dev, @ +Depends: dh-cargo (>= 31), rustc, librust-glob-0.3+default-dev (>= 0.3.1-~~), @ Restrictions: allow-stderr, skip-not-installable -Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.1.0 --all-targets --no-default-features +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.2.1 --all-targets --no-default-features --features glob-quoted-on-windows +Features: test-name=librust-wild-dev:glob-quoted-on-windows +Depends: dh-cargo (>= 31), rustc, librust-glob-0.3+default-dev (>= 0.3.1-~~), @ +Restrictions: allow-stderr, skip-not-installable + +Test-Command: /usr/share/cargo/bin/cargo-auto-test wild 2.2.1 --all-targets --no-default-features Features: test-name=librust-wild-dev: -Depends: dh-cargo (>= 18), librust-glob-0.3+default-dev, @ +Depends: dh-cargo (>= 31), rustc, librust-glob-0.3+default-dev (>= 0.3.1-~~), @ Restrictions: allow-stderr, skip-not-installable diff --git a/debian/watch b/debian/watch index ae23bf0cd9c04166b8143e7659909f34837603e8..ae37069920bc0ffb22af3d786cf7f6dc0368ee8f 100644 --- a/debian/watch +++ b/debian/watch @@ -1,4 +1,4 @@ version=4 opts=filenamemangle=s/.*\/(.*)\/download/wild-$1\.tar\.gz/g,\ -uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/ \ +uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\.?\d*)$/$1~$2/ \ https://qa.debian.org/cgi-bin/fakeupstream.cgi?upstream=crates.io/wild .*/crates/wild/@ANY_VERSION@/download diff --git a/src/argsiter.rs b/src/argsiter.rs index e9539335c7061112f0ee8cfb379acc63cfca08d0..17ea59e24f29f17a697fa93ed2b7261bb3900161 100644 --- a/src/argsiter.rs +++ b/src/argsiter.rs @@ -5,10 +5,21 @@ use std::fmt; /// Windows replacement for `std::env::ArgsOs` #[cfg_attr(test, allow(dead_code))] pub struct ArgsOs { - pub(crate) args: Option<GlobArgs<'static>>, + pub(crate) args: GlobArgs<'static>, pub(crate) current_arg_globs: Option<glob::Paths>, } +impl ArgsOs { + /// Expects result of `GetCommandLineW` + #[inline] + pub(crate) fn from_raw_command_line(cmd: &'static [u16]) -> Self { + Self { + args: GlobArgs::new(cmd), + current_arg_globs: None, + } + } +} + /// Windows replacement for `std::env::Args` pub struct Args { pub(crate) iter: ArgsOs, @@ -36,53 +47,41 @@ impl Iterator for ArgsOs { type Item = OsString; fn next(&mut self) -> Option<Self::Item> { - let glob_options = glob::MatchOptions { case_sensitive: false, ..Default::default() }; - match self.current_arg_globs.as_mut().and_then(first_non_error) { - Some(path) => Some(path.into_os_string()), - None => match self.args { - Some(ref mut args) => match args.next() { - // lossy: https://github.com/rust-lang-nursery/glob/issues/23 - Some(arg) => if arg.contains_glob { - match glob::glob_with(&arg.pattern.to_string_lossy(), glob_options) { - Ok(mut glob_iter) => { - let first_glob = first_non_error(&mut glob_iter); - self.current_arg_globs = Some(glob_iter); - match first_glob { - Some(path) => Some(path.into_os_string()), - None => { - // non-matching patterns are passed as regular strings - self.current_arg_globs = None; - Some(arg.text) - }, - } - } - Err(_) => { - // Invalid patterns are passed as regular strings - Some(arg.text) - }, - } - } else { - // valid, but non-wildcard args passed as is, in order to avoid normalizing slashes - Some(arg.text) - }, - None => None, // end of args + if let Some(path) = self.current_arg_globs.as_mut().and_then(first_non_error) { + return Some(path.into_os_string()); + } + let arg = self.args.next()?; // if None — end of args + let glob_opts = glob::MatchOptions { case_sensitive: false, ..Default::default() }; + if let Some(Ok(mut glob_iter)) = arg.pattern.as_ref().map(move |pat| glob::glob_with(pat, glob_opts)) { + let first_glob = first_non_error(&mut glob_iter); + self.current_arg_globs = Some(glob_iter); + match first_glob { + Some(path) => Some(path.into_os_string()), + None => { + // non-matching patterns are passed as regular strings + self.current_arg_globs = None; + Some(arg.text) }, - None => None, // error: no args available at all - }, + } + // Invalid patterns are passed as regular strings + } else { + // valid, but non-wildcard args passed as is, in order to avoid normalizing slashes + Some(arg.text) } } } impl fmt::Debug for Args { + #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.iter.fmt(f) } } impl fmt::Debug for ArgsOs { + #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.args.as_ref().map(|args| args.fmt(f)) - .unwrap_or_else(|| "".fmt(f)) + self.args.fmt(f) } } @@ -90,13 +89,8 @@ impl fmt::Debug for ArgsOs { #[test] fn finds_cargo_toml() { let cmd = "foo.exe _not_?a?_[f]ilename_ \"_not_?a?_[p]attern_\" Cargo.tom?".chars().map(|c| c as u16).collect::<Vec<_>>(); - let args = GlobArgs::new(Box::leak(cmd.into_boxed_slice())); - let iter = Args { - iter: ArgsOs { - args: Some(args), - current_arg_globs: None, - }, - }; + let args = ArgsOs::from_raw_command_line(Box::leak(cmd.into_boxed_slice())); + let iter = Args { iter: args }; assert_eq!("\"foo.exe _not_?a?_[f]ilename_ \\\"_not_?a?_[p]attern_\\\" Cargo.tom?\"", format!("{:?}", iter)); let args: Vec<_> = iter.collect(); assert_eq!(4, args.len()); @@ -109,13 +103,8 @@ fn finds_cargo_toml() { #[test] fn unquoted_slashes_unchanged() { let cmd = r#"foo.exe //// .. ./ \\\\"#.chars().map(|c| c as u16).collect::<Vec<_>>(); - let args = GlobArgs::new(Box::leak(cmd.into_boxed_slice())); - let iter = Args { - iter: ArgsOs { - args: Some(args), - current_arg_globs: None, - }, - }; + let args = ArgsOs::from_raw_command_line(Box::leak(cmd.into_boxed_slice())); + let iter = Args { iter: args }; let args: Vec<_> = iter.collect(); assert_eq!(5, args.len()); assert_eq!("foo.exe", &args[0]); @@ -128,11 +117,7 @@ fn unquoted_slashes_unchanged() { #[test] fn finds_readme_case_insensitive() { let cmd = "foo.exe _not_?a?_[f]ilename_ \"_not_?a?_[p]attern_\" read*.MD".chars().map(|c| c as u16).collect::<Vec<_>>(); - let args = GlobArgs::new(Box::leak(cmd.into_boxed_slice())); - let iter = ArgsOs { - args: Some(args), - current_arg_globs: None, - }; + let iter = ArgsOs::from_raw_command_line(Box::leak(cmd.into_boxed_slice())); let args: Vec<_> = iter.map(|c| c.to_string_lossy().to_string()).collect(); assert_eq!(4, args.len()); assert_eq!("foo.exe", &args[0]); diff --git a/src/globiter.rs b/src/globiter.rs index 8a0f055137c1ec0a1a342c3f494c8bac103e2374..b784b74d673762b3fa4e9c99e2186de7f1557a06 100644 --- a/src/globiter.rs +++ b/src/globiter.rs @@ -4,9 +4,11 @@ use std::ffi::OsString; use std::fmt; pub(crate) struct ArgOs { - pub pattern: OsString, + /// `Some` if contains a glob + /// + /// Pattern is a string, because https://github.com/rust-lang-nursery/glob/issues/23 + pub pattern: Option<String>, pub text: OsString, - pub contains_glob: bool, } /// Iterator retuning glob-escaped arguments. Call `args()` to obtain it. @@ -16,6 +18,7 @@ pub(crate) struct GlobArgs<'argsline> { } impl<'a> fmt::Debug for GlobArgs<'a> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.parser.fmt(f) } @@ -38,15 +41,14 @@ impl LossyOsStringExt for OsString {} impl<'a> Iterator for GlobArgs<'a> { type Item = ArgOs; fn next(&mut self) -> Option<Self::Item> { - let mut pattern = vec![]; + let mut pattern: Option<Vec<u16>> = None; let mut text = vec![]; - let mut contains_glob = false; + let everything_as_unquoted = cfg!(feature = "glob-quoted-on-windows"); let has_arg = self.parser.accumulate_next(|c| { let (quoted, c) = match c { - CharCode::Quoted(c) => (true, c), + CharCode::Quoted(c) => (!everything_as_unquoted, c), CharCode::Unquoted(c) => (false, c), }; - text.push(c); const Q: u16 = b'?' as u16; const A: u16 = b'*' as u16; const L: u16 = b'[' as u16; @@ -54,24 +56,34 @@ impl<'a> Iterator for GlobArgs<'a> { match c { Q | A | L | R => { if quoted { - pattern.extend([ - u16::from(b'['), - c, - u16::from(b']'), - ].iter().copied()); + if let Some(pattern) = &mut pattern { + pattern.extend([L, c, R]); + } } else { - pattern.push(c); - contains_glob = true; + let p = pattern.get_or_insert_with(|| { + text.iter().flat_map(|&c| match c { + // type inference picks a slice here, sometimes! + Q | A | L | R => <[u16; 3] as IntoIterator>::into_iter([L, c, R]).take(3), + _ => <[u16; 3] as IntoIterator>::into_iter([c, 0, 0]).take(1), + }).collect() + }); + p.push(c); } }, - _ => pattern.push(c), + _ => if let Some(p) = &mut pattern { + p.push(c) + }, }; + text.push(c); }); if has_arg { Some(ArgOs { - pattern: OsString::from_wide(&pattern), + pattern: pattern.map(|pattern| { + char::decode_utf16(pattern) + .map(|r| r.unwrap_or('?')) + .collect::<String>() + }), text: OsString::from_wide(&text), - contains_glob, }) } else { None @@ -82,6 +94,7 @@ impl<'a> Iterator for GlobArgs<'a> { impl<'argsline> GlobArgs<'argsline> { /// UTF-16/UCS2 string from `GetCommandLineW` #[allow(dead_code)] + #[inline] pub(crate) fn new(command_line_args_ucs2: &'argsline [u16]) -> Self { Self { parser: CommandLineWParser::new(command_line_args_ucs2), diff --git a/src/lib.rs b/src/lib.rs index 7d6be438a01d3eb595b6e57256d1f91a54a35a0a..a330f8280dc493ce1d02d68fffc3e222b1cc1e5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ //! //! Use `wild::args()` instead of `std::env::args()` (or `wild::args_os()` instead of `std::env::args_os()`). //! -//! If you use [Clap](https://lib.rs/crates/clap), use `.get_matches_from(wild::args())` instead of `.get_matches()`. +//! If you use [Clap](https://lib.rs/crates/clap), use `.get_matches_from(wild::args_os())` instead of `.get_matches()`. /// An optional, experimental low-level interface for parsing command-line strings from other sources. In most cases [`args`] and [`args_os`] are more appropriate. #[cfg(any(test, windows))] @@ -80,20 +80,7 @@ pub fn args() -> Args { #[cfg(windows)] #[must_use] pub fn args_os() -> ArgsOs { - ArgsOs { - args: globs(), - current_arg_globs: None, - } -} - -/// Parses `GetCommandLineW` the same way as `CommandLineToArgvW`, -/// but escapes quoted glob metacharacters `*`, `?`, `[`, `]` using `[*]` syntax. -/// -/// Windows-only, unstable. -#[cfg(windows)] -#[inline] -fn globs() -> Option<globiter::GlobArgs<'static>> { - raw_command_line().map(|cmd| globiter::GlobArgs::new(cmd)) + ArgsOs::from_raw_command_line(raw_command_line()) } #[cfg(windows)] @@ -102,17 +89,17 @@ extern "system" { } #[cfg(windows)] -fn raw_command_line() -> Option<&'static [u16]> { +fn raw_command_line() -> &'static [u16] { unsafe { let line_ptr = GetCommandLineW(); if line_ptr.is_null() { - return None; + return &[]; } let mut len = 0; while *line_ptr.add(len) != 0 { len += 1; } - Some(std::slice::from_raw_parts(line_ptr, len)) + std::slice::from_raw_parts(line_ptr, len) } } @@ -120,7 +107,7 @@ fn raw_command_line() -> Option<&'static [u16]> { fn parsed(s: &str) -> String { let t: Vec<_> = s.encode_utf16().collect(); let args: Vec<_> = globiter::GlobArgs::new(&t) - .map(|s| s.pattern.to_string_lossy().to_string()) + .map(|s| s.pattern.map(|p| format!("<glob {p}>")).unwrap_or(s.text.to_string_lossy().into_owned())) .collect(); args.join(";") } @@ -135,9 +122,8 @@ fn unquoted(s: &str) -> String { } #[test] -#[cfg(windows)] fn test_actual_args() { - assert!(globs().expect("args found").count() >= 1); + assert!(args_os().count() >= 1); } #[test] @@ -146,20 +132,17 @@ fn test_parse_1() { assert_eq!(r#"æ¼¢å—"#, parsed("\"æ¼¢å—\"")); assert_eq!(r#"æ¼¢\å—"#, parsed("\"æ¼¢\\å—\"")); assert_eq!(r#"unquoted"#, parsed("unquoted")); - assert_eq!(r#"*"#, parsed("*")); - assert_eq!(r#"?"#, parsed("?")); + assert_eq!(r#"<glob *>"#, parsed("*")); + assert_eq!(r#"<glob ?>"#, parsed("?")); assert_eq!(r#"quoted"#, parsed("\"quoted\"")); assert_eq!(r#"quoted"#, unquoted("\"quoted\"")); - assert_eq!(r#"[*]"#, parsed("\"*\"")); assert_eq!(r#"*"#, unquoted("\"*\"")); - assert_eq!(r#"[?]"#, parsed("\"?\"")); assert_eq!(r#"?"#, unquoted("\"?\"")); - assert_eq!(r#"[]]"#, parsed("\"]\"")); assert_eq!(r#"]"#, unquoted("\"]\"")); assert_eq!(r#"quo"ted"#, parsed(r#" "quo\"ted" "#)); // backslash can escape quotes - assert_eq!(r#"quo"ted? "#, parsed(r#" "quo""ted?" "#)); // and quote can escape quotes + assert_eq!(r#"<glob quo"ted? >"#, parsed(r#" "quo""ted?" "#)); // and quote can escape quotes assert_eq!(r#"unquo"ted"#, parsed(r#" unquo\"ted "#)); // backslash can escape quotes, even outside quotes - assert_eq!(r#"unquoted?"#, parsed(r#" unquo""ted? "#)); // quote escaping does not work outside quotes + assert_eq!(r#"<glob unquoted?>"#, parsed(r#" unquo""ted? "#)); // quote escaping does not work outside quotes assert_eq!(r#"""#, parsed(r#""""""#)); // quote escapes quote in quoted string assert_eq!(r#"""#, parsed(r#"""""""#)); assert_eq!(r#""""#, parsed(r#""""""""#)); @@ -169,17 +152,20 @@ fn test_parse_1() { assert_eq!(r#"\\server\share\path with spaces"#, parsed(r#""\\server\share\path with spaces""#)); // lone double backslash is not special assert_eq!("aba", parsed(r#""a"b"a""#)); // quotes can go in and out assert_eq!("abac", parsed(r#""a"b"a"c"#)); // quotes can go in and out - assert_eq!("c*a[*]b*a[*]c*", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out assert_eq!(r#"\\"#, parsed(r#"\\\\""#)); - assert_eq!(r#"?\\?"#, parsed(r#"?\\\\"?"#)); // unpaired quote is interpreted like an end quote + assert_eq!(r#"<glob ?\\?>"#, parsed(r#"?\\\\"?"#)); // unpaired quote is interpreted like an end quote assert_eq!(r#"\""#, parsed(r#"\\\""#)); - assert_eq!(r#"\"[a-z]"#, parsed(r#"\\\"[a-z]"#)); + assert_eq!(r#"<glob \"[a-z]>"#, parsed(r#"\\\"[a-z]"#)); assert_eq!(" ", parsed(r#"" "#)); // unterminated quotes are OK assert_eq!("", parsed(r#""""#)); - assert_eq!(r#"[a-c][d-z]"#, parsed(r#"[a-c]""[d-z]"#)); - assert_eq!(r#"[[]a-c[]]"[d-z]"#, parsed(r#""[a-c]""[d-z]""#)); + assert_eq!(r#"<glob [a-c][d-z]>"#, parsed(r#"[a-c]""[d-z]"#)); assert_eq!("", parsed(r#"""#)); assert_eq!("x", parsed(r#"x""#)); + assert_eq!(r#"\;x;y"#, parsed(r"\ x y")); + assert_eq!(r#"\\;x;y"#, parsed(r"\\ x y")); + assert_eq!(r#"a\\\;x;y"#, parsed(r"a\\\ x y")); + assert_eq!(r#"<glob a\\\*>;x;y"#, parsed(r"a\\\* x y")); + assert_eq!(r#"a\\\ x;y"#, parsed(r#""a\\\ x" y"#)); assert_eq!(r#"\"#, parsed(r"\")); assert_eq!(r#"\\"#, parsed(r"\\")); assert_eq!(r#"\\\"#, parsed(r"\\\")); @@ -187,6 +173,27 @@ fn test_parse_1() { assert_eq!(r#"\\a"#, parsed(r#"\\\\"a"#)); assert_eq!(r#"\\a"#, parsed(r#"\\\\"a""#)); assert_eq!(r#"¥¥"#, parsed(r#"¥¥""#)); // in Unicode this isn't backslash + assert_eq!(r#".\path\to\folder\;-rf"#, parsed(r#".\path\to\folder\ -rf"#)); +} + +#[test] +#[cfg(not(feature = "glob-quoted-on-windows"))] +fn test_unquoted() { + assert_eq!(r#"*"#, parsed("\"*\"")); + assert_eq!(r#"?"#, parsed("\"?\"")); + assert_eq!(r#"]"#, parsed("\"]\"")); + assert_eq!("<glob c*a[*]b*a[*]c*>", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out + assert_eq!(r#"<glob [[]a-c[]]"[d-z]>"#, parsed(r#""[a-c]""[d-z]""#)); +} + +#[test] +#[cfg(feature = "glob-quoted-on-windows")] +fn test_unquoted() { + assert_eq!(r#"<glob *>"#, parsed("\"*\"")); + assert_eq!(r#"<glob ?>"#, parsed("\"?\"")); + assert_eq!(r#"<glob ]>"#, parsed("\"]\"")); + assert_eq!("<glob c*a*b*a*c*>", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out + assert_eq!(r#"<glob [a-c]"[d-z]>"#, parsed(r#""[a-c]""[d-z]""#)); } #[test] diff --git a/src/parser.rs b/src/parser.rs index aa41c266e73771c5778d0b6ddb2a859b7f16a39a..09864fdc767a9b888f8310ecc1a32d447238e731 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ use std::fmt; /// An experimental, low-level access to each individual character of raw arguments. #[must_use] pub struct CommandLineWParser<'argsline> { - line: &'argsline [u16], + line: std::slice::Iter<'argsline, u16>, } impl<'argsline> CommandLineWParser<'argsline> { @@ -11,7 +11,7 @@ impl<'argsline> CommandLineWParser<'argsline> { #[must_use] pub fn new(command_line_args_ucs2: &'argsline [u16]) -> Self { Self { - line: command_line_args_ucs2, + line: command_line_args_ucs2.iter(), } } } @@ -19,7 +19,7 @@ impl<'argsline> CommandLineWParser<'argsline> { impl<'a> fmt::Debug for CommandLineWParser<'a> { #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - String::from_utf16_lossy(self.line).fmt(f) + String::from_utf16_lossy(self.line.as_slice()).fmt(f) } } @@ -28,6 +28,7 @@ enum State { BetweenArgs, InArg(bool), OnQuote, + /// number + in quotes Backslashes(usize, bool), } @@ -40,9 +41,14 @@ pub enum CharCode { Unquoted(u16), } +const SPACE: u16 = b' ' as u16; +const TAB: u16 = b'\t' as u16; +const QUOTE: u16 = b'"' as u16; +const BACKSLASH: u16 = b'\\' as u16; + /// Given UCS2/potentially-broken-UTF-16 string parses one argument, following /// the absolutely bizarre quoting rules of `CommandLineToArgvW`, and returns -/// a bool indicating whether there's anything moe left. +/// a bool indicating whether there's anything more left. /// /// Calling this repeatedly until it returns false will parse all arguments. /// @@ -56,23 +62,22 @@ impl<'argsline> CommandLineWParser<'argsline> { { use self::State::*; let mut state = BetweenArgs; - for (i, &cu) in self.line.iter().enumerate() { + for &cu in &mut self.line { state = match state { BetweenArgs => match cu { - c if c == u16::from(b' ') => BetweenArgs, - c if c == u16::from(b'"') => InArg(true), - c if c == u16::from(b'\\') => Backslashes(1, false), + SPACE | TAB => BetweenArgs, + QUOTE => InArg(true), + BACKSLASH => Backslashes(1, false), c => { push(CharCode::Unquoted(c)); InArg(false) }, }, InArg(quoted) => match cu { - c if c == u16::from(b'\\') => Backslashes(1, quoted), - c if quoted && c == u16::from(b'"') => OnQuote, - c if !quoted && c == u16::from(b'"') => InArg(true), - c if !quoted && c == u16::from(b' ') => { - self.line = &self.line[i+1..]; + BACKSLASH => Backslashes(1, quoted), + QUOTE if quoted => OnQuote, + QUOTE if !quoted => InArg(true), + SPACE | TAB if !quoted => { return true; }, c => { @@ -81,13 +86,12 @@ impl<'argsline> CommandLineWParser<'argsline> { }, }, OnQuote => match cu { - c if c == u16::from(b'"') => { + QUOTE => { // In quoted arg "" means literal quote and the end of the quoted string (but not arg) - push(CharCode::Quoted(u16::from(b'"'))); + push(CharCode::Quoted(QUOTE)); InArg(false) }, - c if c == u16::from(b' ') => { - self.line = &self.line[i+1..]; + SPACE | TAB => { return true; }, c => { @@ -96,22 +100,20 @@ impl<'argsline> CommandLineWParser<'argsline> { }, }, Backslashes(count, quoted) => match cu { - c if c == u16::from(b'\\') => Backslashes(count + 1, quoted), - c if c == u16::from(b'"') => { + BACKSLASH => Backslashes(count + 1, quoted), + QUOTE => { // backslashes followed by a quotation mark are treated as pairs of protected backslashes - let b = if quoted { CharCode::Quoted(u16::from(b'\\')) } else { CharCode::Unquoted(u16::from(b'\\')) }; + let b = if quoted { CharCode::Quoted(BACKSLASH) } else { CharCode::Unquoted(BACKSLASH) }; for _ in 0..count/2 { push(b); } if count & 1 != 0 { // An odd number of backslashes is treated as followed by a protected quotation mark. - let c = u16::from(b'"'); - push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); + push(if quoted { CharCode::Quoted(QUOTE) } else { CharCode::Unquoted(QUOTE) }); InArg(quoted) } else if quoted { // An even number of backslashes is treated as followed by a word terminator. - self.line = &self.line[i+1..]; return true; } else { InArg(quoted) @@ -119,29 +121,32 @@ impl<'argsline> CommandLineWParser<'argsline> { }, c => { // A string of backslashes not followed by a quotation mark has no special meaning. - let b = if quoted { CharCode::Quoted(u16::from(b'\\')) } else { CharCode::Unquoted(u16::from(b'\\')) }; + let b = if quoted { CharCode::Quoted(BACKSLASH) } else { CharCode::Unquoted(BACKSLASH) }; for _ in 0..count { push(b); } - push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); - InArg(quoted) + match c { + SPACE | TAB if !quoted => return true, + c => { + push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); + InArg(quoted) + }, + } }, }, - } + }; } - let arg = match state { + match state { BetweenArgs => false, OnQuote | InArg(..) => true, Backslashes(count, quoted) => { // A string of backslashes not followed by a quotation mark has no special meaning. - let b = if quoted { CharCode::Quoted(u16::from(b'\\')) } else { CharCode::Unquoted(u16::from(b'\\')) }; + let b = if quoted { CharCode::Quoted(BACKSLASH) } else { CharCode::Unquoted(BACKSLASH) }; for _ in 0..count { push(b); } true }, - }; - self.line = &self.line[..0]; - arg + } } }