use std::borrow::Cow;
use std::error;
use std::fmt;
use std::mem;
use std::result;
/// Error produced when a command line cannot be split into words.
///
/// The only failure reported through this type is input that ends while a
/// quoted section is still open, hence the fixed message
/// `"missing closing quote"`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable error message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Emit the text directly instead of routing through the deprecated
        // `Error::description`, which triggers a deprecation warning on use.
        f.write_str("missing closing quote")
    }
}

impl error::Error for ParseError {
    // Retained so that callers still relying on the deprecated accessor keep
    // receiving the same text as `Display`.
    fn description(&self) -> &str {
        "missing closing quote"
    }
}
/// State of the word-splitting scanner as it consumes one character at a time.
enum State {
    /// Between words: whitespace is skipped and `#` starts a comment.
    Delimiter,
    /// A backslash was read while between words.
    Backslash,
    /// Inside a word, outside of any quotes.
    Unquoted,
    /// A backslash was read inside an unquoted part of a word.
    UnquotedBackslash,
    /// Inside a single-quoted section of a word.
    SingleQuoted,
    /// Inside a double-quoted section of a word.
    DoubleQuoted,
    /// A backslash was read inside a double-quoted section.
    DoubleQuotedBackslash,
    /// Inside a `#` comment, which lasts until the next newline.
    Comment,
}
/// Splits a command line into words using shell-style quoting rules.
///
/// * Unquoted spaces, tabs and newlines separate words.
/// * A `#` at the start of a word begins a comment that runs to the next
///   newline; a `#` inside a word is an ordinary character.
/// * Text inside single quotes is taken literally.
/// * Inside double quotes a backslash only escapes `$`, `` ` ``, `"` and `\`;
///   before any other character the backslash is kept. A backslash followed
///   by a newline is dropped together with the newline.
/// * Outside quotes a backslash makes the next character literal, and a
///   backslash-newline pair is removed. A backslash that is the very last
///   character of the input is kept as a literal backslash.
///
/// # Errors
///
/// Returns `ParseError` when the input ends inside a single- or double-quoted
/// section (including directly after a backslash inside double quotes).
pub fn split(s: &str) -> result::Result<Vec<String>, ParseError> {
    use State::*;

    let mut words = Vec::new();
    let mut word = String::new();
    let mut state = Delimiter;

    for c in s.chars() {
        state = match (state, c) {
            // Between words.
            (Delimiter, '\'') => SingleQuoted,
            (Delimiter, '"') => DoubleQuoted,
            (Delimiter, '\\') => Backslash,
            (Delimiter, '\t') | (Delimiter, ' ') | (Delimiter, '\n') => Delimiter,
            (Delimiter, '#') => Comment,
            (Delimiter, _) => {
                word.push(c);
                Unquoted
            }

            // Backslash seen before any word character.
            (Backslash, '\n') => Delimiter,
            (Backslash, _) => {
                word.push(c);
                Unquoted
            }

            // Inside a word, outside quotes.
            (Unquoted, '\'') => SingleQuoted,
            (Unquoted, '"') => DoubleQuoted,
            (Unquoted, '\\') => UnquotedBackslash,
            (Unquoted, '\t') | (Unquoted, ' ') | (Unquoted, '\n') => {
                // Whitespace finishes the current word.
                words.push(mem::replace(&mut word, String::new()));
                Delimiter
            }
            (Unquoted, _) => {
                word.push(c);
                Unquoted
            }

            // Backslash seen in the middle of an unquoted word.
            (UnquotedBackslash, '\n') => Unquoted,
            (UnquotedBackslash, _) => {
                word.push(c);
                Unquoted
            }

            // Single quotes: everything up to the closing quote is literal.
            (SingleQuoted, '\'') => Unquoted,
            (SingleQuoted, _) => {
                word.push(c);
                SingleQuoted
            }

            // Double quotes: only the backslash is special.
            (DoubleQuoted, '"') => Unquoted,
            (DoubleQuoted, '\\') => DoubleQuotedBackslash,
            (DoubleQuoted, _) => {
                word.push(c);
                DoubleQuoted
            }

            // Backslash inside double quotes.
            (DoubleQuotedBackslash, '\n') => DoubleQuoted,
            (DoubleQuotedBackslash, '$')
            | (DoubleQuotedBackslash, '`')
            | (DoubleQuotedBackslash, '"')
            | (DoubleQuotedBackslash, '\\') => {
                word.push(c);
                DoubleQuoted
            }
            (DoubleQuotedBackslash, _) => {
                // Not an escapable character: the backslash stays in the word.
                word.push('\\');
                word.push(c);
                DoubleQuoted
            }

            // Comments end at the next newline.
            (Comment, '\n') => Delimiter,
            (Comment, _) => Comment,
        };
    }

    // End of input: decide what to do with whatever is still pending.
    match state {
        Delimiter | Comment => {}
        Backslash | UnquotedBackslash => {
            // A dangling backslash is preserved literally.
            word.push('\\');
            words.push(word);
        }
        Unquoted => words.push(word),
        SingleQuoted | DoubleQuoted | DoubleQuotedBackslash => return Err(ParseError),
    }

    Ok(words)
}
/// Quoting strategy selected for a single word.
enum EscapeStyle {
    /// The word contains nothing special and can be emitted unchanged.
    None,
    /// The word can be wrapped in single quotes verbatim.
    SingleQuoted,
    /// The word is wrapped in single quotes, with every embedded `'`
    /// rewritten as `'\''`.
    Mixed,
}

/// Decides how `s` has to be quoted so that a shell reads it back as one word.
///
/// The empty string is always quoted so that it does not vanish from the
/// command line.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    let mut newline = false;
    let mut single_quote = false;

    for c in s.chars() {
        match c {
            '\n' => {
                newline = true;
                special = true;
            }
            '\'' => {
                single_quote = true;
                special = true;
            }
            // `~` is the ASCII tilde a shell may expand. The look-alike `˜`
            // (U+02DC SMALL TILDE) was listed here in its place; it is kept
            // because quoting it is harmless.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*'
            | '?' | '[' | '#' | '~' | '˜' | '=' | '%' => {
                special = true;
            }
            _ => continue,
        }
    }

    if !special {
        EscapeStyle::None
    } else if newline && !single_quote {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::Mixed
    }
}

/// Quotes `s` so that it can be used as a single word on a shell command line.
///
/// Words without special characters are returned borrowed and unchanged.
/// Everything else (including the empty string) is wrapped in single quotes,
/// with each embedded single quote written as `'\''`.
pub fn quote(s: &str) -> Cow<str> {
    match escape_style(s) {
        EscapeStyle::None => s.into(),
        EscapeStyle::SingleQuoted => format!("'{}'", s).into(),
        EscapeStyle::Mixed => {
            // At least the two surrounding quotes are added to the input.
            let mut quoted = String::with_capacity(s.len() + 2);
            quoted.push('\'');
            for c in s.chars() {
                if c == '\'' {
                    // Close the quote, emit an escaped quote, reopen.
                    quoted.push_str("'\\''");
                } else {
                    quoted.push(c);
                }
            }
            quoted.push('\'');
            quoted.into()
        }
    }
}
/// Builds a command line from `words`.
///
/// Every word is passed through `quote` and the results are separated by a
/// single space. An empty iterator yields an empty string.
pub fn join<I, S>(words: I) -> String
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut line = String::new();
    for (index, word) in words.into_iter().enumerate() {
        // Separator goes before every word except the first one.
        if index > 0 {
            line.push(' ');
        }
        line.push_str(&quote(word.as_ref()));
    }
    line
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair splits successfully into
    /// exactly the expected words, reporting the offending input on failure.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n  actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n  actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        // Unterminated single quote.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quote.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash is literal inside single quotes, so the quote stays open.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ending right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}