/**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of Qt Creator. ** ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and The Qt Company. For licensing terms ** and conditions see https://www.qt.io/terms-conditions. For further ** information use the contact form at https://www.qt.io/contact-us. ** ** GNU General Public License Usage ** Alternatively, this file may be used under the terms of the GNU ** General Public License version 3 as published by the Free Software ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT ** included in the packaging of this file. Please review the following ** information to ensure the GNU General Public License requirements will ** be met: https://www.gnu.org/licenses/gpl-3.0.html. ** ****************************************************************************/ #include "commandline.h" #include "environment.h" #include "macroexpander.h" #include "qtcassert.h" #include "stringutils.h" #include #include #include #include QT_BEGIN_NAMESPACE QDebug operator<<(QDebug dbg, const Utils::CommandLine &cmd) { return dbg << cmd.toUserOutput(); } QT_END_NAMESPACE // The main state of the Unix shell parser enum MxQuoting { MxBasic, MxSingleQuote, MxDoubleQuote, MxParen, MxSubst, MxGroup, MxMath }; struct MxState { MxQuoting current; // Bizarrely enough, double quoting has an impact on the behavior of some // complex expressions within the quoted string. 
bool dquote;
};
QT_BEGIN_NAMESPACE
Q_DECLARE_TYPEINFO(MxState, Q_PRIMITIVE_TYPE);
QT_END_NAMESPACE

// Pushed state for the case where a $(()) expansion turns out bogus
struct MxSave {
    QString str;
    int pos, varPos;
};
QT_BEGIN_NAMESPACE
Q_DECLARE_TYPEINFO(MxSave, Q_MOVABLE_TYPE);
QT_END_NAMESPACE

namespace Utils {

/*!
    \class Utils::ProcessArgs

    \brief The ProcessArgs class provides functionality for dealing with
    shell-quoted process arguments.
*/

// Returns true if c is one of the characters & ( ) < > |.
inline static bool isMetaCharWin(ushort c)
{
    // Bitmap lookup: bit (c & 7) of byte (c / 8). Codes beyond the table are never meta.
    static const uchar iqm[] = {
        0x00, 0x00, 0x00, 0x00, 0x40, 0x03, 0x00, 0x50,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
    }; // &()<>|

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

// Expands %VAR% references in args in place. The name between two percent
// signs is upper-cased before the lookup. %CD% yields pwd (with native
// separators) when a non-empty pwd is given; every other name is looked up
// in env, which is dereferenced without a null check.
// References that expand to an empty value are left in the string.
static void envExpandWin(QString &args, const Environment *env, const QString *pwd)
{
    static const QString cdName = QLatin1String("CD");
    int off = 0;
  next:
    for (int prev = -1, that;
         (that = args.indexOf(QLatin1Char('%'), off)) >= 0;
         prev = that, off = that + 1) {
        if (prev >= 0) {
            // prev and that are two consecutive percent signs; the text between is the name.
            const QString var = args.mid(prev + 1, that - prev - 1).toUpper();
            const QString val = (var == cdName && pwd && !pwd->isEmpty())
                                    ? QDir::toNativeSeparators(*pwd) : env->expandedValueForKey(var);
            if (!val.isEmpty()) { // Empty values are impossible, so this is an existence check
                args.replace(prev, that - prev + 1, val);
                off = prev + val.length();
                // Restart the scan behind the inserted value; this also resets prev to -1.
                goto next;
            }
        }
    }
}

// Applies the cmd-level pass to _args: optional %VAR% expansion, removal of a
// leading '@' and of circumflex escapes. Reports FoundMeta (and returns empty
// arguments) when an unquoted meta character is found, or when a '%' is
// present but no environment was given. err may be null.
static ProcessArgs prepareArgsWin(const QString &_args, ProcessArgs::SplitError *err,
                                  const Environment *env, const QString *pwd)
{
    QString args(_args);

    if (env) {
        envExpandWin(args, env, pwd);
    } else {
        if (args.indexOf(QLatin1Char('%')) >= 0) {
            if (err)
                *err = ProcessArgs::FoundMeta;
            return ProcessArgs::createWindowsArgs(QString());
        }
    }

    if (!args.isEmpty() && args.unicode()[0].unicode() == '@')
        args.remove(0, 1);

    for (int p = 0; p < args.length(); p++) {
        ushort c = args.unicode()[p].unicode();
        if (c == '^') {
            // Drop the circumflex; the loop increment then steps over the char it escaped.
            args.remove(p, 1);
        } else if (c == '"') {
            // Skip ahead to the matching quote; the quotes themselves stay in the string.
            do {
                if (++p == args.length())
                    break; // For cmd, this is no error.
} while (args.unicode()[p].unicode() != '"');
        } else if (isMetaCharWin(c)) {
            if (err)
                *err = ProcessArgs::FoundMeta;
            return ProcessArgs::createWindowsArgs(QString());
        }
    }

    if (err)
        *err = ProcessArgs::SplitOk;
    return ProcessArgs::createWindowsArgs(args);
}

// Only space and tab separate words here.
inline static bool isWhiteSpaceWin(ushort c)
{
    return c == ' ' || c == '\t';
}

// Splits args into words, honouring double quotes and the backslash-before-quote
// rule. Sets *err (if given) to SplitOk, or to BadQuoting when the input ends
// inside a quoted section, in which case an empty list is returned.
static QStringList doSplitArgsWin(const QString &args, ProcessArgs::SplitError *err)
{
    QStringList ret;

    if (err)
        *err = ProcessArgs::SplitOk;

    int p = 0;
    const int length = args.length();
    forever {
        // Skip the whitespace in front of the next word; done when the input is exhausted.
        forever {
            if (p == length)
                return ret;
            if (!isWhiteSpaceWin(args.unicode()[p].unicode()))
                break;
            ++p;
        }

        QString arg;
        bool inquote = false;
        forever {
            bool copy = true; // copy this char
            int bslashes = 0; // number of preceding backslashes to insert
            while (p < length && args.unicode()[p] == QLatin1Char('\\')) {
                ++p;
                ++bslashes;
            }
            if (p < length && args.unicode()[p] == QLatin1Char('"')) {
                if (!(bslashes & 1)) {
                    // Even number of backslashes, so the quote is not escaped.
                    if (inquote) {
                        if (p + 1 < length && args.unicode()[p + 1] == QLatin1Char('"')) {
                            // This is not documented on MSDN.
                            // Two consecutive quotes make a literal quote. Brain damage:
                            // this still closes the quoting, so a 3rd quote is required,
                            // which makes the runtime's quoting run out of sync with the
                            // shell's one unless the 2nd quote is escaped.
                            ++p;
                        } else {
                            // Closing quote
                            copy = false;
                        }
                        inquote = false;
                    } else {
                        // Opening quote
                        copy = false;
                        inquote = true;
                    }
                }
                // Backslashes in front of a quote are halved.
                bslashes >>= 1;
            }

            while (--bslashes >= 0)
                arg.append(QLatin1Char('\\'));

            if (p == length || (!inquote && isWhiteSpaceWin(args.unicode()[p].unicode()))) {
                ret.append(arg);
                if (inquote) {
                    if (err)
                        *err = ProcessArgs::BadQuoting;
                    return QStringList();
                }
                break;
            }

            if (copy)
                arg.append(args.unicode()[p]);
            ++p;
        }
    }
    //not reached
}

/*!
    Splits \a _args according to system shell word splitting and quoting rules.

    \section1 Unix

    The behavior is based on the POSIX shell and bash:
    \list
    \li Whitespace splits tokens.
    \li The backslash quotes the following character.
    \li A string enclosed in single quotes is not split. No shell meta
        characters are interpreted.
    \li A string enclosed in double quotes is not split. Within the string,
        the backslash quotes shell meta characters - if it is followed
        by a "meaningless" character, the backslash is output verbatim.
    \endlist
    If \a abortOnMeta is \c false, only the splitting and quoting rules apply,
    while other meta characters (substitutions, redirections, etc.) are ignored.
    If \a abortOnMeta is \c true, encounters of unhandled meta characters are
    treated as errors.

    If \a err is not NULL, stores a status code at the pointer target. For more
    information, see \l SplitError.

    If \a env is not NULL, performs variable substitution with the
    given environment.

    Returns a list of unquoted words or an empty list if an error occurred.

    \section1 Windows

    The behavior is defined by the Microsoft C runtime:
    \list
    \li Whitespace splits tokens.
    \li A string enclosed in double quotes is not split.
    \list
        \li 3N double quotes within a quoted string yield N literal quotes.
           This is not documented on MSDN.
    \endlist
    \li Backslashes have special semantics if they are followed by a double quote:
    \list
        \li 2N backslashes + double quote => N backslashes and begin/end quoting
        \li 2N+1 backslashes + double quote => N backslashes + literal quote
    \endlist
    \endlist
    Qt and many other implementations comply with this standard, but many do not.

    If \a abortOnMeta is \c true, cmd shell semantics are applied before
    proceeding with word splitting:
    \list
    \li Cmd ignores \e all special chars between double quotes.
        Note that the quotes are \e not removed at this stage - the
        tokenization rules described above still apply.
    \li The \c circumflex is the escape char for everything including itself.
    \endlist
    As the quoting levels are independent from each other and have different
    semantics, you need a command line like \c{"foo "\^"" bar"} to get
    \c{foo " bar}.
*/ static QStringList splitArgsWin(const QString &_args, bool abortOnMeta, ProcessArgs::SplitError *err, const Environment *env, const QString *pwd) { if (abortOnMeta) { ProcessArgs::SplitError perr; if (!err) err = &perr; QString args = prepareArgsWin(_args, &perr, env, pwd).toWindowsArgs(); if (*err != ProcessArgs::SplitOk) return QStringList(); return doSplitArgsWin(args, err); } else { QString args = _args; if (env) envExpandWin(args, env, pwd); return doSplitArgsWin(args, err); } } static bool isMetaUnix(QChar cUnicode) { static const uchar iqm[] = { 0x00, 0x00, 0x00, 0x00, 0xdc, 0x07, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x38 }; // \'"$`<>|;&(){}*?#[] uint c = cUnicode.unicode(); return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); } static QStringList splitArgsUnix(const QString &args, bool abortOnMeta, ProcessArgs::SplitError *err, const Environment *env, const QString *pwd) { static const QString pwdName = QLatin1String("PWD"); QStringList ret; for (int pos = 0; ; ) { QChar c; do { if (pos >= args.length()) goto okret; c = args.unicode()[pos++]; } while (c.isSpace()); QString cret; bool hadWord = false; if (c == QLatin1Char('~')) { if (pos >= args.length() || args.unicode()[pos].isSpace() || args.unicode()[pos] == QLatin1Char('/')) { cret = QDir::homePath(); hadWord = true; goto getc; } else if (abortOnMeta) { goto metaerr; } } do { if (c == QLatin1Char('\'')) { int spos = pos; do { if (pos >= args.length()) goto quoteerr; c = args.unicode()[pos++]; } while (c != QLatin1Char('\'')); cret += args.mid(spos, pos - spos - 1); hadWord = true; } else if (c == QLatin1Char('"')) { for (;;) { if (pos >= args.length()) goto quoteerr; c = args.unicode()[pos++]; nextq: if (c == QLatin1Char('"')) break; if (c == QLatin1Char('\\')) { if (pos >= args.length()) goto quoteerr; c = args.unicode()[pos++]; if (c != QLatin1Char('"') && c != QLatin1Char('\\') && !(abortOnMeta && (c == QLatin1Char('$') || c == QLatin1Char('`')))) cret += 
QLatin1Char('\\');
                    } else if (c == QLatin1Char('$') && env) {
                        // Variable reference inside double quotes: the value is
                        // appended to the current word without word splitting.
                        if (pos >= args.length())
                            goto quoteerr;
                        c = args.unicode()[pos++];
                        bool braced = false;
                        if (c == QLatin1Char('{')) {
                            if (pos >= args.length())
                                goto quoteerr;
                            c = args.unicode()[pos++];
                            braced = true;
                        }
                        QString var;
                        while (c.isLetterOrNumber() || c == QLatin1Char('_')) {
                            var += c;
                            if (pos >= args.length())
                                goto quoteerr;
                            c = args.unicode()[pos++];
                        }
                        // PWD is answered from pwd when a non-empty one was passed in.
                        if (var == pwdName && pwd && !pwd->isEmpty()) {
                            cret += *pwd;
                        } else {
                            Environment::const_iterator vit = env->constFind(var);
                            if (vit == env->constEnd()) {
                                if (abortOnMeta)
                                    goto metaerr; // Assume this is a shell builtin
                            } else {
                                cret += env->expandedValueForKey(env->key(vit));
                            }
                        }
                        // c already holds the char behind the name; unbraced
                        // references re-enter the loop with it.
                        if (!braced)
                            goto nextq;
                        if (c != QLatin1Char('}')) {
                            if (abortOnMeta)
                                goto metaerr; // Assume this is a complex expansion
                            goto quoteerr; // Otherwise it's just garbage
                        }
                        continue;
                    } else if (abortOnMeta &&
                               (c == QLatin1Char('$') ||
                                c == QLatin1Char('`'))) {
                        goto metaerr;
                    }
                    cret += c;
                }
                hadWord = true;
            } else if (c == QLatin1Char('$') && env) {
                // Unquoted variable reference: the value is subject to word splitting below.
                if (pos >= args.length())
                    goto quoteerr; // Bash just takes it verbatim, but whatever
                c = args.unicode()[pos++];
                bool braced = false;
                if (c == QLatin1Char('{')) {
                    if (pos >= args.length())
                        goto quoteerr;
                    c = args.unicode()[pos++];
                    braced = true;
                }
                QString var;
                while (c.isLetterOrNumber() || c == QLatin1Char('_')) {
                    var += c;
                    if (pos >= args.length()) {
                        if (braced)
                            goto quoteerr;
                        // End of input terminates the name; a space stands in as the next char.
                        c = QLatin1Char(' ');
                        break;
                    }
                    c = args.unicode()[pos++];
                }
                QString val;
                if (var == pwdName && pwd && !pwd->isEmpty()) {
                    val = *pwd;
                } else {
                    Environment::const_iterator vit = env->constFind(var);
                    if (vit == env->constEnd()) {
                        if (abortOnMeta)
                            goto metaerr; // Assume this is a shell builtin
                    } else {
                        val = env->expandedValueForKey(env->key(vit));
                    }
                }
                // Tab, newline and space inside the value end the current word.
                for (int i = 0; i < val.length(); i++) {
                    const QChar cc = val.unicode()[i];
                    if (cc.unicode() == 9 || cc.unicode() == 10 || cc.unicode() == 32) {
                        if (hadWord) {
                            ret += cret;
                            cret.clear();
                            hadWord = false;
                        }
                    } else {
                        cret
+= cc;
                        hadWord = true;
                    }
                }
                // c already holds the char behind the name; unbraced references
                // continue with it at the end-of-word check.
                if (!braced)
                    goto nextc;
                if (c != QLatin1Char('}')) {
                    if (abortOnMeta)
                        goto metaerr; // Assume this is a complex expansion
                    goto quoteerr; // Otherwise it's just garbage
                }
            } else {
                if (c == QLatin1Char('\\')) {
                    // Outside quotes the backslash makes the next char literal.
                    if (pos >= args.length())
                        goto quoteerr;
                    c = args.unicode()[pos++];
                } else if (abortOnMeta && isMetaUnix(c)) {
                    goto metaerr;
                }
                cret += c;
                hadWord = true;
            }
          getc:
            if (pos >= args.length())
                break;
            c = args.unicode()[pos++];
          nextc: ;
        } while (!c.isSpace());
        if (hadWord)
            ret += cret;
    }

  okret:
    if (err)
        *err = ProcessArgs::SplitOk;
    return ret;

  quoteerr:
    if (err)
        *err = ProcessArgs::BadQuoting;
    return QStringList();

  metaerr:
    if (err)
        *err = ProcessArgs::FoundMeta;
    return QStringList();
}

// Returns true if c is in the set listed behind the table.
inline static bool isSpecialCharUnix(ushort c)
{
    // Chars that should be quoted (TM). This includes:
    static const uchar iqm[] = {
        0xff, 0xff, 0xff, 0xff, 0xdf, 0x07, 0x00, 0xd8,
        0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x78
    }; // 0-32 \'"$`<>|;&(){}*?#!~[]

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

// Returns true if any character of arg satisfies isSpecialCharUnix().
inline static bool hasSpecialCharsUnix(const QString &arg)
{
    for (int x = arg.length() - 1; x >= 0; --x)
        if (isSpecialCharUnix(arg.unicode()[x].unicode()))
            return true;
    return false;
}

// Dispatches to the Windows or the Unix splitter depending on osType.
QStringList ProcessArgs::splitArgs(const QString &args, OsType osType,
                                   bool abortOnMeta, ProcessArgs::SplitError *err,
                                   const Environment *env, const QString *pwd)
{
    if (osType == OsTypeWindows)
        return splitArgsWin(args, abortOnMeta, err, env, pwd);
    else
        return splitArgsUnix(args, abortOnMeta, err, env, pwd);
}

// Quotes arg for a Unix shell: an empty string becomes '', a string without
// special characters is returned unchanged, anything else is wrapped in single
// quotes with every embedded single quote written as '\''.
QString ProcessArgs::quoteArgUnix(const QString &arg)
{
    if (arg.isEmpty())
        return QString::fromLatin1("''");

    QString ret(arg);
    if (hasSpecialCharsUnix(ret)) {
        ret.replace(QLatin1Char('\''), QLatin1String("'\\''"));
        ret.prepend(QLatin1Char('\''));
        ret.append(QLatin1Char('\''));
    }
    return ret;
}

static bool isSpecialCharWin(ushort c)
{
    // Chars that should be quoted (TM).
// This includes:
    // - control chars & space
    // - the shell meta chars "&()<>^|
    // - the potential separators ,;=
    // Bitmap lookup: bit (c & 7) of byte (c / 8). Codes beyond the table are never special.
    static const uchar iqm[] = {
        0xff, 0xff, 0xff, 0xff, 0x45, 0x13, 0x00, 0x78,
        0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10
    };

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

// Returns true if any character of arg satisfies isSpecialCharWin().
static bool hasSpecialCharsWin(const QString &arg)
{
    for (int x = arg.length() - 1; x >= 0; --x)
        if (isSpecialCharWin(arg.unicode()[x].unicode()))
            return true;
    return false;
}

// Quotes arg for a Windows command line: an empty string becomes "", a string
// without special characters is returned unchanged, anything else is wrapped in
// double quotes as described by the comments below.
static QString quoteArgWin(const QString &arg)
{
    if (arg.isEmpty())
        return QString::fromLatin1("\"\"");

    QString ret(arg);
    if (hasSpecialCharsWin(ret)) {
        // Quotes are escaped and their preceding backslashes are doubled.
        // It's impossible to escape anything inside a quoted string on cmd
        // level, so the outer quoting must be "suspended".
        ret.replace(QRegularExpression(QLatin1String("(\\\\*)\"")), QLatin1String("\"\\1\\1\\^\"\""));
        // The argument must not end with a \ since this would be interpreted
        // as escaping the quote -- rather put the \ behind the quote: e.g.
        // rather use "foo"\ than "foo\"
        int i = ret.length();
        while (i > 0 && ret.at(i - 1) == QLatin1Char('\\'))
            --i;
        ret.insert(i, QLatin1Char('"'));
        ret.prepend(QLatin1Char('"'));
    }
    // FIXME: Without this, quoting is not foolproof. But it needs support in the process setup, etc.
//ret.replace('%', QLatin1String("%PERCENT_SIGN%")); return ret; } ProcessArgs ProcessArgs::prepareArgs(const QString &args, SplitError *err, OsType osType, const Environment *env, const FilePath *pwd, bool abortOnMeta) { QString wdcopy; QString *wd = nullptr; if (pwd) { wdcopy = pwd->toString(); wd = &wdcopy; } ProcessArgs res; if (osType == OsTypeWindows) res = prepareArgsWin(args, err, env, wd); else res = createUnixArgs(splitArgs(args, osType, abortOnMeta, err, env, wd)); return res; } QString ProcessArgs::quoteArg(const QString &arg, OsType osType) { if (osType == OsTypeWindows) return quoteArgWin(arg); else return quoteArgUnix(arg); } void ProcessArgs::addArg(QString *args, const QString &arg, OsType osType) { if (!args->isEmpty()) *args += QLatin1Char(' '); *args += quoteArg(arg, osType); } QString ProcessArgs::joinArgs(const QStringList &args, OsType osType) { QString ret; for (const QString &arg : args) addArg(&ret, arg, osType); return ret; } void ProcessArgs::addArgs(QString *args, const QString &inArgs) { if (!inArgs.isEmpty()) { if (!args->isEmpty()) *args += QLatin1Char(' '); *args += inArgs; } } void ProcessArgs::addArgs(QString *args, const QStringList &inArgs) { for (const QString &arg : inArgs) addArg(args, arg); } bool ProcessArgs::prepareCommand(const CommandLine &cmdLine, QString *outCmd, ProcessArgs *outArgs, const Environment *env, const FilePath *pwd) { FilePath executable = cmdLine.executable(); const QString arguments = cmdLine.arguments(); if (env && executable.isRelativePath()) executable = env->searchInPath(executable.toString()); ProcessArgs::SplitError err; *outArgs = ProcessArgs::prepareArgs(arguments, &err, executable.osType(), env, pwd); if (err == ProcessArgs::SplitOk) { *outCmd = executable.toString(); } else { if (executable.osType() == OsTypeWindows) { *outCmd = QString::fromLatin1(qgetenv("COMSPEC")); *outArgs = ProcessArgs::createWindowsArgs(QLatin1String("/v:off /s /c \"") + quoteArg(executable.toUserOutput()) + ' ' + 
arguments + '"');
        } else {
            if (err != ProcessArgs::FoundMeta)
                return false;
            // Meta characters were found: let the shell named by $SHELL
            // (falling back to /bin/sh) interpret the command.
#if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0)
            *outCmd = qEnvironmentVariable("SHELL", "/bin/sh");
#else
            // for sdktool
            *outCmd = qEnvironmentVariableIsSet("SHELL")
                    ? QString::fromLocal8Bit(qgetenv("SHELL")) : QString("/bin/sh");
#endif
            *outArgs = ProcessArgs::createUnixArgs({"-c", quoteArg(executable.toString()) + ' ' + arguments});
        }
    }
    return true;
}

// This function assumes that the resulting string will be quoted.
// That's irrelevant if it does not contain quotes itself.
//
// bslashes is the number of backslashes that directly precede ret in the
// target string. The return value is the number of backslashes ret ends with.
static int quoteArgInternalWin(QString &ret, int bslashes)
{
    // Quotes are escaped and their preceding backslashes are doubled.
    // It's impossible to escape anything inside a quoted string on cmd
    // level, so the outer quoting must be "suspended".
    const QChar bs(QLatin1Char('\\')), dq(QLatin1Char('"'));
    for (int p = 0; p < ret.length(); p++) {
        if (ret.at(p) == bs) {
            bslashes++;
        } else {
            if (ret.at(p) == dq) {
                if (bslashes) {
                    // Double the run of backslashes in front of the quote.
                    ret.insert(p, QString(bslashes, bs));
                    p += bslashes;
                }
                // Four chars are inserted in front of the quote; step over them.
                ret.insert(p, QLatin1String("\"\\^\""));
                p += 4;
            }
            bslashes = 0;
        }
    }
    return bslashes;
}


// TODO: This documentation is relevant for end-users. Where to put it?
/**
 * Perform safe macro expansion (substitution) on a string for use
 * in shell commands.
 *
 * \section Unix notes
 *
 * Explicitly supported shell constructs:
 *   \\ '' "" {} () $(()) ${} $() ``
 *
 * Implicitly supported shell constructs:
 *   (())
 *
 * Unsupported shell constructs that will cause problems:
 * \list
 * \li Shortened \c{case $v in pat)} syntax. Use \c{case $v in (pat)} instead.
 * \li Bash-style \c{$""} and \c{$''} string quoting syntax.
 * \endlist
 *
 * The rest of the shell (incl. bash) syntax is simply ignored,
 * as it is not expected to cause problems.
* * Security considerations: * \list * \li Backslash-escaping an expando is treated as a quoting error * \li Do not put expandos into double quoted substitutions: * \badcode * "${VAR:-%{macro}}" * \endcode * \li Do not put expandos into command line arguments which are nested * shell commands: * \badcode * sh -c 'foo \%{file}' * \endcode * \goodcode * file=\%{file} sh -c 'foo "$file"' * \endcode * \endlist * * \section Windows notes * * All quoting syntax supported by splitArgs() is supported here as well. * Additionally, command grouping via parentheses is recognized - note * however, that the parser is much stricter about unquoted parentheses * than cmd itself. * The rest of the cmd syntax is simply ignored, as it is not expected * to cause problems. * * Security considerations: * \list * \li Circumflex-escaping an expando is treated as a quoting error * \li Closing double quotes right before expandos and opening double quotes * right after expandos are treated as quoting errors * \li Do not put expandos into nested commands: * \badcode * for /f "usebackq" \%v in (`foo \%{file}`) do \@echo \%v * \endcode * \li A macro's value must not contain anything which may be interpreted * as an environment variable expansion. A solution is replacing any * percent signs with a fixed string like \c{\%PERCENT_SIGN\%} and * injecting \c{PERCENT_SIGN=\%} into the shell's environment. * \li Enabling delayed environment variable expansion (cmd /v:on) should have * no security implications, but may still wreak havoc due to the * need for doubling circumflexes if any exclamation marks are present, * and the need to circumflex-escape the exclamation marks themselves. 
* \endlist
 *
 * \param cmd pointer to the string in which macros are expanded in-place
 * \param mx pointer to a macro expander instance
 * \return false if the string could not be parsed and therefore no safe
 *   substitution was possible
 */
bool ProcessArgs::expandMacros(QString *cmd, AbstractMacroExpander *mx, OsType osType)
{
    // Work on a copy; *cmd is only written on the success path at the end.
    QString str = *cmd;
    if (str.isEmpty())
        return true;

    // rsts receives the replacement text, varPos/varLen the location of the next macro.
    QString rsts;
    int varLen;
    int varPos = 0;
    if (!(varLen = mx->findMacro(str, &varPos, &rsts)))
        return true;

    int pos = 0;

    if (osType == OsTypeWindows) {
        enum { // cmd.exe parsing state
            ShellBasic, // initial state
            ShellQuoted, // double-quoted state => *no* other meta chars are interpreted
            ShellEscaped // circumflex-escaped state => next char is not interpreted
        } shellState = ShellBasic;
        enum { // CommandLineToArgv() parsing state and some more
            CrtBasic, // initial state
            CrtNeedWord, // after empty expando; insert empty argument if whitespace follows
            CrtInWord, // in non-whitespace
            CrtClosed, // previous char closed the double-quoting
            CrtHadQuote, // closed double-quoting after an expando
            // The remaining two need to be numerically higher
            CrtQuoted, // double-quoted state => spaces don't split tokens
            CrtNeedQuote // expando opened quote; close if no expando follows
        } crtState = CrtBasic;
        int bslashes = 0; // previous chars were manual backslashes
        int rbslashes = 0; // trailing backslashes in replacement

        forever {
            if (pos == varPos) {
                if (shellState == ShellEscaped)
                    return false; // Circumflex'd quoted expando would be Bad (TM).
                if ((shellState == ShellQuoted) != (crtState == CrtQuoted))
                    return false; // CRT quoting out of sync with shell quoting. Ahoy to Redmond.
                // Manually typed backslashes in front of the expando now count as
                // backslashes preceding the replacement text.
                rbslashes += bslashes;
                bslashes = 0;
                if (crtState < CrtQuoted) {
                    if (rsts.isEmpty()) {
                        if (crtState == CrtBasic) {
                            // Outside any quoting and the string is empty, so put
                            // a pair of quotes. Delaying that is just pedantry.
crtState = CrtNeedWord; } } else { if (hasSpecialCharsWin(rsts)) { if (crtState == CrtClosed) { // Quoted expando right after closing quote. Can't do that. return false; } int tbslashes = quoteArgInternalWin(rsts, 0); rsts.prepend(QLatin1Char('"')); if (rbslashes) rsts.prepend(QString(rbslashes, QLatin1Char('\\'))); crtState = CrtNeedQuote; rbslashes = tbslashes; } else { crtState = CrtInWord; // We know that this string contains no spaces. // We know that this string contains no quotes, // so the function won't make a mess. rbslashes = quoteArgInternalWin(rsts, rbslashes); } } } else { rbslashes = quoteArgInternalWin(rsts, rbslashes); } str.replace(pos, varLen, rsts); pos += rsts.length(); varPos = pos; if (!(varLen = mx->findMacro(str, &varPos, &rsts))) { // Don't leave immediately, as we may be in CrtNeedWord state which could // be still resolved, or we may have inserted trailing backslashes. varPos = INT_MAX; } continue; } if (crtState == CrtNeedQuote) { if (rbslashes) { str.insert(pos, QString(rbslashes, QLatin1Char('\\'))); pos += rbslashes; varPos += rbslashes; rbslashes = 0; } str.insert(pos, QLatin1Char('"')); pos++; varPos++; crtState = CrtHadQuote; } ushort cc = str.unicode()[pos].unicode(); if (shellState == ShellBasic && cc == '^') { shellState = ShellEscaped; } else { if (!cc || cc == ' ' || cc == '\t') { if (crtState < CrtQuoted) { if (crtState == CrtNeedWord) { str.insert(pos, QLatin1String("\"\"")); pos += 2; varPos += 2; } crtState = CrtBasic; } if (!cc) break; bslashes = 0; rbslashes = 0; } else { if (cc == '\\') { bslashes++; if (crtState < CrtQuoted) crtState = CrtInWord; } else { if (cc == '"') { if (shellState != ShellEscaped) shellState = (shellState == ShellQuoted) ? ShellBasic : ShellQuoted; if (rbslashes) { // Offset -1: skip possible circumflex. We have at least // one backslash, so a fixed offset is ok. 
str.insert(pos - 1, QString(rbslashes, QLatin1Char('\\'))); pos += rbslashes; varPos += rbslashes; } if (!(bslashes & 1)) { // Even number of backslashes, so the quote is not escaped. switch (crtState) { case CrtQuoted: // Closing quote crtState = CrtClosed; break; case CrtClosed: // Two consecutive quotes make a literal quote - and // still close quoting. See QtcProcess::quoteArg(). crtState = CrtInWord; break; case CrtHadQuote: // Opening quote right after quoted expando. Can't do that. return false; default: // Opening quote crtState = CrtQuoted; break; } } else if (crtState < CrtQuoted) { crtState = CrtInWord; } } else if (crtState < CrtQuoted) { crtState = CrtInWord; } bslashes = 0; rbslashes = 0; } } if (varPos == INT_MAX && !rbslashes) break; if (shellState == ShellEscaped) shellState = ShellBasic; } pos++; } } else { // !Windows MxState state = {MxBasic, false}; QStack sstack; QStack ostack; while (pos < str.length()) { if (pos == varPos) { // Our expansion rules trigger in any context if (state.dquote) { // We are within a double-quoted string. Escape relevant meta characters. rsts.replace(QRegularExpression(QLatin1String("([$`\"\\\\])")), QLatin1String("\\\\1")); } else if (state.current == MxSingleQuote) { // We are within a single-quoted string. "Suspend" single-quoting and put a // single escaped quote for each single quote inside the string. rsts.replace(QLatin1Char('\''), QLatin1String("'\\''")); } else if (rsts.isEmpty() || hasSpecialCharsUnix(rsts)) { // String contains "quote-worthy" characters. Use single quoting - but // that choice is arbitrary. rsts.replace(QLatin1Char('\''), QLatin1String("'\\''")); rsts.prepend(QLatin1Char('\'')); rsts.append(QLatin1Char('\'')); } // Else just use the string verbatim. 
str.replace(pos, varLen, rsts);
                pos += rsts.length();
                varPos = pos;
                // Look for the next macro behind the inserted text; stop when there is none.
                if (!(varLen = mx->findMacro(str, &varPos, &rsts)))
                    break;
                continue;
            }
            // The reads at pos + 1 further down can hit index length(), where
            // unicode() presumably yields the terminating 0 - TODO confirm.
            ushort cc = str.unicode()[pos].unicode();
            if (state.current == MxSingleQuote) {
                // Single quoted context - only the single quote has any special meaning.
                if (cc == '\'')
                    state = sstack.pop();
            } else if (cc == '\\') {
                // In any other context, the backslash starts an escape.
                pos += 2;
                if (varPos < pos)
                    return false; // Backslash'd quoted expando would be Bad (TM).
                continue;
            } else if (cc == '$') {
                cc = str.unicode()[++pos].unicode();
                if (cc == '(') {
                    sstack.push(state);
                    if (str.unicode()[pos + 1].unicode() == '(') {
                        // $(( starts a math expression. This may also be a $( ( in fact,
                        // so we push the current string and offset on a stack so we can retry.
                        MxSave sav = {str, pos + 2, varPos};
                        ostack.push(sav);
                        state.current = MxMath;
                        pos += 2;
                        continue;
                    } else {
                        // $( starts a command substitution. This actually "opens a new context"
                        // which overrides surrounding double quoting.
                        state.current = MxParen;
                        state.dquote = false;
                    }
                } else if (cc == '{') {
                    // ${ starts a "braced" variable substitution.
                    sstack.push(state);
                    state.current = MxSubst;
                } // Else assume that a "bare" variable substitution has started
            } else if (cc == '`') {
                // Backticks are evil, as every shell interprets escapes within them differently,
                // which is a danger for the quoting of our own expansions.
                // So we just apply *our* rules (which match bash) and transform it into a POSIX
                // command substitution which has clear semantics.
                str.replace(pos, 1, QLatin1String("$( " )); // add space -> avoid creating $((
                // One char became three: shift the macro position by two and
                // continue behind the inserted text.
                varPos += 2;
                int pos2 = pos += 3;
                forever {
                    if (pos2 >= str.length())
                        return false; // Syntax error - unterminated backtick expression.
cc = str.unicode()[pos2].unicode(); if (cc == '`') break; if (cc == '\\') { cc = str.unicode()[++pos2].unicode(); if (cc == '$' || cc == '`' || cc == '\\' || (cc == '"' && state.dquote)) { str.remove(pos2 - 1, 1); if (varPos >= pos2) varPos--; continue; } } pos2++; } str[pos2] = QLatin1Char(')'); sstack.push(state); state.current = MxParen; state.dquote = false; continue; } else if (state.current == MxDoubleQuote) { // (Truly) double quoted context - only remaining special char is the closing quote. if (cc == '"') state = sstack.pop(); } else if (cc == '\'') { // Start single quote if we are not in "inherited" double quoted context. if (!state.dquote) { sstack.push(state); state.current = MxSingleQuote; } } else if (cc == '"') { // Same for double quoting. if (!state.dquote) { sstack.push(state); state.current = MxDoubleQuote; state.dquote = true; } } else if (state.current == MxSubst) { // "Braced" substitution context - only remaining special char is the closing brace. if (cc == '}') state = sstack.pop(); } else if (cc == ')') { if (state.current == MxMath) { if (str.unicode()[pos + 1].unicode() == ')') { state = sstack.pop(); pos += 2; } else { // False hit: the $(( was a $( ( in fact. // ash does not care (and will complain), but bash actually parses it. varPos = ostack.top().varPos; pos = ostack.top().pos; str = ostack.top().str; ostack.pop(); state.current = MxParen; state.dquote = false; sstack.push(state); } continue; } else if (state.current == MxParen) { state = sstack.pop(); } else { break; // Syntax error - excess closing parenthesis. } } else if (cc == '}') { if (state.current == MxGroup) state = sstack.pop(); else break; // Syntax error - excess closing brace. } else if (cc == '(') { // Context-saving command grouping. sstack.push(state); state.current = MxParen; } else if (cc == '{') { // Plain command grouping. sstack.push(state); state.current = MxGroup; } pos++; } // FIXME? May complain if (!sstack.empty()), but we don't really care anyway. 
}

    *cmd = str;
    return true;
}

// Advances to the next argument in *m_str starting at m_pos. On success the
// unquoted text is in m_value, m_prev is set to where the scan started and
// true is returned. m_simple is cleared (and m_value emptied) when the
// argument contains constructs whose value cannot be stated here.
bool ProcessArgs::ArgIterator::next()
{
    // We delay the setting of m_prev so we can still delete the last argument
    // after we find that there are no more arguments. It's a bit of a hack ...
    int prev = m_pos;

    m_simple = true;
    m_value.clear();

    if (m_osType == OsTypeWindows) {
        enum { // cmd.exe parsing state
            ShellBasic, // initial state
            ShellQuoted, // double-quoted state => *no* other meta chars are interpreted
            ShellEscaped // circumflex-escaped state => next char is not interpreted
        } shellState = ShellBasic;
        enum { // CommandLineToArgv() parsing state and some more
            CrtBasic, // initial state
            CrtInWord, // in non-whitespace
            CrtClosed, // previous char closed the double-quoting
            CrtQuoted // double-quoted state => spaces don't split tokens
        } crtState = CrtBasic;
        enum { NoVar, NewVar, FullVar } varState = NoVar; // inside a potential env variable expansion
        int bslashes = 0; // number of preceding backslashes

        for (;; m_pos++) {
            // 0 stands for "end of string".
            ushort cc = m_pos < m_str->length() ? m_str->unicode()[m_pos].unicode() : 0;
            if (shellState == ShellBasic && cc == '^') {
                varState = NoVar;
                shellState = ShellEscaped;
            } else if ((shellState == ShellBasic && isMetaCharWin(cc)) || !cc) { // A "bit" simplistic ...
                // We ignore crtQuote state here. Whatever ...
              doReturn:
                // Pending backslashes belong to the value; a non-simple value is discarded.
                if (m_simple)
                    while (--bslashes >= 0)
                        m_value += QLatin1Char('\\');
                else
                    m_value.clear();
                if (crtState != CrtBasic) {
                    m_prev = prev;
                    return true;
                }
                return false;
            } else {
                if (crtState != CrtQuoted && (cc == ' ' || cc == '\t')) {
                    if (crtState != CrtBasic) {
                        // We'll lose shellQuote state here. Whatever ...
                        goto doReturn;
                    }
                } else {
                    if (cc == '\\') {
                        bslashes++;
                        if (crtState != CrtQuoted)
                            crtState = CrtInWord;
                        varState = NoVar;
                    } else {
                        if (cc == '"') {
                            varState = NoVar;
                            if (shellState != ShellEscaped)
                                shellState = (shellState == ShellQuoted) ? ShellBasic : ShellQuoted;
                            // Backslashes in front of a quote are halved; the parity
                            // of the original count decides whether the quote is escaped.
                            int obslashes = bslashes;
                            bslashes >>= 1;
                            if (!(obslashes & 1)) {
                                // Even number of backslashes, so the quote is not escaped.
switch (crtState) { case CrtQuoted: // Closing quote crtState = CrtClosed; continue; case CrtClosed: // Two consecutive quotes make a literal quote - and // still close quoting. See quoteArg(). crtState = CrtInWord; break; default: // Opening quote crtState = CrtQuoted; continue; } } else if (crtState != CrtQuoted) { crtState = CrtInWord; } } else { if (cc == '%') { if (varState == FullVar) { m_simple = false; varState = NoVar; } else { varState = NewVar; } } else if (varState != NoVar) { // This check doesn't really reflect cmd reality, but it is an // approximation of what would be sane. varState = (cc == '_' || cc == '-' || cc == '.' || QChar(cc).isLetterOrNumber()) ? FullVar : NoVar; } if (crtState != CrtQuoted) crtState = CrtInWord; } for (; bslashes > 0; bslashes--) m_value += QLatin1Char('\\'); m_value += QChar(cc); } } if (shellState == ShellEscaped) shellState = ShellBasic; } } } else { MxState state = {MxBasic, false}; QStack sstack; QStack ostack; bool hadWord = false; for (; m_pos < m_str->length(); m_pos++) { ushort cc = m_str->unicode()[m_pos].unicode(); if (state.current == MxSingleQuote) { if (cc == '\'') { state = sstack.pop(); continue; } } else if (cc == '\\') { if (++m_pos >= m_str->length()) break; cc = m_str->unicode()[m_pos].unicode(); if (state.dquote && cc != '"' && cc != '\\' && cc != '$' && cc != '`') m_value += QLatin1Char('\\'); } else if (cc == '$') { if (++m_pos >= m_str->length()) break; cc = m_str->unicode()[m_pos].unicode(); if (cc == '(') { sstack.push(state); if (++m_pos >= m_str->length()) break; if (m_str->unicode()[m_pos].unicode() == '(') { ostack.push(m_pos); state.current = MxMath; } else { state.dquote = false; state.current = MxParen; // m_pos too far by one now - whatever. } } else if (cc == '{') { sstack.push(state); state.current = MxSubst; } else { // m_pos too far by one now - whatever. 
} m_simple = false; hadWord = true; continue; } else if (cc == '`') { forever { if (++m_pos >= m_str->length()) { m_simple = false; m_prev = prev; return true; } cc = m_str->unicode()[m_pos].unicode(); if (cc == '`') break; if (cc == '\\') m_pos++; // m_pos may be too far by one now - whatever. } m_simple = false; hadWord = true; continue; } else if (state.current == MxDoubleQuote) { if (cc == '"') { state = sstack.pop(); continue; } } else if (cc == '\'') { if (!state.dquote) { sstack.push(state); state.current = MxSingleQuote; hadWord = true; continue; } } else if (cc == '"') { if (!state.dquote) { sstack.push(state); state.dquote = true; state.current = MxDoubleQuote; hadWord = true; continue; } } else if (state.current == MxSubst) { if (cc == '}') state = sstack.pop(); continue; // Not simple anyway } else if (cc == ')') { if (state.current == MxMath) { if (++m_pos >= m_str->length()) break; if (m_str->unicode()[m_pos].unicode() == ')') { ostack.pop(); state = sstack.pop(); } else { // false hit: the $(( was a $( ( in fact. // ash does not care, but bash does. m_pos = ostack.pop(); state.current = MxParen; state.dquote = false; sstack.push(state); } continue; } else if (state.current == MxParen) { state = sstack.pop(); continue; } else { break; } #if 0 // MxGroup is impossible, see below. } else if (cc == '}') { if (state.current == MxGroup) { state = sstack.pop(); continue; } #endif } else if (cc == '(') { sstack.push(state); state.current = MxParen; m_simple = false; hadWord = true; continue; #if 0 // Should match only at the beginning of a command, which we never have currently. 
} else if (cc == '{') { sstack.push(state); state.current = MxGroup; m_simple = false; hadWord = true; continue; #endif } else if (cc == '<' || cc == '>' || cc == '&' || cc == '|' || cc == ';') { if (sstack.isEmpty()) break; } else if (cc == ' ' || cc == '\t') { if (!hadWord) continue; if (sstack.isEmpty()) break; } m_value += QChar(cc); hadWord = true; } // TODO: Possibly complain here if (!sstack.empty()) if (!m_simple) m_value.clear(); if (hadWord) { m_prev = prev; return true; } return false; } } void ProcessArgs::ArgIterator::deleteArg() { if (!m_prev) while (m_pos < m_str->length() && m_str->at(m_pos).isSpace()) m_pos++; m_str->remove(m_prev, m_pos - m_prev); m_pos = m_prev; } void ProcessArgs::ArgIterator::appendArg(const QString &str) { const QString qstr = quoteArg(str); if (!m_pos) m_str->insert(0, qstr + QLatin1Char(' ')); else m_str->insert(m_pos, QLatin1Char(' ') + qstr); m_pos += qstr.length() + 1; } ProcessArgs ProcessArgs::createWindowsArgs(const QString &args) { ProcessArgs result; result.m_windowsArgs = args; result.m_isWindows = true; return result; } ProcessArgs ProcessArgs::createUnixArgs(const QStringList &args) { ProcessArgs result; result.m_unixArgs = args; result.m_isWindows = false; return result; } QString ProcessArgs::toWindowsArgs() const { QTC_CHECK(m_isWindows); return m_windowsArgs; } QStringList ProcessArgs::toUnixArgs() const { QTC_CHECK(!m_isWindows); return m_unixArgs; } QString ProcessArgs::toString() const { if (m_isWindows) return m_windowsArgs; else return ProcessArgs::joinArgs(m_unixArgs, OsTypeLinux); } /*! \class Utils::CommandLine \brief The CommandLine class represents a command line of a QProcess or similar utility. 
*/ CommandLine::CommandLine() = default; CommandLine::CommandLine(const FilePath &executable) : m_executable(executable) {} CommandLine::CommandLine(const FilePath &exe, const QStringList &args) : m_executable(exe) { addArgs(args); } CommandLine::CommandLine(const FilePath &exe, const QString &args, RawType) : m_executable(exe) { addArgs(args, Raw); } CommandLine CommandLine::fromUserInput(const QString &cmdline, MacroExpander *expander) { CommandLine cmd; const int pos = cmdline.indexOf(' '); if (pos == -1) { cmd.m_executable = FilePath::fromString(cmdline); } else { cmd.m_executable = FilePath::fromString(cmdline.left(pos)); cmd.m_arguments = cmdline.right(cmdline.length() - pos - 1); if (expander) cmd.m_arguments = expander->expand(cmd.m_arguments); } return cmd; } void CommandLine::addArg(const QString &arg) { ProcessArgs::addArg(&m_arguments, arg, m_executable.osType()); } void CommandLine::addArgs(const QStringList &inArgs) { for (const QString &arg : inArgs) addArg(arg); } // Adds cmd's executable and arguments one by one to this commandline. // Useful for 'sudo', 'nice', etc void CommandLine::addCommandLineAsArgs(const CommandLine &cmd) { addArg(cmd.executable().path()); addArgs(cmd.splitArguments()); } void CommandLine::addArgs(const QString &inArgs, RawType) { ProcessArgs::addArgs(&m_arguments, inArgs); } QString CommandLine::toUserOutput() const { QString res = m_executable.toUserOutput(); if (!m_arguments.isEmpty()) res += ' ' + m_arguments; return res; } QStringList CommandLine::splitArguments() const { return ProcessArgs::splitArgs(m_arguments, m_executable.osType()); } } // namespace Utils