diff options
author | Oswald Buddenhagen <[email protected]> | 2010-10-14 18:05:43 +0200 |
---|---|---|
committer | Oswald Buddenhagen <[email protected]> | 2010-11-17 13:19:07 +0100 |
commit | 531c70f05bfc8355f856f2af41be533fb13b85e6 (patch) | |
tree | 33d0d30107949f92e507e26f083d5d3caa98e8b8 /src | |
parent | dc3ab5bf85d6ce9be2131f1a299731288d37c923 (diff) |
add Utils::QtcProcess
this is a wrapper around QProcess with these features:
- setEnvironment() takes a Utils::Environment instead of a QStringList
- instead of taking a stringlist with arguments, take a single shell
command string which is fully compatible with the system's native
shell (the bourne shell on unix and cmd.exe on windows) - with support
for environment variable expansion, and subject to the shell's
splitting and quoting rules. if the command is too complex (e.g.,
contains redirections), it is transparently executed through a real
shell.
- additionally, the class contains a set of helper functions for
manipulating (constructing, splitting, etc.) shell command lines.
in particular, it contains a shell-safe macro expander and the nested
class ArgIterator which can be used for inspecting and manipulating a
shell command line without going through the stringlist indirection
(which is potentially lossy).
some of this is based on KDE code (KShell and KMacroExpander) which i
have written myself.
Diffstat (limited to 'src')
-rw-r--r-- | src/libs/utils/qtcprocess.cpp | 1387 | ||||
-rw-r--r-- | src/libs/utils/qtcprocess.h | 149 | ||||
-rw-r--r-- | src/libs/utils/utils-lib.pri | 2 |
3 files changed, 1538 insertions, 0 deletions
diff --git a/src/libs/utils/qtcprocess.cpp b/src/libs/utils/qtcprocess.cpp new file mode 100644 index 00000000000..dab401d78cf --- /dev/null +++ b/src/libs/utils/qtcprocess.cpp @@ -0,0 +1,1387 @@ +/************************************************************************** +** +** This file is part of Qt Creator +** +** Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +** +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** Commercial Usage +** +** Licensees holding valid Qt Commercial licenses may use this file in +** accordance with the Qt Commercial License Agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and Nokia. +** +** GNU Lesser General Public License Usage +** +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at http://qt.nokia.com/contact. +** +**************************************************************************/ + +#include "qtcprocess.h" +#include "stringutils.h" + +#ifdef Q_OS_WIN +#include <QtCore/QDir> +#endif + +using namespace Utils; + +/** + * \fn QStringList QtcProcess::splitArgs( + * const QString &args, bool abortOnMeta, SplitError *err, const Environment *env) + * + * Splits \a args according to system shell word splitting and quoting rules. 
+ * + * \section Unix + * + * The behavior is based on the POSIX shell and bash: + * \list + * \li Whitespace splits tokens + * \li The backslash quotes the following character + * \li A string enclosed in single quotes is not split. No shell meta + * characters are interpreted. + * \li A string enclosed in double quotes is not split. Within the string, + * the backslash quotes shell meta characters - if it is followed + * by a "meaningless" character, the backslash is output verbatim. + * \endlist + * If \a abortOnMeta is \c false, only the splitting and quoting rules apply, + * while other meta characters (substitutions, redirections, etc.) are ignored. + * If \a abortOnMeta is \c true, encounters of unhandled meta characters are + * treated as errors. + * + * \section Windows + * + * The behavior is defined by the Microsoft C runtime: + * \list + * \li Whitespace splits tokens + * \li A string enclosed in double quotes is not split + * \list + * \li 3N double quotes within a quoted string yield N literal quotes. + * This is not documented on MSDN. + * \endlist + * \li Backslashes have special semantics iff they are followed by a double + * quote: + * \list + * \li 2N backslashes + double quote => N backslashes and begin/end quoting + * \li 2N+1 backslashes + double quote => N backslashes + literal quote + * \endlist + * \endlist + * Qt and many other implementations comply with this standard, but many do not. + * + * If \a abortOnMeta is \c true, cmd shell semantics are applied before + * proceeding with word splitting: + * \list + * \li Cmd ignores \em all special chars between double quotes. + * Note that the quotes are \em not removed at this stage - the + * tokenization rules described above still apply. + * \li The \c circumflex is the escape char for everything including itself. + * \endlist + * As the quoting levels are independent from each other and have different + * semantics, you need a command line like \c{"foo "\^"" bar"} to get + * \c{foo " bar}. 
+ * + * \param cmd the command to split + * \param abortOnMeta see above + * \param err if not NULL, a status code will be stored at the pointer + * target, see \ref SplitError + * \param env if not NULL, perform variable substitution with the + * given environment. + * \return a list of unquoted words or an empty list if an error occurred + */ + +#ifdef Q_OS_WIN + +inline static bool isMetaChar(ushort c) +{ + static const uchar iqm[] = { + 0x00, 0x00, 0x00, 0x00, 0x40, 0x03, 0x00, 0x50, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 + }; // &()<>| + + return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); +} + +static void envExpand(QString &args, const Environment *env, const QString *pwd) +{ + static const QString cdName = QLatin1String("CD"); + int off = 0; + next: + for (int prev = -1, that; + (that = args.indexOf(QLatin1Char('%'), off)) >= 0; + prev = that, off = that + 1) { + if (prev >= 0) { + const QString var = args.mid(prev + 1, that - prev - 1).toUpper(); + const QString val = (var == cdName && pwd && !pwd->isEmpty()) + ? QDir::toNativeSeparators(*pwd) : env->value(var); + if (!val.isEmpty()) { // Empty values are impossible, so this is an existence check + args.replace(prev, that - prev + 1, val); + off = prev + val.length(); + goto next; + } + } + } +} + +QString QtcProcess::prepareArgs(const QString &_args, SplitError *err, + const Environment *env, const QString *pwd) +{ + QString args(_args); + + if (env) { + envExpand(args, env, pwd); + } else { + if (args.indexOf(QLatin1Char('%')) >= 0) { + if (err) + *err = FoundMeta; + return QString(); + } + } + + if (!args.isEmpty() && args.unicode()[0].unicode() == '@') + args.remove(0, 1); + + for (int p = 0; p < args.length(); p++) { + ushort c = args.unicode()[p].unicode(); + if (c == '^') { + args.remove(p, 1); + } else if (c == '"') { + do { + if (++p == args.length()) + break; // For cmd, this is no error. 
+ } while (args.unicode()[p].unicode() != '"'); + } else if (isMetaChar(c)) { + if (err) + *err = FoundMeta; + return QString(); + } + } + + if (err) + *err = SplitOk; + return args; +} + +inline static bool isWhiteSpace(ushort c) +{ + return c == ' ' || c == '\t'; +} + +static QStringList doSplitArgs(const QString &args, QtcProcess::SplitError *err) +{ + QStringList ret; + + if (err) + *err = QtcProcess::SplitOk; + + int p = 0; + const int length = args.length(); + forever { + forever { + if (p == length) + return ret; + if (!isWhiteSpace(args.unicode()[p].unicode())) + break; + ++p; + } + + QString arg; + bool inquote = false; + forever { + bool copy = true; // copy this char + int bslashes = 0; // number of preceding backslashes to insert + while (p < length && args.unicode()[p] == QLatin1Char('\\')) { + ++p; + ++bslashes; + } + if (p < length && args.unicode()[p] == QLatin1Char('"')) { + if (!(bslashes & 1)) { + // Even number of backslashes, so the quote is not escaped. + if (inquote) { + if (p + 1 < length && args.unicode()[p + 1] == QLatin1Char('"')) { + // This is not documented on MSDN. + // Two consecutive quotes make a literal quote. Brain damage: + // this still closes the quoting, so a 3rd quote is required, + // which makes the runtime's quoting run out of sync with the + // shell's one unless the 2nd quote is escaped. 
+ ++p; + } else { + // Closing quote + copy = false; + } + inquote = false; + } else { + // Opening quote + copy = false; + inquote = true; + } + } + bslashes >>= 1; + } + + while (--bslashes >= 0) + arg.append(QLatin1Char('\\')); + + if (p == length || (!inquote && isWhiteSpace(args.unicode()[p].unicode()))) { + ret.append(arg); + if (inquote) { + if (err) + *err = QtcProcess::BadQuoting; + return QStringList(); + } + break; + } + + if (copy) + arg.append(args.unicode()[p]); + ++p; + } + } + //not reached +} + +QStringList QtcProcess::splitArgs(const QString &_args, bool abortOnMeta, SplitError *err, + const Environment *env, const QString *pwd) +{ + if (abortOnMeta) { + SplitError perr; + if (!err) + err = &perr; + QString args = prepareArgs(_args, &perr, env, pwd); + if (*err != SplitOk) + return QStringList(); + return doSplitArgs(args, err); + } else { + QString args = _args; + if (env) + envExpand(args, env, pwd); + return doSplitArgs(args, err); + } +} + +#else // Q_OS_WIN + +inline static bool isQuoteMeta(QChar cUnicode) +{ + char c = cUnicode.toAscii(); + return c == '\\' || c == '\'' || c == '"' || c == '$'; +} + +inline static bool isMeta(QChar cUnicode) +{ + static const uchar iqm[] = { + 0x00, 0x00, 0x00, 0x00, 0xdc, 0x07, 0x00, 0xd8, + 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x38 + }; // \'"$`<>|;&(){}*?#[] + + uint c = cUnicode.unicode(); + + return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); +} + +QStringList QtcProcess::splitArgs(const QString &args, bool abortOnMeta, SplitError *err, + const Environment *env, const QString *pwd) +{ + static const QString pwdName = QLatin1String("PWD"); + QStringList ret; + + for (int pos = 0; ; ) { + QChar c; + do { + if (pos >= args.length()) + goto okret; + c = args.unicode()[pos++]; + } while (c.isSpace()); + QString cret; + bool hadWord = false; + do { + if (c == QLatin1Char('\'')) { + int spos = pos; + do { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + } while (c != 
QLatin1Char('\'')); + cret += args.mid(spos, pos-spos-1); + hadWord = true; + } else if (c == QLatin1Char('"')) { + for (;;) { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + nextq: + if (c == QLatin1Char('"')) + break; + if (c == QLatin1Char('\\')) { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + if (c != QLatin1Char('"') && + c != QLatin1Char('\\') && + !(abortOnMeta && + (c == QLatin1Char('$') || + c == QLatin1Char('`')))) + cret += QLatin1Char('\\'); + } else if (c == QLatin1Char('$') && env) { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + bool braced = false; + if (c == QLatin1Char('{')) { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + braced = true; + } + QString var; + while (c.isLetterOrNumber() || c == QLatin1Char('_')) { + var += c; + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + } + if (var == pwdName && pwd && !pwd->isEmpty()) { + cret += *pwd; + } else { + Environment::const_iterator vit = env->constFind(var); + if (vit == env->constEnd()) { + if (abortOnMeta) + goto metaerr; // Assume this is a shell builtin + } else { + cret += *vit; + } + } + if (!braced) + goto nextq; + if (c != QLatin1Char('}')) { + if (abortOnMeta) + goto metaerr; // Assume this is a complex expansion + goto quoteerr; // Otherwise it's just garbage + } + continue; + } else if (abortOnMeta && + (c == QLatin1Char('$') || + c == QLatin1Char('`'))) { + goto metaerr; + } + cret += c; + } + hadWord = true; + } else if (c == QLatin1Char('$') && env) { + if (pos >= args.length()) + goto quoteerr; // Bash just takes it verbatim, but whatever + c = args.unicode()[pos++]; + bool braced = false; + if (c == QLatin1Char('{')) { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + braced = true; + } + QString var; + while (c.isLetterOrNumber() || c == QLatin1Char('_')) { + var += c; + if (pos >= args.length()) { + if (braced) + 
goto quoteerr; + c = QLatin1Char(' '); + break; + } + c = args.unicode()[pos++]; + } + QString val; + if (var == pwdName && pwd && !pwd->isEmpty()) { + val = *pwd; + } else { + Environment::const_iterator vit = env->constFind(var); + if (vit == env->constEnd()) { + if (abortOnMeta) + goto metaerr; // Assume this is a shell builtin + } else { + val = *vit; + } + } + for (int i = 0; i < val.length(); i++) { + QChar cc = val.unicode()[i]; + if (cc == 9 || cc == 10 || cc == 32) { + if (hadWord) { + ret += cret; + cret.clear(); + hadWord = false; + } + } else { + cret += cc; + hadWord = true; + } + } + if (!braced) + goto nextc; + if (c != QLatin1Char('}')) { + if (abortOnMeta) + goto metaerr; // Assume this is a complex expansion + goto quoteerr; // Otherwise it's just garbage + } + } else { + if (c == QLatin1Char('\\')) { + if (pos >= args.length()) + goto quoteerr; + c = args.unicode()[pos++]; + } else if (abortOnMeta && isMeta(c)) { + goto metaerr; + } + cret += c; + hadWord = true; + } + if (pos >= args.length()) + break; + c = args.unicode()[pos++]; + nextc: ; + } while (!c.isSpace()); + if (hadWord) + ret += cret; + } + + okret: + if (err) + *err = SplitOk; + return ret; + + quoteerr: + if (err) + *err = BadQuoting; + return QStringList(); + + metaerr: + if (err) + *err = FoundMeta; + return QStringList(); +} + +#endif // Q_OS_WIN + +inline static bool isSpecialCharUnix(ushort c) +{ + // Chars that should be quoted (TM). 
This includes: + static const uchar iqm[] = { + 0xff, 0xff, 0xff, 0xff, 0xdf, 0x07, 0x00, 0xd8, + 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x78 + }; // 0-32 \'"$`<>|;&(){}*?#!~[] + + return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); +} + +inline static bool hasSpecialCharsUnix(const QString &arg) +{ + for (int x = arg.length() - 1; x >= 0; --x) + if (isSpecialCharUnix(arg.unicode()[x].unicode())) + return true; + return false; +} + +QString QtcProcess::quoteArgUnix(const QString &arg) +{ + if (!arg.length()) + return QString::fromLatin1("''"); + + QString ret(arg); + if (hasSpecialCharsUnix(ret)) { + ret.replace(QLatin1Char('\''), QLatin1String("'\\''")); + ret.prepend(QLatin1Char('\'')); + ret.append(QLatin1Char('\'')); + } + return ret; +} + +void QtcProcess::addArgUnix(QString *args, const QString &arg) +{ + if (!args->isEmpty()) + *args += QLatin1Char(' '); + *args += quoteArgUnix(arg); +} + +QString QtcProcess::joinArgsUnix(const QStringList &args) +{ + QString ret; + foreach (const QString &arg, args) + addArgUnix(&ret, arg); + return ret; +} + +#ifdef Q_OS_WIN +inline static bool isSpecialChar(ushort c) +{ + // Chars that should be quoted (TM). This includes: + // - control chars & space + // - the shell meta chars "&()<>^| + // - the potential separators ,;= + static const uchar iqm[] = { + 0xff, 0xff, 0xff, 0xff, 0x45, 0x13, 0x00, 0x78, + 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10 + }; + + return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); +} + +inline static bool hasSpecialChars(const QString &arg) +{ + for (int x = arg.length() - 1; x >= 0; --x) + if (isSpecialChar(arg.unicode()[x].unicode())) + return true; + return false; +} + +QString QtcProcess::quoteArg(const QString &arg) +{ + if (!arg.length()) + return QString::fromLatin1("\"\""); + + QString ret(arg); + if (hasSpecialChars(ret)) { + // Quotes are escaped and their preceding backslashes are doubled. 
+ // It's impossible to escape anything inside a quoted string on cmd + // level, so the outer quoting must be "suspended". + ret.replace(QRegExp(QLatin1String("(\\\\*)\"")), QLatin1String("\"\\1\\1\\^\"\"")); + // The argument must not end with a \ since this would be interpreted + // as escaping the quote -- rather put the \ behind the quote: e.g. + // rather use "foo"\ than "foo\" + ret.replace(QRegExp(QLatin1String("(\\\\*)$")), QLatin1String("\"\\1")); + ret.prepend(QLatin1Char('"')); + } + // FIXME: Without this, quoting is not foolproof. But it needs support in the process setup, etc. + //ret.replace('%', QLatin1String("%PERCENT_SIGN%")); + return ret; +} + +void QtcProcess::addArg(QString *args, const QString &arg) +{ + if (!args->isEmpty()) + *args += QLatin1Char(' '); + *args += quoteArg(arg); +} + +QString QtcProcess::joinArgs(const QStringList &args) +{ + QString ret; + foreach (const QString &arg, args) + addArg(&ret, arg); + return ret; +} +#endif + +void QtcProcess::addArgs(QString *args, const QString &inArgs) +{ + if (!inArgs.isEmpty()) { + if (!args->isEmpty()) + *args += QLatin1Char(' '); + *args += inArgs; + } +} + +void QtcProcess::addArgs(QString *args, const QStringList &inArgs) +{ + foreach (const QString &arg, inArgs) + addArg(args, arg); +} + +#ifdef Q_OS_WIN +void QtcProcess::prepareCommand(const QString &command, const QString &arguments, + QString *outCmd, QString *outArgs, + const Environment *env, const QString *pwd) +{ + QtcProcess::SplitError err; + *outArgs = QtcProcess::prepareArgs(arguments, &err, env, pwd); + if (err == QtcProcess::SplitOk) { + *outCmd = command; + } else { + *outCmd = QString::fromLatin1(qgetenv("COMSPEC")); + *outArgs = QLatin1String("/v:off /s /c \"") + + quoteArg(QDir::toNativeSeparators(command)) + QLatin1Char(' ') + arguments + + QLatin1Char('"'); + } +} +#else +bool QtcProcess::prepareCommand(const QString &command, const QString &arguments, + QString *outCmd, QStringList *outArgs, + const Environment 
*env, const QString *pwd) +{ + QtcProcess::SplitError err; + *outArgs = QtcProcess::prepareArgs(arguments, &err, env, pwd); + if (err == QtcProcess::SplitOk) { + *outCmd = command; + } else { + if (err != QtcProcess::FoundMeta) + return false; + *outCmd = QLatin1String("/bin/sh"); + *outArgs << QLatin1String("-c") << (quoteArg(command) + QLatin1Char(' ') + arguments); + } + return true; +} +#endif + +void QtcProcess::start() +{ + Environment env; + if (m_haveEnv) { + env = m_environment; + QProcess::setEnvironment(env.toStringList()); + } else { + env = Environment::systemEnvironment(); + } + + const QString &workDir = workingDirectory(); + QString command; +#ifdef Q_OS_WIN + QString arguments; + prepareCommand(m_command, m_arguments, &command, &arguments, &env, &workDir); + setNativeArguments(arguments); + QProcess::start(command, QStringList()); +#else + QStringList arguments; + if (!prepareCommand(m_command, m_arguments, &command, &arguments, &env, &workDir)) { + setErrorString(tr("Error in command line.")); + // Should be FailedToStart, but we cannot set the process error from the outside, + // so it would be inconsistent. + emit error(QProcess::UnknownError); + return; + } + QProcess::start(command, arguments); +#endif +} + +#ifdef Q_OS_WIN + +// This function assumes that the resulting string will be quoted. +// That's irrelevant if it does not contain quotes itself. +static int quoteArgInternal(QString &ret, int bslashes) +{ + // Quotes are escaped and their preceding backslashes are doubled. + // It's impossible to escape anything inside a quoted string on cmd + // level, so the outer quoting must be "suspended". 
+ const QChar bs(QLatin1Char('\\')), dq(QLatin1Char('"')); + for (int p = 0; p < ret.length(); p++) { + if (ret.at(p) == bs) { + bslashes++; + } else { + if (ret.at(p) == dq) { + if (bslashes) { + ret.insert(p, QString(bslashes, bs)); + p += bslashes; + } + ret.insert(p, QLatin1String("\"\\^\"")); + p += 4; + } + bslashes = 0; + } + } + return bslashes; +} + +#else + +// The main state of the Unix shell parser +enum MxQuoting { MxBasic, MxSingleQuote, MxDoubleQuote, MxParen, MxSubst, MxGroup, MxMath }; +typedef struct { + MxQuoting current; + // Bizarrely enough, double quoting has an impact on the behavior of some + // complex expressions within the quoted string. + bool dquote; +} MxState; +Q_DECLARE_TYPEINFO(MxState, Q_PRIMITIVE_TYPE); + +// Pushed state for the case where a $(()) expansion turns out bogus +typedef struct { + QString str; + int pos, varPos; +} MxSave; +Q_DECLARE_TYPEINFO(MxSave, Q_MOVABLE_TYPE); + +#include <QtCore/QStack> + +#endif + +// TODO: This documentation is relevant for end-users. Where to put it? +/** + * Perform safe macro expansion (substitution) on a string for use + * in shell commands. + * + * \section Unix notes + * + * Explicitly supported shell constructs: + * \\ '' "" {} () $(()) ${} $() `` + * + * Implicitly supported shell constructs: + * (()) + * + * Unsupported shell constructs that will cause problems: + * \list + * \li Shortened \c{case $v in pat)} syntax. Use \c{case $v in (pat)} instead. + * \li Bash-style \c{$""} and \c{$''} string quoting syntax. + * \endlist + * + * The rest of the shell (incl. bash) syntax is simply ignored, + * as it is not expected to cause problems. 
+ * + * Security considerations: + * \list + * \li Backslash-escaping an expando is treated as a quoting error + * \li Do not put expandos into double quoted substitutions: + * \badcode + * "${VAR:-%{macro}}" + * \endcode + * \li Do not put expandos into command line arguments which are nested + * shell commands: + * \badcode + * sh -c 'foo \%{file}' + * \endcode + * \goodcode + * file=\%{file} sh -c 'foo "$file"' + * \endcode + * \endlist + * + * \section Windows notes + * + * All quoting syntax supported by splitArgs() is supported here as well. + * Additionally, command grouping via parentheses is recognized - note + * however, that the parser is much stricter about unquoted parentheses + * than cmd itself. + * The rest of the cmd syntax is simply ignored, as it is not expected + * to cause problems. + * + * Security considerations: + * \list + * \li Circumflex-escaping an expando is treated as a quoting error + * \li Closing double quotes right before expandos and opening double quotes + * right after expandos are treated as quoting errors + * \li Do not put expandos into nested commands: + * \badcode + * for /f "usebackq" \%v in (`foo \%{file}`) do \@echo \%v + * \endcode + * \li A macro's value must not contain anything which may be interpreted + * as an environment variable expansion. A solution is replacing any + * percent signs with a fixed string like \c{\%PERCENT_SIGN\%} and + * injecting \c{PERCENT_SIGN=\%} into the shell's environment. + * \li Enabling delayed environment variable expansion (cmd /v:on) should have + * no security implications, but may still wreak havoc due to the + * need for doubling circumflexes if any exclamation marks are present, + * and the need to circumflex-escape the exclamation marks themselves. 
+ * \endlist + * + * \param cmd pointer to the string in which macros are expanded in-place + * \param mx pointer to a macro expander instance + * \return false if the string could not be parsed and therefore no safe + * substitution was possible + */ +bool QtcProcess::expandMacros(QString *cmd, AbstractMacroExpander *mx) +{ + QString str = *cmd; + if (str.isEmpty()) + return true; + + QString rsts; + int varLen; + int varPos = 0; + if (!(varLen = mx->findMacro(str, &varPos, &rsts))) + return true; + + int pos = 0; + +#ifdef Q_OS_WIN + enum { // cmd.exe parsing state + ShellBasic, // initial state + ShellQuoted, // double-quoted state => *no* other meta chars are interpreted + ShellEscaped // circumflex-escaped state => next char is not interpreted + } shellState = ShellBasic; + enum { // CommandLineToArgv() parsing state and some more + CrtBasic, // initial state + CrtNeedWord, // after empty expando; insert empty argument if whitespace follows + CrtInWord, // in non-whitespace + CrtClosed, // previous char closed the double-quoting + CrtHadQuote, // closed double-quoting after an expando + // The remaining two need to be numerically higher + CrtQuoted, // double-quoted state => spaces don't split tokens + CrtNeedQuote // expando opened quote; close if no expando follows + } crtState = CrtBasic; + int bslashes = 0; // previous chars were manual backslashes + int rbslashes = 0; // trailing backslashes in replacement + + forever { + if (pos == varPos) { + if (shellState == ShellEscaped) + return false; // Circumflex'd quoted expando would be Bad (TM). + if ((shellState == ShellQuoted) != (crtState == CrtQuoted)) + return false; // CRT quoting out of sync with shell quoting. Ahoy to Redmond. + rbslashes += bslashes; + bslashes = 0; + if (crtState < CrtQuoted) { + if (rsts.isEmpty()) { + if (crtState == CrtBasic) { + // Outside any quoting and the string is empty, so put + // a pair of quotes. Delaying that is just pedantry. 
+ crtState = CrtNeedWord; + } + } else { + if (hasSpecialChars(rsts)) { + if (crtState == CrtClosed) { + // Quoted expando right after closing quote. Can't do that. + return false; + } + int tbslashes = quoteArgInternal(rsts, 0); + rsts.prepend(QLatin1Char('"')); + if (rbslashes) + rsts.prepend(QString(rbslashes, QLatin1Char('\\'))); + crtState = CrtNeedQuote; + rbslashes = tbslashes; + } else { + crtState = CrtInWord; // We know that this string contains no spaces. + // We know that this string contains no quotes, + // so the function won't make a mess. + rbslashes = quoteArgInternal(rsts, rbslashes); + } + } + } else { + rbslashes = quoteArgInternal(rsts, rbslashes); + } + str.replace(pos, varLen, rsts); + pos += rsts.length(); + varPos = pos; + if (!(varLen = mx->findMacro(str, &varPos, &rsts))) { + // Don't leave immediately, as we may be in CrtNeedWord state which could + // be still resolved, or we may have inserted trailing backslashes. + varPos = INT_MAX; + } + continue; + } + if (crtState == CrtNeedQuote) { + if (rbslashes) { + str.insert(pos, QString(rbslashes, QLatin1Char('\\'))); + pos += rbslashes; + varPos += rbslashes; + rbslashes = 0; + } + str.insert(pos, QLatin1Char('"')); + pos++; + varPos++; + crtState = CrtHadQuote; + } + ushort cc = str.unicode()[pos].unicode(); + if (shellState == ShellBasic && cc == '^') { + shellState = ShellEscaped; + } else { + if (!cc || cc == ' ' || cc == '\t') { + if (crtState < CrtQuoted) { + if (crtState == CrtNeedWord) { + str.insert(pos, QLatin1String("\"\"")); + pos += 2; + varPos += 2; + } + crtState = CrtBasic; + } + if (!cc) + break; + bslashes = 0; + rbslashes = 0; + } else { + if (cc == '\\') { + bslashes++; + if (crtState < CrtQuoted) + crtState = CrtInWord; + } else { + if (cc == '"') { + if (shellState != ShellEscaped) + shellState = (shellState == ShellQuoted) ? ShellBasic : ShellQuoted; + if (rbslashes) { + // Offset -1: skip possible circumflex. 
We have at least + // one backslash, so a fixed offset is ok. + str.insert(pos - 1, QString(rbslashes, QLatin1Char('\\'))); + pos += rbslashes; + varPos += rbslashes; + } + if (!(bslashes & 1)) { + // Even number of backslashes, so the quote is not escaped. + switch (crtState) { + case CrtQuoted: + // Closing quote + crtState = CrtClosed; + break; + case CrtClosed: + // Two consecutive quotes make a literal quote - and + // still close quoting. See QtcProcess::quoteArg(). + crtState = CrtInWord; + break; + case CrtHadQuote: + // Opening quote right after quoted expando. Can't do that. + return false; + default: + // Opening quote + crtState = CrtQuoted; + break; + } + } else if (crtState < CrtQuoted) { + crtState = CrtInWord; + } + } else if (crtState < CrtQuoted) { + crtState = CrtInWord; + } + bslashes = 0; + rbslashes = 0; + } + } + if (varPos == INT_MAX && !rbslashes) + break; + if (shellState == ShellEscaped) + shellState = ShellBasic; + } + pos++; + } +#else + MxState state = { MxBasic, false }; + QStack<MxState> sstack; + QStack<MxSave> ostack; + + while (pos < str.length()) { + if (pos == varPos) { + // Our expansion rules trigger in any context + if (state.dquote) { + // We are within a double-quoted string. Escape relevant meta characters. + rsts.replace(QRegExp(QLatin1String("([$`\"\\\\])")), QLatin1String("\\\\1")); + } else if (state.current == MxSingleQuote) { + // We are within a single-quoted string. "Suspend" single-quoting and put a + // single escaped quote for each single quote inside the string. + rsts.replace(QLatin1Char('\''), QLatin1String("'\\''")); + } else if (rsts.isEmpty() || hasSpecialCharsUnix(rsts)) { + // String contains "quote-worthy" characters. Use single quoting - but + // that choice is arbitrary. + rsts.replace(QLatin1Char('\''), QLatin1String("'\\''")); + rsts.prepend(QLatin1Char('\'')); + rsts.append(QLatin1Char('\'')); + } // Else just use the string verbatim. 
+ str.replace(pos, varLen, rsts); + pos += rsts.length(); + varPos = pos; + if (!(varLen = mx->findMacro(str, &varPos, &rsts))) + break; + continue; + } + ushort cc = str.unicode()[pos].unicode(); + if (state.current == MxSingleQuote) { + // Single quoted context - only the single quote has any special meaning. + if (cc == '\'') + state = sstack.pop(); + } else if (cc == '\\') { + // In any other context, the backslash starts an escape. + pos += 2; + if (varPos < pos) + return false; // Backslash'd quoted expando would be Bad (TM). + continue; + } else if (cc == '$') { + cc = str.unicode()[++pos].unicode(); + if (cc == '(') { + sstack.push(state); + if (str.unicode()[pos + 1].unicode() == '(') { + // $(( starts a math expression. This may also be a $( ( in fact, + // so we push the current string and offset on a stack so we can retry. + MxSave sav = { str, pos + 2, varPos }; + ostack.push(sav); + state.current = MxMath; + pos += 2; + continue; + } else { + // $( starts a command substitution. This actually "opens a new context" + // which overrides surrounding double quoting. + state.current = MxParen; + state.dquote = false; + } + } else if (cc == '{') { + // ${ starts a "braced" variable substitution. + sstack.push(state); + state.current = MxSubst; + } // Else assume that a "bare" variable substitution has started + } else if (cc == '`') { + // Backticks are evil, as every shell interprets escapes within them differently, + // which is a danger for the quoting of our own expansions. + // So we just apply *our* rules (which match bash) and transform it into a POSIX + // command substitution which has clear semantics. + str.replace(pos, 1, QLatin1String("$( " )); // add space -> avoid creating $(( + varPos += 2; + int pos2 = pos += 3; + forever { + if (pos2 >= str.length()) + return false; // Syntax error - unterminated backtick expression. 
+ cc = str.unicode()[pos2].unicode(); + if (cc == '`') + break; + if (cc == '\\') { + cc = str.unicode()[++pos2].unicode(); + if (cc == '$' || cc == '`' || cc == '\\' || + (cc == '"' && state.dquote)) + { + str.remove(pos2 - 1, 1); + if (varPos >= pos2) + varPos--; + continue; + } + } + pos2++; + } + str[pos2] = QLatin1Char(')'); + sstack.push(state); + state.current = MxParen; + state.dquote = false; + continue; + } else if (state.current == MxDoubleQuote) { + // (Truly) double quoted context - only remaining special char is the closing quote. + if (cc == '"') + state = sstack.pop(); + } else if (cc == '\'') { + // Start single quote if we are not in "inherited" double quoted context. + if (!state.dquote) { + sstack.push(state); + state.current = MxSingleQuote; + } + } else if (cc == '"') { + // Same for double quoting. + if (!state.dquote) { + sstack.push(state); + state.current = MxDoubleQuote; + state.dquote = true; + } + } else if (state.current == MxSubst) { + // "Braced" substitution context - only remaining special char is the closing brace. + if (cc == '}') + state = sstack.pop(); + } else if (cc == ')') { + if (state.current == MxMath) { + if (str.unicode()[pos + 1].unicode() == ')') { + state = sstack.pop(); + pos += 2; + } else { + // False hit: the $(( was a $( ( in fact. + // ash does not care (and will complain), but bash actually parses it. + varPos = ostack.top().varPos; + pos = ostack.top().pos; + str = ostack.top().str; + ostack.pop(); + state.current = MxParen; + state.dquote = false; + sstack.push(state); + } + continue; + } else if (state.current == MxParen) { + state = sstack.pop(); + } else { + break; // Syntax error - excess closing parenthesis. + } + } else if (cc == '}') { + if (state.current == MxGroup) + state = sstack.pop(); + else + break; // Syntax error - excess closing brace. + } else if (cc == '(') { + // Context-saving command grouping. 
+ sstack.push(state); + state.current = MxParen; + } else if (cc == '{') { + // Plain command grouping. + sstack.push(state); + state.current = MxGroup; + } + pos++; + } + // FIXME? May complain if (!sstack.empty()), but we don't really care anyway. +#endif + + *cmd = str; + return true; +} + +QString QtcProcess::expandMacros(const QString &str, AbstractMacroExpander *mx) +{ + QString ret = str; + expandMacros(&ret, mx); + return ret; +} + +bool QtcProcess::ArgIterator::next() +{ + // We delay the setting of m_prev so we can still delete the last argument + // after we find that there are no more arguments. It's a bit of a hack ... + int prev = m_pos; + + m_simple = true; + m_value.clear(); + +#ifdef Q_OS_WIN + enum { // cmd.exe parsing state + ShellBasic, // initial state + ShellQuoted, // double-quoted state => *no* other meta chars are interpreted + ShellEscaped // circumflex-escaped state => next char is not interpreted + } shellState = ShellBasic; + enum { // CommandLineToArgv() parsing state and some more + CrtBasic, // initial state + CrtInWord, // in non-whitespace + CrtClosed, // previous char closed the double-quoting + CrtQuoted // double-quoted state => spaces don't split tokens + } crtState = CrtBasic; + enum { NoVar, NewVar, FullVar } varState = NoVar; // inside a potential env variable expansion + int bslashes = 0; // number of preceding backslashes + + for (;; m_pos++) { + ushort cc = m_pos < m_str->length() ? m_str->unicode()[m_pos].unicode() : 0; + if (shellState == ShellBasic && cc == '^') { + varState = NoVar; + shellState = ShellEscaped; + } else if ((shellState == ShellBasic && isMetaChar(cc)) || !cc) { // A "bit" simplistic ... + // We ignore crtQuote state here. Whatever ... 
+ doReturn: + if (m_simple) + while (--bslashes >= 0) + m_value += QLatin1Char('\\'); + else + m_value.clear(); + if (crtState != CrtBasic) { + m_prev = prev; + return true; + } + return false; + } else { + if (crtState != CrtQuoted && (cc == ' ' || cc == '\t')) { + if (crtState != CrtBasic) { + // We'll lose shellQuote state here. Whatever ... + goto doReturn; + } + } else { + if (cc == '\\') { + bslashes++; + if (crtState != CrtQuoted) + crtState = CrtInWord; + varState = NoVar; + } else { + if (cc == '"') { + varState = NoVar; + if (shellState != ShellEscaped) + shellState = (shellState == ShellQuoted) ? ShellBasic : ShellQuoted; + int obslashes = bslashes; + bslashes >>= 1; + if (!(obslashes & 1)) { + // Even number of backslashes, so the quote is not escaped. + switch (crtState) { + case CrtQuoted: + // Closing quote + crtState = CrtClosed; + continue; + case CrtClosed: + // Two consecutive quotes make a literal quote - and + // still close quoting. See quoteArg(). + crtState = CrtInWord; + break; + default: + // Opening quote + crtState = CrtQuoted; + continue; + } + } else if (crtState != CrtQuoted) { + crtState = CrtInWord; + } + } else { + if (cc == '%') { + if (varState == FullVar) { + m_simple = false; + varState = NoVar; + } else { + varState = NewVar; + } + } else if (varState != NoVar) { + // This check doesn't really reflect cmd reality, but it is an + // approximation of what would be sane. + varState = (cc == '_' || cc == '-' || cc == '.' + || QChar(cc).isLetterOrNumber()) ? 
FullVar : NoVar; + + } + if (crtState != CrtQuoted) + crtState = CrtInWord; + } + for (; bslashes > 0; bslashes--) + m_value += QLatin1Char('\\'); + m_value += QChar(cc); + } + } + if (shellState == ShellEscaped) + shellState = ShellBasic; + } + } +#else + MxState state = { MxBasic, false }; + QStack<MxState> sstack; + QStack<int> ostack; + bool hadWord = false; + + for (; m_pos < m_str->length(); m_pos++) { + ushort cc = m_str->unicode()[m_pos].unicode(); + if (state.current == MxSingleQuote) { + if (cc == '\'') { + state = sstack.pop(); + continue; + } + } else if (cc == '\\') { + if (++m_pos >= m_str->length()) + break; + cc = m_str->unicode()[m_pos].unicode(); + if (state.dquote && cc != '"' && cc != '\\' && cc != '$' && cc != '`') + m_value += QLatin1Char('\\'); + } else if (cc == '$') { + if (++m_pos >= m_str->length()) + break; + cc = m_str->unicode()[m_pos].unicode(); + if (cc == '(') { + sstack.push(state); + if (++m_pos >= m_str->length()) + break; + if (m_str->unicode()[m_pos].unicode() == '(') { + ostack.push(m_pos); + state.current = MxMath; + } else { + state.dquote = false; + state.current = MxParen; + // m_pos too far by one now - whatever. + } + } else if (cc == '{') { + sstack.push(state); + state.current = MxSubst; + } else { + // m_pos too far by one now - whatever. + } + m_simple = false; + hadWord = true; + continue; + } else if (cc == '`') { + forever { + if (++m_pos >= m_str->length()) { + m_simple = false; + m_prev = prev; + return true; + } + cc = m_str->unicode()[m_pos].unicode(); + if (cc == '`') + break; + if (cc == '\\') + m_pos++; // m_pos may be too far by one now - whatever. 
+ } + m_simple = false; + hadWord = true; + continue; + } else if (state.current == MxDoubleQuote) { + if (cc == '"') { + state = sstack.pop(); + continue; + } + } else if (cc == '\'') { + if (!state.dquote) { + sstack.push(state); + state.current = MxSingleQuote; + hadWord = true; + continue; + } + } else if (cc == '"') { + if (!state.dquote) { + sstack.push(state); + state.dquote = true; + state.current = MxDoubleQuote; + hadWord = true; + continue; + } + } else if (state.current == MxSubst) { + if (cc == '}') + state = sstack.pop(); + continue; // Not simple anyway + } else if (cc == ')') { + if (state.current == MxMath) { + if (++m_pos >= m_str->length()) + break; + if (m_str->unicode()[m_pos].unicode() == ')') { + ostack.pop(); + state = sstack.pop(); + } else { + // false hit: the $(( was a $( ( in fact. + // ash does not care, but bash does. + m_pos = ostack.pop(); + state.current = MxParen; + state.dquote = false; + sstack.push(state); + } + continue; + } else if (state.current == MxParen) { + state = sstack.pop(); + continue; + } else { + break; + } +#if 0 // MxGroup is impossible, see below. + } else if (cc == '}') { + if (state.current == MxGroup) { + state = sstack.pop(); + continue; + } +#endif + } else if (cc == '(') { + sstack.push(state); + state.current = MxParen; + m_simple = false; + hadWord = true; +#if 0 // Should match only at the beginning of a command, which we never have currently. 
+ } else if (cc == '{') { + sstack.push(state); + state.current = MxGroup; + m_simple = false; + hadWord = true; + continue; +#endif + } else if (cc == '<' || cc == '>' || cc == '&' || cc == '|' || cc == ';') { + if (sstack.isEmpty()) + break; + } else if (cc == ' ' || cc == '\t') { + if (!hadWord) + continue; + if (sstack.isEmpty()) + break; + } + m_value += QChar(cc); + hadWord = true; + } + // TODO: Possibly complain here if (!sstack.empty()) + if (!m_simple) + m_value.clear(); + if (hadWord) { + m_prev = prev; + return true; + } + return false; +#endif +} + +void QtcProcess::ArgIterator::deleteArg() +{ + if (!m_prev) + while (m_pos < m_str->length() && m_str->at(m_pos).isSpace()) + m_pos++; + m_str->remove(m_prev, m_pos - m_prev); + m_pos = m_prev; +} + +void QtcProcess::ArgIterator::appendArg(const QString &str) +{ + const QString qstr = quoteArg(str); + if (!m_pos) + m_str->insert(0, qstr + QLatin1Char(' ')); + else + m_str->insert(m_pos, QLatin1Char(' ') + qstr); + m_pos += qstr.length() + 1; +} diff --git a/src/libs/utils/qtcprocess.h b/src/libs/utils/qtcprocess.h new file mode 100644 index 00000000000..e7439d8c27c --- /dev/null +++ b/src/libs/utils/qtcprocess.h @@ -0,0 +1,149 @@ +/************************************************************************** +** +** This file is part of Qt Creator +** +** Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +** +** Contact: Nokia Corporation ([email protected]) +** +** Commercial Usage +** +** Licensees holding valid Qt Commercial licenses may use this file in +** accordance with the Qt Commercial License Agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and Nokia. 
+** +** GNU Lesser General Public License Usage +** +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at http://qt.nokia.com/contact. +** +**************************************************************************/ + +#ifndef QTCPROCESS_H +#define QTCPROCESS_H + +#include <QProcess> + +#include "utils_global.h" + +#include "environment.h" + +namespace Utils { +class AbstractMacroExpander; + +/*! + This class provides functionality for dealing with shell-quoted process arguments. +*/ +class QTCREATOR_UTILS_EXPORT QtcProcess : public QProcess +{ + Q_OBJECT + +public: + QtcProcess(QObject *parent = 0) : QProcess(parent), m_haveEnv(false) {} + void setEnvironment(const Environment &env) + { m_environment = env; m_haveEnv = true; } + void setCommand(const QString &command, const QString &arguments) + { m_command = command; m_arguments = arguments; } + void start(); + + enum SplitError { + SplitOk = 0, //! All went just fine + BadQuoting, //! Command contains quoting errors + FoundMeta //! Command contains complex shell constructs + }; + + //! Quote a single argument for usage in a unix shell command + static QString quoteArgUnix(const QString &arg); + //! Quote a single argument and append it to a unix shell command + static void addArgUnix(QString *args, const QString &arg); + //! Join an argument list into a unix shell command + static QString joinArgsUnix(const QStringList &args); +#ifdef Q_OS_WIN + //! 
Quote a single argument for usage in a shell command + static QString quoteArg(const QString &arg); + //! Quote a single argument and append it to a shell command + static void addArg(QString *args, const QString &arg); + //! Join an argument list into a shell command + static QString joinArgs(const QStringList &args); + //! Prepare argument of a shell command for feeding into QProcess + static QString prepareArgs(const QString &cmd, SplitError *err, + const Environment *env = 0, const QString *pwd = 0); + //! Prepare a shell command for feeding into QProcess + static void prepareCommand(const QString &command, const QString &arguments, + QString *outCmd, QString *outArgs, + const Environment *env = 0, const QString *pwd = 0); +#else + static QString quoteArg(const QString &arg) { return quoteArgUnix(arg); } + static void addArg(QString *args, const QString &arg) { addArgUnix(args, arg); } + static QString joinArgs(const QStringList &args) { return joinArgsUnix(args); } + static QStringList prepareArgs(const QString &cmd, SplitError *err, + const Environment *env = 0, const QString *pwd = 0) + { return splitArgs(cmd, true, err, env, pwd); } + static bool prepareCommand(const QString &command, const QString &arguments, + QString *outCmd, QStringList *outArgs, + const Environment *env = 0, const QString *pwd = 0); +#endif + //! Quote and append each argument to a shell command + static void addArgs(QString *args, const QStringList &inArgs); + //! Append already quoted arguments to a shell command + static void addArgs(QString *args, const QString &inArgs); + //! Split a shell command into separate arguments. ArgIterator is usually a better choice. + static QStringList splitArgs(const QString &cmd, bool abortOnMeta = false, SplitError *err = 0, + const Environment *env = 0, const QString *pwd = 0); + //! 
Safely replace the expandos in a shell command + static bool expandMacros(QString *cmd, AbstractMacroExpander *mx); + static QString expandMacros(const QString &str, AbstractMacroExpander *mx); + + /*! Iterate over arguments from a command line. + * Assumes that the name of the actual command is *not* part of the line. + * Terminates after the first command if the command line is complex. + */ + class QTCREATOR_UTILS_EXPORT ArgIterator { + public: + ArgIterator(QString *str) : m_str(str), m_pos(0), m_prev(-1) {} + //! Get the next argument. Returns false on encountering end of first command. + bool next(); + //! True iff the argument is a plain string, possibly after unquoting. + bool isSimple() const { return m_simple; } + //! Return the string value of the current argument if it is simple, otherwise empty. + QString value() const { return m_value; } + //! Delete the last argument fetched via next() from the command line. + void deleteArg(); + //! Insert argument into the command line after the last one fetched via next(). + //! This may be used before the first call to next() to insert at the front. 
+ void appendArg(const QString &str); + private: + QString *m_str, m_value; + int m_pos, m_prev; + bool m_simple; + }; + + class QTCREATOR_UTILS_EXPORT ConstArgIterator { + public: + ConstArgIterator(const QString &str) : m_str(str), m_ait(&m_str) {} + bool next() { return m_ait.next(); } + bool isSimple() const { return m_ait.isSimple(); } + QString value() const { return m_ait.value(); } + private: + QString m_str; + ArgIterator m_ait; + }; + +private: + QString m_command; + QString m_arguments; + Environment m_environment; + bool m_haveEnv; +}; + +} + +#endif // QTCPROCESS_H diff --git a/src/libs/utils/utils-lib.pri b/src/libs/utils/utils-lib.pri index aa2e65cf11d..245bb7ecdd2 100644 --- a/src/libs/utils/utils-lib.pri +++ b/src/libs/utils/utils-lib.pri @@ -8,6 +8,7 @@ INCLUDEPATH += $$PWD QT += network SOURCES += $$PWD/environment.cpp \ + $$PWD/qtcprocess.cpp \ $$PWD/reloadpromptutils.cpp \ $$PWD/stringutils.cpp \ $$PWD/filesearch.cpp \ @@ -66,6 +67,7 @@ unix:!macx { SOURCES += $$PWD/unixutils.cpp } HEADERS += $$PWD/environment.h \ + $$PWD/qtcprocess.h \ $$PWD/utils_global.h \ $$PWD/reloadpromptutils.h \ $$PWD/stringutils.h \ |