/**************************************************************************
**
** This file is part of Qt Creator
**
** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
**
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** No Commercial Usage
**
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
**
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights.  These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**************************************************************************/

#include "qtcprocess.h"
#include "stringutils.h"

#ifdef Q_OS_WIN
#include <QtCore/QDir>
#endif

using namespace Utils;

/**
 * \fn QStringList QtcProcess::splitArgs(
 *         const QString &args, bool abortOnMeta, SplitError *err,
 *         const Environment *env, const QString *pwd)
 *
 * Splits \a args according to system shell word splitting and quoting rules.
 *
 * \section Unix
 *
 * The behavior is based on the POSIX shell and bash:
 * \list
 * \li Whitespace splits tokens
 * \li The backslash quotes the following character
 * \li A string enclosed in single quotes is not split. No shell meta
 *     characters are interpreted.
 * \li A string enclosed in double quotes is not split. Within the string,
 *     the backslash quotes shell meta characters - if it is followed
 *     by a "meaningless" character, the backslash is output verbatim.
 * \endlist
 * If \a abortOnMeta is \c false, only the splitting and quoting rules apply,
 * while other meta characters (substitutions, redirections, etc.) are ignored.
 * If \a abortOnMeta is \c true, encounters of unhandled meta characters are
 * treated as errors.
 *
 * \section Windows
 *
 * The behavior is defined by the Microsoft C runtime:
 * \list
 * \li Whitespace splits tokens
 * \li A string enclosed in double quotes is not split
 *   \list
 *   \li 3N double quotes within a quoted string yield N literal quotes.
 *       This is not documented on MSDN.
 *   \endlist
 * \li Backslashes have special semantics iff they are followed by a double
 *     quote:
 *   \list
 *   \li 2N backslashes + double quote => N backslashes and begin/end quoting
 *   \li 2N+1 backslashes + double quote => N backslashes + literal quote
 *   \endlist
 * \endlist
 * Qt and many other implementations comply with this standard, but many do not.
 *
 * If \a abortOnMeta is \c true, cmd shell semantics are applied before
 * proceeding with word splitting:
 * \list
 * \li Cmd ignores \em all special chars between double quotes.
 *     Note that the quotes are \em not removed at this stage - the
 *     tokenization rules described above still apply.
 * \li The \c circumflex is the escape char for everything including itself.
 * \endlist
 * As the quoting levels are independent from each other and have different
 * semantics, you need a command line like \c{"foo "\^"" bar"} to get
 * \c{foo " bar}.
 *
 * \param args the command line to split
 * \param abortOnMeta see above
 * \param err if not NULL, a status code will be stored at the pointer
 *  target, see \ref SplitError
 * \param env if not NULL, perform variable substitution with the
 *  given environment.
 * \param pwd if not NULL and not empty, its value is substituted for the
 *  current-directory variable (\c{%CD%} on Windows, \c{$PWD} on Unix)
 *  instead of looking that variable up in \a env.
 * \return a list of unquoted words or an empty list if an error occurred
 */

#ifdef Q_OS_WIN

// Returns true if c is one of the cmd meta characters &()<>| (bitmap lookup, ASCII only).
inline static bool isMetaChar(ushort c)
{
    static const uchar iqm[] = {
        0x00, 0x00, 0x00, 0x00, 0x40, 0x03, 0x00, 0x50,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
    }; // &()<>|

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

// Replaces %VAR% references in args in place. %CD% is taken from pwd when that
// is non-empty; everything else is looked up in env. References whose value is
// empty are left untouched. After each replacement the scan restarts right
// behind the inserted value, so inserted text is never re-expanded.
static void envExpand(QString &args, const Environment *env, const QString *pwd)
{
    static const QString cdName = QLatin1String("CD");
    int off = 0;
  next:
    for (int prev = -1, that;
         (that = args.indexOf(QLatin1Char('%'), off)) >= 0;
         prev = that, off = that + 1) {
        if (prev >= 0) {
            const QString var = args.mid(prev + 1, that - prev - 1).toUpper();
            const QString val = (var == cdName && pwd && !pwd->isEmpty())
                                    ? QDir::toNativeSeparators(*pwd) : env->value(var);
            if (!val.isEmpty()) { // Empty values are impossible, so this is an existence check
                args.replace(prev, that - prev + 1, val);
                off = prev + val.length();
                goto next;
            }
        }
    }
}

// Applies the cmd-level processing to a command line so it can be handed to
// the C runtime tokenizer directly:
//  - expands %VAR% references if env is given; otherwise any '%' is reported
//    as FoundMeta
//  - drops a leading '@'
//  - removes circumflexes (the character following one is not inspected)
//  - skips over double-quoted spans verbatim (the quotes are kept)
//  - reports FoundMeta for any unquoted, unescaped meta character
// Returns the processed string, or a null string on FoundMeta.
QString QtcProcess::prepareArgs(const QString &_args, SplitError *err,
                                const Environment *env, const QString *pwd)
{
    QString args(_args);

    if (env) {
        envExpand(args, env, pwd);
    } else {
        if (args.indexOf(QLatin1Char('%')) >= 0) {
            if (err)
                *err = FoundMeta;
            return QString();
        }
    }

    if (!args.isEmpty() && args.unicode()[0].unicode() == '@')
        args.remove(0, 1);

    for (int p = 0; p < args.length(); p++) {
        ushort c = args.unicode()[p].unicode();
        if (c == '^') {
            // Removing the circumflex moves the escaped char to index p; the
            // loop increment then steps over it without interpreting it.
            args.remove(p, 1);
        } else if (c == '"') {
            do {
                if (++p == args.length())
                    break; // For cmd, this is no error.
            } while (args.unicode()[p].unicode() != '"');
        } else if (isMetaChar(c)) {
            if (err)
                *err = FoundMeta;
            return QString();
        }
    }

    if (err)
        *err = SplitOk;
    return args;
}

// Token separators for the C runtime tokenizer: space and tab.
inline static bool isWhiteSpace(ushort c)
{
    return c == ' ' || c == '\t';
}

// Tokenizes args according to the Microsoft C runtime rules described in the
// splitArgs() documentation. Stores SplitOk or BadQuoting in *err (if given);
// returns an empty list on BadQuoting (unterminated double quote).
static QStringList doSplitArgs(const QString &args, QtcProcess::SplitError *err)
{
    QStringList ret;

    if (err)
        *err = QtcProcess::SplitOk;

    int p = 0;
    const int length = args.length();
    forever {
        // Skip whitespace between tokens.
        forever {
            if (p == length)
                return ret;
            if (!isWhiteSpace(args.unicode()[p].unicode()))
                break;
            ++p;
        }

        QString arg;
        bool inquote = false;
        forever {
            bool copy = true; // copy this char
            int bslashes = 0; // number of preceding backslashes to insert
            while (p < length && args.unicode()[p] == QLatin1Char('\\')) {
                ++p;
                ++bslashes;
            }
            if (p < length && args.unicode()[p] == QLatin1Char('"')) {
                if (!(bslashes & 1)) {
                    // Even number of backslashes, so the quote is not escaped.
                    if (inquote) {
                        if (p + 1 < length && args.unicode()[p + 1] == QLatin1Char('"')) {
                            // This is not documented on MSDN.
                            // Two consecutive quotes make a literal quote. Brain damage:
                            // this still closes the quoting, so a 3rd quote is required,
                            // which makes the runtime's quoting run out of sync with the
                            // shell's one unless the 2nd quote is escaped.
                            ++p;
                        } else {
                            // Closing quote
                            copy = false;
                        }
                        inquote = false;
                    } else {
                        // Opening quote
                        copy = false;
                        inquote = true;
                    }
                }
                // Backslashes in front of a quote are halved.
                bslashes >>= 1;
            }

            while (--bslashes >= 0)
                arg.append(QLatin1Char('\\'));

            if (p == length || (!inquote && isWhiteSpace(args.unicode()[p].unicode()))) {
                ret.append(arg);
                if (inquote) {
                    if (err)
                        *err = QtcProcess::BadQuoting;
                    return QStringList();
                }
                break;
            }

            if (copy)
                arg.append(args.unicode()[p]);
            ++p;
        }
    }
    //not reached
}

QStringList QtcProcess::splitArgs(const QString &_args, bool abortOnMeta, SplitError *err,
                                  const Environment *env, const QString *pwd)
{
    if (abortOnMeta) {
        SplitError perr;
        if (!err)
            err = &perr;
        // Hand the effective status pointer to prepareArgs(): it is the value
        // tested right below, and the one the caller gets to see.
        QString args = prepareArgs(_args, err, env, pwd);
        if (*err != SplitOk)
            return QStringList();
        return doSplitArgs(args, err);
    } else {
        QString args = _args;
        if (env)
            envExpand(args, env, pwd);
        return doSplitArgs(args, err);
    }
}

#else // Q_OS_WIN

// Returns true for the characters that are special inside double quotes: \ ' " $
inline static bool isQuoteMeta(QChar cUnicode)
{
    char c = cUnicode.toAscii();
    return c == '\\' || c == '\'' || c == '"' || c == '$';
}

// Returns true if the character is one of the shell meta characters listed
// next to the bitmap (bitmap lookup, ASCII only).
inline static bool isMeta(QChar cUnicode)
{
    static const uchar iqm[] = {
        0x00, 0x00, 0x00, 0x00, 0xdc, 0x07, 0x00, 0xd8,
        0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x38
    }; // \'"$`<>|;&(){}*?#[]

    uint c = cUnicode.unicode();

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

QStringList QtcProcess::splitArgs(const QString &args, bool abortOnMeta, SplitError *err,
                                  const Environment *env, const QString *pwd)
{
    static const QString pwdName = QLatin1String("PWD");
    QStringList ret;

    for (int pos = 0; ; ) {
        // Skip whitespace in front of the next word.
        QChar c;
        do {
            if (pos >= args.length())
                goto okret;
            c = args.unicode()[pos++];
        } while (c.isSpace());
        QString cret;        // the word being assembled
        bool hadWord = false; // distinguishes an empty word ('') from no word at all
        do {
            if (c == QLatin1Char('\'')) {
                // Single quotes: everything up to the closing quote is literal.
                int spos = pos;
                do {
                    if (pos >= args.length())
                        goto quoteerr;
                    c = args.unicode()[pos++];
                } while (c != QLatin1Char('\''));
                cret += args.mid(spos, pos-spos-1);
                hadWord = true;
            } else if (c == QLatin1Char('"')) {
                for (;;) {
                    if (pos >= args.length())
                        goto quoteerr;
                    c = args.unicode()[pos++];
                  nextq:
                    if (c == QLatin1Char('"'))
                        break;
                    if (c == QLatin1Char('\\')) {
                        if (pos >= args.length())
                            goto quoteerr;
                        c = args.unicode()[pos++];
                        // The backslash is kept unless it escapes a char that is
                        // special within double quotes.
                        if (c != QLatin1Char('"') &&
                            c != QLatin1Char('\\') &&
                            !(abortOnMeta &&
                              (c == QLatin1Char('$') ||
                               c == QLatin1Char('`'))))
                            cret += QLatin1Char('\\');
                    } else if (c == QLatin1Char('$') && env) {
                        // Variable expansion inside double quotes: the value is
                        // inserted without word splitting.
                        if (pos >= args.length())
                            goto quoteerr;
                        c = args.unicode()[pos++];
                        bool braced = false;
                        if (c == QLatin1Char('{')) {
                            if (pos >= args.length())
                                goto quoteerr;
                            c = args.unicode()[pos++];
                            braced = true;
                        }
                        QString var;
                        while (c.isLetterOrNumber() || c == QLatin1Char('_')) {
                            var += c;
                            if (pos >= args.length())
                                goto quoteerr;
                            c = args.unicode()[pos++];
                        }
                        if (var == pwdName && pwd && !pwd->isEmpty()) {
                            cret += *pwd;
                        } else {
                            Environment::const_iterator vit = env->constFind(var);
                            if (vit == env->constEnd()) {
                                if (abortOnMeta)
                                    goto metaerr; // Assume this is a shell builtin
                            } else {
                                cret += *vit;
                            }
                        }
                        if (!braced)
                            goto nextq; // c already holds the char after the name
                        if (c != QLatin1Char('}')) {
                            if (abortOnMeta)
                                goto metaerr; // Assume this is a complex expansion
                            goto quoteerr; // Otherwise it's just garbage
                        }
                        continue;
                    } else if (abortOnMeta &&
                               (c == QLatin1Char('$') ||
                                c == QLatin1Char('`'))) {
                        goto metaerr;
                    }
                    cret += c;
                }
                hadWord = true;
            } else if (c == QLatin1Char('$') && env) {
                // Unquoted variable expansion: the value is subject to word splitting.
                if (pos >= args.length())
                    goto quoteerr; // Bash just takes it verbatim, but whatever
                c = args.unicode()[pos++];
                bool braced = false;
                if (c == QLatin1Char('{')) {
                    if (pos >= args.length())
                        goto quoteerr;
                    c = args.unicode()[pos++];
                    braced = true;
                }
                QString var;
                while (c.isLetterOrNumber() || c == QLatin1Char('_')) {
                    var += c;
                    if (pos >= args.length()) {
                        if (braced)
                            goto quoteerr;
                        // End of input terminates the name; fake a separator.
                        c = QLatin1Char(' ');
                        break;
                    }
                    c = args.unicode()[pos++];
                }
                QString val;
                if (var == pwdName && pwd && !pwd->isEmpty()) {
                    val = *pwd;
                } else {
                    Environment::const_iterator vit = env->constFind(var);
                    if (vit == env->constEnd()) {
                        if (abortOnMeta)
                            goto metaerr; // Assume this is a shell builtin
                    } else {
                        val = *vit;
                    }
                }
                // Split the value at tab, newline and space.
                for (int i = 0; i < val.length(); i++) {
                    QChar cc = val.unicode()[i];
                    if (cc == 9 || cc == 10 || cc == 32) {
                        if (hadWord) {
                            ret += cret;
                            cret.clear();
                            hadWord = false;
                        }
                    } else {
                        cret += cc;
                        hadWord = true;
                    }
                }
                if (!braced)
                    goto nextc; // c already holds the char after the name
                if (c != QLatin1Char('}')) {
                    if (abortOnMeta)
                        goto metaerr; // Assume this is a complex expansion
                    goto quoteerr; // Otherwise it's just garbage
                }
            } else {
                if (c == QLatin1Char('\\')) {
                    if (pos >= args.length())
                        goto quoteerr;
                    c = args.unicode()[pos++];
                } else if (abortOnMeta && isMeta(c)) {
                    goto metaerr;
                }
                cret += c;
                hadWord = true;
            }
            if (pos >= args.length())
                break;
            c = args.unicode()[pos++];
          nextc: ;
        } while (!c.isSpace());
        if (hadWord)
            ret += cret;
    }

  okret:
    if (err)
        *err = SplitOk;
    return ret;

  quoteerr:
    if (err)
        *err = BadQuoting;
    return QStringList();

  metaerr:
    if (err)
        *err = FoundMeta;
    return QStringList();
}

#endif // Q_OS_WIN

// Returns true if c must be quoted for a Unix shell (bitmap lookup, ASCII only).
inline static bool isSpecialCharUnix(ushort c)
{
    // Chars that should be quoted (TM). This includes:
    static const uchar iqm[] = {
        0xff, 0xff, 0xff, 0xff, 0xdf, 0x07, 0x00, 0xd8,
        0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x78
    }; // 0-32 \'"$`<>|;&(){}*?#!~[]

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

// Returns true if arg contains at least one character that needs quoting.
inline static bool hasSpecialCharsUnix(const QString &arg)
{
    for (int x = arg.length() - 1; x >= 0; --x)
        if (isSpecialCharUnix(arg.unicode()[x].unicode()))
            return true;
    return false;
}

// Quotes arg for a Unix shell: empty strings become '', strings containing
// special characters are wrapped in single quotes (embedded single quotes are
// emitted as '\''), anything else is returned unchanged.
QString QtcProcess::quoteArgUnix(const QString &arg)
{
    if (!arg.length())
        return QString::fromLatin1("''");

    QString ret(arg);
    if (hasSpecialCharsUnix(ret)) {
        ret.replace(QLatin1Char('\''), QLatin1String("'\\''"));
        ret.prepend(QLatin1Char('\''));
        ret.append(QLatin1Char('\''));
    }
    return ret;
}

// Appends the Unix-quoted arg to *args, separated by a space if *args is not empty.
void QtcProcess::addArgUnix(QString *args, const QString &arg)
{
    if (!args->isEmpty())
        *args += QLatin1Char(' ');
    *args += quoteArgUnix(arg);
}

// Joins args into one Unix shell command line, quoting each element.
QString QtcProcess::joinArgsUnix(const QStringList &args)
{
    QString ret;
    foreach (const QString &arg, args)
        addArgUnix(&ret, arg);
    return ret;
}

#ifdef Q_OS_WIN

// Returns true if c must be quoted on Windows (bitmap lookup, ASCII only).
inline static bool isSpecialChar(ushort c)
{
    // Chars that should be quoted (TM). This includes:
    // - control chars & space
    // - the shell meta chars "&()<>^|
    // - the potential separators ,;=
    static const uchar iqm[] = {
        0xff, 0xff, 0xff, 0xff, 0x45, 0x13, 0x00, 0x78,
        0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10
    };

    return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
}

// Returns true if arg contains at least one character that needs quoting.
inline static bool hasSpecialChars(const QString &arg)
{
    for (int x = arg.length() - 1; x >= 0; --x)
        if (isSpecialChar(arg.unicode()[x].unicode()))
            return true;
    return false;
}

// Quotes arg so that it survives both cmd and the C runtime tokenizer.
// Empty strings become "", strings without special characters are returned
// unchanged.
QString QtcProcess::quoteArg(const QString &arg)
{
    if (!arg.length())
        return QString::fromLatin1("\"\"");

    QString ret(arg);
    if (hasSpecialChars(ret)) {
        // Quotes are escaped and their preceding backslashes are doubled.
        // It's impossible to escape anything inside a quoted string on cmd
        // level, so the outer quoting must be "suspended".
        ret.replace(QRegExp(QLatin1String("(\\\\*)\"")), QLatin1String("\"\\1\\1\\^\"\""));
        // The argument must not end with a \ since this would be interpreted
        // as escaping the quote -- rather put the \ behind the quote: e.g.
        // rather use "foo"\ than "foo\"
        ret.replace(QRegExp(QLatin1String("(\\\\*)$")), QLatin1String("\"\\1"));
        ret.prepend(QLatin1Char('"'));
    }
    // FIXME: Without this, quoting is not foolproof. But it needs support in the process setup, etc.
    //ret.replace('%', QLatin1String("%PERCENT_SIGN%"));
    return ret;
}

// Appends the quoted arg to *args, separated by a space if *args is not empty.
void QtcProcess::addArg(QString *args, const QString &arg)
{
    if (!args->isEmpty())
        *args += QLatin1Char(' ');
    *args += quoteArg(arg);
}

// Joins args into one command line, quoting each element.
QString QtcProcess::joinArgs(const QStringList &args)
{
    QString ret;
    foreach (const QString &arg, args)
        addArg(&ret, arg);
    return ret;
}

#endif

// Appends the already-quoted command line fragment inArgs to *args verbatim,
// separated by a space if *args is not empty. Empty fragments are ignored.
void QtcProcess::addArgs(QString *args, const QString &inArgs)
{
    if (!inArgs.isEmpty()) {
        if (!args->isEmpty())
            *args += QLatin1Char(' ');
        *args += inArgs;
    }
}

// Appends every element of inArgs to *args, quoting each one via addArg().
void QtcProcess::addArgs(QString *args, const QStringList &inArgs)
{
    foreach (const QString &arg, inArgs)
        addArg(args, arg);
}

#ifdef Q_OS_WIN

// Decides whether the command can be started directly. If prepareArgs()
// succeeds, the command is used as is with the processed arguments; otherwise
// the command line is wrapped into an invocation of %COMSPEC% with the
// original (unprocessed) arguments.
void QtcProcess::prepareCommand(const QString &command, const QString &arguments,
                                QString *outCmd, QString *outArgs,
                                const Environment *env, const QString *pwd)
{
    QtcProcess::SplitError err;
    *outArgs = QtcProcess::prepareArgs(arguments, &err, env, pwd);
    if (err == QtcProcess::SplitOk) {
        *outCmd = command;
    } else {
        *outCmd = QString::fromLatin1(qgetenv("COMSPEC"));
        *outArgs = QLatin1String("/v:off /s /c \"")
                + quoteArg(QDir::toNativeSeparators(command)) + QLatin1Char(' ') + arguments
                + QLatin1Char('"');
    }
}

#else

// Decides whether the command can be started directly. If prepareArgs()
// succeeds, the command is used as is with the split arguments. If meta
// characters were found, the command line is handed to /bin/sh -c instead.
// Any other error makes the function return false.
bool QtcProcess::prepareCommand(const QString &command, const QString &arguments,
                                QString *outCmd, QStringList *outArgs,
                                const Environment *env, const QString *pwd)
{
    QtcProcess::SplitError err;
    *outArgs = QtcProcess::prepareArgs(arguments, &err, env, pwd);
    if (err == QtcProcess::SplitOk) {
        *outCmd = command;
    } else {
        if (err != QtcProcess::FoundMeta)
            return false;
        *outCmd = QLatin1String("/bin/sh");
        *outArgs << QLatin1String("-c") << (quoteArg(command) + QLatin1Char(' ') + arguments);
    }
    return true;
}

#endif

// Starts the process from m_command and m_arguments. The explicitly set
// environment is used if there is one (m_haveEnv), the system environment
// otherwise; it also serves for variable expansion in the arguments.
void QtcProcess::start()
{
    Environment env;
    if (m_haveEnv) {
        env = m_environment;
        QProcess::setEnvironment(env.toStringList());
    } else {
        env = Environment::systemEnvironment();
    }

    const QString &workDir = workingDirectory();
    QString command;
#ifdef Q_OS_WIN
    QString arguments;
    prepareCommand(m_command, m_arguments, &command, &arguments, &env, &workDir);
    // The arguments are already a fully quoted native command line.
    setNativeArguments(arguments);
    QProcess::start(command, QStringList());
#else
    QStringList arguments;
    if (!prepareCommand(m_command, m_arguments, &command, &arguments, &env, &workDir)) {
        setErrorString(tr("Error in command line."));
        // Should be FailedToStart, but we cannot set the process error from the outside,
        // so it would be inconsistent.
        emit error(QProcess::UnknownError);
        return;
    }
    QProcess::start(command, arguments);
#endif
}

#ifdef Q_OS_WIN

// This function assumes that the resulting string will be quoted.
// That's irrelevant if it does not contain quotes itself.
//
// bslashes is the number of backslashes immediately preceding ret; the return
// value is the number of trailing backslashes in ret (including the incoming
// ones if ret consists of backslashes only).
static int quoteArgInternal(QString &ret, int bslashes)
{
    // Quotes are escaped and their preceding backslashes are doubled.
    // It's impossible to escape anything inside a quoted string on cmd
    // level, so the outer quoting must be "suspended".
    const QChar bs(QLatin1Char('\\')), dq(QLatin1Char('"'));
    for (int p = 0; p < ret.length(); p++) {
        if (ret.at(p) == bs) {
            bslashes++;
        } else {
            if (ret.at(p) == dq) {
                if (bslashes) {
                    ret.insert(p, QString(bslashes, bs));
                    p += bslashes;
                }
                // Insert "\^" in front of the quote, then step over the
                // insertion and the quote itself.
                ret.insert(p, QLatin1String("\"\\^\""));
                p += 4;
            }
            bslashes = 0;
        }
    }
    return bslashes;
}

#else

// The main state of the Unix shell parser
enum MxQuoting { MxBasic, MxSingleQuote, MxDoubleQuote, MxParen, MxSubst, MxGroup, MxMath };
typedef struct {
    MxQuoting current;
    // Bizarrely enough, double quoting has an impact on the behavior of some
    // complex expressions within the quoted string.
    bool dquote;
} MxState;
QT_BEGIN_NAMESPACE
Q_DECLARE_TYPEINFO(MxState, Q_PRIMITIVE_TYPE);
QT_END_NAMESPACE

// Pushed state for the case where a $(()) expansion turns out bogus
typedef struct {
    QString str;
    int pos, varPos;
} MxSave;
QT_BEGIN_NAMESPACE
Q_DECLARE_TYPEINFO(MxSave, Q_MOVABLE_TYPE);
QT_END_NAMESPACE

#include <QtCore/QStack>

#endif

// TODO: This documentation is relevant for end-users. Where to put it?
/**
 * Perform safe macro expansion (substitution) on a string for use
 * in shell commands.
 *
 * \section Unix notes
 *
 * Explicitly supported shell constructs:
 *   \\ '' "" {} () $(()) ${} $() ``
 *
 * Implicitly supported shell constructs:
 *   (())
 *
 * Unsupported shell constructs that will cause problems:
 * \list
 * \li Shortened \c{case $v in pat)} syntax. Use \c{case $v in (pat)} instead.
 * \li Bash-style \c{$""} and \c{$''} string quoting syntax.
 * \endlist
 *
 * The rest of the shell (incl. bash) syntax is simply ignored,
 * as it is not expected to cause problems.
 *
 * Security considerations:
 * \list
 * \li Backslash-escaping an expando is treated as a quoting error
 * \li Do not put expandos into double quoted substitutions:
 *     \badcode
 *       "${VAR:-%{macro}}"
 *     \endcode
 * \li Do not put expandos into command line arguments which are nested
 *     shell commands:
 *     \badcode
 *       sh -c 'foo \%{file}'
 *     \endcode
 *     \goodcode
 *       file=\%{file} sh -c 'foo "$file"'
 *     \endcode
 * \endlist
 *
 * \section Windows notes
 *
 * All quoting syntax supported by splitArgs() is supported here as well.
 * Additionally, command grouping via parentheses is recognized - note
 * however, that the parser is much stricter about unquoted parentheses
 * than cmd itself.
 * The rest of the cmd syntax is simply ignored, as it is not expected
 * to cause problems.
 *
 * Security considerations:
 * \list
 * \li Circumflex-escaping an expando is treated as a quoting error
 * \li Closing double quotes right before expandos and opening double quotes
 *     right after expandos are treated as quoting errors
 * \li Do not put expandos into nested commands:
 *     \badcode
 *       for /f "usebackq" \%v in (`foo \%{file}`) do \@echo \%v
 *     \endcode
 * \li A macro's value must not contain anything which may be interpreted
 *     as an environment variable expansion. A solution is replacing any
 *     percent signs with a fixed string like \c{\%PERCENT_SIGN\%} and
 *     injecting \c{PERCENT_SIGN=\%} into the shell's environment.
 * \li Enabling delayed environment variable expansion (cmd /v:on) should have
 *     no security implications, but may still wreak havoc due to the
 *     need for doubling circumflexes if any exclamation marks are present,
 *     and the need to circumflex-escape the exclamation marks themselves.
 * \endlist
 *
 * \param cmd pointer to the string in which macros are expanded in-place
 * \param mx pointer to a macro expander instance
 * \return false if the string could not be parsed and therefore no safe
 *   substitution was possible
 */
bool QtcProcess::expandMacros(QString *cmd, AbstractMacroExpander *mx)
{
    QString str = *cmd;
    if (str.isEmpty())
        return true;

    QString rsts;   // replacement text of the macro found at varPos
    int varLen;     // length of the macro reference found at varPos
    int varPos = 0;
    if (!(varLen = mx->findMacro(str, &varPos, &rsts)))
        return true;

    int pos = 0;

#ifdef Q_OS_WIN
    enum { // cmd.exe parsing state
        ShellBasic, // initial state
        ShellQuoted, // double-quoted state => *no* other meta chars are interpreted
        ShellEscaped // circumflex-escaped state => next char is not interpreted
    } shellState = ShellBasic;
    enum { // CommandLineToArgv() parsing state and some more
        CrtBasic, // initial state
        CrtNeedWord, // after empty expando; insert empty argument if whitespace follows
        CrtInWord, // in non-whitespace
        CrtClosed, // previous char closed the double-quoting
        CrtHadQuote, // closed double-quoting after an expando
        // The remaining two need to be numerically higher
        CrtQuoted, // double-quoted state => spaces don't split tokens
        CrtNeedQuote // expando opened quote; close if no expando follows
    } crtState = CrtBasic;
    int bslashes = 0; // previous chars were manual backslashes
    int rbslashes = 0; // trailing backslashes in replacement

    forever {
        if (pos == varPos) {
            if (shellState == ShellEscaped)
                return false; // Circumflex'd quoted expando would be Bad (TM).
            if ((shellState == ShellQuoted) != (crtState == CrtQuoted))
                return false; // CRT quoting out of sync with shell quoting. Ahoy to Redmond.
            rbslashes += bslashes;
            bslashes = 0;
            if (crtState < CrtQuoted) {
                if (rsts.isEmpty()) {
                    if (crtState == CrtBasic) {
                        // Outside any quoting and the string is empty, so put
                        // a pair of quotes. Delaying that is just pedantry.
                        crtState = CrtNeedWord;
                    }
                } else {
                    if (hasSpecialChars(rsts)) {
                        if (crtState == CrtClosed) {
                            // Quoted expando right after closing quote. Can't do that.
                            return false;
                        }
                        int tbslashes = quoteArgInternal(rsts, 0);
                        rsts.prepend(QLatin1Char('"'));
                        if (rbslashes)
                            rsts.prepend(QString(rbslashes, QLatin1Char('\\')));
                        crtState = CrtNeedQuote;
                        rbslashes = tbslashes;
                    } else {
                        crtState = CrtInWord; // We know that this string contains no spaces.
                        // We know that this string contains no quotes,
                        // so the function won't make a mess.
                        rbslashes = quoteArgInternal(rsts, rbslashes);
                    }
                }
            } else {
                rbslashes = quoteArgInternal(rsts, rbslashes);
            }
            str.replace(pos, varLen, rsts);
            pos += rsts.length();
            varPos = pos;
            if (!(varLen = mx->findMacro(str, &varPos, &rsts))) {
                // Don't leave immediately, as we may be in CrtNeedWord state which could
                // be still resolved, or we may have inserted trailing backslashes.
                varPos = INT_MAX;
            }
            continue;
        }
        if (crtState == CrtNeedQuote) {
            // NOTE(review): varPos may be INT_MAX here (no further macro), in
            // which case the increments below overflow a signed int - confirm
            // whether this should be guarded.
            if (rbslashes) {
                str.insert(pos, QString(rbslashes, QLatin1Char('\\')));
                pos += rbslashes;
                varPos += rbslashes;
                rbslashes = 0;
            }
            str.insert(pos, QLatin1Char('"'));
            pos++;
            varPos++;
            crtState = CrtHadQuote;
        }
        ushort cc = str.unicode()[pos].unicode();
        if (shellState == ShellBasic && cc == '^') {
            shellState = ShellEscaped;
        } else {
            if (!cc || cc == ' ' || cc == '\t') {
                if (crtState < CrtQuoted) {
                    if (crtState == CrtNeedWord) {
                        str.insert(pos, QLatin1String("\"\""));
                        pos += 2;
                        varPos += 2;
                    }
                    crtState = CrtBasic;
                }
                if (!cc)
                    break;
                bslashes = 0;
                rbslashes = 0;
            } else {
                if (cc == '\\') {
                    bslashes++;
                    if (crtState < CrtQuoted)
                        crtState = CrtInWord;
                } else {
                    if (cc == '"') {
                        if (shellState != ShellEscaped)
                            shellState = (shellState == ShellQuoted) ? ShellBasic : ShellQuoted;
                        if (rbslashes) {
                            // Offset -1: skip possible circumflex. We have at least
                            // one backslash, so a fixed offset is ok.
                            str.insert(pos - 1, QString(rbslashes, QLatin1Char('\\')));
                            pos += rbslashes;
                            varPos += rbslashes;
                        }
                        if (!(bslashes & 1)) {
                            // Even number of backslashes, so the quote is not escaped.
                            switch (crtState) {
                            case CrtQuoted:
                                // Closing quote
                                crtState = CrtClosed;
                                break;
                            case CrtClosed:
                                // Two consecutive quotes make a literal quote - and
                                // still close quoting. See QtcProcess::quoteArg().
                                crtState = CrtInWord;
                                break;
                            case CrtHadQuote:
                                // Opening quote right after quoted expando. Can't do that.
                                return false;
                            default:
                                // Opening quote
                                crtState = CrtQuoted;
                                break;
                            }
                        } else if (crtState < CrtQuoted) {
                            crtState = CrtInWord;
                        }
                    } else if (crtState < CrtQuoted) {
                        crtState = CrtInWord;
                    }
                    bslashes = 0;
                    rbslashes = 0;
                }
            }
            if (varPos == INT_MAX && !rbslashes)
                break;
            if (shellState == ShellEscaped)
                shellState = ShellBasic;
        }
        pos++;
    }
#else
    MxState state = { MxBasic, false };
    QStack<MxState> sstack; // enclosing parser states
    QStack<MxSave> ostack;  // snapshots taken at each "$((" for a possible retry

    while (pos < str.length()) {
        if (pos == varPos) {
            // Our expansion rules trigger in any context
            if (state.dquote) {
                // We are within a double-quoted string. Escape relevant meta characters.
                rsts.replace(QRegExp(QLatin1String("([$`\"\\\\])")), QLatin1String("\\\\1"));
            } else if (state.current == MxSingleQuote) {
                // We are within a single-quoted string. "Suspend" single-quoting and put a
                // single escaped quote for each single quote inside the string.
                rsts.replace(QLatin1Char('\''), QLatin1String("'\\''"));
            } else if (rsts.isEmpty() || hasSpecialCharsUnix(rsts)) {
                // String contains "quote-worthy" characters. Use single quoting - but
                // that choice is arbitrary.
                rsts.replace(QLatin1Char('\''), QLatin1String("'\\''"));
                rsts.prepend(QLatin1Char('\''));
                rsts.append(QLatin1Char('\''));
            } // Else just use the string verbatim.
            str.replace(pos, varLen, rsts);
            pos += rsts.length();
            varPos = pos;
            if (!(varLen = mx->findMacro(str, &varPos, &rsts)))
                break;
            continue;
        }
        ushort cc = str.unicode()[pos].unicode();
        if (state.current == MxSingleQuote) {
            // Single quoted context - only the single quote has any special meaning.
            if (cc == '\'')
                state = sstack.pop();
        } else if (cc == '\\') {
            // In any other context, the backslash starts an escape.
            pos += 2;
            if (varPos < pos)
                return false; // Backslash'd quoted expando would be Bad (TM).
            continue;
        } else if (cc == '$') {
            cc = str.unicode()[++pos].unicode();
            if (cc == '(') {
                sstack.push(state);
                if (str.unicode()[pos + 1].unicode() == '(') {
                    // $(( starts a math expression. This may also be a $( ( in fact,
                    // so we push the current string and offset on a stack so we can retry.
                    MxSave sav = { str, pos + 2, varPos };
                    ostack.push(sav);
                    state.current = MxMath;
                    pos += 2;
                    continue;
                } else {
                    // $( starts a command substitution. This actually "opens a new context"
                    // which overrides surrounding double quoting.
                    state.current = MxParen;
                    state.dquote = false;
                }
            } else if (cc == '{') {
                // ${ starts a "braced" variable substitution.
                sstack.push(state);
                state.current = MxSubst;
            } // Else assume that a "bare" variable substitution has started
        } else if (cc == '`') {
            // Backticks are evil, as every shell interprets escapes within them differently,
            // which is a danger for the quoting of our own expansions.
            // So we just apply *our* rules (which match bash) and transform it into a POSIX
            // command substitution which has clear semantics.
            str.replace(pos, 1, QLatin1String("$( " )); // add space -> avoid creating $((
            varPos += 2;
            int pos2 = pos += 3;
            forever {
                if (pos2 >= str.length())
                    return false; // Syntax error - unterminated backtick expression.
                cc = str.unicode()[pos2].unicode();
                if (cc == '`')
                    break;
                if (cc == '\\') {
                    cc = str.unicode()[++pos2].unicode();
                    if (cc == '$' || cc == '`' || cc == '\\' ||
                        (cc == '"' && state.dquote))
                    {
                        // Drop the backslash; pos2 now points behind the escaped char.
                        str.remove(pos2 - 1, 1);
                        if (varPos >= pos2)
                            varPos--;
                        continue;
                    }
                }
                pos2++;
            }
            str[pos2] = QLatin1Char(')');
            sstack.push(state);
            state.current = MxParen;
            state.dquote = false;
            continue;
        } else if (state.current == MxDoubleQuote) {
            // (Truly) double quoted context - only remaining special char is the closing quote.
            if (cc == '"')
                state = sstack.pop();
        } else if (cc == '\'') {
            // Start single quote if we are not in "inherited" double quoted context.
            if (!state.dquote) {
                sstack.push(state);
                state.current = MxSingleQuote;
            }
        } else if (cc == '"') {
            // Same for double quoting.
            if (!state.dquote) {
                sstack.push(state);
                state.current = MxDoubleQuote;
                state.dquote = true;
            }
        } else if (state.current == MxSubst) {
            // "Braced" substitution context - only remaining special char is the closing brace.
            if (cc == '}')
                state = sstack.pop();
        } else if (cc == ')') {
            if (state.current == MxMath) {
                if (str.unicode()[pos + 1].unicode() == ')') {
                    state = sstack.pop();
                    pos += 2;
                } else {
                    // False hit: the $(( was a $( ( in fact.
                    // ash does not care (and will complain), but bash actually parses it.
                    varPos = ostack.top().varPos;
                    pos = ostack.top().pos;
                    str = ostack.top().str;
                    ostack.pop();
                    state.current = MxParen;
                    state.dquote = false;
                    sstack.push(state);
                }
                continue;
            } else if (state.current == MxParen) {
                state = sstack.pop();
            } else {
                break; // Syntax error - excess closing parenthesis.
            }
        } else if (cc == '}') {
            if (state.current == MxGroup)
                state = sstack.pop();
            else
                break; // Syntax error - excess closing brace.
        } else if (cc == '(') {
            // Context-saving command grouping.
            sstack.push(state);
            state.current = MxParen;
        } else if (cc == '{') {
            // Plain command grouping.
            sstack.push(state);
            state.current = MxGroup;
        }
        pos++;
    }
    // FIXME? May complain if (!sstack.empty()), but we don't really care anyway.
#endif

    *cmd = str;
    return true;
}

// Convenience overload: returns a copy of str with macros expanded. The
// success status of the in-place overload is not reported.
QString QtcProcess::expandMacros(const QString &str, AbstractMacroExpander *mx)
{
    QString ret = str;
    expandMacros(&ret, mx);
    return ret;
}

// Advances to the next argument in *m_str, starting at m_pos. On success,
// returns true, sets m_prev to the position where the scan started and leaves
// m_pos behind the argument. m_value receives the unquoted argument if it is
// "simple" (m_simple); otherwise it is cleared. Returns false if no further
// argument was found.
bool QtcProcess::ArgIterator::next()
{
    // We delay the setting of m_prev so we can still delete the last argument
    // after we find that there are no more arguments. It's a bit of a hack ...
    int prev = m_pos;

    m_simple = true;
    m_value.clear();

#ifdef Q_OS_WIN
    enum { // cmd.exe parsing state
        ShellBasic, // initial state
        ShellQuoted, // double-quoted state => *no* other meta chars are interpreted
        ShellEscaped // circumflex-escaped state => next char is not interpreted
    } shellState = ShellBasic;
    enum { // CommandLineToArgv() parsing state and some more
        CrtBasic, // initial state
        CrtInWord, // in non-whitespace
        CrtClosed, // previous char closed the double-quoting
        CrtQuoted // double-quoted state => spaces don't split tokens
    } crtState = CrtBasic;
    enum { NoVar, NewVar, FullVar } varState = NoVar; // inside a potential env variable expansion
    int bslashes = 0; // number of preceding backslashes

    for (;; m_pos++) {
        // A zero char stands for the end of the string.
        ushort cc = m_pos < m_str->length() ? m_str->unicode()[m_pos].unicode() : 0;
        if (shellState == ShellBasic && cc == '^') {
            varState = NoVar;
            shellState = ShellEscaped;
        } else if ((shellState == ShellBasic && isMetaChar(cc)) || !cc) { // A "bit" simplistic ...
            // We ignore crtQuote state here. Whatever ...
          doReturn:
            if (m_simple)
                while (--bslashes >= 0)
                    m_value += QLatin1Char('\\');
            else
                m_value.clear();
            if (crtState != CrtBasic) {
                m_prev = prev;
                return true;
            }
            return false;
        } else {
            if (crtState != CrtQuoted && (cc == ' ' || cc == '\t')) {
                if (crtState != CrtBasic) {
                    // We'll lose shellQuote state here. Whatever ...
                    goto doReturn;
                }
            } else {
                if (cc == '\\') {
                    bslashes++;
                    if (crtState != CrtQuoted)
                        crtState = CrtInWord;
                    varState = NoVar;
                } else {
                    if (cc == '"') {
                        varState = NoVar;
                        if (shellState != ShellEscaped)
                            shellState = (shellState == ShellQuoted) ? ShellBasic : ShellQuoted;
                        int obslashes = bslashes;
                        bslashes >>= 1;
                        if (!(obslashes & 1)) {
                            // Even number of backslashes, so the quote is not escaped.
                            switch (crtState) {
                            case CrtQuoted:
                                // Closing quote
                                crtState = CrtClosed;
                                continue;
                            case CrtClosed:
                                // Two consecutive quotes make a literal quote - and
                                // still close quoting. See quoteArg().
                                crtState = CrtInWord;
                                break;
                            default:
                                // Opening quote
                                crtState = CrtQuoted;
                                continue;
                            }
                        } else if (crtState != CrtQuoted) {
                            crtState = CrtInWord;
                        }
                    } else {
                        if (cc == '%') {
                            if (varState == FullVar) {
                                m_simple = false;
                                varState = NoVar;
                            } else {
                                varState = NewVar;
                            }
                        } else if (varState != NoVar) {
                            // This check doesn't really reflect cmd reality, but it is an
                            // approximation of what would be sane.
                            varState = (cc == '_' || cc == '-' || cc == '.'
                                     || QChar(cc).isLetterOrNumber()) ? FullVar : NoVar;
                        }
                        if (crtState != CrtQuoted)
                            crtState = CrtInWord;
                    }
                    for (; bslashes > 0; bslashes--)
                        m_value += QLatin1Char('\\');
                    m_value += QChar(cc);
                }
            }
            if (shellState == ShellEscaped)
                shellState = ShellBasic;
        }
    }
#else
    MxState state = { MxBasic, false };
    QStack<MxState> sstack; // enclosing parser states
    QStack<int> ostack;     // positions of "$((" openers for a possible retry
    bool hadWord = false;

    for (; m_pos < m_str->length(); m_pos++) {
        ushort cc = m_str->unicode()[m_pos].unicode();
        if (state.current == MxSingleQuote) {
            if (cc == '\'') {
                state = sstack.pop();
                continue;
            }
        } else if (cc == '\\') {
            if (++m_pos >= m_str->length())
                break;
            cc = m_str->unicode()[m_pos].unicode();
            // Within double quotes the backslash is kept in front of chars
            // it does not escape.
            if (state.dquote && cc != '"' && cc != '\\' && cc != '$' && cc != '`')
                m_value += QLatin1Char('\\');
        } else if (cc == '$') {
            if (++m_pos >= m_str->length())
                break;
            cc = m_str->unicode()[m_pos].unicode();
            if (cc == '(') {
                sstack.push(state);
                if (++m_pos >= m_str->length())
                    break;
                if (m_str->unicode()[m_pos].unicode() == '(') {
                    ostack.push(m_pos);
                    state.current = MxMath;
                } else {
                    state.dquote = false;
                    state.current = MxParen;
                    // m_pos too far by one now - whatever.
                }
            } else if (cc == '{') {
                sstack.push(state);
                state.current = MxSubst;
            } else {
                // m_pos too far by one now - whatever.
            }
            m_simple = false;
            hadWord = true;
            continue;
        } else if (cc == '`') {
            forever {
                if (++m_pos >= m_str->length()) {
                    m_simple = false;
                    m_prev = prev;
                    return true;
                }
                cc = m_str->unicode()[m_pos].unicode();
                if (cc == '`')
                    break;
                if (cc == '\\')
                    m_pos++; // m_pos may be too far by one now - whatever.
            }
            m_simple = false;
            hadWord = true;
            continue;
        } else if (state.current == MxDoubleQuote) {
            if (cc == '"') {
                state = sstack.pop();
                continue;
            }
        } else if (cc == '\'') {
            if (!state.dquote) {
                sstack.push(state);
                state.current = MxSingleQuote;
                hadWord = true;
                continue;
            }
        } else if (cc == '"') {
            if (!state.dquote) {
                sstack.push(state);
                state.dquote = true;
                state.current = MxDoubleQuote;
                hadWord = true;
                continue;
            }
        } else if (state.current == MxSubst) {
            if (cc == '}')
                state = sstack.pop();
            continue; // Not simple anyway
        } else if (cc == ')') {
            if (state.current == MxMath) {
                if (++m_pos >= m_str->length())
                    break;
                if (m_str->unicode()[m_pos].unicode() == ')') {
                    ostack.pop();
                    state = sstack.pop();
                } else {
                    // false hit: the $(( was a $( ( in fact.
                    // ash does not care, but bash does.
                    m_pos = ostack.pop();
                    state.current = MxParen;
                    state.dquote = false;
                    sstack.push(state);
                }
                continue;
            } else if (state.current == MxParen) {
                state = sstack.pop();
                continue;
            } else {
                break;
            }
#if 0 // MxGroup is impossible, see below.
        } else if (cc == '}') {
            if (state.current == MxGroup) {
                state = sstack.pop();
                continue;
            }
#endif
        } else if (cc == '(') {
            sstack.push(state);
            state.current = MxParen;
            m_simple = false;
            hadWord = true;
#if 0 // Should match only at the beginning of a command, which we never have currently.
        } else if (cc == '{') {
            sstack.push(state);
            state.current = MxGroup;
            m_simple = false;
            hadWord = true;
            continue;
#endif
        } else if (cc == '<' || cc == '>' || cc == '&' || cc == '|' || cc == ';') {
            if (sstack.isEmpty())
                break;
        } else if (cc == ' ' || cc == '\t') {
            if (!hadWord)
                continue;
            if (sstack.isEmpty())
                break;
        }
        m_value += QChar(cc);
        hadWord = true;
    }
    // TODO: Possibly complain here if (!sstack.empty())
    if (!m_simple)
        m_value.clear();
    if (hadWord) {
        m_prev = prev;
        return true;
    }
    return false;
#endif
}

// Removes the argument last returned by next() from the string. If that
// argument started at position 0, the whitespace following it is removed too.
void QtcProcess::ArgIterator::deleteArg()
{
    if (!m_prev)
        while (m_pos < m_str->length() && m_str->at(m_pos).isSpace())
            m_pos++;
    m_str->remove(m_prev, m_pos - m_prev);
    m_pos = m_prev;
}

// Inserts the quoted str as a new argument at the current position: followed
// by a space at position 0, preceded by a space elsewhere. m_pos is advanced
// past the insertion.
void QtcProcess::ArgIterator::appendArg(const QString &str)
{
    const QString qstr = quoteArg(str);
    if (!m_pos)
        m_str->insert(0, qstr + QLatin1Char(' '));
    else
        m_str->insert(m_pos, QLatin1Char(' ') + qstr);
    m_pos += qstr.length() + 1;
}