#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <dlfcn.h>
#include <Common/Exception.h>
#include <Common/ShellCommand.h>
#include <Common/PipeFDs.h>
#include <common/logger_useful.h>
#include <IO/WriteHelpers.h>
#include <port/unistd.h>
#include <cerrno>
#include <csignal>
| 12 | |
namespace
{
    /// Values the child process passes to `_exit` when it fails before the requested program is started.
    /// They let the parent tell "the child could not be set up" apart from "the program itself failed".
    enum class ReturnCodes : int
    {
        /// The exact value is not important; it is picked so that a clash with a real program's
        /// return code is rare. The remaining codes simply continue the sequence.
        CANNOT_DUP_STDIN = 0x55555555,
        CANNOT_DUP_STDOUT,  /// 0x55555556
        CANNOT_DUP_STDERR,  /// 0x55555557
        CANNOT_EXEC,        /// 0x55555558
    };
}
| 24 | |
| 25 | namespace DB |
| 26 | { |
| 27 | |
namespace ErrorCodes
{
    /// Error codes used in this file. They are only declared here; the definitions live elsewhere.
    extern const int CANNOT_CREATE_CHILD_PROCESS;
    extern const int CANNOT_DLSYM;
    extern const int CANNOT_FORK;
    extern const int CANNOT_WAITPID;
    extern const int CHILD_WAS_NOT_EXITED_NORMALLY;
}
| 36 | |
| 37 | ShellCommand::ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_) |
| 38 | : pid(pid_) |
| 39 | , terminate_in_destructor(terminate_in_destructor_) |
| 40 | , log(&Poco::Logger::get("ShellCommand" )) |
| 41 | , in(in_fd_) |
| 42 | , out(out_fd_) |
| 43 | , err(err_fd_) {} |
| 44 | |
| 45 | ShellCommand::~ShellCommand() |
| 46 | { |
| 47 | if (terminate_in_destructor) |
| 48 | { |
| 49 | int retcode = kill(pid, SIGTERM); |
| 50 | if (retcode != 0) |
| 51 | LOG_WARNING(log, "Cannot kill pid " << pid << " errno '" << errnoToString(retcode) << "'" ); |
| 52 | } |
| 53 | else if (!wait_called) |
| 54 | tryWait(); |
| 55 | } |
| 56 | |
| 57 | std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor) |
| 58 | { |
| 59 | /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, |
| 60 | * because of the resolving of characters in the shared library |
| 61 | * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html |
| 62 | * Therefore, separate the resolving of the symbol from the call. |
| 63 | */ |
| 64 | static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork" ); |
| 65 | |
| 66 | if (!real_vfork) |
| 67 | throwFromErrno("Cannot find symbol vfork in myself" , ErrorCodes::CANNOT_DLSYM); |
| 68 | |
| 69 | PipeFDs pipe_stdin; |
| 70 | PipeFDs pipe_stdout; |
| 71 | PipeFDs pipe_stderr; |
| 72 | |
| 73 | pid_t pid = reinterpret_cast<pid_t(*)()>(real_vfork)(); |
| 74 | |
| 75 | if (-1 == pid) |
| 76 | throwFromErrno("Cannot vfork" , ErrorCodes::CANNOT_FORK); |
| 77 | |
| 78 | if (0 == pid) |
| 79 | { |
| 80 | /// We are in the freshly created process. |
| 81 | |
| 82 | /// Why `_exit` and not `exit`? Because `exit` calls `atexit` and destructors of thread local storage. |
| 83 | /// And there is a lot of garbage (including, for example, mutex is blocked). And this can not be done after `vfork` - deadlock happens. |
| 84 | |
| 85 | /// Replace the file descriptors with the ends of our pipes. |
| 86 | if (STDIN_FILENO != dup2(pipe_stdin.fds_rw[0], STDIN_FILENO)) |
| 87 | _exit(int(ReturnCodes::CANNOT_DUP_STDIN)); |
| 88 | |
| 89 | if (!pipe_stdin_only) |
| 90 | { |
| 91 | if (STDOUT_FILENO != dup2(pipe_stdout.fds_rw[1], STDOUT_FILENO)) |
| 92 | _exit(int(ReturnCodes::CANNOT_DUP_STDOUT)); |
| 93 | |
| 94 | if (STDERR_FILENO != dup2(pipe_stderr.fds_rw[1], STDERR_FILENO)) |
| 95 | _exit(int(ReturnCodes::CANNOT_DUP_STDERR)); |
| 96 | } |
| 97 | |
| 98 | execv(filename, argv); |
| 99 | /// If the process is running, then `execv` does not return here. |
| 100 | |
| 101 | _exit(int(ReturnCodes::CANNOT_EXEC)); |
| 102 | } |
| 103 | |
| 104 | std::unique_ptr<ShellCommand> res(new ShellCommand(pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor)); |
| 105 | |
| 106 | /// Now the ownership of the file descriptors is passed to the result. |
| 107 | pipe_stdin.fds_rw[1] = -1; |
| 108 | pipe_stdout.fds_rw[0] = -1; |
| 109 | pipe_stderr.fds_rw[0] = -1; |
| 110 | |
| 111 | return res; |
| 112 | } |
| 113 | |
| 114 | |
| 115 | std::unique_ptr<ShellCommand> ShellCommand::execute(const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor) |
| 116 | { |
| 117 | /// Arguments in non-constant chunks of memory (as required for `execv`). |
| 118 | /// Moreover, their copying must be done before calling `vfork`, so after `vfork` do a minimum of things. |
| 119 | std::vector<char> argv0("sh" , &("sh" [3])); |
| 120 | std::vector<char> argv1("-c" , &("-c" [3])); |
| 121 | std::vector<char> argv2(command.data(), command.data() + command.size() + 1); |
| 122 | |
| 123 | char * const argv[] = { argv0.data(), argv1.data(), argv2.data(), nullptr }; |
| 124 | |
| 125 | return executeImpl("/bin/sh" , argv, pipe_stdin_only, terminate_in_destructor); |
| 126 | } |
| 127 | |
| 128 | |
| 129 | std::unique_ptr<ShellCommand> ShellCommand::executeDirect(const std::string & path, const std::vector<std::string> & arguments, bool terminate_in_destructor) |
| 130 | { |
| 131 | size_t argv_sum_size = path.size() + 1; |
| 132 | for (const auto & arg : arguments) |
| 133 | argv_sum_size += arg.size() + 1; |
| 134 | |
| 135 | std::vector<char *> argv(arguments.size() + 2); |
| 136 | std::vector<char> argv_data(argv_sum_size); |
| 137 | WriteBuffer writer(argv_data.data(), argv_sum_size); |
| 138 | |
| 139 | argv[0] = writer.position(); |
| 140 | writer.write(path.data(), path.size() + 1); |
| 141 | |
| 142 | for (size_t i = 0, size = arguments.size(); i < size; ++i) |
| 143 | { |
| 144 | argv[i + 1] = writer.position(); |
| 145 | writer.write(arguments[i].data(), arguments[i].size() + 1); |
| 146 | } |
| 147 | |
| 148 | argv[arguments.size() + 1] = nullptr; |
| 149 | |
| 150 | return executeImpl(path.data(), argv.data(), false, terminate_in_destructor); |
| 151 | } |
| 152 | |
| 153 | |
| 154 | int ShellCommand::tryWait() |
| 155 | { |
| 156 | wait_called = true; |
| 157 | |
| 158 | int status = 0; |
| 159 | if (-1 == waitpid(pid, &status, 0)) |
| 160 | throwFromErrno("Cannot waitpid" , ErrorCodes::CANNOT_WAITPID); |
| 161 | |
| 162 | if (WIFEXITED(status)) |
| 163 | return WEXITSTATUS(status); |
| 164 | |
| 165 | if (WIFSIGNALED(status)) |
| 166 | throw Exception("Child process was terminated by signal " + toString(WTERMSIG(status)), ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY); |
| 167 | |
| 168 | if (WIFSTOPPED(status)) |
| 169 | throw Exception("Child process was stopped by signal " + toString(WSTOPSIG(status)), ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY); |
| 170 | |
| 171 | throw Exception("Child process was not exited normally by unknown reason" , ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY); |
| 172 | } |
| 173 | |
| 174 | |
| 175 | void ShellCommand::wait() |
| 176 | { |
| 177 | int retcode = tryWait(); |
| 178 | |
| 179 | if (retcode != EXIT_SUCCESS) |
| 180 | { |
| 181 | switch (retcode) |
| 182 | { |
| 183 | case int(ReturnCodes::CANNOT_DUP_STDIN): |
| 184 | throw Exception("Cannot dup2 stdin of child process" , ErrorCodes::CANNOT_CREATE_CHILD_PROCESS); |
| 185 | case int(ReturnCodes::CANNOT_DUP_STDOUT): |
| 186 | throw Exception("Cannot dup2 stdout of child process" , ErrorCodes::CANNOT_CREATE_CHILD_PROCESS); |
| 187 | case int(ReturnCodes::CANNOT_DUP_STDERR): |
| 188 | throw Exception("Cannot dup2 stderr of child process" , ErrorCodes::CANNOT_CREATE_CHILD_PROCESS); |
| 189 | case int(ReturnCodes::CANNOT_EXEC): |
| 190 | throw Exception("Cannot execv in child process" , ErrorCodes::CANNOT_CREATE_CHILD_PROCESS); |
| 191 | default: |
| 192 | throw Exception("Child process was exited with return code " + toString(retcode), ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY); |
| 193 | } |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | |
| 198 | } |
| 199 | |