# osmo_gsm_tester: process management
#
# Copyright (C) 2016-2017 by sysmocom - s.f.m.c. GmbH
#
# Author: Neels Hofmeyr <neels@hofmeyr.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

Neels Hofmeyr | 3531a19 | 2017-03-28 14:30:28 +0200 | [diff] [blame] | 20 | import os |
| 21 | import time |
| 22 | import subprocess |
| 23 | import signal |
Pau Espin Pedrol | 0d8deec | 2017-06-23 11:43:38 +0200 | [diff] [blame] | 24 | from datetime import datetime |
Neels Hofmeyr | 3531a19 | 2017-03-28 14:30:28 +0200 | [diff] [blame] | 25 | |
Pau Espin Pedrol | 9a4631c | 2018-03-28 19:17:34 +0200 | [diff] [blame] | 26 | from . import log |
| 27 | from .event_loop import MainLoop |
Neels Hofmeyr | 3531a19 | 2017-03-28 14:30:28 +0200 | [diff] [blame] | 28 | from .util import Dir |
| 29 | |
class Process(log.Origin):
    '''
    Launch and supervise one local child process.

    stdout and stderr of the child are redirected into log files created in
    run_dir. Once the child has been seen to exit, its exit code is kept in
    self.result; self.killed records the last signal sent by terminate().
    '''

    def __init__(self, name, run_dir, popen_args, **popen_kwargs):
        '''
        name: name used for logging (passed on to log.Origin).
        run_dir: directory for the output logs, also used as the child's
                 working directory; anything that is not a Dir is converted
                 to a Dir of its absolute path.
        popen_args: the command line, as passed to subprocess.Popen().
        popen_kwargs: further keyword arguments for subprocess.Popen().
        '''
        super().__init__(log.C_RUN, name)
        self.process_obj = None
        self.result = None
        self.killed = None
        self.name_str = name
        self.run_dir = run_dir
        self.popen_args = popen_args
        self.popen_kwargs = popen_kwargs
        # maps output name ('stdout', 'stderr') to a (path, open file or None) tuple
        self.outputs = {}
        if not isinstance(self.run_dir, Dir):
            self.run_dir = Dir(os.path.abspath(str(self.run_dir)))

    def set_env(self, key, value):
        '''
        set environment variable key=value for the process to be launched.
        '''
        env = self.popen_kwargs.get('env') or {}
        env[key] = value
        self.popen_kwargs['env'] = env

    def make_output_log(self, name):
        '''
        create a non-existing log output file in run_dir to pipe stdout and
        stderr from this process to.
        The file starts with a '(launched: <timestamp>)' line and is
        remembered in self.outputs[name]. Returns the open file object.
        '''
        path = self.run_dir.new_child(name)
        f = open(path, 'w')
        self.dbg(path)
        f.write('(launched: %s)\n' % datetime.now().strftime(log.LONG_DATEFMT))
        f.flush()
        self.outputs[name] = (path, f)
        return f

    def launch(self):
        '''
        start the child process without waiting for it; stdout and stderr
        go to fresh output logs, stdin is a pipe, the cwd is run_dir.
        '''
        # 'env' may be absent or explicitly None; args may be non-str objects
        env = self.popen_kwargs.get('env') or {}
        log.dbg('cd %r; %s %s' % (
                os.path.abspath(str(self.run_dir)),
                ' '.join(['%s=%r'%(k,v) for k,v in env.items()]),
                ' '.join([str(arg) for arg in self.popen_args])))

        try:
            self.process_obj = subprocess.Popen(
                self.popen_args,
                stdout=self.make_output_log('stdout'),
                stderr=self.make_output_log('stderr'),
                stdin=subprocess.PIPE,
                shell=False,
                cwd=self.run_dir.path,
                **self.popen_kwargs)
        except Exception:
            # do not leak the log files opened above if the launch failed
            self.close_output_logs()
            raise
        self.set_name(self.name_str, pid=self.process_obj.pid)
        self.log('Launched')

    def launch_sync(self, raise_nonsuccess=True):
        '''
        calls launch() method and block waiting for it to finish, serving the
        mainloop meanwhile.
        If launching or waiting raises, the process is terminated and the
        exception is re-raised. With raise_nonsuccess, a nonzero exit code
        raises log.Error. Returns the exit code.
        '''
        try:
            self.launch()
            self.wait()
        except Exception:
            self.terminate()
            raise
        if raise_nonsuccess and self.result != 0:
            log.ctx(self)
            raise log.Error('Exited in error %d' % self.result)
        return self.result

    def respawn(self):
        '''
        launch the process again; it must not be running anymore.
        '''
        self.dbg('respawn')
        assert not self.is_running()
        self.result = None
        self.killed = None
        self.launch()

    def _poll_termination(self, time_to_wait_for_term=5):
        '''
        poll the child until it exits or roughly time_to_wait_for_term
        seconds have been spent sleeping. Stores the exit code in
        self.result. Returns True if the child exited, False on timeout.
        '''
        wait_step = 0.001
        waited_time = 0
        while True:
            # poll returns None if proc is still running
            self.result = self.process_obj.poll()
            if self.result is not None:
                return True
            waited_time += wait_step
            # make wait_step approach 1.0
            wait_step = (1. + 5. * wait_step) / 6.
            if waited_time >= time_to_wait_for_term:
                break
            time.sleep(wait_step)
        return False

    def send_signal(self, sig):
        '''
        send signal sig to the child process.
        '''
        os.kill(self.process_obj.pid, sig)

    def terminate(self):
        '''
        stop the child if it is still running, escalating from SIGINT over
        SIGTERM to SIGKILL, then reap it and run cleanup().
        Does nothing if the process was never launched or already has a
        result.
        '''
        if self.process_obj is None:
            return
        if self.result is not None:
            return

        # first try SIGINT to allow stdout+stderr flushing, then SIGTERM
        for sig, sig_name in ((signal.SIGINT, 'SIGINT'),
                              (signal.SIGTERM, 'SIGTERM')):
            self.log('Terminating (%s)' % sig_name)
            self.send_signal(sig)
            self.killed = sig
            if self._poll_termination():
                break
        else:
            # out of patience
            self.log('Terminating (SIGKILL)')
            self.send_signal(signal.SIGKILL)
            self.killed = signal.SIGKILL

        # Reap the child and record its exit code. After SIGKILL nothing has
        # polled the result yet; without storing it here, cleanup() would
        # log rc=None and a later poll() would run cleanup() a second time.
        self.result = self.process_obj.wait()
        self.cleanup()

    def cleanup(self):
        '''
        close the output logs and log how the process ended: ok for exit
        code 0, plain 'Terminated' if we killed it, otherwise an error
        including the tail of stderr.
        '''
        self.dbg('Cleanup')
        self.close_output_logs()
        if self.result == 0:
            self.log('Terminated: ok', rc=self.result)
        elif self.killed:
            self.log('Terminated', rc=self.result)
        else:
            self.err('Terminated: ERROR', rc=self.result)
            # only the stderr tail is logged; log_stdout_tail() is not called here
            self.log_stderr_tail()

    def log_stdout_tail(self):
        '''
        log the last lines of stdout, if there is any output.
        '''
        m = self.get_stdout_tail(prefix='| ')
        if not m:
            return
        self.log('stdout:\n', m, '\n')

    def log_stderr_tail(self):
        '''
        log the last lines of stderr, if there is any output.
        '''
        m = self.get_stderr_tail(prefix='| ')
        if not m:
            return
        self.log('stderr:\n', m, '\n')

    def close_output_logs(self):
        '''
        flush and close all open output log files; their paths stay in
        self.outputs so that the logs can still be read.
        '''
        for k, v in self.outputs.items():
            path, f = v
            if f:
                f.flush()
                f.close()
            # replacing the value of an existing key is safe while iterating
            self.outputs[k] = (path, None)

    def poll(self):
        '''
        check once whether the child has exited; if it just did, store the
        exit code in self.result and run cleanup(). Returns nothing.
        '''
        if self.process_obj is None:
            return
        if self.result is not None:
            return
        self.result = self.process_obj.poll()
        if self.result is not None:
            self.cleanup()

    def is_running(self, poll_first=True):
        '''
        return True if the process was launched and has no result yet.
        '''
        if poll_first:
            self.poll()
        return self.process_obj is not None and self.result is None

    def get_output(self, which):
        '''
        return the entire content of output log 'which' ('stdout' or
        'stderr'), or None if no such log was created.
        '''
        v = self.outputs.get(which)
        if not v:
            return None
        path = v[0]
        with open(path, 'r') as f2:
            return f2.read()

    def get_output_tail(self, which, tail=10, prefix=''):
        '''
        return the last 'tail' lines of output log 'which', each line
        prefixed with 'prefix', or None if there is nothing to return.
        '''
        out = self.get_output(which)
        if not out:
            return None
        if tail < 1:
            # out[-0:] would be the whole output, not zero lines
            return None
        out = out.splitlines()
        tail = min(len(out), tail)
        return prefix + ('\n' + prefix).join(out[-tail:])

    def get_stdout(self):
        '''
        return the entire stdout log content, see get_output().
        '''
        return self.get_output('stdout')

    def get_stderr(self):
        '''
        return the entire stderr log content, see get_output().
        '''
        return self.get_output('stderr')

    def get_stdout_tail(self, tail=10, prefix=''):
        '''
        return the last lines of the stdout log, see get_output_tail().
        '''
        return self.get_output_tail('stdout', tail, prefix)

    def get_stderr_tail(self, tail=10, prefix=''):
        '''
        return the last lines of the stderr log, see get_output_tail().
        '''
        return self.get_output_tail('stderr', tail, prefix)

    def terminated(self, poll_first=True):
        '''
        return True if the process has a result, i.e. was seen to exit.
        '''
        if poll_first:
            self.poll()
        return self.result is not None

    def wait(self, timeout=300):
        '''
        let the MainLoop wait until terminated() becomes true.
        NOTE(review): what happens when 'timeout' elapses is decided by
        MainLoop.wait() -- presumably it raises; confirm there.
        '''
        MainLoop.wait(self, self.terminated, timeout=timeout)
Neels Hofmeyr | dae3d3c | 2017-03-28 12:16:58 +0200 | [diff] [blame] | 233 | |
| 234 | |
class RemoteProcess(Process):
    '''
    Process that runs its command line on a remote host by wrapping it in a
    local ssh invocation.
    '''

    def __init__(self, name, run_dir, remote_user, remote_host, remote_cwd, popen_args, **popen_kwargs):
        '''
        remote_user, remote_host: ssh login, used as '<user>@<host>'.
        remote_cwd: directory to cd into on the remote side before running
                    the command; may be empty or None to skip the cd.
        All other arguments are as for Process.
        '''
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.remote_user = remote_user
        self.remote_host = remote_host
        self.remote_cwd = remote_cwd

        # hacky: instead of just prepending ssh, i.e. piping stdout and stderr
        # over the ssh link, we should probably run on the remote side,
        # monitoring the process remotely.
        ssh_target = self.remote_user + '@' + self.remote_host
        cd_prefix = ('cd "%s"; ' % self.remote_cwd) if self.remote_cwd else ''
        # The arguments are joined unquoted on purpose of the original code:
        # the remote shell interprets the resulting string as one command line.
        remote_cmdline = cd_prefix + ' '.join(self.popen_args)

        # We need double -t to force tty and be able to forward signals to
        # processes (SIGHUP) when we close ssh on the local side. As a result,
        # stderr seems to be merged into stdout in ssh client.
        self.popen_args = ['ssh', '-t', '-t', ssh_target, remote_cmdline]
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)
Neels Hofmeyr | dae3d3c | 2017-03-28 12:16:58 +0200 | [diff] [blame] | 257 | |
class NetNSProcess(Process):
    '''
    Process that is run via sudo and the NETNS_EXEC_BIN wrapper script, which
    is passed the network namespace name followed by the command line.
    '''

    NETNS_EXEC_BIN = 'osmo-gsm-tester_netns_exec.sh'

    def __init__(self, name, run_dir, netns, popen_args, **popen_kwargs):
        '''
        netns: name of the network namespace handed to the wrapper script.
        All other arguments are as for Process.
        '''
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.netns = netns

        self.popen_args = ['sudo', self.NETNS_EXEC_BIN, self.netns, *popen_args]
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)

    # HACK: Since we run under sudo, only way to kill root-owned process is to kill as root...
    # This function is overwritten from Process.
    def send_signal(self, sig):
        '''
        send signal sig to the child by running 'kill' through the same
        sudo + netns wrapper, in a run dir named '<name>-kill'.
        '''
        pid_str = str(self.process_obj.pid)
        sig_opt = '-%d' % int(sig)
        run_local_netns_sync(self.run_dir, self.name()+"-kill", self.netns,
                             ('kill', sig_opt, pid_str))
| 272 | |
| 273 | |
def run_local_sync(run_dir, name, popen_args):
    '''
    run popen_args as a local Process in a new sub dir 'name' of run_dir and
    block until it is done; launch_sync() raises on a nonzero exit code.
    '''
    proc_dir = run_dir.new_dir(name)
    Process(name, proc_dir, popen_args).launch_sync()
Pau Espin Pedrol | e4358a9 | 2018-10-01 11:27:55 +0200 | [diff] [blame] | 278 | |
def run_local_netns_sync(run_dir, name, netns, popen_args):
    '''
    run popen_args as a NetNSProcess for network namespace netns, in a new
    sub dir 'name' of run_dir, and block until it is done; launch_sync()
    raises on a nonzero exit code.
    '''
    proc_dir = run_dir.new_dir(name)
    NetNSProcess(name, proc_dir, netns, popen_args).launch_sync()
Pau Espin Pedrol | fd4c144 | 2018-10-25 17:37:23 +0200 | [diff] [blame] | 283 | |
def run_remote_sync(run_dir, remote_user, remote_addr, name, popen_args, remote_cwd=None):
    '''
    run popen_args on remote_user@remote_addr as a RemoteProcess, logging to a
    new sub dir 'name' of run_dir, and block until it is done; launch_sync()
    raises on a nonzero exit code.
    '''
    proc_dir = run_dir.new_dir(name)
    remote_proc = RemoteProcess(name, proc_dir, remote_user, remote_addr,
                                remote_cwd, popen_args)
    remote_proc.launch_sync()
Pau Espin Pedrol | e4358a9 | 2018-10-01 11:27:55 +0200 | [diff] [blame] | 288 | |
def scp(run_dir, remote_user, remote_addr, name, local_path, remote_path):
    '''
    recursively copy local_path to remote_path on remote_user@remote_addr by
    running 'scp -r' synchronously as a local process called 'name'.
    '''
    destination = '%s@%s:%s' % (remote_user, remote_addr, remote_path)
    scp_cmd = ('scp', '-r', local_path, destination)
    run_local_sync(run_dir, name, scp_cmd)
| 291 | |
def copy_inst_ssh(run_dir, inst, remote_dir, remote_user, remote_addr, remote_rundir_append, cfg_file_name):
    '''
    copy the installation 'inst' and the config file cfg_file_name into
    remote_dir on remote_user@remote_addr.

    Steps, each run synchronously: remove remote_dir if it exists, re-create
    it, scp 'inst' into it, create the remote run dir
    (remote_dir / remote_rundir_append) and scp the config file into
    remote_dir. Returns a Dir for the remote location of the installation.
    '''
    def on_remote(step_name, *cmd):
        # cmd arrives as a tuple, the remote shell sees it joined by spaces
        run_remote_sync(run_dir, remote_user, remote_addr, step_name, cmd)

    def upload(step_name, src, dst):
        scp(run_dir, remote_user, remote_addr, step_name, src, dst)

    remote_inst = Dir(remote_dir.child(os.path.basename(str(inst))))
    remote_dir_str = str(remote_dir)

    # '||' is interpreted by the remote shell: only rm if the dir exists
    on_remote('rm-remote-dir',
              'test', '!', '-d', remote_dir_str, '||', 'rm', '-rf', remote_dir_str)
    on_remote('mk-remote-dir', 'mkdir', '-p', remote_dir_str)
    upload('scp-inst-to-remote', str(inst), remote_dir_str)

    remote_run_dir = remote_dir.child(remote_rundir_append)
    on_remote('mk-remote-run-dir', 'mkdir', '-p', remote_run_dir)

    remote_config_file = remote_dir.child(os.path.basename(cfg_file_name))
    upload('scp-cfg-to-remote', cfg_file_name, remote_config_file)
    return remote_inst
| 305 | |
# vim: expandtab tabstop=4 shiftwidth=4