blob: a104e10ea574cb56740d150576590ebfe411f653 [file] [log] [blame]
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +02001# osmo_gsm_tester: process management
2#
3# Copyright (C) 2016-2017 by sysmocom - s.f.m.c. GmbH
4#
5# Author: Neels Hofmeyr <neels@hofmeyr.de>
6#
7# This program is free software: you can redistribute it and/or modify
Harald Welte27205342017-06-03 09:51:45 +02008# it under the terms of the GNU General Public License as
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +02009# published by the Free Software Foundation, either version 3 of the
10# License, or (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Harald Welte27205342017-06-03 09:51:45 +020015# GNU General Public License for more details.
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +020016#
Harald Welte27205342017-06-03 09:51:45 +020017# You should have received a copy of the GNU General Public License
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +020018# along with this program. If not, see <http://www.gnu.org/licenses/>.
19
Neels Hofmeyr3531a192017-03-28 14:30:28 +020020import os
21import time
22import subprocess
23import signal
Holger Hans Peter Freyther20b52c12019-02-27 02:31:50 +000024from abc import ABCMeta, abstractmethod
Pau Espin Pedrol0d8deec2017-06-23 11:43:38 +020025from datetime import datetime
Neels Hofmeyr3531a192017-03-28 14:30:28 +020026
Pau Espin Pedrol9a4631c2018-03-28 19:17:34 +020027from . import log
28from .event_loop import MainLoop
Neels Hofmeyr3531a192017-03-28 14:30:28 +020029from .util import Dir
30
class TerminationStrategy(log.Origin, metaclass=ABCMeta):
    """Abstract base for objects that shut down a group of processes.

    Processes are registered one by one with add_process(); a concrete
    subclass decides in terminate_all() how the registered processes are
    actually brought down.
    """

    def __init__(self):
        # Processes registered so far, in registration order.
        self._processes = list()

    def add_process(self, process):
        """Register a process that has to be terminated later on."""
        self._processes += [process]

    @abstractmethod
    def terminate_all(self):
        """Terminate every registered process and wait for the termination."""
45
46
class ParallelTerminationStrategy(TerminationStrategy):
    """Processes will be terminated in parallel.

    Each signal of the escalation sequence (SIGTERM, SIGINT, SIGKILL) is sent
    to all processes that are still alive before any waiting is done; the
    children are then collected with os.waitpid().
    """

    @staticmethod
    def _returncode_from_wait_status(status):
        """Convert an os.waitpid() status into a Popen-style return code.

        Process.result is otherwise filled from subprocess.Popen.poll(), i.e.
        it holds the exit code, or the negative signal number if the child was
        killed by a signal. Storing the raw wait status instead would report
        e.g. exit code 1 as 256.
        """
        if os.WIFSIGNALED(status):
            return -os.WTERMSIG(status)
        if os.WIFEXITED(status):
            return os.WEXITSTATUS(status)
        # Neither exited nor signalled; hand the raw value through unchanged.
        return status

    def _prune_dead_processes(self, poll_first):
        """Removes all dead processes from the list."""
        self._processes = [proc for proc in self._processes
                           if proc.is_running(poll_first)]

    def _build_process_map(self):
        """Builds a mapping from pid to process."""
        self._process_map = {}
        for process in self._processes:
            pid = process.pid()
            # A process without a pid was never launched; nothing to wait for.
            if pid is None:
                continue
            self._process_map[pid] = process

    def _poll_once(self):
        """Polls for to be collected children once.

        Returns True if one of the registered processes was collected, False
        if no child was ready or the collected child is not one of ours.
        Raises ChildProcessError (from os.waitpid) if there are no children.
        """
        pid, status = os.waitpid(0, os.WNOHANG)
        # With WNOHANG a pid of 0 means: no child has changed state yet.
        if pid == 0:
            return False
        proc = self._process_map.get(pid)
        if proc is None:
            self.dbg("Unknown process with pid(%d) died." % pid)
            return False
        # Update the process state and forget about it
        self.log("PID %d died..." % pid)
        proc.result = self._returncode_from_wait_status(status)
        proc.cleanup()
        self._processes.remove(proc)
        del self._process_map[pid]
        return True

    def _poll_for_termination(self, time_to_wait_for_term=5):
        """Waits for the termination of processes until timeout|all ended."""

        wait_step = 0.001
        waited_time = 0
        while len(self._processes) > 0:
            # Collect processes until there are none to be collected.
            while True:
                try:
                    if not self._poll_once():
                        break
                except ChildProcessError:
                    # No children left to wait for at all.
                    break

            # All processes died and we can return before sleeping
            if len(self._processes) == 0:
                break
            waited_time += wait_step
            # make wait_step approach 1.0
            wait_step = (1. + 5. * wait_step) / 6.
            if waited_time >= time_to_wait_for_term:
                break
            time.sleep(wait_step)

    def terminate_all(self):
        """Signals all registered processes and collects the ones that exit.

        Processes that already ended are dropped first. The remaining ones
        get SIGTERM, then SIGINT, then SIGKILL; after the first two signals
        the children are polled for termination. Returns as soon as no
        process is left.
        """
        self.dbg("Scheduled to terminate %d processes." % len(self._processes))
        self._prune_dead_processes(True)
        self._build_process_map()

        # Iterate through all signals.
        for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGKILL]:
            # Everything is gone already: do not log or signal any further.
            if not self._processes:
                return
            self.dbg("Starting to kill with %s" % sig.name)
            for process in self._processes:
                process.kill(sig)
            # NOTE: after SIGKILL no further polling is done here, so these
            # processes are neither reaped nor cleaned up by this method.
            if sig == signal.SIGKILL:
                continue
            self._poll_for_termination()
Holger Hans Peter Freyther20b52c12019-02-27 02:31:50 +0000119
120
class Process(log.Origin):
    """A child process launched via subprocess.Popen.

    stdout and stderr of the child are written to log files created in
    run_dir. The exit status ends up in self.result (None while unknown) and
    the last signal sent through kill() in self.killed.
    """

    def __init__(self, name, run_dir, popen_args, **popen_kwargs):
        """Prepare (but do not start) a process.

        name: used as the log origin name.
        run_dir: a Dir, or anything whose str() is a path; it is wrapped in a
                 Dir of the absolute path in the latter case.
        popen_args: the command line passed to subprocess.Popen.
        popen_kwargs: additional keyword arguments for subprocess.Popen.
        """
        super().__init__(log.C_RUN, name)
        self.process_obj = None
        self.result = None
        self.killed = None
        self.name_str = name
        self.run_dir = run_dir
        self.popen_args = popen_args
        self.popen_kwargs = popen_kwargs
        # maps output name ('stdout', 'stderr') to (path, open file or None)
        self.outputs = {}
        if not isinstance(self.run_dir, Dir):
            self.run_dir = Dir(os.path.abspath(str(self.run_dir)))

    def set_env(self, key, value):
        """Set an environment variable for the process to be launched.

        NOTE: if no 'env' was passed in popen_kwargs, a fresh dict is created
        here, so the child's environment then consists only of the variables
        set this way (see the 'env' argument of subprocess.Popen).
        """
        env = self.popen_kwargs.get('env') or {}
        env[key] = value
        self.popen_kwargs['env'] = env

    def make_output_log(self, name):
        '''
        create a non-existing log output file in run_dir to pipe stdout and
        stderr from this process to.

        Returns the open file object; it stays open until
        close_output_logs() is called.
        '''
        path = self.run_dir.new_child(name)
        f = open(path, 'w')
        self.dbg(path)
        f.write('(launched: %s)\n' % datetime.now().strftime(log.LONG_DATEFMT))
        f.flush()
        self.outputs[name] = (path, f)
        return f

    def launch(self):
        """Start the process, with stdout/stderr redirected to log files."""
        # 'env' may be absent or explicitly None, and arguments need not be
        # plain strings; neither must make the debug line fail the launch.
        env = self.popen_kwargs.get('env') or {}
        log.dbg('cd %r; %s %s' % (
            os.path.abspath(str(self.run_dir)),
            ' '.join(['%s=%r'%(k,v) for k,v in env.items()]),
            ' '.join(str(arg) for arg in self.popen_args)))

        try:
            self.process_obj = subprocess.Popen(
                self.popen_args,
                stdout=self.make_output_log('stdout'),
                stderr=self.make_output_log('stderr'),
                stdin=subprocess.PIPE,
                shell=False,
                cwd=self.run_dir.path,
                **self.popen_kwargs)
        except Exception:
            # Nothing was started: do not leave the log files open.
            self.close_output_logs()
            raise
        self.set_name(self.name_str, pid=self.process_obj.pid)
        self.log('Launched')

    def launch_sync(self, raise_nonsuccess=True):
        '''
        calls launch() method and block waiting for it to finish, serving the
        mainloop meanwhile.

        Returns the exit status. If raise_nonsuccess is set, a log.Error is
        raised when the exit status is not 0. If launching or waiting fails,
        the process is terminated and the exception is passed on.
        '''
        try:
            self.launch()
            self.wait()
        except Exception:
            self.terminate()
            raise
        if raise_nonsuccess and self.result != 0:
            log.ctx(self)
            raise log.Error('Exited in error %d' % self.result)
        return self.result

    def respawn(self):
        """Launch the process again; it must not be running anymore."""
        self.dbg('respawn')
        assert not self.is_running()
        self.result = None
        self.killed = None
        self.launch()

    def _poll_termination(self, time_to_wait_for_term=5):
        """Poll until the process ended or the time to wait is used up.

        Returns True if the process ended (self.result is set then), False
        on timeout.
        """
        wait_step = 0.001
        waited_time = 0
        while True:
            # poll returns None if proc is still running
            self.result = self.process_obj.poll()
            if self.result is not None:
                return True
            waited_time += wait_step
            # make wait_step approach 1.0
            wait_step = (1. + 5. * wait_step) / 6.
            if waited_time >= time_to_wait_for_term:
                break
            time.sleep(wait_step)
        return False

    def send_signal(self, sig):
        """Deliver the signal to the process."""
        os.kill(self.process_obj.pid, sig)

    def pid(self):
        """Return the pid of the launched process, None if never launched."""
        if self.process_obj is None:
            return None
        return self.process_obj.pid

    def kill(self, sig):
        """Kills the process with the given signal and remembers it."""
        self.log('Terminating (%s)' % sig.name)
        self.send_signal(sig)
        self.killed = sig

    def terminate(self):
        """Stop the process, escalating SIGINT -> SIGTERM -> SIGKILL.

        Does nothing if the process was never launched or has already ended.
        """
        if self.process_obj is None:
            return
        if self.result is not None:
            return

        # first try SIGINT to allow stdout+stderr flushing, then SIGTERM
        for sig in (signal.SIGINT, signal.SIGTERM):
            self.kill(sig)
            if self._poll_termination():
                break
        else:
            # out of patience
            self.kill(signal.SIGKILL)

        # Reap the child and record its exit status. Without storing it,
        # self.result would stay None after the SIGKILL path and a later
        # poll() would run cleanup() a second time.
        self.result = self.process_obj.wait()
        self.cleanup()

    def cleanup(self):
        """Close the output logs and log how the process ended."""
        self.dbg('Cleanup')
        self.close_output_logs()
        if self.result == 0:
            self.log('Terminated: ok', rc=self.result)
        elif self.killed:
            # a non-zero result is expected when we sent the signal ourselves
            self.log('Terminated', rc=self.result)
        else:
            self.err('Terminated: ERROR', rc=self.result)
            #self.log_stdout_tail()
            self.log_stderr_tail()

    def log_stdout_tail(self):
        """Log the last lines of stdout, if there are any."""
        m = self.get_stdout_tail(prefix='| ')
        if not m:
            return
        self.log('stdout:\n', m, '\n')

    def log_stderr_tail(self):
        """Log the last lines of stderr, if there are any."""
        m = self.get_stderr_tail(prefix='| ')
        if not m:
            return
        self.log('stderr:\n', m, '\n')

    def close_output_logs(self):
        """Flush and close all open output log files, keeping their paths."""
        for k, v in self.outputs.items():
            path, f = v
            if f:
                f.flush()
                f.close()
            # only the value of an existing key is replaced, which is safe
            # while iterating
            self.outputs[k] = (path, None)

    def poll(self):
        """Check once whether the process ended; run cleanup() if it did."""
        if self.process_obj is None:
            return
        if self.result is not None:
            return
        self.result = self.process_obj.poll()
        if self.result is not None:
            self.cleanup()

    def is_running(self, poll_first=True):
        """Return True if the process was launched and has no result yet."""
        if poll_first:
            self.poll()
        return self.process_obj is not None and self.result is None

    def get_output(self, which):
        """Return the full text of the given output log, None if unknown."""
        v = self.outputs.get(which)
        if not v:
            return None
        path, _ = v
        with open(path, 'r') as f2:
            return f2.read()

    def get_output_tail(self, which, tail=10, prefix=''):
        """Return the last 'tail' lines of an output log, each with prefix.

        Returns None if there is no output at all, and an empty string if
        tail is zero or negative.
        """
        out = self.get_output(which)
        if not out:
            return None
        lines = out.splitlines()
        count = min(len(lines), tail)
        # lines[-0:] would be the whole list, so handle "no lines" explicitly
        if count <= 0:
            return ''
        return prefix + ('\n' + prefix).join(lines[-count:])

    def get_stdout(self):
        return self.get_output('stdout')

    def get_stderr(self):
        return self.get_output('stderr')

    def get_stdout_tail(self, tail=10, prefix=''):
        return self.get_output_tail('stdout', tail, prefix)

    def get_stderr_tail(self, tail=10, prefix=''):
        return self.get_output_tail('stderr', tail, prefix)

    def terminated(self, poll_first=True):
        """Return True once an exit status is known."""
        if poll_first:
            self.poll()
        return self.result is not None

    def wait(self, timeout=300):
        """Serve the main loop until terminated() holds, bounded by timeout."""
        MainLoop.wait(self, self.terminated, timeout=timeout)
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +0200329
330
class RemoteProcess(Process):
    """A Process whose command line is executed on another host via ssh."""

    def __init__(self, name, run_dir, remote_user, remote_host, remote_cwd, popen_args, **popen_kwargs):
        """Wrap popen_args into an ssh invocation for remote_user@remote_host.

        If remote_cwd is set, the remote command first changes into that
        directory.
        """
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.remote_user = remote_user
        self.remote_host = remote_host
        self.remote_cwd = remote_cwd

        # hacky: instead of just prepending ssh, i.e. piping stdout and stderr
        # over the ssh link, we should probably run on the remote side,
        # monitoring the process remotely.
        cd = 'cd "%s"; ' % self.remote_cwd if self.remote_cwd else ''

        # We need double -t to force tty and be able to forward signals to
        # processes (SIGHUP) when we close ssh on the local side. As a result,
        # stderr seems to be merged into stdout in ssh client.
        login = self.remote_user+'@'+self.remote_host
        # The arguments are joined unquoted on purpose: the remote shell
        # interprets the resulting string.
        remote_cmd = cd + ' '.join(self.popen_args)
        self.popen_args = ['ssh', '-t', '-t', login, remote_cmd]
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +0200353
class NetNSProcess(Process):
    """A Process run inside a network namespace through a sudo helper script."""

    NETNS_EXEC_BIN = 'osmo-gsm-tester_netns_exec.sh'

    def __init__(self, name, run_dir, netns, popen_args, **popen_kwargs):
        """Prefix popen_args with 'sudo <NETNS_EXEC_BIN> <netns>'."""
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.netns = netns

        self.popen_args = ['sudo', self.NETNS_EXEC_BIN, self.netns, *popen_args]
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)

    # HACK: Since we run under sudo, only way to kill root-owned process is to kill as root...
    # This function is overwritten from Process.
    def send_signal(self, sig):
        """Send the signal by running 'kill' through the netns helper."""
        signal_flag = '-%d' % int(sig)
        target_pid = str(self.process_obj.pid)
        run_local_netns_sync(self.run_dir, self.name()+"-kill", self.netns,
                             ('kill', signal_flag, target_pid))
368
369
def run_local_sync(run_dir, name, popen_args):
    """Run a local command to completion in a new sub-directory of run_dir.

    The sub-directory is named after 'name'. Errors are raised as by
    Process.launch_sync().
    """
    proc_dir = run_dir.new_dir(name)
    Process(name, proc_dir, popen_args).launch_sync()
Pau Espin Pedrole4358a92018-10-01 11:27:55 +0200374
def run_local_netns_sync(run_dir, name, netns, popen_args):
    """Run a command to completion inside the given network namespace.

    The process gets its own sub-directory of run_dir, named after 'name'.
    """
    proc_dir = run_dir.new_dir(name)
    NetNSProcess(name, proc_dir, netns, popen_args).launch_sync()
Pau Espin Pedrolfd4c1442018-10-25 17:37:23 +0200379
def run_remote_sync(run_dir, remote_user, remote_addr, name, popen_args, remote_cwd=None):
    """Run a command to completion on a remote host via ssh.

    The local logs go to a new sub-directory of run_dir named after 'name';
    remote_cwd, if given, is the directory to change into on the remote side.
    """
    proc_dir = run_dir.new_dir(name)
    RemoteProcess(name, proc_dir, remote_user, remote_addr, remote_cwd,
                  popen_args).launch_sync()
Pau Espin Pedrole4358a92018-10-01 11:27:55 +0200384
def scp(run_dir, remote_user, remote_addr, name, local_path, remote_path):
    """Recursively copy local_path to remote_path on the remote host using scp."""
    destination = '%s@%s:%s' % (remote_user, remote_addr, remote_path)
    run_local_sync(run_dir, name, ('scp', '-r', local_path, destination))
387
def copy_inst_ssh(run_dir, inst, remote_dir, remote_user, remote_addr, remote_rundir_append, cfg_file_name):
    """Copy an installation and a config file to a fresh remote directory.

    Steps, in order: remove remote_dir if it exists, recreate it, scp the
    installation into it, create the remote run directory below it and scp
    the config file into remote_dir.

    Returns a Dir for the installation's location on the remote side.
    """
    def on_remote(step, cmd):
        # each step runs synchronously and logs into its own sub-directory
        run_remote_sync(run_dir, remote_user, remote_addr, step, cmd)

    def upload(step, src, dst):
        scp(run_dir, remote_user, remote_addr, step, src, dst)

    remote_inst = Dir(remote_dir.child(os.path.basename(str(inst))))
    remote_dir_str = str(remote_dir)

    # '||' is passed as a plain argument; it ends up in the command string
    # that the remote shell interprets.
    on_remote('rm-remote-dir', ('test', '!', '-d', remote_dir_str, '||', 'rm', '-rf', remote_dir_str))
    on_remote('mk-remote-dir', ('mkdir', '-p', remote_dir_str))
    upload('scp-inst-to-remote', str(inst), remote_dir_str)

    remote_run_dir = remote_dir.child(remote_rundir_append)
    on_remote('mk-remote-run-dir', ('mkdir', '-p', remote_run_dir))

    remote_config_file = remote_dir.child(os.path.basename(cfg_file_name))
    upload('scp-cfg-to-remote', cfg_file_name, remote_config_file)
    return remote_inst
401
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +0200402# vim: expandtab tabstop=4 shiftwidth=4