blob: 1e53aba0ba82fc91124557df82d495e27568e847 [file] [log] [blame]
# osmo_gsm_tester: process management
#
# Copyright (C) 2016-2017 by sysmocom - s.f.m.c. GmbH
#
# Author: Neels Hofmeyr <neels@hofmeyr.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
19
Neels Hofmeyr3531a192017-03-28 14:30:28 +020020import os
21import time
22import subprocess
23import signal
Holger Hans Peter Freyther20b52c12019-02-27 02:31:50 +000024from abc import ABCMeta, abstractmethod
Pau Espin Pedrol0d8deec2017-06-23 11:43:38 +020025from datetime import datetime
Neels Hofmeyr3531a192017-03-28 14:30:28 +020026
Pau Espin Pedrol9a4631c2018-03-28 19:17:34 +020027from . import log
28from .event_loop import MainLoop
Neels Hofmeyr3531a192017-03-28 14:30:28 +020029from .util import Dir
30
class TerminationStrategy(log.Origin, metaclass=ABCMeta):
    """Abstract base for strategies that terminate a collection of processes.

    Processes are registered with add_process(); concrete subclasses decide
    how terminate_all() brings them down.
    """

    def __init__(self):
        # NOTE(review): the log.Origin initializer is not invoked here --
        # confirm that this is intended.
        self._processes = []

    def add_process(self, process):
        """Register a process that terminate_all() has to take care of."""
        self._processes.append(process)

    @abstractmethod
    def terminate_all(self):
        """Terminates all scheduled processes and waits for the termination."""
45
46
class ParallelTerminationStrategy(TerminationStrategy):
    """Strategy meant to terminate the scheduled processes in parallel."""

    def terminate_all(self):
        """Call terminate() on every scheduled process, in insertion order."""
        # TODO(zecke): Actually make this non-sequential.
        for proc in self._processes:
            proc.terminate()
54
55
class Process(log.Origin):
    """A child process whose stdout and stderr are written to files in run_dir.

    Attributes:
        process_obj: the subprocess.Popen object, or None before launch().
        result: the exit code once termination was noticed, else None.
        killed: the last signal sent by terminate(), or None.
        outputs: maps an output name ('stdout', 'stderr') to a
            (path, open file or None) tuple.
    """

    def __init__(self, name, run_dir, popen_args, **popen_kwargs):
        """Prepare a process; nothing is started until launch() is called.

        name: name used for logging.
        run_dir: directory for output logs and the child's working directory;
            anything that is not a Dir is converted to a Dir of its absolute
            path.
        popen_args: the command line, passed to subprocess.Popen().
        popen_kwargs: further keyword arguments for subprocess.Popen().
        """
        super().__init__(log.C_RUN, name)
        self.process_obj = None
        self.result = None
        self.killed = None
        self.name_str = name
        self.run_dir = run_dir
        self.popen_args = popen_args
        self.popen_kwargs = popen_kwargs
        self.outputs = {}
        if not isinstance(self.run_dir, Dir):
            self.run_dir = Dir(os.path.abspath(str(self.run_dir)))

    def set_env(self, key, value):
        """Set one variable in the 'env' dict handed to subprocess.Popen()."""
        env = self.popen_kwargs.get('env') or {}
        env[key] = value
        self.popen_kwargs['env'] = env

    def make_output_log(self, name):
        """Open a log file in run_dir to pipe process output to.

        The file is remembered in self.outputs under the given name and is
        closed by close_output_logs(). Returns the open file object.
        """
        path = self.run_dir.new_child(name)
        f = open(path, 'w')
        self.dbg(path)
        f.write('(launched: %s)\n' % datetime.now().strftime(log.LONG_DATEFMT))
        f.flush()
        self.outputs[name] = (path, f)
        return f

    def launch(self):
        """Start the process without waiting for it to finish.

        stdout and stderr go to freshly created log files, stdin is a pipe
        and the working directory is run_dir. Any exception raised while
        starting the process is propagated after closing the log files again.
        """
        # 'env' may be absent or explicitly None, same as in set_env().
        env = self.popen_kwargs.get('env') or {}
        log.dbg('cd %r; %s %s' % (
            os.path.abspath(str(self.run_dir)),
            ' '.join(['%s=%r' % (k, v) for k, v in env.items()]),
            ' '.join(self.popen_args)))

        try:
            self.process_obj = subprocess.Popen(
                self.popen_args,
                stdout=self.make_output_log('stdout'),
                stderr=self.make_output_log('stderr'),
                stdin=subprocess.PIPE,
                shell=False,
                cwd=self.run_dir.path,
                **self.popen_kwargs)
        except Exception:
            # No process was started, so terminate()/cleanup() will never run
            # for this launch; do not leak the log files opened above.
            self.close_output_logs()
            raise
        self.set_name(self.name_str, pid=self.process_obj.pid)
        self.log('Launched')

    def launch_sync(self, raise_nonsuccess=True):
        """Launch the process and block until it finished.

        The waiting is done by wait(), i.e. by the MainLoop. If launching or
        waiting raises, the process is terminated and the exception is
        re-raised.

        raise_nonsuccess: raise log.Error if the exit code is not 0.
        Returns the exit code.
        """
        try:
            self.launch()
            self.wait()
        except Exception:
            self.terminate()
            raise
        if raise_nonsuccess and self.result != 0:
            log.ctx(self)
            raise log.Error('Exited in error %d' % self.result)
        return self.result

    def respawn(self):
        """Launch the process again; it must not be running anymore."""
        self.dbg('respawn')
        assert not self.is_running()
        self.result = None
        self.killed = None
        self.launch()

    def _poll_termination(self, time_to_wait_for_term=5):
        """Poll until the process exited or the wait budget is used up.

        The interval between polls starts small and grows towards one second.
        self.result is updated on every poll. Returns True if the process
        exited, False if roughly time_to_wait_for_term seconds were spent.
        """
        wait_step = 0.001
        waited_time = 0
        while True:
            # poll returns None if proc is still running
            self.result = self.process_obj.poll()
            if self.result is not None:
                return True
            waited_time += wait_step
            # make wait_step approach 1.0
            wait_step = (1. + 5. * wait_step) / 6.
            if waited_time >= time_to_wait_for_term:
                break
            time.sleep(wait_step)
        return False

    def send_signal(self, sig):
        """Send the given signal to the process; subclasses may override."""
        os.kill(self.process_obj.pid, sig)

    def terminate(self):
        """Stop the process, escalating from SIGINT over SIGTERM to SIGKILL.

        Does nothing if the process was never launched or its exit code is
        already known. Otherwise blocks until the process is gone, stores the
        exit code in self.result and runs cleanup().
        """
        if self.process_obj is None:
            return
        if self.result is not None:
            return

        # first try SIGINT to allow stdout+stderr flushing, then SIGTERM
        polite_signals = (
            (signal.SIGINT, 'SIGINT'),
            (signal.SIGTERM, 'SIGTERM'),
        )
        for sig, sig_name in polite_signals:
            self.log('Terminating (%s)' % sig_name)
            self.send_signal(sig)
            self.killed = sig
            if self._poll_termination():
                break
        else:
            # out of patience
            self.log('Terminating (SIGKILL)')
            self.send_signal(signal.SIGKILL)
            self.killed = signal.SIGKILL

        # Keep the exit code: after SIGKILL nothing has polled yet, and a
        # result left at None would make a later poll() run cleanup() again.
        self.result = self.process_obj.wait()
        self.cleanup()

    def cleanup(self):
        """Close the output logs and log how the process ended."""
        self.dbg('Cleanup')
        self.close_output_logs()
        if self.result == 0:
            self.log('Terminated: ok', rc=self.result)
        elif self.killed:
            self.log('Terminated', rc=self.result)
        else:
            self.err('Terminated: ERROR', rc=self.result)
        #self.log_stdout_tail()
        self.log_stderr_tail()

    def log_stdout_tail(self):
        """Log the last lines of stdout, if there are any."""
        m = self.get_stdout_tail(prefix='| ')
        if not m:
            return
        self.log('stdout:\n', m, '\n')

    def log_stderr_tail(self):
        """Log the last lines of stderr, if there are any."""
        m = self.get_stderr_tail(prefix='| ')
        if not m:
            return
        self.log('stderr:\n', m, '\n')

    def close_output_logs(self):
        """Flush and close all open output logs, keeping their paths."""
        for k, v in self.outputs.items():
            path, f = v
            if f:
                f.flush()
                f.close()
            # Only existing keys are re-assigned, which is safe while
            # iterating over the dict.
            self.outputs[k] = (path, None)

    def poll(self):
        """Check once whether the process exited; run cleanup() if it did."""
        if self.process_obj is None:
            return
        if self.result is not None:
            return
        self.result = self.process_obj.poll()
        if self.result is not None:
            self.cleanup()

    def is_running(self, poll_first=True):
        """Return True if the process was launched and has not exited yet."""
        if poll_first:
            self.poll()
        return self.process_obj is not None and self.result is None

    def get_output(self, which):
        """Return the full content of an output log read from its file.

        which: output name as passed to make_output_log(), e.g. 'stdout'.
        Returns None if no such output log was created.
        """
        v = self.outputs.get(which)
        if not v:
            return None
        path, f = v
        with open(path, 'r') as f2:
            return f2.read()

    def get_output_tail(self, which, tail=10, prefix=''):
        """Return the last lines of an output log, each starting with prefix.

        which: output name, e.g. 'stdout'.
        tail: maximum number of lines to return.
        Returns None if the output does not exist or is empty.
        """
        out = self.get_output(which)
        if not out:
            return None
        out = out.splitlines()
        tail = min(len(out), tail)
        return prefix + ('\n' + prefix).join(out[-tail:])

    def get_stdout(self):
        """Return everything the process wrote to stdout so far."""
        return self.get_output('stdout')

    def get_stderr(self):
        """Return everything the process wrote to stderr so far."""
        return self.get_output('stderr')

    def get_stdout_tail(self, tail=10, prefix=''):
        """Return the last lines of stdout, see get_output_tail()."""
        return self.get_output_tail('stdout', tail, prefix)

    def get_stderr_tail(self, tail=10, prefix=''):
        """Return the last lines of stderr, see get_output_tail()."""
        return self.get_output_tail('stderr', tail, prefix)

    def terminated(self, poll_first=True):
        """Return True once the exit code of the process is known."""
        if poll_first:
            self.poll()
        return self.result is not None

    def wait(self, timeout=300):
        """Let the MainLoop wait until terminated() holds or timeout passes."""
        MainLoop.wait(self, self.terminated, timeout=timeout)
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +0200259
260
class RemoteProcess(Process):
    """A Process that runs its command on a remote host through ssh."""

    def __init__(self, name, run_dir, remote_user, remote_host, remote_cwd, popen_args, **popen_kwargs):
        """Wrap popen_args into an ssh invocation for remote_user@remote_host.

        remote_cwd: if set, the remote command is preceded by a cd into this
            directory.
        The arguments are joined by spaces into a single remote command
        string without any quoting.
        """
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.remote_user = remote_user
        self.remote_host = remote_host
        self.remote_cwd = remote_cwd

        # hacky: instead of just prepending ssh, i.e. piping stdout and stderr
        # over the ssh link, we should probably run on the remote side,
        # monitoring the process remotely.
        change_dir = ''
        if self.remote_cwd:
            change_dir = 'cd "%s"; ' % self.remote_cwd
        destination = self.remote_user + '@' + self.remote_host
        remote_cmd = change_dir + ' '.join(self.popen_args)
        # We need double -t to force tty and be able to forward signals to
        # processes (SIGHUP) when we close ssh on the local side. As a result,
        # stderr seems to be merged into stdout in ssh client.
        self.popen_args = ['ssh', '-t', '-t', destination, remote_cmd]
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)
Neels Hofmeyrdae3d3c2017-03-28 12:16:58 +0200283
class NetNSProcess(Process):
    """A Process started through sudo and the netns exec helper script."""

    NETNS_EXEC_BIN = 'osmo-gsm-tester_netns_exec.sh'

    def __init__(self, name, run_dir, netns, popen_args, **popen_kwargs):
        """Prefix popen_args with sudo, the helper script and the netns name."""
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.netns = netns

        netns_prefix = ['sudo', self.NETNS_EXEC_BIN, self.netns]
        self.popen_args = netns_prefix + list(popen_args)
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)

    # HACK: Since we run under sudo, only way to kill root-owned process is to kill as root...
    # This function is overwritten from Process.
    def send_signal(self, sig):
        """Deliver the signal by running 'kill' through the same netns wrapper."""
        signal_flag = '-%d' % int(sig)
        target_pid = str(self.process_obj.pid)
        run_local_netns_sync(self.run_dir, self.name()+"-kill", self.netns,
                             ('kill', signal_flag, target_pid))
298
299
def run_local_sync(run_dir, name, popen_args):
    """Run a local command to completion in a new sub-directory of run_dir.

    The sub-directory and the process are both named by `name`. Errors,
    including a non-zero exit code, are raised by Process.launch_sync().
    """
    Process(name, run_dir.new_dir(name), popen_args).launch_sync()
Pau Espin Pedrole4358a92018-10-01 11:27:55 +0200304
def run_local_netns_sync(run_dir, name, netns, popen_args):
    """Run a command to completion as a NetNSProcess for the given netns.

    Output goes to a new sub-directory of run_dir named by `name`. Errors,
    including a non-zero exit code, are raised by launch_sync().
    """
    NetNSProcess(name, run_dir.new_dir(name), netns, popen_args).launch_sync()
Pau Espin Pedrolfd4c1442018-10-25 17:37:23 +0200309
def run_remote_sync(run_dir, remote_user, remote_addr, name, popen_args, remote_cwd=None):
    """Run a command to completion on a remote host via RemoteProcess.

    Local output logs go to a new sub-directory of run_dir named by `name`.
    Errors, including a non-zero exit code, are raised by launch_sync().
    """
    local_dir = run_dir.new_dir(name)
    remote_proc = RemoteProcess(name, local_dir, remote_user, remote_addr,
                                remote_cwd, popen_args)
    remote_proc.launch_sync()
Pau Espin Pedrole4358a92018-10-01 11:27:55 +0200314
def scp(run_dir, remote_user, remote_addr, name, local_path, remote_path):
    """Recursively copy local_path to remote_path on the remote host with scp."""
    target = '%s@%s:%s' % (remote_user, remote_addr, remote_path)
    scp_cmd = ('scp', '-r', local_path, target)
    run_local_sync(run_dir, name, scp_cmd)
317
def copy_inst_ssh(run_dir, inst, remote_dir, remote_user, remote_addr, remote_rundir_append, cfg_file_name):
    """Copy an installation and a config file into a fresh remote directory.

    The remote directory is removed if it exists and created anew, `inst` is
    copied into it, a run directory named by remote_rundir_append is created
    inside it and the config file is copied next to it.

    remote_dir: must offer child(); presumably a Dir -- verify against caller.
    Returns a Dir for the installation's location inside remote_dir.
    """
    def on_remote(step_name, command):
        # every remote step shares the same local run_dir and ssh target
        run_remote_sync(run_dir, remote_user, remote_addr, step_name, command)

    remote_inst = Dir(remote_dir.child(os.path.basename(str(inst))))
    remote_dir_str = str(remote_dir)

    # start from scratch: drop a possibly existing remote dir, then re-create
    on_remote('rm-remote-dir',
              ('test', '!', '-d', remote_dir_str, '||', 'rm', '-rf', remote_dir_str))
    on_remote('mk-remote-dir', ('mkdir', '-p', remote_dir_str))
    scp(run_dir, remote_user, remote_addr, 'scp-inst-to-remote', str(inst), remote_dir_str)

    remote_run_dir = remote_dir.child(remote_rundir_append)
    on_remote('mk-remote-run-dir', ('mkdir', '-p', remote_run_dir))

    remote_config_file = remote_dir.child(os.path.basename(cfg_file_name))
    scp(run_dir, remote_user, remote_addr, 'scp-cfg-to-remote', cfg_file_name, remote_config_file)
    return remote_inst
331
# vim: expandtab tabstop=4 shiftwidth=4