2017-03-28 10:16:58 +00:00
|
|
|
# osmo_gsm_tester: process management
|
|
|
|
#
|
|
|
|
# Copyright (C) 2016-2017 by sysmocom - s.f.m.c. GmbH
|
|
|
|
#
|
|
|
|
# Author: Neels Hofmeyr <neels@hofmeyr.de>
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
2017-06-03 07:51:45 +00:00
|
|
|
# it under the terms of the GNU General Public License as
|
2017-03-28 10:16:58 +00:00
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
2017-06-03 07:51:45 +00:00
|
|
|
# GNU General Public License for more details.
|
2017-03-28 10:16:58 +00:00
|
|
|
#
|
2017-06-03 07:51:45 +00:00
|
|
|
# You should have received a copy of the GNU General Public License
|
2017-03-28 10:16:58 +00:00
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2017-03-28 12:30:28 +00:00
|
|
|
import os
|
|
|
|
import time
|
|
|
|
import subprocess
|
|
|
|
import signal
|
2017-06-23 09:43:38 +00:00
|
|
|
from datetime import datetime
|
2017-03-28 10:16:58 +00:00
|
|
|
|
2018-03-28 17:17:34 +00:00
|
|
|
from . import log
|
|
|
|
from .event_loop import MainLoop
|
2017-03-28 12:30:28 +00:00
|
|
|
from .util import Dir
|
2017-03-28 10:16:58 +00:00
|
|
|
|
2017-03-28 12:30:28 +00:00
|
|
|
class Process(log.Origin):
    '''Manage a local child process: launch, monitor, terminate, and keep
    stdout/stderr logs in a run directory.'''

    def __init__(self, name, run_dir, popen_args, **popen_kwargs):
        '''
        name: label used for logging and output files.
        run_dir: Dir (or path convertible to one) the process runs in.
        popen_args: argv list passed to subprocess.Popen.
        popen_kwargs: extra keyword arguments forwarded to subprocess.Popen.
        '''
        super().__init__(log.C_RUN, name)
        self.process_obj = None   # subprocess.Popen instance once launched
        self.result = None        # exit code, None while still running
        self.killed = None        # signal used to terminate, if any
        self.name_str = name
        self.run_dir = run_dir
        self.popen_args = popen_args
        self.popen_kwargs = popen_kwargs
        self.outputs = {}         # name -> (path, open file or None)
        if not isinstance(self.run_dir, Dir):
            self.run_dir = Dir(os.path.abspath(str(self.run_dir)))

    def set_env(self, key, value):
        '''Set one environment variable for the (future) child process.'''
        env = self.popen_kwargs.get('env') or {}
        env[key] = value
        self.popen_kwargs['env'] = env

    def make_output_log(self, name):
        '''
        create a non-existing log output file in run_dir to pipe stdout and
        stderr from this process to.
        '''
        path = self.run_dir.new_child(name)
        f = open(path, 'w')
        self.dbg(path)
        f.write('(launched: %s)\n' % datetime.now().strftime(log.LONG_DATEFMT))
        f.flush()
        self.outputs[name] = (path, f)
        return f

    def launch(self):
        '''Start the child process, with stdout/stderr piped to log files.'''
        log.dbg('cd %r; %s %s' % (
                os.path.abspath(str(self.run_dir)),
                ' '.join(['%s=%r' % (k, v) for k, v in self.popen_kwargs.get('env', {}).items()]),
                ' '.join(self.popen_args)))

        self.process_obj = subprocess.Popen(
            self.popen_args,
            stdout=self.make_output_log('stdout'),
            stderr=self.make_output_log('stderr'),
            stdin=subprocess.PIPE,
            shell=False,
            cwd=self.run_dir.path,
            **self.popen_kwargs)
        # include the pid in the log name now that we know it
        self.set_name(self.name_str, pid=self.process_obj.pid)
        self.log('Launched')

    def launch_sync(self):
        '''
        calls launch() method and block waiting for it to finish, serving the
        mainloop meanwhile.
        '''
        try:
            self.launch()
            self.wait()
        except Exception:
            self.terminate()
            # bare raise preserves the original traceback
            raise
        if self.result != 0:
            log.ctx(self)
            raise log.Error('Exited in error')

    def respawn(self):
        '''Launch the same process again after it has terminated.'''
        self.dbg('respawn')
        assert not self.is_running()
        self.result = None
        self.killed = None
        self.launch()

    def _poll_termination(self, time_to_wait_for_term=5):
        '''Poll up to time_to_wait_for_term seconds for the process to exit.
        Return True if it terminated, False on timeout.'''
        wait_step = 0.001
        waited_time = 0
        while True:
            # poll returns None if proc is still running
            self.result = self.process_obj.poll()
            if self.result is not None:
                return True
            waited_time += wait_step
            # make wait_step approach 1.0
            wait_step = (1. + 5. * wait_step) / 6.
            if waited_time >= time_to_wait_for_term:
                break
            time.sleep(wait_step)
        return False

    def send_signal(self, sig):
        '''Deliver signal sig to the child process.'''
        os.kill(self.process_obj.pid, sig)

    def terminate(self):
        '''Terminate the process: escalate SIGINT -> SIGTERM -> SIGKILL,
        waiting a few seconds between each; then reap it and clean up.'''
        if self.process_obj is None:
            return
        if self.result is not None:
            return

        while True:
            # first try SIGINT to allow stdout+stderr flushing
            self.log('Terminating (SIGINT)')
            self.send_signal(signal.SIGINT)
            self.killed = signal.SIGINT
            if self._poll_termination():
                break

            # SIGTERM maybe?
            self.log('Terminating (SIGTERM)')
            self.send_signal(signal.SIGTERM)
            self.killed = signal.SIGTERM
            if self._poll_termination():
                break

            # out of patience
            self.log('Terminating (SIGKILL)')
            self.send_signal(signal.SIGKILL)
            self.killed = signal.SIGKILL
            break

        self.process_obj.wait()
        self.cleanup()

    def cleanup(self):
        '''Close output logs and report how the process ended.'''
        self.dbg('Cleanup')
        self.close_output_logs()
        if self.result == 0:
            self.log('Terminated: ok', rc=self.result)
        elif self.killed:
            self.log('Terminated', rc=self.result)
        else:
            self.err('Terminated: ERROR', rc=self.result)
            #self.log_stdout_tail()
            self.log_stderr_tail()

    def log_stdout_tail(self):
        '''Log the last lines of the process' stdout, if any.'''
        m = self.get_stdout_tail(prefix='| ')
        if not m:
            return
        self.log('stdout:\n', m, '\n')

    def log_stderr_tail(self):
        '''Log the last lines of the process' stderr, if any.'''
        m = self.get_stderr_tail(prefix='| ')
        if not m:
            return
        self.log('stderr:\n', m, '\n')

    def close_output_logs(self):
        '''Flush and close all output log files; keep paths for later reads.'''
        for k, v in self.outputs.items():
            path, f = v
            if f:
                f.flush()
                f.close()
            self.outputs[k] = (path, None)

    def poll(self):
        '''Check whether the process has exited; run cleanup once it has.'''
        if self.process_obj is None:
            return
        if self.result is not None:
            return
        self.result = self.process_obj.poll()
        if self.result is not None:
            self.cleanup()

    def is_running(self, poll_first=True):
        '''Return True while the launched process has not yet exited.'''
        if poll_first:
            self.poll()
        return self.process_obj is not None and self.result is None

    def get_output(self, which):
        '''Return the entire contents of the named output log, or None.'''
        v = self.outputs.get(which)
        if not v:
            return None
        # only the path is needed; the write handle may already be closed
        path = v[0]
        with open(path, 'r') as f2:
            return f2.read()

    def get_output_tail(self, which, tail=10, prefix=''):
        '''Return the last `tail` lines of the named output, each line
        prepended with `prefix`, or None if there is no output.'''
        out = self.get_output(which)
        if not out:
            return None
        out = out.splitlines()
        tail = min(len(out), tail)
        return prefix + ('\n' + prefix).join(out[-tail:])

    def get_stdout(self):
        return self.get_output('stdout')

    def get_stderr(self):
        return self.get_output('stderr')

    def get_stdout_tail(self, tail=10, prefix=''):
        return self.get_output_tail('stdout', tail, prefix)

    def get_stderr_tail(self, tail=10, prefix=''):
        return self.get_output_tail('stderr', tail, prefix)

    def terminated(self, poll_first=True):
        '''Return True once the process has exited.'''
        if poll_first:
            self.poll()
        return self.result is not None

    def wait(self, timeout=300):
        '''Block (serving the main loop) until the process terminates.'''
        MainLoop.wait(self, self.terminated, timeout=timeout)
|
2017-03-28 12:30:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
class RemoteProcess(Process):
    '''A Process whose command line is wrapped in an ssh invocation, so it
    runs on a remote host while being monitored locally.'''

    def __init__(self, name, run_dir, remote_user, remote_host, remote_cwd, popen_args, **popen_kwargs):
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.remote_user = remote_user
        self.remote_host = remote_host
        self.remote_cwd = remote_cwd

        # hacky: instead of just prepending ssh, i.e. piping stdout and stderr
        # over the ssh link, we should probably run on the remote side,
        # monitoring the process remotely.
        cd = 'cd "%s"; ' % self.remote_cwd if self.remote_cwd else ''
        remote_cmd = '%s%s' % (cd, ' '.join(self.popen_args))
        login = self.remote_user + '@' + self.remote_host
        # We need double -t to force tty and be able to forward signals to
        # processes (SIGHUP) when we close ssh on the local side. As a result,
        # stderr seems to be merged into stdout in ssh client.
        self.popen_args = ['ssh', '-t', '-t', login, remote_cmd]
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)
|
2017-03-28 10:16:58 +00:00
|
|
|
|
2018-10-25 15:37:23 +00:00
|
|
|
class NetNSProcess(Process):
    '''A Process executed inside a network namespace via a sudo helper
    script.'''

    NETNS_EXEC_BIN = 'osmo-gsm-tester_netns_exec.sh'

    def __init__(self, name, run_dir, netns, popen_args, **popen_kwargs):
        super().__init__(name, run_dir, popen_args, **popen_kwargs)
        self.netns = netns

        wrapper = ['sudo', self.NETNS_EXEC_BIN, self.netns]
        self.popen_args = wrapper + list(popen_args)
        self.dbg(self.popen_args, dir=self.run_dir, conf=self.popen_kwargs)

    # HACK: Since we run under sudo, only way to kill root-owned process is to kill as root...
    # This function is overwritten from Process.
    def send_signal(self, sig):
        kill_cmd = ('kill', '-%d' % int(sig), str(self.process_obj.pid))
        run_local_netns_sync(self.run_dir, self.name() + "-kill", self.netns, kill_cmd)
|
|
|
|
|
|
|
|
|
2018-10-01 09:27:55 +00:00
|
|
|
def run_local_sync(run_dir, name, popen_args):
    '''Run popen_args as a local child process in a fresh subdirectory of
    run_dir and block until it exits.'''
    child_dir = run_dir.new_dir(name)
    Process(name, child_dir, popen_args).launch_sync()
|
2018-10-01 09:27:55 +00:00
|
|
|
|
2018-10-25 15:37:23 +00:00
|
|
|
def run_local_netns_sync(run_dir, name, netns, popen_args):
    '''Run popen_args inside network namespace netns, in a fresh
    subdirectory of run_dir, and block until it exits.'''
    child_dir = run_dir.new_dir(name)
    NetNSProcess(name, child_dir, netns, popen_args).launch_sync()
|
2018-10-25 15:37:23 +00:00
|
|
|
|
2018-10-01 09:27:55 +00:00
|
|
|
def run_remote_sync(run_dir, remote_user, remote_addr, name, popen_args, remote_cwd=None):
    '''Run popen_args on remote_addr (as remote_user, optionally in
    remote_cwd) via ssh and block until it exits.'''
    child_dir = run_dir.new_dir(name)
    RemoteProcess(name, child_dir, remote_user, remote_addr, remote_cwd, popen_args).launch_sync()
|
2018-10-01 09:27:55 +00:00
|
|
|
|
|
|
|
def scp(run_dir, remote_user, remote_addr, name, local_path, remote_path):
    '''Recursively copy local_path to remote_path on remote_addr via scp.'''
    target = '%s@%s:%s' % (remote_user, remote_addr, remote_path)
    run_local_sync(run_dir, name, ('scp', '-r', local_path, target))
|
|
|
|
|
|
|
|
def copy_inst_ssh(run_dir, inst, remote_dir, remote_user, remote_addr, remote_rundir_append, cfg_file_name):
    '''Copy an installation directory and a config file to a remote host and
    prepare a run directory there. Returns the remote installation Dir.'''
    remote_inst = Dir(remote_dir.child(os.path.basename(str(inst))))
    remote_dir_str = str(remote_dir)

    # wipe any stale remote dir, then recreate it and copy the inst over
    run_remote_sync(run_dir, remote_user, remote_addr, 'rm-remote-dir',
                    ('test', '!', '-d', remote_dir_str, '||', 'rm', '-rf', remote_dir_str))
    run_remote_sync(run_dir, remote_user, remote_addr, 'mk-remote-dir',
                    ('mkdir', '-p', remote_dir_str))
    scp(run_dir, remote_user, remote_addr, 'scp-inst-to-remote', str(inst), remote_dir_str)

    remote_run_dir = remote_dir.child(remote_rundir_append)
    run_remote_sync(run_dir, remote_user, remote_addr, 'mk-remote-run-dir',
                    ('mkdir', '-p', remote_run_dir))

    remote_config_file = remote_dir.child(os.path.basename(cfg_file_name))
    scp(run_dir, remote_user, remote_addr, 'scp-cfg-to-remote', cfg_file_name, remote_config_file)
    return remote_inst
|
|
|
|
|
2017-03-28 10:16:58 +00:00
|
|
|
# vim: expandtab tabstop=4 shiftwidth=4
|