commit af0449d9e0835b5bf45a1665f5f65a9b3d27543e Author: Harald Welte Date: Mon May 1 19:14:12 2023 +1000 initial checkin of a test lab power management daemon diff --git a/doc/server-power-mgmt-daemon.txt b/doc/server-power-mgmt-daemon.txt new file mode 100644 index 0000000..b9f4f55 --- /dev/null +++ b/doc/server-power-mgmt-daemon.txt @@ -0,0 +1,81 @@ +h3. resources + +A "resource" is a power-switched resource such as a given server. It can be +switched on and off using a "switcher". + +A resource has the following data: +* name +* description +* status (UNKNOWN, OFF, POWERED, AVAILABLE) +* reference to switcher +* switcher configuration (depends on switcher) +* optional: upstream resource dependency +* optional: current power consumption +* expected availability time (last time from OFF->POWERED to POWERED->AVAILABLE) + + +h3. switcher + +A "switcher" is something that can turn on/off the power to a given "resource". + +An abstract base class defines the interface, with derived classes providing implementations for +* intellinet PDU +* sispmctl +* tasmota +* soft-power-off + wake-on-lan +* supermicro [open]ipmi ? + +Each of the derived classes will have specific parameters describing the address/id of the switcher, +such as the IP address + port number on an intellinet PDU. + +h3. availability_checker + +An "availability_checker" is something that can check whether or not a given "resource" is available for use. +A typical example would be to ping the host after it booted, or to check whether the SSHd is +accepting inbound connections. + +The availability_checker is called internally +* on 75% of expected_avail_time expiration (after OFF->POWERED), afterwards +* every 1s/5s, as long as we remain in POWERED state + + + +h3. usage token + +Every user of a resource has to obtain a usage token before using a service. The daemon will internally keep a use counter per resource, which counts the concurrently active tokens for the given resource. 
Once the counter drops to zero, the device will be powered off + +usage tokens have the following information: +* reference of the related resource (resolved by name from client API request) +* duration of the token (specified during API call to get the token) +* name of the related user +* description of the user / usage + + +h2. REST API calls + + +h3. GET api/v1/power_resource/<name> + +* name +* description +* status +* switcher +** type +** channel + +h3. POST api/v1/power_resource/<name>/usage_token_get + +Request: +* user +* duration + +Response: +* token +* user +* duration +* resource +* expected_availability_time (0 in case already available) + +h3. PUT api/v1/power_resource/<name>/usage_token/<token> + + diff --git a/lib/avail_always.py b/lib/avail_always.py new file mode 100644 index 0000000..91d2621 --- /dev/null +++ b/lib/avail_always.py @@ -0,0 +1,8 @@ +import icmplib + +from model import AvailabilityChecker + +class AlwaysAvailChecker(AvailabilityChecker): + """A dummy AvailabilityChecker implementation of a resource that's always available.""" + def is_available(self) -> bool: + return True diff --git a/lib/avail_ping.py b/lib/avail_ping.py new file mode 100644 index 0000000..0d93c42 --- /dev/null +++ b/lib/avail_ping.py @@ -0,0 +1,13 @@ +import icmplib + +from model import AvailabilityChecker + + +class IcmpAvailChecker(AvailabilityChecker): + """An AvailabilityChecker implementation for determining host availability via ICMP Ping.""" + def __init__(self, dest_addr: str): + self.dest_addr = dest_addr + + def is_available(self) -> bool: + host = icmplib.ping(self.dest_addr, count=1, timeout=2, privileged=False) + return host.is_alive diff --git a/lib/model.py b/lib/model.py new file mode 100644 index 0000000..69ec604 --- /dev/null +++ b/lib/model.py @@ -0,0 +1,178 @@ +# coding=utf-8 +"""Data model: Switcher, SwitcherGroup, UsageToken, AvailabilityChecker and Resource. +""" + +# (C) 2023 by Harald Welte +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the 
Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from typing import Optional, Dict, Tuple + +import abc +import uuid +from datetime import datetime, timedelta + +class Switcher(abc.ABC): + """Base class for some device that can switch a power rail.""" + def __init__(self, group: 'SwitcherGroup', name: str): + """Create the switcher and register it as channel `name` of `group`.""" + self.group = group + self.name = name + # seed the cache first: _obtain_actual_status() overrides may look at self.status + self.status = "unknown" + self.status = self._obtain_actual_status() + self.group.channel_add(self) + + @staticmethod + def _is_valid_status(status: str) -> bool: + """Determine if the given status string is valid.""" + return status in ("unknown", "on", "off") + + @classmethod + def _validate_status(cls, status: str): + """Raise ValueError unless the given status string is valid.""" + if not cls._is_valid_status(status): + raise ValueError("Invalid Status: %s" % status) + + def _update_status(self, status: str): + """Helper method used by derived class to update status""" + # validate first, so an invalid value never ends up in the cached status + self._validate_status(status) + self.status = status + + def get_status(self): + """Return the (cached) status""" + return self.status + + @abc.abstractmethod + def _status_change(self, new_status: str): + """Derived class must implement this to actually change the status""" + pass + + def _obtain_actual_status(self) -> str: + """Derived class should override this method, if it can poll the hardware about its actual + current status.""" + return "unknown" + + def status_change(self, new_status: str) -> str: + """Change the status of the switcher. 
Returns new status; raises ValueError for an invalid status.""" + if self.status != new_status: + if not self._is_valid_status(new_status): + raise ValueError("Invalid Status: %s" % new_status) + self._status_change(new_status) + self.status = new_status + return self.status + + +class SwitcherGroup(abc.ABC): + """A named collection of Switcher channels, indexed by channel name.""" + def __init__(self, name: str, channels: Optional[dict] = None): + self.name = name + self.channels = {} if channels is None else channels + + def __getitem__(self, val): + return self.channel_get(val) + + def channel_add(self, chan: Switcher): + """Add a switcher channel to a switcher group.""" + self.channels[chan.name] = chan + + def channel_get(self, name: str): + """Get a channel within a switcher group.""" + return self.channels[name] + + +class UsageToken: + """I represent one given (concurrent) usage of a given resource.""" + def __init__(self, resource: 'Resource', user_name: str, usage_name: str, duration_s: int): + self.resource = resource + self.user_name = user_name + self.expires_at = datetime.now() + timedelta(seconds=duration_s) + self.uuid = uuid.uuid4() + + def has_expired(self) -> bool: + return self.expires_at <= datetime.now() + + def to_dict(self): + return {'resource': self.resource.name, + 'user_name': self.user_name, + 'expires_at': self.expires_at.isoformat(), + 'uuid': str(self.uuid)} + +class AvailabilityChecker(abc.ABC): + @abc.abstractmethod + def is_available(self) -> bool: + """Return True if the checked resource is currently available for use.""" + +class Resource: + """A Resource is some kind of powered device. 
A Resource tracks its users via UsageTokens + and it will be automatically powered up (by its associated Switcher) once the usage count + is > 0, and powered down once it gets back to 0.""" + def __init__(self, name: str, switcher: Switcher, checker: AvailabilityChecker, desc: str = ""): + self.name = name + self.switcher = switcher + self.avail_checker = checker + self.description = desc + self.status = "unknown" + self.use_count = 0 + self.usage_tokens = [] + self.determine_status() + + def usage_inc(self): + """Count one more user; the first user switches the resource on. Returns the new count.""" + self.use_count += 1 + if self.use_count == 1: + self.switcher.status_change("on") + return self.use_count + + def usage_dec(self): + """Count one user less; the last user switches the resource off. Returns the new count.""" + if self.use_count < 1: + raise RuntimeError("usage_dec() called while use_count is already 0") + self.use_count -= 1 + if self.use_count == 0: + self.switcher.status_change("off") + return self.use_count + + def usage_token_get(self, user_name: str, usage: str, duration_s: int) -> UsageToken: + """Create/obtain a new usage token.""" + token = UsageToken(self, user_name, usage, duration_s) + self.usage_tokens.append(token) + self.usage_inc() + return token + + def usage_token_put(self, token: UsageToken): + """Release/put an existing usage token.""" + self.usage_tokens.remove(token) + self.usage_dec() + + def get_status(self): + """Return the (cached) status of the resource.""" + return self.status + + def determine_status(self): + """Re-determine the status of the resource, and return it.""" + sw_status = self.switcher.get_status() + if sw_status == "on": + self.status = "available" if self.avail_checker.is_available() else "powered" + else: + self.status = sw_status + return self.get_status() + + def to_dict(self): + return {'name': self.name, + 'description': self.description, + 'status': self.status, + 'use_count': self.use_count} + diff --git a/lib/osmo-lpmgd.py b/lib/osmo-lpmgd.py new file mode 100755 index 0000000..f48dcf7 --- /dev/null +++ b/lib/osmo-lpmgd.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +# RESTful HTTP service for performing power management tasks +# 
+# (C) 2023 by Harald Welte +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import argparse + +from klein import Klein + +from model import Resource, SwitcherGroup +from rest_server import PwrMgmtRestServer +from switcher_dummy import DummySwitcher +from avail_always import AlwaysAvailChecker + + +def main(argv): + """Parse the command line, set up the (still hard-coded) resources and run the REST server.""" + parser = argparse.ArgumentParser() + parser.add_argument("-H", "--host", help="Host/IP to bind HTTP to", default="localhost") + parser.add_argument("-p", "--port", help="TCP port to bind HTTP to", type=int, default=8000) + + # argv[0] is the program name; argparse only wants the arguments after it + args = parser.parse_args(argv[1:]) + + # TODO: implement this via some kind of config file + + swgrp1 = SwitcherGroup("swgrp1") + switcher1 = DummySwitcher(swgrp1, "dumsw1", None) + resources = [Resource("resrc1", switcher=switcher1, checker=AlwaysAvailChecker())] + + prs = PwrMgmtRestServer(resources) + prs.app.run(args.host, args.port) + +if __name__ == "__main__": + main(sys.argv) diff --git a/lib/rest_server.py b/lib/rest_server.py new file mode 100644 index 0000000..abc8996 --- /dev/null +++ b/lib/rest_server.py @@ -0,0 +1,80 @@ +import json + +# (C) 2023 by Harald Welte +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from typing import List +from klein import Klein + +import model + +def set_headers(request): + request.setHeader('Content-Type', 'application/json') + +class ApiError: + def __init__(self, msg:str, sw=None): + self.msg = msg + self.sw = sw + + def __str__(self): + d = {'error': {'message':self.msg}} + if self.sw: + d['error']['status_word'] = self.sw + return json.dumps(d) + +class PwrMgmtRestServer: + app = Klein() + + def __init__(self, resources: List[model.Resource]): + self.resources = {r.name: r for r in resources} + + @app.handle_errors(KeyError) + def key_error(self, request, failure): + set_headers(request) + request.setResponseCode(404) + return str(ApiError("Unknown resource")) + + @app.route('/api/v1/resource/<resrc>/status', methods=['GET']) + def resource_status(self, request, resrc): + resource = self.resources[resrc] + resource.determine_status() + set_headers(request) + return json.dumps(resource.to_dict()) + + @app.route('/api/v1/resource/<resrc>/obtain_usage_token', methods=['POST']) + def resource_obtain_token(self, request, resrc): + resource = self.resources[resrc] + try: + content = json.loads(request.content.read()) + user_name = content['user_name'] + usage = content['usage'] + duration_s = int(content['duration_seconds']) + except (ValueError, KeyError, TypeError): + set_headers(request) + request.setResponseCode(400) + return str(ApiError("Malformed Request")) + + token = resource.usage_token_get(user_name, usage, duration_s) + + set_headers(request) + return json.dumps(token.to_dict()) + + @app.route('/api/v1/resource/<resrc>/token/<token>/release', methods=['GET']) + def token_release(self, request, resrc, token): + resource = 
self.resources[resrc] + # the URL only carries the token's UUID: look up the UsageToken (KeyError -> 404) + token_obj = {str(t.uuid): t for t in resource.usage_tokens}[token] + resource.usage_token_put(token_obj) + request.setResponseCode(200) diff --git a/lib/switcher_dummy.py b/lib/switcher_dummy.py new file mode 100644 index 0000000..874689d --- /dev/null +++ b/lib/switcher_dummy.py @@ -0,0 +1,18 @@ + +from model import Switcher, SwitcherGroup + +class DummySwitcher(Switcher): + """A Switcher without any hardware behind it; it only prints the requested changes.""" + def __init__(self, group: SwitcherGroup, name: str, conf): + super().__init__(group, name) + self.conf = conf + + def _status_change(self, new_status: str): + """Print the requested status change; there is nothing to actually switch.""" + print("DummySwitcher %s: Status change %s -> %s" % (self.name, self.status, new_status)) + + def _obtain_actual_status(self): + """Our dummy switcher is always off in the initial state""" + if self.status == "unknown": + return "off" + return self.status diff --git a/lib/switcher_intellinet.py b/lib/switcher_intellinet.py new file mode 100644 index 0000000..7708963 --- /dev/null +++ b/lib/switcher_intellinet.py @@ -0,0 +1,6 @@ +import urllib.request +import xml.etree.ElementTree as ET + +from model import Switcher, SwitcherGroup + +# FIXME: port from osmo-gsm-tester.git/src/osmo_gsm_tester/obj/powersupply_intellinet.py diff --git a/lib/switcher_sispm.py b/lib/switcher_sispm.py new file mode 100644 index 0000000..ce55e47 --- /dev/null +++ b/lib/switcher_sispm.py @@ -0,0 +1,13 @@ +import pysispm + +from model import Switcher, SwitcherGroup + +# FIXME: port from osmo-gsm-tester.git/src/osmo_gsm_tester/obj/powersupply_sispm.py + +class SispmSwitcher(Switcher): + """Placeholder for a sispm-based Switcher; switching is not implemented yet.""" + def __init__(self, group: SwitcherGroup, name: str, conf): + super().__init__(group, name) + self.conf = conf + def _status_change(self, new_status: str): + raise NotImplementedError("FIXME: SispmSwitcher._status_change is not ported yet")