initial checkin of a test lab power management daemon

This commit is contained in:
Harald Welte 2023-05-01 19:14:12 +10:00
commit af0449d9e0
9 changed files with 443 additions and 0 deletions

View File

@ -0,0 +1,81 @@
h3. resources
A "resource" is a power-switched resource such as a given server. It can be
switched on and off using a "switcher".
A resource has the following data:
* name
* description
* status (UNKNOWN, OFF, POWERED, AVAILABLE)
* reference to switcher
* switcher configuration (depends on switcher)
* optional: upstream resource dependency
* optional: current power consumption
* expected availability time (last time from OFF->POWERED to POWERED->AVAILABLE)
h3. switcher
A "switcher" is something that can turn on/off the power to a given "resource".
An abstract base class defines the interface, with derived classes providing implementations for
* intellinet PDU
* sispmctl
* tasmota
* soft-power-off + wake-on-lan
* supermicro [open]ipmi ?
Each of the derived classes will have specific parameters describing the address/id of the switcher,
such as the IP address + port number on an intellinet PDU
h3. availability_checker
An "availability_checker" is something that can check whether a given "resource" is available for use.
A typical example would be to ping the host after it has booted, or to check whether the SSHd is
accepting inbound connections.
The availability_checker is called internally
* on 75% of expected_avail_time expiration (after OFF->POWERED), afterwards
* every 1s/5s, as long as we remain in POWERED state
h3. usage token
Every user of a resource has to obtain a usage token before using a service. The daemon will internally keep a use counter per resource, which is incremented by one for each concurrently active token for the given resource. Once the counter drops to zero, the device will be powered off.
Usage tokens have the following information:
* reference of the related resource (resolved by name from client API request)
* duration of the token (specified during API call to get the token)
* name of the related user
* description of the user / usage
h2. REST API calls
h3. GET api/v1/power_resource/<name>
* name
* description
* status
* switcher
** type
** channel
h3. POST api/v1/power_resource/<name>/usage_token_get
Request:
* user
* duration
Response:
* token
* user
* duration
* resource
* expected_availability_time (0 in case already available)
h3. PUT api/v1/power_resource/<name>/usage_token/<token>

8
lib/avail_always.py Normal file
View File

@ -0,0 +1,8 @@
import icmplib
from model import AvailabilityChecker
class AlwaysAvailChecker(AvailabilityChecker):
    """Dummy AvailabilityChecker for a resource that counts as available at all times."""

    def is_available(self) -> bool:
        """Report the resource as available, unconditionally."""
        return True

13
lib/avail_ping.py Normal file
View File

@ -0,0 +1,13 @@
import icmplib
from model import AvailabilityChecker
class IcmpAvailChecker(AvailabilityChecker):
    """AvailabilityChecker that decides host availability by means of an ICMP ping."""

    def __init__(self, dest_addr: str):
        """Remember the address that is_available() is going to ping."""
        self.dest_addr = dest_addr

    def is_available(self) -> bool:
        """Ping the destination once (count=1, timeout=2, unprivileged) and return
        the is_alive flag of the result."""
        ping_args = dict(count=1, timeout=2, privileged=False)
        return icmplib.ping(self.dest_addr, **ping_args).is_alive

178
lib/model.py Normal file
View File

@ -0,0 +1,178 @@
# coding=utf-8
"""
"""
# (C) 2023 by Harald Welte <laforge@osmocom.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from typing import Optional, Dict, Tuple
import abc
import uuid
from datetime import datetime, timedelta
class Switcher(abc.ABC):
    """Base class for some device that can switch a power rail.

    A switcher caches its status (one of "unknown", "on", "off") and registers
    itself as a channel of the SwitcherGroup it is created for.  Derived classes
    must implement _status_change() and may override _obtain_actual_status().
    """

    def __init__(self, group: 'SwitcherGroup', name: str):
        """Create the switcher and add it as a channel to *group*.

        Args:
            group: the switcher group this channel belongs to
            name: name of the channel within the group
        """
        self.group = group
        self.name = name
        # Start out as "unknown" so that _obtain_actual_status() implementations
        # can look at self.status while the real status is being determined.
        self.status = "unknown"
        self.status = self._obtain_actual_status()
        self.group.channel_add(self)

    @staticmethod
    def _is_valid_status(status: str) -> bool:
        """Determine if the given status string is valid."""
        return status in ("unknown", "on", "off")

    @classmethod
    def _validate_status(cls, status: str):
        """Raise ValueError if the given status string is not valid."""
        if not cls._is_valid_status(status):
            raise ValueError("Invalid Status: %s" % status)

    def _update_status(self, status: str):
        """Helper method used by derived class to update the (cached) status.

        Raises:
            ValueError: if *status* is not a valid status string
        """
        self._validate_status(status)
        self.status = status

    def get_status(self):
        """Return the (cached) status"""
        return self.status

    @abc.abstractmethod
    def _status_change(self, new_status: str):
        """Derived class must implement this to actually change the status"""

    def _obtain_actual_status(self) -> str:
        """Derived class should override this method, if it can poll the hardware about its actual
        current status."""
        return "unknown"

    def status_change(self, new_status: str) -> str:
        """Change the status of the switcher. Returns new status.

        Raises:
            ValueError: if *new_status* is not a valid status string
        """
        self._validate_status(new_status)
        if self.status != new_status:
            # Let the derived class perform the actual switching first; the cached
            # status is only updated afterwards, so the hook still sees the old one.
            self._status_change(new_status)
            self._update_status(new_status)
        return self.status
class SwitcherGroup(abc.ABC):
    """A named group of Switcher channels, indexed by channel name."""

    def __init__(self, name: str, channels: Optional[dict] = None):
        """Create a switcher group.

        Args:
            name: name of the group
            channels: optional initial mapping of channel name to Switcher; a
                fresh empty dict is used when omitted
        """
        self.name = name
        # A "{}" default would be shared by every group created without an
        # explicit mapping, so each instance gets its own dict here.
        self.channels = {} if channels is None else channels

    def __getitem__(self, val):
        """Allow group[name] as short-hand for group.channel_get(name)."""
        return self.channel_get(val)

    def channel_add(self, chan: 'Switcher'):
        """Add a switcher channel to a switcher group."""
        self.channels[chan.name] = chan

    def channel_get(self, name: str):
        """Get a channel within a switcher group.

        Raises:
            KeyError: if no channel of that name exists in the group
        """
        return self.channels[name]
class UsageToken:
    """I represent one given (concurrent) usage of a given resource."""

    def __init__(self, resource: 'Resource', user_name: str, usage_name: str, duration_s: int):
        """Create a token with a random UUID that expires *duration_s* seconds from now.

        Args:
            resource: the resource this token is a usage of
            user_name: name of the user holding the token
            usage_name: description of the usage
            duration_s: life time of the token in seconds
        """
        self.resource = resource
        self.user_name = user_name
        self.usage_name = usage_name
        self.expires_at = datetime.now() + timedelta(seconds=duration_s)
        self.uuid = uuid.uuid4()

    def has_expired(self) -> bool:
        """Return True once the expiration time of the token has been reached."""
        return self.expires_at <= datetime.now()

    def to_dict(self):
        """Return a JSON-serialisable dict describing the token.

        The expiration time is rendered in ISO 8601 format and the UUID as a
        string, as json.dumps() can neither encode datetime nor UUID objects.
        """
        return {
            'resource': self.resource.name,
            'user_name': self.user_name,
            'usage_name': self.usage_name,
            'expires_at': self.expires_at.isoformat(),
            'uuid': str(self.uuid),
        }
class AvailabilityChecker(abc.ABC):
    """Abstract interface of anything able to tell whether a resource is available."""

    @abc.abstractmethod
    def is_available(self) -> bool:
        """Tell whether the resource is available; to be implemented by derived classes."""
class Resource:
    """A Resource is some kind of powered device. A Resource tracks its users via UsageTokens
    and it will be automatically powered up (by its associated Switcher) once the usage count
    is > 0, and powered down once it gets back to 0."""

    def __init__(self, name: str, switcher: 'Switcher', checker: 'AvailabilityChecker',
                 desc: str = ""):
        """Create a resource and determine its initial status.

        Args:
            name: name of the resource
            switcher: the Switcher controlling power of the resource
            checker: the AvailabilityChecker used while the switcher is "on"
            desc: optional human-readable description
        """
        self.name = name
        self.switcher = switcher
        self.avail_checker = checker
        self.description = desc
        self.status = "unknown"
        self.use_count = 0
        self.usage_tokens = []
        self.determine_status()

    def usage_inc(self):
        """Increment the use count; the first user switches the resource on.
        Returns the new use count."""
        self.use_count += 1
        if self.use_count == 1:
            self.switcher.status_change("on")
        return self.use_count

    def usage_dec(self):
        """Decrement the use count; the last user leaving switches the resource off.
        Returns the new use count."""
        assert self.use_count >= 1, "usage_dec() called without any active usage"
        self.use_count -= 1
        if self.use_count == 0:
            self.switcher.status_change("off")
        return self.use_count

    def usage_token_get(self, user_name: str, usage: str, duration_s: int) -> 'UsageToken':
        """Create/obtain a new usage token."""
        token = UsageToken(self, user_name, usage, duration_s)
        self.usage_tokens.append(token)
        self.usage_inc()
        return token

    def usage_token_put(self, token: 'UsageToken'):
        """Release/put an existing usage token.

        Args:
            token: the token object itself, or its UUID (as uuid.UUID or string)

        Raises:
            KeyError: if the token is not held on this resource
        """
        for held in self.usage_tokens:
            # Accept the identifier as well, since API clients only know the UUID.
            if held is token or str(held.uuid) == str(token):
                break
        else:
            raise KeyError("Unknown usage token: %s" % (token,))
        self.usage_tokens.remove(held)
        self.usage_dec()

    def get_status(self):
        """Return the (cached) status of the resource."""
        return self.status

    def determine_status(self):
        """Re-determine the status of the resource, and return it."""
        sw_status = self.switcher.get_status()
        if sw_status == "on":
            # Power is applied; the checker decides if the resource is usable yet.
            if self.avail_checker.is_available():
                self.status = "available"
            else:
                self.status = "powered"
        else:
            self.status = sw_status
        return self.get_status()

    def to_dict(self):
        """Return a dict with name, description, (cached) status and use count."""
        return {
            'name': self.name,
            'description': self.description,
            'status': self.status,
            'use_count': self.use_count,
        }

50
lib/osmo-lpmgd.py Executable file
View File

@ -0,0 +1,50 @@
#!/usr/bin/env python3
# RESTful HTTP service for performing power management tasks
#
# (C) 2023 by Harald Welte <laforge@osmocom.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import argparse
from klein import Klein
from model import Resource, SwitcherGroup
from rest_server import PwrMgmtRestServer
from switcher_dummy import DummySwitcher
from avail_always import AlwaysAvailChecker
def main(argv):
    """Parse the command line, set up the resources and run the REST server.

    Args:
        argv: full argument vector, including the program name at index 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-H", "--host", help="Host/IP to bind HTTP to", default="localhost")
    # type=int makes a port given on the command line an integer, just like the default
    parser.add_argument("-p", "--port", help="TCP port to bind HTTP to", type=int, default=8000)

    # Parse the arguments we were handed rather than implicitly using sys.argv
    args = parser.parse_args(argv[1:])

    # TODO: implement this via some kind of config file
    resources = []
    swgrp1 = SwitcherGroup("swgrp1")
    switcher1 = DummySwitcher(swgrp1, "dumsw1", None)
    resources.append(Resource("resrc1", switcher=switcher1, checker=AlwaysAvailChecker()))

    prs = PwrMgmtRestServer(resources)
    prs.app.run(args.host, args.port)


if __name__ == "__main__":
    main(sys.argv)

80
lib/rest_server.py Normal file
View File

@ -0,0 +1,80 @@
import json
# (C) 2023 by Harald Welte <laforge@osmocom.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from typing import List
from klein import Klein
import model
def set_headers(request):
    """Mark the response of the given request as carrying JSON content."""
    request.setHeader('Content-Type', 'application/json')
class ApiError:
    """An API error, rendered as a JSON document by str()."""

    def __init__(self, msg: str, sw=None):
        """Store the error message and the optional status word."""
        self.msg, self.sw = msg, sw

    def __str__(self):
        """Serialise to JSON; the status word is only included if it is truthy."""
        detail = {'message': self.msg}
        if self.sw:
            detail['status_word'] = self.sw
        return json.dumps({'error': detail})
class PwrMgmtRestServer:
    """REST front-end exposing a set of resources, indexed by their name."""
    app = Klein()

    def __init__(self, resources: List[model.Resource]):
        """Build the name -> resource lookup table from the given list of resources."""
        self.resources = {r.name: r for r in resources}

    @app.handle_errors(KeyError)
    def key_error(self, request, failure):
        """Map a KeyError raised by a handler (resource lookup failure) to a 404 response."""
        set_headers(request)
        request.setResponseCode(404)
        return str(ApiError("Unknown resource"))

    @app.route('/api/v1/resource/<resrc>/status', methods=['GET'])
    def resource_status(self, request, resrc):
        """Re-determine the status of the resource and return its description as JSON."""
        resource = self.resources[resrc]
        resource.determine_status()
        set_headers(request)
        return json.dumps(resource.to_dict())

    @app.route('/api/v1/resource/<resrc>/obtain_usage_token', methods=['POST'])
    def resource_obtain_token(self, request, resrc):
        """Obtain a usage token for the resource.

        The request body must be a JSON object with the members 'user_name',
        'usage' and 'duration_seconds' (a number); anything else is answered
        with a 400 error.
        """
        resource = self.resources[resrc]
        try:
            content = json.loads(request.content.read())
            user_name = content['user_name']
            usage = content['usage']
            duration_s = content['duration_seconds']
            # bool is a subclass of int, but certainly not a meaningful duration
            if isinstance(duration_s, bool) or not isinstance(duration_s, (int, float)):
                raise TypeError("duration_seconds must be a number")
        except (ValueError, KeyError, TypeError):
            # ValueError: body is not valid JSON / not decodable
            # KeyError: mandatory member missing
            # TypeError: body is not a JSON object, or wrong type of duration
            set_headers(request)
            request.setResponseCode(400)
            return str(ApiError("Malformed Request"))
        token = resource.usage_token_get(user_name, usage, duration_s)
        set_headers(request)
        return json.dumps(token.to_dict())

    @app.route('/api/v1/resource/<resrc>/token/<token>/release', methods=['GET'])
    def token_release(self, request, resrc, token):
        """Release the usage token whose UUID is given in the URL.

        An unknown token is answered with a 404 error.
        """
        resource = self.resources[resrc]
        # find token within resource: the URL only carries the UUID as string
        held = next((t for t in resource.usage_tokens if str(t.uuid) == token), None)
        if held is None:
            set_headers(request)
            request.setResponseCode(404)
            return str(ApiError("Unknown token"))
        resource.usage_token_put(held)
        request.setResponseCode(200)

15
lib/switcher_dummy.py Normal file
View File

@ -0,0 +1,15 @@
from model import Switcher, SwitcherGroup
class DummySwitcher(Switcher):
    """A Switcher without any hardware behind it; status changes are merely printed."""

    def __init__(self, group: SwitcherGroup, name: str, conf):
        """Create the dummy switcher as channel *name* of *group*.

        Args:
            group: the switcher group this channel belongs to
            name: name of the channel within the group
            conf: switcher configuration; only stored, never interpreted here
        """
        super().__init__(group, name)
        self.conf = conf

    def _status_change(self, new_status: str):
        """Print the requested status change instead of switching anything."""
        print("DummySwitcher %s: Status change %s -> %s" % (self.name, self.status, new_status))

    def _obtain_actual_status(self):
        """Our dummy switcher is always off in the initial state"""
        if self.status == "unknown":
            return "off"
        # There is no hardware to poll, so the cached status is all we have.
        return self.status

View File

@ -0,0 +1,6 @@
import urllib.request
import xml.etree.ElementTree as ET
from model import Switcher, SwitcherGroup
# FIXME: port from osmo-gsm-tester.git/src/osmo_gsm_tester/obj/powersupply_intellinet.py

12
lib/switcher_sispm.py Normal file
View File

@ -0,0 +1,12 @@
import pysispm
from model import Switcher, SwitcherGroup
# FIXME: port from osmo-gsm-tester.git/src/osmo_gsm_tester/obj/powersupply_sispm.py
class SispmSwitcher(Switcher):
    """Switcher for a sispm device; the actual switching is not implemented yet."""

    def __init__(self, group: SwitcherGroup, name: str, conf):
        """Create the switcher as channel *name* of *group*.

        Args:
            group: the switcher group this channel belongs to
            name: name of the channel within the group
            conf: switcher configuration; only stored for now
        """
        super().__init__(group, name)
        self.conf = conf

    def _status_change(self, new_status: str):
        """Not implemented yet; always raises NotImplementedError."""
        # FIXME: port from osmo-gsm-tester.git/src/osmo_gsm_tester/obj/powersupply_sispm.py
        raise NotImplementedError("SispmSwitcher cannot switch power yet")