bbb-utils/bbb-download.py

147 lines
3.5 KiB
Python
Executable File

#!/usr/bin/env python3
#
# bbb-download.py
#
# BBB recording downloader
# Tries to download all files that are part of a BBB recording and save them "as-is" to disk
#
# Copyright (c) 2023 Sylvain Munaut <tnt@246tNt.com>
# SPDX-License-Identifier: MIT
#
import argparse
import json
import pathlib
import re
import requests
import urllib.parse
import bbb
def bbb_slides_dl(fh):
    """Build the download entries for the slides listed in presentation_text.json.

    The document maps document IDs to collections keyed by page name. Both
    the document ID and the page name come from the remote server and are
    used to build the local destination path, so both are validated before
    being used.

    Args:
        fh: Readable file object containing the JSON document.

    Returns:
        A list of ``(url_path, local_path_parts)`` tuples, one per slide.
        ``url_path`` is ``presentation/<id>/<page>.png`` and
        ``local_path_parts`` is ``['presentation', <id>, '<page>.png']``.

    Raises:
        RuntimeError: If a document ID or a page name has an unexpected
            or unsafe format.
    """
    rv = []
    doc = json.load(fh)

    for fid, fc in doc.items():
        # Validate file id. fullmatch (unlike a '$' anchor) also rejects an
        # ID that carries a trailing newline.
        if not re.fullmatch(r'[0-9a-fA-F]{40,64}-[0-9]+', fid):
            raise RuntimeError('Unknown document ID format in presentation_text.json')

        # Iterate pages
        for fp in fc:
            # Validate it's a slide
            if not fp.startswith('slide-'):
                raise RuntimeError('Unknown document page type in presentation_text.json')

            # The page name becomes a single local path component; a path
            # separator in it would allow writing outside the destination
            # directory (e.g. 'slide-/../../x').
            if '/' in fp or '\\' in fp:
                raise RuntimeError('Unsafe document page name in presentation_text.json')

            # Add to the list of "to download"
            rv.append((f'presentation/{fid:s}/{fp:s}.png', ['presentation', fid, f'{fp:s}.png']))

    return rv
def bbb_shapes_svg_check(fh):
    """Run ``bbb.parse_shapes_svg`` on the downloaded shapes.svg and print the result.

    Args:
        fh: Open file object for the downloaded shapes.svg.

    Returns:
        Always an empty list.
    """
    parsed = bbb.parse_shapes_svg(fh)
    print(parsed)
    return []
# Initial list of files to fetch, in download order.
#
# Each entry is a tuple (url_path, local_path[, handler]):
#   url_path   - path joined onto the recording base URL
#   local_path - path of the file below the destination directory
#   handler    - optional callable invoked with the downloaded file opened
#                in binary mode; the entries it returns are appended to the
#                download list
BASE_DL = [
    ('captions.json', 'captions.json'),
    ('cursor.xml', 'cursor.xml'),
    ('deskshare.xml', 'deskshare.xml'),
    ('deskshare/deskshare.webm', 'deskshare.webm'),
    ('external_videos.json', 'external_videos.json'),
    ('metadata.xml', 'metadata.xml'),
    ('notes.xml', 'notes.xml'),
    ('panzooms.xml', 'panzooms.xml'),
    ('polls.json', 'polls.json'),
    ('presentation_text.json', 'presentation_text.json', bbb_slides_dl),
    ('shapes.svg', 'shapes.svg', bbb_shapes_svg_check),
    ('slides_new.xml', 'slides_new.xml'),
    ('video/webcams.webm', 'webcams.webm'),
]
def download_file(url, filename, timeout=30):
    """Stream the resource at *url* into the local file *filename*.

    Args:
        url: URL to fetch with an HTTP GET request.
        filename: Path of the local file to write the response body to.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds).
            Without one a stalled server would block the download forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On any other request failure,
            including timeouts.
        OSError: If the local file cannot be opened or written.
    """
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Check the status before opening the output file so that an HTTP
        # error does not leave an empty file behind.
        r.raise_for_status()

        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=128*1024):
                f.write(chunk)
def download_all_recursive(opts):
    """Download every file of a recording, following handler-discovered files.

    Starts from ``BASE_DL`` and fetches each entry below
    ``<opts.url>/presentation/<opts.meeting>/`` into ``opts.dest``. When an
    entry has a handler, the handler is called with the downloaded file and
    the entries it returns are appended to the work list.

    Downloads are best-effort: a file that cannot be fetched is reported and
    skipped, and its handler is not run.

    Args:
        opts: Parsed options providing ``url``, ``meeting`` and ``dest``
            (a ``pathlib.Path``).

    Raises:
        FileExistsError: If the destination directory already exists.
    """
    # Create the base directory (deliberately fails if it already exists)
    p_base = opts.dest.absolute()
    p_base.mkdir(parents=True)

    # Create the base url
    u_base = urllib.parse.urljoin(opts.url, f'presentation/{opts.meeting:s}/')

    # Loop init; the list grows while we iterate, hence the explicit index
    to_dl = list(BASE_DL)
    i = 0

    # Iterate until we're done
    while i < len(to_dl):
        # Entry to fetch
        e = to_dl[i]
        i = i + 1

        e_url = e[0]
        e_path = e[1]
        e_handler = e[2] if len(e) > 2 else None

        # Source URI
        e_url = urllib.parse.urljoin(u_base, e_url)

        # Destination path (either a single name or a list of components)
        if not isinstance(e_path, list):
            e_path = [e_path]
        e_path = pathlib.Path(p_base, *e_path)
        e_path.parent.mkdir(parents=True, exist_ok=True)

        # Trigger fetch
        print(e_url, e_path)
        try:
            download_file(e_url, e_path)
        except (requests.RequestException, OSError) as exc:
            # Best-effort: report the failure and move on. The handler must
            # not run, since the file may be missing or only partially
            # written.
            print(f'  download failed, skipping: {exc}')
            continue

        # If we have a handler, call it
        if callable(e_handler):
            with open(e_path, 'rb') as fh:
                to_dl.extend(e_handler(fh))
def parse_opt():
    """Parse the command line and return the resulting namespace.

    All three options are mandatory:
      -u/--url      BBB instance base URL
      -m/--meeting  Meeting ID
      -d/--dest     Destination directory (converted to ``pathlib.Path``)
    """
    cli = argparse.ArgumentParser(
        prog='bbb-download.py',
        description='BBB recording downloader',
    )

    # (flags, extra keyword arguments) for each mandatory option
    option_specs = (
        (('-u', '--url'), {'help': "BBB instance base URL"}),
        (('-m', '--meeting'), {'help': "Meeting ID"}),
        (('-d', '--dest'), {'type': pathlib.Path, 'help': "Destination directory"}),
    )
    for flags, extra in option_specs:
        cli.add_argument(*flags, required=True, **extra)

    return cli.parse_args()
def main():
    """Script entry point: parse the command line, then download the recording."""
    download_all_recursive(parse_opt())


# Only run when executed as a script, not when imported
if __name__ == '__main__':
    main()