Page MenuHomeDevCentral

Backup script for acquisitariat and other MySQL containers

Authored By
dereckson
Sat, Oct 4, 22:16
Size
12 KB
Referenced Files
None
Subscribers
None

Backup script for acquisitariat and other MySQL containers

#!/usr/bin/env python3
"""
mysqldump-by-size.py
Backup MySQL Docker containers by scanning the data volume:
- If a database total size <= threshold -> do a database-level mysqldump (one file).
- If database size > threshold -> do table-level mysqldump (one directory per db, one file per table),
but ignore tables matching patterns defined in YAML (patterns use % like MySQL; converted to glob *).
Usage:
python3 mysqldump-by-size.py --config config.yml
python3 mysqldump-by-size.py --config config.yml --container acquisitariat --data-dir /srv/acquisitariat/mysql
Requirements:
- Python 3.8+
- PyYAML (pip install pyyaml)
- mysqldump wrapper in PATH that accepts: mysqldump <container> <normal mysqldump options>
- openssl CLI available for encryption
Generated by ChatGPT with GPT 5
"""
import argparse
import datetime
import fnmatch
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile
from pathlib import Path
try:
import yaml
except Exception:
print("PyYAML is required. Install with: pip install pyyaml", file=sys.stderr)
sys.exit(2)
# ---------- Helpers ----------
def human_bytes(n):
    """Format a byte count as a short human-readable string, e.g. '1.5MB'."""
    value = float(n)
    for suffix in ("B", "KB", "MB", "GB", "TB"):
        if value < 1024:
            return f"{value:.1f}{suffix}"
        value /= 1024
    # Anything past TB is reported in petabytes.
    return f"{value:.1f}PB"
def get_dir_size(path: Path) -> int:
    """Return total size in bytes of files under path (recursive)."""
    size = 0
    for entry in path.rglob('*'):
        if not entry.is_file():
            continue
        try:
            size += entry.stat().st_size
        except OSError:
            # File vanished or is unreadable; ignore it rather than abort.
            continue
    return size
def find_databases(data_dir: Path):
    """Return the database directory names found in data_dir.

    Only directories count; MySQL internal schemas and hidden/temporary
    entries (names starting with '.' or '#') are excluded.
    """
    internal_schemas = ("mysql", "performance_schema", "sys", "information_schema")
    if not data_dir.is_dir():
        return []
    names = []
    for child in sorted(data_dir.iterdir()):
        if not child.is_dir():
            continue
        if child.name in internal_schemas:
            continue
        if child.name.startswith(('.', '#')):
            continue
        names.append(child.name)
    return names
def find_tables_from_db_dir(db_dir: Path):
    """Infer table names from per-table storage files in db_dir.

    Scans for the usual MySQL per-table file extensions and returns the
    sorted, de-duplicated basenames.
    """
    extensions = (".ibd", ".frm", ".MYD", ".MYI", ".ibdata")
    if not db_dir.is_dir():
        return []
    names = set()
    for entry in db_dir.iterdir():
        if not entry.is_file():
            continue
        for suffix in extensions:
            if entry.name.endswith(suffix):
                stem = entry.name[:-len(suffix)]
                # Skip files that are nothing but an extension.
                if stem:
                    names.add(stem)
                break
    return sorted(names)
def pattern_to_glob(pat: str) -> str:
    """Translate MySQL-style '%' wildcards into shell-glob '*' wildcards."""
    return '*'.join(pat.split('%'))
def parse_ignore_patterns(patterns):
    """Turn 'dbpattern.tablepattern' strings ('%' wildcards) into glob tuples.

    An entry with no dot is taken as a table pattern valid for any
    database. Returns a list of (db_glob, table_glob) tuples.
    """
    parsed = []
    for raw in patterns or []:
        entry = raw.strip()
        if not entry:
            continue
        head, sep, tail = entry.partition('.')
        if sep:
            db_pat, table_pat = head, tail
        else:
            # No database part: match every database.
            db_pat, table_pat = '%', entry
        parsed.append((pattern_to_glob(db_pat), pattern_to_glob(table_pat)))
    return parsed
def table_ignored(dbname: str, tablename: str, ignore_tuples) -> bool:
    """Return True if any (db_glob, table_glob) pair matches db + table."""
    return any(
        fnmatch.fnmatchcase(dbname, db_glob)
        and fnmatch.fnmatchcase(tablename, table_glob)
        for db_glob, table_glob in ignore_tuples
    )
def run_cmd(cmd, capture_output=False, check=True):
    """Run a subprocess, echoing the command line first."""
    rendered = ' '.join(map(str, cmd))
    print(f"+ {rendered}")
    return subprocess.run(cmd, capture_output=capture_output, check=check)
# ---------- Main backup logic ----------
def _containers_from_config(containers_cfg):
    """Normalize the 'containers' config entry into a list of dicts.

    Accepts either a mapping {name: {data_dir: ...}} / {name: data_dir}
    or a list of {name: ..., data_dir: ...} dicts. Plain-string list
    entries carry no data_dir and are skipped with a warning.
    """
    containers = []
    if isinstance(containers_cfg, dict):
        for name, value in containers_cfg.items():
            data_dir = value.get('data_dir') if isinstance(value, dict) else value
            containers.append({'name': name, 'data_dir': data_dir})
        return containers
    for item in containers_cfg:
        if isinstance(item, dict):
            if item.get('name'):
                containers.append({'name': item['name'], 'data_dir': item.get('data_dir')})
        elif isinstance(item, str):
            # simple list of names not supported without data_dir, skip
            print("Warning: container entry is string (no data_dir). Skipping:", item)
    return containers

def _dump_to_file(container, opts, dump_args, outfile, what):
    """Run `mysqldump <container> <opts> <dump_args>` redirected into outfile.

    Raises RuntimeError (mentioning `what`) when mysqldump exits non-zero.
    """
    cmd = ["mysqldump", container] + list(opts) + list(dump_args)
    with open(outfile, "wb") as fh:
        proc = subprocess.run(cmd, stdout=fh)
    if proc.returncode != 0:
        raise RuntimeError(f"mysqldump failed for {what}")

def backup_container(cfg, container_name=None, data_dir_override=None):
    """Back up every configured container; return list of encrypted archives.

    cfg is a dict from YAML with keys:
    - containers: list of {name: <container>, data_dir: <path>} OR a dict keyed by name
    - key_path: path to AES key file (required, must exist)
    - ignore_tables: list of patterns like %_file.storageblob
    - threshold_bytes: size threshold choosing db-level vs table-level dumps
      (default 5*1024*1024)
    - output_dir: where to put final encrypted archives (default ./backups)
    - mysqldump_opts_db: extra options for DB-level dumps (optional)
    - mysqldump_opts_table: extra options for table-level dumps (optional)

    container_name restricts the run to one container (ValueError if unknown);
    data_dir_override replaces that container's configured data_dir.
    Raises FileNotFoundError when key_path is missing or not a file, and
    RuntimeError when mysqldump or openssl fail.
    """
    threshold = cfg.get('threshold_bytes', 5 * 1024 * 1024)
    # Fetch before wrapping in Path: Path(None) would raise TypeError and
    # mask the intended "key not found" error for a missing key_path.
    key_cfg = cfg.get('key_path')
    key_path = Path(key_cfg) if key_cfg else None
    if key_path is None or not key_path.is_file():
        raise FileNotFoundError(f"Key path not found or not a file: {key_path}")
    out_root = Path(cfg.get('output_dir', './backups')).absolute()
    out_root.mkdir(parents=True, exist_ok=True)
    ignore_tuples = parse_ignore_patterns(cfg.get('ignore_tables', []))
    mysqldump_opts_db = cfg.get('mysqldump_opts_db', [])
    mysqldump_opts_table = cfg.get('mysqldump_opts_table', [])
    containers_list = _containers_from_config(cfg.get('containers', []))
    if container_name:
        containers_list = [c for c in containers_list if c['name'] == container_name]
        if not containers_list:
            raise ValueError(f"Requested container {container_name} not found in config.")
    results = []
    for c in containers_list:
        name = c['name']
        # Test the raw config value, not the Path: Path('') is Path('.'),
        # which is truthy and exists, so the old check never fired and an
        # unconfigured container would silently scan the current directory.
        raw_dir = data_dir_override or c.get('data_dir')
        if not raw_dir:
            print(f"Skipping container {name}: no data_dir configured.")
            continue
        data_dir = Path(raw_dir)
        if not data_dir.exists():
            print(f"Warning: data_dir {data_dir} for container {name} does not exist. Skipping.")
            continue
        # utcnow() is deprecated; an aware UTC timestamp formats identically.
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        tmpdir = Path(tempfile.mkdtemp(prefix=f"mysql-backup-{name}-"))
        print(f"Processing container: {name}")
        print(f" data_dir: {data_dir}")
        print(f" tmpdir: {tmpdir}")
        try:
            dbs = find_databases(data_dir)
            if not dbs:
                print(f"No databases found under {data_dir}.")
                continue
            for db in dbs:
                db_dir = data_dir / db
                db_size = get_dir_size(db_dir)
                print(f"Database {db} size: {human_bytes(db_size)} ({db_dir})")
                if db_size <= threshold:
                    # Small database: one DB-level dump file.
                    outfile = tmpdir / f"{db}.sql"
                    print(f" -> DB-level dump to {outfile}")
                    _dump_to_file(name, mysqldump_opts_db, ["--databases", db],
                                  outfile, f"{db} (container {name})")
                    continue
                # Large database: dump per table so ignorable tables can be
                # skipped via the ignore patterns.
                tables = find_tables_from_db_dir(db_dir)
                if not tables:
                    # If we can't detect tables by files, fall back to an
                    # entire DB dump (safer than backing up nothing).
                    print(f" -> Could not detect tables for {db}. Falling back to DB-level dump.")
                    _dump_to_file(name, mysqldump_opts_db, ["--databases", db],
                                  tmpdir / f"{db}.sql", f"{db} (container {name})")
                    continue
                db_outdir = tmpdir / db
                db_outdir.mkdir(parents=True, exist_ok=True)
                for t in tables:
                    if table_ignored(db, t, ignore_tuples):
                        print(f" - Skipping table {db}.{t} (matched ignore pattern)")
                        continue
                    outfile = db_outdir / f"{t}.sql"
                    print(f" - Dump table {db}.{t} -> {outfile}")
                    _dump_to_file(name, mysqldump_opts_table, [db, t],
                                  outfile, f"{db}.{t} (container {name})")
            # All dumps created under tmpdir. Now tar+gzip and encrypt.
            gzname = out_root / f"{name}_backup_{timestamp}.tar.gz"
            encname = out_root / f"{name}_backup_{timestamp}.tar.gz.enc"
            print(f"Creating tar.gz: {gzname}")
            # "w:gz" streams the gzipped tar in one pass; the old code wrote
            # a plain .tar, re-read it through gzip, then deleted it.
            with tarfile.open(gzname, "w:gz") as tar:
                # add contents of tmpdir, but not the top-level tmpdir itself
                for p in sorted(tmpdir.iterdir()):
                    tar.add(p, arcname=p.name)
            print(f"Encrypting with openssl -> {encname}")
            openssl_cmd = [
                "openssl", "enc", "-aes-256-cbc", "-salt",
                "-in", str(gzname),
                "-out", str(encname),
                "-pass", f"file:{str(key_path)}"
            ]
            proc = subprocess.run(openssl_cmd)
            if proc.returncode != 0:
                raise RuntimeError("OpenSSL encryption failed.")
            # Remove the plaintext archive once the encrypted copy exists.
            gzname.unlink()
            print(f"Backup created and encrypted: {encname}")
            results.append(str(encname))
        finally:
            # Best-effort cleanup of the staging directory.
            try:
                shutil.rmtree(tmpdir)
            except Exception as e:
                print(f"Warning: failed to remove tmpdir {tmpdir}: {e}")
    return results
# ---------- CLI ----------
def load_config(path: Path):
    """Parse the YAML configuration file at `path` and return its contents."""
    with open(path, mode="r", encoding="utf-8") as handle:
        return yaml.safe_load(handle)
def main():
    """CLI entry point: parse arguments, load the config, run the backups."""
    parser = argparse.ArgumentParser(
        description="Backup MySQL Docker containers by scanning data volumes.")
    parser.add_argument("--config", "-c", required=True,
                        help="YAML configuration file path")
    parser.add_argument("--container", "-C",
                        help="Name of a single container to backup (optional)")
    parser.add_argument("--data-dir", "-d",
                        help="Override data dir for the container (optional)")
    args = parser.parse_args()

    cfg_path = Path(args.config)
    if not cfg_path.is_file():
        print("Config file not found:", cfg_path, file=sys.stderr)
        sys.exit(2)
    cfg = load_config(cfg_path)

    try:
        results = backup_container(cfg, container_name=args.container,
                                   data_dir_override=args.data_dir)
    except Exception as e:
        print("Error during backup:", e, file=sys.stderr)
        sys.exit(3)

    print("All done. Created archives:")
    for archive in results:
        print(" -", archive)

if __name__ == "__main__":
    main()

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3047545
Default Alt Text
Backup script for acquisitariat and other MySQL containers (12 KB)

Event Timeline