Page MenuHomeDevCentral
Paste P374

Backup script for acquisitariat and other MySQL containers
ActivePublic

Authored by dereckson on Sat, Oct 4, 22:16.
#!/usr/bin/env python3
"""
Async MySQL backup script for Nasqueron containers.
Features:
- Detect MySQL containers (image "nasqueron/mysql") automatically or use arguments
- For each container, detect MySQL data directory via `docker inspect`
- Backup databases:
* At database level if total size ≤ 5 MB
* At table level otherwise (one file per table)
- Ignore tables by pattern from YAML configuration
- Archive and encrypt backups (AES via openssl)
- Run operations concurrently using asyncio for speed
"""
import asyncio
import subprocess
import tarfile
import tempfile
from pathlib import Path
import yaml
import os
import fnmatch
import shlex
CONFIG_PATH = Path("/etc/nasqueron/mysql-backup.yaml")
SIZE_THRESHOLD_MB = 5
# ---------- Utilities ----------
def run_sync(cmd: list[str], **kwargs) -> str:
"""Run a command synchronously and return its stdout."""
return subprocess.run(cmd, stdout=subprocess.PIPE, text=True, check=True, **kwargs).stdout.strip()
async def run_async(cmd: list[str]) -> str:
"""Run a command asynchronously and capture stdout."""
proc = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{stderr.decode()}")
return stdout.decode().strip()
def load_config() -> dict:
with open(CONFIG_PATH) as f:
return yaml.safe_load(f)
def matches_ignore_pattern(db: str, table: str, ignore_patterns: list[str]) -> bool:
"""Return True if table matches an ignore pattern."""
for pattern in ignore_patterns:
if fnmatch.fnmatch(f"{db}.{table}", pattern.replace("%", "*")):
return True
return False
def encrypt_file(path: Path, key_path: Path) -> Path:
"""Encrypt file with openssl AES."""
encrypted = path.with_suffix(path.suffix + ".aes")
cmd = ["openssl", "enc", "-aes-256-cbc", "-salt", "-in", str(path),
"-out", str(encrypted), "-pass", f"file:{key_path}"]
run_sync(cmd)
path.unlink() # Remove unencrypted tar
return encrypted
# ---------- Docker helpers ----------
async def get_mysql_containers() -> list[str]:
"""Return list of container names using the nasqueron/mysql image."""
output = await run_async(["docker", "ps", "--filter", "ancestor=nasqueron/mysql", "--format", "{{.Names}}"])
return output.splitlines()
async def get_data_volume(container: str) -> str:
"""Inspect container to find the host directory mounted to /var/lib/mysql."""
output = await run_async(["docker", "inspect", "-f",
"{{range .Mounts}}{{if eq .Destination \"/var/lib/mysql\"}}{{.Source}}{{end}}{{end}}",
container])
return output.strip()
# ---------- Backup logic ----------
async def get_databases(container: str) -> list[str]:
"""List databases in the container."""
output = await run_async(["mysqldump", container, "--no-data", "--all-databases"])
return [line.split()[1] for line in output.splitlines() if line.startswith("CREATE DATABASE")]
def db_size_mb(db_dir: Path) -> float:
total = sum(f.stat().st_size for f in db_dir.rglob("*") if f.is_file())
return total / (1024 * 1024)
async def dump_database(container: str, db: str, target: Path):
cmd = ["mysqldump", container, db]
async with asyncio.Lock(): # ensure orderly I/O
dump = await run_async(cmd)
target.write_text(dump)
async def dump_table(container: str, db: str, table: str, target: Path):
cmd = ["mysqldump", container, db, table]
async with asyncio.Lock():
dump = await run_async(cmd)
target.write_text(dump)
async def backup_container(container: str, data_dir: str, config: dict):
backup_dir = Path(tempfile.mkdtemp(prefix=f"backup_{container}_"))
ignore_patterns = config.get("ignore_tables", [])
key_path = Path(config["aes_key"])
data_path = Path(data_dir)
db_dirs = [p for p in data_path.iterdir() if p.is_dir() and not p.name.startswith("mysql")]
tasks = []
for db_dir in db_dirs:
db = db_dir.name
size_mb = db_size_mb(db_dir)
db_target = backup_dir / db
db_target.mkdir(parents=True, exist_ok=True)
if size_mb <= SIZE_THRESHOLD_MB:
dump_path = db_target.with_suffix(".sql")
tasks.append(dump_database(container, db, dump_path))
else:
tables = [t.stem for t in db_dir.glob("*.ibd")] # heuristic
for table in tables:
if not matches_ignore_pattern(db, table, ignore_patterns):
table_path = db_target / f"{table}.sql"
tasks.append(dump_table(container, db, table, table_path))
await asyncio.gather(*tasks)
tar_path = backup_dir.with_suffix(".tar")
with tarfile.open(tar_path, "w") as tar:
tar.add(backup_dir, arcname=container)
encrypted_path = encrypt_file(tar_path, key_path)
print(f"Backup completed for {container}: {encrypted_path}")
# ---------- Main entry ----------
async def main():
config = load_config()
containers = config.get("containers", {})
if not containers: # autodetect if config empty
names = await get_mysql_containers()
containers = {name: await get_data_volume(name) for name in names}
tasks = [backup_container(name, path, config) for name, path in containers.items()]
await asyncio.gather(*tasks)
if __name__ == "__main__":
asyncio.run(main())

Event Timeline

dereckson edited the content of this paste. (Show Details)
dereckson edited the content of this paste. (Show Details)

Need to save structure of the ignored tables I guess:

dump_table_structure method (GPT 5)
async def dump_table_structure(container: str, db: str, table: str, target: Path):
    """
    Dump only the structure (CREATE TABLE statement) of a table, excluding its data.
    """
    cmd = ["mysqldump", container, "--no-data", db, table]
    async with asyncio.Lock():
        dump = await run_async(cmd)
        target.write_text(dump)