F12115247
Backup script for acquisitariat and other MySQL containers
Authored By
dereckson
2025-10-04 22:16:15 (UTC+0)
Size
12 KB
#!/usr/bin/env python3
"""
mysqldump-by-size.py
Backup MySQL Docker containers by scanning the data volume:
- If a database total size <= threshold -> do a database-level mysqldump (one file).
- If database size > threshold -> do table-level mysqldump (one directory per db, one file per table),
but ignore tables matching patterns defined in YAML (patterns use % like MySQL; converted to glob *).
Usage:
python3 mysqldump-by-size.py --config config.yml
python3 mysqldump-by-size.py --config config.yml --container acquisitariat --data-dir /srv/acquisitariat/mysql
Requirements:
- Python 3.8+
- PyYAML (pip install pyyaml)
- mysqldump wrapper in PATH that accepts: mysqldump <container> <normal mysqldump options>
- openssl CLI available for encryption
Generated by ChatGPT with GPT-5
"""
import argparse
import datetime
import fnmatch
import gzip
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile
from pathlib import Path

try:
    import yaml
except Exception:
    print("PyYAML is required. Install with: pip install pyyaml", file=sys.stderr)
    sys.exit(2)
# ---------- Helpers ----------

def human_bytes(n):
    """Format a byte count as a human-readable string (B, KB, ... PB)."""
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if n < 1024:
            return f"{n:.1f}{unit}"
        n /= 1024
    return f"{n:.1f}PB"
def get_dir_size(path: Path) -> int:
    """Return total size in bytes of files under path (recursive)."""
    total = 0
    for p in path.rglob('*'):
        if p.is_file():
            try:
                total += p.stat().st_size
            except OSError:
                pass
    return total
def find_databases(data_dir: Path):
    """Return list of database directory names in the data_dir.
    We consider entries that are directories and skip hidden/temporary names.
    """
    dbs = []
    if not data_dir.is_dir():
        return dbs
    for entry in sorted(data_dir.iterdir()):
        if entry.is_dir():
            # skip internal schemas
            if entry.name in ("mysql", "performance_schema", "sys", "information_schema"):
                continue
            # also skip entries beginning with '.' or '#'
            if entry.name.startswith('.') or entry.name.startswith('#'):
                continue
            dbs.append(entry.name)
    return dbs
def find_tables_from_db_dir(db_dir: Path):
    """Try to infer table names by listing per-table files (.ibd, .frm, .MYD, .MYI).
    Returns sorted unique table names.
    """
    table_basenames = set()
    if not db_dir.is_dir():
        return []
    for f in db_dir.iterdir():
        if not f.is_file():
            continue
        # common MySQL file extensions for table files
        for ext in (".ibd", ".frm", ".MYD", ".MYI", ".ibdata"):
            if f.name.endswith(ext):
                base = f.name[:-len(ext)]
                # ignore metadata like gen_clust_index or non table-like files
                if base:
                    table_basenames.add(base)
                break
    return sorted(table_basenames)
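# For example, a db directory containing "users.ibd" and "orders.frm" yields
# ["orders", "users"].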
def pattern_to_glob(pat: str) -> str:
    """Convert MySQL-style % wildcards into shell glob * wildcards."""
    return pat.replace('%', '*')
def parse_ignore_patterns(patterns):
    """Parse list of strings like 'dbpattern.tablepattern' where patterns use % wildcards.
    Return list of (db_glob, table_glob) tuples.
    """
    res = []
    if not patterns:
        return res
    for p in patterns:
        p = p.strip()
        if not p:
            continue
        if '.' in p:
            dbp, tbp = p.split('.', 1)
        else:
            # If no dot, treat it as a table pattern matching any db
            dbp, tbp = '%', p
        res.append((pattern_to_glob(dbp), pattern_to_glob(tbp)))
    return res
def table_ignored(dbname: str, tablename: str, ignore_tuples) -> bool:
    """Return True if the table should be ignored given list of (db_glob, table_glob)."""
    for db_glob, table_glob in ignore_tuples:
        if fnmatch.fnmatchcase(dbname, db_glob) and fnmatch.fnmatchcase(tablename, table_glob):
            return True
    return False
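# For example, with ignore_tables: ["%_file.storageblob"]:
#   parse_ignore_patterns(["%_file.storageblob"]) -> [("*_file", "storageblob")]
#   table_ignored("attachments_file", "storageblob", ...) -> True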
def run_cmd(cmd, capture_output=False, check=True):
    """Run a subprocess command with logging."""
    print(f"+ {' '.join(map(str, cmd))}")
    return subprocess.run(cmd, capture_output=capture_output, check=check)
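# Note: run_cmd is not called by the flow below; it is kept as a convenience
# wrapper for logged subprocess invocations.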
# ---------- Main backup logic ----------

def backup_container(cfg, container_name=None, data_dir_override=None):
    """
    cfg is a dict from YAML with keys:
      - containers: list of {name: <container>, data_dir: <path>} OR a dict keyed by name
      - key_path: path to AES key file
      - ignore_tables: list of patterns like %_file.storageblob
      - threshold_bytes: integer threshold to choose db-level vs table-level (default 5*1024*1024)
      - output_dir: where to put final encrypted archives (default ./backups)
      - mysqldump_opts_db: list of extra options to pass for DB-level (optional)
      - mysqldump_opts_table: list of extra options to pass for table-level (optional)
    """
    threshold = cfg.get('threshold_bytes', 5 * 1024 * 1024)
    # check the raw config value first: Path(None) would raise TypeError
    key_cfg = cfg.get('key_path')
    if not key_cfg or not Path(key_cfg).is_file():
        raise FileNotFoundError(f"Key path not found or not a file: {key_cfg}")
    key_path = Path(key_cfg)
    out_root = Path(cfg.get('output_dir', './backups')).absolute()
    out_root.mkdir(parents=True, exist_ok=True)
    ignore_tuples = parse_ignore_patterns(cfg.get('ignore_tables', []))
    mysqldump_opts_db = cfg.get('mysqldump_opts_db', [])
    mysqldump_opts_table = cfg.get('mysqldump_opts_table', [])
    # Build container list to process; accept either a list of dicts or a mapping
    containers_cfg = cfg.get('containers', [])
    containers_list = []
    if isinstance(containers_cfg, dict):
        # mapping format: name -> {data_dir: ...} or name -> data_dir
        for name, v in containers_cfg.items():
            d = v.get('data_dir') if isinstance(v, dict) else v
            containers_list.append({'name': name, 'data_dir': d})
    else:
        for item in containers_cfg:
            if isinstance(item, dict):
                name = item.get('name')
                data_dir = item.get('data_dir')
                if name:
                    containers_list.append({'name': name, 'data_dir': data_dir})
            elif isinstance(item, str):
                # a bare name carries no data_dir, so it cannot be processed
                print("Warning: container entry is a string (no data_dir). Skipping:", item)
                continue
    if container_name:
        # keep only the requested container
        containers_list = [c for c in containers_list if c['name'] == container_name]
        if not containers_list:
            raise ValueError(f"Requested container {container_name} not found in config.")
    results = []
    for c in containers_list:
        name = c['name']
        # test the raw value before wrapping in Path: Path('') is truthy, so a
        # Path-level check would never catch a missing data_dir
        raw_data_dir = data_dir_override or c.get('data_dir')
        if not raw_data_dir:
            print(f"Skipping container {name}: no data_dir configured.")
            continue
        data_dir = Path(raw_data_dir)
        if not data_dir.exists():
            print(f"Warning: data_dir {data_dir} for container {name} does not exist. Skipping.")
            continue
        timestamp = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
        tmpdir = Path(tempfile.mkdtemp(prefix=f"mysql-backup-{name}-"))
        print(f"Processing container: {name}")
        print(f"  data_dir: {data_dir}")
        print(f"  tmpdir: {tmpdir}")
        try:
            dbs = find_databases(data_dir)
            if not dbs:
                print(f"No databases found under {data_dir}.")
                continue
            for db in dbs:
                db_dir = data_dir / db
                db_size = get_dir_size(db_dir)
                print(f"Database {db} size: {human_bytes(db_size)} ({db_dir})")
                if db_size <= threshold:
                    # DB-level dump: one .sql file for the whole database
                    outfile = tmpdir / f"{db}.sql"
                    cmd = ["mysqldump", name] + list(mysqldump_opts_db) + ["--databases", db]
                    print(f"  -> DB-level dump to {outfile}")
                    with open(outfile, "wb") as fh:
                        proc = subprocess.run(cmd, stdout=fh)
                    if proc.returncode != 0:
                        raise RuntimeError(f"mysqldump failed for {db} (container {name})")
                else:
                    # table-level: create a directory for the db and dump each table
                    db_outdir = tmpdir / db
                    db_outdir.mkdir(parents=True, exist_ok=True)
                    tables = find_tables_from_db_dir(db_dir)
                    if not tables:
                        # If we can't detect tables by files, fall back to an entire DB dump (safer)
                        print(f"  -> Could not detect tables for {db}. Falling back to DB-level dump.")
                        outfile = tmpdir / f"{db}.sql"
                        cmd = ["mysqldump", name] + list(mysqldump_opts_db) + ["--databases", db]
                        with open(outfile, "wb") as fh:
                            proc = subprocess.run(cmd, stdout=fh)
                        if proc.returncode != 0:
                            raise RuntimeError(f"mysqldump failed for {db} (container {name})")
                    else:
                        for t in tables:
                            if table_ignored(db, t, ignore_tuples):
                                print(f"  - Skipping table {db}.{t} (matched ignore pattern)")
                                continue
                            outfile = db_outdir / f"{t}.sql"
                            cmd = ["mysqldump", name] + list(mysqldump_opts_table) + [db, t]
                            print(f"  - Dump table {db}.{t} -> {outfile}")
                            with open(outfile, "wb") as fh:
                                proc = subprocess.run(cmd, stdout=fh)
                            if proc.returncode != 0:
                                raise RuntimeError(f"mysqldump failed for {db}.{t} (container {name})")
            # all dumps created under tmpdir; now tar, gzip and encrypt
            tarname = out_root / f"{name}_backup_{timestamp}.tar"
            gzname = out_root / f"{name}_backup_{timestamp}.tar.gz"
            encname = out_root / f"{name}_backup_{timestamp}.tar.gz.enc"
            print(f"Creating tar.gz: {gzname}")
            # Create the tar archive, adding the contents of tmpdir
            # but not the top-level tmpdir itself
            with tarfile.open(tarname, "w") as tar:
                for p in sorted(tmpdir.iterdir()):
                    tar.add(p, arcname=p.name)
            # gzip the tar in a second pass
            with open(tarname, "rb") as f_in, open(gzname, "wb") as f_out:
                with gzip.GzipFile(fileobj=f_out, mode="wb") as gz:
                    shutil.copyfileobj(f_in, gz)
            tarname.unlink()  # remove the intermediate tar
            print(f"Encrypting with openssl -> {encname}")
            openssl_cmd = [
                "openssl", "enc", "-aes-256-cbc", "-salt",
                "-in", str(gzname),
                "-out", str(encname),
                "-pass", f"file:{key_path}",
            ]
            proc = subprocess.run(openssl_cmd)
            if proc.returncode != 0:
                raise RuntimeError("OpenSSL encryption failed.")
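            # To decrypt later with the same key (no -pbkdf2/-iter flags are used
            # above, so none should be passed when decrypting):
            #   openssl enc -d -aes-256-cbc -in <name>_backup_<ts>.tar.gz.enc \
            #     -pass file:<key_path> | tar xz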
            # option: remove plain gz after encrypt
            gzname.unlink()
            print(f"Backup created and encrypted: {encname}")
            results.append(str(encname))
        finally:
            # cleanup tmpdir
            try:
                shutil.rmtree(tmpdir)
            except Exception as e:
                print(f"Warning: failed to remove tmpdir {tmpdir}: {e}")
    return results
# ---------- CLI ----------

def load_config(path: Path):
    with open(path, "r", encoding="utf-8") as fh:
        return yaml.safe_load(fh)
def main():
    ap = argparse.ArgumentParser(description="Backup MySQL Docker containers by scanning data volumes.")
    ap.add_argument("--config", "-c", required=True, help="YAML configuration file path")
    ap.add_argument("--container", "-C", help="Name of a single container to backup (optional)")
    ap.add_argument("--data-dir", "-d", help="Override data dir for the container (optional)")
    args = ap.parse_args()

    cfg_path = Path(args.config)
    if not cfg_path.is_file():
        print("Config file not found:", cfg_path, file=sys.stderr)
        sys.exit(2)
    cfg = load_config(cfg_path)
    try:
        results = backup_container(cfg, container_name=args.container, data_dir_override=args.data_dir)
    except Exception as e:
        print("Error during backup:", e, file=sys.stderr)
        sys.exit(3)
    print("All done. Created archives:")
    for r in results:
        print(" -", r)


if __name__ == "__main__":
    main()