wace_plm/db/backup.py
hjjeong ba026842f7 feat: Add database backup system
- Add Dockerfile.backup for backup container
- Add backup.py script with PostgreSQL backup functionality
- Add backup service to docker-compose.prod.yml
- Update env.production.example with backup configuration
- Add db/README.md with backup system documentation

Features:
- Automated daily backups (07:30, 18:00)
- Local and FTP remote backup support
- 7-day retention policy
- PostgreSQL 16 client for waceplm database
2025-11-12 18:19:54 +09:00


import os
import schedule
import time
import subprocess
from datetime import datetime, timedelta
import logging
import sys
import ftplib # Use the built-in FTP library
from io import BytesIO # Needed for reading file content for FTP upload
# --- Configuration (from environment variables) ---
POSTGRES_HOST = os.getenv('POSTGRES_HOST', 'wace-plm-db')
POSTGRES_PORT = os.getenv('POSTGRES_DOCKER_PORT', '5432')
POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = os.getenv('POSTGRES_DB')
LOCAL_BACKUP_PATH = os.getenv('LOCAL_BACKUP_PATH', '/backups/local')
# FTP Configuration
FTP_HOST = os.getenv('FTP_HOST')
FTP_USER = os.getenv('FTP_USER')
FTP_PASSWORD = os.getenv('FTP_PASSWORD')
FTP_PATH = os.getenv('FTP_PATH', '/') # Default to root FTP directory if not specified
FTP_PORT = int(os.getenv('FTP_PORT', '2122')) # Defaults to 2122 here; the standard FTP port is 21
BACKUP_RETENTION_DAYS = int(os.getenv('BACKUP_RETENTION_DAYS', '7'))
BACKUP_TIME_AM = os.getenv('BACKUP_TIME_AM', "07:30")
BACKUP_TIME_PM = os.getenv('BACKUP_TIME_PM', "18:00")
# --- End Configuration ---
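# Illustrative environment configuration for the variables above. These are
# placeholder values, not copied from env.production.example:
#   POSTGRES_HOST=wace-plm-db      POSTGRES_DOCKER_PORT=5432
#   POSTGRES_USER=waceplm          POSTGRES_PASSWORD=<secret>
#   POSTGRES_DB=waceplm            LOCAL_BACKUP_PATH=/backups/local
#   FTP_HOST=backup.example.com    FTP_USER=backup    FTP_PASSWORD=<secret>
#   FTP_PATH=/waceplm-backups      FTP_PORT=2122
#   BACKUP_RETENTION_DAYS=7        BACKUP_TIME_AM=07:30    BACKUP_TIME_PM=18:00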
# --- Logging Setup ---
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
stream=sys.stdout # Log to stdout to be captured by Docker logs
)
# --- End Logging Setup ---
def check_env_vars():
"""Check if required environment variables are set."""
required_vars = ['POSTGRES_USER', 'POSTGRES_PASSWORD', 'POSTGRES_DB']
# Add FTP vars only if host/user/password/path are provided
if FTP_HOST and FTP_USER and FTP_PASSWORD and FTP_PATH:
required_vars.extend(['FTP_HOST', 'FTP_USER', 'FTP_PASSWORD', 'FTP_PATH'])
logging.info("FTP configuration found in environment variables.")
else:
logging.warning("FTP configuration not fully provided (HOST, USER, PASSWORD, PATH). Remote backups will be skipped.")
# Check database vars
missing_vars = [var for var in required_vars if not os.getenv(var)]
if missing_vars:
logging.error(f"Missing required environment variables: {', '.join(missing_vars)}")
sys.exit(1)
logging.info("All required environment variables checked.")
def create_backup_dirs():
"""Create local backup directory if it doesn't exist."""
try:
os.makedirs(LOCAL_BACKUP_PATH, exist_ok=True)
logging.info(f"Ensured local backup directory exists: {LOCAL_BACKUP_PATH}")
except OSError as e:
logging.error(f"Error creating local backup directory: {e}")
sys.exit(1) # Stop if local backup isn't possible
# Note: the upload/cleanup paths below assume FTP_PATH already exists on the server; they only cwd into it (ensure_ftp_dir below can create missing path components if wired in)
def ensure_ftp_dir(ftp, path):
"""Ensures the specified directory exists on the FTP server."""
parts = path.strip('/').split('/')
current_path = ''
for part in parts:
if not part: continue
current_path += '/' + part
try:
ftp.cwd(current_path)
except ftplib.error_perm as e:
if str(e).startswith('550'): # 550: Directory not found or permission denied
try:
ftp.mkd(current_path)
logging.info(f"Created remote FTP directory: {current_path}")
ftp.cwd(current_path) # Go into the newly created dir
except ftplib.error_perm as mkd_e:
logging.error(f"Failed to create or access FTP directory {current_path}: {mkd_e}")
raise # Re-raise the exception to signal failure
else:
logging.error(f"FTP error accessing {current_path}: {e}")
raise
# Ensure we are in the final target directory
ftp.cwd(path)
logging.info(f"Ensured remote FTP directory exists and CWD set to: {path}")
def perform_database_backup(db_config, backup_prefix):
"""Performs PostgreSQL database backup for a specific database configuration."""
timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')
backup_filename = f"{backup_prefix}_{timestamp}.sql" # SQL 파일로 변경 (.gz 제거)
local_filepath = os.path.join(LOCAL_BACKUP_PATH, backup_filename)
logging.info(f"Starting backup for database '{db_config['db_name']}' with prefix '{backup_prefix}'...")
logging.info(f"Local target: {local_filepath}")
# 1. Create local backup using pg_dump with plain SQL format (no compression)
pg_dump_command = [
'pg_dump',
f'--host={db_config["host"]}',
f'--port={db_config["port"]}',
f'--username={db_config["user"]}',
f'--dbname={db_config["db_name"]}',
'--format=plain', # Plain SQL text format
'--no-owner', # Omit ownership information (more flexible restores)
'--no-privileges', # Omit privilege (GRANT/REVOKE) information (more flexible restores)
f'--file={local_filepath}'
]
env = os.environ.copy()
env['PGPASSWORD'] = db_config['password']
try:
process = subprocess.run(
pg_dump_command,
env=env,
check=True,
capture_output=True,
text=True
)
logging.info(f"Successfully created local backup: {local_filepath}")
logging.debug(f"pg_dump stdout: {process.stdout}")
logging.debug(f"pg_dump stderr: {process.stderr}")
# 2. Upload to Remote via FTP if configured
if FTP_HOST and FTP_USER and FTP_PASSWORD and FTP_PATH:
remote_target_path = FTP_PATH.rstrip("/") + "/" + backup_filename
# Ensure log path starts with / for clarity
log_full_path = f"/{remote_target_path.lstrip('/')}"
logging.info(f"Attempting to upload backup via FTP to: ftp://{FTP_HOST}:{FTP_PORT}{log_full_path}")
ftp = None # Initialize ftp to None
try:
ftp = ftplib.FTP()
ftp.connect(FTP_HOST, FTP_PORT, timeout=60) # Increased timeout to 60 seconds
ftp.login(FTP_USER, FTP_PASSWORD)
ftp.set_pasv(True) # Use passive mode, usually necessary
# --- Simplified directory change (mimicking lftp) ---
try:
if FTP_PATH: # Only change directory if FTP_PATH is set
logging.info(f"Changing remote directory to: {FTP_PATH}")
ftp.cwd(FTP_PATH)
logging.info(f"Successfully changed remote directory to: {ftp.pwd()}") # Log current dir
else:
logging.info("FTP_PATH is not set, uploading to user's home directory.")
# Upload the file to the current directory
logging.info(f"Attempting to upload {backup_filename} to current remote directory.")
with open(local_filepath, 'rb') as local_file:
ftp.storbinary(f'STOR {backup_filename}', local_file)
logging.info(f"Successfully uploaded backup via FTP to remote path: {FTP_PATH}/{backup_filename}") # Adjust log message slightly
except ftplib.error_perm as ftp_cwd_err:
logging.error(f"Failed to change FTP directory to '{FTP_PATH}': {ftp_cwd_err}")
except ftplib.all_errors as ftp_err:
logging.error(f"FTP operation failed during/after CWD or during STOR: {ftp_err}")
# --- End Simplified directory change ---
except ftplib.all_errors as ftp_err:
logging.error(f"FTP connection/login failed: {ftp_err}") # Adjusted error scope
# Potentially retry or raise an error to indicate failure
except FileNotFoundError:
logging.error(f"Local backup file not found for FTP upload: {local_filepath}")
except Exception as ftp_e:
logging.error(f"An unexpected error occurred during FTP upload: {ftp_e}")
finally:
if ftp:
try:
ftp.quit()
except ftplib.all_errors:
logging.debug("FTP quit command failed, closing connection.")
ftp.close() # Force close if quit fails
else:
logging.warning("FTP configuration not provided. Skipping remote upload.")
except subprocess.CalledProcessError as e:
logging.error(f"pg_dump failed with exit code {e.returncode} for database '{db_config['db_name']}'")
logging.error(f"pg_dump stderr: {e.stderr}")
if os.path.exists(local_filepath):
try:
os.remove(local_filepath)
logging.info(f"Removed incomplete local backup file: {local_filepath}")
except OSError as remove_err:
logging.error(f"Error removing incomplete local backup file {local_filepath}: {remove_err}")
except Exception as e:
logging.error(f"An unexpected error occurred during backup for '{db_config['db_name']}': {e}")
def perform_backup():
"""Performs the PostgreSQL database backup."""
logging.info("=== Starting backup process ===")
# Database configuration
db_config = {
'host': POSTGRES_HOST,
'port': POSTGRES_PORT,
'user': POSTGRES_USER,
'password': POSTGRES_PASSWORD,
'db_name': POSTGRES_DB
}
# Perform backup
try:
perform_database_backup(db_config, POSTGRES_DB)
logging.info(f"Completed backup for database: {POSTGRES_DB}")
except Exception as e:
logging.error(f"Failed to backup database: {e}")
logging.info("=== Backup process completed ===")
# Legacy function kept for compatibility (now calls the new generic function)
def perform_backup_legacy():
"""Legacy backup function - kept for backward compatibility."""
return perform_backup()
def cleanup_local_backups(backup_dir):
"""Removes local backups older than BACKUP_RETENTION_DAYS."""
if not os.path.isdir(backup_dir):
logging.warning(f"Local cleanup skipped: Directory not found or inaccessible: {backup_dir}")
return
logging.info(f"Starting cleanup of old local backups in: {backup_dir}")
cutoff_date = datetime.now() - timedelta(days=BACKUP_RETENTION_DAYS)
files_deleted = 0
files_checked = 0
try:
for filename in os.listdir(backup_dir):
# Match the filename pattern
is_db_backup = filename.startswith(f"{POSTGRES_DB}_") and filename.endswith(".sql")
if is_db_backup:
files_checked += 1
filepath = os.path.join(backup_dir, filename)
try:
# Use file modification time for age check
file_mod_time_ts = os.path.getmtime(filepath)
file_mod_time = datetime.fromtimestamp(file_mod_time_ts)
if file_mod_time < cutoff_date:
os.remove(filepath)
logging.info(f"Deleted old local backup: {filepath} (modified: {file_mod_time})")
files_deleted += 1
except OSError as e:
logging.error(f"Error processing or deleting local file {filepath}: {e}")
except ValueError: # Should not happen with getmtime
logging.warning(f"Could not get modification time for local file: {filename}. Skipping.")
logging.info(f"Local cleanup finished for {backup_dir}. Checked: {files_checked}, Deleted: {files_deleted}.")
except OSError as e:
logging.error(f"Error listing directory {backup_dir} during local cleanup: {e}")
def parse_mlsd_time(timestr):
"""Parses the timestamp from MLSD command output (YYYYMMDDHHMMSS)."""
try:
return datetime.strptime(timestr, '%Y%m%d%H%M%S')
except ValueError:
logging.warning(f"Could not parse MLSD time string: {timestr}")
return None
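# Example: parse_mlsd_time("20251112091530") -> datetime(2025, 11, 12, 9, 15, 30)
# Note: MLSD/MDTM 'modify' facts are UTC per RFC 3659, while the cutoff below
# uses local time; with a multi-day retention window the few hours of skew are
# usually negligible.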
def cleanup_remote_backups():
"""Removes remote backups older than BACKUP_RETENTION_DAYS using FTP."""
if not (FTP_HOST and FTP_USER and FTP_PASSWORD and FTP_PATH):
logging.warning("FTP configuration not provided. Skipping remote cleanup.")
return
remote_dir = FTP_PATH.rstrip("/")
# Correct the logging message to avoid double slash if FTP_PATH starts with /
log_path = f"/{remote_dir.lstrip('/')}" if remote_dir else "/"
logging.info(f"Starting cleanup of old remote backups in: ftp://{FTP_HOST}:{FTP_PORT}{log_path}")
cutoff_date = datetime.now() - timedelta(days=BACKUP_RETENTION_DAYS)
files_deleted = 0
files_checked = 0
ftp = None
try:
ftp = ftplib.FTP()
ftp.connect(FTP_HOST, FTP_PORT, timeout=60) # Increased timeout to 60 seconds
ftp.login(FTP_USER, FTP_PASSWORD)
ftp.set_pasv(True)
# --- Simplified directory change (similar to upload) ---
try:
if remote_dir: # Only change directory if remote_dir (derived from FTP_PATH) is set
logging.info(f"Changing remote directory for cleanup to: {remote_dir}")
ftp.cwd(remote_dir)
logging.info(f"Successfully changed remote directory for cleanup to: {ftp.pwd()}") # Log current dir
else:
logging.info("FTP_PATH is not set, performing cleanup in user's home directory.")
# --- Proceed with listing and deletion in the CURRENT directory ---
# Use MLSD for modern servers, fallback needed if not supported
try:
lines = []
ftp.retrlines('MLSD', lines.append)
logging.debug(f"MLSD output for current directory ({ftp.pwd()}):\n" + "\n".join(lines))
for line in lines:
# MLSD entries have the form "fact=value;fact=value; filename" (RFC 3659):
# the filename is everything after the first space following the fact list.
facts_str, _, filename = line.partition(' ')
filename = filename.strip()
facts = {}
for part in facts_str.split(';'):
if '=' in part:
key, value = part.split('=', 1)
facts[key.strip().lower()] = value.strip()
filetype = facts.get('type')
modify_time_str = facts.get('modify')
# Process files matching database pattern
is_db_backup = filename and filename.startswith(f"{POSTGRES_DB}_") and filename.endswith(".sql")
if filetype == 'file' and is_db_backup:
files_checked += 1
if modify_time_str:
file_mod_time = parse_mlsd_time(modify_time_str)
if file_mod_time and file_mod_time < cutoff_date:
try:
ftp.delete(filename)
logging.info(f"Deleted old remote backup: {filename} (modified: {file_mod_time})")
files_deleted += 1
except ftplib.error_perm as del_err:
logging.error(f"Failed to delete remote file {filename}: {del_err}")
elif not file_mod_time:
logging.warning(f"Skipping remote file due to unparseable time: {filename}")
else:
logging.warning(f"Could not get modification time for remote file: {filename}. Skipping deletion check.")
logging.info(f"Remote cleanup finished using MLSD for {remote_dir}. Checked: {files_checked}, Deleted: {files_deleted}.")
except ftplib.error_perm as mlsd_err:
logging.warning(f"MLSD command failed (server might not support it): {mlsd_err}. Falling back to LIST/MDTM (less reliable).")
# Fallback to LIST and MDTM (less efficient and parsing can be fragile)
files_deleted = 0 # Reset counter for fallback
files_checked = 0
try:
filenames = ftp.nlst()
logging.debug(f"NLST output for {remote_dir}: {filenames}")
for filename in filenames:
# Check for database backup pattern
is_db_backup = filename.startswith(f"{POSTGRES_DB}_") and filename.endswith(".sql")
if is_db_backup:
files_checked += 1
try:
# Attempt to get modification time
mdtm_str = ftp.voidcmd(f"MDTM {filename}")
# Response format is usually "213 YYYYMMDDHHMMSS"
if mdtm_str.startswith("213 "):
file_mod_time = parse_mlsd_time(mdtm_str[4:].strip())
if file_mod_time and file_mod_time < cutoff_date:
try:
ftp.delete(filename)
logging.info(f"Deleted old remote backup (fallback): {filename} (modified: {file_mod_time})")
files_deleted += 1
except ftplib.error_perm as del_err_fb:
logging.error(f"Failed to delete remote file {filename} (fallback): {del_err_fb}")
elif not file_mod_time:
logging.warning(f"Skipping remote file (fallback) due to unparseable time: {filename}")
else:
logging.warning(f"Could not get MDTM for remote file {filename}: {mdtm_str}. Skipping deletion check.")
except ftplib.error_perm as mdtm_err:
logging.warning(f"MDTM command failed for {filename}: {mdtm_err}. Skipping deletion check.")
except Exception as fb_err:
logging.warning(f"Error processing file {filename} in fallback: {fb_err}. Skipping.")
logging.info(f"Remote cleanup finished using LIST/MDTM fallback for {remote_dir}. Checked: {files_checked}, Deleted: {files_deleted}.")
except ftplib.error_perm as list_err:
logging.error(f"Failed to list files using NLST in fallback: {list_err}")
except Exception as fallback_list_err:
logging.error(f"An unexpected error occurred during FTP fallback cleanup: {fallback_list_err}")
except ftplib.error_perm as ftp_cwd_err:
logging.error(f"Failed to change FTP directory for cleanup to '{remote_dir}': {ftp_cwd_err}")
# If we can't change directory, we can't clean it.
return # Exit cleanup function
except ftplib.all_errors as ftp_err:
logging.error(f"FTP connection/login failed during cleanup: {ftp_err}")
return # Exit cleanup function
# --- End Simplified directory change ---
except ftplib.all_errors as ftp_err:
logging.error(f"FTP cleanup failed: {ftp_err}")
except Exception as ftp_clean_e:
logging.error(f"An unexpected error occurred during FTP cleanup: {ftp_clean_e}")
finally:
if ftp:
try:
ftp.quit()
except ftplib.all_errors:
logging.debug("FTP quit command failed during cleanup, closing connection.")
ftp.close()
def run_cleanup():
"""Runs cleanup for both local and remote directories."""
logging.info("Running scheduled cleanup job.")
cleanup_local_backups(LOCAL_BACKUP_PATH)
cleanup_remote_backups()
def run_backup_job():
"""Runs the backup job."""
logging.info("Running scheduled backup job.")
perform_backup()
# Cleanup is handled by a separate schedule
if __name__ == "__main__":
check_env_vars()
create_backup_dirs()
logging.info("Backup script starting.")
logging.info(f"Scheduling backups for {BACKUP_TIME_AM} and {BACKUP_TIME_PM} KST (Asia/Seoul).")
logging.info(f"Backup retention: {BACKUP_RETENTION_DAYS} days.")
logging.info(f"Local backup path: {LOCAL_BACKUP_PATH}")
# Log database configuration
logging.info(f"Database: {POSTGRES_DB} at {POSTGRES_HOST}:{POSTGRES_PORT}")
if FTP_HOST and FTP_USER and FTP_PASSWORD and FTP_PATH:
# Ensure log path starts with / for clarity
log_ftp_path = f"/{FTP_PATH.lstrip('/')}"
logging.info(f"FTP Target: ftp://{FTP_USER}:****@{FTP_HOST}:{FTP_PORT}{log_ftp_path}")
else:
logging.info("FTP Target: Not configured.")
# --- Initial Run ---
logging.info("--- Performing initial backup and cleanup run on startup ---")
try:
run_backup_job() # Perform the backup job first
run_cleanup() # Then run cleanup
logging.info("--- Initial run complete. Proceeding to scheduled runs. ---")
except Exception as initial_run_error:
logging.error(f"Error during initial backup/cleanup run: {initial_run_error}")
# Log the error and continue to the scheduler.
# --- End Initial Run ---
# --- Scheduling ---
schedule.every().day.at(BACKUP_TIME_AM, "Asia/Seoul").do(run_backup_job)
schedule.every().day.at(BACKUP_TIME_PM, "Asia/Seoul").do(run_backup_job)
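# Note: passing a timezone string to .at() requires a schedule version with
# timezone support and the pytz package installed; without it, .at() accepts
# only the "HH:MM" argument.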
# Schedule cleanup (e.g., once daily, shortly after the first backup)
# Ensure the time parsing and addition handles potential day rollovers if needed,
# but adding 15 minutes should be safe.
try:
cleanup_dt = datetime.strptime(BACKUP_TIME_AM, "%H:%M") + timedelta(minutes=15)
cleanup_time = cleanup_dt.strftime("%H:%M")
logging.info(f"Scheduling daily cleanup job for {cleanup_time} KST (Asia/Seoul).")
schedule.every().day.at(cleanup_time, "Asia/Seoul").do(run_cleanup)
except ValueError:
logging.error(f"Invalid BACKUP_TIME_AM format: {BACKUP_TIME_AM}. Cannot schedule cleanup accurately.")
# Fallback: Schedule cleanup at a fixed time like 08:00
logging.warning("Scheduling cleanup for 08:00 KST as fallback.")
schedule.every().day.at("08:00", "Asia/Seoul").do(run_cleanup)
# --- End Scheduling ---
logging.info("Scheduler started. Waiting for scheduled jobs...")
while True:
schedule.run_pending()
time.sleep(60) # Check every 60 seconds