-rw-r--r-- | .vscode/launch.json | 30
-rw-r--r-- | README.md | 247
-rw-r--r-- | doc/config-fmt.md | 369
-rw-r--r-- | src/conf/py-debug/aws.jsonc (renamed from src/conf/py-debug/aws.sample.jsonc) | 4
l--------- | src/conf/py-debug/conf.d | 1
-rw-r--r-- | src/conf/py-debug/conf.d/core.jsonc | 44
-rw-r--r-- | src/conf/py-debug/localfs.jsonc (renamed from src/conf/py-debug/localfs.sample.jsonc) | 6
-rw-r--r-- | src/conf/py-debug/null.jsonc | 141
-rw-r--r-- | src/conf/py-debug/null.sample.jsonc | 140
l--------- | src/conf/py-debug/palhm.jsonc | 1
-rw-r--r-- | src/conf/py-sample/conf.d/core.json | 41
-rw-r--r-- | src/conf/py-sample/sample.jsonc | 127
-rwxr-xr-x | src/palhm.py | 74
-rw-r--r-- | src/palhm/__init__.py | 120
-rw-r--r-- | src/palhm/exceptions.py | 2
-rw-r--r-- | src/palhm/mod/aws.py | 17
16 files changed, 1098 insertions, 266 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json
index d126290..2d8e2c8 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -8,13 +8,19 @@
 			"name": "palhm config",
 			"type": "python",
 			"request": "launch",
-			"cwd": "${workspaceFolder}/src",
-			"program": "palhm.py",
-			"args": [
-				"-f",
-				"conf/py-debug/palhm.jsonc",
-				"config"
-			],
+			"cwd": "${workspaceFolder}",
+			"program": "src/palhm.py",
+			"args": [ "-f", "src/conf/py-debug/palhm.jsonc", "config" ],
+			"console": "integratedTerminal",
+			"justMyCode": true
+		},
+		{
+			"name": "palhm mods",
+			"type": "python",
+			"request": "launch",
+			"cwd": "${workspaceFolder}",
+			"program": "src/palhm.py",
+			"args": [ "mods" ],
 			"console": "integratedTerminal",
 			"justMyCode": true
 		},
@@ -22,13 +28,9 @@
 			"name": "palhm run default",
 			"type": "python",
 			"request": "launch",
-			"cwd": "${workspaceFolder}/src",
-			"program": "palhm.py",
-			"args": [
-				"-f",
-				"conf/py-debug/palhm.jsonc",
-				"run"
-			],
+			"cwd": "${workspaceFolder}",
+			"program": "src/palhm.py",
+			"args": [ "-f", "src/conf/py-debug/palhm.jsonc", "run" ],
 			"console": "integratedTerminal",
 			"justMyCode": true
 		}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..30939c2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,247 @@
+# Periodic Automatic Live Host Maintenance (PALHM)
+This is a script that automates periodic maintenance of a machine. PALHM covers
+routine sequential command runs as well as "hot" or "live" backup of the
+running host to a backend of your choice.
+
+PALHM addresses the problems of the traditional lazy method of copying the
+entirety of drives:
+
+* Use of high-level data dump tools like mysqldump and slapcat
+* Exclusion of data obtainable from the modern package manager, such as the
+  contents of /usr, to reduce cost
+* Dumps of metadata crucial when restoring from backup, using tools like
+  lsblk
+
+The safest way to back up has always been taking the system offline and
+tar'ing the file system or making an image of the storage device. This may not
+be practical in setups where downtime is unacceptable or allocating more
+resources for a backup task is not cost-efficient. This is where this script
+comes into play.
+
+## TL;DR
+Go to [#Examples](#examples).
+
+## Routine Task
+A Routine Task is a set of routines that are executed sequentially. It can
+consist of commands(Execs) and other previously defined tasks. Routine Tasks
+are deliberately basic - you may incorporate custom shell scripts or other
+executables to do complex routines.
+
+## Backup Task
+PALHM supports backup to different storage backends. It also automates rotation
+of backup copies on the supported storage backends. **aws-s3** and **localfs**
+are currently implemented. You may use the localfs backend to store backups on
+NFS or Samba mount points. The special **null** backend is for testing
+purposes.
+
+The files produced as the end product of a backup are called "Backup Objects".
+Backup Objects have two essential attributes.
+
+* **pipeline**: commands used to generate the backup output file
+* **path**: path to the output file on the backend
+
+For example, this object definition is for a mysql data dump compressed with
+zstd and encrypted using the public key id "backup-pub-key", named
+"all-db.sql.zstd.pgp".
+
+```jsonc
+{
+	"path": "all-db.sql.zstd.pgp",
+	"pipeline": [
+		{ "type": "exec-inline", "argv": [ "/bin/mysqldump", "-uroot", "--all-databases" ] },
+		{ "type": "exec-inline", "argv": [ "/bin/zstd" ] },
+		{ "type": "exec-inline", "argv": [ "/bin/gpg", "-e", "-r", "backup-pub-key", "--compress-algo", "none" ] }
+	]
+}
+```
+
+This is equivalent to doing the following from the shell
+
+```sh
+mysqldump -uroot --all-databases | zstd | gpg -e -r backup-pub-key --compress-algo none > all-db.sql.zstd.pgp
+```
+
+except that the output file can be placed on a cloud service, depending on the
+backend used. Frequently used commands like "compression filters" are defined
+as Exec definitions in the core
+config([conf.d/core.json](src/conf/py-sample/conf.d/core.json)).
+
+### Backup Object Path
+The final path of a Backup Object is formulated as follows.
+
+```
+localfs:
+  /media/backup/localhost/2022-05-01T06:59:17+00:00/all-db.sql.zstd.pgp
+  |        ROOT         |          PREFIX         |        PATH        |
+
+aws-s3:
+  s3://your-s3-bucket/backup/your-host/2022-05-01T06:59:17+00:00/all-db.sql.zstd.pgp
+     |     BUCKET    |       ROOT     |          PREFIX         |        PATH        |
+```
+
+| ATTR | DESC |
+| - | - |
+| ROOT | The root directory for backup |
+| PREFIX | The name of the backup |
+| PATH | The output path of the backup object |
+
+The default format of PREFIX is the output of `date --utc --iso-8601=seconds`.
+Backup rotation is performed using PREFIX. The PREFIX must be based on values
+that place the oldest backup first when sorted in ascending order.
+
+PATH may contain the directory separator("/" or "\\"). The backend may or may
+not support this. The localfs backend handles this by doing `mkdir -p` on the
+path before creating a "sink" for output files. Using "/" in PATH on Windows
+will fail due to an NTFS limitation. The aws-s3 backend passes the directory
+separator "/" through to the Boto3 API, and subdirectory objects are created
+implicitly.
+
+### Backend-param
+The parameters specific to backup backends can be set using backend-param.
+These parameters commonly appear across backends.
+
+* root: (string) the path to the backup root
+* nb-copy-limit: (decimal) the number of most recent backups to keep
+* root-size-limit: (decimal) the total size of the backup root in bytes
+* prefix: (TODO) reserved for future use
+
+The value of the decimal type is either a JSON number or a string that
+represents a decimal number. The IEEE754 infinity representation("inf",
+"Infinity", "-inf" or "-Infinity") can be used for *nb-copy-limit* and
+*root-size-limit* to disable either or both of the attributes. The decimal
+type is not affected by the limits of the IEEE754 type(the 2^53 integer part).
+The fractional part of the numbers is ignored as they are compared against
+integers.
+
+#### Localfs
+```jsonc
+{
+	"tasks": [
+		{
+			"id": "backup",
+			"type": "backup",
+			"backend": "localfs",
+			"backend-param": {
+				"root": "/media/backup/localhost", // (required)
+				"dmode": "755", // (optional) mode for new directories
+				"fmode": "644", // (optional) mode for new files
+				"nb-copy-limit": "Infinity", // (optional)
+				"root-size-limit": "Infinity" // (optional)
+			},
+			"object-groups": [ /* ... */ ],
+			"objects": [ /* ... */ ]
+		}
+	]
+}
+```
+
+#### aws-s3
+```jsonc
+{
+	"tasks": [
+		{
+			"id": "backup",
+			"type": "backup",
+			"backend": "aws-s3",
+			"backend-param": {
+				"profile": "default", // (optional) AWS client profile. Defaults to "default"
+				"bucket": "palhm.test", // (required) S3 bucket name
+				"root": "/palhm/backup", // (required)
+				"sink-storage-class": "STANDARD", // (optional) storage class for new uploads
+				"rot-storage-class": "STANDARD", // (optional) storage class for final uploads
+				"nb-copy-limit": "Infinity", // (optional)
+				"root-size-limit": "Infinity" // (optional)
+			},
+			"object-groups": [ /* ... */ ],
+			"objects": [ /* ... */ ]
+		}
+	]
+}
+```
+
+For the profiles configured for root, see `~/.aws/config`. Run
+`aws configure help` for more info.
+
+For possible values for storage class, run `aws s3 cp help`.
+
+If you wish to keep backup copies in Glacier, you may want to upload backup
+objects as STANDARD-IA first and change the storage class to GLACIER at the
+rotate stage, because in the event of failure, PALHM rolls back the process by
+deleting the objects already uploaded to the bucket. You may be charged for
+objects stored in Glacier as the minimum storage duration is 90 days(as of
+2022). The **rot-storage-class** attribute serves this very purpose. More info
+on [the pricing page](https://aws.amazon.com/s3/pricing/).
+
+### Backup Object Dependency Tree
+Backup objects can be configured to form a dependency tree like Makefile
+targets. By default, PALHM builds backup files simultaneously(*nb-workers*).
+In some environments this may not be desirable, especially on systems with
+HDDs[^1]. You can tune this behaviour by either ...
+
+* Setting *nb-workers* to 1
+* Grouping the backup objects so that the objects from one storage device are
+  built sequentially
+
+Say the system has one storage device that holds all the data necessary for
+service and another one on which the OS is installed. The system serves static
+HTTP, MySQL and OpenLDAP. The backup objects need to be grouped separately in
+order to reduce IO seek time.
+
+```jsonc
+{
+	"object-groups": [
+		{ "id": "root" },
+		{ "id": "http" },
+		{ "id": "sql", "depends": [ "http" ] },
+		{ "id": "ldap", "depends": [ "sql" ] }
+	]
+}
+```
+
+On start, the objects in the "root" and "http" groups will be built
+simultaneously. On completion of all the objects in "http", the objects in the
+"sql" and "ldap" groups will be built in that order.
+
+## Config JSON Format
+See [doc/config-fmt.md](doc/config-fmt.md).
+
+## Getting Started
+### Prerequisites
+* Python 3.7 or higher
+* `json_reformat` command provided by **yajl** for jsonc support (optional)
+* **awscli** and **boto3** for the aws-s3 backup backend (optional)
+
+### Examples
+* [localfs.sample.jsonc](src/conf/py-sample/localfs.sample.jsonc)
+* [aws.sample.jsonc](src/conf/py-sample/aws.sample.jsonc)
+
+## Files
+| Path | Desc |
+| - | - |
+| /etc/palhm/palhm.conf | The default config path |
+| /etc/palhm/conf.d/core.json | Commonly used Exec and Prefix definitions |
+
+## Advanced
+### Testing Config
+When writing a backup task, if you're worried about data loss caused by
+misconfiguration or vulnerabilities, you can use [systemd's
+sandboxing](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Sandboxing)
+to test out your config. The distro must be running systemd for this to work.
+
+```sh
+systemd-run -qP -p Nice=15 -p ProtectSystem=strict -p ReadOnlyPaths=/ -p PrivateDevices=true --wait /usr/local/bin/palhm.py run backup
+```
+
+If your config runs on a read-only file system, it's safe to assume that it
+does not require a read-write file system in order to run - in other words,
+that it does not modify the file system.
+
+Also, you can always do a dry run of your backup task by setting the backend
+to "**null**".
+
+## TODO
+* JSON schema validation
+
+## Footnotes
+[^1]: Even with SSDs, disrupting sequential reads decreases overall performance
diff --git a/doc/config-fmt.md b/doc/config-fmt.md
new file mode 100644
index 0000000..8487f6f
--- /dev/null
+++ b/doc/config-fmt.md
@@ -0,0 +1,369 @@
+# PALHM JSON Config Format
+PALHM is configured with JSON documents. PALHM supports both plain JSON and
+JSONC(JSON with comments). PALHM handles jsonc documents by converting them to
+json with an external command. PALHM distinguishes between the two formats by
+the file name extension. The conversion only occurs when the name of the
+config file ends with `.jsonc`.
+
+To support the IEEE754 infinity, the accepted data types for some values are
+both string and number. The former will be parsed by the relevant type class
+before being processed.
+
+## Structure
+The object feature tables in this document use the following format.
+
+| ATTR | MEANING |
+| - | - |
+| Key | The key string of the object |
+| Value | The value of the object |
+| Required | Whether the object is required as a member of the parent object |
+| Include | Include behaviour. "MERGE" or "OVERRIDE" |
+| Range | Range of the value if NUMERICAL |
+
+### include
+| ATTR | DESC |
+| - | - |
+| Key | "include" |
+| Value | ARRAY of STRINGs |
+| Required | NO |
+| Include | MERGE |
+
+```jsonc
+{
+	"include": [ "/etc/palhm/conf.d/core.json" ]
+}
+```
+
+The array is the list of paths to other config files to include in the current
+config. The config files in the array are merged into the config. No two Exec
+definitions or tasks with the same id can exist across included config files.
+Global settings such as "vl" and "nb-workers" will be silently overridden if
+they are defined in subsequent config files. Absolute or relative paths can be
+used. Relative paths are resolved in the same manner as the `#include`
+preprocessor in C: from the directory of the config file in which they appear.
+A config file cannot be included twice as PALHM detects circular inclusion by
+keeping track of the included config files.
+
+### modules
+| ATTR | DESC |
+| - | - |
+| Key | "modules" |
+| Value | ARRAY of STRINGs |
+| Required | NO |
+| Include | MERGE |
+
+The array is the list of PALHM modules to import. Run `palhm mods` for the
+list of modules installed on the system.
+
+```jsonc
+{
+	"modules": [ "aws" ]
+}
+```
+
+### nb-workers
+| ATTR | DESC |
+| - | - |
+| Key | "nb-workers" |
+| Value | INTEGER |
+| Required | NO |
+| Include | OVERRIDE |
+| Range | (-inf, inf) |
+
+```jsonc
+{
+	/* The number of threads the process is restricted to. Usually same as
+	 * $(nproc)
+	 */
+	"nb-workers": 0,
+	// Use Python default
+	"nb-workers": -1,
+	// No concurrency
+	"nb-workers": 1
+}
+```
+
+The maximum number of worker threads. Use a negative integer to use the Python
+default value(see
+[ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor)).
+Use zero to set it to the number of threads the process is allowed to
+utilise(see [os.sched_getaffinity()](https://docs.python.org/3/library/os.html?highlight=sched_getaffinity#os.sched_getaffinity)).
+Use a positive integer to restrict the number of worker threads.
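+
+To illustrate the translation (a sketch, not PALHM's actual implementation;
+`resolve_nb_workers` is a hypothetical helper), the value could map to a
+thread pool size like this:
+
+```python
+import os
+from concurrent.futures import ThreadPoolExecutor
+
+def resolve_nb_workers (nb_workers: int):
+	if nb_workers == 0:
+		# the number of CPUs the process is allowed to utilise (Linux)
+		return len(os.sched_getaffinity(0))
+	elif nb_workers < 0:
+		# None makes ThreadPoolExecutor pick the Python default
+		return None
+	# a positive integer restricts the pool size
+	return nb_workers
+
+# e.g. "nb-workers": 0 on an 8-core machine -> ThreadPoolExecutor(8)
+th_pool = ThreadPoolExecutor(max_workers = resolve_nb_workers(0))
+```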
+
+### vl
+| ATTR | DESC |
+| - | - |
+| Key | "vl" |
+| Value | INTEGER |
+| Required | NO |
+| Include | OVERRIDE |
+| Range | (-inf, inf) |
+
+```jsonc
+{
+	"vl": 0, // CRITICAL
+	"vl": 1, // ERROR
+	"vl": 2, // WARNING
+	"vl": 3, // INFO
+	"vl": 4, // DEBUG + 0
+	"vl": 5, // DEBUG + 1
+	"vl": 6  // DEBUG + 2
+	/* ... */
+}
+```
+
+The verbosity level: the higher, the more verbose. The value is translated
+from PALHM's "the higher the more verbose" scheme to Python's [logging
+levels](https://docs.python.org/3/library/logging.html#logging-levels).
+Defaults to 3.
+
+You don't really need this. The best practice is using the default value in
+the config and the `-q` option for a crond or timer unit. When debugging info
+is required, simply increase the verbosity with the `-v` option.
+
+### Execs
+| ATTR | DESC |
+| - | - |
+| Key | "execs" |
+| Value | ARRAY of [Exec Definition Object](#Exec_Definition_Object)s |
+| Required | NO |
+| Include | MERGE |
+
+#### Exec Definition Object
+* "id": id string **(required)**
+* "argv": argument vector **(required)**
+* "env": additional environment variable mapping. The value must be an object
+  whose members are string-to-string mappings. The key represents the name of
+  the variable and the value the value of the variable.
+* "ec": valid exit code range. Defaults to "==0"
+  * Inclusive range format: <MIN>-<MAX>
+  * Comparator format: \[C\]<N>
+  * Where
+    * MIN: minimum inclusive valid exit code
+    * MAX: maximum inclusive valid exit code
+    * N: integer for comparison
+    * C: comparator. One of <, <=, >, >= or ==. Defaults to ==
+  * Examples
+    * ">=0": ignore the exit code(always success)
+    * "<2" or "0-1": accept exit codes 0 and 1
+    * "1": accept exit code 1 only
+* "vl-stderr": verbosity level of stderr from the process. Defaults to 1
+* "vl-stdout": verbosity level of stdout from the process. Defaults to 3
+
+Note that stdout and stderr from the process are not passed to the logger.
+"vl-stderr" and "vl-stdout" are merely used to determine whether the outputs
+from the process have to be redirected to `/dev/null` or the stdio of the
+PALHM process.
+
+```jsonc
+{
+	"id": "pgp-enc",
+	"argv": [ "/bin/gpg", "-e", "-r", "backup", "--compress-algo", "none" ],
+	"env": {
+		"LC_ALL": "C",
+		"GNUPGHOME": "~/gnupg"
+	},
+	"ec": "==0",
+	"vl-stderr": 1,
+	"vl-stdout": 3
+}
+```
+
+### Tasks
+| ATTR | DESC |
+| - | - |
+| Key | "tasks" |
+| Value | ARRAY of OBJECTs |
+| Required | NO |
+| Include | MERGE |
+
+#### Predefined Pipeline Exec Object
+* "type": "exec" **(required)**
+* "exec-id": id of the Exec Definition Object **(required)**
+
+```jsonc
+{
+	"type": "exec",
+	"exec-id": "filter-zstd-parallel"
+}
+```
+
+#### Appended Pipeline Exec Object
+* "type": "exec-append" **(required)**
+* "exec-id": id of the Exec Definition Object **(required)**
+* "argv": array of strings - the argument vector to append **(required)**
+* "env": environment variable mapping object. See [#Exec Definition
+  Object](#Exec_Definition_Object)
+
+```jsonc
+{
+	"type": "exec-append",
+	"exec-id": "tar",
+	"argv": [ "-C", "/", "etc", "home", "root", "var" ],
+	"env": { "LC_ALL": "C" }
+}
+```
+
+#### Inline Pipeline Exec Object
+Same as the [#Exec Definition Object](#Exec_Definition_Object), except that
+this object does not require the "id" member.
+
+```jsonc
+{
+	"type": "exec-inline",
+	"argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+}
+```
+
+#### Backup Task Definition Object
+* "id": id string **(required)**
+* "type": "backup" **(required)**
+* "backend": see [README.md#Backend-param](../README.md#Backend-param)
+  **(required)**
+* "backend-param": see [README.md#Backend-param](../README.md#Backend-param)
+* "object-groups": array of [Backup Object Group Definition
+  Objects](#Backup_Object_Group_Definition_Object)
+* "objects": array of [Backup Object Definition
+  Objects](#Backup_Object_Definition_Object)
+
+```jsonc
+{
+	"id": "root-backup",
+	"type": "backup",
+	"backend": "null",
+	"backend-param": { /* ... */ },
+	"object-groups": [ /* ... */ ],
+	"objects": [ /* ... */ ]
+}
+```
+
+##### Backup Object Group Definition Object
+* "id": id string. Valid within the backup task **(required)**
+* "depends": array of other object group id strings on which the object group
+  depends. The other groups must appear before the group definition.
+
+```jsonc
+{
+	"object-groups": [
+		{ "id": "root" },
+		{ "id": "http" },
+		{ "id": "sql", "depends": [ "http" ] },
+		{ "id": "ldap", "depends": [ "sql" ] }
+	]
+}
+```
+
+##### Backup Object Definition Object
+* "path": path to the backup output on the backend **(required)**
+* "group": the id of a [Backup Object Group Definition
+  Object](#Backup_Object_Group_Definition_Object)
+* "pipeline": array of
+  * [Predefined Pipeline Exec Objects](#Predefined_Pipeline_Exec_Object)
+  * [Appended Pipeline Exec Objects](#Appended_Pipeline_Exec_Object)
+  * [Inline Pipeline Exec Objects](#Inline_Pipeline_Exec_Object)
+
+```jsonc
+{
+	"path": "srv.tar.zstd",
+	"group": "tar-1",
+	"pipeline": [
+		{
+			"type": "exec-append",
+			"exec-id": "tar",
+			"argv": [ "-C", "/", "srv" ]
+		},
+		{ "type": "exec", "exec-id": "filter-zstd-parallel" }
+	]
+}
+```
+
+A set of child processes for the backup output file will be created using the
+Exec objects in the array.
+
+The PALHM process waits for the child processes in the pipeline. The exit
+codes returned by the child processes are tested as they exit, one by one. If
+PALHM encounters a child process that returns an exit code outside the
+acceptable range, it rolls back the current copy of the backup before raising
+an exception. In this case, the exit codes of the remaining child processes
+are not processed[^1].
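+
+To illustrate these semantics with a sketch (not PALHM's actual code), here is
+a two-stage pipeline wired with `subprocess`, testing each exit code as the
+processes exit. The `ACCEPT` map is hypothetical and mirrors "ec" ranges such
+as tar's "<2".
+
+```python
+import subprocess
+
+# hypothetical acceptable exit code ranges, in the spirit of "ec"
+ACCEPT = { "tar": range(0, 2), "zstd": range(0, 1) }
+
+with open("srv.tar.zstd", "wb") as sink:
+	tar = subprocess.Popen(
+		[ "/bin/tar", "-C", "/", "-cf", "-", "srv" ],
+		stdout = subprocess.PIPE)
+	zstd = subprocess.Popen(
+		[ "/bin/zstd", "-T0" ],
+		stdin = tar.stdout,
+		stdout = sink)
+	tar.stdout.close() # so tar gets SIGPIPE if zstd exits early
+
+	for name, p in ( ("tar", tar), ("zstd", zstd) ):
+		ec = p.wait()
+		if ec not in ACCEPT[name]:
+			# a rollback of the backup copy would happen here; exit
+			# codes of the processes after this one go unchecked
+			raise ChildProcessError(name, ec)
+```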
+
+#### Routine Task Definition Object
+* "id": id string **(required)**
+* "type": "routine" **(required)**
+* "routine": array of
+  * [Predefined Pipeline Exec Objects](#Predefined_Pipeline_Exec_Object)
+  * [Appended Pipeline Exec Objects](#Appended_Pipeline_Exec_Object)
+  * [Inline Pipeline Exec Objects](#Inline_Pipeline_Exec_Object)
+  * [Builtin Function Objects](#Builtin_Function_Object)
+  * [Task Pointer Objects](#Task_Pointer_Object)
+
+```jsonc
+[
+	{
+		"id": "update",
+		"type": "routine",
+		"routine": [
+			{
+				"type": "exec-inline",
+				"argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+			},
+			{
+				"type": "exec-inline",
+				"argv": [ "/bin/sa-update" ]
+			}
+		]
+	},
+	{
+		"id": "reboot",
+		"type": "routine",
+		"routine": [
+			{
+				"type": "builtin",
+				"builtin-id": "sigmask",
+				"param": [ { "action": "block", "sig": [ "INT", "TERM" ] } ]
+			},
+			{
+				"type": "exec-inline",
+				"argv": [ "/sbin/reboot" ]
+			}
+		]
+	},
+	{
+		"id": "default",
+		"type": "routine",
+		"routine": [
+			{ "type": "task", "task-id": "update" },
+			{ "type": "task", "task-id": "reboot" }
+		]
+	}
+]
+```
+
+##### Task Pointer Object
+* "type": "task"
+* "task-id": id string of a
+  * [Backup Task Definition Object](#Backup_Task_Definition_Object)
+  * [Routine Task Definition Object](#Routine_Task_Definition_Object)
+
+##### Builtin Function Object
+* "type": "builtin"
+* "builtin-id": "sigmask"
+* "param": function-specific param object
+  * [sigmask Builtin Function Param](#sigmask_Builtin_Function_Param)
+
+##### sigmask Builtin Function Param
+The sigmask builtin function is the direct interface to
+[pthread_sigmask()](https://docs.python.org/3/library/signal.html?highlight=sigmask#signal.pthread_sigmask).
+Run `kill -l` for the valid signals on your system. This builtin function can
+only be used on Unix systems.
+
+* "action": "block" or "unblock"
+* "sig": array of signals. A numeric value and the name of a signal with or
+  without the "SIG" prefix are accepted. Valid values include "TERM",
+  "SIGTERM", 15, "INT", "SIGINT" and "2"
+
+## Footnotes
+[^1]: They're most likely 141(terminated by SIGPIPE)
diff --git a/src/conf/py-debug/aws.sample.jsonc b/src/conf/py-debug/aws.jsonc
index 46ad562..df9a63a 100644
--- a/src/conf/py-debug/aws.sample.jsonc
+++ b/src/conf/py-debug/aws.jsonc
@@ -1,6 +1,6 @@
 // PALHM Instance Config
 {
-	"include": [ "conf/py-debug/conf.d/core.jsonc" ],
+	"include": [ "conf.d/core.json" ],
 	"modules": [ "aws" ],
 	"nb-workers": 0, // assumed $(nproc) - default
 	// "nb-workers": 1, // to disable concurrent task despatch
@@ -48,7 +48,7 @@
 			"path": "pm-list.gz",
 			"group": "pre-start",
 			"pipeline": [
-				{ "type": "exec", "exec-id": "dnf-list-installed" },
+				{ "type": "exec", "exec-id": "rpm-list-installed" },
 				{ "type": "exec", "exec-id": "filter-gzip-plain" }
 			]
 		},
diff --git a/src/conf/py-debug/conf.d b/src/conf/py-debug/conf.d
new file mode 120000
index 0000000..a32163d
--- /dev/null
+++ b/src/conf/py-debug/conf.d
@@ -0,0 +1 @@
+../py-sample/conf.d
\ No newline at end of file
diff --git a/src/conf/py-debug/conf.d/core.jsonc b/src/conf/py-debug/conf.d/core.jsonc
deleted file mode 100644
index 4afe7f5..0000000
--- a/src/conf/py-debug/conf.d/core.jsonc
+++ /dev/null
@@ -1,44 +0,0 @@
-// PALHM Core Config
-{
-	"execs": [
-		// {
-		//	"id": "Exec ID",
-		//	"argv": [ "cmd", "--option1=opt1_val", "-o", "opt2_val" ],
-		//	"env": { "NAME": "VAL" },
-		//	"ec": "0", // this is assumed
-		//	"ec": "0-127", // inclusive range (not terminated by a signal)
-		//	"ec": "<1", // range (only 0)
-		//	"ec": "<=1", // range (0 and 1)
-		//	"ec": ">0", // range (always fail)
-		//	"ec": ">=0", // range (only 0)
-		//	"vl-stderr": 1 // verbosity level of stderr produced by this process
-		//	// verbosity level of stderr produced by this process. Ignored if used
-		//	// as part of pipeline
-		//	"vl-stdout": 2
-		// },
-		{
-			"id": "tar",
-			"argv": [ "/usr/bin/tar", "--xattrs", "--selinux" ]
-		},
-		{
-			"id": "filter-xz-parallel",
-			"argv": [ "/usr/bin/xz", "-T0" ]
-		},
-		{
-			"id": "filter-gzip-plain",
-			"argv": [ "/usr/bin/gzip" ]
-		},
-		{
-			"id": "filter-zstd-plain",
-			"argv": [ "/usr/bin/zstd" ]
-		},
-		{
-			"id": "dnf-list-installed",
-			"argv": [ "/usr/bin/dnf", "-yq", "list", "installed" ]
-		},
-		{
-			"id": "lsblk-all-json",
-			"argv": [ "/usr/bin/lsblk", "-JbOa" ]
-		}
-	]
-}
diff --git a/src/conf/py-debug/localfs.sample.jsonc b/src/conf/py-debug/localfs.jsonc
index ec12808..a33060d 100644
--- a/src/conf/py-debug/localfs.sample.jsonc
+++ b/src/conf/py-debug/localfs.jsonc
@@ -1,12 +1,12 @@
 // PALHM Instance Config
 {
-	"include": [ "conf/py-debug/conf.d/core.jsonc" ],
+	"include": [ "conf.d/core.json" ],
 	"nb-workers": 0, // assumed $(nproc) - default
 	// "nb-workers": 1, // to disable concurrent task despatch
 	// To unlimit the number of workers.
 	// Does not fail on resource alloc failure.
 	// "nb-workers": -1,
-	"vl": 4,
+	"vl": 3,
 	"tasks": [
 		{
 			"id": "backup",
@@ -45,7 +45,7 @@
 			"path": "pm-list.gz",
 			"group": "pre-start",
 			"pipeline": [
-				{ "type": "exec", "exec-id": "dnf-list-installed" },
+				{ "type": "exec", "exec-id": "rpm-list-installed" },
 				{ "type": "exec", "exec-id": "filter-gzip-plain" }
 			]
 		},
diff --git a/src/conf/py-debug/null.jsonc b/src/conf/py-debug/null.jsonc
new file mode 100644
index 0000000..b5ce9f8
--- /dev/null
+++ b/src/conf/py-debug/null.jsonc
@@ -0,0 +1,141 @@
+{
+	"include": [ "conf.d/core.json" ],
+	"nb-workers": 0, // assumed $(nproc)
+	// "nb-workers": 1, // to disable concurrent task despatch
+	// "nb-workers": -1, // to unlimit the number of workers.
+ "vl": 3, + "tasks": [ + { + "id": "backup", + "type": "backup", + "backend": "null", + "object-groups": [ + { "id": "pre-start" }, + { + "id": "data-dump", + "depends": [ "pre-start" ] + }, + { + "id": "tar-0", + "depends": [ "data-dump" ] + }, + { + "id": "tar-1", + "depends": [ "data-dump" ] + } + ], + "objects": [ + { + "path": "pm-list.zstd", + "group": "pre-start", + "pipeline": [ + { "type": "exec", "exec-id": "rpm-list-installed" }, + { "type": "exec", "exec-id": "filter-zstd-plain" } + ] + }, + { + "path": "lsblk.json.zstd", + "group": "pre-start", + "pipeline": [ + { "type": "exec", "exec-id": "lsblk-all-json" }, + { "type": "exec", "exec-id": "filter-zstd-plain" } + ] + }, + { + "path": "db.sql.zstd", + "group": "data-dump", + "pipeline": [ + { + "type": "exec-inline", + "argv": [ + "/bin/mysqldump", + "-uroot", + "--all-databases" + ] + }, + { "type": "exec", "exec-id": "filter-zstd-parallel" } + ] + }, + { + "path": "root.tar.zstd", + "group": "tar-0", + "pipeline": [ + { + "type": "exec-append", + "exec-id": "tar", + "argv": [ + "-C", + "/", + "/etc", + "/home", + "/root", + "/var" + ] + }, + { "type": "exec", "exec-id": "filter-zstd-parallel" } + ] + }, + { + "path": "srv.tar.zstd", + "group": "tar-1", + "pipeline": [ + { + "type": "exec-append", + "exec-id": "tar", + "argv": [ + "-C", + "/", + "/srv" + ] + }, + { "type": "exec", "exec-id": "filter-zstd-parallel" } + ] + } + ] + }, + { + "id": "update", + "type": "routine", + "routine": [ + { + "type": "exec-inline", + "argv": [ "/bin/dnf", "--refresh", "-yq", "update" ] + }, + { + "type": "exec-inline", + "argv": [ "/bin/sa-update" ] + } + ] + }, + { + "id": "reboot", + "type": "routine", + "routine": [ + { +/* + * Block SIGTERM from systemd/init.d so PALHM can exit gracefully after issuing + * reboot. 
+ */
+					"type": "builtin",
+					"builtin-id": "sigmask",
+					"param": [
+						{ "action": "block", "sig": [ "TERM" ] }
+					]
+				},
+				{
+					"type": "exec-inline",
+					"argv": [ "/sbin/reboot" ]
+				}
+			]
+		},
+		{
+			"id": "default",
+			"type": "routine",
+			"routine": [
+				{ "type": "task", "task-id": "backup" },
+				{ "type": "task", "task-id": "update" },
+				{ "type": "task", "task-id": "reboot" }
+			]
+		}
+	]
+}
diff --git a/src/conf/py-debug/null.sample.jsonc b/src/conf/py-debug/null.sample.jsonc
deleted file mode 100644
index a83de95..0000000
--- a/src/conf/py-debug/null.sample.jsonc
+++ /dev/null
@@ -1,140 +0,0 @@
-// PALHM Instance Config
-{
-	"include": [ "conf/py-debug/conf.d/core.jsonc" ],
-	"nb-workers": 1,
-	"vl": 3,
-	"tasks": [
-		{
-			"id": "backup",
-			"type": "backup",
-			"backend": "null",
-			"object-groups": [
-				{ "id": "pre-start" },
-				{
-					"id": "data-dump",
-					"depends": [ "pre-start" ]
-				},
-				{
-					"id": "tar-media-0",
-					"depends": [ "data-dump" ]
-				},
-				{
-					"id": "tar-media-1",
-					"depends": [ "data-dump" ]
-				}
-			],
-			"objects": [
-				{
-					"path": "pm-list.gz",
-					"group": "pre-start",
-					"pipeline": [
-						{ "type": "exec", "exec-id": "dnf-list-installed" },
-						{ "type": "exec", "exec-id": "filter-gzip-plain" }
-					]
-				},
-				{
-					"path": "lsblk.json.gz",
-					"group": "pre-start",
-					"pipeline": [
-						{
-							"type": "exec-append",
-							"exec-id": "lsblk-all-json",
-							"argv": [ "-a" ]
-						},
-						{ "type": "exec", "exec-id": "filter-gzip-plain" }
-					]
-				},
-				{
-					"path": "random-dump.sql.xz",
-					"group": "data-dump",
-					"pipeline": [
-						{
-							"type": "exec-inline",
-							"argv": [
-								"/bin/dd",
-								"if=/dev/urandom",
-								"bs=4096",
-								"count=512",
-								"status=none"
-							]
-						},
-						{ "type": "exec", "exec-id": "filter-xz-parallel" }
-					]
-				},
-				{
-					"path": "random-dump.0.xz",
-					"group": "tar-media-0",
-					"pipeline": [
-						{
-							"type": "exec-inline",
-							"argv": [
-								"/bin/dd",
-								"if=/dev/zero",
-								"bs=4096",
-								"count=512",
-								"status=none"
-							]
-						},
-						{ "type": "exec", "exec-id": "filter-xz-parallel" }
-					]
-				},
-				{
-					"path": "random-dump.1.xz",
-					"group": "tar-media-1",
-					"pipeline": [
-						{
-							"type": "exec-inline",
-							"argv": [
-								"/bin/dd",
-								"if=/dev/zero",
-								"bs=4096",
-								"count=512",
-								"status=none"
-							]
-						},
-						{ "type": "exec", "exec-id": "filter-xz-parallel" }
-					]
-				}
-			]
-		},
-		{
-			"id": "update",
-			"type": "routine",
-			"routine": [
-				{
-					"type": "exec-inline",
-					"argv": [ "/bin/echo", "0" ]
-				},
-				{
-					"type": "exec-inline",
-					"argv": [ "/bin/sleep", "1" ]
-				},
-				{
-					"type": "exec-inline",
-					"argv": [ "/bin/echo", "1" ]
-				}
-			]
-		},
-		{
-			"id": "default",
-			"type": "routine",
-			"routine": [
-				{ "type": "task", "task-id": "backup" },
-				{ "type": "task", "task-id": "update" },
-				{
-					// Block SIGTERM from systemd/init.d so the program is not
-					// affected by the reboot command.
-					"type": "builtin",
-					"builtin-id": "sigmask",
-					"param": [
-						{ "action": "block", "sig": [ "TERM" ] }
-					]
-				},
-				{
-					"type": "exec-inline",
-					"argv": [ "/bin/true" ]
-				}
-			]
-		}
-	]
-}
diff --git a/src/conf/py-debug/palhm.jsonc b/src/conf/py-debug/palhm.jsonc
new file mode 120000
index 0000000..fb68baf
--- /dev/null
+++ b/src/conf/py-debug/palhm.jsonc
@@ -0,0 +1 @@
+aws.jsonc
\ No newline at end of file
diff --git a/src/conf/py-sample/conf.d/core.json b/src/conf/py-sample/conf.d/core.json
new file mode 100644
index 0000000..46d3feb
--- /dev/null
+++ b/src/conf/py-sample/conf.d/core.json
@@ -0,0 +1,41 @@
+{
+	"execs": [
+		{
+			"id": "tar",
+			"argv": [ "/bin/tar", "--xattrs", "--selinux", "--warning=none", "-cf", "-" ],
+			"ec": "<2"
+		},
+		{
+			"id": "filter-xz-parallel",
+			"argv": [ "/bin/xz", "-T0" ]
+		},
+		{
+			"id": "filter-gzip-plain",
+			"argv": [ "/bin/gzip" ]
+		},
+		{
+			"id": "filter-zstd-plain",
+			"argv": [ "/bin/zstd" ]
+		},
+		{
+			"id": "filter-zstd-parallel",
+			"argv": [ "/bin/zstd", "-T0" ]
+		},
+		{
+			"id": "rpm-list-installed",
+			"argv": [ "/bin/rpm", "-qa" ]
+		},
+		{
+			"id": "dpkg-list-installed",
+			"argv": [ "/bin/dpkg-query", "-l" ]
+		},
+		{
+			"id": "lsblk-all-json",
+			"argv": [ "/bin/lsblk", "-JbOa" ]
+		},
+		{
+			"id": "os-release",
+			"argv": [ "/bin/cat", "/etc/os-release" ]
+		}
+	]
+}
diff --git a/src/conf/py-sample/sample.jsonc b/src/conf/py-sample/sample.jsonc
new file mode 100644
index 0000000..f1c4501
--- /dev/null
+++ b/src/conf/py-sample/sample.jsonc
@@ -0,0 +1,127 @@
+{
+	"include": [ "/etc/palhm/conf.d/core.json" ],
+	// "modules": [ "aws" ],
+	"nb-workers": 0,
+	// "vl": 4,
+	"tasks": [
+		{
+			"id": "backup",
+			"type": "backup",
+			"backend": "null",
+			"backend-param": {},
+			"object-groups": [
+				{ "id": "meta-run" },
+				{
+					"id": "data-dump",
+					"depends": [ "meta-run" ]
+				},
+				{
+					"id": "tar-root",
+					"depends": [ "data-dump" ]
+				}
+			],
+			"objects": [
+				{
+					"path": "os-release",
+					"group": "meta-run",
+					"pipeline": [ { "type": "exec", "exec-id": "os-release" } ]
+				},
+				{
+					"path": "pm-list.zstd",
+					"group": "meta-run",
+					"pipeline": [
+						{ "type": "exec", "exec-id": "rpm-list-installed" },
+						{ "type": "exec", "exec-id": "filter-zstd-plain" }
+					]
+				},
+				{
+					"path": "lsblk.json.zstd",
+					"group": "meta-run",
+					"pipeline": [
+						{ "type": "exec", "exec-id": "lsblk-all-json" },
+						{ "type": "exec", "exec-id": "filter-zstd-plain" }
+					]
+				},
+				// {
+				//	"path": "db.sql.zstd",
+				//	"group": "data-dump",
+				//	"pipeline": [
+				//		{
+				//			"type": "exec-inline",
+				//			"argv": [
+				//				"/bin/mysqldump",
+				//				"-uroot",
+				//				"--all-databases"
+				//			]
+				//			// "ec": "<=2" // don't fail when the DB is offline
+				//		},
+				//		{ "type": "exec", "exec-id": "filter-zstd-parallel" }
+				//	]
+				// },
+				{
+					"path": "root.tar.zstd",
+					"group": "tar-root",
+					"pipeline": [
+						{
+							"type": "exec-append",
+							"exec-id": "tar",
+							"argv": [
+								"-C",
+								"/",
+								"etc",
+								"home",
+								"root",
+								"var"
+							]
+						},
+						{ "type": "exec", "exec-id": "filter-zstd-parallel" }
+					]
+				}
+			]
+		},
+		{
+			"id": "update",
+			"type": "routine",
+			"routine": [
+				{
+					"type": "exec-inline",
+					"argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+				}
+				// {
+				//	"type": "exec-inline",
+				//	"argv": [ "/bin/sa-update" ]
+				// }
+			]
+		},
+		{
+			"id": "reboot",
+			"type": "routine",
+			"routine": [
+				{
+/*
+ * Block SIGTERM from systemd/init.d so PALHM can exit gracefully after issuing
+ * reboot.
+ */
+					"type": "builtin",
+					"builtin-id": "sigmask",
+					"param": [
+						{ "action": "block", "sig": [ "TERM" ] }
+					]
+				},
+				{
+					"type": "exec-inline",
+					"argv": [ "/sbin/reboot" ]
+				}
+			]
+		},
+		{
+			"id": "default",
+			"type": "routine",
+			"routine": [
+				{ "type": "task", "task-id": "backup" },
+				{ "type": "task", "task-id": "update" },
+				{ "type": "task", "task-id": "reboot" }
+			]
+		}
+	]
+}
diff --git a/src/palhm.py b/src/palhm.py
index f3f412b..722664e 100755
--- a/src/palhm.py
+++ b/src/palhm.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+import importlib
 import logging
+import os
 import sys
 from abc import ABC, abstractmethod
 from getopt import getopt
@@ -49,7 +51,7 @@ class RunCmd (Cmd):
 	def do_cmd (self):
 		ProgConf.alloc_ctx()
 
-		if self.args:
+		if self.args and self.args[0]: # empty string as "default"
 			task = self.args[0]
 		else:
 			task = palhm.DEFAULT.RUN_TASK.value
@@ -64,6 +66,53 @@ class RunCmd (Cmd):
 Run a task in config. Run the "''' + palhm.DEFAULT.RUN_TASK.value + '''" task
 if [TASK] is not specified.''')
 
+class ModsCmd (Cmd):
+	def __init__ (self, *args, **kwargs):
+		pass
+
+	def _walk_mods (self, path: str):
+		def is_mod_dir (path: str) -> bool:
+			try:
+				for i in os.scandir(path):
+					if i.name.startswith("__init__.py"):
+						return True
+			except NotADirectoryError:
+				pass
+			return False
+
+		def is_mod_file (path: str) -> str:
+			if not os.path.isfile(path):
+				return None
+
+			try:
+				pos = path.rindex(".")
+				if path[pos + 1:].startswith("py"):
+					return os.path.basename(path[:pos])
+			except ValueError:
+				pass
+
+		for i in os.scandir(path):
+			if i.name.startswith("_"):
+				continue
+			elif is_mod_dir(i.path):
+				print(i.name)
+				self._walk_mods(i.path)
+			else:
+				name = is_mod_file(i.path)
+				if name:
+					print(name)
+
+	def do_cmd (self):
+		for i in importlib.util.find_spec("palhm.mod").submodule_search_locations:
+			self._walk_mods(i)
+
+		return 0
+
+	def print_help ():
+		print(
+"Usage: " + sys.argv[0] + " mods" + '''
+Prints the available modules to stdout.''')
+
 class HelpCmd (Cmd):
 	def __init__ (self, optlist, args):
 		self.optlist = optlist
@@ -84,7 +133,7 @@
 		print(
 "Usage: " + sys.argv[0] + " [options] CMD [command options ...]" + '''
 Options:
-  -q       Set the verbosity level to 0(FATAL error only). Overrides config
+  -q       Set the verbosity level to 0(CRITICAL). Overrides config
   -v       Increase the verbosity level by 1. Overrides config
  -f FILE  Load config from FILE instead of the hard-coded default
 Config: ''' + ProgConf.conf + '''
@@ -92,14 +141,16 @@
 Commands:
   run     run a task
   config  load config and print the contents
   help [CMD]  print this message and exit normally if [CMD] is not specified.
-              Print usage of [CMD] otherwise''')
+              Print usage of [CMD] otherwise
+  mods    list available modules''')
 		return 0
 
 
 CmdMap = {
 	"config": ConfigCmd,
 	"run": RunCmd,
-	"help": HelpCmd
+	"help": HelpCmd,
+	"mods": ModsCmd
 }
 
 optlist, args = getopt(sys.argv[1:], "qvf:")
@@ -115,14 +166,13 @@
 	err_unknown_cmd()
 
 for p in optlist:
-	match p[0]:
-		case "-q": ProgConf.override_vl = logging.ERROR
-		case "-v":
-			if ProgConf.override_vl is None:
-				ProgConf.override_vl = palhm.DEFAULT.VL.value - 10
-			else:
-				ProgConf.override_vl -= 10
-		case "-f": ProgConf.conf = p[1]
+	if p[0] == "-q": ProgConf.override_vl = logging.ERROR
+	elif p[0] == "-v":
+		if ProgConf.override_vl is None:
+			ProgConf.override_vl = palhm.DEFAULT.VL.value - 10
+		else:
+			ProgConf.override_vl -= 10
+	elif p[0] == "-f": ProgConf.conf = p[1]
 
 logging.basicConfig(format = "%(name)s %(message)s")
diff --git a/src/palhm/__init__.py b/src/palhm/__init__.py
index 8c44ace..7e5afb4 100644
--- a/src/palhm/__init__.py
+++ b/src/palhm/__init__.py
@@ -1,3 +1,4 @@
+from .exceptions import InvalidConfigError
 import io
 import json
 import logging
@@ -15,8 +16,6 @@
 from datetime import datetime, timezone
 from decimal import Decimal
 from enum import Enum
 from importlib import import_module
-from mailbox import FormatError
-from multiprocessing import ProcessError
 from typing import Iterable
 
 
@@ -72,10 +71,16 @@ class GlobalContext:
 
 		for m in jobj.get("modules", iter(())):
 			loaded = self.modules[m] = import_module("." + m, "palhm.mod")
-			intersect = set(self.backup_backends.keys()).intersection(loaded.backup_backends.keys())
-			if intersect:
-				raise RuntimeError("Backup Backend conflict detected. ID(s): " + intersect)
-			self.backup_backends |= loaded.backup_backends
+
+			if hasattr(loaded, "backup_backends"):
+				intersect = (
+					set(self.backup_backends.keys())
+					.intersection(loaded.backup_backends.keys()))
+				if intersect:
+					raise InvalidConfigError(
+						"Backup Backend conflict detected.",
+						intersect)
+				self.backup_backends |= loaded.backup_backends
 
 	def get_vl (self) -> int:
 		return self.vl
@@ -123,36 +128,33 @@ class Exec (Runnable, ExecvHolder):
 				b = int(m[2])
 				ret = range(a, b + 1)
 				if len(ret) == 0:
-					raise ValueError("Invalid range: " + ec)
+					raise ValueError("Invalid range", ec)
 				return ret
 
 		m = re.match(Exec.RE.EC_RANGE.value, x)
 		if m:
 			op = str(m[1]) if m[1] else "=="
 			n = int(m[2])
-			match op:
-				case "==": return range(n, n + 1)
-				case "<": return range(0, n)
-				case "<=": return range(0, n + 1)
-				case ">": return range(n + 1, 256)
-				case ">=": return range(n, 256)
-				case _: raise RuntimeError("FIXME")
+			if op == "==": return range(n, n + 1)
+			elif op == "<": return range(0, n)
+			elif op == "<=": return range(0, n + 1)
+			elif op == ">": return range(n + 1, 256)
+			elif op == ">=": return range(n, 256)
+			else: raise RuntimeError("FIXME")
 
-		raise ValueError("Invalid value: " + ec)
+		raise ValueError("Invalid value", ec)
 
 	def from_conf (ctx: GlobalContext, jobj: dict):
-		match jobj["type"]:
-			case "exec":
-				exec_id = jobj["exec-id"]
-				exec = ctx.exec_map[exec_id]
-				ret = exec
-			case "exec-append":
-				exec_id = jobj["exec-id"]
-				exec = ctx.exec_map[exec_id]
-				ret = exec.mkappend(jobj["argv"])
-			case "exec-inline":
-				ret = Exec(jobj)
-			# case _:
-			# 	raise RuntimeError("FIXME")
+		if jobj["type"] == "exec":
+			exec_id = jobj["exec-id"]
+			exec = ctx.exec_map[exec_id]
+			ret = exec
+		elif jobj["type"] == "exec-append":
+			exec_id = jobj["exec-id"]
+			exec = ctx.exec_map[exec_id]
+			ret = exec.mkappend(jobj["argv"], jobj.get("env", {}))
+		elif jobj["type"] == "exec-inline":
jobj["type"] == "exec-inline": + ret = Exec(jobj) + else: raise RuntimeError("FIXME") ret.vl_stderr = jobj.get("vl-stderr", ret.vl_stderr) ret.vl_stdout = jobj.get("vl-stdout", ret.vl_stdout) @@ -173,9 +175,10 @@ class Exec (Runnable, ExecvHolder): self.vl_stderr = jobj.get("vl-stderr", Exec.DEFAULT.VL_STDERR.value) self.vl_stdout = jobj.get("vl-stdout", Exec.DEFAULT.VL_STDOUT.value) - def mkappend (self, extra_argv: Iterable): + def mkappend (self, extra_argv: Iterable, extra_env: dict = {}): ny = deepcopy(self) ny.argv.extend(extra_argv) + ny.env |= extra_env return ny def run (self, ctx: GlobalContext): @@ -201,8 +204,10 @@ class Exec (Runnable, ExecvHolder): def raise_oob_ec (self, ec: int): if not self.test_ec(ec): - raise ProcessError( - str(self) + " returned " + str(ec) + " not in " + str(self.ec)) + raise ChildProcessError( + str(self) + ": exit code test fail", + ec, + self.ec) def __str__ (self) -> str: return str().join( @@ -308,6 +313,18 @@ class NullBackupBackend (BackupBackend): def rotate (self, ctx: GlobalContext): pass + def _fs_usage_info (self, ctx: GlobalContext) -> Iterable[tuple[str, int]]: + return iter(()) + + def _excl_fs_copies (self, ctx: GlobalContext) -> set[str]: + return set[str]() + + def _rm_fs_recursive (self, ctx: GlobalContext, pl: Iterable[str]): + pass + + def _fs_quota_target (self, ctx: GlobalContext) -> tuple[Decimal, Decimal]: + return (Decimal('inf'), Decimal('inf')) + def __str__ (self): return "null" @@ -469,7 +486,7 @@ class RoutineTask (Task): def run (self, ctx: GlobalContext): for r in self.routines: - self.l.debug("run: " + str(r)) + self.l.info("run: " + str(r)) p = r.run(ctx) return self @@ -536,7 +553,7 @@ class DepResolv: def build (og_map: dict): def dive (og: BackupObjectGroup, obj_set: set, recurse_path: set): if og in recurse_path: - raise RecursionError("Circular reference detected.") + raise RecursionError("Circular reference detected whilst building dependency tree") recurse_path.add(og) obj_set.update(og.objects) @@ -611,7 +628,7 @@ class BackupTask (Task): for og in jobj_ogrps: ogid = og["id"] if ogid in og_map: - raise KeyError("Duplicate object group: " + ogid) + raise KeyError("Duplicate object group", ogid) og_map[ogid] = BackupObjectGroup() # load depends @@ -620,7 +637,8 @@ class BackupTask (Task): for depend in og.get("depends", iter(())): if ogid == depend: raise ReferenceError( - "An object group dependent on itself: " + ogid) + "An object group dependent on itself", + ogid) og_map[ogid].depends.add(og_map[depend]) # implicit default @@ -633,7 +651,7 @@ class BackupTask (Task): gid = jo.get("group", DEFAULT.OBJ_GRP.value) if path in obj_path_set: - raise KeyError("Duplicate path: " + path) + raise KeyError("Duplicate path", path) obj_path_set.add(path) og_map[gid].objects.append(BackupObject(jo, ctx)) @@ -651,6 +669,7 @@ class BackupTask (Task): for bo in self.dep_tree.avail_q: bo.bbctx = bbctx + self.l.info("make: " + bo.path) self.l.debug("despatch: " + str(bo)) fs.add(th_pool.submit(bo.run, ctx)) self.dep_tree.avail_q.clear() @@ -686,11 +705,11 @@ def merge_conf (a: dict, b: dict) -> dict: # exec conflicts c = chk_dup_id("execs", a, b) if c: - raise KeyError("Dup execs: " + c) + raise KeyError("Dup execs", c) # task conflicts c = chk_dup_id("tasks", a, b) if c: - raise KeyError("Dup tasks: " + c) + raise KeyError("Dup tasks", c) return a | b @@ -701,27 +720,38 @@ def load_jsonc (path: str) -> dict: stdin = in_file, capture_output = True) if p.returncode != 0: - raise FormatError(path) + raise ChildProcessError(p, 
 
 	return json.load(io.BytesIO(p.stdout))
 
 
 def load_conf (path: str, inc_set: set = set()) -> dict:
-	if path in inc_set:
-		raise ReferenceError("Config included multiple times: " + path)
-	inc_set.add(path)
+	JSONC_EXT = ".jsonc"
 
-	if path.endswith(".jsonc"):
-		jobj = load_jsonc(path)
+	rpath = os.path.realpath(path, strict = True)
+	if rpath in inc_set:
+		raise RecursionError("Config already included", rpath)
+	inc_set.add(rpath)
+
+	if rpath[-len(JSONC_EXT):].lower() == JSONC_EXT:
+		jobj = load_jsonc(rpath)
 	else:
-		with open(path) as file:
+		with open(rpath) as file:
 			jobj = json.load(file)
 
 	# TODO: do schema validation
 
+	# pushd
+	saved_cwd = os.getcwd()
+	dn = os.path.dirname(rpath)
+	os.chdir(dn)
+
 	for i in jobj.get("include", iter(())):
 		inc_conf = load_conf(i, inc_set)
 		jobj = merge_conf(jobj, inc_conf)
 
+	# popd
+	os.chdir(saved_cwd)
+
 	return jobj
 
 def setup_conf (jobj: dict) -> GlobalContext:
diff --git a/src/palhm/exceptions.py b/src/palhm/exceptions.py
new file mode 100644
index 0000000..f63f2f9
--- /dev/null
+++ b/src/palhm/exceptions.py
@@ -0,0 +1,2 @@
+class InvalidConfigError (Exception): ...
+class APIFailError (Exception): ...
diff --git a/src/palhm/mod/aws.py b/src/palhm/mod/aws.py
index fcb16f1..01fb8bc 100644
--- a/src/palhm/mod/aws.py
+++ b/src/palhm/mod/aws.py
@@ -7,6 +7,7 @@ from typing import Callable, Iterable
 import boto3
 import botocore
 from palhm import BackupBackend, Exec, GlobalContext
+from palhm.exceptions import APIFailError
 
 
 class CONST (Enum):
@@ -50,10 +51,13 @@ class S3BackupBackend (BackupBackend):
 					Key = self.cur_backup_key)
 				sleep(1) # Make sure we don't proceed
-				raise FileExistsError(self.cur_backup_uri)
+				raise FileExistsError(
+					"Failed to set up a backup dir. Check the prefix function",
+					self.cur_backup_uri)
 			except botocore.exceptions.ClientError as e:
-				if e.response["Error"]["Code"] != "404": # expected status code
-					raise
+				c = e.response["Error"]["Code"]
+				if c != "404": # expected status code
+					raise APIFailError("Unexpected status code", c)
 
 		return super().open(ctx)
 
@@ -125,8 +129,9 @@ class S3BackupBackend (BackupBackend):
 			o_key = i["Key"]
 			o_size = i.get("Size", 0)
 			if not o_key.startswith(self.root_key):
-				raise RuntimeError("The endpoint returned an object " +
-					"irrelevant to the request: " + o_key)
+				raise APIFailError(
+					"The endpoint returned an object irrelevant to the request",
+					o_key)
 
 			l = o_key.find("/", len(prefix))
 			if l >= 0:
@@ -200,7 +205,7 @@ class S3BackupBackend (BackupBackend):
 	def rotate (self, ctx: GlobalContext):
 		ret = super()._do_fs_rotate(ctx)
 
-		if self.sc_rot:
+		if self.sc_rot and self.sc_rot != self.sc_sink:
 			def chsc (k):
 				self.client.copy_object(
 					Bucket = self.bucket,