author     David Timber <dxdt@dev.snart.me>  2022-05-13 14:45:59 +0800
committer  David Timber <dxdt@dev.snart.me>  2022-05-13 14:45:59 +0800
commit     515bf01a057f0b40d89c6b7b247eb4e2fc19d1b7
tree       1a625d2a85b858227c3bd67955da3da90be49bda
parent     a01c87416b241315a9268bb4eb5206ade8328069
Impl ...
- launch.json: change debug cwd to the project root dir
- Add subcmd "mods"
- Docs
- Tidy up sample and debug config files
- Change core exec: 'dnf-list-installed' -> 'rpm-list-installed' as dnf does not work on ro fs
- Accept the exit code 1 from tar (allow live fs change)
- Add the generic sample config
- Fix 'run' subcmd not accepting empty task-id
- Change module loading: modules are not required to have the 'backup_backends' var
- Reduce required Python version by removing the use of match ... case
- Fix 'exec-append' not taking 'env' into account
- Remove use of exceptions from irrelevant packages
- Fix unimpl methods of NullBackupBackend
- Tidy up instantiation of raised exceptions
- Change "include" behaviour
  - Relative config paths are now resolved like the #include C preprocessor directive
  - Fix bug where the "include" circular ref check is not done with absolute paths of config files
- Add own exception hierarchy
- aws-s3: change storage class only when "rot-storage-class" is different from "sink-storage-class"
-rw-r--r--  .vscode/launch.json  30
-rw-r--r--  README.md  247
-rw-r--r--  doc/config-fmt.md  369
-rw-r--r--  src/conf/py-debug/aws.jsonc (renamed from src/conf/py-debug/aws.sample.jsonc)  4
l---------  src/conf/py-debug/conf.d  1
-rw-r--r--  src/conf/py-debug/conf.d/core.jsonc  44
-rw-r--r--  src/conf/py-debug/localfs.jsonc (renamed from src/conf/py-debug/localfs.sample.jsonc)  6
-rw-r--r--  src/conf/py-debug/null.jsonc  141
-rw-r--r--  src/conf/py-debug/null.sample.jsonc  140
l---------  src/conf/py-debug/palhm.jsonc  1
-rw-r--r--  src/conf/py-sample/conf.d/core.json  41
-rw-r--r--  src/conf/py-sample/sample.jsonc  127
-rwxr-xr-x  src/palhm.py  74
-rw-r--r--  src/palhm/__init__.py  120
-rw-r--r--  src/palhm/exceptions.py  2
-rw-r--r--  src/palhm/mod/aws.py  17
16 files changed, 1098 insertions, 266 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json
index d126290..2d8e2c8 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -8,13 +8,19 @@
"name": "palhm config",
"type": "python",
"request": "launch",
- "cwd": "${workspaceFolder}/src",
- "program": "palhm.py",
- "args": [
- "-f",
- "conf/py-debug/palhm.jsonc",
- "config"
- ],
+ "cwd": "${workspaceFolder}",
+ "program": "src/palhm.py",
+ "args": [ "-f", "src/conf/py-debug/palhm.jsonc", "config" ],
+ "console": "integratedTerminal",
+ "justMyCode": true
+ },
+ {
+ "name": "palhm mods",
+ "type": "python",
+ "request": "launch",
+ "cwd": "${workspaceFolder}",
+ "program": "src/palhm.py",
+ "args": [ "mods" ],
"console": "integratedTerminal",
"justMyCode": true
},
@@ -22,13 +28,9 @@
"name": "palhm run default",
"type": "python",
"request": "launch",
- "cwd": "${workspaceFolder}/src",
- "program": "palhm.py",
- "args": [
- "-f",
- "conf/py-debug/palhm.jsonc",
- "run"
- ],
+ "cwd": "${workspaceFolder}",
+ "program": "src/palhm.py",
+ "args": [ "-f", "src/conf/py-debug/palhm.jsonc", "run" ],
"console": "integratedTerminal",
"justMyCode": true
}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..30939c2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,247 @@
+# Periodic Automatic Live Host Maintenance (PALHM)
+This is a script that automates periodic maintenance of a machine. PALHM covers
+routine sequential command runs as well as "hot" or "live" backup of the
+running host to a backend of your choice.
+
+PALHM addresses the problems of the traditional, lazy method of copying entire
+drives by:
+
+* Using high-level data dump tools like mysqldump and slapcat
+* Excluding data obtainable from the modern package manager, such as the
+  contents of /usr, to reduce cost
+* Dumping metadata crucial when restoring from backup, using tools like lsblk
+
+The safest way to back up has always been taking the system offline and
+tar'ing the file system or making an image of the storage device. This may not
+be practical in setups where downtime is unacceptable or where allocating more
+resources for a backup task is not cost-efficient. This is where this script
+comes into play.
+
+## TL;DR
+Go to [#Examples](#examples).
+
+## Routine Task
+The Routine Task is a set of routines that are executed sequentially. It can
+consist of commands (Execs) and other previously defined tasks. Routine Tasks
+are the absolute basics - you may incorporate custom shell scripts or other
+executables to do complex routines.
+
+## Backup Task
+PALHM supports backup to different storage backends. It also automates rotation
+of backup copies on the supported storage backends. **aws-s3** and **localfs**
+are currently implemented. You may use localfs to store backups on NFS or
+Samba mount points. The special **null** backend is for testing purposes.
+
+The files produced as the end product of a backup are called "Backup Objects".
+Backup Objects have two essential attributes.
+
+* **pipeline**: commands used to generate the backup output file
+* **path**: path to the output file on the backend
+
+For example, this object definition produces a mysql data dump compressed with
+zstd and encrypted with the public key "backup-pub-key", named
+"all-db.sql.zstd.pgp".
+
+```jsonc
+{
+ "path": "all-db.sql.zstd.pgp",
+ "pipeline": [
+ { "type": "exec-inline", "argv": [ "/bin/mysqldump", "-uroot", "--all-databases" ] },
+ { "type": "exec-inline", "argv": [ "/bin/zstd" ] },
+ { "type": "exec-inline", "argv": [ "/bin/gpg", "-e", "-r", "backup-pub-key", "--compress-algo", "none" ] }
+ ]
+}
+```
+
+This is the equivalent of doing this from the shell
+
+```sh
+mysqldump -uroot --all-databases | zstd | gpg -e -r backup-pub-key --compress-algo none > all-db.sql.zstd.pgp
+```
+
+except that the output file can be placed on a cloud service depending on the
+backend used. Frequently used commands like "compression filters" are defined
+in the core config ([conf.d/core.json](src/conf/py-sample/conf.d/core.json)) as
+Exec definitions.
+
+### Backup Object Path
+The final path for a Backup Object is formulated as follows.
+
+```
+localfs:
+ /media/backup/localhost/2022-05-01T06:59:17+00:00/all-db.sql.zstd.pgp
+|         ROOT          |         PREFIX          |       PATH        |
+
+aws-s3:
+ s3://your-s3-bucket/backup/your-host/2022-05-01T06:59:17+00:00/all-db.sql.zstd.pgp
+|      BUCKET       |      ROOT      |         PREFIX          |       PATH        |
+```
+
+| ATTR | DESC |
+| - | - |
+| ROOT | The root directory for backup |
+| PREFIX | The name of the backup |
+| PATH | The output path of the backup object |
+
+The default format of PREFIX is the output of `date --utc --iso-8601=seconds`.
+Backup rotation is performed using PREFIX, so PREFIX must be based on values
+that, when sorted in ascending order, place the oldest backup first.
+
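+For illustration, the PREFIX in the example paths above was produced by:
+
+```sh
+$ date --utc --iso-8601=seconds
+2022-05-01T06:59:17+00:00
+```
+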
+PATH may contain the directory separator ("/" or "\\"). The backend may or may
+not support this. The localfs backend handles it by doing `mkdir -p` on the
+path before creating a "sink" for output files. Using "/" in PATH on Windows
+will fail due to an NTFS limitation. The aws-s3 backend passes the directory
+separator "/" through to the Boto3 API, and subdirectory objects will be
+created implicitly.
+
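+A minimal sketch (the path is hypothetical): on localfs, this object lands in
+`<ROOT>/<PREFIX>/db/`; on aws-s3, "db/" simply becomes part of the object key.
+
+```jsonc
+{
+  "path": "db/all-db.sql.zstd.pgp", // PATH containing a directory separator
+  "pipeline": [ /* ... */ ]
+}
+```
+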
+### Backend-param
+The parameters specific to backup backends can be set using backend-param. Here
+are the parameters that commonly appear across backends.
+
+* root: (string) the path to the backup root
+* nb-copy-limit: (decimal) the number of most recent backups to keep
+* root-size-limit: (decimal) the total size of the backup root in bytes
+* prefix: (TODO) reserved for future
+
+The value of the decimal type is either a JSON number or a string that
+represents a decimal number. The IEEE754 infinity representations ("inf",
+"Infinity", "-inf" or "-Infinity") can be used for *nb-copy-limit* and
+*root-size-limit* to disable either or both of the attributes. The decimal type
+is not affected by the limits of the IEEE754 type (the 2^53 integer part). The
+fractional parts of the numbers are ignored as they are compared against
+integers.
+
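+A minimal sketch of the accepted forms (the values are hypothetical):
+
+```jsonc
+{
+  "backend-param": {
+    "nb-copy-limit": 7, // a JSON number
+    "root-size-limit": "1099511627776" // a decimal string (1TiB in bytes)
+    // "nb-copy-limit": "Infinity" // disables the attribute
+  }
+}
+```
+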
+#### Localfs
+```jsonc
+{
+ "tasks": [
+ {
+ "id": "backup",
+ "type": "backup",
+ "backend": "localfs",
+ "backend-param": {
+ "root": "/media/backup/localhost", // (required)
+ "dmode": "755", // (optional) mode for new directories
+ "fmode": "644", // (optional) mode for new files
+ "nb-copy-limit": "Infinity", // (optional)
+ "root-size-limit": "Infinity" // (optional)
+ },
+ "object-groups": [ /* ... */ ],
+ "objects": [ /* ... */ ]
+ }
+ ]
+}
+```
+
+#### aws-s3
+```jsonc
+{
+ "tasks": [
+ {
+ "id": "backup",
+ "type": "backup",
+ "backend": "aws-s3",
+ "backend-param": {
+ "profile": "default", // (optional) AWS client profile. Defaults to "default"
+ "bucket": "palhm.test", // (required) S3 bucket name
+ "root": "/palhm/backup", // (required)
+ "sink-storage-class": "STANDARD", // (optional) storage class for new uploads
+ "rot-storage-class": "STANDARD", // (optional) storage class for final uploads
+ "nb-copy-limit": "Infinity", // (optional)
+ "root-size-limit": "Infinity" // (optional)
+ },
+ "object-groups": [ /* ... */ ],
+ "objects": [ /* ... */ ]
+ }
+ ]
+}
+```
+
+For profiles configured for root, see `~/.aws/config`. Run `aws configure help`
+for more info.
+
+For possible values for storage class, run `aws s3 cp help`.
+
+If you wish to keep backup copies in Glacier, you may want to upload backup
+objects as STANDARD-IA first and change the storage class to GLACIER at the
+rotate stage, because in the event of failure, PALHM rolls back the process by
+deleting the objects already uploaded to the bucket. You may be charged for
+objects stored in Glacier as the minimum storage duration is 90 days (as of
+2022). The **rot-storage-class** attribute serves this very purpose. More info
+on [the pricing page](https://aws.amazon.com/s3/pricing/).
+
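+A sketch of that arrangement (note that the S3 API spells the storage class
+"STANDARD_IA"):
+
+```jsonc
+{
+  "backend-param": {
+    "sink-storage-class": "STANDARD_IA", // initial uploads; cheap to roll back
+    "rot-storage-class": "GLACIER" // applied once the backup survives rotation
+  }
+}
+```
+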
+### Backup Object Dependency Tree
+Backup objects can be configured to form a dependency tree like Makefile
+targets. By default, PALHM builds backup files simultaneously (*nb-workers*).
+In some environments, this may not be desirable, especially on systems with
+HDDs[^1]. You can tune this behaviour by either ...
+
+* Setting *nb-workers* to 1
+* Grouping the backup objects so that the objects from one storage device are
+ built sequentially
+
+Say the system has one storage device that holds all the data necessary for
+service and another on which the OS is installed. The system serves static
+HTTP, MySQL and OpenLDAP. All the backup tasks need to be grouped separately in
+order to reduce IO seek time.
+
+```jsonc
+{
+ "object-groups": [
+ { "id": "root" },
+ { "id": "http" },
+ { "id": "sql", "depends": [ "http" ] },
+ { "id": "ldap", "depends": [ "sql" ] },
+ ]
+}
+```
+
+On start, the objects in the "root" and "http" groups will be built
+simultaneously. On completion of all the objects in "http", the objects in the
+groups "sql" and "ldap" will be built in order.
+
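+The objects then reference these groups (a sketch with hypothetical paths;
+pipelines elided):
+
+```jsonc
+{
+  "objects": [
+    { "path": "root.tar.zstd", "group": "root", "pipeline": [ /* ... */ ] },
+    { "path": "www.tar.zstd", "group": "http", "pipeline": [ /* ... */ ] },
+    { "path": "db.sql.zstd", "group": "sql", "pipeline": [ /* ... */ ] },
+    { "path": "ldap.ldif.zstd", "group": "ldap", "pipeline": [ /* ... */ ] }
+  ]
+}
+```
+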
+## Config JSON Format
+See [doc/config-fmt.md](doc/config-fmt.md).
+
+## Getting Started
+### Prerequisites
+* Python 3.7 or higher
+* `json_reformat` command provided by **yajl** for jsonc support (optional)
+* **awscli** and **boto3** for aws-s3 backup backend (optional)
+
+### Examples
+* [localfs.sample.jsonc](src/conf/py-sample/localfs.sample.jsonc)
+* [aws.sample.jsonc](src/conf/py-sample/aws.sample.jsonc)
+
+## Files
+| Path | Desc |
+| - | - |
+| /etc/palhm/palhm.conf | The default config path |
+| /etc/palhm/conf.d/core.json | Commonly used Exec and Prefix definitions |
+
+## Advanced
+### Testing Config
+When writing a backup task, if you're worried about data loss caused by
+misconfiguration or vulnerabilities, you can use [systemd's
+sandboxing](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Sandboxing)
+to test out your config. The distro must be running systemd in order for this
+to work.
+
+```sh
+systemd-run -qP -p Nice=15 -p ProtectSystem=strict -p ReadOnlyPaths=/ -p PrivateDevices=true --wait /usr/local/bin/palhm.py run backup
+```
+
+If your config runs on a read-only file system, it's safe to assume that it
+does not require a read-write file system in order to run - in other words,
+that it does not modify the file system.
+
+Also, you can always do a dry run of your backup task by setting the backend to
+"**null**".
+
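+A sketch of a dry-run variant of a backup task (everything else stays the
+same; the output is simply discarded):
+
+```jsonc
+{
+  "tasks": [
+    {
+      "id": "backup",
+      "type": "backup",
+      "backend": "null", // exercises the pipelines without storing anything
+      "backend-param": {},
+      "objects": [ /* ... */ ]
+    }
+  ]
+}
+```
+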
+## TODO
+* JSON schema validation
+
+## Footnotes
+[^1]: Even with SSDs, disrupting sequential reads decreases overall performance
diff --git a/doc/config-fmt.md b/doc/config-fmt.md
new file mode 100644
index 0000000..8487f6f
--- /dev/null
+++ b/doc/config-fmt.md
@@ -0,0 +1,369 @@
+# PALHM JSON Config Format
+PALHM is configured with JSON documents. PALHM supports the original JSON and
+JSONC (JSON with comments). PALHM handles jsonc documents by converting them
+to json with an external command. PALHM distinguishes between the two formats
+by the file name extension. The conversion only occurs when the name of the
+config file ends with `.jsonc`.
+
+To support the IEEE754 infinity, the accepted data types for some values are
+both string and number. The former will be parsed by the relevant type class
+before being processed.
+
+## Structure
+The object feature tables in this document use the following format.
+
+| ATTR | MEANING |
+| - | - |
+| Key | The key string of the object |
+| Value | The value of the object |
+| Required | Whether the object is required as the member of the parent object |
+| Include | Include behaviour. "MERGE" or "OVERRIDE" |
+| Range | Range of the value if NUMERICAL |
+
+### include
+| ATTR | DESC |
+| - | - |
+| Key | "include" |
+| Value | ARRAY of STRINGs |
+| Required | NO |
+| Include | MERGE |
+
+```jsonc
+{
+ "include": [ "/etc/palhm/conf.d/core.json" ]
+}
+```
+
+The array is the list of paths to other config files to include in the current
+config. The config files in the array are merged into the config. No two Exec
+definitions or tasks with the same id can exist across included config files.
+Global settings such as "vl" and "nb-workers" will be silently overridden if
+they are defined in subsequent config files. Absolute or relative paths can be
+used. Relative paths are resolved in the same manner as the `#include`
+preprocessor directive in C: in the config file passed to PALHM via the `-f`
+option, the paths are resolved from the current working directory of the
+process. Otherwise (in subsequently included files), the paths are resolved
+from the directory of the current config file. A config file cannot be
+included twice, as PALHM detects circular inclusion by keeping track of the
+included config files.
+
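+For example (the file name is hypothetical), an include appearing inside an
+already-included file resolves against that file's directory rather than the
+process's working directory:
+
+```jsonc
+// /etc/palhm/conf.d/extra.json - included from another config file, so
+// "core.json" resolves from this file's directory:
+// /etc/palhm/conf.d/core.json
+{
+  "include": [ "core.json" ]
+}
+```
+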
+### modules
+| ATTR | DESC |
+| - | - |
+| Key | "modules" |
+| Value | ARRAY of STRINGs |
+| Required | NO |
+| Include | MERGE |
+
+The array is the list of PALHM modules to import. Run `palhm mods` for the
+list of modules installed on the system.
+
+```jsonc
+{
+ "modules": [ "aws" ]
+}
+```
+
+### nb-workers
+| ATTR | DESC |
+| - | - |
+| Key | "nb-workers" |
+| Value | INTEGER |
+| Required | NO |
+| Include | OVERRIDE |
+| Range | (-inf, inf) |
+
+```jsonc
+{
+ /* The number of threads the process is restricted to. Usually same as
+ * $(nproc)
+ */
+ "nb-workers": 0,
+ // Use Python default
+ "nb-workers": -1,
+ // No concurrency
+ "nb-workers": 1
+}
+```
+
+The maximum number of worker threads. Use a negative integer to use the Python
+default value (see
+[ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor)).
+Use zero to set it to the number of threads the process is allowed to
+utilise (see [os.sched_getaffinity()](https://docs.python.org/3/library/os.html?highlight=sched_getaffinity#os.sched_getaffinity)).
+Use a positive integer to restrict the number of worker threads.
+
+### vl
+| ATTR | DESC |
+| - | - |
+| Key | "vl" |
+| Value | INTEGER |
+| Required | NO |
+| Include | OVERRIDE |
+| Range | (-inf, inf) |
+
+```jsonc
+{
+ "vl": 0, // CRITICAL
+ "vl": 1, // ERROR
+ "vl": 2, // WARNING
+ "vl": 3, // INFO
+ "vl": 4, // DEBUG + 0
+ "vl": 5, // DEBUG + 1
+ "vl": 6 // DEBUG + 2
+ /* ... */
+}
+```
+
+The verbosity level: the higher, the more verbose. The value is translated
+from PALHM's "the higher the more verbose" scheme to Python's [logging facility
+logging level](https://docs.python.org/3/library/logging.html#logging-levels).
+Defaults to 3.
+
+You don't really need this. The best practice is using the default value in the
+config and using the `-q` option for a crond or timer unit. When debugging info
+is required, simply increase the verbosity with the `-v` option.
+
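+For example (assuming `palhm.py` is installed in PATH):
+
+```sh
+palhm.py -q run # quiet run for a crond or timer unit
+palhm.py -vv config # two levels above the default for debugging
+```
+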
+### Execs
+| ATTR | DESC |
+| - | - |
+| Key | "execs" |
+| Value | ARRAY of [Exec Definition Object](#Exec_Definition_Object)s |
+| Required | NO |
+| Include | MERGE |
+
+#### Exec Definition Object
+* "id": id string **(required)**
+* "argv": argument vector **(required)**
+* "env": additional environment variable mapping. The value must be an object
+ whose members are string to string mapping. The key represents the name of the
+ variable and the value the value of the variable.
+* "ec": valid exit code range. Defaults to "==0"
+ * Inclusive range format: &lt;MIN&gt;-&lt;MAX&gt;
+ * Comparator format: \[C\]&lt;N&gt;
+ * Where
+ * MIN: minimum inclusive valid exit code
+ * MAX: maximum inclusive valid exit code
+ * N: integer for comparison
+ * C: comparator. One of &lt;, &lt;=, &gt;, &gt;= or ==. Defaults to ==
+ * Examples
+ * ">=0": ignore exit code(always success)
+ * "<2" or "0-1": accept exit code 0 and 1
+ * "1": accept exit code 1 only
+ * "vl-stderr": verbosity level of stderr from the process. Defaults to 1
+ * "vl-stdout": verbosity level of stdout from the process. Defaults to 3
+
+Note that stdout and stderr from the process are not passed to the logger.
+"vl-stderr" and "vl-stdout" are merely used to determine whether the output
+from the process should be redirected to `/dev/null` or to the stdio of the
+PALHM process.
+
+```jsonc
+{
+ "id": "pgp-enc",
+ "argv": [ "/bin/pgp", "-e", "-r", "backup", "--compress-algo", "none" ],
+ "env": {
+ "LC_ALL": "C",
+ "GNUPGHOME": "~/gnupg"
+ },
+ "ec": "==0",
+ "vl-stderr": 1,
+ "vl-stdout": 3
+}
+```
+
+### Tasks
+| ATTR | DESC |
+| - | - |
+| Key | "tasks" |
+| Value | ARRAY of OBJECTs |
+| Required | NO |
+| Include | MERGE |
+
+#### Predefined Pipeline Exec Object
+* "type": "exec" **(required)**
+* "exec-id": id of the Exec Definition Object **(required)**
+
+```jsonc
+{
+ "type": "exec",
+ "exec-id": "filter-zstd-parallel"
+}
+```
+
+#### Appended Pipeline Exec Object
+* "type": "exec-inline" **(required)**
+* "exec-id": id of the Exec Definition Object **(required)**
+* "argv": array of string, which is the argument vector to append **(required)**
+* "env": environment variable mapping object. See [#Exec Definition
+ Object](#Exec_Definition_Object)
+
+```jsonc
+{
+ "type": "exec-append",
+ "exec-id": "tar",
+ "argv": [ "-C", "/", "etc", "home", "root", "var" ],
+ "env": { "LC_ALL": "C" }
+}
+```
+
+#### Inline Pipeline Exec Object
+Same as [#Exec Definition Object](#Exec_Definition_Object), except that this
+object does not require the "id" member.
+
+```jsonc
+{
+ "type": "exec-inline",
+ "argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+}
+```
+
+#### Backup Task Definition Object
+* "id": id string **(required)**
+* "type": "backup" **(required)**
+* "backend": see [README.md#Backend-param](../README.md#Backend-param)
+ **(required)**
+* "backend-param": see [README.md#Backend-param](../README.md#Backend-param)
+* "object-groups": array of [Backup Object Group Definition
+ Objects](#Backup_Object_Group_Definition_Object)
+* "objects": array of [Backup Object Definition
+ Objects](#Backup_Object_Definition_Object)
+
+```jsonc
+{
+ "id": "root-backup",
+ "type": "backup",
+ "backend": "null",
+ "backend-param": { /* ... */ },
+ "object-groups": { /* ... */ },
+ "objects": [ /* ... */ ]
+}
+```
+
+##### Backup Object Group Definition Object
+* "id": id string. Valid within the backup task **(required)**
+* "depends": array of other object group id strings on which the object group is
+ dependent. The other groups must appear before the group definition.
+
+```jsonc
+{
+ "object-groups": [
+ { "id": "root" },
+ { "id": "http" },
+ { "id": "sql", "depends": [ "http" ] },
+ { "id": "ldap", "depends": [ "sql" ] },
+ ]
+}
+```
+
+##### Backup Object Definition Object
+* "path": path to the backup output on the backend **(required)**
+* "group": the id of a [Backup Object Group Definition
+ Object](#Backup_Object_Group_Definition_Object)
+* "pipeline": array of
+ * [Predefined Pipeline Exec Objects](#Predefined_Pipeline_Exec_Object)
+ * [Appended Pipeline Exec Objects](#Appended_Pipeline_Exec_Object)
+ * [Inline Pipeline Exec Objects](#Inline_Pipeline_Exec_Object)
+
+```jsonc
+{
+ "path": "srv.tar.zstd",
+ "group": "tar-1",
+ "pipeline": [
+ {
+ "type": "exec-append",
+ "exec-id": "tar",
+ "argv": [ "-C", "/", "srv" ]
+ },
+ { "type": "exec", "exec-id": "filter-zstd-parallel" }
+ ]
+}
+```
+
+A set of child processes for the backup output file will be created using the
+Exec objects in the array.
+
+The PALHM process waits on the child processes in the pipeline, testing their
+exit codes as they exit one by one. If PALHM encounters a child process that
+returns an exit code outside the acceptable range, it will roll back the
+current copy of the backup before raising the exception. In this case, the
+exit codes from the rest of the child processes are not processed[^1].
+
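+For instance, the sample core config defines the "tar" Exec with `"ec": "<2"`
+so that backing up a live file system does not fail when tar exits with code 1
+(files changed while being read):
+
+```jsonc
+{
+  "id": "tar",
+  "argv": [ "/bin/tar", "--xattrs", "--selinux", "--warning=none", "-cf", "-" ],
+  "ec": "<2" // accept exit codes 0 and 1
+}
+```
+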
+#### Routine Task Definition Object
+* "id": id string **(required)**
+* "type": "routine" **(required)**
+* "routine": array of the id strings of
+ * [Predefined Pipeline Exec Objects](#Predefined_Pipeline_Exec_Object)
+ * [Appended Pipeline Exec Objects](#Appended_Pipeline_Exec_Object)
+ * [Inline Pipeline Exec Objects](#Inline_Pipeline_Exec_Object)
+ * [Builtin Function Objects](#Builtin_Function_Object)
+ * [Task Pointer Objects](#Task_Pointer_Object)
+
+```jsonc
+[
+ {
+ "id": "update",
+ "type": "routine",
+ "routine": [
+ {
+ "type": "exec-inline",
+ "argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+ },
+ {
+ "type": "exec-inline",
+ "argv": [ "/bin/sa-update" ]
+ }
+ ]
+ },
+ {
+ "id": "reboot",
+ "type": "routine",
+ "routine": [
+ {
+ "type": "builtin",
+ "builtin-id": "sigmask",
+ "param": [ { "action": "block", "sig": [ "INT", "TERM" ] } ]
+ },
+ {
+ "type": "exec-inline",
+ "argv": [ "/sbin/reboot" ]
+ }
+ ]
+ },
+ {
+ "id": "default",
+ "type": "routine",
+ "routine": [
+ { "type": "task", "task-id": "update" },
+ { "type": "task", "task-id": "reboot" }
+ ]
+ }
+]
+```
+
+##### Task Pointer Object
+* "type": "task"
+* "task-id": id string of
+ * [Backup Task Definition Object](#Backup_Task_Definition_Object)
+ * [Routine Task Definition Object](#Routine_Task_Definition_Object)
+
+##### Builtin Function Object
+* "type": "builtin"
+* "builtin-id": "sigmask"
+* "param": function-specific param object
+ * [sigmask Builtin Function Param](#sigmask_Builtin_Function_Param)
+
+##### sigmask Builtin Function Param
+The sigmask builtin function is the direct interface to
+[pthread_sigmask()](https://docs.python.org/3/library/signal.html?highlight=sigmask#signal.pthread_sigmask).
+Run `kill -l` for valid signals on your system. This builtin function can only
+be used on Unix systems.
+
+* "action": "block" or "unblock"
+* "sig": array of signal strings. A numberic value and the name of a signal with
+ or without "SIG" prefix are accepted. Valid values include "TERM", "SIGTERM",
+ 15, "INT", "SIGINT" and "2"
+
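+A param sketch that blocks SIGINT and SIGTERM, as in the routine example
+above:
+
+```jsonc
+{
+  "type": "builtin",
+  "builtin-id": "sigmask",
+  "param": [ { "action": "block", "sig": [ "INT", "TERM" ] } ]
+}
+```
+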
+## Footnotes
+[^1]: They're most likely 141 (terminated by SIGPIPE)
diff --git a/src/conf/py-debug/aws.sample.jsonc b/src/conf/py-debug/aws.jsonc
index 46ad562..df9a63a 100644
--- a/src/conf/py-debug/aws.sample.jsonc
+++ b/src/conf/py-debug/aws.jsonc
@@ -1,6 +1,6 @@
// PALHM Instance Config
{
- "include": [ "conf/py-debug/conf.d/core.jsonc" ],
+ "include": [ "conf.d/core.json" ],
"modules": [ "aws" ],
"nb-workers": 0, // assumed $(nproc) - default
// "nb-workers": 1, // to disable concurrent task despatch
@@ -48,7 +48,7 @@
"path": "pm-list.gz",
"group": "pre-start",
"pipeline": [
- { "type": "exec", "exec-id": "dnf-list-installed" },
+ { "type": "exec", "exec-id": "rpm-list-installed" },
{ "type": "exec", "exec-id": "filter-gzip-plain" }
]
},
diff --git a/src/conf/py-debug/conf.d b/src/conf/py-debug/conf.d
new file mode 120000
index 0000000..a32163d
--- /dev/null
+++ b/src/conf/py-debug/conf.d
@@ -0,0 +1 @@
+../py-sample/conf.d \ No newline at end of file
diff --git a/src/conf/py-debug/conf.d/core.jsonc b/src/conf/py-debug/conf.d/core.jsonc
deleted file mode 100644
index 4afe7f5..0000000
--- a/src/conf/py-debug/conf.d/core.jsonc
+++ /dev/null
@@ -1,44 +0,0 @@
-// PALHM Core Config
-{
- "execs": [
- // {
- // "id": "Exec ID",
- // "argv": [ "cmd", "--option1=opt1_val", "-o", "opt2_val" ],
- // "env": { "NAME": "VAL" },
- // "ec": "0", // this is assumed
- // "ec": "0-127", // inclusive range (not terminated by a signal)
- // "ec": "<1", // range (only 0)
- // "ec": "<=1", // range (0 and 1)
- // "ec": ">0", // range (always fail)
- // "ec": ">=0", // range (only 0)
- // "vl-stderr": 1 // verbosity level of stderr produced by this process
- // verbosity level of stderr produced by this process. Ignored if used
- // as part of pipeline
- // "vl-stdout": 2
- // },
- {
- "id": "tar",
- "argv": [ "/usr/bin/tar", "--xattrs", "--selinux" ]
- },
- {
- "id": "filter-xz-parallel",
- "argv": [ "/usr/bin/xz", "-T0" ]
- },
- {
- "id": "filter-gzip-plain",
- "argv": [ "/usr/bin/gzip" ]
- },
- {
- "id": "filter-zstd-plain",
- "argv": [ "/usr/bin/zstd" ]
- },
- {
- "id": "dnf-list-installed",
- "argv": [ "/usr/bin/dnf", "-yq", "list", "installed" ]
- },
- {
- "id": "lsblk-all-json",
- "argv": [ "/usr/bin/lsblk", "-JbOa" ]
- }
- ]
-}
diff --git a/src/conf/py-debug/localfs.sample.jsonc b/src/conf/py-debug/localfs.jsonc
index ec12808..a33060d 100644
--- a/src/conf/py-debug/localfs.sample.jsonc
+++ b/src/conf/py-debug/localfs.jsonc
@@ -1,12 +1,12 @@
// PALHM Instance Config
{
- "include": [ "conf/py-debug/conf.d/core.jsonc" ],
+ "include": [ "conf.d/core.json" ],
"nb-workers": 0, // assumed $(nproc) - default
// "nb-workers": 1, // to disable concurrent task despatch
// To unlimit the number of workers.
// Does not fail on resource alloc failure.
// "nb-workers": -1,
- "vl": 4,
+ "vl": 3,
"tasks": [
{
"id": "backup",
@@ -45,7 +45,7 @@
"path": "pm-list.gz",
"group": "pre-start",
"pipeline": [
- { "type": "exec", "exec-id": "dnf-list-installed" },
+ { "type": "exec", "exec-id": "rpm-list-installed" },
{ "type": "exec", "exec-id": "filter-gzip-plain" }
]
},
diff --git a/src/conf/py-debug/null.jsonc b/src/conf/py-debug/null.jsonc
new file mode 100644
index 0000000..b5ce9f8
--- /dev/null
+++ b/src/conf/py-debug/null.jsonc
@@ -0,0 +1,141 @@
+{
+ "include": [ "conf.d/core.json" ],
+ "nb-workers": 0, // assumed $(nproc)
+ // "nb-workers": 1, // to disable concurrent task despatch
+ // "nb-workers": -1, // to unlimit the number of workers.
+ "vl": 3,
+ "tasks": [
+ {
+ "id": "backup",
+ "type": "backup",
+ "backend": "null",
+ "object-groups": [
+ { "id": "pre-start" },
+ {
+ "id": "data-dump",
+ "depends": [ "pre-start" ]
+ },
+ {
+ "id": "tar-0",
+ "depends": [ "data-dump" ]
+ },
+ {
+ "id": "tar-1",
+ "depends": [ "data-dump" ]
+ }
+ ],
+ "objects": [
+ {
+ "path": "pm-list.zstd",
+ "group": "pre-start",
+ "pipeline": [
+ { "type": "exec", "exec-id": "rpm-list-installed" },
+ { "type": "exec", "exec-id": "filter-zstd-plain" }
+ ]
+ },
+ {
+ "path": "lsblk.json.zstd",
+ "group": "pre-start",
+ "pipeline": [
+ { "type": "exec", "exec-id": "lsblk-all-json" },
+ { "type": "exec", "exec-id": "filter-zstd-plain" }
+ ]
+ },
+ {
+ "path": "db.sql.zstd",
+ "group": "data-dump",
+ "pipeline": [
+ {
+ "type": "exec-inline",
+ "argv": [
+ "/bin/mysqldump",
+ "-uroot",
+ "--all-databases"
+ ]
+ },
+ { "type": "exec", "exec-id": "filter-zstd-parallel" }
+ ]
+ },
+ {
+ "path": "root.tar.zstd",
+ "group": "tar-0",
+ "pipeline": [
+ {
+ "type": "exec-append",
+ "exec-id": "tar",
+ "argv": [
+ "-C",
+ "/",
+ "/etc",
+ "/home",
+ "/root",
+ "/var"
+ ]
+ },
+ { "type": "exec", "exec-id": "filter-zstd-parallel" }
+ ]
+ },
+ {
+ "path": "srv.tar.zstd",
+ "group": "tar-1",
+ "pipeline": [
+ {
+ "type": "exec-append",
+ "exec-id": "tar",
+ "argv": [
+ "-C",
+ "/",
+ "/srv"
+ ]
+ },
+ { "type": "exec", "exec-id": "filter-zstd-parallel" }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "update",
+ "type": "routine",
+ "routine": [
+ {
+ "type": "exec-inline",
+ "argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+ },
+ {
+ "type": "exec-inline",
+ "argv": [ "/bin/sa-update" ]
+ }
+ ]
+ },
+ {
+ "id": "reboot",
+ "type": "routine",
+ "routine": [
+ {
+/*
+ * Block SIGTERM from systemd/init.d so PALHM can exit gracefully after issuing
+ * reboot.
+ */
+ "type": "builtin",
+ "builtin-id": "sigmask",
+ "param": [
+ { "action": "block", "sig": [ "TERM" ] }
+ ]
+ },
+ {
+ "type": "exec-inline",
+ "argv": [ "/sbin/reboot" ]
+ }
+ ]
+ },
+ {
+ "id": "default",
+ "type": "routine",
+ "routine": [
+ { "type": "task", "task-id": "backup" },
+ { "type": "task", "task-id": "update" },
+ { "type": "task", "task-id": "reboot" }
+ ]
+ }
+ ]
+}
diff --git a/src/conf/py-debug/null.sample.jsonc b/src/conf/py-debug/null.sample.jsonc
deleted file mode 100644
index a83de95..0000000
--- a/src/conf/py-debug/null.sample.jsonc
+++ /dev/null
@@ -1,140 +0,0 @@
-// PALHM Instance Config
-{
- "include": [ "conf/py-debug/conf.d/core.jsonc" ],
- "nb-workers": 1,
- "vl": 3,
- "tasks": [
- {
- "id": "backup",
- "type": "backup",
- "backend": "null",
- "object-groups": [
- { "id": "pre-start" },
- {
- "id": "data-dump",
- "depends": [ "pre-start" ]
- },
- {
- "id": "tar-media-0",
- "depends": [ "data-dump" ]
- },
- {
- "id": "tar-media-1",
- "depends": [ "data-dump" ]
- }
- ],
- "objects": [
- {
- "path": "pm-list.gz",
- "group": "pre-start",
- "pipeline": [
- { "type": "exec", "exec-id": "dnf-list-installed" },
- { "type": "exec", "exec-id": "filter-gzip-plain" }
- ]
- },
- {
- "path": "lsblk.json.gz",
- "group": "pre-start",
- "pipeline": [
- {
- "type": "exec-append",
- "exec-id": "lsblk-all-json",
- "argv": [ "-a" ]
- },
- { "type": "exec", "exec-id": "filter-gzip-plain" }
- ]
- },
- {
- "path": "random-dump.sql.xz",
- "group": "data-dump",
- "pipeline": [
- {
- "type": "exec-inline",
- "argv": [
- "/bin/dd",
- "if=/dev/urandom",
- "bs=4096",
- "count=512",
- "status=none"
- ]
- },
- { "type": "exec", "exec-id": "filter-xz-parallel" }
- ]
- },
- {
- "path": "random-dump.0.xz",
- "group": "tar-media-0",
- "pipeline": [
- {
- "type": "exec-inline",
- "argv": [
- "/bin/dd",
- "if=/dev/zero",
- "bs=4096",
- "count=512",
- "status=none"
- ]
- },
- { "type": "exec", "exec-id": "filter-xz-parallel" }
- ]
- },
- {
- "path": "random-dump.1.xz",
- "group": "tar-media-1",
- "pipeline": [
- {
- "type": "exec-inline",
- "argv": [
- "/bin/dd",
- "if=/dev/zero",
- "bs=4096",
- "count=512",
- "status=none"
- ]
- },
- { "type": "exec", "exec-id": "filter-xz-parallel" }
- ]
- }
- ]
- },
- {
- "id": "update",
- "type": "routine",
- "routine": [
- {
- "type": "exec-inline",
- "argv": [ "/bin/echo", "0" ]
- },
- {
- "type": "exec-inline",
- "argv": [ "/bin/sleep", "1" ]
- },
- {
- "type": "exec-inline",
- "argv": [ "/bin/echo", "1" ]
- }
- ]
- },
- {
- "id": "default",
- "type": "routine",
- "routine": [
- { "type": "task", "task-id": "backup" },
- { "type": "task", "task-id": "update" },
- {
- // Block SIGTERM from systemd/init.d so the program is not
- // affected by the reboot command.
- "type": "builtin",
- "builtin-id": "sigmask",
- "param": [
- { "action": "block", "sig": [ "TERM" ] }
- ]
- },
- {
- "type": "exec-inline",
- "argv": [ "/bin/true" ]
- }
- ]
- }
- ]
-}
diff --git a/src/conf/py-debug/palhm.jsonc b/src/conf/py-debug/palhm.jsonc
new file mode 120000
index 0000000..fb68baf
--- /dev/null
+++ b/src/conf/py-debug/palhm.jsonc
@@ -0,0 +1 @@
+aws.jsonc \ No newline at end of file
diff --git a/src/conf/py-sample/conf.d/core.json b/src/conf/py-sample/conf.d/core.json
new file mode 100644
index 0000000..46d3feb
--- /dev/null
+++ b/src/conf/py-sample/conf.d/core.json
@@ -0,0 +1,41 @@
+{
+ "execs": [
+ {
+ "id": "tar",
+ "argv": [ "/bin/tar", "--xattrs", "--selinux", "--warning=none", "-cf", "-" ],
+ "ec": "<2"
+ },
+ {
+ "id": "filter-xz-parallel",
+ "argv": [ "/bin/xz", "-T0" ]
+ },
+ {
+ "id": "filter-gzip-plain",
+ "argv": [ "/bin/gzip" ]
+ },
+ {
+ "id": "filter-zstd-plain",
+ "argv": [ "/bin/zstd" ]
+ },
+ {
+ "id": "filter-zstd-parallel",
+ "argv": [ "/bin/zstd", "-T0" ]
+ },
+ {
+ "id": "rpm-list-installed",
+ "argv": [ "/bin/rpm", "-qa" ]
+ },
+ {
+ "id": "dpkg-list-installed",
+ "argv": [ "/bin/dpkg-query", "-l" ]
+ },
+ {
+ "id": "lsblk-all-json",
+ "argv": [ "/bin/lsblk", "-JbOa" ]
+ },
+ {
+ "id": "os-release",
+ "argv": [ "/bin/cat", "/etc/os-release" ]
+ }
+ ]
+}
diff --git a/src/conf/py-sample/sample.jsonc b/src/conf/py-sample/sample.jsonc
new file mode 100644
index 0000000..f1c4501
--- /dev/null
+++ b/src/conf/py-sample/sample.jsonc
@@ -0,0 +1,127 @@
+{
+ "include": [ "/etc/palhm/conf.d/core.json" ],
+ // "modules": [ "aws" ],
+ "nb-workers": 0,
+ // "vl": 4,
+ "tasks": [
+ {
+ "id": "backup",
+ "type": "backup",
+ "backend": "null",
+ "backend-param": {},
+ "object-groups": [
+ { "id": "meta-run" },
+ {
+ "id": "data-dump",
+ "depends": [ "meta-run" ]
+ },
+ {
+ "id": "tar-root",
+ "depends": [ "data-dump" ]
+ }
+ ],
+ "objects": [
+ {
+ "path": "os-release",
+ "group": "meta-run",
+ "pipeline": [ { "type": "exec" , "exec-id": "os-release" } ]
+ },
+ {
+ "path": "pm-list.zstd",
+ "group": "meta-run",
+ "pipeline": [
+ { "type": "exec", "exec-id": "rpm-list-installed" },
+ { "type": "exec", "exec-id": "filter-zstd-plain" }
+ ]
+ },
+ {
+ "path": "lsblk.json.zstd",
+ "group": "meta-run",
+ "pipeline": [
+ { "type": "exec", "exec-id": "lsblk-all-json" },
+ { "type": "exec", "exec-id": "filter-zstd-plain" }
+ ]
+ },
+ // {
+ // "path": "db.sql.zstd",
+ // "group": "data-dump",
+ // "pipeline": [
+ // {
+ // "type": "exec-inline",
+ // "argv": [
+ // "/bin/mysqldump",
+ // "-uroot",
+ // "--all-databases"
+ // ]
+ // // "ec": "<=2" // don't fail when the DB is offline
+ // },
+ // { "type": "exec", "exec-id": "filter-zstd-parallel" }
+ // ]
+ // },
+ {
+ "path": "root.tar.zstd",
+ "group": "tar-root",
+ "pipeline": [
+ {
+ "type": "exec-append",
+ "exec-id": "tar",
+ "argv": [
+ "-C",
+ "/",
+ "etc",
+ "home",
+ "root",
+ "var"
+ ]
+ },
+ { "type": "exec", "exec-id": "filter-zstd-parallel" }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "update",
+ "type": "routine",
+ "routine": [
+ {
+ "type": "exec-inline",
+ "argv": [ "/bin/dnf", "--refresh", "-yq", "update" ]
+ }
+ // {
+ // "type": "exec-inline",
+ // "argv": [ "/bin/sa-update" ]
+ // }
+ ]
+ },
+ {
+ "id": "reboot",
+ "type": "routine",
+ "routine": [
+ {
+/*
+ * Block SIGTERM from systemd/init.d so PALHM can exit gracefully after issuing
+ * reboot.
+ */
+ "type": "builtin",
+ "builtin-id": "sigmask",
+ "param": [
+ { "action": "block", "sig": [ "TERM" ] }
+ ]
+ },
+ {
+ "type": "exec-inline",
+ "argv": [ "/sbin/reboot" ]
+ }
+ ]
+ },
+ {
+ "id": "default",
+ "type": "routine",
+ "routine": [
+ { "type": "task", "task-id": "backup" },
+ { "type": "task", "task-id": "update" },
+ { "type": "task", "task-id": "reboot" }
+ ]
+ }
+ ]
+}
diff --git a/src/palhm.py b/src/palhm.py
index f3f412b..722664e 100755
--- a/src/palhm.py
+++ b/src/palhm.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python3
+import importlib
import logging
+import os
import sys
from abc import ABC, abstractmethod
from getopt import getopt
@@ -49,7 +51,7 @@ class RunCmd (Cmd):
def do_cmd (self):
ProgConf.alloc_ctx()
- if self.args:
+ if self.args and self.args[0]: # empty string as "default"
task = self.args[0]
else:
task = palhm.DEFAULT.RUN_TASK.value
@@ -64,6 +66,53 @@ class RunCmd (Cmd):
Run a task in config. Run the "''' + palhm.DEFAULT.RUN_TASK.value +
'''" task if [TASK] is not specified.''')
+class ModsCmd (Cmd):
+ def __init__ (self, *args, **kwargs):
+ pass
+
+ def _walk_mods (self, path: str):
+ def is_mod_dir (path: str) -> bool:
+ try:
+ for i in os.scandir(path):
+ if i.name.startswith("__init__.py"):
+ return True
+ except NotADirectoryError:
+ pass
+ return False
+
+ def is_mod_file (path: str) -> str:
+ if not os.path.isfile(path):
+ return None
+
+ try:
+ pos = path.rindex(".")
+ if path[pos + 1:].startswith("py"):
+ return os.path.basename(path[:pos])
+ except ValueError:
+ pass
+
+ for i in os.scandir(path):
+ if i.name.startswith("_"):
+ continue
+ elif is_mod_dir(i.path):
+ print(i.name)
+ self._walk_mods(i.path)
+ else:
+ name = is_mod_file(i.path)
+ if name:
+ print(name)
+
+ def do_cmd (self):
+ for i in importlib.util.find_spec("palhm.mod").submodule_search_locations:
+ self._walk_mods(i)
+
+ return 0
+
+ def print_help ():
+ print(
+"Usage: " + sys.argv[0] + " mods" + '''
+Prints the available modules to stdout.''')
+
class HelpCmd (Cmd):
def __init__ (self, optlist, args):
self.optlist = optlist
@@ -84,7 +133,7 @@ class HelpCmd (Cmd):
print(
"Usage: " + sys.argv[0] + " [options] CMD [command options ...]" + '''
Options:
- -q Set the verbosity level to 0(FATAL error only). Overrides config
+ -q Set the verbosity level to 0 (CRITICAL). Overrides config
-v Increase the verbosity level by 1. Overrides config
-f FILE Load config from FILE instead of the hard-coded default
Config: ''' + ProgConf.conf + '''
@@ -92,14 +141,16 @@ Commands:
run run a task
config load config and print the contents
help [CMD] print this message and exit normally if [CMD] is not specified.
- Print usage of [CMD] otherwise''')
+ Print usage of [CMD] otherwise
+ mods list available modules''')
return 0
CmdMap = {
"config": ConfigCmd,
"run": RunCmd,
- "help": HelpCmd
+ "help": HelpCmd,
+ "mods": ModsCmd
}
optlist, args = getopt(sys.argv[1:], "qvf:")
@@ -115,14 +166,13 @@ if not args or not args[0] in CmdMap:
err_unknown_cmd()
for p in optlist:
- match p[0]:
- case "-q": ProgConf.override_vl = logging.ERROR
- case "-v":
- if ProgConf.override_vl is None:
- ProgConf.override_vl = palhm.DEFAULT.VL.value - 10
- else:
- ProgConf.override_vl -= 10
- case "-f": ProgConf.conf = p[1]
+ if p[0] == "-q": ProgConf.override_vl = logging.ERROR
+ elif p[0] == "-v":
+ if ProgConf.override_vl is None:
+ ProgConf.override_vl = palhm.DEFAULT.VL.value - 10
+ else:
+ ProgConf.override_vl -= 10
+ elif p[0] == "-f": ProgConf.conf = p[1]
logging.basicConfig(format = "%(name)s %(message)s")
diff --git a/src/palhm/__init__.py b/src/palhm/__init__.py
index 8c44ace..7e5afb4 100644
--- a/src/palhm/__init__.py
+++ b/src/palhm/__init__.py
@@ -1,3 +1,4 @@
+from .exceptions import InvalidConfigError
import io
import json
import logging
@@ -15,8 +16,6 @@ from datetime import datetime, timezone
from decimal import Decimal
from enum import Enum
from importlib import import_module
-from mailbox import FormatError
-from multiprocessing import ProcessError
from typing import Iterable
@@ -72,10 +71,16 @@ class GlobalContext:
for m in jobj.get("modules", iter(())):
loaded = self.modules[m] = import_module("." + m, "palhm.mod")
- intersect = set(self.backup_backends.keys()).intersection(loaded.backup_backends.keys())
- if intersect:
- raise RuntimeError("Backup Backend conflict detected. ID(s): " + intersect)
- self.backup_backends |= loaded.backup_backends
+
+ if hasattr(loaded, "backup_backends"):
+ intersect = (
+ set(self.backup_backends.keys())
+ .intersection(loaded.backup_backends.keys()))
+ if intersect:
+ raise InvalidConfigError(
+ "Backup Backend conflict detected.",
+ intersect)
+ self.backup_backends |= loaded.backup_backends
def get_vl (self) -> int:
return self.vl
@@ -123,36 +128,33 @@ class Exec (Runnable, ExecvHolder):
b = int(m[2])
ret = range(a, b + 1)
if len(ret) == 0:
- raise ValueError("Invalid range: " + ec)
+ raise ValueError("Invalid range", ec)
return ret
m = re.match(Exec.RE.EC_RANGE.value, x)
if m:
op = str(m[1]) if m[1] else "=="
n = int(m[2])
- match op:
- case "==": return range(n, n + 1)
- case "<": return range(0, n)
- case "<=": return range(0, n + 1)
- case ">": return range(n + 1, 256)
- case ">=": return range(n, 256)
- case _: raise RuntimeError("FIXME")
+ if op == "==": return range(n, n + 1)
+ elif op == "<": return range(0, n)
+ elif op == "<=": return range(0, n + 1)
+ elif op == ">": return range(n + 1, 256)
+ elif op == ">=": return range(n, 256)
+ else: raise RuntimeError("FIXME")
- raise ValueError("Invalid value: " + ec)
+ raise ValueError("Invalid value", ec)
def from_conf (ctx: GlobalContext, jobj: dict):
- match jobj["type"]:
- case "exec":
- exec_id = jobj["exec-id"]
- exec = ctx.exec_map[exec_id]
- ret = exec
- case "exec-append":
- exec_id = jobj["exec-id"]
- exec = ctx.exec_map[exec_id]
- ret = exec.mkappend(jobj["argv"])
- case "exec-inline":
- ret = Exec(jobj)
- # case _:
- # raise RuntimeError("FIXME")
+ if jobj["type"] == "exec":
+ exec_id = jobj["exec-id"]
+ exec = ctx.exec_map[exec_id]
+ ret = exec
+ elif jobj["type"] == "exec-append":
+ exec_id = jobj["exec-id"]
+ exec = ctx.exec_map[exec_id]
+ ret = exec.mkappend(jobj["argv"], jobj.get("env", {}))
+ elif jobj["type"] == "exec-inline":
+ ret = Exec(jobj)
+ else: raise RuntimeError("FIXME")
ret.vl_stderr = jobj.get("vl-stderr", ret.vl_stderr)
ret.vl_stdout = jobj.get("vl-stdout", ret.vl_stdout)
@@ -173,9 +175,10 @@ class Exec (Runnable, ExecvHolder):
self.vl_stderr = jobj.get("vl-stderr", Exec.DEFAULT.VL_STDERR.value)
self.vl_stdout = jobj.get("vl-stdout", Exec.DEFAULT.VL_STDOUT.value)
- def mkappend (self, extra_argv: Iterable):
+ def mkappend (self, extra_argv: Iterable, extra_env: dict = {}):
ny = deepcopy(self)
ny.argv.extend(extra_argv)
+ ny.env |= extra_env
return ny
def run (self, ctx: GlobalContext):
@@ -201,8 +204,10 @@ class Exec (Runnable, ExecvHolder):
def raise_oob_ec (self, ec: int):
if not self.test_ec(ec):
- raise ProcessError(
- str(self) + " returned " + str(ec) + " not in " + str(self.ec))
+ raise ChildProcessError(
+ str(self) + ": exit code test fail",
+ ec,
+ self.ec)
def __str__ (self) -> str:
return str().join(
@@ -308,6 +313,18 @@ class NullBackupBackend (BackupBackend):
def rotate (self, ctx: GlobalContext):
pass
+ def _fs_usage_info (self, ctx: GlobalContext) -> Iterable[tuple[str, int]]:
+ return iter(())
+
+ def _excl_fs_copies (self, ctx: GlobalContext) -> set[str]:
+ return set[str]()
+
+ def _rm_fs_recursive (self, ctx: GlobalContext, pl: Iterable[str]):
+ pass
+
+ def _fs_quota_target (self, ctx: GlobalContext) -> tuple[Decimal, Decimal]:
+ return (Decimal('inf'), Decimal('inf'))
+
def __str__ (self):
return "null"
@@ -469,7 +486,7 @@ class RoutineTask (Task):
def run (self, ctx: GlobalContext):
for r in self.routines:
- self.l.debug("run: " + str(r))
+ self.l.info("run: " + str(r))
p = r.run(ctx)
return self
@@ -536,7 +553,7 @@ class DepResolv:
def build (og_map: dict):
def dive (og: BackupObjectGroup, obj_set: set, recurse_path: set):
if og in recurse_path:
- raise RecursionError("Circular reference detected.")
+ raise RecursionError("Circular reference detected whilst building dependency tree")
recurse_path.add(og)
obj_set.update(og.objects)
@@ -611,7 +628,7 @@ class BackupTask (Task):
for og in jobj_ogrps:
ogid = og["id"]
if ogid in og_map:
- raise KeyError("Duplicate object group: " + ogid)
+ raise KeyError("Duplicate object group", ogid)
og_map[ogid] = BackupObjectGroup()
# load depends
@@ -620,7 +637,8 @@ class BackupTask (Task):
for depend in og.get("depends", iter(())):
if ogid == depend:
raise ReferenceError(
- "An object group dependent on itself: " + ogid)
+ "An object group dependent on itself",
+ ogid)
og_map[ogid].depends.add(og_map[depend])
# implicit default
@@ -633,7 +651,7 @@ class BackupTask (Task):
gid = jo.get("group", DEFAULT.OBJ_GRP.value)
if path in obj_path_set:
- raise KeyError("Duplicate path: " + path)
+ raise KeyError("Duplicate path", path)
obj_path_set.add(path)
og_map[gid].objects.append(BackupObject(jo, ctx))
@@ -651,6 +669,7 @@ class BackupTask (Task):
for bo in self.dep_tree.avail_q:
bo.bbctx = bbctx
+ self.l.info("make: " + bo.path)
self.l.debug("despatch: " + str(bo))
fs.add(th_pool.submit(bo.run, ctx))
self.dep_tree.avail_q.clear()
@@ -686,11 +705,11 @@ def merge_conf (a: dict, b: dict) -> dict:
# exec conflicts
c = chk_dup_id("execs", a, b)
if c:
- raise KeyError("Dup execs: " + c)
+ raise KeyError("Dup execs", c)
# task conflicts
c = chk_dup_id("tasks", a, b)
if c:
- raise KeyError("Dup tasks: " + c)
+ raise KeyError("Dup tasks", c)
return a | b
@@ -701,27 +720,38 @@ def load_jsonc (path: str) -> dict:
stdin = in_file,
capture_output = True)
if p.returncode != 0:
- raise FormatError(path)
+ raise ChildProcessError(p, path)
return json.load(io.BytesIO(p.stdout))
def load_conf (path: str, inc_set: set = set()) -> dict:
- if path in inc_set:
- raise ReferenceError("Config included multiple times: " + path)
- inc_set.add(path)
+ JSONC_EXT = ".jsonc"
- if path.endswith(".jsonc"):
- jobj = load_jsonc(path)
+ rpath = os.path.realpath(path, strict = True)
+ if rpath in inc_set:
+ raise RecursionError("Config already included", rpath)
+ inc_set.add(rpath)
+
+ if rpath[-len(JSONC_EXT):].lower() == JSONC_EXT:
+ jobj = load_jsonc(rpath)
else:
- with open(path) as file:
+ with open(rpath) as file:
jobj = json.load(file)
# TODO: do schema validation
+ # pushd
+ saved_cwd = os.getcwd()
+ dn = os.path.dirname(rpath)
+ os.chdir(dn)
+
for i in jobj.get("include", iter(())):
inc_conf = load_conf(i, inc_set)
jobj = merge_conf(jobj, inc_conf)
+ # popd
+ os.chdir(saved_cwd)
+
return jobj
def setup_conf (jobj: dict) -> GlobalContext:
diff --git a/src/palhm/exceptions.py b/src/palhm/exceptions.py
new file mode 100644
index 0000000..f63f2f9
--- /dev/null
+++ b/src/palhm/exceptions.py
@@ -0,0 +1,2 @@
+class InvalidConfigError (Exception): ...
+class APIFailError (Exception): ...
diff --git a/src/palhm/mod/aws.py b/src/palhm/mod/aws.py
index fcb16f1..01fb8bc 100644
--- a/src/palhm/mod/aws.py
+++ b/src/palhm/mod/aws.py
@@ -7,6 +7,7 @@ from typing import Callable, Iterable
import boto3
import botocore
from palhm import BackupBackend, Exec, GlobalContext
+from palhm.exceptions import APIFailError
class CONST (Enum):
@@ -50,10 +51,13 @@ class S3BackupBackend (BackupBackend):
Key = self.cur_backup_key)
sleep(1)
# Make sure we don't proceed
- raise FileExistsError(self.cur_backup_uri)
+ raise FileExistsError(
+ "Failed to set up a backup dir. Check the prefix function",
+ self.cur_backup_uri)
except botocore.exceptions.ClientError as e:
- if e.response["Error"]["Code"] != "404": # expected status code
- raise
+ c = e.response["Error"]["Code"]
+ if c != "404": # expected status code
+ raise APIFailError("Unexpected status code", c)
return super().open(ctx)
@@ -125,8 +129,9 @@ class S3BackupBackend (BackupBackend):
o_key = i["Key"]
o_size = i.get("Size", 0)
if not o_key.startswith(self.root_key):
- raise RuntimeError("The endpoint returned an object " +
- "irrelevant to the request: " + o_key)
+ raise APIFailError(
+ "The endpoint returned an object irrelevant to the request",
+ o_key)
l = o_key.find("/", len(prefix))
if l >= 0:
@@ -200,7 +205,7 @@ class S3BackupBackend (BackupBackend):
def rotate (self, ctx: GlobalContext):
ret = super()._do_fs_rotate(ctx)
- if self.sc_rot:
+ if self.sc_rot and self.sc_rot != self.sc_sink:
def chsc (k):
self.client.copy_object(
Bucket = self.bucket,