-rw-r--r--  README.md                        | 18
-rw-r--r--  doc/config-fmt.md                |  2
-rw-r--r--  src/conf/py-debug/aws.jsonc      |  3
-rw-r--r--  src/conf/py-debug/localfs.jsonc  |  6
-rw-r--r--  src/palhm/__init__.py            | 25
-rw-r--r--  src/palhm/mod/aws.py             | 10
6 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 9403e79..d56a3c6 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,8 @@ Backup Objects have two essential attributes.
* **pipeline**: commands used to generate the backup output file
* **path**: path to the output file on the backend
+* **alloc-size**: expected size of the output file in bytes. Recommended for
+  some backends like AWS so that the transfer block size can be determined
For example, this object definition is for a mysql data dump compressed in zstd
and encrypted using a public key id "backup-pub-key" named as
@@ -46,6 +48,7 @@ and encrypted using a public key id "backup-pub-key" named as
```jsonc
{
"path": "all-db.sql.zstd.pgp",
+ "alloc-size": 268435456, // 256MiB
"pipeline": [
{ "type": "exec-inline", "argv": [ "/bin/mysqldump", "-uroot", "--all-databases" ] },
{ "type": "exec-inline", "argv": [ "/bin/zstd" ] },
@@ -126,7 +129,8 @@ integers.
"dmode": "755", // (optional) mode for new directories
"fmode": "644", // (optional) mode for new files
"nb-copy-limit": "Infinity", // (optional)
- "root-size-limit": "Infinity" // (optional)
+ "root-size-limit": "Infinity", // (optional)
+ "block-size": 8388608 // 16MiB: (optional)block size for underlying dd command
},
"object-groups": [ /* ... */ ],
"objects": [ /* ... */ ]
@@ -334,6 +338,18 @@ palhm.py -q run check-dnssec
| /etc/palhm/palhm.conf | The default config path |
| /etc/palhm/conf.d/core.json | Commonly used Exec and Prefix definitions |
+## Troubleshooting
+### Large Files on AWS S3
+To fit awscli into the pipelining design, the sink data is fed to awscli via
+stdin. As a result, uploading files larger than roughly 80GiB (the default
+8MiB multipart chunk size times the 10,000-part limit) will fail unless one
+of the following measures is taken.
+
+- Specify `alloc-size` for large backup objects so that awscli can determine
+  the optimal multipart chunk size
+- Increase the default multipart chunk size in the awscli config
+  - E.g. `aws configure set default.s3.multipart_chunksize 32MiB` raises the
+    limit to roughly 320GiB (32MiB * 10000)
+
## Advanced
### Testing Config
When writing backup task, if you're worried about data loss caused by
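
As context for the numbers in the Troubleshooting section above, here is a rough Python sketch (not part of the patch) of the arithmetic: S3 caps a multipart upload at 10,000 parts, and awscli splits a stdin upload into parts of `multipart_chunksize` (8MiB by default), hence the ~80GiB ceiling.

```python
# Rough sketch (illustrative only): smallest multipart chunk size that still
# fits an object of the given expected size into S3's 10,000-part limit.
MAX_PARTS = 10_000

def min_chunk_size (expected_size: int) -> int:
	# ceiling division without importing math
	return -(-expected_size // MAX_PARTS)

print(min_chunk_size(268435456))    # 256MiB object -> 26844 bytes; the 8MiB default is plenty
print(min_chunk_size(200 * 2**30))  # 200GiB object -> ~21MB per part; needs alloc-size or a larger chunksize
```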
diff --git a/doc/config-fmt.md b/doc/config-fmt.md
index df196c5..585f37a 100644
--- a/doc/config-fmt.md
+++ b/doc/config-fmt.md
@@ -258,6 +258,7 @@ object does not require the "id" member.
##### Backup Object Definition Object
* "path": path to the backup output on the backend **(REQUIRED)**
+* "alloc-size": the expected size of the object in bytes
* "group": the id of a [Backup Object Group Definition
Object](#backup-object-group-definition-object)
* "pipeline": array of
@@ -268,6 +269,7 @@ object does not require the "id" member.
```jsonc
{
"path": "srv.tar.zstd",
+ "alloc-size": 2097152,
"group": "tar-1",
"pipeline": [
{
diff --git a/src/conf/py-debug/aws.jsonc b/src/conf/py-debug/aws.jsonc
index df9a63a..23e8faf 100644
--- a/src/conf/py-debug/aws.jsonc
+++ b/src/conf/py-debug/aws.jsonc
@@ -66,6 +66,7 @@
},
{
"path": "random-dump.sql.xz",
+ "alloc-size": 2097152,
"group": "data-dump",
"pipeline": [
{
@@ -83,6 +84,7 @@
},
{
"path": "random-dump.0.xz",
+ "alloc-size": 2097152,
"group": "tar-media-0",
"pipeline": [
{
@@ -100,6 +102,7 @@
},
{
"path": "random-dump.1.xz",
+ "alloc-size": 2097152,
"group": "tar-media-1",
"pipeline": [
{
diff --git a/src/conf/py-debug/localfs.jsonc b/src/conf/py-debug/localfs.jsonc
index a33060d..80efd5d 100644
--- a/src/conf/py-debug/localfs.jsonc
+++ b/src/conf/py-debug/localfs.jsonc
@@ -23,7 +23,8 @@
// "dmode": "755",
// "fmode": "644",
"nb-copy-limit": 2,
- "root-size-limit": "Infinity"
+ "root-size-limit": "Infinity",
+ "block-size": 4096
},
"object-groups": [
{ "id": "pre-start" },
@@ -63,6 +64,7 @@
},
{
"path": "random-dump.sql.xz",
+ "alloc-size": 2097152,
"group": "data-dump",
"pipeline": [
{
@@ -80,6 +82,7 @@
},
{
"path": "random-dump.0.xz",
+ "alloc-size": 2097152,
"group": "tar-media-0",
"pipeline": [
{
@@ -97,6 +100,7 @@
},
{
"path": "random-dump.1.xz",
+ "alloc-size": 2097152,
"group": "tar-media-1",
"pipeline": [
{
diff --git a/src/palhm/__init__.py b/src/palhm/__init__.py
index 091d072..75b1b21 100644
--- a/src/palhm/__init__.py
+++ b/src/palhm/__init__.py
@@ -18,6 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import platform
+import resource
import sys
import time
@@ -381,7 +382,7 @@ class BackupBackend (ABC):
def close (self, ctx: GlobalContext):
...
@abstractmethod
- def sink (self, ctx: GlobalContext, path: str) -> Exec:
+ def sink (self, ctx: GlobalContext, bo) -> Exec:
...
@abstractmethod
def rotate (self, ctx: GlobalContext):
@@ -478,10 +479,17 @@ class NullBackupBackend (BackupBackend):
class LocalfsBackupBackend (BackupBackend):
def __init__ (self, param: dict):
+ def _getpagesize () -> int:
+ try:
+ return resource.getpagesize()
+ except:
+ return 4096
+
self.backup_root = param["root"]
self.mkprefix = BackupBackend.mkprefix_iso8601
self.nb_copy_limit = Decimal(param.get("nb-copy-limit", "Infinity"))
self.root_size_limit = Decimal(param.get("root-size-limit", "Infinity"))
+ self.block_size = param.get("block-size", _getpagesize())
self.dmode = int(param.get("dmode", "750"), 8)
self.fmode = int(param.get("fmode", "640"), 8)
self.cur_backup_path = None
@@ -499,13 +507,17 @@ class LocalfsBackupBackend (BackupBackend):
def close (self, ctx: GlobalContext):
pass
- def sink (self, ctx: GlobalContext, path: str) -> Exec:
- path = os.sep.join([ self.cur_backup_path, path ])
+ def sink (self, ctx: GlobalContext, bo) -> Exec:
+ path = os.sep.join([ self.cur_backup_path, bo.path ])
os.makedirs(os.path.dirname(path), self.dmode, True)
self.sink_list.append(path)
+ if bo.alloc_size is not None:
+ try: os.truncate(path, bo.alloc_size)
+ except OSError: pass
+
e = Exec()
- e.argv = [ "/bin/cp", "/dev/stdin", path ]
+ e.argv = [ "/bin/dd", "bs=" + str(self.block_size), "of=" + path ]
return e
@@ -549,7 +561,7 @@ class LocalfsBackupBackend (BackupBackend):
root_size_limit: {root_size_limit}
dmode: {dmode:o}
fmode: {fmode:o}'''.format(
- root = self.root,
+ root = self.backup_root,
nb_copy_limit = self.nb_copy_limit,
root_size_limit = self.root_size_limit,
dmode = self.dmode,
@@ -724,6 +736,7 @@ class BackupObject (Runnable):
self.pipeline = []
self.path = jobj["path"]
self.bbctx = None
+ self.alloc_size = jobj.get("alloc-size", None)
for e in jobj["pipeline"]:
ny_exec = Exec.from_conf(ctx, e)
@@ -743,7 +756,7 @@ class BackupObject (Runnable):
pmap[eh] = p
last_stdio = p.stdout
- sink_exec = self.bbctx.sink(ctx, self.path)
+ sink_exec = self.bbctx.sink(ctx, self)
sink_p = subprocess.Popen(
args = sink_exec.argv,
stdin = last_stdio,
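
A minimal standalone sketch of what the reworked localfs sink amounts to, with hypothetical paths, sizes, and producer command (none of them from the patch): best-effort preallocation of the destination, then the producer's stdout piped into dd with the configured block size.

```python
# Illustrative sketch only; assumes a Linux-like system with dd on PATH.
import os
import subprocess

block_size = 4096        # backend "block-size" (the patch defaults this to the page size)
alloc_size = 2097152     # object "alloc-size", used for best-effort preallocation
path = "/tmp/palhm-demo/random-dump.bin"

os.makedirs(os.path.dirname(path), 0o750, True)
try:
	os.truncate(path, alloc_size)   # silently skipped if the file does not exist yet
except OSError:
	pass

producer = subprocess.Popen(
	[ "dd", "if=/dev/urandom", "count=64", "bs=4096" ],
	stdout = subprocess.PIPE)
sink = subprocess.Popen(
	[ "dd", "bs=" + str(block_size), "of=" + path ],
	stdin = producer.stdout)
producer.stdout.close()
producer.wait()
sink.wait()
```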
diff --git a/src/palhm/mod/aws.py b/src/palhm/mod/aws.py
index 725861d..3e7c2b4 100644
--- a/src/palhm/mod/aws.py
+++ b/src/palhm/mod/aws.py
@@ -25,7 +25,7 @@ from typing import Callable, Iterable
import boto3
import botocore
-from palhm import MUA, BackupBackend, Exec, GlobalContext
+from palhm import MUA, BackupBackend, BackupObject, Exec, GlobalContext
from palhm.exceptions import APIFailError
@@ -202,7 +202,7 @@ class S3BackupBackend (BackupBackend):
def close (self, ctx: GlobalContext):
self._cleanup_multiparts(ctx)
- def sink (self, ctx: GlobalContext, path: str) -> Exec:
+ def sink (self, ctx: GlobalContext, bo: BackupObject) -> Exec:
l = self._logger(ctx)
e = Exec()
@@ -214,10 +214,12 @@ class S3BackupBackend (BackupBackend):
"--only-show-errors" ]
if self.sc_sink:
e.argv.append("--storage-class=" + self.sc_sink)
- e.argv.extend(["-", "/".join([self.cur_backup_uri, path])])
+ if bo.alloc_size is not None:
+ e.argv.append("--expected-size=" + str(bo.alloc_size))
+ e.argv.extend(["-", "/".join([self.cur_backup_uri, bo.path])])
l.debug("sink: " + str(e))
- self.sink_list.append(mks3objkey([self.cur_backup_key, path]))
+ self.sink_list.append(mks3objkey([self.cur_backup_key, bo.path]))
return e
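
A sketch of the argv the patched S3 sink ends up building, with a hypothetical bucket URI and object path and the leading awscli arguments abbreviated. `--expected-size` is an existing `aws s3 cp` option for stdin ("-") uploads; it lets awscli pick a multipart chunk size that stays under the 10,000-part limit.

```python
# Illustrative only; bucket URI and object path are made up for the example.
alloc_size = 268435456
cur_backup_uri = "s3://my-backup-bucket/2022-01-01T00:00:00"
bo_path = "all-db.sql.zstd.pgp"

argv = [ "aws", "s3", "cp", "--only-show-errors" ]
if alloc_size is not None:
	argv.append("--expected-size=" + str(alloc_size))
argv.extend([ "-", "/".join([ cur_backup_uri, bo_path ]) ])
print(" ".join(argv))
# aws s3 cp --only-show-errors --expected-size=268435456 - s3://my-backup-bucket/2022-01-01T00:00:00/all-db.sql.zstd.pgp
```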