From 6789c980678f46a602d9f82bfc0ec2a3d52a645e Mon Sep 17 00:00:00 2001 From: David Timber Date: Mon, 26 Sep 2022 17:24:21 +0800 Subject: Fix bugs - AWS S3 backend large files - Added 'alloc-size' for aws large file upload and localfs preallocation - Added 'block-size' for localfs(implemented) - Updated docs - Fix bug in localfs str() op --- src/conf/py-debug/aws.jsonc | 3 +++ src/conf/py-debug/localfs.jsonc | 6 +++++- src/palhm/__init__.py | 25 +++++++++++++++++++------ src/palhm/mod/aws.py | 10 ++++++---- 4 files changed, 33 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/conf/py-debug/aws.jsonc b/src/conf/py-debug/aws.jsonc index df9a63a..23e8faf 100644 --- a/src/conf/py-debug/aws.jsonc +++ b/src/conf/py-debug/aws.jsonc @@ -66,6 +66,7 @@ }, { "path": "random-dump.sql.xz", + "alloc-size": 2097152, "group": "data-dump", "pipeline": [ { @@ -83,6 +84,7 @@ }, { "path": "random-dump.0.xz", + "alloc-size": 2097152, "group": "tar-media-0", "pipeline": [ { @@ -100,6 +102,7 @@ }, { "path": "random-dump.1.xz", + "alloc-size": 2097152, "group": "tar-media-1", "pipeline": [ { diff --git a/src/conf/py-debug/localfs.jsonc b/src/conf/py-debug/localfs.jsonc index a33060d..80efd5d 100644 --- a/src/conf/py-debug/localfs.jsonc +++ b/src/conf/py-debug/localfs.jsonc @@ -23,7 +23,8 @@ // "dmode": "755", // "fmode": "644", "nb-copy-limit": 2, - "root-size-limit": "Infinity" + "root-size-limit": "Infinity", + "block-size": 4096 }, "object-groups": [ { "id": "pre-start" }, @@ -63,6 +64,7 @@ }, { "path": "random-dump.sql.xz", + "alloc-size": 2097152, "group": "data-dump", "pipeline": [ { @@ -80,6 +82,7 @@ }, { "path": "random-dump.0.xz", + "alloc-size": 2097152, "group": "tar-media-0", "pipeline": [ { @@ -97,6 +100,7 @@ }, { "path": "random-dump.1.xz", + "alloc-size": 2097152, "group": "tar-media-1", "pipeline": [ { diff --git a/src/palhm/__init__.py b/src/palhm/__init__.py index 091d072..75b1b21 100644 --- a/src/palhm/__init__.py +++ b/src/palhm/__init__.py @@ -18,6 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import platform +import resource import sys import time @@ -381,7 +382,7 @@ class BackupBackend (ABC): def close (self, ctx: GlobalContext): ... @abstractmethod - def sink (self, ctx: GlobalContext, path: str) -> Exec: + def sink (self, ctx: GlobalContext, bo) -> Exec: ... @abstractmethod def rotate (self, ctx: GlobalContext): @@ -478,10 +479,17 @@ class NullBackupBackend (BackupBackend): class LocalfsBackupBackend (BackupBackend): def __init__ (self, param: dict): + def _getpagesize () -> int: + try: + return resource.getpagesize() + except: + return 4096 + self.backup_root = param["root"] self.mkprefix = BackupBackend.mkprefix_iso8601 self.nb_copy_limit = Decimal(param.get("nb-copy-limit", "Infinity")) self.root_size_limit = Decimal(param.get("root-size-limit", "Infinity")) + self.block_size = param.get("block-size", _getpagesize()) self.dmode = int(param.get("dmode", "750"), 8) self.fmode = int(param.get("fmode", "640"), 8) self.cur_backup_path = None @@ -499,13 +507,17 @@ class LocalfsBackupBackend (BackupBackend): def close (self, ctx: GlobalContext): pass - def sink (self, ctx: GlobalContext, path: str) -> Exec: - path = os.sep.join([ self.cur_backup_path, path ]) + def sink (self, ctx: GlobalContext, bo) -> Exec: + path = os.sep.join([ self.cur_backup_path, bo.path ]) os.makedirs(os.path.dirname(path), self.dmode, True) self.sink_list.append(path) + if bo.alloc_size is not None: + try: os.truncate(bo.path, bo.alloc_size) + except: pass + e = Exec() - e.argv = [ "/bin/cp", "/dev/stdin", path ] + e.argv = [ "/bin/dd", "bs=" + str(self.block_size), "of=" + path ] return e @@ -549,7 +561,7 @@ class LocalfsBackupBackend (BackupBackend): root_size_limit: {root_size_limit} dmode: {dmode:o} fmode: {fmode:o}'''.format( - root = self.root, + root = self.backup_root, nb_copy_limit = self.nb_copy_limit, root_size_limit = self.root_size_limit, dmode = self.dmode, @@ -724,6 +736,7 @@ class BackupObject (Runnable): self.pipeline = [] self.path = jobj["path"] self.bbctx = None + self.alloc_size = jobj.get("alloc-size", None) for e in jobj["pipeline"]: ny_exec = Exec.from_conf(ctx, e) @@ -743,7 +756,7 @@ class BackupObject (Runnable): pmap[eh] = p last_stdio = p.stdout - sink_exec = self.bbctx.sink(ctx, self.path) + sink_exec = self.bbctx.sink(ctx, self) sink_p = subprocess.Popen( args = sink_exec.argv, stdin = last_stdio, diff --git a/src/palhm/mod/aws.py b/src/palhm/mod/aws.py index 725861d..3e7c2b4 100644 --- a/src/palhm/mod/aws.py +++ b/src/palhm/mod/aws.py @@ -25,7 +25,7 @@ from typing import Callable, Iterable import boto3 import botocore -from palhm import MUA, BackupBackend, Exec, GlobalContext +from palhm import MUA, BackupBackend, BackupObject, Exec, GlobalContext from palhm.exceptions import APIFailError @@ -202,7 +202,7 @@ class S3BackupBackend (BackupBackend): def close (self, ctx: GlobalContext): self._cleanup_multiparts(ctx) - def sink (self, ctx: GlobalContext, path: str) -> Exec: + def sink (self, ctx: GlobalContext, bo) -> Exec: l = self._logger(ctx) e = Exec() @@ -214,10 +214,12 @@ class S3BackupBackend (BackupBackend): "--only-show-errors" ] if self.sc_sink: e.argv.append("--storage-class=" + self.sc_sink) - e.argv.extend(["-", "/".join([self.cur_backup_uri, path])]) + if bo.alloc_size is not None: + e.argv.append("--expected-size=" + str(bo.alloc_size)) + e.argv.extend(["-", "/".join([self.cur_backup_uri, bo.path])]) l.debug("sink: " + str(e)) - self.sink_list.append(mks3objkey([self.cur_backup_key, path])) + self.sink_list.append(mks3objkey([self.cur_backup_key, bo.path])) return e -- cgit v1.2.3-70-g09d2