split json backup files, #406
This commit is contained in:
parent
77fef5de57
commit
515b724047
|
@ -246,9 +246,10 @@ class ThumbManager(ThumbManagerBase):
|
||||||
class ValidatorCallback:
|
class ValidatorCallback:
|
||||||
"""handle callback validate thumbnails page by page"""
|
"""handle callback validate thumbnails page by page"""
|
||||||
|
|
||||||
def __init__(self, source, index_name):
|
def __init__(self, source, index_name, counter=0):
|
||||||
self.source = source
|
self.source = source
|
||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
|
self.counter = counter
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""run the task for page"""
|
"""run the task for page"""
|
||||||
|
@ -384,9 +385,10 @@ class EmbedCallback:
|
||||||
MEDIA_DIR = CONFIG["application"]["videos"]
|
MEDIA_DIR = CONFIG["application"]["videos"]
|
||||||
FORMAT = MP4Cover.FORMAT_JPEG
|
FORMAT = MP4Cover.FORMAT_JPEG
|
||||||
|
|
||||||
def __init__(self, source, index_name):
|
def __init__(self, source, index_name, counter=0):
|
||||||
self.source = source
|
self.source = source
|
||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
|
self.counter = counter
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""run embed"""
|
"""run embed"""
|
||||||
|
|
|
@ -18,6 +18,8 @@ from home.src.ta.helper import get_mapping, ignore_filelist
|
||||||
class ElasticBackup:
|
class ElasticBackup:
|
||||||
"""dump index to nd-json files for later bulk import"""
|
"""dump index to nd-json files for later bulk import"""
|
||||||
|
|
||||||
|
INDEX_SPLIT = ["comment"]
|
||||||
|
|
||||||
def __init__(self, reason=False, task=False):
|
def __init__(self, reason=False, task=False):
|
||||||
self.config = AppConfig().config
|
self.config = AppConfig().config
|
||||||
self.cache_dir = self.config["application"]["cache_dir"]
|
self.cache_dir = self.config["application"]["cache_dir"]
|
||||||
|
@ -51,14 +53,18 @@ class ElasticBackup:
|
||||||
|
|
||||||
def backup_index(self, index_name):
|
def backup_index(self, index_name):
|
||||||
"""export all documents of a single index"""
|
"""export all documents of a single index"""
|
||||||
paginate = IndexPaginate(
|
paginate_kwargs = {
|
||||||
f"ta_{index_name}",
|
"data": {"query": {"match_all": {}}},
|
||||||
data={"query": {"match_all": {}}},
|
"keep_source": True,
|
||||||
keep_source=True,
|
"callback": BackupCallback,
|
||||||
callback=BackupCallback,
|
"task": self.task,
|
||||||
task=self.task,
|
"total": self._get_total(index_name),
|
||||||
total=self._get_total(index_name),
|
}
|
||||||
)
|
|
||||||
|
if index_name in self.INDEX_SPLIT:
|
||||||
|
paginate_kwargs.update({"size": 200})
|
||||||
|
|
||||||
|
paginate = IndexPaginate(f"ta_{index_name}", **paginate_kwargs)
|
||||||
_ = paginate.get_results()
|
_ = paginate.get_results()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -206,9 +212,10 @@ class ElasticBackup:
|
||||||
class BackupCallback:
|
class BackupCallback:
|
||||||
"""handle backup ndjson writer as callback for IndexPaginate"""
|
"""handle backup ndjson writer as callback for IndexPaginate"""
|
||||||
|
|
||||||
def __init__(self, source, index_name):
|
def __init__(self, source, index_name, counter=0):
|
||||||
self.source = source
|
self.source = source
|
||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
|
self.counter = counter
|
||||||
self.timestamp = datetime.now().strftime("%Y%m%d")
|
self.timestamp = datetime.now().strftime("%Y%m%d")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
@ -237,7 +244,8 @@ class BackupCallback:
|
||||||
def _write_es_json(self, file_content):
|
def _write_es_json(self, file_content):
|
||||||
"""write nd-json file for es _bulk API to disk"""
|
"""write nd-json file for es _bulk API to disk"""
|
||||||
cache_dir = AppConfig().config["application"]["cache_dir"]
|
cache_dir = AppConfig().config["application"]["cache_dir"]
|
||||||
file_name = f"es_{self.index_name.lstrip('ta_')}-{self.timestamp}.json"
|
index = self.index_name.lstrip("ta_")
|
||||||
|
file_name = f"es_{index}-{self.timestamp}-{self.counter}.json"
|
||||||
file_path = os.path.join(cache_dir, "backup", file_name)
|
file_path = os.path.join(cache_dir, "backup", file_name)
|
||||||
with open(file_path, "a+", encoding="utf-8") as f:
|
with open(file_path, "a+", encoding="utf-8") as f:
|
||||||
f.write(file_content)
|
f.write(file_content)
|
||||||
|
|
|
@ -204,7 +204,9 @@ class IndexPaginate:
|
||||||
all_results.append(hit["_source"])
|
all_results.append(hit["_source"])
|
||||||
|
|
||||||
if self.kwargs.get("callback"):
|
if self.kwargs.get("callback"):
|
||||||
self.kwargs.get("callback")(all_hits, self.index_name).run()
|
self.kwargs.get("callback")(
|
||||||
|
all_hits, self.index_name, counter=counter
|
||||||
|
).run()
|
||||||
|
|
||||||
if self.kwargs.get("task"):
|
if self.kwargs.get("task"):
|
||||||
print(f"{self.index_name}: processing page {counter}")
|
print(f"{self.index_name}: processing page {counter}")
|
||||||
|
|
Loading…
Reference in New Issue