mirror of
https://github.com/tubearchivist/tubearchivist.git
synced 2024-12-24 10:50:11 +00:00
Use snapshots for mapping backup, #build
Changes: - Use system snapshots at startup to backup before mapping changes - Refactor startup, index setup and backup classes - bump yt-dlp and redis
This commit is contained in:
commit
18b8a17df1
@ -43,7 +43,6 @@ class ApiBaseView(APIView):
|
|||||||
def get_document(self, document_id):
|
def get_document(self, document_id):
|
||||||
"""get single document from es"""
|
"""get single document from es"""
|
||||||
path = f"{self.search_base}{document_id}"
|
path = f"{self.search_base}{document_id}"
|
||||||
print(path)
|
|
||||||
response, status_code = ElasticWrap(path).get()
|
response, status_code = ElasticWrap(path).get()
|
||||||
try:
|
try:
|
||||||
self.response["data"] = SearchProcess(response).process()
|
self.response["data"] = SearchProcess(response).process()
|
||||||
@ -64,8 +63,6 @@ class ApiBaseView(APIView):
|
|||||||
|
|
||||||
def get_document_list(self, request, pagination=True):
|
def get_document_list(self, request, pagination=True):
|
||||||
"""get a list of results"""
|
"""get a list of results"""
|
||||||
print(self.search_base)
|
|
||||||
|
|
||||||
if pagination:
|
if pagination:
|
||||||
self.initiate_pagination(request)
|
self.initiate_pagination(request)
|
||||||
|
|
||||||
@ -437,7 +434,6 @@ class DownloadApiListView(ApiBaseView):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def post(request):
|
def post(request):
|
||||||
"""add list of videos to download queue"""
|
"""add list of videos to download queue"""
|
||||||
print(f"request meta data: {request.META}")
|
|
||||||
data = request.data
|
data = request.data
|
||||||
try:
|
try:
|
||||||
to_add = data["data"]
|
to_add = data["data"]
|
||||||
@ -524,10 +520,7 @@ class TaskApiView(ApiBaseView):
|
|||||||
|
|
||||||
def post(self, request):
|
def post(self, request):
|
||||||
"""handle post request"""
|
"""handle post request"""
|
||||||
|
response = TaskHandler(request.data).run_task()
|
||||||
data = request.data
|
|
||||||
print(data)
|
|
||||||
response = TaskHandler(data).run_task()
|
|
||||||
|
|
||||||
return Response(response)
|
return Response(response)
|
||||||
|
|
||||||
@ -662,6 +655,6 @@ class SearchView(ApiBaseView):
|
|||||||
return Response(
|
return Response(
|
||||||
{"message": "no search query specified"}, status=400
|
{"message": "no search query specified"}, status=400
|
||||||
)
|
)
|
||||||
print("searching for: " + search_query)
|
|
||||||
search_results = SearchForm().multi_search(search_query)
|
search_results = SearchForm().multi_search(search_query)
|
||||||
return Response(search_results)
|
return Response(search_results)
|
||||||
|
@ -5,9 +5,10 @@ import sys
|
|||||||
|
|
||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
from home.src.es.connect import ElasticWrap
|
from home.src.es.connect import ElasticWrap
|
||||||
from home.src.es.index_setup import index_check
|
from home.src.es.index_setup import ElasitIndexWrap
|
||||||
from home.src.es.snapshot import ElasticSnapshot
|
from home.src.es.snapshot import ElasticSnapshot
|
||||||
from home.src.ta.config import AppConfig as ArchivistConfig
|
from home.src.ta.config import AppConfig as ArchivistConfig
|
||||||
|
from home.src.ta.helper import clear_dl_cache
|
||||||
from home.src.ta.ta_redis import RedisArchivist
|
from home.src.ta.ta_redis import RedisArchivist
|
||||||
|
|
||||||
|
|
||||||
@ -27,10 +28,11 @@ class StartupCheck:
|
|||||||
print("run startup checks")
|
print("run startup checks")
|
||||||
self.es_version_check()
|
self.es_version_check()
|
||||||
self.release_lock()
|
self.release_lock()
|
||||||
index_check()
|
ElasitIndexWrap().setup()
|
||||||
self.sync_redis_state()
|
self.sync_redis_state()
|
||||||
self.set_redis_conf()
|
self.set_redis_conf()
|
||||||
self.make_folders()
|
self.make_folders()
|
||||||
|
clear_dl_cache(self.config_handler.config)
|
||||||
self.snapshot_check()
|
self.snapshot_check()
|
||||||
self.set_has_run()
|
self.set_has_run()
|
||||||
|
|
||||||
|
224
tubearchivist/home/src/es/backup.py
Normal file
224
tubearchivist/home/src/es/backup.py
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
"""
|
||||||
|
Functionality:
|
||||||
|
- Handle json zip file based backup
|
||||||
|
- create backup
|
||||||
|
- restore backup
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import zipfile
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||||
|
from home.src.ta.config import AppConfig
|
||||||
|
from home.src.ta.helper import get_mapping, ignore_filelist
|
||||||
|
|
||||||
|
|
||||||
|
class ElasticBackup:
|
||||||
|
"""dump index to nd-json files for later bulk import"""
|
||||||
|
|
||||||
|
def __init__(self, reason=False):
|
||||||
|
self.config = AppConfig().config
|
||||||
|
self.cache_dir = self.config["application"]["cache_dir"]
|
||||||
|
self.timestamp = datetime.now().strftime("%Y%m%d")
|
||||||
|
self.index_config = get_mapping()
|
||||||
|
self.reason = reason
|
||||||
|
|
||||||
|
def backup_all_indexes(self):
|
||||||
|
"""backup all indexes, add reason to init"""
|
||||||
|
print("backup all indexes")
|
||||||
|
if not self.reason:
|
||||||
|
raise ValueError("missing backup reason in ElasticBackup")
|
||||||
|
|
||||||
|
for index in self.index_config:
|
||||||
|
index_name = index["index_name"]
|
||||||
|
print(f"backup: export in progress for {index_name}")
|
||||||
|
if not self.index_exists(index_name):
|
||||||
|
print(f"skip backup for not yet existing index {index_name}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
self.backup_index(index_name)
|
||||||
|
|
||||||
|
self.zip_it()
|
||||||
|
if self.reason == "auto":
|
||||||
|
self.rotate_backup()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def backup_index(index_name):
|
||||||
|
"""export all documents of a single index"""
|
||||||
|
data = {
|
||||||
|
"query": {"match_all": {}},
|
||||||
|
"sort": [{"_doc": {"order": "desc"}}],
|
||||||
|
}
|
||||||
|
paginate = IndexPaginate(
|
||||||
|
f"ta_{index_name}", data, keep_source=True, callback=BackupCallback
|
||||||
|
)
|
||||||
|
_ = paginate.get_results()
|
||||||
|
|
||||||
|
def zip_it(self):
|
||||||
|
"""pack it up into single zip file"""
|
||||||
|
file_name = f"ta_backup-{self.timestamp}-{self.reason}.zip"
|
||||||
|
folder = os.path.join(self.cache_dir, "backup")
|
||||||
|
|
||||||
|
to_backup = []
|
||||||
|
for file in os.listdir(folder):
|
||||||
|
if file.endswith(".json"):
|
||||||
|
to_backup.append(os.path.join(folder, file))
|
||||||
|
|
||||||
|
backup_file = os.path.join(folder, file_name)
|
||||||
|
|
||||||
|
comp = zipfile.ZIP_DEFLATED
|
||||||
|
with zipfile.ZipFile(backup_file, "w", compression=comp) as zip_f:
|
||||||
|
for backup_file in to_backup:
|
||||||
|
zip_f.write(backup_file, os.path.basename(backup_file))
|
||||||
|
|
||||||
|
# cleanup
|
||||||
|
for backup_file in to_backup:
|
||||||
|
os.remove(backup_file)
|
||||||
|
|
||||||
|
def post_bulk_restore(self, file_name):
|
||||||
|
"""send bulk to es"""
|
||||||
|
file_path = os.path.join(self.cache_dir, file_name)
|
||||||
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
|
data = f.read()
|
||||||
|
|
||||||
|
if not data.strip():
|
||||||
|
return
|
||||||
|
|
||||||
|
_, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
|
||||||
|
|
||||||
|
def get_all_backup_files(self):
|
||||||
|
"""build all available backup files for view"""
|
||||||
|
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||||
|
backup_files = os.listdir(backup_dir)
|
||||||
|
all_backup_files = ignore_filelist(backup_files)
|
||||||
|
all_available_backups = [
|
||||||
|
i
|
||||||
|
for i in all_backup_files
|
||||||
|
if i.startswith("ta_") and i.endswith(".zip")
|
||||||
|
]
|
||||||
|
all_available_backups.sort(reverse=True)
|
||||||
|
|
||||||
|
backup_dicts = []
|
||||||
|
for backup_file in all_available_backups:
|
||||||
|
file_split = backup_file.split("-")
|
||||||
|
if len(file_split) == 2:
|
||||||
|
timestamp = file_split[1].strip(".zip")
|
||||||
|
reason = False
|
||||||
|
elif len(file_split) == 3:
|
||||||
|
timestamp = file_split[1]
|
||||||
|
reason = file_split[2].strip(".zip")
|
||||||
|
|
||||||
|
to_add = {
|
||||||
|
"filename": backup_file,
|
||||||
|
"timestamp": timestamp,
|
||||||
|
"reason": reason,
|
||||||
|
}
|
||||||
|
backup_dicts.append(to_add)
|
||||||
|
|
||||||
|
return backup_dicts
|
||||||
|
|
||||||
|
def restore(self, filename):
|
||||||
|
"""
|
||||||
|
restore from backup zip file
|
||||||
|
call reset from ElasitIndexWrap first to start blank
|
||||||
|
"""
|
||||||
|
zip_content = self._unpack_zip_backup(filename)
|
||||||
|
self._restore_json_files(zip_content)
|
||||||
|
|
||||||
|
def _unpack_zip_backup(self, filename):
|
||||||
|
"""extract backup zip and return filelist"""
|
||||||
|
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||||
|
file_path = os.path.join(backup_dir, filename)
|
||||||
|
|
||||||
|
with zipfile.ZipFile(file_path, "r") as z:
|
||||||
|
zip_content = z.namelist()
|
||||||
|
z.extractall(backup_dir)
|
||||||
|
|
||||||
|
return zip_content
|
||||||
|
|
||||||
|
def _restore_json_files(self, zip_content):
|
||||||
|
"""go through the unpacked files and restore"""
|
||||||
|
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||||
|
|
||||||
|
for json_f in zip_content:
|
||||||
|
|
||||||
|
file_name = os.path.join(backup_dir, json_f)
|
||||||
|
|
||||||
|
if not json_f.startswith("es_") or not json_f.endswith(".json"):
|
||||||
|
os.remove(file_name)
|
||||||
|
continue
|
||||||
|
|
||||||
|
print("restoring: " + json_f)
|
||||||
|
self.post_bulk_restore(file_name)
|
||||||
|
os.remove(file_name)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def index_exists(index_name):
|
||||||
|
"""check if index already exists to skip"""
|
||||||
|
_, status_code = ElasticWrap(f"ta_{index_name}").get()
|
||||||
|
exists = status_code == 200
|
||||||
|
|
||||||
|
return exists
|
||||||
|
|
||||||
|
def rotate_backup(self):
|
||||||
|
"""delete old backups if needed"""
|
||||||
|
rotate = self.config["scheduler"]["run_backup_rotate"]
|
||||||
|
if not rotate:
|
||||||
|
return
|
||||||
|
|
||||||
|
all_backup_files = self.get_all_backup_files()
|
||||||
|
auto = [i for i in all_backup_files if i["reason"] == "auto"]
|
||||||
|
|
||||||
|
if len(auto) <= rotate:
|
||||||
|
print("no backup files to rotate")
|
||||||
|
return
|
||||||
|
|
||||||
|
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||||
|
|
||||||
|
all_to_delete = auto[rotate:]
|
||||||
|
for to_delete in all_to_delete:
|
||||||
|
file_path = os.path.join(backup_dir, to_delete["filename"])
|
||||||
|
print(f"remove old backup file: {file_path}")
|
||||||
|
os.remove(file_path)
|
||||||
|
|
||||||
|
|
||||||
|
class BackupCallback:
|
||||||
|
"""handle backup ndjson writer as callback for IndexPaginate"""
|
||||||
|
|
||||||
|
def __init__(self, source, index_name):
|
||||||
|
self.source = source
|
||||||
|
self.index_name = index_name
|
||||||
|
self.timestamp = datetime.now().strftime("%Y%m%d")
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
"""run the junk task"""
|
||||||
|
file_content = self._build_bulk()
|
||||||
|
self._write_es_json(file_content)
|
||||||
|
|
||||||
|
def _build_bulk(self):
|
||||||
|
"""build bulk query data from all_results"""
|
||||||
|
bulk_list = []
|
||||||
|
|
||||||
|
for document in self.source:
|
||||||
|
document_id = document["_id"]
|
||||||
|
es_index = document["_index"]
|
||||||
|
action = {"index": {"_index": es_index, "_id": document_id}}
|
||||||
|
source = document["_source"]
|
||||||
|
bulk_list.append(json.dumps(action))
|
||||||
|
bulk_list.append(json.dumps(source))
|
||||||
|
|
||||||
|
# add last newline
|
||||||
|
bulk_list.append("\n")
|
||||||
|
file_content = "\n".join(bulk_list)
|
||||||
|
|
||||||
|
return file_content
|
||||||
|
|
||||||
|
def _write_es_json(self, file_content):
|
||||||
|
"""write nd-json file for es _bulk API to disk"""
|
||||||
|
cache_dir = AppConfig().config["application"]["cache_dir"]
|
||||||
|
file_name = f"es_{self.index_name.lstrip('ta_')}-{self.timestamp}.json"
|
||||||
|
file_path = os.path.join(cache_dir, "backup", file_name)
|
||||||
|
with open(file_path, "a+", encoding="utf-8") as f:
|
||||||
|
f.write(file_content)
|
@ -5,22 +5,17 @@ functionality:
|
|||||||
- backup and restore metadata
|
- backup and restore metadata
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
from home.src.es.backup import ElasticBackup
|
||||||
import os
|
from home.src.es.connect import ElasticWrap
|
||||||
import zipfile
|
from home.src.es.snapshot import ElasticSnapshot
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
|
||||||
from home.src.ta.config import AppConfig
|
from home.src.ta.config import AppConfig
|
||||||
from home.src.ta.helper import ignore_filelist
|
from home.src.ta.helper import get_mapping
|
||||||
|
|
||||||
|
|
||||||
class ElasticIndex:
|
class ElasticIndex:
|
||||||
"""
|
"""interact with a single index"""
|
||||||
handle mapping and settings on elastic search for a given index
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, index_name, expected_map, expected_set):
|
def __init__(self, index_name, expected_map=False, expected_set=False):
|
||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
self.expected_map = expected_map
|
self.expected_map = expected_map
|
||||||
self.expected_set = expected_set
|
self.expected_set = expected_set
|
||||||
@ -61,23 +56,23 @@ class ElasticIndex:
|
|||||||
if list(value.keys()) == ["properties"]:
|
if list(value.keys()) == ["properties"]:
|
||||||
for key_n, value_n in value["properties"].items():
|
for key_n, value_n in value["properties"].items():
|
||||||
if key not in now_map:
|
if key not in now_map:
|
||||||
print(key_n, value_n)
|
print(f"detected mapping change: {key_n}, {value_n}")
|
||||||
return True
|
return True
|
||||||
if key_n not in now_map[key]["properties"].keys():
|
if key_n not in now_map[key]["properties"].keys():
|
||||||
print(key_n, value_n)
|
print(f"detected mapping change: {key_n}, {value_n}")
|
||||||
return True
|
return True
|
||||||
if not value_n == now_map[key]["properties"][key_n]:
|
if not value_n == now_map[key]["properties"][key_n]:
|
||||||
print(key_n, value_n)
|
print(f"detected mapping change: {key_n}, {value_n}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# not nested
|
# not nested
|
||||||
if key not in now_map.keys():
|
if key not in now_map.keys():
|
||||||
print(key, value)
|
print(f"detected mapping change: {key}, {value}")
|
||||||
return True
|
return True
|
||||||
if not value == now_map[key]:
|
if not value == now_map[key]:
|
||||||
print(key, value)
|
print(f"detected mapping change: {key}, {value}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
@ -100,6 +95,7 @@ class ElasticIndex:
|
|||||||
|
|
||||||
def rebuild_index(self):
|
def rebuild_index(self):
|
||||||
"""rebuild with new mapping"""
|
"""rebuild with new mapping"""
|
||||||
|
print(f"applying new mappings to index ta_{self.index_name}...")
|
||||||
self.create_blank(for_backup=True)
|
self.create_blank(for_backup=True)
|
||||||
self.reindex("backup")
|
self.reindex("backup")
|
||||||
self.delete_index(backup=False)
|
self.delete_index(backup=False)
|
||||||
@ -129,6 +125,7 @@ class ElasticIndex:
|
|||||||
|
|
||||||
def create_blank(self, for_backup=False):
|
def create_blank(self, for_backup=False):
|
||||||
"""apply new mapping and settings for blank new index"""
|
"""apply new mapping and settings for blank new index"""
|
||||||
|
print(f"create new blank index with name ta_{self.index_name}...")
|
||||||
path = f"ta_{self.index_name}"
|
path = f"ta_{self.index_name}"
|
||||||
if for_backup:
|
if for_backup:
|
||||||
path = f"{path}_backup"
|
path = f"{path}_backup"
|
||||||
@ -142,270 +139,72 @@ class ElasticIndex:
|
|||||||
_, _ = ElasticWrap(path).put(data)
|
_, _ = ElasticWrap(path).put(data)
|
||||||
|
|
||||||
|
|
||||||
class BackupCallback:
|
class ElasitIndexWrap:
|
||||||
"""handle backup ndjson writer as callback for IndexPaginate"""
|
"""interact with all index mapping and setup"""
|
||||||
|
|
||||||
def __init__(self, source, index_name):
|
def __init__(self):
|
||||||
self.source = source
|
self.index_config = get_mapping()
|
||||||
self.index_name = index_name
|
self.backup_run = False
|
||||||
self.timestamp = datetime.now().strftime("%Y%m%d")
|
|
||||||
|
|
||||||
def run(self):
|
def setup(self):
|
||||||
"""run the junk task"""
|
"""setup elastic index, run at startup"""
|
||||||
file_content = self._build_bulk()
|
for index in self.index_config:
|
||||||
self._write_es_json(file_content)
|
index_name, expected_map, expected_set = self._config_split(index)
|
||||||
|
|
||||||
def _build_bulk(self):
|
|
||||||
"""build bulk query data from all_results"""
|
|
||||||
bulk_list = []
|
|
||||||
|
|
||||||
for document in self.source:
|
|
||||||
document_id = document["_id"]
|
|
||||||
es_index = document["_index"]
|
|
||||||
action = {"index": {"_index": es_index, "_id": document_id}}
|
|
||||||
source = document["_source"]
|
|
||||||
bulk_list.append(json.dumps(action))
|
|
||||||
bulk_list.append(json.dumps(source))
|
|
||||||
|
|
||||||
# add last newline
|
|
||||||
bulk_list.append("\n")
|
|
||||||
file_content = "\n".join(bulk_list)
|
|
||||||
|
|
||||||
return file_content
|
|
||||||
|
|
||||||
def _write_es_json(self, file_content):
|
|
||||||
"""write nd-json file for es _bulk API to disk"""
|
|
||||||
cache_dir = AppConfig().config["application"]["cache_dir"]
|
|
||||||
file_name = f"es_{self.index_name.lstrip('ta_')}-{self.timestamp}.json"
|
|
||||||
file_path = os.path.join(cache_dir, "backup", file_name)
|
|
||||||
with open(file_path, "a+", encoding="utf-8") as f:
|
|
||||||
f.write(file_content)
|
|
||||||
|
|
||||||
|
|
||||||
class ElasticBackup:
|
|
||||||
"""dump index to nd-json files for later bulk import"""
|
|
||||||
|
|
||||||
def __init__(self, index_config, reason):
|
|
||||||
self.config = AppConfig().config
|
|
||||||
self.cache_dir = self.config["application"]["cache_dir"]
|
|
||||||
self.timestamp = datetime.now().strftime("%Y%m%d")
|
|
||||||
self.index_config = index_config
|
|
||||||
self.reason = reason
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def backup_index(index_name):
|
|
||||||
"""export all documents of a single index"""
|
|
||||||
data = {
|
|
||||||
"query": {"match_all": {}},
|
|
||||||
"sort": [{"_doc": {"order": "desc"}}],
|
|
||||||
}
|
|
||||||
paginate = IndexPaginate(
|
|
||||||
f"ta_{index_name}", data, keep_source=True, callback=BackupCallback
|
|
||||||
)
|
|
||||||
_ = paginate.get_results()
|
|
||||||
|
|
||||||
def zip_it(self):
|
|
||||||
"""pack it up into single zip file"""
|
|
||||||
file_name = f"ta_backup-{self.timestamp}-{self.reason}.zip"
|
|
||||||
folder = os.path.join(self.cache_dir, "backup")
|
|
||||||
|
|
||||||
to_backup = []
|
|
||||||
for file in os.listdir(folder):
|
|
||||||
if file.endswith(".json"):
|
|
||||||
to_backup.append(os.path.join(folder, file))
|
|
||||||
|
|
||||||
backup_file = os.path.join(folder, file_name)
|
|
||||||
|
|
||||||
comp = zipfile.ZIP_DEFLATED
|
|
||||||
with zipfile.ZipFile(backup_file, "w", compression=comp) as zip_f:
|
|
||||||
for backup_file in to_backup:
|
|
||||||
zip_f.write(backup_file, os.path.basename(backup_file))
|
|
||||||
|
|
||||||
# cleanup
|
|
||||||
for backup_file in to_backup:
|
|
||||||
os.remove(backup_file)
|
|
||||||
|
|
||||||
def post_bulk_restore(self, file_name):
|
|
||||||
"""send bulk to es"""
|
|
||||||
file_path = os.path.join(self.cache_dir, file_name)
|
|
||||||
with open(file_path, "r", encoding="utf-8") as f:
|
|
||||||
data = f.read()
|
|
||||||
|
|
||||||
if not data.strip():
|
|
||||||
return
|
|
||||||
|
|
||||||
_, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
|
|
||||||
|
|
||||||
def get_all_backup_files(self):
|
|
||||||
"""build all available backup files for view"""
|
|
||||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
|
||||||
backup_files = os.listdir(backup_dir)
|
|
||||||
all_backup_files = ignore_filelist(backup_files)
|
|
||||||
all_available_backups = [
|
|
||||||
i
|
|
||||||
for i in all_backup_files
|
|
||||||
if i.startswith("ta_") and i.endswith(".zip")
|
|
||||||
]
|
|
||||||
all_available_backups.sort(reverse=True)
|
|
||||||
|
|
||||||
backup_dicts = []
|
|
||||||
for backup_file in all_available_backups:
|
|
||||||
file_split = backup_file.split("-")
|
|
||||||
if len(file_split) == 2:
|
|
||||||
timestamp = file_split[1].strip(".zip")
|
|
||||||
reason = False
|
|
||||||
elif len(file_split) == 3:
|
|
||||||
timestamp = file_split[1]
|
|
||||||
reason = file_split[2].strip(".zip")
|
|
||||||
|
|
||||||
to_add = {
|
|
||||||
"filename": backup_file,
|
|
||||||
"timestamp": timestamp,
|
|
||||||
"reason": reason,
|
|
||||||
}
|
|
||||||
backup_dicts.append(to_add)
|
|
||||||
|
|
||||||
return backup_dicts
|
|
||||||
|
|
||||||
def unpack_zip_backup(self, filename):
|
|
||||||
"""extract backup zip and return filelist"""
|
|
||||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
|
||||||
file_path = os.path.join(backup_dir, filename)
|
|
||||||
|
|
||||||
with zipfile.ZipFile(file_path, "r") as z:
|
|
||||||
zip_content = z.namelist()
|
|
||||||
z.extractall(backup_dir)
|
|
||||||
|
|
||||||
return zip_content
|
|
||||||
|
|
||||||
def restore_json_files(self, zip_content):
|
|
||||||
"""go through the unpacked files and restore"""
|
|
||||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
|
||||||
|
|
||||||
for json_f in zip_content:
|
|
||||||
|
|
||||||
file_name = os.path.join(backup_dir, json_f)
|
|
||||||
|
|
||||||
if not json_f.startswith("es_") or not json_f.endswith(".json"):
|
|
||||||
os.remove(file_name)
|
|
||||||
continue
|
|
||||||
|
|
||||||
print("restoring: " + json_f)
|
|
||||||
self.post_bulk_restore(file_name)
|
|
||||||
os.remove(file_name)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def index_exists(index_name):
|
|
||||||
"""check if index already exists to skip"""
|
|
||||||
_, status_code = ElasticWrap(f"ta_{index_name}").get()
|
|
||||||
exists = status_code == 200
|
|
||||||
|
|
||||||
return exists
|
|
||||||
|
|
||||||
def rotate_backup(self):
|
|
||||||
"""delete old backups if needed"""
|
|
||||||
rotate = self.config["scheduler"]["run_backup_rotate"]
|
|
||||||
if not rotate:
|
|
||||||
return
|
|
||||||
|
|
||||||
all_backup_files = self.get_all_backup_files()
|
|
||||||
auto = [i for i in all_backup_files if i["reason"] == "auto"]
|
|
||||||
|
|
||||||
if len(auto) <= rotate:
|
|
||||||
print("no backup files to rotate")
|
|
||||||
return
|
|
||||||
|
|
||||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
|
||||||
|
|
||||||
all_to_delete = auto[rotate:]
|
|
||||||
for to_delete in all_to_delete:
|
|
||||||
file_path = os.path.join(backup_dir, to_delete["filename"])
|
|
||||||
print(f"remove old backup file: {file_path}")
|
|
||||||
os.remove(file_path)
|
|
||||||
|
|
||||||
|
|
||||||
def get_mapping():
|
|
||||||
"""read index_mapping.json and get expected mapping and settings"""
|
|
||||||
with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f:
|
|
||||||
index_config = json.load(f).get("index_config")
|
|
||||||
|
|
||||||
return index_config
|
|
||||||
|
|
||||||
|
|
||||||
def index_check(force_restore=False):
|
|
||||||
"""check if all indexes are created and have correct mapping"""
|
|
||||||
|
|
||||||
backed_up = False
|
|
||||||
index_config = get_mapping()
|
|
||||||
|
|
||||||
for index in index_config:
|
|
||||||
index_name = index["index_name"]
|
|
||||||
expected_map = index["expected_map"]
|
|
||||||
expected_set = index["expected_set"]
|
|
||||||
handler = ElasticIndex(index_name, expected_map, expected_set)
|
handler = ElasticIndex(index_name, expected_map, expected_set)
|
||||||
# force restore
|
|
||||||
if force_restore:
|
|
||||||
handler.delete_index(backup=False)
|
|
||||||
handler.create_blank()
|
|
||||||
continue
|
|
||||||
|
|
||||||
# create new
|
|
||||||
if not handler.exists:
|
if not handler.exists:
|
||||||
print(f"create new blank index with name ta_{index_name}...")
|
|
||||||
handler.create_blank()
|
handler.create_blank()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# validate index
|
|
||||||
rebuild = handler.validate()
|
rebuild = handler.validate()
|
||||||
if rebuild:
|
if rebuild:
|
||||||
# make backup before rebuild
|
self._check_backup()
|
||||||
if not backed_up:
|
|
||||||
print("running backup first")
|
|
||||||
backup_all_indexes(reason="update")
|
|
||||||
backed_up = True
|
|
||||||
|
|
||||||
print(f"applying new mappings to index ta_{index_name}...")
|
|
||||||
handler.rebuild_index()
|
handler.rebuild_index()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# else all good
|
# else all good
|
||||||
print(f"ta_{index_name} index is created and up to date...")
|
print(f"ta_{index_name} index is created and up to date...")
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
"""reset all indexes to blank"""
|
||||||
|
self.delete_all()
|
||||||
|
self.create_all_blank()
|
||||||
|
|
||||||
def get_available_backups():
|
def delete_all(self):
|
||||||
"""return dict of available backups for settings view"""
|
"""delete all indexes"""
|
||||||
index_config = get_mapping()
|
print("reset elastic index")
|
||||||
backup_handler = ElasticBackup(index_config, reason=False)
|
for index in self.index_config:
|
||||||
all_backup_files = backup_handler.get_all_backup_files()
|
index_name, _, _ = self._config_split(index)
|
||||||
return all_backup_files
|
handler = ElasticIndex(index_name)
|
||||||
|
handler.delete_index(backup=False)
|
||||||
|
|
||||||
|
def create_all_blank(self):
|
||||||
|
"""create all blank indexes"""
|
||||||
|
print("create all new indexes in elastic from template")
|
||||||
|
for index in self.index_config:
|
||||||
|
index_name, expected_map, expected_set = self._config_split(index)
|
||||||
|
handler = ElasticIndex(index_name, expected_map, expected_set)
|
||||||
|
handler.create_blank()
|
||||||
|
|
||||||
def backup_all_indexes(reason):
|
@staticmethod
|
||||||
"""backup all es indexes to disk"""
|
def _config_split(index):
|
||||||
index_config = get_mapping()
|
"""split index config keys"""
|
||||||
backup_handler = ElasticBackup(index_config, reason)
|
|
||||||
|
|
||||||
for index in backup_handler.index_config:
|
|
||||||
index_name = index["index_name"]
|
index_name = index["index_name"]
|
||||||
print(f"backup: export in progress for {index_name}")
|
expected_map = index["expected_map"]
|
||||||
if not backup_handler.index_exists(index_name):
|
expected_set = index["expected_set"]
|
||||||
print(f"skip backup for not yet existing index {index_name}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
backup_handler.backup_index(index_name)
|
return index_name, expected_map, expected_set
|
||||||
|
|
||||||
backup_handler.zip_it()
|
def _check_backup(self):
|
||||||
|
"""create backup if needed"""
|
||||||
|
if self.backup_run:
|
||||||
|
return
|
||||||
|
|
||||||
if reason == "auto":
|
config = AppConfig().config
|
||||||
backup_handler.rotate_backup()
|
if config["application"]["enable_snapshot"]:
|
||||||
|
# take snapshot if enabled
|
||||||
|
ElasticSnapshot().take_snapshot_now(wait=True)
|
||||||
|
else:
|
||||||
|
# fallback to json backup
|
||||||
|
ElasticBackup(reason="update").backup_all_indexes()
|
||||||
|
|
||||||
|
self.backup_run = True
|
||||||
def restore_from_backup(filename):
|
|
||||||
"""restore indexes from backup file"""
|
|
||||||
# delete
|
|
||||||
index_check(force_restore=True)
|
|
||||||
# recreate
|
|
||||||
index_config = get_mapping()
|
|
||||||
backup_handler = ElasticBackup(index_config, reason=False)
|
|
||||||
zip_content = backup_handler.unpack_zip_backup(filename)
|
|
||||||
backup_handler.restore_json_files(zip_content)
|
|
||||||
|
@ -5,10 +5,11 @@ functionality:
|
|||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from os import environ
|
from os import environ
|
||||||
|
from time import sleep
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
from home.src.es.connect import ElasticWrap
|
from home.src.es.connect import ElasticWrap
|
||||||
from home.src.es.index_setup import get_mapping
|
from home.src.ta.helper import get_mapping
|
||||||
|
|
||||||
|
|
||||||
class ElasticSnapshot:
|
class ElasticSnapshot:
|
||||||
@ -142,15 +143,42 @@ class ElasticSnapshot:
|
|||||||
print("snapshot: last snapshot is up-to-date")
|
print("snapshot: last snapshot is up-to-date")
|
||||||
return outdated
|
return outdated
|
||||||
|
|
||||||
def take_snapshot_now(self):
|
def take_snapshot_now(self, wait=False):
|
||||||
"""execute daily snapshot now"""
|
"""execute daily snapshot now"""
|
||||||
path = f"_slm/policy/{self.POLICY}/_execute"
|
path = f"_slm/policy/{self.POLICY}/_execute"
|
||||||
response, statuscode = ElasticWrap(path).post()
|
response, statuscode = ElasticWrap(path).post()
|
||||||
if statuscode == 200:
|
if statuscode == 200:
|
||||||
print(f"snapshot: executing now: {response}")
|
print(f"snapshot: executing now: {response}")
|
||||||
|
|
||||||
|
if wait:
|
||||||
|
self._wait_for_snapshot(response["snapshot_name"])
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def _wait_for_snapshot(self, snapshot_name):
|
||||||
|
"""return after snapshot_name completes"""
|
||||||
|
path = f"_snapshot/{self.REPO}/{snapshot_name}"
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# wait for task to be created
|
||||||
|
sleep(1)
|
||||||
|
_, statuscode = ElasticWrap(path).get()
|
||||||
|
if statuscode == 200:
|
||||||
|
break
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# wait for snapshot success
|
||||||
|
response, statuscode = ElasticWrap(path).get()
|
||||||
|
snapshot_state = response["snapshots"][0]["state"]
|
||||||
|
if snapshot_state == "SUCCESS":
|
||||||
|
break
|
||||||
|
|
||||||
|
print(f"snapshot: {snapshot_name} in state {snapshot_state}")
|
||||||
|
print("snapshot: wait to complete")
|
||||||
|
sleep(5)
|
||||||
|
|
||||||
|
print(f"snapshot: completed - {response}")
|
||||||
|
|
||||||
def get_snapshot_stats(self):
|
def get_snapshot_stats(self):
|
||||||
"""get snapshot info for frontend"""
|
"""get snapshot info for frontend"""
|
||||||
snapshot_info = self._build_policy_details()
|
snapshot_info = self._build_policy_details()
|
||||||
|
@ -421,6 +421,7 @@ class YoutubeChannel(YouTubeItem):
|
|||||||
to_write[key] = False
|
to_write[key] = False
|
||||||
continue
|
continue
|
||||||
if value in [0, "0"]:
|
if value in [0, "0"]:
|
||||||
|
if key in to_write:
|
||||||
del to_write[key]
|
del to_write[key]
|
||||||
continue
|
continue
|
||||||
if value == "1":
|
if value == "1":
|
||||||
|
@ -33,7 +33,10 @@ class Comments:
|
|||||||
|
|
||||||
self._send_notification(notify)
|
self._send_notification(notify)
|
||||||
comments_raw, channel_id = self.get_yt_comments()
|
comments_raw, channel_id = self.get_yt_comments()
|
||||||
|
if comments_raw:
|
||||||
self.format_comments(comments_raw)
|
self.format_comments(comments_raw)
|
||||||
|
else:
|
||||||
|
self.comments_format = []
|
||||||
|
|
||||||
self.json_data = {
|
self.json_data = {
|
||||||
"youtube_id": self.youtube_id,
|
"youtube_id": self.youtube_id,
|
||||||
|
@ -80,12 +80,12 @@ class YoutubePlaylist(YouTubeItem):
|
|||||||
downloaded = entry["id"] in self.all_youtube_ids
|
downloaded = entry["id"] in self.all_youtube_ids
|
||||||
else:
|
else:
|
||||||
downloaded = False
|
downloaded = False
|
||||||
if not entry["uploader"]:
|
if not entry["channel"]:
|
||||||
continue
|
continue
|
||||||
to_append = {
|
to_append = {
|
||||||
"youtube_id": entry["id"],
|
"youtube_id": entry["id"],
|
||||||
"title": entry["title"],
|
"title": entry["title"],
|
||||||
"uploader": entry["uploader"],
|
"uploader": entry["channel"],
|
||||||
"idx": idx,
|
"idx": idx,
|
||||||
"downloaded": downloaded,
|
"downloaded": downloaded,
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@ Loose collection of helper functions
|
|||||||
- don't import AppConfig class here to avoid circular imports
|
- don't import AppConfig class here to avoid circular imports
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
@ -117,6 +119,23 @@ def time_parser(timestamp):
|
|||||||
return int(hours) * 60 * 60 + int(minutes) * 60 + float(seconds)
|
return int(hours) * 60 * 60 + int(minutes) * 60 + float(seconds)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_dl_cache(config):
|
||||||
|
"""clear leftover files from dl cache"""
|
||||||
|
print("clear download cache")
|
||||||
|
cache_dir = os.path.join(config["application"]["cache_dir"], "download")
|
||||||
|
for cached in os.listdir(cache_dir):
|
||||||
|
to_delete = os.path.join(cache_dir, cached)
|
||||||
|
os.remove(to_delete)
|
||||||
|
|
||||||
|
|
||||||
|
def get_mapping():
|
||||||
|
"""read index_mapping.json and get expected mapping and settings"""
|
||||||
|
with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f:
|
||||||
|
index_config = json.load(f).get("index_config")
|
||||||
|
|
||||||
|
return index_config
|
||||||
|
|
||||||
|
|
||||||
class UrlListParser:
|
class UrlListParser:
|
||||||
"""take a multi line string and detect valid youtube ids"""
|
"""take a multi line string and detect valid youtube ids"""
|
||||||
|
|
||||||
|
@ -17,7 +17,8 @@ from home.src.download.subscriptions import (
|
|||||||
)
|
)
|
||||||
from home.src.download.thumbnails import ThumbFilesystem, ThumbValidator
|
from home.src.download.thumbnails import ThumbFilesystem, ThumbValidator
|
||||||
from home.src.download.yt_dlp_handler import VideoDownloader
|
from home.src.download.yt_dlp_handler import VideoDownloader
|
||||||
from home.src.es.index_setup import backup_all_indexes, restore_from_backup
|
from home.src.es.backup import ElasticBackup
|
||||||
|
from home.src.es.index_setup import ElasitIndexWrap
|
||||||
from home.src.index.channel import YoutubeChannel
|
from home.src.index.channel import YoutubeChannel
|
||||||
from home.src.index.filesystem import (
|
from home.src.index.filesystem import (
|
||||||
ImportFolderScanner,
|
ImportFolderScanner,
|
||||||
@ -25,7 +26,7 @@ from home.src.index.filesystem import (
|
|||||||
scan_filesystem,
|
scan_filesystem,
|
||||||
)
|
)
|
||||||
from home.src.ta.config import AppConfig, ScheduleBuilder
|
from home.src.ta.config import AppConfig, ScheduleBuilder
|
||||||
from home.src.ta.helper import UrlListParser
|
from home.src.ta.helper import UrlListParser, clear_dl_cache
|
||||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||||
|
|
||||||
CONFIG = AppConfig().config
|
CONFIG = AppConfig().config
|
||||||
@ -168,7 +169,7 @@ def run_backup(reason="auto"):
|
|||||||
try:
|
try:
|
||||||
have_lock = my_lock.acquire(blocking=False)
|
have_lock = my_lock.acquire(blocking=False)
|
||||||
if have_lock:
|
if have_lock:
|
||||||
backup_all_indexes(reason)
|
ElasticBackup(reason=reason).backup_all_indexes()
|
||||||
else:
|
else:
|
||||||
print("Did not acquire lock for backup task.")
|
print("Did not acquire lock for backup task.")
|
||||||
finally:
|
finally:
|
||||||
@ -180,7 +181,8 @@ def run_backup(reason="auto"):
|
|||||||
@shared_task
|
@shared_task
|
||||||
def run_restore_backup(filename):
|
def run_restore_backup(filename):
|
||||||
"""called from settings page, dump backup to zip file"""
|
"""called from settings page, dump backup to zip file"""
|
||||||
restore_from_backup(filename)
|
ElasitIndexWrap().reset()
|
||||||
|
ElasticBackup().restore(filename)
|
||||||
print("index restore finished")
|
print("index restore finished")
|
||||||
|
|
||||||
|
|
||||||
@ -192,11 +194,7 @@ def kill_dl(task_id):
|
|||||||
_ = RedisArchivist().del_message("dl_queue_id")
|
_ = RedisArchivist().del_message("dl_queue_id")
|
||||||
RedisQueue().clear()
|
RedisQueue().clear()
|
||||||
|
|
||||||
# clear cache
|
clear_dl_cache(CONFIG)
|
||||||
cache_dir = os.path.join(CONFIG["application"]["cache_dir"], "download")
|
|
||||||
for cached in os.listdir(cache_dir):
|
|
||||||
to_delete = os.path.join(cache_dir, cached)
|
|
||||||
os.remove(to_delete)
|
|
||||||
|
|
||||||
# notify
|
# notify
|
||||||
mess_dict = {
|
mess_dict = {
|
||||||
|
@ -16,8 +16,8 @@ from django.http import JsonResponse
|
|||||||
from django.shortcuts import redirect, render
|
from django.shortcuts import redirect, render
|
||||||
from django.views import View
|
from django.views import View
|
||||||
from home.src.download.yt_dlp_base import CookieHandler
|
from home.src.download.yt_dlp_base import CookieHandler
|
||||||
|
from home.src.es.backup import ElasticBackup
|
||||||
from home.src.es.connect import ElasticWrap
|
from home.src.es.connect import ElasticWrap
|
||||||
from home.src.es.index_setup import get_available_backups
|
|
||||||
from home.src.es.snapshot import ElasticSnapshot
|
from home.src.es.snapshot import ElasticSnapshot
|
||||||
from home.src.frontend.api_calls import PostData
|
from home.src.frontend.api_calls import PostData
|
||||||
from home.src.frontend.forms import (
|
from home.src.frontend.forms import (
|
||||||
@ -939,7 +939,7 @@ class SettingsView(View):
|
|||||||
config_handler = AppConfig(request.user.id)
|
config_handler = AppConfig(request.user.id)
|
||||||
colors = config_handler.colors
|
colors = config_handler.colors
|
||||||
|
|
||||||
available_backups = get_available_backups()
|
available_backups = ElasticBackup().get_all_backup_files()
|
||||||
user_form = UserSettingsForm()
|
user_form = UserSettingsForm()
|
||||||
app_form = ApplicationSettingsForm()
|
app_form = ApplicationSettingsForm()
|
||||||
scheduler_form = SchedulerSettingsForm()
|
scheduler_form = SchedulerSettingsForm()
|
||||||
|
@ -5,9 +5,9 @@ django-auth-ldap==4.1.0
|
|||||||
django-cors-headers==3.13.0
|
django-cors-headers==3.13.0
|
||||||
djangorestframework==3.14.0
|
djangorestframework==3.14.0
|
||||||
Pillow==9.3.0
|
Pillow==9.3.0
|
||||||
redis==4.3.4
|
redis==4.3.5
|
||||||
requests==2.28.1
|
requests==2.28.1
|
||||||
ryd-client==0.0.6
|
ryd-client==0.0.6
|
||||||
uWSGI==2.0.21
|
uWSGI==2.0.21
|
||||||
whitenoise==6.2.0
|
whitenoise==6.2.0
|
||||||
yt_dlp==2022.10.4
|
yt_dlp==2022.11.11
|
||||||
|
@ -841,6 +841,10 @@ video:-webkit-full-screen {
|
|||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.comment-box h3 {
|
||||||
|
line-break: anywhere;
|
||||||
|
}
|
||||||
|
|
||||||
.comments-replies {
|
.comments-replies {
|
||||||
display: none;
|
display: none;
|
||||||
padding-left: 1rem;
|
padding-left: 1rem;
|
||||||
|
Loading…
Reference in New Issue
Block a user