From ef0d49089061cfe242fdbaf61423e64b0eeed098 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jul 2023 15:56:59 +0700 Subject: [PATCH 1/8] [API] change dates to ISO format --- tubearchivist/home/src/ta/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index 6016d3c..b193640 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -91,7 +91,7 @@ def date_praser(timestamp: int | str) -> str: elif isinstance(timestamp, str): date_obj = datetime.strptime(timestamp, "%Y-%m-%d") - return datetime.strftime(date_obj, "%d %b, %Y") + return date_obj.date().isoformat() def time_parser(timestamp: str) -> float: From f970ec867ed00cd72fc4299b27e1b844bf22390d Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jul 2023 16:10:52 +0700 Subject: [PATCH 2/8] clarify subtitle language codes on settings page, #505 --- tubearchivist/home/templates/home/settings.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 25228dd..6805ba8 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -128,8 +128,8 @@

Subtitles

Subtitles download setting: {{ config.downloads.subtitle }}
- Choose which subtitles to download, add comma separated two letter language ISO code,
- e.g. en, de

+ Choose which subtitles to download, add comma separated language codes,
+ e.g. en, de, zh-Hans

{{ app_form.downloads_subtitle }}

From 6f915a57338c71dc2f9db4d3994455c88d7f41e4 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jul 2023 16:34:39 +0700 Subject: [PATCH 3/8] fix channel tv art extraction --- tubearchivist/home/src/index/channel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 4203c3d..1907062 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -92,8 +92,9 @@ class YoutubeChannel(YouTubeItem): def _get_tv_art(self): """extract tv artwork""" for i in self.youtube_meta["thumbnails"]: - if i.get("id") == "avatar_uncropped": + if i.get("id") == "banner_uncropped": return i["url"] + for i in self.youtube_meta["thumbnails"]: if not i.get("width"): continue if i["width"] // i["height"] < 2 and not i["width"] == i["height"]: From ab8fed14bb8cfbe36342007e25af407b7d0e5349 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jul 2023 17:56:28 +0700 Subject: [PATCH 4/8] handle edge cases in migration clean up --- tubearchivist/config/management/commands/ta_migpath.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tubearchivist/config/management/commands/ta_migpath.py b/tubearchivist/config/management/commands/ta_migpath.py index f407800..9fe6662 100644 --- a/tubearchivist/config/management/commands/ta_migpath.py +++ b/tubearchivist/config/management/commands/ta_migpath.py @@ -2,6 +2,7 @@ import json import os +import shutil from django.core.management.base import BaseCommand from home.src.es.connect import ElasticWrap, IndexPaginate @@ -167,5 +168,8 @@ class FolderMigration: all_folders = ignore_filelist(os.listdir(self.videos)) for folder in all_folders: folder_path = os.path.join(self.videos, folder) + if not os.path.isdir(folder_path): + continue + if not ignore_filelist(os.listdir(folder_path)): - os.rmdir(folder_path) + shutil.rmtree(folder_path) From aea403a874b1c9d648899b351fb91b5af22aea73 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jul 2023 18:19:22 +0700 Subject: [PATCH 5/8] additional thumb dl error handling --- tubearchivist/home/src/download/thumbnails.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index 88b6558..95461e8 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -54,7 +54,10 @@ class ThumbManagerBase: if response.status_code == 404: return self.get_fallback() - except requests.exceptions.RequestException: + except ( + requests.exceptions.RequestException, + requests.exceptions.ReadTimeout, + ): print(f"{self.item_id}: retry thumbnail download {url}") sleep((i + 1) ** i) From 75a63c48283e1254dba02b8ebfbc06b8418d9a2b Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 29 Jul 2023 17:05:01 +0700 Subject: [PATCH 6/8] paginate channel migration index update --- .../config/management/commands/ta_migpath.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tubearchivist/config/management/commands/ta_migpath.py b/tubearchivist/config/management/commands/ta_migpath.py index 9fe6662..607c58e 100644 --- a/tubearchivist/config/management/commands/ta_migpath.py +++ b/tubearchivist/config/management/commands/ta_migpath.py @@ -98,7 +98,8 @@ class FolderMigration: def migrate_videos(self, to_migrate): """migrate all videos of channel""" - for video in to_migrate: + total = len(to_migrate) + for idx, video in enumerate(to_migrate): new_media_url = self._move_video_file(video) if not new_media_url: continue @@ -113,6 +114,9 @@ class FolderMigration: self.bulk_list.append(json.dumps(action)) self.bulk_list.append(json.dumps(source)) + if idx % 1000 == 0: + print(f"processing migration [{idx}/{total}]") + self.send_bulk() def _move_video_file(self, video): """move video file to new location""" @@ -158,11 +162,14 @@ class FolderMigration: return self.bulk_list.append("\n") + path = "_bulk?refresh=true" data = "\n".join(self.bulk_list) - response, status = ElasticWrap("_bulk").post(data=data, ndjson=True) + response, status = ElasticWrap(path).post(data=data, ndjson=True) if not status == 200: print(response) + self.bulk_list = [] + def delete_old(self): """delete old empty folders""" all_folders = ignore_filelist(os.listdir(self.videos)) From 4473e9c5b20b4258df02081d9c5de1df3ce1cd7f Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 29 Jul 2023 23:41:54 +0700 Subject: [PATCH 7/8] add apprise notifications --- tubearchivist/home/config.json | 3 + .../home/src/download/yt_dlp_handler.py | 2 + tubearchivist/home/src/frontend/forms.py | 32 +++++++++++ tubearchivist/home/src/index/reindex.py | 23 +++++++- tubearchivist/home/src/ta/config.py | 12 ++++ tubearchivist/home/src/ta/notify.py | 55 +++++++++++++++++++ tubearchivist/home/tasks.py | 28 ++++++++-- .../home/templates/home/settings.html | 15 +++++ tubearchivist/requirements.txt | 1 + 9 files changed, 165 insertions(+), 6 deletions(-) create mode 100644 tubearchivist/home/src/ta/notify.py diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index 2a30ced..d45a2a1 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -47,8 +47,11 @@ }, "scheduler": { "update_subscribed": false, + "update_subscribed_notify": false, "download_pending": false, + "download_pending_notify": false, "check_reindex": {"minute": "0", "hour": "12", "day_of_week": "*"}, + "check_reindex_notify": false, "check_reindex_days": 90, "thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"}, "run_backup": false, diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index abd3a94..75aa792 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -191,6 +191,8 @@ class VideoDownloader: self._add_subscribed_channels() DownloadPostProcess(self).run() + return self.videos + def _notify(self, video_data, message): """send progress notification to task""" if not self.task: diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py index edde00c..fe5f5b2 100644 --- a/tubearchivist/home/src/frontend/forms.py +++ b/tubearchivist/home/src/frontend/forms.py @@ -157,9 +157,41 @@ class ApplicationSettingsForm(forms.Form): class SchedulerSettingsForm(forms.Form): """handle scheduler settings""" + HELP_TEXT = "Add Apprise notification URLs, one per line" + update_subscribed = forms.CharField(required=False) + update_subscribed_notify = forms.CharField( + label=False, + widget=forms.Textarea( + attrs={ + "rows": 2, + "placeholder": HELP_TEXT, + } + ), + required=False, + ) download_pending = forms.CharField(required=False) + download_pending_notify = forms.CharField( + label=False, + widget=forms.Textarea( + attrs={ + "rows": 2, + "placeholder": HELP_TEXT, + } + ), + required=False, + ) check_reindex = forms.CharField(required=False) + check_reindex_notify = forms.CharField( + label=False, + widget=forms.Textarea( + attrs={ + "rows": 2, + "placeholder": HELP_TEXT, + } + ), + required=False, + ) check_reindex_days = forms.IntegerField(required=False) thumbnail_check = forms.CharField(required=False) run_backup = forms.CharField(required=False) diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index 7c69b49..b89c00d 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -227,6 +227,11 @@ class Reindex(ReindexBase): super().__init__() self.task = task self.all_indexed_ids = False + self.processed = { + "videos": 0, + "channels": 0, + "playlists": 0, + } def reindex_all(self): """reindex all in queue""" @@ -316,6 +321,7 @@ class Reindex(ReindexBase): thumb_handler.download_video_thumb(video.json_data["vid_thumb_url"]) Comments(youtube_id, config=self.config).reindex_comments() + self.processed["videos"] += 1 return @@ -327,8 +333,7 @@ class Reindex(ReindexBase): new_path = os.path.join(videos, media_url_should) os.rename(old_path, new_path) - @staticmethod - def _reindex_single_channel(channel_id): + def _reindex_single_channel(self, channel_id): """refresh channel data and sync to videos""" # read current state channel = YoutubeChannel(channel_id) @@ -354,6 +359,7 @@ class Reindex(ReindexBase): channel.upload_to_es() ChannelFullScan(channel_id).scan() + self.processed["channels"] += 1 def _reindex_single_playlist(self, playlist_id): """refresh playlist data""" @@ -369,6 +375,7 @@ class Reindex(ReindexBase): playlist.json_data["playlist_subscribed"] = subscribed playlist.upload_to_es() + self.processed["playlists"] += 1 return def _get_all_videos(self): @@ -390,6 +397,18 @@ class Reindex(ReindexBase): valid = CookieHandler(self.config).validate() return valid + def build_message(self): + """build progress message""" + message = "" + for key, value in self.processed.items(): + if value: + message = message + f"{value} {key}, " + + if message: + message = f"reindexed {message.rstrip(', ')}" + + return message + class ReindexProgress(ReindexBase): """ diff --git a/tubearchivist/home/src/ta/config.py b/tubearchivist/home/src/ta/config.py index 671602c..d1cccce 100644 --- a/tubearchivist/home/src/ta/config.py +++ b/tubearchivist/home/src/ta/config.py @@ -184,6 +184,11 @@ class ScheduleBuilder: "version_check": "0 11 *", } CONFIG = ["check_reindex_days", "run_backup_rotate"] + NOTIFY = [ + "update_subscribed_notify", + "download_pending_notify", + "check_reindex_notify", + ] MSG = "message:setting" def __init__(self): @@ -213,6 +218,13 @@ class ScheduleBuilder: redis_config["scheduler"][key] = to_write if key in self.CONFIG and value: redis_config["scheduler"][key] = int(value) + if key in self.NOTIFY and value: + if value == "0": + to_write = False + else: + to_write = value + redis_config["scheduler"][key] = to_write + RedisArchivist().set_message("config", redis_config) mess_dict = { "status": self.MSG, diff --git a/tubearchivist/home/src/ta/notify.py b/tubearchivist/home/src/ta/notify.py new file mode 100644 index 0000000..d882128 --- /dev/null +++ b/tubearchivist/home/src/ta/notify.py @@ -0,0 +1,55 @@ +"""send notifications using apprise""" + +import apprise +from home.src.ta.config import AppConfig +from home.src.ta.task_manager import TaskManager + + +class Notifications: + """notification handler""" + + def __init__(self, name, task_id, task_title): + self.name = name + self.task_id = task_id + self.task_title = task_title + + def send(self): + """send notifications""" + apobj = apprise.Apprise() + hooks: str | None = self.get_url() + if not hooks: + return + + hook_list: list[str] = self.parse_hooks(hooks=hooks) + title, body = self.build_message() + + if not body: + return + + for hook in hook_list: + apobj.add(hook) + + apobj.notify(body=body, title=title) + + def get_url(self) -> str | None: + """get apprise urls for task""" + config = AppConfig().config + hooks: str = config["scheduler"].get(f"{self.name}_notify") + + return hooks + + def parse_hooks(self, hooks: str) -> list[str]: + """create list of hooks""" + + hook_list: list[str] = [i.strip() for i in hooks.split()] + + return hook_list + + def build_message(self) -> tuple[str, str | None]: + """build message to send notification""" + task = TaskManager().get_task(self.task_id) + status = task.get("status") + title: str = f"[TA] {self.task_title} process ended with {status}" + body: str | None = task.get("result") + + return title, body diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 76bc888..5ce1d62 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -23,6 +23,7 @@ from home.src.index.filesystem import Scanner from home.src.index.manual import ImportFolderScanner from home.src.index.reindex import Reindex, ReindexManual, ReindexPopulate from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder +from home.src.ta.notify import Notifications from home.src.ta.ta_redis import RedisArchivist from home.src.ta.task_manager import TaskManager from home.src.ta.urlparser import Parser @@ -130,6 +131,12 @@ class BaseTask(Task): message.update({"messages": ["New task received."]}) RedisArchivist().set_message(key, message) + def after_return(self, status, retval, task_id, args, kwargs, einfo): + """callback after task returns""" + print(f"{task_id} return callback") + task_title = self.TASK_CONFIG.get(self.name).get("title") + Notifications(self.name, task_id, task_title).send() + def send_progress(self, message_lines, progress=False, title=False): """send progress message""" message, key = self._build_message() @@ -169,7 +176,7 @@ def update_subscribed(self): if manager.is_pending(self): print(f"[task][{self.name}] rescan already running") self.send_progress("Rescan already in progress.") - return + return None manager.init(self) handler = SubscriptionScanner(task=self) @@ -178,6 +185,10 @@ def update_subscribed(self): if missing_videos: print(missing_videos) extrac_dl.delay(missing_videos, auto_start=auto_start) + message = f"Found {len(missing_videos)} videos to add to the queue." + return message + + return None @shared_task(name="download_pending", bind=True, base=BaseTask) @@ -187,10 +198,16 @@ def download_pending(self, auto_only=False): if manager.is_pending(self): print(f"[task][{self.name}] download queue already running") self.send_progress("Download Queue is already running.") - return + return None manager.init(self) - VideoDownloader(task=self).run_queue(auto_only=auto_only) + downloader = VideoDownloader(task=self) + videos_downloaded = downloader.run_queue(auto_only=auto_only) + + if videos_downloaded: + return f"downloaded {len(videos_downloaded)} videos." + + return None @shared_task(name="extract_download", bind=True, base=BaseTask) @@ -235,7 +252,10 @@ def check_reindex(self, data=False, extract_videos=False): self.send_progress("Add outdated documents to the reindex Queue.") populate.add_outdated() - Reindex(task=self).reindex_all() + handler = Reindex(task=self) + handler.reindex_all() + + return handler.build_message() @shared_task(bind=True, name="manual_import", base=BaseTask) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 6805ba8..73a0bc3 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -250,6 +250,11 @@

Periodically rescan your subscriptions:

{{ scheduler_form.update_subscribed }}
+
+

Send notification on task completed:

+

Current notification urls: {{ config.scheduler.update_subscribed_notify }}

+ {{ scheduler_form.update_subscribed_notify }} +

Start download

@@ -266,6 +271,11 @@

Automatic video download schedule:

{{ scheduler_form.download_pending }}
+
+

Send notification on task completed:

+

Current notification urls: {{ config.scheduler.download_pending_notify }}

+ {{ scheduler_form.download_pending_notify }} +

Refresh Metadata

@@ -287,6 +297,11 @@

Refresh older than x days, recommended 90:

{{ scheduler_form.check_reindex_days }}
+
+

Send notification on task completed:

+

Current notification urls: {{ config.scheduler.check_reindex_notify }}

+ {{ scheduler_form.check_reindex_notify }} +

Thumbnail check

diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index f251cff..9933c48 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,3 +1,4 @@ +apprise==1.4.5 celery==5.3.1 Django==4.2.3 django-auth-ldap==4.4.0 From c395a949cc4ae8d956c709bbd1eaff0851f3b9ed Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 30 Jul 2023 00:13:15 +0700 Subject: [PATCH 8/8] add localhost and tubearchivist to default trusted --- tubearchivist/home/src/ta/helper.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index b193640..eb924b3 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -138,8 +138,14 @@ def is_shorts(youtube_id: str) -> bool: def ta_host_parser(ta_host: str) -> tuple[list[str], list[str]]: """parse ta_host env var for ALLOWED_HOSTS and CSRF_TRUSTED_ORIGINS""" - allowed_hosts: list[str] = [] - csrf_trusted_origins: list[str] = [] + allowed_hosts: list[str] = [ + "localhost", + "tubearchivist", + ] + csrf_trusted_origins: list[str] = [ + "http://localhost", + "http://tubearchivist", + ] for host in ta_host.split(): host_clean = host.strip() if not host_clean.startswith("http"):