From 94295cdbd45859e6df3e0e44b939ec08f528ac8f Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Dec 2023 10:41:10 +0700 Subject: [PATCH 1/6] add type hints to ReleaseVersion --- tubearchivist/home/src/ta/config.py | 35 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tubearchivist/home/src/ta/config.py b/tubearchivist/home/src/ta/config.py index 61338f82..39819322 100644 --- a/tubearchivist/home/src/ta/config.py +++ b/tubearchivist/home/src/ta/config.py @@ -269,14 +269,13 @@ class ReleaseVersion: REMOTE_URL = "https://www.tubearchivist.com/api/release/latest/" NEW_KEY = "versioncheck:new" - def __init__(self): - self.local_version = settings.TA_VERSION - self.is_unstable = settings.TA_VERSION.endswith("-unstable") - self.remote_version = False - self.is_breaking = False - self.response = False + def __init__(self) -> None: + self.local_version: str = settings.TA_VERSION + self.is_unstable: bool = settings.TA_VERSION.endswith("-unstable") + self.remote_version: str = "" + self.is_breaking: bool = False - def check(self): + def check(self) -> None: """check version""" print(f"[{self.local_version}]: look for updates") self.get_remote_version() @@ -290,18 +289,18 @@ class ReleaseVersion: RedisArchivist().set_message(self.NEW_KEY, message) print(f"[{self.local_version}]: found new version {new_version}") - def get_local_version(self): + def get_local_version(self) -> str: """read version from local""" return self.local_version - def get_remote_version(self): + def get_remote_version(self) -> None: """read version from remote""" sleep(randint(0, 60)) - self.response = requests.get(self.REMOTE_URL, timeout=20).json() - self.remote_version = self.response["release_version"] - self.is_breaking = self.response["breaking_changes"] + response = requests.get(self.REMOTE_URL, timeout=20).json() + self.remote_version = response["release_version"] + self.is_breaking = response["breaking_changes"] - def _has_update(self): + def 
_has_update(self) -> str | bool: """check if there is an update""" remote_parsed = self._parse_version(self.remote_version) local_parsed = self._parse_version(self.local_version) @@ -314,12 +313,12 @@ class ReleaseVersion: return False @staticmethod - def _parse_version(version): + def _parse_version(version) -> tuple[int, ...]: """return version parts""" clean = version.rstrip("-unstable").lstrip("v") return tuple((int(i) for i in clean.split("."))) - def is_updated(self): + def is_updated(self) -> str | bool: """check if update happened in the mean time""" message = self.get_update() if not message: @@ -334,15 +333,15 @@ class ReleaseVersion: return False - def get_update(self): + def get_update(self) -> dict: """return new version dict if available""" message = RedisArchivist().get_message(self.NEW_KEY) if not message.get("status"): - return False + return {} return message - def clear_fail(self): + def clear_fail(self) -> None: """clear key, catch previous error in v0.4.5""" message = self.get_update() if not message: From 86fe31d2580097700e6ef55ba12363fc2994e4d0 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 25 Dec 2023 11:40:09 +0700 Subject: [PATCH 2/6] cleanup subtitles after deleting channels --- tubearchivist/home/src/index/channel.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 273efffb..3bfaec1e 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -199,6 +199,15 @@ class YoutubeChannel(YouTubeItem): } _, _ = ElasticWrap("ta_comment/_delete_by_query").post(data) + def delete_es_subtitles(self): + """delete all subtitles from this channel""" + data = { + "query": { + "term": {"subtitle_channel_id": {"value": self.youtube_id}} + } + } + _, _ = ElasticWrap("ta_subtitle/_delete_by_query").post(data) + def delete_playlists(self): """delete all indexed playlist from es""" all_playlists = 
self.get_indexed_playlists() @@ -229,6 +238,7 @@ class YoutubeChannel(YouTubeItem): print(f"{self.youtube_id}: delete indexed videos") self.delete_es_videos() self.delete_es_comments() + self.delete_es_subtitles() self.del_in_es() def index_channel_playlists(self): From 49659322a110377af666500cb976d376f4939b81 Mon Sep 17 00:00:00 2001 From: PhuriousGeorge Date: Sun, 14 Jan 2024 20:12:44 -0600 Subject: [PATCH 3/6] Limit worker lifespan - RAM usage mitigation (#644) Limit worker lifespan to save our precious RAM as discussed on [Discord](https://discord.com/channels/920056098122248193/1179480913701241002/1180026088802496512) Mitigates #500 though RAM usage can still ramp rather high before worker is culled --- docker_assets/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_assets/run.sh b/docker_assets/run.sh index 618b0f3d..b06d5b72 100644 --- a/docker_assets/run.sh +++ b/docker_assets/run.sh @@ -18,7 +18,7 @@ python manage.py ta_migpath # start all tasks nginx & -celery -A home.tasks worker --loglevel=INFO & +celery -A home.tasks worker --loglevel=INFO --max-tasks-per-child 10 & celery -A home beat --loglevel=INFO \ -s "${BEAT_SCHEDULE_PATH:-${cachedir}/celerybeat-schedule}" & uwsgi --ini uwsgi.ini From 4049a2a3c1dfc61bdfd3235145a8cdeeca0b727d Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Jan 2024 09:23:37 +0700 Subject: [PATCH 4/6] bump requirements --- tubearchivist/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index 4b93d0d5..a39d438a 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,13 +1,13 @@ -apprise==1.6.0 +apprise==1.7.1 celery==5.3.6 -Django==4.2.7 +Django==4.2.9 django-auth-ldap==4.6.0 django-cors-headers==4.3.1 djangorestframework==3.14.0 -Pillow==10.1.0 +Pillow==10.2.0 redis==5.0.0 requests==2.31.0 ryd-client==0.0.6 uWSGI==2.0.23 whitenoise==6.6.0 -yt-dlp @ 
git+https://github.com/yt-dlp/yt-dlp@6b5d93b0b0240e287389d1d43b2d5293e18aa4cc +yt-dlp==2023.12.30 From 65738ef52c22760860c60921aa0faf2155344e58 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Jan 2024 11:34:11 +0700 Subject: [PATCH 5/6] validate expected video ID with remote ID to avoid redirect --- tubearchivist/home/src/index/video.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index d06ce5e9..1b258aeb 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -177,6 +177,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): def _process_youtube_meta(self): """extract relevant fields from youtube""" + self._validate_id() # extract self.channel_id = self.youtube_meta["channel_id"] upload_date = self.youtube_meta["upload_date"] @@ -202,6 +203,19 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): "active": True, } + def _validate_id(self): + """validate expected video ID, raise value error on mismatch""" + remote_id = self.youtube_meta["id"] + + if not self.youtube_id == remote_id: + # unexpected redirect + message = ( + f"[reindex][{self.youtube_id}] got an unexpected redirect " + + f"to {remote_id}, you are probably getting blocked by YT. " + "See FAQ for more details." 
+ ) + raise ValueError(message) + def _add_channel(self): """add channel dict to video json_data""" channel = ta_channel.YoutubeChannel(self.channel_id) From 18ba8086642018755c2d24ee04d3b790c1df6500 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Jan 2024 12:06:03 +0700 Subject: [PATCH 6/6] bump TA_VERSION unstable --- tubearchivist/config/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index 32577951..b58b9ac8 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -269,4 +269,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [ # TA application settings TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist" -TA_VERSION = "v0.4.5" +TA_VERSION = "v0.4.6-unstable"