diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 7ab8b47..f0923aa 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -26,13 +26,13 @@ class ApiBaseView(APIView): self.response = {"data": False} self.status_code = False self.context = False + self.default_conf = AppConfig().config def config_builder(self): """build confic context""" - default_conf = AppConfig().config self.context = { - "es_url": default_conf["application"]["es_url"], - "es_auth": default_conf["application"]["es_auth"], + "es_url": self.default_conf["application"]["es_url"], + "es_auth": self.default_conf["application"]["es_auth"], } def get_document(self, document_id): @@ -48,6 +48,19 @@ class ApiBaseView(APIView): self.response["data"] = False self.status_code = response.status_code + def process_keys(self): + """process keys for frontend""" + all_keys = self.response["data"].keys() + if "media_url" in all_keys: + media_url = self.response["data"]["media_url"] + self.response["data"]["media_url"] = f"/media/{media_url}" + if "vid_thumb_url" in all_keys: + youtube_id = self.response["data"]["youtube_id"] + vid_thumb_url = ThumbManager().vid_thumb_path(youtube_id) + cache_dir = self.default_conf["application"]["cache_dir"] + new_thumb = f"{cache_dir}/{vid_thumb_url}" + self.response["data"]["vid_thumb_url"] = new_thumb + def get_paginate(self): """add pagination detail to response""" self.response["paginate"] = False @@ -75,6 +88,7 @@ class VideoApiView(ApiBaseView): """get request""" self.config_builder() self.get_document(video_id) + self.process_keys() return Response(self.response, status=self.status_code) diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index 82c76f4..8c4249a 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -23,6 +23,8 @@ "format": false, "add_metadata": false, "add_thumbnail": false, + "subtitle": false, + "subtitle_source": false, "throttledratelimit": false, "integrate_ryd": false }, diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 6d17a33..be71049 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -212,6 +212,9 @@ class VideoDownloader: host_uid = self.config["application"]["HOST_UID"] host_gid = self.config["application"]["HOST_GID"] channel_name = clean_string(vid_dict["channel"]["channel_name"]) + if len(channel_name) <= 3: + # fall back to channel id + channel_name = vid_dict["channel"]["channel_id"] # make archive folder with correct permissions new_folder = os.path.join(videos, channel_name) if not os.path.exists(new_folder): diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index db413fb..3272ed3 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -25,6 +25,10 @@ "type": "keyword", "index": false }, + "channel_tvart_url": { + "type": "keyword", + "index": false + }, "channel_thumb_url": { "type": "keyword", "index": false @@ -84,6 +88,10 @@ "type": "keyword", "index": false }, + "channel_tvart_url": { + "type": "keyword", + "index": false + }, "channel_thumb_url": { "type": "keyword", "index": false diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py index 86b1884..77648cf 100644 --- a/tubearchivist/home/src/frontend/forms.py +++ b/tubearchivist/home/src/frontend/forms.py @@ -68,6 +68,12 @@ class ApplicationSettingsForm(forms.Form): ("1", "enable Cast"), ] + SUBTITLE_SOURCE_CHOICES = [ + ("", "-- change subtitle source settings"), + ("auto", "also download auto generated"), + ("user", "only download uploader"), + ] + subscriptions_channel_size = forms.IntegerField(required=False) downloads_limit_count = forms.IntegerField(required=False) downloads_limit_speed = forms.IntegerField(required=False) @@ -81,6 +87,10 @@ class ApplicationSettingsForm(forms.Form): downloads_add_thumbnail = forms.ChoiceField( widget=forms.Select, choices=THUMBNAIL_CHOICES, required=False ) + downloads_subtitle = forms.CharField(required=False) + downloads_subtitle_source = forms.ChoiceField( + widget=forms.Select, choices=SUBTITLE_SOURCE_CHOICES, required=False + ) downloads_integrate_ryd = forms.ChoiceField( widget=forms.Select, choices=RYD_CHOICES, required=False ) diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 50a0696..57a1c4e 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -198,6 +198,9 @@ class YoutubeChannel(YouTubeItem): """get folder where media files get stored""" channel_name = self.json_data["channel_name"] folder_name = clean_string(channel_name) + if len(folder_name) <= 3: + # fall back to channel id + folder_name = self.json_data["channel_id"] folder_path = os.path.join(self.app_conf["videos"], folder_name) return folder_path diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py index af96abf..054a882 100644 --- a/tubearchivist/home/src/index/generic.py +++ b/tubearchivist/home/src/index/generic.py @@ -122,6 +122,7 @@ class Pagination: "page_from": page_from, "prev_pages": prev_pages, "current_page": page_get, + "max_hits": False, } if self.search_get: pagination.update({"search_get": self.search_get}) @@ -131,6 +132,11 @@ class Pagination: """validate pagination with total_hits after making api call""" page_get = self.page_get max_pages = math.ceil(total_hits / self.page_size) + if total_hits > 10000: + # es returns maximal 10000 results + self.pagination["max_hits"] = True + max_pages = max_pages - 1 + if page_get < max_pages and max_pages > 1: self.pagination["last_page"] = max_pages else: diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 240a1b3..5b92198 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -14,7 +14,107 @@ from home.src.ta.helper import DurationConverter, clean_string from ryd_client import ryd_client -class YoutubeVideo(YouTubeItem): +class YoutubeSubtitle: + """handle video subtitle functionality""" + + def __init__(self, config, youtube_meta, media_url, youtube_id): + self.config = config + self.youtube_meta = youtube_meta + self.media_url = media_url + self.youtube_id = youtube_id + self.languages = False + + def sub_conf_parse(self): + """add additional conf values to self""" + languages_raw = self.config["downloads"]["subtitle"] + self.languages = [i.strip() for i in languages_raw.split(",")] + + def get_subtitles(self): + """check what to do""" + self.sub_conf_parse() + if not self.languages: + # no subtitles + return False + + relevant_subtitles = self.get_user_subtitles() + if relevant_subtitles: + return relevant_subtitles + + if self.config["downloads"]["subtitle_source"] == "auto": + relevant_auto = self.get_auto_caption() + return relevant_auto + + return False + + def get_auto_caption(self): + """get auto_caption subtitles""" + print(f"{self.youtube_id}: get auto generated subtitles") + all_subtitles = self.youtube_meta.get("automatic_captions") + + if not all_subtitles: + return False + + relevant_subtitles = [] + + for lang in self.languages: + media_url = self.media_url.replace(".mp4", f"-{lang}.vtt") + all_formats = all_subtitles.get(lang) + subtitle = [i for i in all_formats if i["ext"] == "vtt"][0] + subtitle.update( + {"lang": lang, "source": "auto", "media_url": media_url} + ) + relevant_subtitles.append(subtitle) + break + + return relevant_subtitles + + def _normalize_lang(self): + """normalize country specific language keys""" + all_subtitles = self.youtube_meta.get("subtitles") + all_keys = list(all_subtitles.keys()) + for key in all_keys: + lang = key.split("-")[0] + old = all_subtitles.pop(key) + all_subtitles[lang] = old + + return all_subtitles + + def get_user_subtitles(self): + """get subtitles uploaded from channel owner""" + print(f"{self.youtube_id}: get user uploaded subtitles") + all_subtitles = self._normalize_lang() + if not all_subtitles: + return False + + relevant_subtitles = [] + + for lang in self.languages: + media_url = self.media_url.replace(".mp4", f"-{lang}.vtt") + all_formats = all_subtitles.get(lang) + subtitle = [i for i in all_formats if i["ext"] == "vtt"][0] + subtitle.update( + {"lang": lang, "source": "user", "media_url": media_url} + ) + relevant_subtitles.append(subtitle) + break + + return relevant_subtitles + + def download_subtitles(self, relevant_subtitles): + """download subtitle files to archive""" + for subtitle in relevant_subtitles: + dest_path = os.path.join( + self.config["application"]["videos"], subtitle["media_url"] + ) + response = requests.get(subtitle["url"]) + if response.ok: + with open(dest_path, "w", encoding="utf-8") as subfile: + subfile.write(response.text) + else: + print(f"{self.youtube_id}: failed to download subtitle") + + +class YoutubeVideo(YouTubeItem, YoutubeSubtitle): """represents a single youtube video""" es_path = False @@ -37,6 +137,7 @@ class YoutubeVideo(YouTubeItem): self._add_stats() self.add_file_path() self.add_player() + self._check_subtitles() if self.config["downloads"]["integrate_ryd"]: self._get_ryd_stats() @@ -96,7 +197,7 @@ class YoutubeVideo(YouTubeItem): vid_path = os.path.join(cache_path, file_cached) return vid_path - return False + raise FileNotFoundError def add_player(self): """add player information for new videos""" @@ -125,6 +226,10 @@ class YoutubeVideo(YouTubeItem): """build media_url for where file will be located""" channel_name = self.json_data["channel"]["channel_name"] clean_channel_name = clean_string(channel_name) + if len(clean_channel_name) <= 3: + # fall back to channel id + clean_channel_name = self.json_data["channel"]["channel_id"] + timestamp = self.json_data["published"].replace("-", "") youtube_id = self.json_data["youtube_id"] title = self.json_data["title"] @@ -163,6 +268,19 @@ class YoutubeVideo(YouTubeItem): return True + def _check_subtitles(self): + """optionally add subtitles""" + handler = YoutubeSubtitle( + self.config, + self.youtube_meta, + media_url=self.json_data["media_url"], + youtube_id=self.youtube_id, + ) + subtitles = handler.get_subtitles() + if subtitles: + self.json_data["subtitles"] = subtitles + handler.download_subtitles(relevant_subtitles=subtitles) + def index_new_video(youtube_id): """combined classes to create new video in index""" diff --git a/tubearchivist/home/templates/home/base.html b/tubearchivist/home/templates/home/base.html index 3bd4899..da47e17 100644 --- a/tubearchivist/home/templates/home/base.html +++ b/tubearchivist/home/templates/home/base.html @@ -109,9 +109,21 @@ {% endif %} {% if pagination.last_page > 0 %} {% if pagination.search_get %} - Last ({{ pagination.last_page }}) + + {% if pagination.max_hits %} + Max ({{ pagination.last_page }}) + {% else %} + Last ({{ pagination.last_page }}) + {% endif %} + {% else %} - Last ({{ pagination.last_page }}) + + {% if pagination.max_hits %} + Max ({{ pagination.last_page }}) + {% else %} + Last ({{ pagination.last_page }}) + {% endif %} + {% endif %} {% endif %} {% endif %} diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 0716d3e..3b9ffe9 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -94,6 +94,17 @@ Embed thumbnail into the mediafile.
{{ app_form.downloads_add_thumbnail }} +
+

Subtitles download setting: {{ config.downloads.subtitle }}
+ Choose which subtitles to download, add comma separated two letter language ISO code,
+ e.g. en, de

+ {{ app_form.downloads_subtitle }}

+
+
+

Subtitle source settings: {{ config.downloads.subtitle_source }}

+ Download only user generated, or also less accurate auto generated subtitles.
+ {{ app_form.downloads_subtitle_source }} +

Integrations

diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index d08678e..815d2c0 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,12 +1,12 @@ beautifulsoup4==4.10.0 celery==5.2.3 -Django==4.0.1 +Django==4.0.2 django-cors-headers==3.11.0 djangorestframework==3.13.1 -Pillow==9.0.0 -redis==4.1.1 +Pillow==9.0.1 +redis==4.1.2 requests==2.27.1 ryd-client==0.0.3 uWSGI==2.0.20 whitenoise==5.3.0 -yt_dlp==2022.1.21 +yt_dlp==2022.2.4