diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py
index 7ab8b47..f0923aa 100644
--- a/tubearchivist/api/views.py
+++ b/tubearchivist/api/views.py
@@ -26,13 +26,13 @@ class ApiBaseView(APIView):
self.response = {"data": False}
self.status_code = False
self.context = False
+ self.default_conf = AppConfig().config
def config_builder(self):
"""build confic context"""
- default_conf = AppConfig().config
self.context = {
- "es_url": default_conf["application"]["es_url"],
- "es_auth": default_conf["application"]["es_auth"],
+ "es_url": self.default_conf["application"]["es_url"],
+ "es_auth": self.default_conf["application"]["es_auth"],
}
def get_document(self, document_id):
@@ -48,6 +48,19 @@ class ApiBaseView(APIView):
self.response["data"] = False
self.status_code = response.status_code
+    def process_keys(self):
+        """prefix media_url and rebuild vid_thumb_url for the frontend"""
+        data = self.response["data"]
+        if not data:
+            # data stays False when no document was loaded, nothing to do
+            return
+        if "media_url" in data:
+            data["media_url"] = f"/media/{data['media_url']}"
+        if "vid_thumb_url" in data:
+            cache_dir = self.default_conf["application"]["cache_dir"]
+            thumb_path = ThumbManager().vid_thumb_path(data["youtube_id"])
+            data["vid_thumb_url"] = f"{cache_dir}/{thumb_path}"
+
def get_paginate(self):
"""add pagination detail to response"""
self.response["paginate"] = False
@@ -75,6 +88,7 @@ class VideoApiView(ApiBaseView):
"""get request"""
self.config_builder()
self.get_document(video_id)
+ self.process_keys()
return Response(self.response, status=self.status_code)
diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json
index 82c76f4..8c4249a 100644
--- a/tubearchivist/home/config.json
+++ b/tubearchivist/home/config.json
@@ -23,6 +23,8 @@
"format": false,
"add_metadata": false,
"add_thumbnail": false,
+ "subtitle": false,
+ "subtitle_source": false,
"throttledratelimit": false,
"integrate_ryd": false
},
diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py
index 6d17a33..be71049 100644
--- a/tubearchivist/home/src/download/yt_dlp_handler.py
+++ b/tubearchivist/home/src/download/yt_dlp_handler.py
@@ -212,6 +212,9 @@ class VideoDownloader:
host_uid = self.config["application"]["HOST_UID"]
host_gid = self.config["application"]["HOST_GID"]
channel_name = clean_string(vid_dict["channel"]["channel_name"])
+ if len(channel_name) <= 3:
+ # fall back to channel id
+ channel_name = vid_dict["channel"]["channel_id"]
# make archive folder with correct permissions
new_folder = os.path.join(videos, channel_name)
if not os.path.exists(new_folder):
diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json
index db413fb..3272ed3 100644
--- a/tubearchivist/home/src/es/index_mapping.json
+++ b/tubearchivist/home/src/es/index_mapping.json
@@ -25,6 +25,10 @@
"type": "keyword",
"index": false
},
+ "channel_tvart_url": {
+ "type": "keyword",
+ "index": false
+ },
"channel_thumb_url": {
"type": "keyword",
"index": false
@@ -84,6 +88,10 @@
"type": "keyword",
"index": false
},
+ "channel_tvart_url": {
+ "type": "keyword",
+ "index": false
+ },
"channel_thumb_url": {
"type": "keyword",
"index": false
diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py
index 86b1884..77648cf 100644
--- a/tubearchivist/home/src/frontend/forms.py
+++ b/tubearchivist/home/src/frontend/forms.py
@@ -68,6 +68,12 @@ class ApplicationSettingsForm(forms.Form):
("1", "enable Cast"),
]
+ SUBTITLE_SOURCE_CHOICES = [
+ ("", "-- change subtitle source settings"),
+ ("auto", "also download auto generated"),
+ ("user", "only download uploader"),
+ ]
+
subscriptions_channel_size = forms.IntegerField(required=False)
downloads_limit_count = forms.IntegerField(required=False)
downloads_limit_speed = forms.IntegerField(required=False)
@@ -81,6 +87,10 @@ class ApplicationSettingsForm(forms.Form):
downloads_add_thumbnail = forms.ChoiceField(
widget=forms.Select, choices=THUMBNAIL_CHOICES, required=False
)
+ downloads_subtitle = forms.CharField(required=False)
+ downloads_subtitle_source = forms.ChoiceField(
+ widget=forms.Select, choices=SUBTITLE_SOURCE_CHOICES, required=False
+ )
downloads_integrate_ryd = forms.ChoiceField(
widget=forms.Select, choices=RYD_CHOICES, required=False
)
diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py
index 50a0696..57a1c4e 100644
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@@ -198,6 +198,9 @@ class YoutubeChannel(YouTubeItem):
"""get folder where media files get stored"""
channel_name = self.json_data["channel_name"]
folder_name = clean_string(channel_name)
+ if len(folder_name) <= 3:
+ # fall back to channel id
+ folder_name = self.json_data["channel_id"]
folder_path = os.path.join(self.app_conf["videos"], folder_name)
return folder_path
diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py
index af96abf..054a882 100644
--- a/tubearchivist/home/src/index/generic.py
+++ b/tubearchivist/home/src/index/generic.py
@@ -122,6 +122,7 @@ class Pagination:
"page_from": page_from,
"prev_pages": prev_pages,
"current_page": page_get,
+ "max_hits": False,
}
if self.search_get:
pagination.update({"search_get": self.search_get})
@@ -131,6 +132,11 @@ class Pagination:
"""validate pagination with total_hits after making api call"""
page_get = self.page_get
max_pages = math.ceil(total_hits / self.page_size)
+ if total_hits > 10000:
+            # es returns at most 10000 results
+ self.pagination["max_hits"] = True
+ max_pages = max_pages - 1
+
if page_get < max_pages and max_pages > 1:
self.pagination["last_page"] = max_pages
else:
diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index 240a1b3..5b92198 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -14,7 +14,107 @@ from home.src.ta.helper import DurationConverter, clean_string
from ryd_client import ryd_client
-class YoutubeVideo(YouTubeItem):
+class YoutubeSubtitle:
+    """handle video subtitle functionality"""
+
+    def __init__(self, config, youtube_meta, media_url, youtube_id):
+        self.config = config
+        self.youtube_meta = youtube_meta
+        self.media_url = media_url
+        self.youtube_id = youtube_id
+        self.languages = False
+
+    def sub_conf_parse(self):
+        """parse comma separated language codes from config into self"""
+        # config default is false, treat that like an empty string
+        languages_raw = self.config["downloads"]["subtitle"] or ""
+        parsed = [i.strip() for i in languages_raw.split(",")]
+        # drop empty entries so an unset config stays falsy
+        self.languages = [i for i in parsed if i]
+
+    def get_subtitles(self):
+        """return subtitle list to download, falsy if none are wanted"""
+        self.sub_conf_parse()
+        if not self.languages:
+            # no subtitles
+            return False
+
+        relevant_subtitles = self.get_user_subtitles()
+        if relevant_subtitles:
+            return relevant_subtitles
+
+        if self.config["downloads"]["subtitle_source"] == "auto":
+            # nothing uploaded by the channel, fall back to auto captions
+            return self.get_auto_caption()
+
+        return False
+
+    def _match_language(self, all_subtitles, source):
+        """pick vtt subtitle of the first configured language available
+        source gets stored on the subtitle, returns list, can be empty"""
+        for lang in self.languages:
+            # language key can be missing or have no vtt format
+            all_formats = all_subtitles.get(lang) or []
+            vtt_formats = [i for i in all_formats if i["ext"] == "vtt"]
+            if not vtt_formats:
+                continue
+
+            subtitle = vtt_formats[0]
+            media_url = self.media_url.replace(".mp4", f"-{lang}.vtt")
+            subtitle.update(
+                {"lang": lang, "source": source, "media_url": media_url}
+            )
+            # stop at the first language that matched
+            return [subtitle]
+
+        return []
+
+    def get_auto_caption(self):
+        """get auto_caption subtitles"""
+        print(f"{self.youtube_id}: get auto generated subtitles")
+        all_subtitles = self.youtube_meta.get("automatic_captions")
+        if not all_subtitles:
+            return False
+
+        return self._match_language(all_subtitles, source="auto")
+
+    def _normalize_lang(self):
+        """normalize country specific language keys, en-US becomes en"""
+        all_subtitles = self.youtube_meta.get("subtitles")
+        if not all_subtitles:
+            # key missing or empty, nothing to normalize
+            return False
+
+        for key in list(all_subtitles.keys()):
+            lang = key.split("-")[0]
+            all_subtitles[lang] = all_subtitles.pop(key)
+
+        return all_subtitles
+
+    def get_user_subtitles(self):
+        """get subtitles uploaded from channel owner"""
+        print(f"{self.youtube_id}: get user uploaded subtitles")
+        all_subtitles = self._normalize_lang()
+        if not all_subtitles:
+            return False
+
+        return self._match_language(all_subtitles, source="user")
+
+    def download_subtitles(self, relevant_subtitles):
+        """download subtitle files to archive"""
+        for subtitle in relevant_subtitles:
+            dest_path = os.path.join(
+                self.config["application"]["videos"], subtitle["media_url"]
+            )
+            response = requests.get(subtitle["url"])
+            if response.ok:
+                with open(dest_path, "w", encoding="utf-8") as subfile:
+                    subfile.write(response.text)
+            else:
+                print(f"{self.youtube_id}: failed to download subtitle")
+
+
+class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"""represents a single youtube video"""
es_path = False
@@ -37,6 +137,7 @@ class YoutubeVideo(YouTubeItem):
self._add_stats()
self.add_file_path()
self.add_player()
+ self._check_subtitles()
if self.config["downloads"]["integrate_ryd"]:
self._get_ryd_stats()
@@ -96,7 +197,7 @@ class YoutubeVideo(YouTubeItem):
vid_path = os.path.join(cache_path, file_cached)
return vid_path
- return False
+ raise FileNotFoundError
def add_player(self):
"""add player information for new videos"""
@@ -125,6 +226,10 @@ class YoutubeVideo(YouTubeItem):
"""build media_url for where file will be located"""
channel_name = self.json_data["channel"]["channel_name"]
clean_channel_name = clean_string(channel_name)
+ if len(clean_channel_name) <= 3:
+ # fall back to channel id
+ clean_channel_name = self.json_data["channel"]["channel_id"]
+
timestamp = self.json_data["published"].replace("-", "")
youtube_id = self.json_data["youtube_id"]
title = self.json_data["title"]
@@ -163,6 +268,19 @@ class YoutubeVideo(YouTubeItem):
return True
+    def _check_subtitles(self):
+        """download and register subtitles when languages are configured"""
+        if not self.config["downloads"].get("subtitle"):
+            return
+        media_url = self.json_data["media_url"]
+        handler = YoutubeSubtitle(
+            self.config, self.youtube_meta, media_url, self.youtube_id
+        )
+        subtitles = handler.get_subtitles()
+        if subtitles:
+            self.json_data["subtitles"] = subtitles
+            handler.download_subtitles(relevant_subtitles=subtitles)
+
def index_new_video(youtube_id):
"""combined classes to create new video in index"""
diff --git a/tubearchivist/home/templates/home/base.html b/tubearchivist/home/templates/home/base.html
index 3bd4899..da47e17 100644
--- a/tubearchivist/home/templates/home/base.html
+++ b/tubearchivist/home/templates/home/base.html
@@ -109,9 +109,21 @@
{% endif %}
{% if pagination.last_page > 0 %}
{% if pagination.search_get %}
- Last ({{ pagination.last_page }})
+
+ {% if pagination.max_hits %}
+ Max ({{ pagination.last_page }})
+ {% else %}
+ Last ({{ pagination.last_page }})
+ {% endif %}
+
{% else %}
- Last ({{ pagination.last_page }})
+
+ {% if pagination.max_hits %}
+ Max ({{ pagination.last_page }})
+ {% else %}
+ Last ({{ pagination.last_page }})
+ {% endif %}
+
{% endif %}
{% endif %}
{% endif %}
diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html
index 0716d3e..3b9ffe9 100644
--- a/tubearchivist/home/templates/home/settings.html
+++ b/tubearchivist/home/templates/home/settings.html
@@ -94,6 +94,17 @@
Embed thumbnail into the mediafile.
{{ app_form.downloads_add_thumbnail }}
+
Subtitles download setting: {{ config.downloads.subtitle }}
+ Choose which subtitles to download, add comma separated two letter language ISO code,
+ e.g. en, de
+ {{ app_form.downloads_subtitle }}
Subtitle source settings: {{ config.downloads.subtitle_source }}
+ Download only user generated, or also less accurate auto generated subtitles.