mirror of https://github.com/tubearchivist/tubearchivist-frontend.git (synced 2024-11-22 20:00:15 +00:00)
add subtitle functionality, #build
Changes:
- merges new subtitle download and index functionality
- merges player improvements and api integrations from @n8detar
- merges fix for non ascii channel names
- merges fix for pagination error with 10k+ videos
This commit is contained in: commit 3efa388b5a
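
Taken together, the hunks below thread subtitles through the whole stack: per-language VTT files are fetched, cleaned, written next to the video file, exposed to the player as <track> elements, and indexed cue by cue into a new ta_subtitle Elasticsearch index. A minimal sketch of the resulting flow, using only names that appear in the diffs (the video object is assumed to be an already-populated YoutubeVideo):

    # sketch: how the new pieces fit together, mirroring _check_subtitles below
    handler = YoutubeSubtitle(video)
    subtitles = handler.get_subtitles()  # user subs first, auto captions as fallback
    if subtitles:
        video.json_data["subtitles"] = subtitles
        # writes the cleaned .vtt files and bulk-indexes cues into ta_subtitle
        handler.download_subtitles(relevant_subtitles=subtitles)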
@@ -33,7 +33,7 @@ services:
     depends_on:
       - archivist-es
   archivist-es:
-    image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
+    image: docker.elastic.co/elasticsearch/elasticsearch:7.17.0
     container_name: archivist-es
     restart: always
     environment:

@@ -23,10 +23,6 @@ response = requests.get(url, headers=headers)
 ## Video Item View
 /api/video/\<video_id>/
 
-## Video Player View
-returns all relevant information to create video player
-/api/video/\<video_id>/player
-
 ## Channel List View
 /api/channel/
 

@@ -6,7 +6,6 @@ from api.views import (
     DownloadApiListView,
     DownloadApiView,
     PlaylistApiView,
-    VideoApiPlayerView,
     VideoApiView,
 )
 from django.urls import path
@@ -17,11 +16,6 @@ urlpatterns = [
         VideoApiView.as_view(),
         name="api-video",
     ),
-    path(
-        "video/<slug:video_id>/player/",
-        VideoApiPlayerView.as_view(),
-        name="api-video-player",
-    ),
     path(
         "channel/",
         ChannelApiListView.as_view(),

@@ -60,6 +60,12 @@ class ApiBaseView(APIView):
             cache_dir = self.default_conf["application"]["cache_dir"]
             new_thumb = f"{cache_dir}/{vid_thumb_url}"
             self.response["data"]["vid_thumb_url"] = new_thumb
+        if "subtitles" in all_keys:
+            all_subtitles = self.response["data"]["subtitles"]
+            for idx, _ in enumerate(all_subtitles):
+                url = self.response["data"]["subtitles"][idx]["media_url"]
+                new_url = f"/media/{url}"
+                self.response["data"]["subtitles"][idx]["media_url"] = new_url
 
     def get_paginate(self):
         """add pagination detail to response"""
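
With this hunk the API rewrites every subtitle media_url into a path the frontend can serve directly, alongside the existing thumbnail rewrite. A hypothetical "data" fragment after rewriting (all values invented for illustration):

    # hypothetical API response fragment, not part of this commit
    data = {
        "youtube_id": "abc123xyz89",
        "vid_thumb_url": "/cache/videos/ab/abc123xyz89.jpg",
        "subtitles": [
            {
                "ext": "vtt",
                "lang": "en",
                "source": "user",
                "media_url": "/media/Channel_Name/20220101_abc123xyz89_title-en.vtt",
            }
        ],
    }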
@@ -92,38 +98,6 @@ class VideoApiView(ApiBaseView):
         return Response(self.response, status=self.status_code)
 
 
-class VideoApiPlayerView(ApiBaseView):
-    """resolves to /api/video/<video_id>/player
-    GET: returns dict of video to build player
-    """
-
-    search_base = "/ta_video/_doc/"
-
-    def get(self, request, video_id):
-        # pylint: disable=unused-argument
-        """get request"""
-        self.config_builder()
-        self.get_document(video_id)
-        player = self.process_response()
-        return Response(player, status=self.status_code)
-
-    def process_response(self):
-        """build all needed vars for player"""
-        vid_data = self.response["data"]
-        youtube_id = vid_data["youtube_id"]
-        vid_thumb_url = ThumbManager().vid_thumb_path(youtube_id)
-        player = {
-            "youtube_id": youtube_id,
-            "media_url": "/media/" + vid_data["media_url"],
-            "vid_thumb_url": "/cache/" + vid_thumb_url,
-            "title": vid_data["title"],
-            "channel_name": vid_data["channel"]["channel_name"],
-            "channel_id": vid_data["channel"]["channel_id"],
-            "is_watched": vid_data["player"]["watched"],
-        }
-        return player
-
-
 class ChannelApiView(ApiBaseView):
     """resolves to /api/channel/<channel_id>/
     GET: returns metadata dict of channel

@@ -156,6 +156,32 @@
                     "normalizer": "to_lower"
                 }
             }
+        },
+        "subtitles": {
+            "properties": {
+                "ext": {
+                    "type": "keyword",
+                    "index": false
+                },
+                "lang": {
+                    "type": "keyword",
+                    "index": false
+                },
+                "media_url": {
+                    "type": "keyword",
+                    "index": false
+                },
+                "name": {
+                    "type": "keyword"
+                },
+                "source": {
+                    "type": "keyword"
+                },
+                "url": {
+                    "type": "keyword",
+                    "index": false
+                }
+            }
         }
     },
    "expected_set": {
@@ -277,6 +303,73 @@
            },
            "number_of_replicas": "0"
        }
+    },
+    {
+        "index_name": "subtitle",
+        "expected_map": {
+            "youtube_id": {
+                "type": "keyword"
+            },
+            "title": {
+                "type": "text",
+                "fields": {
+                    "keyword": {
+                        "type": "keyword",
+                        "ignore_above": 256,
+                        "normalizer": "to_lower"
+                    }
+                }
+            },
+            "subtitle_fragment_id": {
+                "type": "keyword"
+            },
+            "subtitle_channel": {
+                "type": "text",
+                "fields": {
+                    "keyword": {
+                        "type": "keyword",
+                        "ignore_above": 256,
+                        "normalizer": "to_lower"
+                    }
+                }
+            },
+            "subtitle_channel_id": {
+                "type": "keyword"
+            },
+            "subtitle_start": {
+                "type": "text"
+            },
+            "subtitle_end": {
+                "type": "text"
+            },
+            "subtitle_last_refresh": {
+                "type": "date"
+            },
+            "subtitle_index": {
+                "type": "long"
+            },
+            "subtitle_lang": {
+                "type": "keyword"
+            },
+            "subtitle_source": {
+                "type": "keyword"
+            },
+            "subtitle_line": {
+                "type": "text",
+                "analyzer": "english"
+            }
+        },
+        "expected_set": {
+            "analysis": {
+                "normalizer": {
+                    "to_lower": {
+                        "type": "custom",
+                        "filter": ["lowercase"]
+                    }
+                }
+            },
+            "number_of_replicas": "0"
+        }
     }
 ]
 }
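
Note that subtitle_line is the only analyzed field (english analyzer), so the new index supports full-text search over cues while the keyword fields stay filterable. A sketch of such a search, reusing ElasticWrap the same way the indexing code does; the query body itself is an assumption, not part of this commit:

    # sketch: full-text search over indexed subtitle cues
    data = {
        "query": {"match": {"subtitle_line": "some spoken phrase"}},
        "_source": ["youtube_id", "subtitle_start", "subtitle_line"],
    }
    response, status_code = ElasticWrap("ta_subtitle/_search").post(data=data)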
@@ -46,8 +46,9 @@ class FilesystemScanner:
         all_downloaded = []
         for channel_name in all_channels:
             channel_path = os.path.join(self.VIDEOS, channel_name)
-            videos = os.listdir(channel_path)
-            all_videos = ignore_filelist(videos)
+            channel_files = os.listdir(channel_path)
+            channel_files_clean = ignore_filelist(channel_files)
+            all_videos = [i for i in channel_files_clean if i.endswith(".mp4")]
             for video in all_videos:
                 youtube_id = video[9:20]
                 all_downloaded.append((channel_name, video, youtube_id))

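The new .mp4 filter matters because downloaded .vtt files now live in the same channel folders and would otherwise be treated as videos. A quick illustration with invented file names (the fixed-width prefix makes video[9:20] the 11-character YouTube id):

    # invented listing: one video plus its downloaded subtitle
    channel_files_clean = [
        "20220101_abc123xyz89_some_title.mp4",
        "20220101_abc123xyz89_some_title-en.vtt",
    ]
    all_videos = [i for i in channel_files_clean if i.endswith(".mp4")]
    print(all_videos[0][9:20])  # -> abc123xyz89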
@@ -4,10 +4,13 @@ functionality:
 - index and update in es
 """
 
+import json
 import os
+import re
 from datetime import datetime
 
 import requests
+from home.src.es.connect import ElasticWrap
 from home.src.index import channel as ta_channel
 from home.src.index.generic import YouTubeItem
 from home.src.ta.helper import DurationConverter, clean_string
@@ -17,16 +20,13 @@ from ryd_client import ryd_client
 class YoutubeSubtitle:
     """handle video subtitle functionality"""
 
-    def __init__(self, config, youtube_meta, media_url, youtube_id):
-        self.config = config
-        self.youtube_meta = youtube_meta
-        self.media_url = media_url
-        self.youtube_id = youtube_id
+    def __init__(self, video):
+        self.video = video
         self.languages = False
 
     def sub_conf_parse(self):
         """add additional conf values to self"""
-        languages_raw = self.config["downloads"]["subtitle"]
+        languages_raw = self.video.config["downloads"]["subtitle"]
         self.languages = [i.strip() for i in languages_raw.split(",")]
 
     def get_subtitles(self):
@@ -36,82 +36,226 @@ class YoutubeSubtitle:
             # no subtitles
             return False
 
-        relevant_subtitles = self.get_user_subtitles()
-        if relevant_subtitles:
-            return relevant_subtitles
+        relevant_subtitles = []
+        for lang in self.languages:
+            user_sub = self.get_user_subtitles(lang)
+            if user_sub:
+                relevant_subtitles.append(user_sub)
+                continue
 
-        if self.config["downloads"]["subtitle_source"] == "auto":
-            relevant_auto = self.get_auto_caption()
-            return relevant_auto
+            if self.video.config["downloads"]["subtitle_source"] == "auto":
+                auto_cap = self.get_auto_caption(lang)
+                if auto_cap:
+                    relevant_subtitles.append(auto_cap)
 
-        return False
+        return relevant_subtitles
 
-    def get_auto_caption(self):
+    def get_auto_caption(self, lang):
         """get auto_caption subtitles"""
-        print(f"{self.youtube_id}: get auto generated subtitles")
-        all_subtitles = self.youtube_meta.get("automatic_captions")
+        print(f"{self.video.youtube_id}-{lang}: get auto generated subtitles")
+        all_subtitles = self.video.youtube_meta.get("automatic_captions")
 
         if not all_subtitles:
             return False
 
-        relevant_subtitles = []
-
-        for lang in self.languages:
-            media_url = self.media_url.replace(".mp4", f"-{lang}.vtt")
-            all_formats = all_subtitles.get(lang)
-            subtitle = [i for i in all_formats if i["ext"] == "vtt"][0]
-            subtitle.update(
-                {"lang": lang, "source": "auto", "media_url": media_url}
-            )
-            relevant_subtitles.append(subtitle)
-            break
-
-        return relevant_subtitles
+        video_media_url = self.video.json_data["media_url"]
+        media_url = video_media_url.replace(".mp4", f"-{lang}.vtt")
+        all_formats = all_subtitles.get(lang)
+        subtitle = [i for i in all_formats if i["ext"] == "vtt"][0]
+        subtitle.update(
+            {"lang": lang, "source": "auto", "media_url": media_url}
+        )
+
+        return subtitle
 
     def _normalize_lang(self):
         """normalize country specific language keys"""
-        all_subtitles = self.youtube_meta.get("subtitles")
+        all_subtitles = self.video.youtube_meta.get("subtitles")
+        if not all_subtitles:
+            return False
+
         all_keys = list(all_subtitles.keys())
         for key in all_keys:
             lang = key.split("-")[0]
             old = all_subtitles.pop(key)
+            if lang == "live_chat":
+                continue
             all_subtitles[lang] = old
 
         return all_subtitles
 
-    def get_user_subtitles(self):
+    def get_user_subtitles(self, lang):
         """get subtitles uploaded from channel owner"""
-        print(f"{self.youtube_id}: get user uploaded subtitles")
+        print(f"{self.video.youtube_id}-{lang}: get user uploaded subtitles")
         all_subtitles = self._normalize_lang()
         if not all_subtitles:
             return False
 
-        relevant_subtitles = []
-
-        for lang in self.languages:
-            media_url = self.media_url.replace(".mp4", f"-{lang}.vtt")
-            all_formats = all_subtitles.get(lang)
-            subtitle = [i for i in all_formats if i["ext"] == "vtt"][0]
-            subtitle.update(
-                {"lang": lang, "source": "user", "media_url": media_url}
-            )
-            relevant_subtitles.append(subtitle)
-            break
-
-        return relevant_subtitles
+        video_media_url = self.video.json_data["media_url"]
+        media_url = video_media_url.replace(".mp4", f"-{lang}.vtt")
+        all_formats = all_subtitles.get(lang)
+        if not all_formats:
+            # no user subtitles found
+            return False
+
+        subtitle = [i for i in all_formats if i["ext"] == "vtt"][0]
+        subtitle.update(
+            {"lang": lang, "source": "user", "media_url": media_url}
+        )
+
+        return subtitle
 
     def download_subtitles(self, relevant_subtitles):
         """download subtitle files to archive"""
+        videos_base = self.video.config["application"]["videos"]
         for subtitle in relevant_subtitles:
-            dest_path = os.path.join(
-                self.config["application"]["videos"], subtitle["media_url"]
-            )
+            dest_path = os.path.join(videos_base, subtitle["media_url"])
+            source = subtitle["source"]
             response = requests.get(subtitle["url"])
-            if response.ok:
-                with open(dest_path, "w", encoding="utf-8") as subfile:
-                    subfile.write(response.text)
-            else:
-                print(f"{self.youtube_id}: failed to download subtitle")
+            if not response.ok:
+                print(f"{self.video.youtube_id}: failed to download subtitle")
+                continue
+
+            parser = SubtitleParser(response.text, subtitle.get("lang"))
+            parser.process()
+            subtitle_str = parser.get_subtitle_str()
+            self._write_subtitle_file(dest_path, subtitle_str)
+            query_str = parser.create_bulk_import(self.video, source)
+            self._index_subtitle(query_str)
+
+    @staticmethod
+    def _write_subtitle_file(dest_path, subtitle_str):
+        """write subtitle file to disk"""
+        # create folder here for first video of channel
+        os.makedirs(os.path.split(dest_path)[0], exist_ok=True)
+        with open(dest_path, "w", encoding="utf-8") as subfile:
+            subfile.write(subtitle_str)
+
+    @staticmethod
+    def _index_subtitle(query_str):
+        """send subtitle to es for indexing"""
+        _, _ = ElasticWrap("_bulk").post(data=query_str, ndjson=True)
+
+
+class SubtitleParser:
+    """parse subtitle str from youtube"""
+
+    time_reg = r"^([0-9]{2}:?){3}\.[0-9]{3} --> ([0-9]{2}:?){3}\.[0-9]{3}"
+    stamp_reg = r"<([0-9]{2}:?){3}\.[0-9]{3}>"
+    tag_reg = r"</?c>"
+
+    def __init__(self, subtitle_str, lang):
+        self.subtitle_str = subtitle_str
+        self.lang = lang
+        self.header = False
+        self.parsed_cue_list = False
+        self.all_text_lines = False
+        self.matched = False
+
+    def process(self):
+        """collection to process subtitle string"""
+        self._parse_cues()
+        self._match_text_lines()
+        self._add_id()
+
+    def _parse_cues(self):
+        """split into cues"""
+        all_cues = self.subtitle_str.replace("\n \n", "\n").split("\n\n")
+        self.header = all_cues[0]
+        self.all_text_lines = []
+        self.parsed_cue_list = [self._cue_cleaner(i) for i in all_cues[1:]]
+
+    def _cue_cleaner(self, cue):
+        """parse single cue"""
+        all_lines = cue.split("\n")
+        cue_dict = {"lines": []}
+
+        for line in all_lines:
+            if re.match(self.time_reg, line):
+                clean = re.search(self.time_reg, line).group()
+                start, end = clean.split(" --> ")
+                cue_dict.update({"start": start, "end": end})
+            else:
+                clean = re.sub(self.stamp_reg, "", line)
+                clean = re.sub(self.tag_reg, "", clean)
+                cue_dict["lines"].append(clean)
+                if clean and clean not in self.all_text_lines:
+                    self.all_text_lines.append(clean)
+
+        return cue_dict
+
+    def _match_text_lines(self):
+        """match unique text lines with timestamps"""
+        self.matched = []
+
+        while self.all_text_lines:
+            check = self.all_text_lines[0]
+            matches = [i for i in self.parsed_cue_list if check in i["lines"]]
+            new_cue = matches[-1]
+            new_cue["start"] = matches[0]["start"]
+
+            for line in new_cue["lines"]:
+                try:
+                    self.all_text_lines.remove(line)
+                except ValueError:
+                    print("failed to process:")
+                    print(line)
+
+            self.matched.append(new_cue)
+
+    def _add_id(self):
+        """add id to matched cues"""
+        for idx, _ in enumerate(self.matched):
+            self.matched[idx]["id"] = idx + 1
+
+    def get_subtitle_str(self):
+        """stitch cues and return processed new string"""
+        new_subtitle_str = self.header + "\n\n"
+
+        for cue in self.matched:
+            timestamp = f"{cue.get('start')} --> {cue.get('end')}"
+            lines = "\n".join(cue.get("lines"))
+            cue_text = f"{cue.get('id')}\n{timestamp}\n{lines}\n\n"
+            new_subtitle_str = new_subtitle_str + cue_text
+
+        return new_subtitle_str
+
+    def create_bulk_import(self, video, source):
+        """process matched for es import"""
+        bulk_list = []
+        channel = video.json_data.get("channel")
+
+        document = {
+            "youtube_id": video.youtube_id,
+            "title": video.json_data.get("title"),
+            "subtitle_channel": channel.get("channel_name"),
+            "subtitle_channel_id": channel.get("channel_id"),
+            "subtitle_last_refresh": int(datetime.now().strftime("%s")),
+            "subtitle_lang": self.lang,
+            "subtitle_source": source,
+        }
+
+        for match in self.matched:
+            match_id = match.get("id")
+            document_id = f"{video.youtube_id}-{self.lang}-{match_id}"
+            action = {"index": {"_index": "ta_subtitle", "_id": document_id}}
+            document.update(
+                {
+                    "subtitle_fragment_id": document_id,
+                    "subtitle_start": match.get("start"),
+                    "subtitle_end": match.get("end"),
+                    "subtitle_index": match_id,
+                    "subtitle_line": " ".join(match.get("lines")),
+                }
+            )
+            bulk_list.append(json.dumps(action))
+            bulk_list.append(json.dumps(document))
+
+        bulk_list.append("\n")
+        query_str = "\n".join(bulk_list)
+
+        return query_str
+
+
 class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
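
To see what SubtitleParser does, feed it a small auto-caption style fragment: YouTube repeats each text line across overlapping cues with inline <c> timing tags, and the parser collapses them into one numbered cue per unique line. A self-contained sketch (the VTT fragment is invented for illustration):

    # sketch: deduplicating an invented two-cue fragment
    vtt = (
        "WEBVTT\nKind: captions\nLanguage: en\n\n"
        "00:00:00.000 --> 00:00:02.000\n"
        "hello<00:00:01.000><c> world</c>\n\n"
        "00:00:02.000 --> 00:00:04.000\n"
        "hello world\n\n"
    )
    parser = SubtitleParser(vtt, "en")
    parser.process()
    print(parser.get_subtitle_str())
    # header, then a single cue: 1 / 00:00:00.000 --> 00:00:04.000 / hello world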
@@ -204,10 +348,17 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         try:
             # when indexing from download task
             vid_path = self.build_dl_cache_path()
-        except FileNotFoundError:
-            # when reindexing
-            base = self.app_conf["videos"]
-            vid_path = os.path.join(base, self.json_data["media_url"])
+        except FileNotFoundError as err:
+            # when reindexing needs to handle title rename
+            channel = os.path.split(self.json_data["media_url"])[0]
+            channel_dir = os.path.join(self.app_conf["videos"], channel)
+            all_files = os.listdir(channel_dir)
+            for file in all_files:
+                if self.youtube_id in file:
+                    vid_path = os.path.join(channel_dir, file)
+                    break
+            else:
+                raise FileNotFoundError("could not find video file") from err
 
         duration_handler = DurationConverter()
         duration = duration_handler.get_sec(vid_path)
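
The lookup leans on Python's for/else: the else arm runs only when the loop completes without hitting break, i.e. when no file in the channel folder contains the video id, and only then is the FileNotFoundError re-raised. A standalone illustration:

    # for/else semantics: else fires only if the loop never breaks
    for file in ["20220101_othervideo1_title.mp4"]:
        if "abc123xyz89" in file:
            break
    else:
        print("no match, would re-raise here")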
@@ -242,11 +393,18 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         """delete video file, meta data"""
         self.get_from_es()
         video_base = self.app_conf["videos"]
-        media_url = self.json_data["media_url"]
-        print(f"{self.youtube_id}: delete {media_url} from file system")
-        to_delete = os.path.join(video_base, media_url)
-        os.remove(to_delete)
+        to_del = [self.json_data.get("media_url")]
+
+        all_subtitles = self.json_data.get("subtitles")
+        if all_subtitles:
+            to_del = to_del + [i.get("media_url") for i in all_subtitles]
+
+        for media_url in to_del:
+            file_path = os.path.join(video_base, media_url)
+            os.remove(file_path)
+
         self.del_in_es()
+        self._delete_subtitles()
 
     def _get_ryd_stats(self):
         """get optional stats from returnyoutubedislikeapi.com"""
@@ -270,17 +428,17 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
 
     def _check_subtitles(self):
         """optionally add subtitles"""
-        handler = YoutubeSubtitle(
-            self.config,
-            self.youtube_meta,
-            media_url=self.json_data["media_url"],
-            youtube_id=self.youtube_id,
-        )
+        handler = YoutubeSubtitle(self)
         subtitles = handler.get_subtitles()
         if subtitles:
             self.json_data["subtitles"] = subtitles
             handler.download_subtitles(relevant_subtitles=subtitles)
 
+    def _delete_subtitles(self):
+        """delete indexed subtitles"""
+        data = {"query": {"term": {"youtube_id": {"value": self.youtube_id}}}}
+        _, _ = ElasticWrap("ta_subtitle/_delete_by_query").post(data=data)
+
 
 def index_new_video(youtube_id):
     """combined classes to create new video in index"""

@@ -169,7 +169,11 @@ class DurationConverter:
             capture_output=True,
             check=True,
         )
-        duration_sec = int(float(duration.stdout.decode().strip()))
+        duration_raw = duration.stdout.decode().strip()
+        if duration_raw == "N/A":
+            return 0
+
+        duration_sec = int(float(duration_raw))
         return duration_sec
 
     @staticmethod

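The probe subprocess (presumably ffprobe) prints the literal string N/A when a stream reports no duration, and the old one-line cast crashed on it. The failure mode the new guard avoids, shown with an invented value:

    # float("N/A") raises ValueError, which took down the indexing task
    duration_raw = "N/A"
    try:
        duration_sec = int(float(duration_raw))
    except ValueError:
        duration_sec = 0  # the new code returns 0 before the cast instead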
@@ -3,10 +3,14 @@
 {% load static %}
 {% load humanize %}
 <div class="video-main">
-    <video
-        src="/media/{{ video.media_url }}"
-        poster="/cache/{{ video.vid_thumb_url }}" controls preload="false"
-        type='video/mp4' width="100%" playsinline id="video-item" ontimeupdate="onVideoProgress('{{ video.youtube_id }}')" onloadedmetadata="setVideoProgress(0)">
+    <video poster="/cache/{{ video.vid_thumb_url }}" controls preload="false" width="100%" playsinline
+        id="video-item" ontimeupdate="onVideoProgress('{{ video.youtube_id }}')" onloadedmetadata="setVideoProgress(0)">
+        <source src="/media/{{ video.media_url }}" type="video/mp4">
+        {% if video.subtitles %}
+            {% for subtitle in video.subtitles %}
+                <track label="{{subtitle.name}}" kind="subtitles" srclang="{{subtitle.lang}}" src="/media/{{subtitle.media_url}}">
+            {% endfor %}
+        {% endif %}
     </video>
 </div>
 <div class="boxed-content">
@@ -57,10 +61,10 @@
 </div>
 <div class="info-box-item">
     <div>
-        <p>Views: {{ video.stats.view_count|intcomma }}</p>
+        <p class="thumb-icon"><img src="{% static 'img/icon-eye.svg' %}" alt="views">: {{ video.stats.view_count|intcomma }}</p>
         <p class="thumb-icon like"><img src="{% static 'img/icon-thumb.svg' %}" alt="thumbs-up">: {{ video.stats.like_count|intcomma }}</p>
         {% if video.stats.dislike_count %}
-        <p class="thumb-icon dislike"><img src="{% static 'img/icon-thumb.svg' %}" alt="thumbs-down">: {{ video.stats.dislike_count|intcomma }}</p>
+        <p class="thumb-icon"><img class="dislike" src="{% static 'img/icon-thumb.svg' %}" alt="thumbs-down">: {{ video.stats.dislike_count|intcomma }}</p>
         {% endif %}
         {% if video.stats.average_rating %}
         <p class="rating-stars">Rating:

@@ -4,9 +4,9 @@ Django==4.0.2
 django-cors-headers==3.11.0
 djangorestframework==3.13.1
 Pillow==9.0.1
-redis==4.1.2
+redis==4.1.3
 requests==2.27.1
 ryd-client==0.0.3
 uWSGI==2.0.20
-whitenoise==5.3.0
+whitenoise==6.0.0
 yt_dlp==2022.2.4