From 26cc7846c6995c9355ade3db3b28206abd8a3fd0 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 26 Jul 2022 19:51:47 +0700 Subject: [PATCH 01/20] WIP: new import folder parser for offline import --- tubearchivist/home/src/index/filesystem.py | 173 ++++++++++++++++++++- 1 file changed, 172 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 9dd7b476..dfd556c9 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -157,7 +157,178 @@ class FilesystemScanner: _, _ = ElasticWrap(path).delete() -class ManualImport: +class ImportFolderScanner: + """import and indexing existing video files + - identify all media files belonging to a video + - identify youtube id + - convert if needed + """ + + CONFIG = AppConfig().config + CACHE_DIR = CONFIG["application"]["cache_dir"] + IMPORT_DIR = os.path.join(CACHE_DIR, "import") + + EXT_MAP = { + "media": [".mp4", ".mkv", ".webm"], + "metadata": [".json"], + "thumb": [".jpg", ".png", ".webp"], + "subtitle": [".vtt"], + } + + def __init__(self): + self.to_import = False + + def scan(self): + """scan and match media files""" + all_files = self.get_all_files() + self.match_files(all_files) + self.process_videos() + + return self.to_import + + def get_all_files(self): + """get all files in /import""" + all_files = ignore_filelist(os.listdir(self.IMPORT_DIR)) + all_files.sort() + + return all_files + + @staticmethod + def _get_template(): + """base dict for video""" + return { + "media": False, + "video_id": False, + "metadata": False, + "thumb": False, + "subtitle": [], + } + + def match_files(self, all_files): + """loop through all files, join what matches""" + self.to_import = [] + + current_video = self._get_template() + last_base = False + + for file_path in all_files: + base_name_raw, ext = os.path.splitext(file_path) + base_name, _ = os.path.splitext(base_name_raw) + + key, file_path = self._detect_type(file_path, ext) + if not key or not file_path: + continue + + if base_name != last_base: + if last_base: + self.to_import.append(current_video) + + current_video = self._get_template() + last_base = base_name + + if key == "subtitle": + current_video["subtitle"].append(file_path) + else: + current_video[key] = file_path + + if current_video.get("media"): + self.to_import.append(current_video) + + def _detect_type(self, file_path, ext): + """detect metadata type for file""" + + for key, value in self.EXT_MAP.items(): + if ext in value: + return key, file_path + + return False, False + + def process_videos(self): + """loop through all videos""" + for current_video in self.to_import: + self._detect_youtube_id(current_video) + self._dump_thumb(current_video) + self._convert_video(current_video) + + def _detect_youtube_id(self, current_video): + """find video id from filename or json""" + print(current_video) + youtube_id = self._extract_id_from_filename(current_video["media"]) + if youtube_id: + current_video["video_id"] = youtube_id + return + + youtube_id = self._extract_id_from_json(current_video["metadata"]) + if youtube_id: + current_video["video_id"] = youtube_id + return + + print(current_video["media"]) + raise ValueError("failed to find video id") + + @staticmethod + def _extract_id_from_filename(file_name): + """ + look at the file name for the youtube id + expects filename ending in []. + """ + base_name, _ = os.path.splitext(file_name) + id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", base_name) + if id_search: + youtube_id = id_search.group(1) + return youtube_id + + print(f"id extraction failed from filename: {file_name}") + + return False + + def _extract_id_from_json(self, json_file): + """open json file and extract id""" + json_path = os.path.join(self.CACHE_DIR, "import", json_file) + with open(json_path, "r", encoding="utf-8") as f: + json_content = f.read() + + youtube_id = json.loads(json_content)["id"] + + return youtube_id + + def _dump_thumb(self, current_video): + """extract embedded thumb before converting""" + if current_video["thumb"]: + return + + # write thumb to disk here + # ffmpeg -dump_attachment:t "" -i filename.mkv + # ffmpeg -i video.mp4 -map 0:v -map -0:V -c copy cover.jpg + # webm + + def _convert_video(self, current_video): + """convert if needed""" + current_path = os.path.join( + self.CACHE_DIR, "import", current_video["media"] + ) + base_path, ext = os.path.splitext(current_path) + if ext == ".mp4": + return + + new_path = base_path + ".mp4" + subprocess.run( + [ + "ffmpeg", + "-i", + current_path, + new_path, + "-loglevel", + "warning", + "-stats", + ], + check=True, + ) + current_video["media"] = new_path + os.remove(current_path) + + +class ManualImportOld: """import and indexing existing video files""" CONFIG = AppConfig().config From e1c470239877327d3030246d8c4bd7549e8d9ede Mon Sep 17 00:00:00 2001 From: DanielBatteryStapler Date: Sat, 30 Jul 2022 10:05:10 -0400 Subject: [PATCH 02/20] implement LDAP as authentication backend support (#274) --- Dockerfile | 2 +- README.md | 12 ++++++++++++ docs/FAQ.md | 3 --- tubearchivist/config/settings.py | 29 +++++++++++++++++++++++++++++ tubearchivist/requirements.txt | 1 + 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0d09add1..071da0c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ FROM python:3.10.5-slim-bullseye AS builder ARG TARGETPLATFORM RUN apt-get update -RUN apt-get install -y --no-install-recommends build-essential gcc +RUN apt-get install -y --no-install-recommends build-essential gcc libldap2-dev libsasl2-dev libssl-dev # install requirements COPY ./tubearchivist/requirements.txt /requirements.txt diff --git a/README.md b/README.md index dd2c0142..1a65628f 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,18 @@ Should that not be an option, the Tube Archivist container takes these two addit Changing any of these two environment variables will change the files *nginx.conf* and *uwsgi.ini* at startup using `sed` in your container. +## LDAP Authentication +LDAP authentication is not yet available in *stable* builds but is implemented for *unstable*. It can be enabled and configured using the following environment variables: + + - `TA_LDAP` (ex: `true`) Set to anything besides empty string to use LDAP authentication instead of local user authentication. + - `TA_LDAP_SERVER_URI` (ex: `ldap://ldap-server:389`) Set to the uri of your LDAP server. + - `TA_LDAP_BIND_DN` (ex: `uid=search-user,ou=users,dc=your-server`) DN of the user that is able to perform searches on your LDAP account. + - `TA_LDAP_BIND_PASSWORD` (ex: `yoursecretpassword`) Password for the search user. + - `TA_LDAP_USER_BASE` (ex: `ou=users,dc=your-server`) Search base for user filter. + - `TA_LDAP_USER_FILTER` (ex: `(objectClass=user)`) Filter for valid users. Login usernames are automatically matched using `uid` and does not need to be specified in this filter. + +When LDAP authentication is enabled django passwords (e.g. the password defined in TA_PASSWORD) will not allow you to login, only the LDAP server is used. + ### Elasticsearch **Note**: Tube Archivist depends on Elasticsearch 8. diff --git a/docs/FAQ.md b/docs/FAQ.md index cec6a718..f3e4cc90 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -29,6 +29,3 @@ So Docker is the only supported installation method. If you don't have any exper ## 4. Finetuning Elasticsearch A minimal configuration of Elasticsearch (ES) is provided in the example docker-compose.yml file. ES is highly configurable and very interesting to learn more about. Refer to the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html) if you want to get into it. - -## 5. Advanced Authentication -If you like to use things like SSO, LDAP or 2FA to login, consider using something like Authelia as a reverse proxy so this project can focus on the core task. Tube Archivist has a *remember me* checkbox at login to extend your sessions lifetime in your browser. diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index 569eed62..0fe904e7 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -14,7 +14,9 @@ import hashlib from os import environ, path from pathlib import Path +import ldap from corsheaders.defaults import default_headers +from django_auth_ldap.config import LDAPSearch from home.src.ta.config import AppConfig # Build paths inside the project like this: BASE_DIR / 'subdir'. @@ -83,6 +85,33 @@ TEMPLATES = [ WSGI_APPLICATION = "config.wsgi.application" +if bool(environ.get("TA_LDAP")): + global AUTH_LDAP_SERVER_URI + AUTH_LDAP_SERVER_URI = environ.get("TA_LDAP_SERVER_URI") + + global AUTH_LDAP_BIND_DN + AUTH_LDAP_BIND_DN = environ.get("TA_LDAP_BIND_DN") + + global AUTH_LDAP_BIND_PASSWORD + AUTH_LDAP_BIND_PASSWORD = environ.get("TA_LDAP_BIND_PASSWORD") + + global AUTH_LDAP_USER_SEARCH + AUTH_LDAP_USER_SEARCH = LDAPSearch( + environ.get("TA_LDAP_USER_BASE"), + ldap.SCOPE_SUBTREE, + "(&(uid=%(user)s)" + environ.get("TA_LDAP_USER_FILTER") + ")", + ) + + global AUTH_LDAP_USER_ATTR_MAP + AUTH_LDAP_USER_ATTR_MAP = { + "username": "uid", + "first_name": "givenName", + "last_name": "sn", + "email": "mail", + } + + global AUTHENTICATION_BACKENDS + AUTHENTICATION_BACKENDS = ("django_auth_ldap.backend.LDAPBackend",) # Database # https://docs.djangoproject.com/en/3.2/ref/settings/#databases diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index b07e8e46..ca8bc1d8 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -10,3 +10,4 @@ ryd-client==0.0.3 uWSGI==2.0.20 whitenoise==6.2.0 yt_dlp==2022.7.18 +django-auth-ldap==4.1.0 From 91a169fa3e9c759493e90851895bca8bc87d46c2 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 28 Jul 2022 16:03:32 +0700 Subject: [PATCH 03/20] add FUNDING.yml --- .github/FUNDING.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..6248d85f --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,5 @@ +github: bbilly1 +ko_fi: bbilly1 +patreon: octocat +tidelift: npm/octo-package +custom: https://paypal.me/bbilly1 \ No newline at end of file From 61da0c8af375b420f80b8ef8bd637822446759ec Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 30 Jul 2022 20:52:45 +0700 Subject: [PATCH 04/20] WIP: extract thumb from mkv --- tubearchivist/home/src/index/filesystem.py | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index dfd556c9..c16cff65 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -297,6 +297,46 @@ class ImportFolderScanner: if current_video["thumb"]: return + media_file = current_video["media"] + media_path = os.path.join(self.CACHE_DIR, "import", media_file) + base_name, ext = os.path.splitext(media_path) + + if ext == ".mkv": + thumb_stream = self._get_mkv_thumb_stream(media_path) + elif ext == ".mp4": + thumb_stream = 0 + elif ext == ".webm": + print("webm doesn't support thumbnail embed") + + @staticmethod + def _get_mkv_thumb_stream(media_path): + """get stream idx of thumbnail for mkv files""" + streams_raw = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-show_entries", + "stream_tags", + "-print_format", + "json", + media_path, + ], + capture_output=True, + check=True, + ) + streams = json.loads(streams_raw.stdout.decode()) + + for idx, stream in enumerate(streams["streams"]): + tags = stream["tags"] + if "mimetype" in tags and tags["filename"].startswith("cover"): + return idx + + return False + + + # ffprobe /cache/import/The\ Single\ Australian\ Farm\ That’s\ Bigger\ Than\ 49\ Countries\ \[YiSN21jKp4s\].mkv -hide_banner -show_entries "stream_tags" -print_format json + # write thumb to disk here # ffmpeg -dump_attachment:t "" -i filename.mkv # ffmpeg -i video.mp4 -map 0:v -map -0:V -c copy cover.jpg From 195acd3246757bf73fd6db25ce6a73a29b49389e Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 31 Jul 2022 00:02:09 +0700 Subject: [PATCH 05/20] WPI: dump thumbnails from mp4 and mkv --- tubearchivist/home/src/index/filesystem.py | 100 +++++++++++++++++---- 1 file changed, 81 insertions(+), 19 deletions(-) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index c16cff65..f961d48b 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -301,23 +301,74 @@ class ImportFolderScanner: media_path = os.path.join(self.CACHE_DIR, "import", media_file) base_name, ext = os.path.splitext(media_path) + new_path = False if ext == ".mkv": - thumb_stream = self._get_mkv_thumb_stream(media_path) + idx, thumb_type = self._get_mkv_thumb_stream(media_path) + if idx: + new_path = self.dump_mpv_thumb(media_path, idx, thumb_type) + elif ext == ".mp4": - thumb_stream = 0 - elif ext == ".webm": - print("webm doesn't support thumbnail embed") + thumb_type = self.get_mp4_thumb_type(media_path) + if thumb_type: + new_path = self.dump_mp4_thumb(media_path, thumb_type) + + if new_path: + current_video["thumb"] = new_path + + def _get_mkv_thumb_stream(self, media_path): + """get stream idx of thumbnail for mkv files""" + streams = self._get_streams(media_path) + attachments = [ + i for i in streams["streams"] if i["codec_type"] == "attachment" + ] + + for idx, stream in enumerate(attachments): + tags = stream["tags"] + if "mimetype" in tags and tags["filename"].startswith("cover"): + _, ext = os.path.splitext(tags["filename"]) + return idx, ext + + return False, False @staticmethod - def _get_mkv_thumb_stream(media_path): - """get stream idx of thumbnail for mkv files""" + def dump_mpv_thumb(media_path, idx, thumb_type): + """write cover to disk for mkv""" + _, media_ext = os.path.splitext(media_path) + new_path = f"{media_path.rstrip(media_ext)}{thumb_type}" + subprocess.run( + [ + "ffmpeg", + "-v", + "quiet", + f"-dump_attachment:t:{idx}", + new_path, + "-i", + media_path, + ], + check=False, + ) + + return new_path + + def get_mp4_thumb_type(self, media_path): + """dedect filetype of embedded thumbnail""" + streams = self._get_streams(media_path) + + for stream in streams["streams"]: + if stream["codec_name"] in ["png", "jpg"]: + return stream["codec_name"] + + return False + + @staticmethod + def _get_streams(media_path): + """return all streams from media_path""" streams_raw = subprocess.run( [ "ffprobe", "-v", "error", - "-show_entries", - "stream_tags", + "-show_streams", "-print_format", "json", media_path, @@ -327,20 +378,31 @@ class ImportFolderScanner: ) streams = json.loads(streams_raw.stdout.decode()) - for idx, stream in enumerate(streams["streams"]): - tags = stream["tags"] - if "mimetype" in tags and tags["filename"].startswith("cover"): - return idx + return streams - return False + @staticmethod + def dump_mp4_thumb(media_path, thumb_type): + """save cover to disk""" + _, ext = os.path.splitext(media_path) + new_path = f"{media_path.rstrip(ext)}.{thumb_type}" + subprocess.run( + [ + "ffmpeg", + "-i", + media_path, + "-map", + "0:v", + "-map", + "-0:V", + "-c", + "copy", + new_path, + ], + check=True, + ) - # ffprobe /cache/import/The\ Single\ Australian\ Farm\ That’s\ Bigger\ Than\ 49\ Countries\ \[YiSN21jKp4s\].mkv -hide_banner -show_entries "stream_tags" -print_format json - - # write thumb to disk here - # ffmpeg -dump_attachment:t "" -i filename.mkv - # ffmpeg -i video.mp4 -map 0:v -map -0:V -c copy cover.jpg - # webm + return new_path def _convert_video(self, current_video): """convert if needed""" From d0675368f48bbd33c5a224bf4afedc5cea457b2e Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 8 Aug 2022 14:52:06 +0700 Subject: [PATCH 06/20] bump django --- tubearchivist/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index ca8bc1d8..42e4656b 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,6 +1,7 @@ beautifulsoup4==4.11.1 celery==5.2.7 -Django==4.0.6 +Django==4.1 +django-auth-ldap==4.1.0 django-cors-headers==3.13.0 djangorestframework==3.13.1 Pillow==9.2.0 @@ -10,4 +11,3 @@ ryd-client==0.0.3 uWSGI==2.0.20 whitenoise==6.2.0 yt_dlp==2022.7.18 -django-auth-ldap==4.1.0 From b70cbfa0a87a521df4a1eb370f0e82b016a0166b Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 8 Aug 2022 14:52:33 +0700 Subject: [PATCH 07/20] convert thumbnail to jpg for manual import --- tubearchivist/home/src/index/filesystem.py | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index f961d48b..4d79e88b 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -19,6 +19,9 @@ from home.src.index.video import index_new_video from home.src.ta.config import AppConfig from home.src.ta.helper import clean_string, ignore_filelist from home.src.ta.ta_redis import RedisArchivist +from PIL import Image, ImageFile + +ImageFile.LOAD_TRUNCATED_IMAGES = True class FilesystemScanner: @@ -248,6 +251,7 @@ class ImportFolderScanner: for current_video in self.to_import: self._detect_youtube_id(current_video) self._dump_thumb(current_video) + self._convert_thumb(current_video) self._convert_video(current_video) def _detect_youtube_id(self, current_video): @@ -360,6 +364,25 @@ class ImportFolderScanner: return False + def _convert_thumb(self, current_video): + """convert all thumbnails to jpg""" + if not current_video["thumb"]: + return + + thumb_file = current_video["thumb"] + thumb_path = os.path.join(self.CACHE_DIR, "import", thumb_file) + + base_path, ext = os.path.splitext(thumb_path) + if ext == ".jpg": + return + + new_path = f"{base_path}.jpg" + img_raw = Image.open(thumb_path) + img_raw.convert("RGB").save(new_path) + + os.remove(thumb_path) + current_video["thumb"] = new_path + @staticmethod def _get_streams(media_path): """return all streams from media_path""" From 9fd6f9c58b76af2fcfb7e22d6c60c755b63f9813 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 8 Aug 2022 15:57:42 +0700 Subject: [PATCH 08/20] use absolute paths for manual import --- tubearchivist/home/src/index/filesystem.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 4d79e88b..f4ec8fed 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -191,7 +191,8 @@ class ImportFolderScanner: def get_all_files(self): """get all files in /import""" - all_files = ignore_filelist(os.listdir(self.IMPORT_DIR)) + rel_paths = ignore_filelist(os.listdir(self.IMPORT_DIR)) + all_files = [os.path.join(self.IMPORT_DIR, i) for i in rel_paths] all_files.sort() return all_files @@ -301,9 +302,8 @@ class ImportFolderScanner: if current_video["thumb"]: return - media_file = current_video["media"] - media_path = os.path.join(self.CACHE_DIR, "import", media_file) - base_name, ext = os.path.splitext(media_path) + media_path = current_video["media"] + _, ext = os.path.splitext(media_path) new_path = False if ext == ".mkv": @@ -369,8 +369,7 @@ class ImportFolderScanner: if not current_video["thumb"]: return - thumb_file = current_video["thumb"] - thumb_path = os.path.join(self.CACHE_DIR, "import", thumb_file) + thumb_path = current_video["thumb"] base_path, ext = os.path.splitext(thumb_path) if ext == ".jpg": @@ -429,9 +428,7 @@ class ImportFolderScanner: def _convert_video(self, current_video): """convert if needed""" - current_path = os.path.join( - self.CACHE_DIR, "import", current_video["media"] - ) + current_path = current_video["media"] base_path, ext = os.path.splitext(current_path) if ext == ".mp4": return From 8f711d359bf1a96298a3f3da13ed7dbdb9e30c9c Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 8 Aug 2022 18:28:32 +0700 Subject: [PATCH 09/20] implement youtube_meta and media_path overwritein YoutubeVideo --- tubearchivist/home/src/index/video.py | 44 +++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index cc8d65b7..48c3eeb4 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -426,17 +426,20 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self.video_overwrites = video_overwrites self.es_path = f"{self.index_name}/_doc/{youtube_id}" - def build_json(self): + def build_json(self, youtube_meta_overwrite=False, media_path=False): """build json dict of video""" self.get_from_youtube() - if not self.youtube_meta: + if not self.youtube_meta and not youtube_meta_overwrite: return + if not self.youtube_meta: + self.youtube_meta = youtube_meta_overwrite + self._process_youtube_meta() self._add_channel() self._add_stats() self.add_file_path() - self.add_player() + self.add_player(media_path) if self.config["downloads"]["integrate_ryd"]: self._get_ryd_stats() @@ -518,8 +521,28 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): raise FileNotFoundError - def add_player(self): + def add_player(self, media_path=False): """add player information for new videos""" + vid_path = self._get_vid_path(media_path) + + duration_handler = DurationConverter() + duration = duration_handler.get_sec(vid_path) + duration_str = duration_handler.get_str(duration) + self.json_data.update( + { + "player": { + "watched": False, + "duration": duration, + "duration_str": duration_str, + } + } + ) + + def _get_vid_path(self, media_path=False): + """get path of media file""" + if media_path: + return media_path + try: # when indexing from download task vid_path = self.build_dl_cache_path() @@ -535,18 +558,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): else: raise FileNotFoundError("could not find video file") from err - duration_handler = DurationConverter() - duration = duration_handler.get_sec(vid_path) - duration_str = duration_handler.get_str(duration) - self.json_data.update( - { - "player": { - "watched": False, - "duration": duration, - "duration_str": duration_str, - } - } - ) + return vid_path def add_file_path(self): """build media_url for where file will be located""" From b76fa693968a8d28e21f08a46ea7993b49768173 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 8 Aug 2022 19:18:27 +0700 Subject: [PATCH 10/20] WIP: rewrite ManualImport, identify and archive --- tubearchivist/home/src/index/filesystem.py | 164 ++++++--------------- tubearchivist/home/tasks.py | 7 +- 2 files changed, 51 insertions(+), 120 deletions(-) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index f4ec8fed..5e80ebc8 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -12,10 +12,9 @@ import shutil import subprocess from home.src.download.queue import PendingList -from home.src.download.yt_dlp_handler import VideoDownloader from home.src.es.connect import ElasticWrap from home.src.index.reindex import Reindex -from home.src.index.video import index_new_video +from home.src.index.video import YoutubeVideo, index_new_video from home.src.ta.config import AppConfig from home.src.ta.helper import clean_string, ignore_filelist from home.src.ta.ta_redis import RedisArchivist @@ -255,6 +254,8 @@ class ImportFolderScanner: self._convert_thumb(current_video) self._convert_video(current_video) + ManualImport(current_video, self.CONFIG).run() + def _detect_youtube_id(self, current_video): """find video id from filename or json""" print(current_video) @@ -450,132 +451,65 @@ class ImportFolderScanner: os.remove(current_path) -class ManualImportOld: - """import and indexing existing video files""" +class ManualImport: + """import single identified video""" - CONFIG = AppConfig().config - CACHE_DIR = CONFIG["application"]["cache_dir"] - IMPORT_DIR = os.path.join(CACHE_DIR, "import") + def __init__(self, current_video, config): + self.current_video = current_video + self.config = config - def __init__(self): - self.identified = self.import_folder_parser() + def run(self): + """run all""" + json_data = self.index_metadata() + self._move_to_archive(json_data) + self._cleanup() - def import_folder_parser(self): - """detect files in import folder""" - import_files = os.listdir(self.IMPORT_DIR) - to_import = ignore_filelist(import_files) - to_import.sort() - video_files = [i for i in to_import if not i.endswith(".json")] + def index_metadata(self): + """get metadata from yt or json""" + video = YoutubeVideo(self.current_video["video_id"]) + video.build_json( + youtube_meta_overwrite=self._get_info_json(), + media_path=self.current_video["media"], + ) + video.check_subtitles() + video.upload_to_es() - identified = [] + return video.json_data - for file_path in video_files: + def _get_info_json(self): + """read info_json from file""" + if not self.current_video["metadata"]: + return False - file_dict = {"video_file": file_path} - file_name, _ = os.path.splitext(file_path) + with open(self.current_video["metadata"], "r", encoding="utf-8") as f: + info_json = json.loads(f.read()) - matching_json = [ - i - for i in to_import - if i.startswith(file_name) and i.endswith(".json") - ] - if matching_json: - json_file = matching_json[0] - youtube_id = self.extract_id_from_json(json_file) - file_dict.update({"json_file": json_file}) - else: - youtube_id = self.extract_id_from_filename(file_name) - file_dict.update({"json_file": False}) + return info_json - file_dict.update({"youtube_id": youtube_id}) - identified.append(file_dict) + def _move_to_archive(self, json_data): + """move identified media file to archive""" + videos = self.config["application"]["videos"] - return identified + channel, file = os.path.split(json_data["media_url"]) + channel_folder = os.path.join(videos, channel) + if not os.path.exists(channel_folder): + os.makedirs(channel_folder) - @staticmethod - def extract_id_from_filename(file_name): - """ - look at the file name for the youtube id - expects filename ending in []. - """ - id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name) - if id_search: - youtube_id = id_search.group(1) - return youtube_id + old_path = self.current_video["media"] + new_path = os.path.join(channel_folder, file) + shutil.move(old_path, new_path, copy_function=shutil.copyfile) - print("failed to extract youtube id for: " + file_name) - raise Exception + def _cleanup(self): + """cleanup leftover files""" + if os.path.exists(self.current_video["metadata"]): + os.remove(self.current_video["metadata"]) - def extract_id_from_json(self, json_file): - """open json file and extract id""" - json_path = os.path.join(self.CACHE_DIR, "import", json_file) - with open(json_path, "r", encoding="utf-8") as f: - json_content = f.read() + if os.path.exists(self.current_video["thumb"]): + os.remove(self.current_video["thumb"]) - youtube_id = json.loads(json_content)["id"] - - return youtube_id - - def process_import(self): - """go through identified media files""" - - all_videos_added = [] - - for media_file in self.identified: - json_file = media_file["json_file"] - video_file = media_file["video_file"] - youtube_id = media_file["youtube_id"] - - video_path = os.path.join(self.CACHE_DIR, "import", video_file) - - self.move_to_cache(video_path, youtube_id) - - # identify and archive - vid_dict = index_new_video(youtube_id) - VideoDownloader([youtube_id]).move_to_archive(vid_dict) - youtube_id = vid_dict["youtube_id"] - thumb_url = vid_dict["vid_thumb_url"] - all_videos_added.append((youtube_id, thumb_url)) - - # cleanup - if os.path.exists(video_path): - os.remove(video_path) - if json_file: - json_path = os.path.join(self.CACHE_DIR, "import", json_file) - os.remove(json_path) - - return all_videos_added - - def move_to_cache(self, video_path, youtube_id): - """move identified video file to cache, convert to mp4""" - file_name = os.path.split(video_path)[-1] - video_file, ext = os.path.splitext(file_name) - - # make sure youtube_id is in filename - if youtube_id not in video_file: - video_file = f"{video_file}_{youtube_id}" - - # move, convert if needed - if ext == ".mp4": - new_file = video_file + ext - dest_path = os.path.join(self.CACHE_DIR, "download", new_file) - shutil.move(video_path, dest_path, copy_function=shutil.copyfile) - else: - print(f"processing with ffmpeg: {video_file}") - new_file = video_file + ".mp4" - dest_path = os.path.join(self.CACHE_DIR, "download", new_file) - subprocess.run( - [ - "ffmpeg", - "-i", - video_path, - dest_path, - "-loglevel", - "warning", - "-stats", - ], - check=True, - ) + for subtitle_file in self.current_video["subtitle"]: + if os.path.exists(subtitle_file): + os.remove(subtitle_file) def scan_filesystem(): diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 8f78d4dd..ede17f26 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -20,7 +20,7 @@ from home.src.download.yt_dlp_handler import VideoDownloader from home.src.es.index_setup import backup_all_indexes, restore_from_backup from home.src.index.channel import YoutubeChannel from home.src.index.filesystem import ( - ManualImport, + ImportFolderScanner, reindex_old_documents, scan_filesystem, ) @@ -150,10 +150,7 @@ def run_manual_import(): try: have_lock = my_lock.acquire(blocking=False) if have_lock: - import_handler = ManualImport() - if import_handler.identified: - all_videos_added = import_handler.process_import() - ThumbManager().download_vid(all_videos_added) + ImportFolderScanner().scan() else: print("Did not acquire lock form import.") From fb4d6b7be3f4b76129829ee765de8c0a61135d94 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 10 Aug 2022 21:03:54 +0700 Subject: [PATCH 11/20] major refactor ThumbManager --- tubearchivist/api/src/search_processor.py | 4 +- tubearchivist/home/src/download/queue.py | 11 +- .../home/src/download/subscriptions.py | 14 +- tubearchivist/home/src/download/thumbnails.py | 485 +++++++++--------- tubearchivist/home/src/frontend/searching.py | 4 +- tubearchivist/home/src/index/channel.py | 8 +- tubearchivist/home/src/index/playlist.py | 18 +- tubearchivist/home/src/index/reindex.py | 8 +- tubearchivist/home/tasks.py | 14 +- 9 files changed, 279 insertions(+), 287 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py index 6a1e2ddd..07102310 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -74,7 +74,7 @@ class SearchProcess: media_url = urllib.parse.quote(video_dict["media_url"]) vid_last_refresh = date_praser(video_dict["vid_last_refresh"]) published = date_praser(video_dict["published"]) - vid_thumb_url = ThumbManager().vid_thumb_path(video_id) + vid_thumb_url = ThumbManager(video_id).vid_thumb_path() channel = self._process_channel(video_dict["channel"]) if "subtitles" in video_dict: @@ -113,7 +113,7 @@ class SearchProcess: def _process_download(self, download_dict): """run on single download item""" video_id = download_dict["youtube_id"] - vid_thumb_url = ThumbManager().vid_thumb_path(video_id) + vid_thumb_url = ThumbManager(video_id).vid_thumb_path() published = date_praser(download_dict["published"]) download_dict.update( diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index b318b128..db7ae5d5 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -161,10 +161,7 @@ class PendingList(PendingIndex): self._parse_channel(entry["url"]) elif entry["type"] == "playlist": self._parse_playlist(entry["url"]) - new_thumbs = PlaylistSubscription().process_url_str( - [entry], subscribed=False - ) - ThumbManager().download_playlist(new_thumbs) + PlaylistSubscription().process_url_str([entry], subscribed=False) else: raise ValueError(f"invalid url_type: {entry}") @@ -198,7 +195,6 @@ class PendingList(PendingIndex): self.get_channels() bulk_list = [] - thumb_handler = ThumbManager() for idx, youtube_id in enumerate(self.missing_videos): video_details = self.get_youtube_details(youtube_id) if not video_details: @@ -209,8 +205,9 @@ class PendingList(PendingIndex): bulk_list.append(json.dumps(action)) bulk_list.append(json.dumps(video_details)) - thumb_needed = [(youtube_id, video_details["vid_thumb_url"])] - thumb_handler.download_vid(thumb_needed) + url = video_details["vid_thumb_url"] + ThumbManager(youtube_id).download_video_thumb(url) + self._notify_add(idx) if bulk_list: diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index d353ed06..7f8a1676 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -5,6 +5,7 @@ Functionality: """ from home.src.download import queue # partial import +from home.src.download.thumbnails import ThumbManager from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import IndexPaginate from home.src.index.channel import YoutubeChannel @@ -129,11 +130,9 @@ class PlaylistSubscription: all_indexed = IndexPaginate("ta_video", data).get_results() all_youtube_ids = [i["youtube_id"] for i in all_indexed] - new_thumbs = [] for idx, playlist in enumerate(new_playlists): - url_type = playlist["type"] playlist_id = playlist["url"] - if not url_type == "playlist": + if not playlist["type"] == "playlist": print(f"{playlist_id} not a playlist, skipping...") continue @@ -144,8 +143,11 @@ class PlaylistSubscription: playlist_h.upload_to_es() playlist_h.add_vids_to_playlist() self.channel_validate(playlist_h.json_data["playlist_channel_id"]) - thumb = playlist_h.json_data["playlist_thumbnail"] - new_thumbs.append((playlist_id, thumb)) + + url = playlist_h.json_data["playlist_thumbnail"] + thumb = ThumbManager(playlist_id, item_type="playlist") + thumb.download_playlist_thumb(url) + # notify message = { "status": "message:subplaylist", @@ -157,8 +159,6 @@ class PlaylistSubscription: "message:subplaylist", message=message, expire=True ) - return new_thumbs - @staticmethod def channel_validate(channel_id): """make sure channel of playlist is there""" diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index c317ee4d..8e6b8716 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -6,136 +6,64 @@ functionality: import base64 import os -from collections import Counter from io import BytesIO from time import sleep import requests from home.src.download import queue # partial import -from home.src.download import subscriptions # partial import +from home.src.es.connect import IndexPaginate from home.src.ta.config import AppConfig -from home.src.ta.helper import ignore_filelist -from home.src.ta.ta_redis import RedisArchivist from mutagen.mp4 import MP4, MP4Cover from PIL import Image, ImageFile, ImageFilter ImageFile.LOAD_TRUNCATED_IMAGES = True -class ThumbManager: - """handle thumbnails related functions""" +class ThumbManagerBase: + """base class for thumbnail management""" CONFIG = AppConfig().config - MEDIA_DIR = CONFIG["application"]["videos"] CACHE_DIR = CONFIG["application"]["cache_dir"] VIDEO_DIR = os.path.join(CACHE_DIR, "videos") CHANNEL_DIR = os.path.join(CACHE_DIR, "channels") PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists") - def get_all_thumbs(self): - """get all video artwork already downloaded""" - all_thumb_folders = ignore_filelist(os.listdir(self.VIDEO_DIR)) - all_thumbs = [] - for folder in all_thumb_folders: - folder_path = os.path.join(self.VIDEO_DIR, folder) - if os.path.isfile(folder_path): - self.update_path(folder) - all_thumbs.append(folder_path) - continue - # raise exemption here in a future version - # raise FileExistsError("video cache dir has files inside") + def __init__(self, item_id, item_type, fallback=False): + self.item_id = item_id + self.item_type = item_type + self.fallback = fallback - all_folder_thumbs = ignore_filelist(os.listdir(folder_path)) - all_thumbs.extend(all_folder_thumbs) + def download_raw(self, url): + """download thumbnail for video""" - return all_thumbs + for i in range(3): + try: + response = requests.get(url, stream=True) + if response.ok: + return Image.open(response.raw) + if response.status_code == 404: + return self.get_fallback() - def update_path(self, file_name): - """reorganize thumbnails into folders as update path from v0.0.5""" - folder_name = file_name[0].lower() - folder_path = os.path.join(self.VIDEO_DIR, folder_name) - old_file = os.path.join(self.VIDEO_DIR, file_name) - new_file = os.path.join(folder_path, file_name) - os.makedirs(folder_path, exist_ok=True) - os.rename(old_file, new_file) + except ConnectionError: + print(f"{self.item_id}: retry thumbnail download {url}") + sleep((i + 1) ** i) - def get_needed_thumbs(self, missing_only=False): - """get a list of all missing thumbnails""" - all_thumbs = self.get_all_thumbs() + return False - pending = queue.PendingList() - pending.get_download() - pending.get_indexed() + def get_fallback(self): + """get fallback thumbnail if not available""" + if self.fallback: + img_raw = Image.open(self.fallback) + return img_raw - needed_thumbs = [] - for video in pending.all_videos: - youtube_id = video["youtube_id"] - thumb_url = video["vid_thumb_url"] - if missing_only: - if youtube_id + ".jpg" not in all_thumbs: - needed_thumbs.append((youtube_id, thumb_url)) - else: - needed_thumbs.append((youtube_id, thumb_url)) - - for video in pending.all_pending + pending.all_ignored: - youtube_id = video["youtube_id"] - thumb_url = video["vid_thumb_url"] - if missing_only: - if youtube_id + ".jpg" not in all_thumbs: - needed_thumbs.append((youtube_id, thumb_url)) - else: - needed_thumbs.append((youtube_id, thumb_url)) - - return needed_thumbs - - def get_missing_channels(self): - """get all channel artwork""" - all_channel_art = os.listdir(self.CHANNEL_DIR) - files = [i[0:24] for i in all_channel_art] - cached_channel_ids = [k for (k, v) in Counter(files).items() if v > 1] - channel_sub = subscriptions.ChannelSubscription() - channels = channel_sub.get_channels(subscribed_only=False) - - missing_channels = [] - for channel in channels: - channel_id = channel["channel_id"] - if channel_id not in cached_channel_ids: - channel_banner = channel["channel_banner_url"] - channel_thumb = channel["channel_thumb_url"] - missing_channels.append( - (channel_id, channel_thumb, channel_banner) - ) - - return missing_channels - - def get_missing_playlists(self): - """get all missing playlist artwork""" - all_downloaded = ignore_filelist(os.listdir(self.PLAYLIST_DIR)) - all_ids_downloaded = [i.replace(".jpg", "") for i in all_downloaded] - playlist_sub = subscriptions.PlaylistSubscription() - playlists = playlist_sub.get_playlists(subscribed_only=False) - - missing_playlists = [] - for playlist in playlists: - playlist_id = playlist["playlist_id"] - if playlist_id not in all_ids_downloaded: - playlist_thumb = playlist["playlist_thumbnail"] - missing_playlists.append((playlist_id, playlist_thumb)) - - return missing_playlists - - def get_raw_img(self, img_url, thumb_type): - """get raw image from youtube and handle 404""" - try: - app_root = self.CONFIG["application"]["app_root"] - except KeyError: - # lazy keyerror fix to not have to deal with a strange startup - # racing contition between the threads in HomeConfig.ready() - app_root = "/app" + app_root = self.CONFIG["application"]["app_root"] default_map = { "video": os.path.join( app_root, "static/img/default-video-thumb.jpg" ), + "playlist": os.path.join( + app_root, "static/img/default-video-thumb.jpg" + ), "icon": os.path.join( app_root, "static/img/default-channel-icon.jpg" ), @@ -143,116 +71,134 @@ class ThumbManager: app_root, "static/img/default-channel-banner.jpg" ), } - if img_url: - try: - response = requests.get(img_url, stream=True) - except ConnectionError: - sleep(5) - response = requests.get(img_url, stream=True) - if not response.ok and not response.status_code == 404: - print("retry thumbnail download for " + img_url) - sleep(5) - response = requests.get(img_url, stream=True) - else: - response = False - if not response or response.status_code == 404: - # use default - img_raw = Image.open(default_map[thumb_type]) - else: - # use response - img_obj = response.raw - img_raw = Image.open(img_obj) + + img_raw = Image.open(default_map[self.item_type]) return img_raw - def download_vid(self, missing_thumbs, notify=True): - """download all missing thumbnails from list""" - print(f"downloading {len(missing_thumbs)} thumbnails") - for idx, (youtube_id, thumb_url) in enumerate(missing_thumbs): - folder_path = os.path.join(self.VIDEO_DIR, youtube_id[0].lower()) - thumb_path = os.path.join( - self.CACHE_DIR, self.vid_thumb_path(youtube_id) - ) - os.makedirs(folder_path, exist_ok=True) - img_raw = self.get_raw_img(thumb_url, "video") +class ThumbManager(ThumbManagerBase): + """handle thumbnails related functions""" - width, height = img_raw.size - if not width / height == 16 / 9: - new_height = width / 16 * 9 - offset = (height - new_height) / 2 - img_raw = img_raw.crop((0, offset, width, height - offset)) - img_raw.convert("RGB").save(thumb_path) + def __init__(self, item_id, item_type="video", fallback=False): + super().__init__(item_id, item_type, fallback=fallback) - progress = f"{idx + 1}/{len(missing_thumbs)}" - if notify: - mess_dict = { - "status": "message:add", - "level": "info", - "title": "Processing Videos", - "message": "Downloading Thumbnails, Progress: " + progress, - } - if idx + 1 == len(missing_thumbs): - expire = 4 - else: - expire = True + def download(self, url): + """download thumbnail""" + print(f"{self.item_id}: download {self.item_type} thumbnail") + if self.item_type == "video": + self.download_video_thumb(url) + elif self.item_type == "channel": + self.download_channel_art(url) + elif self.item_type == "playlist": + self.download_playlist_thumb(url) - RedisArchivist().set_message( - "message:add", mess_dict, expire=expire - ) + def delete(self): + """delete thumbnail file""" + print(f"{self.item_id}: delete {self.item_type} thumbnail") + if self.item_type == "video": + self.delete_video_thumb() + elif self.item_type == "channel": + self.delete_channel_thumb() + elif self.item_type == "playlist": + self.delete_playlist_thumb() - if idx + 1 % 25 == 0: - print("thumbnail progress: " + progress) + def download_video_thumb(self, url, skip_existing=False): + """pass url for video thumbnail""" + folder_path = os.path.join(self.VIDEO_DIR, self.item_id[0].lower()) + thumb_path = self.vid_thumb_path(absolute=True) - def download_chan(self, missing_channels): - """download needed artwork for channels""" - print(f"downloading {len(missing_channels)} channel artwork") - for channel in missing_channels: - channel_id, channel_thumb, channel_banner = channel + if skip_existing and os.path.exists(thumb_path): + return - thumb_path = os.path.join( - self.CHANNEL_DIR, channel_id + "_thumb.jpg" - ) - img_raw = self.get_raw_img(channel_thumb, "icon") - img_raw.convert("RGB").save(thumb_path) + os.makedirs(folder_path, exist_ok=True) + img_raw = self.download_raw(url) + width, height = img_raw.size - banner_path = os.path.join( - self.CHANNEL_DIR, channel_id + "_banner.jpg" - ) - img_raw = self.get_raw_img(channel_banner, "banner") - img_raw.convert("RGB").save(banner_path) + if not width / height == 16 / 9: + new_height = width / 16 * 9 + offset = (height - new_height) / 2 + img_raw = img_raw.crop((0, offset, width, height - offset)) - mess_dict = { - "status": "message:download", - "level": "info", - "title": "Processing Channels", - "message": "Downloading Channel Art.", - } - key = "message:download" - RedisArchivist().set_message(key, mess_dict, expire=True) + img_raw.convert("RGB").save(thumb_path) - def download_playlist(self, missing_playlists): - """download needed artwork for playlists""" - print(f"downloading {len(missing_playlists)} playlist artwork") - for playlist in missing_playlists: - playlist_id, playlist_thumb_url = playlist - thumb_path = os.path.join(self.PLAYLIST_DIR, playlist_id + ".jpg") - img_raw = self.get_raw_img(playlist_thumb_url, "video") - img_raw.convert("RGB").save(thumb_path) + def vid_thumb_path(self, absolute=False): + """build expected path for video thumbnail from youtube_id""" + folder_name = self.item_id[0].lower() + folder_path = os.path.join("videos", folder_name) + thumb_path = os.path.join(folder_path, f"{self.item_id}.jpg") + if absolute: + thumb_path = os.path.join(self.CACHE_DIR, thumb_path) - mess_dict = { - "status": "message:download", - "level": "info", - "title": "Processing Playlists", - "message": "Downloading Playlist Art.", - } - key = "message:download" - RedisArchivist().set_message(key, mess_dict, expire=True) + return thumb_path - def get_base64_blur(self, youtube_id): + def download_channel_art(self, urls, skip_existing=False): + """pass tuple of channel thumbnails""" + channel_thumb, channel_banner = urls + self._download_channel_thumb(channel_thumb, skip_existing) + self._download_channel_banner(channel_banner, skip_existing) + + def _download_channel_thumb(self, channel_thumb, skip_existing): + """download channel thumbnail""" + + thumb_path = os.path.join( + self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg" + ) + self.item_type = "icon" + + if skip_existing and os.path.exists(thumb_path): + return + + img_raw = self.download_raw(channel_thumb) + img_raw.convert("RGB").save(thumb_path) + + def _download_channel_banner(self, channel_banner, skip_existing): + """download channel banner""" + + banner_path = os.path.join( + self.CHANNEL_DIR, self.item_id + "_banner.jpg" + ) + self.item_type = "banner" + if skip_existing and os.path.exists(banner_path): + return + + img_raw = self.download_raw(channel_banner) + img_raw.convert("RGB").save(banner_path) + + def download_playlist_thumb(self, url, skip_existing=False): + """pass thumbnail url""" + thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg") + if skip_existing and os.path.exists(thumb_path): + return + + img_raw = self.download_raw(url) + img_raw.convert("RGB").save(thumb_path) + + def delete_video_thumb(self): + """delete video thumbnail if exists""" + thumb_path = self.vid_thumb_path() + to_delete = os.path.join(self.CACHE_DIR, thumb_path) + if os.path.exists(to_delete): + os.remove(to_delete) + + def delete_channel_thumb(self): + """delete all artwork of channel""" + thumb = os.path.join(self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg") + banner = os.path.join(self.CHANNEL_DIR, f"{self.item_id}_banner.jpg") + if os.path.exists(thumb): + os.remove(thumb) + if os.path.exists(banner): + os.remove(banner) + + def delete_playlist_thumb(self): + """delete playlist thumbnail""" + thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg") + if os.path.exists(thumb_path): + os.remove(thumb_path) + + def get_vid_base64_blur(self): """return base64 encoded placeholder""" - img_path = self.vid_thumb_path(youtube_id) - file_path = os.path.join(self.CACHE_DIR, img_path) + file_path = os.path.join(self.CACHE_DIR, self.vid_thumb_path()) img_raw = Image.open(file_path) img_raw.thumbnail((img_raw.width // 20, img_raw.height // 20)) img_blur = img_raw.filter(ImageFilter.BLUR) @@ -264,40 +210,109 @@ class ThumbManager: return data_url - @staticmethod - def vid_thumb_path(youtube_id): - """build expected path for video thumbnail from youtube_id""" - folder_name = youtube_id[0].lower() - folder_path = os.path.join("videos", folder_name) - thumb_path = os.path.join(folder_path, youtube_id + ".jpg") - return thumb_path - def delete_vid_thumb(self, youtube_id): - """delete video thumbnail if exists""" - thumb_path = self.vid_thumb_path(youtube_id) - to_delete = os.path.join(self.CACHE_DIR, thumb_path) - if os.path.exists(to_delete): - os.remove(to_delete) +class ValidatorCallback: + """handle callback validate thumbnails page by page""" - def delete_chan_thumb(self, channel_id): - """delete all artwork of channel""" - thumb = os.path.join(self.CHANNEL_DIR, channel_id + "_thumb.jpg") - banner = os.path.join(self.CHANNEL_DIR, channel_id + "_banner.jpg") - if os.path.exists(thumb): - os.remove(thumb) - if os.path.exists(banner): - os.remove(banner) + def __init__(self, source, index_name): + self.source = source + self.index_name = index_name - def cleanup_downloaded(self): - """find downloaded thumbnails without video indexed""" - all_thumbs = self.get_all_thumbs() - all_indexed = self.get_needed_thumbs() - all_needed_thumbs = [i[0] + ".jpg" for i in all_indexed] - for thumb in all_thumbs: - if thumb not in all_needed_thumbs: - # cleanup - youtube_id = thumb.rstrip(".jpg") - self.delete_vid_thumb(youtube_id) + def run(self): + """run the task for page""" + print(f"{self.index_name}: validate artwork") + if self.index_name == "ta_video": + self._validate_videos() + elif self.index_name == "ta_channel": + self._validate_channels() + elif self.index_name == "ta_playlist": + self._validate_playlists() + + def _validate_videos(self): + """check if video thumbnails are correct""" + for video in self.source: + url = video["_source"]["vid_thumb_url"] + handler = ThumbManager(video["_source"]["youtube_id"]) + handler.download_video_thumb(url, skip_existing=True) + + def _validate_channels(self): + """check if all channel artwork is there""" + for channel in self.source: + urls = ( + channel["_source"]["channel_thumb_url"], + channel["_source"]["channel_banner_url"], + ) + handler = ThumbManager(channel["_source"]["channel_id"]) + handler.download_channel_art(urls, skip_existing=True) + + def _validate_playlists(self): + """check if all playlist artwork is there""" + for playlist in self.source: + url = playlist["_source"]["playlist_thumbnail"] + handler = ThumbManager(playlist["_source"]["playlist_id"]) + handler.download_playlist_thumb(url, skip_existing=True) + + +class ThumbValidator: + """validate thumbnails""" + + def download_missing(self): + """download all missing artwork""" + self.download_missing_videos() + self.download_missing_channels() + self.download_missing_playlists() + + def download_missing_videos(self): + """get all missing video thumbnails""" + data = { + "query": {"term": {"active": {"value": True}}}, + "sort": [{"youtube_id": {"order": "asc"}}], + "_source": ["vid_thumb_url", "youtube_id"], + } + paginate = IndexPaginate( + "ta_video", data, size=5000, callback=ValidatorCallback + ) + _ = paginate.get_results() + + def download_missing_channels(self): + """get all missing channel thumbnails""" + data = { + "query": {"term": {"channel_active": {"value": True}}}, + "sort": [{"channel_id": {"order": "asc"}}], + "_source": { + "excludes": ["channel_description", "channel_overwrites"] + }, + } + paginate = IndexPaginate( + "ta_channel", data, callback=ValidatorCallback + ) + _ = paginate.get_results() + + def download_missing_playlists(self): + """get all missing playlist artwork""" + data = { + "query": {"term": {"playlist_active": {"value": True}}}, + "sort": [{"playlist_id": {"order": "asc"}}], + "_source": ["playlist_id", "playlist_thumbnail"], + } + paginate = IndexPaginate( + "ta_playlist", data, callback=ValidatorCallback + ) + _ = paginate.get_results() + + +class ThumbFilesystem: + """filesystem tasks for thumbnails""" + + CONFIG = AppConfig().config + CACHE_DIR = CONFIG["application"]["cache_dir"] + MEDIA_DIR = CONFIG["application"]["videos"] + VIDEO_DIR = os.path.join(CACHE_DIR, "videos") + + def sync(self): + """embed thumbnails to mediafiles""" + video_list = self.get_thumb_list() + self._embed_thumbs(video_list) def get_thumb_list(self): """get list of mediafiles and matching thumbnails""" @@ -307,10 +322,10 @@ class ThumbManager: video_list = [] for video in pending.all_videos: - youtube_id = video["youtube_id"] + video_id = video["youtube_id"] media_url = os.path.join(self.MEDIA_DIR, video["media_url"]) thumb_path = os.path.join( - self.CACHE_DIR, self.vid_thumb_path(youtube_id) + self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path() ) video_list.append( { @@ -322,7 +337,7 @@ class ThumbManager: return video_list @staticmethod - def write_all_thumbs(video_list): + def _embed_thumbs(video_list): """rewrite the thumbnail into media file""" counter = 1 @@ -340,15 +355,3 @@ class ThumbManager: if counter % 50 == 0: print(f"thumbnail write progress {counter}/{len(video_list)}") counter = counter + 1 - - -def validate_thumbnails(): - """check if all thumbnails are there and organized correctly""" - handler = ThumbManager() - thumbs_to_download = handler.get_needed_thumbs(missing_only=True) - handler.download_vid(thumbs_to_download) - missing_channels = handler.get_missing_channels() - handler.download_chan(missing_channels) - missing_playlists = handler.get_missing_playlists() - handler.download_playlist(missing_playlists) - handler.cleanup_downloaded() diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py index b14cc65d..ce3209ab 100644 --- a/tubearchivist/home/src/frontend/searching.py +++ b/tubearchivist/home/src/frontend/searching.py @@ -119,7 +119,7 @@ class SearchHandler: if "vid_thumb_url" in hit_keys: youtube_id = hit["source"]["youtube_id"] - thumb_path = ThumbManager().vid_thumb_path(youtube_id) + thumb_path = ThumbManager(youtube_id).vid_thumb_path() hit["source"]["vid_thumb_url"] = thumb_path if "channel_last_refresh" in hit_keys: @@ -138,7 +138,7 @@ class SearchHandler: if "subtitle_fragment_id" in hit_keys: youtube_id = hit["source"]["youtube_id"] - thumb_path = ThumbManager().vid_thumb_path(youtube_id) + thumb_path = ThumbManager(youtube_id).vid_thumb_path() hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}" return hit diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 400c5f5a..d41449ae 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -192,11 +192,11 @@ class YoutubeChannel(YouTubeItem): def get_channel_art(self): """download channel art for new channels""" channel_id = self.youtube_id - channel_thumb = self.json_data["channel_thumb_url"] - channel_banner = self.json_data["channel_banner_url"] - ThumbManager().download_chan( - [(channel_id, channel_thumb, channel_banner)] + urls = ( + self.json_data["channel_thumb_url"], + self.json_data["channel_banner_url"], ) + ThumbManager(channel_id, item_type="channel").download(urls) def sync_to_videos(self): """sync new channel_dict to all videos of channel""" diff --git a/tubearchivist/home/src/index/playlist.py b/tubearchivist/home/src/index/playlist.py index 69dc5db5..7115f9d9 100644 --- a/tubearchivist/home/src/index/playlist.py +++ b/tubearchivist/home/src/index/playlist.py @@ -81,12 +81,10 @@ class YoutubePlaylist(YouTubeItem): self.all_members = all_members - @staticmethod - def get_playlist_art(): + def get_playlist_art(self): """download artwork of playlist""" - thumbnails = ThumbManager() - missing_playlists = thumbnails.get_missing_playlists() - thumbnails.download_playlist(missing_playlists) + url = self.json_data["playlist_thumbnail"] + ThumbManager(self.youtube_id, item_type="playlist").download(url) def add_vids_to_playlist(self): """sync the playlist id to videos""" @@ -145,17 +143,15 @@ class YoutubePlaylist(YouTubeItem): previous_item = False else: previous_item = all_entries[current_idx - 1] - prev_thumb = ThumbManager().vid_thumb_path( - previous_item["youtube_id"] - ) - previous_item["vid_thumb"] = prev_thumb + prev_id = previous_item["youtube_id"] + previous_item["vid_thumb"] = ThumbManager(prev_id).vid_thumb_path() if current_idx == len(all_entries) - 1: next_item = False else: next_item = all_entries[current_idx + 1] - next_thumb = ThumbManager().vid_thumb_path(next_item["youtube_id"]) - next_item["vid_thumb"] = next_thumb + next_id = next_item["youtube_id"] + next_item["vid_thumb"] = ThumbManager(next_id).vid_thumb_path() self.nav = { "playlist_meta": { diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index f0b88bd1..db231f89 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -181,10 +181,10 @@ class Reindex: video.upload_to_es() - thumb_handler = ThumbManager() - thumb_handler.delete_vid_thumb(youtube_id) - to_download = (youtube_id, video.json_data["vid_thumb_url"]) - thumb_handler.download_vid([to_download], notify=False) + thumb_handler = ThumbManager(youtube_id) + thumb_handler.delete_video_thumb() + thumb_handler.download_video_thumb(video.json_data["vid_thumb_url"]) + return @staticmethod diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index ede17f26..b3da7621 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -15,7 +15,7 @@ from home.src.download.subscriptions import ( ChannelSubscription, PlaylistSubscription, ) -from home.src.download.thumbnails import ThumbManager, validate_thumbnails +from home.src.download.thumbnails import ThumbFilesystem, ThumbValidator from home.src.download.yt_dlp_handler import VideoDownloader from home.src.es.index_setup import backup_all_indexes, restore_from_backup from home.src.index.channel import YoutubeChannel @@ -201,21 +201,19 @@ def kill_dl(task_id): def rescan_filesystem(): """check the media folder for mismatches""" scan_filesystem() - validate_thumbnails() + ThumbValidator().download_missing() @shared_task(name="thumbnail_check") def thumbnail_check(): """validate thumbnails""" - validate_thumbnails() + ThumbValidator().download_missing() @shared_task def re_sync_thumbs(): """sync thumbnails to mediafiles""" - handler = ThumbManager() - video_list = handler.get_thumb_list() - handler.write_all_thumbs(video_list) + ThumbFilesystem().sync() @shared_task @@ -226,9 +224,7 @@ def subscribe_to(url_str): for item in to_subscribe_list: to_sub_id = item["url"] if item["type"] == "playlist": - new_thumbs = PlaylistSubscription().process_url_str([item]) - if new_thumbs: - ThumbManager().download_playlist(new_thumbs) + PlaylistSubscription().process_url_str([item]) continue if item["type"] == "video": From 44473a364fe49fe46d1f630ae5af78a5ff304c65 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 10 Aug 2022 21:20:58 +0700 Subject: [PATCH 12/20] fix double download of playlist art --- tubearchivist/home/src/index/playlist.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubearchivist/home/src/index/playlist.py b/tubearchivist/home/src/index/playlist.py index 7115f9d9..252e913b 100644 --- a/tubearchivist/home/src/index/playlist.py +++ b/tubearchivist/home/src/index/playlist.py @@ -41,7 +41,6 @@ class YoutubePlaylist(YouTubeItem): self.process_youtube_meta() self.get_entries() self.json_data["playlist_entries"] = self.all_members - self.get_playlist_art() self.json_data["playlist_subscribed"] = subscribed def process_youtube_meta(self): From 35f1084cc283816a790c8b7d82cce5396f2cc977 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 10 Aug 2022 21:47:26 +0700 Subject: [PATCH 13/20] download thumbnail for manual import --- tubearchivist/home/src/index/filesystem.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 5e80ebc8..85e1c953 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -12,6 +12,7 @@ import shutil import subprocess from home.src.download.queue import PendingList +from home.src.download.thumbnails import ThumbManager from home.src.es.connect import ElasticWrap from home.src.index.reindex import Reindex from home.src.index.video import YoutubeVideo, index_new_video @@ -466,7 +467,8 @@ class ManualImport: def index_metadata(self): """get metadata from yt or json""" - video = YoutubeVideo(self.current_video["video_id"]) + video_id = self.current_video["video_id"] + video = YoutubeVideo(video_id) video.build_json( youtube_meta_overwrite=self._get_info_json(), media_path=self.current_video["media"], @@ -474,6 +476,9 @@ class ManualImport: video.check_subtitles() video.upload_to_es() + url = video.json_data["vid_thumb_url"] + ThumbManager(video_id).download_video_thumb(url) + return video.json_data def _get_info_json(self): From 7029441f89fc9a4981fa1e9af1d437fa959fb49a Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 10 Aug 2022 22:24:02 +0700 Subject: [PATCH 14/20] use fallback thumb when no url passed --- tubearchivist/home/src/download/thumbnails.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index 8e6b8716..43cd981e 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -35,6 +35,8 @@ class ThumbManagerBase: def download_raw(self, url): """download thumbnail for video""" + if not url: + return self.get_fallback() for i in range(3): try: From 994f7c2443b062c3c2b958a37aec061ee2f1ae7f Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 10 Aug 2022 22:31:15 +0700 Subject: [PATCH 15/20] use user provided thumb for offline import --- tubearchivist/home/src/index/filesystem.py | 9 +++++++-- tubearchivist/home/src/index/video.py | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 85e1c953..ac498303 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -476,8 +476,13 @@ class ManualImport: video.check_subtitles() video.upload_to_es() - url = video.json_data["vid_thumb_url"] - ThumbManager(video_id).download_video_thumb(url) + if video.offline_import and self.current_video["thumb"]: + old_path = self.current_video["thumb"] + new_path = ThumbManager(video_id).vid_thumb_path(absolute=True) + shutil.move(old_path, new_path, copy_function=shutil.copyfile) + else: + url = video.json_data["vid_thumb_url"] + ThumbManager(video_id).download_video_thumb(url) return video.json_data diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 48c3eeb4..a2b54970 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -425,6 +425,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self.channel_id = False self.video_overwrites = video_overwrites self.es_path = f"{self.index_name}/_doc/{youtube_id}" + self.offline_import = False def build_json(self, youtube_meta_overwrite=False, media_path=False): """build json dict of video""" @@ -434,6 +435,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): if not self.youtube_meta: self.youtube_meta = youtube_meta_overwrite + self.offline_import = True self._process_youtube_meta() self._add_channel() From 31061c0eb05b660f2fb06d715b9a607326678f49 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 10 Aug 2022 22:38:17 +0700 Subject: [PATCH 16/20] handle offline import without info.json fail --- tubearchivist/home/src/index/filesystem.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index ac498303..13a0ae5a 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -473,6 +473,10 @@ class ManualImport: youtube_meta_overwrite=self._get_info_json(), media_path=self.current_video["media"], ) + if not video.json_data: + print(f"{video_id}: manual import failed, and no metadata found.") + raise ValueError + video.check_subtitles() video.upload_to_es() From c26ef012c15b0388c2874ba04f196cb74fbb8cdb Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 11 Aug 2022 11:10:58 +0700 Subject: [PATCH 17/20] raise ValueError when not matching media files --- tubearchivist/home/src/index/filesystem.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 13a0ae5a..38d790b8 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -250,6 +250,10 @@ class ImportFolderScanner: def process_videos(self): """loop through all videos""" for current_video in self.to_import: + if not current_video["media"]: + print(f"{current_video}: no matching media file found.") + raise ValueError + self._detect_youtube_id(current_video) self._dump_thumb(current_video) self._convert_thumb(current_video) From 0891c49620ef442c80037a36c45d13c7123267cd Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 11 Aug 2022 19:57:00 +0700 Subject: [PATCH 18/20] implement offline channel import --- tubearchivist/home/src/index/channel.py | 51 +++++++++++++++++++--- tubearchivist/home/src/index/filesystem.py | 12 ++++- tubearchivist/home/src/index/video.py | 5 ++- 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index d41449ae..34e4adc7 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -173,30 +173,71 @@ class YoutubeChannel(YouTubeItem): self.es_path = f"{self.index_name}/_doc/{youtube_id}" self.all_playlists = False - def build_json(self, upload=False): + def build_json(self, upload=False, fallback=False): """get from es or from youtube""" self.get_from_es() if self.json_data: return - self.get_from_youtube() + self.get_from_youtube(fallback) + if upload: self.upload_to_es() return - def get_from_youtube(self): + def get_from_youtube(self, fallback=False): """use bs4 to scrape channel about page""" self.json_data = ChannelScraper(self.youtube_id).get_json() + + if not self.json_data and fallback: + self._video_fallback(fallback) + self.get_channel_art() + def _video_fallback(self, fallback): + """use video metadata as fallback""" + print(f"{self.youtube_id}: fallback to video metadata") + self.json_data = { + "channel_active": False, + "channel_last_refresh": int(datetime.now().strftime("%s")), + "channel_subs": fallback.get("channel_follower_count", 0), + "channel_name": fallback["uploader"], + "channel_banner_url": False, + "channel_tvart_url": False, + "channel_id": self.youtube_id, + "channel_subscribed": False, + "channel_description": False, + "channel_thumb_url": False, + "channel_views": 0, + } + self._info_json_fallback() + + def _info_json_fallback(self): + """read channel info.json for additional metadata""" + info_json = os.path.join( + self.config["application"]["cache_dir"], + "import", + f"{self.youtube_id}.info.json", + ) + if os.path.exists(info_json): + print(f"{self.youtube_id}: read info.json file") + with open(info_json, "r", encoding="utf-8") as f: + content = json.loads(f.read()) + + self.json_data.update( + { + "channel_subs": content["channel_follower_count"], + "channel_description": content["description"], + } + ) + def get_channel_art(self): """download channel art for new channels""" - channel_id = self.youtube_id urls = ( self.json_data["channel_thumb_url"], self.json_data["channel_banner_url"], ) - ThumbManager(channel_id, item_type="channel").download(urls) + ThumbManager(self.youtube_id, item_type="channel").download(urls) def sync_to_videos(self): """sync new channel_dict to all videos of channel""" diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 38d790b8..3689fed8 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -467,7 +467,7 @@ class ManualImport: """run all""" json_data = self.index_metadata() self._move_to_archive(json_data) - self._cleanup() + self._cleanup(json_data) def index_metadata(self): """get metadata from yt or json""" @@ -517,7 +517,7 @@ class ManualImport: new_path = os.path.join(channel_folder, file) shutil.move(old_path, new_path, copy_function=shutil.copyfile) - def _cleanup(self): + def _cleanup(self, json_data): """cleanup leftover files""" if os.path.exists(self.current_video["metadata"]): os.remove(self.current_video["metadata"]) @@ -529,6 +529,14 @@ class ManualImport: if os.path.exists(subtitle_file): os.remove(subtitle_file) + channel_info = os.path.join( + self.config["application"]["cache_dir"], + "import", + f"{json_data['channel']['channel_id']}.info.json", + ) + if os.path.exists(channel_info): + os.remove(channel_info) + def scan_filesystem(): """grouped function to delete and update index""" diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index a2b54970..d7d8b983 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -492,7 +492,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): def _add_channel(self): """add channel dict to video json_data""" channel = ta_channel.YoutubeChannel(self.channel_id) - channel.build_json(upload=True) + channel.build_json(upload=True, fallback=self.youtube_meta) self.json_data.update({"channel": channel.json_data}) def _add_stats(self): @@ -500,13 +500,14 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): # likes like_count = self.youtube_meta.get("like_count", 0) dislike_count = self.youtube_meta.get("dislike_count", 0) + average_rating = self.youtube_meta.get("average_rating", 0) self.json_data.update( { "stats": { "view_count": self.youtube_meta["view_count"], "like_count": like_count, "dislike_count": dislike_count, - "average_rating": self.youtube_meta["average_rating"], + "average_rating": average_rating, } } ) From ececc3cedddcf058b74fcba957810e6df30612d9 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 11 Aug 2022 19:57:12 +0700 Subject: [PATCH 19/20] bump yt-dlp --- tubearchivist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index 42e4656b..f9729822 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -10,4 +10,4 @@ requests==2.28.1 ryd-client==0.0.3 uWSGI==2.0.20 whitenoise==6.2.0 -yt_dlp==2022.7.18 +yt_dlp==2022.8.8 From 980b99783bf1355d2d68913b3d2996c7994b0025 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 12 Aug 2022 05:35:47 +0700 Subject: [PATCH 20/20] add sponsor info --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 1a65628f..7fb417c1 100644 --- a/README.md +++ b/README.md @@ -220,3 +220,12 @@ Second best way to support the development is to provide for caffeinated beverag * [Paypal.me](https://paypal.me/bbilly1) for a one time coffee * [Paypal Subscription](https://www.paypal.com/webapps/billing/plans/subscribe?plan_id=P-03770005GR991451KMFGVPMQ) for a monthly coffee * [ko-fi.com](https://ko-fi.com/bbilly1) for an alternative platform + + +## Sponsor +Big thank you to [Digitalocean](https://www.digitalocean.com/) for generously donating credit for the tubearchivist.com VPS and buildserver. +

+ + + +