From 26cc7846c6995c9355ade3db3b28206abd8a3fd0 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Tue, 26 Jul 2022 19:51:47 +0700
Subject: [PATCH 01/20] WIP: new import folder parser for offline import

---
 tubearchivist/home/src/index/filesystem.py | 173 ++++++++++++++++++++-
 1 file changed, 172 insertions(+), 1 deletion(-)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 9dd7b476..dfd556c9 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -157,7 +157,178 @@ class FilesystemScanner:
             _, _ = ElasticWrap(path).delete()
 
 
-class ManualImport:
+class ImportFolderScanner:
+    """import and indexing existing video files
+    - identify all media files belonging to a video
+    - identify youtube id
+    - convert if needed
+    """
+
+    CONFIG = AppConfig().config
+    CACHE_DIR = CONFIG["application"]["cache_dir"]
+    IMPORT_DIR = os.path.join(CACHE_DIR, "import")
+
+    EXT_MAP = {
+        "media": [".mp4", ".mkv", ".webm"],
+        "metadata": [".json"],
+        "thumb": [".jpg", ".png", ".webp"],
+        "subtitle": [".vtt"],
+    }
+
+    def __init__(self):
+        self.to_import = False
+
+    def scan(self):
+        """scan and match media files"""
+        all_files = self.get_all_files()
+        self.match_files(all_files)
+        self.process_videos()
+
+        return self.to_import
+
+    def get_all_files(self):
+        """get all files in /import"""
+        all_files = ignore_filelist(os.listdir(self.IMPORT_DIR))
+        all_files.sort()
+
+        return all_files
+
+    @staticmethod
+    def _get_template():
+        """base dict for video"""
+        return {
+            "media": False,
+            "video_id": False,
+            "metadata": False,
+            "thumb": False,
+            "subtitle": [],
+        }
+
+    def match_files(self, all_files):
+        """loop through all files, join what matches"""
+        self.to_import = []
+
+        current_video = self._get_template()
+        last_base = False
+
+        for file_path in all_files:
+            base_name_raw, ext = os.path.splitext(file_path)
+            base_name, _ = os.path.splitext(base_name_raw)
+
+            key, file_path = self._detect_type(file_path, ext)
+            if not key or not file_path:
+                continue
+
+            if base_name != last_base:
+                if last_base:
+                    self.to_import.append(current_video)
+
+                current_video = self._get_template()
+                last_base = base_name
+
+            if key == "subtitle":
+                current_video["subtitle"].append(file_path)
+            else:
+                current_video[key] = file_path
+
+        if current_video.get("media"):
+            self.to_import.append(current_video)
+
+    def _detect_type(self, file_path, ext):
+        """detect metadata type for file"""
+
+        for key, value in self.EXT_MAP.items():
+            if ext in value:
+                return key, file_path
+
+        return False, False
+
+    def process_videos(self):
+        """loop through all videos"""
+        for current_video in self.to_import:
+            self._detect_youtube_id(current_video)
+            self._dump_thumb(current_video)
+            self._convert_video(current_video)
+
+    def _detect_youtube_id(self, current_video):
+        """find video id from filename or json"""
+        print(current_video)
+        youtube_id = self._extract_id_from_filename(current_video["media"])
+        if youtube_id:
+            current_video["video_id"] = youtube_id
+            return
+
+        youtube_id = self._extract_id_from_json(current_video["metadata"])
+        if youtube_id:
+            current_video["video_id"] = youtube_id
+            return
+
+        print(current_video["media"])
+        raise ValueError("failed to find video id")
+
+    @staticmethod
+    def _extract_id_from_filename(file_name):
+        """
+        look at the file name for the youtube id
+        expects filename ending in [<youtube_id>].<ext>
+        """
+        base_name, _ = os.path.splitext(file_name)
+        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", base_name)
+        if id_search:
+            youtube_id = id_search.group(1)
+            return youtube_id
+
+        print(f"id extraction failed from filename: {file_name}")
+
+        return False
+
+    def _extract_id_from_json(self, json_file):
+        """open json file and extract id"""
+        json_path = os.path.join(self.CACHE_DIR, "import", json_file)
+        with open(json_path, "r", encoding="utf-8") as f:
+            json_content = f.read()
+
+        youtube_id = json.loads(json_content)["id"]
+
+        return youtube_id
+
+    def _dump_thumb(self, current_video):
+        """extract embedded thumb before converting"""
+        if current_video["thumb"]:
+            return
+
+        # write thumb to disk here
+        # ffmpeg -dump_attachment:t "" -i filename.mkv
+        # ffmpeg -i video.mp4 -map 0:v -map -0:V -c copy cover.jpg
+        # webm
+
+    def _convert_video(self, current_video):
+        """convert if needed"""
+        current_path = os.path.join(
+            self.CACHE_DIR, "import", current_video["media"]
+        )
+        base_path, ext = os.path.splitext(current_path)
+        if ext == ".mp4":
+            return
+
+        new_path = base_path + ".mp4"
+        subprocess.run(
+            [
+                "ffmpeg",
+                "-i",
+                current_path,
+                new_path,
+                "-loglevel",
+                "warning",
+                "-stats",
+            ],
+            check=True,
+        )
+        current_video["media"] = new_path
+        os.remove(current_path)
+
+
+class ManualImportOld:
     """import and indexing existing video files"""
 
     CONFIG = AppConfig().config

From e1c470239877327d3030246d8c4bd7549e8d9ede Mon Sep 17 00:00:00 2001
From: DanielBatteryStapler <danielbatterystapler@gmail.com>
Date: Sat, 30 Jul 2022 10:05:10 -0400
Subject: [PATCH 02/20] implement LDAP as authentication backend support (#274)

---
 Dockerfile                       |  2 +-
 README.md                        | 12 ++++++++++++
 docs/FAQ.md                      |  3 ---
 tubearchivist/config/settings.py | 29 +++++++++++++++++++++++++++++
 tubearchivist/requirements.txt   |  1 +
 5 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0d09add1..071da0c5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,7 +7,7 @@ FROM python:3.10.5-slim-bullseye AS builder
 ARG TARGETPLATFORM
 
 RUN apt-get update
-RUN apt-get install -y --no-install-recommends build-essential gcc
+RUN apt-get install -y --no-install-recommends build-essential gcc libldap2-dev libsasl2-dev libssl-dev
 
 # install requirements
 COPY ./tubearchivist/requirements.txt /requirements.txt
diff --git a/README.md b/README.md
index dd2c0142..1a65628f 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,18 @@ Should that not be an option, the Tube Archivist container takes these two addit
 
 Changing any of these two environment variables will change the files *nginx.conf* and *uwsgi.ini* at startup using `sed` in your container.
 
+## LDAP Authentication
+LDAP authentication is not yet available in *stable* builds but is implemented for *unstable*. It can be enabled and configured using the following environment variables:
+
+ - `TA_LDAP` (ex: `true`) Set to anything besides empty string to use LDAP authentication instead of local user authentication.
+ - `TA_LDAP_SERVER_URI` (ex: `ldap://ldap-server:389`) Set to the uri of your LDAP server.
+ - `TA_LDAP_BIND_DN` (ex: `uid=search-user,ou=users,dc=your-server`) DN of the user that is able to perform searches on your LDAP account.
+ - `TA_LDAP_BIND_PASSWORD` (ex: `yoursecretpassword`) Password for the search user.
+ - `TA_LDAP_USER_BASE` (ex: `ou=users,dc=your-server`) Search base for user filter.
+ - `TA_LDAP_USER_FILTER` (ex: `(objectClass=user)`) Filter for valid users. Login usernames are automatically matched using `uid` and does not need to be specified in this filter.
+
+When LDAP authentication is enabled django passwords (e.g. the password defined in TA_PASSWORD) will not allow you to login, only the LDAP server is used.
+
 ### Elasticsearch
 **Note**: Tube Archivist depends on Elasticsearch 8. 
 
diff --git a/docs/FAQ.md b/docs/FAQ.md
index cec6a718..f3e4cc90 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -29,6 +29,3 @@ So Docker is the only supported installation method. If you don't have any exper
 
 ## 4. Finetuning Elasticsearch
 A minimal configuration of Elasticsearch (ES) is provided in the example docker-compose.yml file. ES is highly configurable and very interesting to learn more about. Refer to the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html) if you want to get into it.
-
-## 5. Advanced Authentication
-If you like to use things like SSO, LDAP or 2FA to login, consider using something like Authelia as a reverse proxy so this project can focus on the core task. Tube Archivist has a *remember me* checkbox at login to extend your sessions lifetime in your browser. 
diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py
index 569eed62..0fe904e7 100644
--- a/tubearchivist/config/settings.py
+++ b/tubearchivist/config/settings.py
@@ -14,7 +14,9 @@ import hashlib
 from os import environ, path
 from pathlib import Path
 
+import ldap
 from corsheaders.defaults import default_headers
+from django_auth_ldap.config import LDAPSearch
 from home.src.ta.config import AppConfig
 
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
@@ -83,6 +85,33 @@ TEMPLATES = [
 
 WSGI_APPLICATION = "config.wsgi.application"
 
+if bool(environ.get("TA_LDAP")):
+    global AUTH_LDAP_SERVER_URI
+    AUTH_LDAP_SERVER_URI = environ.get("TA_LDAP_SERVER_URI")
+
+    global AUTH_LDAP_BIND_DN
+    AUTH_LDAP_BIND_DN = environ.get("TA_LDAP_BIND_DN")
+
+    global AUTH_LDAP_BIND_PASSWORD
+    AUTH_LDAP_BIND_PASSWORD = environ.get("TA_LDAP_BIND_PASSWORD")
+
+    global AUTH_LDAP_USER_SEARCH
+    AUTH_LDAP_USER_SEARCH = LDAPSearch(
+        environ.get("TA_LDAP_USER_BASE"),
+        ldap.SCOPE_SUBTREE,
+        "(&(uid=%(user)s)" + environ.get("TA_LDAP_USER_FILTER") + ")",
+    )
+
+    global AUTH_LDAP_USER_ATTR_MAP
+    AUTH_LDAP_USER_ATTR_MAP = {
+        "username": "uid",
+        "first_name": "givenName",
+        "last_name": "sn",
+        "email": "mail",
+    }
+
+    global AUTHENTICATION_BACKENDS
+    AUTHENTICATION_BACKENDS = ("django_auth_ldap.backend.LDAPBackend",)
 
 # Database
 # https://docs.djangoproject.com/en/3.2/ref/settings/#databases
diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt
index b07e8e46..ca8bc1d8 100644
--- a/tubearchivist/requirements.txt
+++ b/tubearchivist/requirements.txt
@@ -10,3 +10,4 @@ ryd-client==0.0.3
 uWSGI==2.0.20
 whitenoise==6.2.0
 yt_dlp==2022.7.18
+django-auth-ldap==4.1.0

From 91a169fa3e9c759493e90851895bca8bc87d46c2 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 28 Jul 2022 16:03:32 +0700
Subject: [PATCH 03/20] add FUNDING.yml

---
 .github/FUNDING.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .github/FUNDING.yml

diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..6248d85f
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,5 @@
+github: bbilly1
+ko_fi: bbilly1
+patreon: octocat
+tidelift: npm/octo-package
+custom: https://paypal.me/bbilly1
\ No newline at end of file

From 61da0c8af375b420f80b8ef8bd637822446759ec Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Sat, 30 Jul 2022 20:52:45 +0700
Subject: [PATCH 04/20] WIP: extract thumb from mkv

---
 tubearchivist/home/src/index/filesystem.py | 40 ++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index dfd556c9..c16cff65 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -297,6 +297,46 @@ class ImportFolderScanner:
         if current_video["thumb"]:
             return
 
+        media_file = current_video["media"]
+        media_path = os.path.join(self.CACHE_DIR, "import", media_file)
+        base_name, ext = os.path.splitext(media_path)
+
+        if ext == ".mkv":
+            thumb_stream = self._get_mkv_thumb_stream(media_path)
+        elif ext == ".mp4":
+            thumb_stream = 0
+        elif ext == ".webm":
+            print("webm doesn't support thumbnail embed")
+
+    @staticmethod
+    def _get_mkv_thumb_stream(media_path):
+        """get stream idx of thumbnail for mkv files"""
+        streams_raw = subprocess.run(
+            [
+                "ffprobe",
+                "-v",
+                "error",
+                "-show_entries",
+                "stream_tags",
+                "-print_format",
+                "json",
+                media_path,
+            ],
+            capture_output=True,
+            check=True,
+        )
+        streams = json.loads(streams_raw.stdout.decode())
+
+        for idx, stream in enumerate(streams["streams"]):
+            tags = stream["tags"]
+            if "mimetype" in tags and tags["filename"].startswith("cover"):
+                return idx
+
+        return False
+
+
+        # ffprobe /cache/import/The\ Single\ Australian\ Farm\ That’s\ Bigger\ Than\ 49\ Countries\ \[YiSN21jKp4s\].mkv -hide_banner -show_entries "stream_tags" -print_format json
+
         # write thumb to disk here
         # ffmpeg -dump_attachment:t "" -i filename.mkv
         # ffmpeg -i video.mp4 -map 0:v -map -0:V -c copy cover.jpg

From 195acd3246757bf73fd6db25ce6a73a29b49389e Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Sun, 31 Jul 2022 00:02:09 +0700
Subject: [PATCH 05/20] WIP: dump thumbnails from mp4 and mkv

---
 tubearchivist/home/src/index/filesystem.py | 100 +++++++++++++++++----
 1 file changed, 81 insertions(+), 19 deletions(-)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index c16cff65..f961d48b 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -301,23 +301,74 @@ class ImportFolderScanner:
         media_path = os.path.join(self.CACHE_DIR, "import", media_file)
         base_name, ext = os.path.splitext(media_path)
 
+        new_path = False
         if ext == ".mkv":
-            thumb_stream = self._get_mkv_thumb_stream(media_path)
+            idx, thumb_type = self._get_mkv_thumb_stream(media_path)
+            if idx:
+                new_path = self.dump_mpv_thumb(media_path, idx, thumb_type)
+
         elif ext == ".mp4":
-            thumb_stream = 0
-        elif ext == ".webm":
-            print("webm doesn't support thumbnail embed")
+            thumb_type = self.get_mp4_thumb_type(media_path)
+            if thumb_type:
+                new_path = self.dump_mp4_thumb(media_path, thumb_type)
+
+        if new_path:
+            current_video["thumb"] = new_path
+
+    def _get_mkv_thumb_stream(self, media_path):
+        """get stream idx of thumbnail for mkv files"""
+        streams = self._get_streams(media_path)
+        attachments = [
+            i for i in streams["streams"] if i["codec_type"] == "attachment"
+        ]
+
+        for idx, stream in enumerate(attachments):
+            tags = stream["tags"]
+            if "mimetype" in tags and tags["filename"].startswith("cover"):
+                _, ext = os.path.splitext(tags["filename"])
+                return idx, ext
+
+        return False, False
 
     @staticmethod
-    def _get_mkv_thumb_stream(media_path):
-        """get stream idx of thumbnail for mkv files"""
+    def dump_mpv_thumb(media_path, idx, thumb_type):
+        """write cover to disk for mkv"""
+        _, media_ext = os.path.splitext(media_path)
+        new_path = f"{media_path.rstrip(media_ext)}{thumb_type}"
+        subprocess.run(
+            [
+                "ffmpeg",
+                "-v",
+                "quiet",
+                f"-dump_attachment:t:{idx}",
+                new_path,
+                "-i",
+                media_path,
+            ],
+            check=False,
+        )
+
+        return new_path
+
+    def get_mp4_thumb_type(self, media_path):
+        """dedect filetype of embedded thumbnail"""
+        streams = self._get_streams(media_path)
+
+        for stream in streams["streams"]:
+            if stream["codec_name"] in ["png", "jpg"]:
+                return stream["codec_name"]
+
+        return False
+
+    @staticmethod
+    def _get_streams(media_path):
+        """return all streams from media_path"""
         streams_raw = subprocess.run(
             [
                 "ffprobe",
                 "-v",
                 "error",
-                "-show_entries",
-                "stream_tags",
+                "-show_streams",
                 "-print_format",
                 "json",
                 media_path,
@@ -327,20 +378,31 @@ class ImportFolderScanner:
         )
         streams = json.loads(streams_raw.stdout.decode())
 
-        for idx, stream in enumerate(streams["streams"]):
-            tags = stream["tags"]
-            if "mimetype" in tags and tags["filename"].startswith("cover"):
-                return idx
+        return streams
 
-        return False
+    @staticmethod
+    def dump_mp4_thumb(media_path, thumb_type):
+        """save cover to disk"""
+        _, ext = os.path.splitext(media_path)
+        new_path = f"{media_path.rstrip(ext)}.{thumb_type}"
 
+        subprocess.run(
+            [
+                "ffmpeg",
+                "-i",
+                media_path,
+                "-map",
+                "0:v",
+                "-map",
+                "-0:V",
+                "-c",
+                "copy",
+                new_path,
+            ],
+            check=True,
+        )
 
-        # ffprobe /cache/import/The\ Single\ Australian\ Farm\ That’s\ Bigger\ Than\ 49\ Countries\ \[YiSN21jKp4s\].mkv -hide_banner -show_entries "stream_tags" -print_format json
-
-        # write thumb to disk here
-        # ffmpeg -dump_attachment:t "" -i filename.mkv
-        # ffmpeg -i video.mp4 -map 0:v -map -0:V -c copy cover.jpg
-        # webm
+        return new_path
 
     def _convert_video(self, current_video):
         """convert if needed"""

From d0675368f48bbd33c5a224bf4afedc5cea457b2e Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Mon, 8 Aug 2022 14:52:06 +0700
Subject: [PATCH 06/20] bump django

---
 tubearchivist/requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt
index ca8bc1d8..42e4656b 100644
--- a/tubearchivist/requirements.txt
+++ b/tubearchivist/requirements.txt
@@ -1,6 +1,7 @@
 beautifulsoup4==4.11.1
 celery==5.2.7
-Django==4.0.6
+Django==4.1
+django-auth-ldap==4.1.0
 django-cors-headers==3.13.0
 djangorestframework==3.13.1
 Pillow==9.2.0
@@ -10,4 +11,3 @@ ryd-client==0.0.3
 uWSGI==2.0.20
 whitenoise==6.2.0
 yt_dlp==2022.7.18
-django-auth-ldap==4.1.0

From b70cbfa0a87a521df4a1eb370f0e82b016a0166b Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Mon, 8 Aug 2022 14:52:33 +0700
Subject: [PATCH 07/20] convert thumbnail to jpg for manual import

---
 tubearchivist/home/src/index/filesystem.py | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index f961d48b..4d79e88b 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -19,6 +19,9 @@ from home.src.index.video import index_new_video
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import clean_string, ignore_filelist
 from home.src.ta.ta_redis import RedisArchivist
+from PIL import Image, ImageFile
+
+ImageFile.LOAD_TRUNCATED_IMAGES = True
 
 
 class FilesystemScanner:
@@ -248,6 +251,7 @@ class ImportFolderScanner:
         for current_video in self.to_import:
             self._detect_youtube_id(current_video)
             self._dump_thumb(current_video)
+            self._convert_thumb(current_video)
             self._convert_video(current_video)
 
     def _detect_youtube_id(self, current_video):
@@ -360,6 +364,25 @@ class ImportFolderScanner:
 
         return False
 
+    def _convert_thumb(self, current_video):
+        """convert all thumbnails to jpg"""
+        if not current_video["thumb"]:
+            return
+
+        thumb_file = current_video["thumb"]
+        thumb_path = os.path.join(self.CACHE_DIR, "import", thumb_file)
+
+        base_path, ext = os.path.splitext(thumb_path)
+        if ext == ".jpg":
+            return
+
+        new_path = f"{base_path}.jpg"
+        img_raw = Image.open(thumb_path)
+        img_raw.convert("RGB").save(new_path)
+
+        os.remove(thumb_path)
+        current_video["thumb"] = new_path
+
     @staticmethod
     def _get_streams(media_path):
         """return all streams from media_path"""

From 9fd6f9c58b76af2fcfb7e22d6c60c755b63f9813 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Mon, 8 Aug 2022 15:57:42 +0700
Subject: [PATCH 08/20] use absolute paths for manual import

---
 tubearchivist/home/src/index/filesystem.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 4d79e88b..f4ec8fed 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -191,7 +191,8 @@ class ImportFolderScanner:
 
     def get_all_files(self):
         """get all files in /import"""
-        all_files = ignore_filelist(os.listdir(self.IMPORT_DIR))
+        rel_paths = ignore_filelist(os.listdir(self.IMPORT_DIR))
+        all_files = [os.path.join(self.IMPORT_DIR, i) for i in rel_paths]
         all_files.sort()
 
         return all_files
@@ -301,9 +302,8 @@ class ImportFolderScanner:
         if current_video["thumb"]:
             return
 
-        media_file = current_video["media"]
-        media_path = os.path.join(self.CACHE_DIR, "import", media_file)
-        base_name, ext = os.path.splitext(media_path)
+        media_path = current_video["media"]
+        _, ext = os.path.splitext(media_path)
 
         new_path = False
         if ext == ".mkv":
@@ -369,8 +369,7 @@ class ImportFolderScanner:
         if not current_video["thumb"]:
             return
 
-        thumb_file = current_video["thumb"]
-        thumb_path = os.path.join(self.CACHE_DIR, "import", thumb_file)
+        thumb_path = current_video["thumb"]
 
         base_path, ext = os.path.splitext(thumb_path)
         if ext == ".jpg":
@@ -429,9 +428,7 @@ class ImportFolderScanner:
 
     def _convert_video(self, current_video):
         """convert if needed"""
-        current_path = os.path.join(
-            self.CACHE_DIR, "import", current_video["media"]
-        )
+        current_path = current_video["media"]
         base_path, ext = os.path.splitext(current_path)
         if ext == ".mp4":
             return

From 8f711d359bf1a96298a3f3da13ed7dbdb9e30c9c Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Mon, 8 Aug 2022 18:28:32 +0700
Subject: [PATCH 09/20] implement youtube_meta and media_path overwrite in
 YoutubeVideo

---
 tubearchivist/home/src/index/video.py | 44 +++++++++++++++++----------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index cc8d65b7..48c3eeb4 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -426,17 +426,20 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         self.video_overwrites = video_overwrites
         self.es_path = f"{self.index_name}/_doc/{youtube_id}"
 
-    def build_json(self):
+    def build_json(self, youtube_meta_overwrite=False, media_path=False):
         """build json dict of video"""
         self.get_from_youtube()
-        if not self.youtube_meta:
+        if not self.youtube_meta and not youtube_meta_overwrite:
             return
 
+        if not self.youtube_meta:
+            self.youtube_meta = youtube_meta_overwrite
+
         self._process_youtube_meta()
         self._add_channel()
         self._add_stats()
         self.add_file_path()
-        self.add_player()
+        self.add_player(media_path)
         if self.config["downloads"]["integrate_ryd"]:
             self._get_ryd_stats()
 
@@ -518,8 +521,28 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
 
         raise FileNotFoundError
 
-    def add_player(self):
+    def add_player(self, media_path=False):
         """add player information for new videos"""
+        vid_path = self._get_vid_path(media_path)
+
+        duration_handler = DurationConverter()
+        duration = duration_handler.get_sec(vid_path)
+        duration_str = duration_handler.get_str(duration)
+        self.json_data.update(
+            {
+                "player": {
+                    "watched": False,
+                    "duration": duration,
+                    "duration_str": duration_str,
+                }
+            }
+        )
+
+    def _get_vid_path(self, media_path=False):
+        """get path of media file"""
+        if media_path:
+            return media_path
+
         try:
             # when indexing from download task
             vid_path = self.build_dl_cache_path()
@@ -535,18 +558,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
             else:
                 raise FileNotFoundError("could not find video file") from err
 
-        duration_handler = DurationConverter()
-        duration = duration_handler.get_sec(vid_path)
-        duration_str = duration_handler.get_str(duration)
-        self.json_data.update(
-            {
-                "player": {
-                    "watched": False,
-                    "duration": duration,
-                    "duration_str": duration_str,
-                }
-            }
-        )
+        return vid_path
 
     def add_file_path(self):
         """build media_url for where file will be located"""

From b76fa693968a8d28e21f08a46ea7993b49768173 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Mon, 8 Aug 2022 19:18:27 +0700
Subject: [PATCH 10/20] WIP: rewrite ManualImport, identify and archive

---
 tubearchivist/home/src/index/filesystem.py | 164 ++++++---------------
 tubearchivist/home/tasks.py                |   7 +-
 2 files changed, 51 insertions(+), 120 deletions(-)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index f4ec8fed..5e80ebc8 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -12,10 +12,9 @@ import shutil
 import subprocess
 
 from home.src.download.queue import PendingList
-from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
-from home.src.index.video import index_new_video
+from home.src.index.video import YoutubeVideo, index_new_video
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import clean_string, ignore_filelist
 from home.src.ta.ta_redis import RedisArchivist
@@ -255,6 +254,8 @@ class ImportFolderScanner:
             self._convert_thumb(current_video)
             self._convert_video(current_video)
 
+            ManualImport(current_video, self.CONFIG).run()
+
     def _detect_youtube_id(self, current_video):
         """find video id from filename or json"""
         print(current_video)
@@ -450,132 +451,65 @@ class ImportFolderScanner:
         os.remove(current_path)
 
 
-class ManualImportOld:
-    """import and indexing existing video files"""
+class ManualImport:
+    """import single identified video"""
 
-    CONFIG = AppConfig().config
-    CACHE_DIR = CONFIG["application"]["cache_dir"]
-    IMPORT_DIR = os.path.join(CACHE_DIR, "import")
+    def __init__(self, current_video, config):
+        self.current_video = current_video
+        self.config = config
 
-    def __init__(self):
-        self.identified = self.import_folder_parser()
+    def run(self):
+        """run all"""
+        json_data = self.index_metadata()
+        self._move_to_archive(json_data)
+        self._cleanup()
 
-    def import_folder_parser(self):
-        """detect files in import folder"""
-        import_files = os.listdir(self.IMPORT_DIR)
-        to_import = ignore_filelist(import_files)
-        to_import.sort()
-        video_files = [i for i in to_import if not i.endswith(".json")]
+    def index_metadata(self):
+        """get metadata from yt or json"""
+        video = YoutubeVideo(self.current_video["video_id"])
+        video.build_json(
+            youtube_meta_overwrite=self._get_info_json(),
+            media_path=self.current_video["media"],
+        )
+        video.check_subtitles()
+        video.upload_to_es()
 
-        identified = []
+        return video.json_data
 
-        for file_path in video_files:
+    def _get_info_json(self):
+        """read info_json from file"""
+        if not self.current_video["metadata"]:
+            return False
 
-            file_dict = {"video_file": file_path}
-            file_name, _ = os.path.splitext(file_path)
+        with open(self.current_video["metadata"], "r", encoding="utf-8") as f:
+            info_json = json.loads(f.read())
 
-            matching_json = [
-                i
-                for i in to_import
-                if i.startswith(file_name) and i.endswith(".json")
-            ]
-            if matching_json:
-                json_file = matching_json[0]
-                youtube_id = self.extract_id_from_json(json_file)
-                file_dict.update({"json_file": json_file})
-            else:
-                youtube_id = self.extract_id_from_filename(file_name)
-                file_dict.update({"json_file": False})
+        return info_json
 
-            file_dict.update({"youtube_id": youtube_id})
-            identified.append(file_dict)
+    def _move_to_archive(self, json_data):
+        """move identified media file to archive"""
+        videos = self.config["application"]["videos"]
 
-        return identified
+        channel, file = os.path.split(json_data["media_url"])
+        channel_folder = os.path.join(videos, channel)
+        if not os.path.exists(channel_folder):
+            os.makedirs(channel_folder)
 
-    @staticmethod
-    def extract_id_from_filename(file_name):
-        """
-        look at the file name for the youtube id
-        expects filename ending in [<youtube_id>].<ext>
-        """
-        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
-        if id_search:
-            youtube_id = id_search.group(1)
-            return youtube_id
+        old_path = self.current_video["media"]
+        new_path = os.path.join(channel_folder, file)
+        shutil.move(old_path, new_path, copy_function=shutil.copyfile)
 
-        print("failed to extract youtube id for: " + file_name)
-        raise Exception
+    def _cleanup(self):
+        """cleanup leftover files"""
+        if os.path.exists(self.current_video["metadata"]):
+            os.remove(self.current_video["metadata"])
 
-    def extract_id_from_json(self, json_file):
-        """open json file and extract id"""
-        json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-        with open(json_path, "r", encoding="utf-8") as f:
-            json_content = f.read()
+        if os.path.exists(self.current_video["thumb"]):
+            os.remove(self.current_video["thumb"])
 
-        youtube_id = json.loads(json_content)["id"]
-
-        return youtube_id
-
-    def process_import(self):
-        """go through identified media files"""
-
-        all_videos_added = []
-
-        for media_file in self.identified:
-            json_file = media_file["json_file"]
-            video_file = media_file["video_file"]
-            youtube_id = media_file["youtube_id"]
-
-            video_path = os.path.join(self.CACHE_DIR, "import", video_file)
-
-            self.move_to_cache(video_path, youtube_id)
-
-            # identify and archive
-            vid_dict = index_new_video(youtube_id)
-            VideoDownloader([youtube_id]).move_to_archive(vid_dict)
-            youtube_id = vid_dict["youtube_id"]
-            thumb_url = vid_dict["vid_thumb_url"]
-            all_videos_added.append((youtube_id, thumb_url))
-
-            # cleanup
-            if os.path.exists(video_path):
-                os.remove(video_path)
-            if json_file:
-                json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-                os.remove(json_path)
-
-        return all_videos_added
-
-    def move_to_cache(self, video_path, youtube_id):
-        """move identified video file to cache, convert to mp4"""
-        file_name = os.path.split(video_path)[-1]
-        video_file, ext = os.path.splitext(file_name)
-
-        # make sure youtube_id is in filename
-        if youtube_id not in video_file:
-            video_file = f"{video_file}_{youtube_id}"
-
-        # move, convert if needed
-        if ext == ".mp4":
-            new_file = video_file + ext
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            shutil.move(video_path, dest_path, copy_function=shutil.copyfile)
-        else:
-            print(f"processing with ffmpeg: {video_file}")
-            new_file = video_file + ".mp4"
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            subprocess.run(
-                [
-                    "ffmpeg",
-                    "-i",
-                    video_path,
-                    dest_path,
-                    "-loglevel",
-                    "warning",
-                    "-stats",
-                ],
-                check=True,
-            )
+        for subtitle_file in self.current_video["subtitle"]:
+            if os.path.exists(subtitle_file):
+                os.remove(subtitle_file)
 
 
 def scan_filesystem():
diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py
index 8f78d4dd..ede17f26 100644
--- a/tubearchivist/home/tasks.py
+++ b/tubearchivist/home/tasks.py
@@ -20,7 +20,7 @@ from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.index_setup import backup_all_indexes, restore_from_backup
 from home.src.index.channel import YoutubeChannel
 from home.src.index.filesystem import (
-    ManualImport,
+    ImportFolderScanner,
     reindex_old_documents,
     scan_filesystem,
 )
@@ -150,10 +150,7 @@ def run_manual_import():
     try:
         have_lock = my_lock.acquire(blocking=False)
         if have_lock:
-            import_handler = ManualImport()
-            if import_handler.identified:
-                all_videos_added = import_handler.process_import()
-                ThumbManager().download_vid(all_videos_added)
+            ImportFolderScanner().scan()
         else:
             print("Did not acquire lock form import.")
 

From fb4d6b7be3f4b76129829ee765de8c0a61135d94 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 10 Aug 2022 21:03:54 +0700
Subject: [PATCH 11/20] major refactor ThumbManager

---
 tubearchivist/api/src/search_processor.py     |   4 +-
 tubearchivist/home/src/download/queue.py      |  11 +-
 .../home/src/download/subscriptions.py        |  14 +-
 tubearchivist/home/src/download/thumbnails.py | 485 +++++++++---------
 tubearchivist/home/src/frontend/searching.py  |   4 +-
 tubearchivist/home/src/index/channel.py       |   8 +-
 tubearchivist/home/src/index/playlist.py      |  18 +-
 tubearchivist/home/src/index/reindex.py       |   8 +-
 tubearchivist/home/tasks.py                   |  14 +-
 9 files changed, 279 insertions(+), 287 deletions(-)

diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py
index 6a1e2ddd..07102310 100644
--- a/tubearchivist/api/src/search_processor.py
+++ b/tubearchivist/api/src/search_processor.py
@@ -74,7 +74,7 @@ class SearchProcess:
         media_url = urllib.parse.quote(video_dict["media_url"])
         vid_last_refresh = date_praser(video_dict["vid_last_refresh"])
         published = date_praser(video_dict["published"])
-        vid_thumb_url = ThumbManager().vid_thumb_path(video_id)
+        vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
         channel = self._process_channel(video_dict["channel"])
 
         if "subtitles" in video_dict:
@@ -113,7 +113,7 @@ class SearchProcess:
     def _process_download(self, download_dict):
         """run on single download item"""
         video_id = download_dict["youtube_id"]
-        vid_thumb_url = ThumbManager().vid_thumb_path(video_id)
+        vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
         published = date_praser(download_dict["published"])
 
         download_dict.update(
diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py
index b318b128..db7ae5d5 100644
--- a/tubearchivist/home/src/download/queue.py
+++ b/tubearchivist/home/src/download/queue.py
@@ -161,10 +161,7 @@ class PendingList(PendingIndex):
             self._parse_channel(entry["url"])
         elif entry["type"] == "playlist":
             self._parse_playlist(entry["url"])
-            new_thumbs = PlaylistSubscription().process_url_str(
-                [entry], subscribed=False
-            )
-            ThumbManager().download_playlist(new_thumbs)
+            PlaylistSubscription().process_url_str([entry], subscribed=False)
         else:
             raise ValueError(f"invalid url_type: {entry}")
 
@@ -198,7 +195,6 @@ class PendingList(PendingIndex):
         self.get_channels()
         bulk_list = []
 
-        thumb_handler = ThumbManager()
         for idx, youtube_id in enumerate(self.missing_videos):
             video_details = self.get_youtube_details(youtube_id)
             if not video_details:
@@ -209,8 +205,9 @@ class PendingList(PendingIndex):
             bulk_list.append(json.dumps(action))
             bulk_list.append(json.dumps(video_details))
 
-            thumb_needed = [(youtube_id, video_details["vid_thumb_url"])]
-            thumb_handler.download_vid(thumb_needed)
+            url = video_details["vid_thumb_url"]
+            ThumbManager(youtube_id).download_video_thumb(url)
+
             self._notify_add(idx)
 
         if bulk_list:
diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py
index d353ed06..7f8a1676 100644
--- a/tubearchivist/home/src/download/subscriptions.py
+++ b/tubearchivist/home/src/download/subscriptions.py
@@ -5,6 +5,7 @@ Functionality:
 """
 
 from home.src.download import queue  # partial import
+from home.src.download.thumbnails import ThumbManager
 from home.src.download.yt_dlp_base import YtWrap
 from home.src.es.connect import IndexPaginate
 from home.src.index.channel import YoutubeChannel
@@ -129,11 +130,9 @@ class PlaylistSubscription:
         all_indexed = IndexPaginate("ta_video", data).get_results()
         all_youtube_ids = [i["youtube_id"] for i in all_indexed]
 
-        new_thumbs = []
         for idx, playlist in enumerate(new_playlists):
-            url_type = playlist["type"]
             playlist_id = playlist["url"]
-            if not url_type == "playlist":
+            if not playlist["type"] == "playlist":
                 print(f"{playlist_id} not a playlist, skipping...")
                 continue
 
@@ -144,8 +143,11 @@ class PlaylistSubscription:
             playlist_h.upload_to_es()
             playlist_h.add_vids_to_playlist()
             self.channel_validate(playlist_h.json_data["playlist_channel_id"])
-            thumb = playlist_h.json_data["playlist_thumbnail"]
-            new_thumbs.append((playlist_id, thumb))
+
+            url = playlist_h.json_data["playlist_thumbnail"]
+            thumb = ThumbManager(playlist_id, item_type="playlist")
+            thumb.download_playlist_thumb(url)
+
             # notify
             message = {
                 "status": "message:subplaylist",
@@ -157,8 +159,6 @@ class PlaylistSubscription:
                 "message:subplaylist", message=message, expire=True
             )
 
-        return new_thumbs
-
     @staticmethod
     def channel_validate(channel_id):
         """make sure channel of playlist is there"""
diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py
index c317ee4d..8e6b8716 100644
--- a/tubearchivist/home/src/download/thumbnails.py
+++ b/tubearchivist/home/src/download/thumbnails.py
@@ -6,136 +6,64 @@ functionality:
 
 import base64
 import os
-from collections import Counter
 from io import BytesIO
 from time import sleep
 
 import requests
 from home.src.download import queue  # partial import
-from home.src.download import subscriptions  # partial import
+from home.src.es.connect import IndexPaginate
 from home.src.ta.config import AppConfig
-from home.src.ta.helper import ignore_filelist
-from home.src.ta.ta_redis import RedisArchivist
 from mutagen.mp4 import MP4, MP4Cover
 from PIL import Image, ImageFile, ImageFilter
 
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 
 
-class ThumbManager:
-    """handle thumbnails related functions"""
+class ThumbManagerBase:
+    """base class for thumbnail management"""
 
     CONFIG = AppConfig().config
-    MEDIA_DIR = CONFIG["application"]["videos"]
     CACHE_DIR = CONFIG["application"]["cache_dir"]
     VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
     CHANNEL_DIR = os.path.join(CACHE_DIR, "channels")
     PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists")
 
-    def get_all_thumbs(self):
-        """get all video artwork already downloaded"""
-        all_thumb_folders = ignore_filelist(os.listdir(self.VIDEO_DIR))
-        all_thumbs = []
-        for folder in all_thumb_folders:
-            folder_path = os.path.join(self.VIDEO_DIR, folder)
-            if os.path.isfile(folder_path):
-                self.update_path(folder)
-                all_thumbs.append(folder_path)
-                continue
-                # raise exemption here in a future version
-                # raise FileExistsError("video cache dir has files inside")
+    def __init__(self, item_id, item_type, fallback=False):
+        self.item_id = item_id
+        self.item_type = item_type
+        self.fallback = fallback
 
-            all_folder_thumbs = ignore_filelist(os.listdir(folder_path))
-            all_thumbs.extend(all_folder_thumbs)
+    def download_raw(self, url):
+        """download image from url, fallback image if unavailable"""
 
-        return all_thumbs
+        for i in range(3):
+            try:
+                response = requests.get(url, stream=True)
+                if response.ok:
+                    return Image.open(response.raw)
+                if response.status_code == 404:
+                    return self.get_fallback()
 
-    def update_path(self, file_name):
-        """reorganize thumbnails into folders as update path from v0.0.5"""
-        folder_name = file_name[0].lower()
-        folder_path = os.path.join(self.VIDEO_DIR, folder_name)
-        old_file = os.path.join(self.VIDEO_DIR, file_name)
-        new_file = os.path.join(folder_path, file_name)
-        os.makedirs(folder_path, exist_ok=True)
-        os.rename(old_file, new_file)
+            except requests.exceptions.ConnectionError:
+                print(f"{self.item_id}: retry thumbnail download {url}")
+                sleep((i + 1) ** i)
 
-    def get_needed_thumbs(self, missing_only=False):
-        """get a list of all missing thumbnails"""
-        all_thumbs = self.get_all_thumbs()
+        return self.get_fallback()
 
-        pending = queue.PendingList()
-        pending.get_download()
-        pending.get_indexed()
+    def get_fallback(self):
+        """get fallback thumbnail if not available"""
+        if self.fallback:
+            img_raw = Image.open(self.fallback)
+            return img_raw
 
-        needed_thumbs = []
-        for video in pending.all_videos:
-            youtube_id = video["youtube_id"]
-            thumb_url = video["vid_thumb_url"]
-            if missing_only:
-                if youtube_id + ".jpg" not in all_thumbs:
-                    needed_thumbs.append((youtube_id, thumb_url))
-            else:
-                needed_thumbs.append((youtube_id, thumb_url))
-
-        for video in pending.all_pending + pending.all_ignored:
-            youtube_id = video["youtube_id"]
-            thumb_url = video["vid_thumb_url"]
-            if missing_only:
-                if youtube_id + ".jpg" not in all_thumbs:
-                    needed_thumbs.append((youtube_id, thumb_url))
-            else:
-                needed_thumbs.append((youtube_id, thumb_url))
-
-        return needed_thumbs
-
-    def get_missing_channels(self):
-        """get all channel artwork"""
-        all_channel_art = os.listdir(self.CHANNEL_DIR)
-        files = [i[0:24] for i in all_channel_art]
-        cached_channel_ids = [k for (k, v) in Counter(files).items() if v > 1]
-        channel_sub = subscriptions.ChannelSubscription()
-        channels = channel_sub.get_channels(subscribed_only=False)
-
-        missing_channels = []
-        for channel in channels:
-            channel_id = channel["channel_id"]
-            if channel_id not in cached_channel_ids:
-                channel_banner = channel["channel_banner_url"]
-                channel_thumb = channel["channel_thumb_url"]
-                missing_channels.append(
-                    (channel_id, channel_thumb, channel_banner)
-                )
-
-        return missing_channels
-
-    def get_missing_playlists(self):
-        """get all missing playlist artwork"""
-        all_downloaded = ignore_filelist(os.listdir(self.PLAYLIST_DIR))
-        all_ids_downloaded = [i.replace(".jpg", "") for i in all_downloaded]
-        playlist_sub = subscriptions.PlaylistSubscription()
-        playlists = playlist_sub.get_playlists(subscribed_only=False)
-
-        missing_playlists = []
-        for playlist in playlists:
-            playlist_id = playlist["playlist_id"]
-            if playlist_id not in all_ids_downloaded:
-                playlist_thumb = playlist["playlist_thumbnail"]
-                missing_playlists.append((playlist_id, playlist_thumb))
-
-        return missing_playlists
-
-    def get_raw_img(self, img_url, thumb_type):
-        """get raw image from youtube and handle 404"""
-        try:
-            app_root = self.CONFIG["application"]["app_root"]
-        except KeyError:
-            # lazy keyerror fix to not have to deal with a strange startup
-            # racing contition between the threads in HomeConfig.ready()
-            app_root = "/app"
+        app_root = self.CONFIG["application"]["app_root"]
         default_map = {
             "video": os.path.join(
                 app_root, "static/img/default-video-thumb.jpg"
             ),
+            "playlist": os.path.join(
+                app_root, "static/img/default-video-thumb.jpg"
+            ),
             "icon": os.path.join(
                 app_root, "static/img/default-channel-icon.jpg"
             ),
@@ -143,116 +71,134 @@ class ThumbManager:
                 app_root, "static/img/default-channel-banner.jpg"
             ),
         }
-        if img_url:
-            try:
-                response = requests.get(img_url, stream=True)
-            except ConnectionError:
-                sleep(5)
-                response = requests.get(img_url, stream=True)
-            if not response.ok and not response.status_code == 404:
-                print("retry thumbnail download for " + img_url)
-                sleep(5)
-                response = requests.get(img_url, stream=True)
-        else:
-            response = False
-        if not response or response.status_code == 404:
-            # use default
-            img_raw = Image.open(default_map[thumb_type])
-        else:
-            # use response
-            img_obj = response.raw
-            img_raw = Image.open(img_obj)
+
+        img_raw = Image.open(default_map[self.item_type])
 
         return img_raw
 
-    def download_vid(self, missing_thumbs, notify=True):
-        """download all missing thumbnails from list"""
-        print(f"downloading {len(missing_thumbs)} thumbnails")
-        for idx, (youtube_id, thumb_url) in enumerate(missing_thumbs):
-            folder_path = os.path.join(self.VIDEO_DIR, youtube_id[0].lower())
-            thumb_path = os.path.join(
-                self.CACHE_DIR, self.vid_thumb_path(youtube_id)
-            )
 
-            os.makedirs(folder_path, exist_ok=True)
-            img_raw = self.get_raw_img(thumb_url, "video")
+class ThumbManager(ThumbManagerBase):
+    """handle thumbnails related functions"""
 
-            width, height = img_raw.size
-            if not width / height == 16 / 9:
-                new_height = width / 16 * 9
-                offset = (height - new_height) / 2
-                img_raw = img_raw.crop((0, offset, width, height - offset))
-            img_raw.convert("RGB").save(thumb_path)
+    def __init__(self, item_id, item_type="video", fallback=False):
+        super().__init__(item_id, item_type, fallback=fallback)
 
-            progress = f"{idx + 1}/{len(missing_thumbs)}"
-            if notify:
-                mess_dict = {
-                    "status": "message:add",
-                    "level": "info",
-                    "title": "Processing Videos",
-                    "message": "Downloading Thumbnails, Progress: " + progress,
-                }
-                if idx + 1 == len(missing_thumbs):
-                    expire = 4
-                else:
-                    expire = True
+    def download(self, url):
+        """download thumbnail"""
+        print(f"{self.item_id}: download {self.item_type} thumbnail")
+        if self.item_type == "video":
+            self.download_video_thumb(url)
+        elif self.item_type == "channel":
+            self.download_channel_art(url)
+        elif self.item_type == "playlist":
+            self.download_playlist_thumb(url)
 
-                RedisArchivist().set_message(
-                    "message:add", mess_dict, expire=expire
-                )
+    def delete(self):
+        """delete thumbnail file"""
+        print(f"{self.item_id}: delete {self.item_type} thumbnail")
+        if self.item_type == "video":
+            self.delete_video_thumb()
+        elif self.item_type == "channel":
+            self.delete_channel_thumb()
+        elif self.item_type == "playlist":
+            self.delete_playlist_thumb()
 
-            if idx + 1 % 25 == 0:
-                print("thumbnail progress: " + progress)
+    def download_video_thumb(self, url, skip_existing=False):
+        """pass url for video thumbnail"""
+        folder_path = os.path.join(self.VIDEO_DIR, self.item_id[0].lower())
+        thumb_path = self.vid_thumb_path(absolute=True)
 
-    def download_chan(self, missing_channels):
-        """download needed artwork for channels"""
-        print(f"downloading {len(missing_channels)} channel artwork")
-        for channel in missing_channels:
-            channel_id, channel_thumb, channel_banner = channel
+        if skip_existing and os.path.exists(thumb_path):
+            return
 
-            thumb_path = os.path.join(
-                self.CHANNEL_DIR, channel_id + "_thumb.jpg"
-            )
-            img_raw = self.get_raw_img(channel_thumb, "icon")
-            img_raw.convert("RGB").save(thumb_path)
+        os.makedirs(folder_path, exist_ok=True)
+        img_raw = self.download_raw(url)
+        width, height = img_raw.size
 
-            banner_path = os.path.join(
-                self.CHANNEL_DIR, channel_id + "_banner.jpg"
-            )
-            img_raw = self.get_raw_img(channel_banner, "banner")
-            img_raw.convert("RGB").save(banner_path)
+        if not width / height == 16 / 9:
+            new_height = width / 16 * 9
+            offset = (height - new_height) / 2
+            img_raw = img_raw.crop((0, offset, width, height - offset))
 
-            mess_dict = {
-                "status": "message:download",
-                "level": "info",
-                "title": "Processing Channels",
-                "message": "Downloading Channel Art.",
-            }
-            key = "message:download"
-            RedisArchivist().set_message(key, mess_dict, expire=True)
+        img_raw.convert("RGB").save(thumb_path)
 
-    def download_playlist(self, missing_playlists):
-        """download needed artwork for playlists"""
-        print(f"downloading {len(missing_playlists)} playlist artwork")
-        for playlist in missing_playlists:
-            playlist_id, playlist_thumb_url = playlist
-            thumb_path = os.path.join(self.PLAYLIST_DIR, playlist_id + ".jpg")
-            img_raw = self.get_raw_img(playlist_thumb_url, "video")
-            img_raw.convert("RGB").save(thumb_path)
+    def vid_thumb_path(self, absolute=False):
+        """build expected path for video thumbnail from youtube_id"""
+        folder_name = self.item_id[0].lower()
+        folder_path = os.path.join("videos", folder_name)
+        thumb_path = os.path.join(folder_path, f"{self.item_id}.jpg")
+        if absolute:
+            thumb_path = os.path.join(self.CACHE_DIR, thumb_path)
 
-            mess_dict = {
-                "status": "message:download",
-                "level": "info",
-                "title": "Processing Playlists",
-                "message": "Downloading Playlist Art.",
-            }
-            key = "message:download"
-            RedisArchivist().set_message(key, mess_dict, expire=True)
+        return thumb_path
 
-    def get_base64_blur(self, youtube_id):
+    def download_channel_art(self, urls, skip_existing=False):
+        """download icon and banner, urls is (thumb_url, banner_url)"""
+        thumb_url, banner_url = urls
+        self._download_channel_thumb(thumb_url, skip_existing)
+        self._download_channel_banner(banner_url, skip_existing)
+
+    def _download_channel_thumb(self, channel_thumb, skip_existing):
+        """download channel thumbnail"""
+
+        thumb_path = os.path.join(
+            self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg"
+        )
+        self.item_type = "icon"
+
+        if skip_existing and os.path.exists(thumb_path):
+            return
+
+        img_raw = self.download_raw(channel_thumb)
+        img_raw.convert("RGB").save(thumb_path)
+
+    def _download_channel_banner(self, channel_banner, skip_existing):
+        """download channel banner"""
+
+        banner_path = os.path.join(
+            self.CHANNEL_DIR, self.item_id + "_banner.jpg"
+        )
+        self.item_type = "banner"
+        if skip_existing and os.path.exists(banner_path):
+            return
+
+        img_raw = self.download_raw(channel_banner)
+        img_raw.convert("RGB").save(banner_path)
+
+    def download_playlist_thumb(self, url, skip_existing=False):
+        """save playlist thumbnail from url as jpg in the playlist cache"""
+        file_name = f"{self.item_id}.jpg"
+        target = os.path.join(self.PLAYLIST_DIR, file_name)
+        already_cached = skip_existing and os.path.exists(target)
+        if not already_cached:
+            # fetch, convert to RGB and write in one go
+            self.download_raw(url).convert("RGB").save(target)
+
+    def delete_video_thumb(self):
+        """remove the cached thumbnail of this video if it exists"""
+        # vid_thumb_path can resolve against the cache dir by itself
+        to_delete = self.vid_thumb_path(absolute=True)
+        if os.path.exists(to_delete):
+            os.remove(to_delete)
+
+    def delete_channel_thumb(self):
+        """remove cached icon and banner of this channel if they exist"""
+        # same order as before: thumb first, then banner
+        for suffix in ("thumb", "banner"):
+            file_name = f"{self.item_id}_{suffix}.jpg"
+            art_path = os.path.join(self.CHANNEL_DIR, file_name)
+            if os.path.exists(art_path):
+                os.remove(art_path)
+
+    def delete_playlist_thumb(self):
+        """remove the cached thumbnail of this playlist if it exists"""
+        file_name = f"{self.item_id}.jpg"
+        if os.path.exists(os.path.join(self.PLAYLIST_DIR, file_name)):
+            os.remove(os.path.join(self.PLAYLIST_DIR, file_name))
+
+    def get_vid_base64_blur(self):
         """return base64 encoded placeholder"""
-        img_path = self.vid_thumb_path(youtube_id)
-        file_path = os.path.join(self.CACHE_DIR, img_path)
+        file_path = os.path.join(self.CACHE_DIR, self.vid_thumb_path())
         img_raw = Image.open(file_path)
         img_raw.thumbnail((img_raw.width // 20, img_raw.height // 20))
         img_blur = img_raw.filter(ImageFilter.BLUR)
@@ -264,40 +210,109 @@ class ThumbManager:
 
         return data_url
 
-    @staticmethod
-    def vid_thumb_path(youtube_id):
-        """build expected path for video thumbnail from youtube_id"""
-        folder_name = youtube_id[0].lower()
-        folder_path = os.path.join("videos", folder_name)
-        thumb_path = os.path.join(folder_path, youtube_id + ".jpg")
-        return thumb_path
 
-    def delete_vid_thumb(self, youtube_id):
-        """delete video thumbnail if exists"""
-        thumb_path = self.vid_thumb_path(youtube_id)
-        to_delete = os.path.join(self.CACHE_DIR, thumb_path)
-        if os.path.exists(to_delete):
-            os.remove(to_delete)
+class ValidatorCallback:
+    """handle callback validate thumbnails page by page"""
 
-    def delete_chan_thumb(self, channel_id):
-        """delete all artwork of channel"""
-        thumb = os.path.join(self.CHANNEL_DIR, channel_id + "_thumb.jpg")
-        banner = os.path.join(self.CHANNEL_DIR, channel_id + "_banner.jpg")
-        if os.path.exists(thumb):
-            os.remove(thumb)
-        if os.path.exists(banner):
-            os.remove(banner)
+    def __init__(self, source, index_name):
+        self.source = source
+        self.index_name = index_name
 
-    def cleanup_downloaded(self):
-        """find downloaded thumbnails without video indexed"""
-        all_thumbs = self.get_all_thumbs()
-        all_indexed = self.get_needed_thumbs()
-        all_needed_thumbs = [i[0] + ".jpg" for i in all_indexed]
-        for thumb in all_thumbs:
-            if thumb not in all_needed_thumbs:
-                # cleanup
-                youtube_id = thumb.rstrip(".jpg")
-                self.delete_vid_thumb(youtube_id)
+    def run(self):
+        """run the task for page"""
+        print(f"{self.index_name}: validate artwork")
+        if self.index_name == "ta_video":
+            self._validate_videos()
+        elif self.index_name == "ta_channel":
+            self._validate_channels()
+        elif self.index_name == "ta_playlist":
+            self._validate_playlists()
+
+    def _validate_videos(self):
+        """check if video thumbnails are correct"""
+        for video in self.source:
+            url = video["_source"]["vid_thumb_url"]
+            handler = ThumbManager(video["_source"]["youtube_id"])
+            handler.download_video_thumb(url, skip_existing=True)
+
+    def _validate_channels(self):
+        """check if all channel artwork is there"""
+        for channel in self.source:
+            urls = (
+                channel["_source"]["channel_thumb_url"],
+                channel["_source"]["channel_banner_url"],
+            )
+            handler = ThumbManager(channel["_source"]["channel_id"])
+            handler.download_channel_art(urls, skip_existing=True)
+
+    def _validate_playlists(self):
+        """check if all playlist artwork is there"""
+        for playlist in self.source:
+            url = playlist["_source"]["playlist_thumbnail"]
+            handler = ThumbManager(playlist["_source"]["playlist_id"])
+            handler.download_playlist_thumb(url, skip_existing=True)
+
+
+class ThumbValidator:
+    """validate thumbnails"""
+
+    def download_missing(self):
+        """download all missing artwork"""
+        self.download_missing_videos()
+        self.download_missing_channels()
+        self.download_missing_playlists()
+
+    def download_missing_videos(self):
+        """get all missing video thumbnails"""
+        data = {
+            "query": {"term": {"active": {"value": True}}},
+            "sort": [{"youtube_id": {"order": "asc"}}],
+            "_source": ["vid_thumb_url", "youtube_id"],
+        }
+        paginate = IndexPaginate(
+            "ta_video", data, size=5000, callback=ValidatorCallback
+        )
+        _ = paginate.get_results()
+
+    def download_missing_channels(self):
+        """get all missing channel thumbnails"""
+        data = {
+            "query": {"term": {"channel_active": {"value": True}}},
+            "sort": [{"channel_id": {"order": "asc"}}],
+            "_source": {
+                "excludes": ["channel_description", "channel_overwrites"]
+            },
+        }
+        paginate = IndexPaginate(
+            "ta_channel", data, callback=ValidatorCallback
+        )
+        _ = paginate.get_results()
+
+    def download_missing_playlists(self):
+        """get all missing playlist artwork"""
+        data = {
+            "query": {"term": {"playlist_active": {"value": True}}},
+            "sort": [{"playlist_id": {"order": "asc"}}],
+            "_source": ["playlist_id", "playlist_thumbnail"],
+        }
+        paginate = IndexPaginate(
+            "ta_playlist", data, callback=ValidatorCallback
+        )
+        _ = paginate.get_results()
+
+
+class ThumbFilesystem:
+    """filesystem tasks for thumbnails"""
+
+    CONFIG = AppConfig().config
+    CACHE_DIR = CONFIG["application"]["cache_dir"]
+    MEDIA_DIR = CONFIG["application"]["videos"]
+    VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
+
+    def sync(self):
+        """embed thumbnails to mediafiles"""
+        video_list = self.get_thumb_list()
+        self._embed_thumbs(video_list)
 
     def get_thumb_list(self):
         """get list of mediafiles and matching thumbnails"""
@@ -307,10 +322,10 @@ class ThumbManager:
 
         video_list = []
         for video in pending.all_videos:
-            youtube_id = video["youtube_id"]
+            video_id = video["youtube_id"]
             media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
             thumb_path = os.path.join(
-                self.CACHE_DIR, self.vid_thumb_path(youtube_id)
+                self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path()
             )
             video_list.append(
                 {
@@ -322,7 +337,7 @@ class ThumbManager:
         return video_list
 
     @staticmethod
-    def write_all_thumbs(video_list):
+    def _embed_thumbs(video_list):
         """rewrite the thumbnail into media file"""
 
         counter = 1
@@ -340,15 +355,3 @@ class ThumbManager:
             if counter % 50 == 0:
                 print(f"thumbnail write progress {counter}/{len(video_list)}")
             counter = counter + 1
-
-
-def validate_thumbnails():
-    """check if all thumbnails are there and organized correctly"""
-    handler = ThumbManager()
-    thumbs_to_download = handler.get_needed_thumbs(missing_only=True)
-    handler.download_vid(thumbs_to_download)
-    missing_channels = handler.get_missing_channels()
-    handler.download_chan(missing_channels)
-    missing_playlists = handler.get_missing_playlists()
-    handler.download_playlist(missing_playlists)
-    handler.cleanup_downloaded()
diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py
index b14cc65d..ce3209ab 100644
--- a/tubearchivist/home/src/frontend/searching.py
+++ b/tubearchivist/home/src/frontend/searching.py
@@ -119,7 +119,7 @@ class SearchHandler:
 
         if "vid_thumb_url" in hit_keys:
             youtube_id = hit["source"]["youtube_id"]
-            thumb_path = ThumbManager().vid_thumb_path(youtube_id)
+            thumb_path = ThumbManager(youtube_id).vid_thumb_path()
             hit["source"]["vid_thumb_url"] = thumb_path
 
         if "channel_last_refresh" in hit_keys:
@@ -138,7 +138,7 @@ class SearchHandler:
 
         if "subtitle_fragment_id" in hit_keys:
             youtube_id = hit["source"]["youtube_id"]
-            thumb_path = ThumbManager().vid_thumb_path(youtube_id)
+            thumb_path = ThumbManager(youtube_id).vid_thumb_path()
             hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}"
 
         return hit
diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py
index 400c5f5a..d41449ae 100644
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@@ -192,11 +192,11 @@ class YoutubeChannel(YouTubeItem):
     def get_channel_art(self):
         """download channel art for new channels"""
         channel_id = self.youtube_id
-        channel_thumb = self.json_data["channel_thumb_url"]
-        channel_banner = self.json_data["channel_banner_url"]
-        ThumbManager().download_chan(
-            [(channel_id, channel_thumb, channel_banner)]
+        urls = (
+            self.json_data["channel_thumb_url"],
+            self.json_data["channel_banner_url"],
         )
+        ThumbManager(channel_id, item_type="channel").download(urls)
 
     def sync_to_videos(self):
         """sync new channel_dict to all videos of channel"""
diff --git a/tubearchivist/home/src/index/playlist.py b/tubearchivist/home/src/index/playlist.py
index 69dc5db5..7115f9d9 100644
--- a/tubearchivist/home/src/index/playlist.py
+++ b/tubearchivist/home/src/index/playlist.py
@@ -81,12 +81,10 @@ class YoutubePlaylist(YouTubeItem):
 
         self.all_members = all_members
 
-    @staticmethod
-    def get_playlist_art():
+    def get_playlist_art(self):
         """download artwork of playlist"""
-        thumbnails = ThumbManager()
-        missing_playlists = thumbnails.get_missing_playlists()
-        thumbnails.download_playlist(missing_playlists)
+        url = self.json_data["playlist_thumbnail"]
+        ThumbManager(self.youtube_id, item_type="playlist").download(url)
 
     def add_vids_to_playlist(self):
         """sync the playlist id to videos"""
@@ -145,17 +143,15 @@ class YoutubePlaylist(YouTubeItem):
             previous_item = False
         else:
             previous_item = all_entries[current_idx - 1]
-            prev_thumb = ThumbManager().vid_thumb_path(
-                previous_item["youtube_id"]
-            )
-            previous_item["vid_thumb"] = prev_thumb
+            prev_id = previous_item["youtube_id"]
+            previous_item["vid_thumb"] = ThumbManager(prev_id).vid_thumb_path()
 
         if current_idx == len(all_entries) - 1:
             next_item = False
         else:
             next_item = all_entries[current_idx + 1]
-            next_thumb = ThumbManager().vid_thumb_path(next_item["youtube_id"])
-            next_item["vid_thumb"] = next_thumb
+            next_id = next_item["youtube_id"]
+            next_item["vid_thumb"] = ThumbManager(next_id).vid_thumb_path()
 
         self.nav = {
             "playlist_meta": {
diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py
index f0b88bd1..db231f89 100644
--- a/tubearchivist/home/src/index/reindex.py
+++ b/tubearchivist/home/src/index/reindex.py
@@ -181,10 +181,10 @@ class Reindex:
 
         video.upload_to_es()
 
-        thumb_handler = ThumbManager()
-        thumb_handler.delete_vid_thumb(youtube_id)
-        to_download = (youtube_id, video.json_data["vid_thumb_url"])
-        thumb_handler.download_vid([to_download], notify=False)
+        thumb_handler = ThumbManager(youtube_id)
+        thumb_handler.delete_video_thumb()
+        thumb_handler.download_video_thumb(video.json_data["vid_thumb_url"])
+
         return
 
     @staticmethod
diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py
index ede17f26..b3da7621 100644
--- a/tubearchivist/home/tasks.py
+++ b/tubearchivist/home/tasks.py
@@ -15,7 +15,7 @@ from home.src.download.subscriptions import (
     ChannelSubscription,
     PlaylistSubscription,
 )
-from home.src.download.thumbnails import ThumbManager, validate_thumbnails
+from home.src.download.thumbnails import ThumbFilesystem, ThumbValidator
 from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.index_setup import backup_all_indexes, restore_from_backup
 from home.src.index.channel import YoutubeChannel
@@ -201,21 +201,19 @@ def kill_dl(task_id):
 def rescan_filesystem():
     """check the media folder for mismatches"""
     scan_filesystem()
-    validate_thumbnails()
+    ThumbValidator().download_missing()
 
 
 @shared_task(name="thumbnail_check")
 def thumbnail_check():
     """validate thumbnails"""
-    validate_thumbnails()
+    ThumbValidator().download_missing()
 
 
 @shared_task
 def re_sync_thumbs():
     """sync thumbnails to mediafiles"""
-    handler = ThumbManager()
-    video_list = handler.get_thumb_list()
-    handler.write_all_thumbs(video_list)
+    ThumbFilesystem().sync()
 
 
 @shared_task
@@ -226,9 +224,7 @@ def subscribe_to(url_str):
     for item in to_subscribe_list:
         to_sub_id = item["url"]
         if item["type"] == "playlist":
-            new_thumbs = PlaylistSubscription().process_url_str([item])
-            if new_thumbs:
-                ThumbManager().download_playlist(new_thumbs)
+            PlaylistSubscription().process_url_str([item])
             continue
 
         if item["type"] == "video":

From 44473a364fe49fe46d1f630ae5af78a5ff304c65 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 10 Aug 2022 21:20:58 +0700
Subject: [PATCH 12/20] fix double download of playlist art

---
 tubearchivist/home/src/index/playlist.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tubearchivist/home/src/index/playlist.py b/tubearchivist/home/src/index/playlist.py
index 7115f9d9..252e913b 100644
--- a/tubearchivist/home/src/index/playlist.py
+++ b/tubearchivist/home/src/index/playlist.py
@@ -41,7 +41,6 @@ class YoutubePlaylist(YouTubeItem):
             self.process_youtube_meta()
             self.get_entries()
             self.json_data["playlist_entries"] = self.all_members
-            self.get_playlist_art()
             self.json_data["playlist_subscribed"] = subscribed
 
     def process_youtube_meta(self):

From 35f1084cc283816a790c8b7d82cce5396f2cc977 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 10 Aug 2022 21:47:26 +0700
Subject: [PATCH 13/20] download thumbnail for manual import

---
 tubearchivist/home/src/index/filesystem.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 5e80ebc8..85e1c953 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -12,6 +12,7 @@ import shutil
 import subprocess
 
 from home.src.download.queue import PendingList
+from home.src.download.thumbnails import ThumbManager
 from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
 from home.src.index.video import YoutubeVideo, index_new_video
@@ -466,7 +467,8 @@ class ManualImport:
 
     def index_metadata(self):
         """get metadata from yt or json"""
-        video = YoutubeVideo(self.current_video["video_id"])
+        video_id = self.current_video["video_id"]
+        video = YoutubeVideo(video_id)
         video.build_json(
             youtube_meta_overwrite=self._get_info_json(),
             media_path=self.current_video["media"],
@@ -474,6 +476,9 @@ class ManualImport:
         video.check_subtitles()
         video.upload_to_es()
 
+        url = video.json_data["vid_thumb_url"]
+        ThumbManager(video_id).download_video_thumb(url)
+
         return video.json_data
 
     def _get_info_json(self):

From 7029441f89fc9a4981fa1e9af1d437fa959fb49a Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 10 Aug 2022 22:24:02 +0700
Subject: [PATCH 14/20] use fallback thumb when no url passed

---
 tubearchivist/home/src/download/thumbnails.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py
index 8e6b8716..43cd981e 100644
--- a/tubearchivist/home/src/download/thumbnails.py
+++ b/tubearchivist/home/src/download/thumbnails.py
@@ -35,6 +35,8 @@ class ThumbManagerBase:
 
     def download_raw(self, url):
         """download thumbnail for video"""
+        if not url:
+            return self.get_fallback()
 
         for i in range(3):
             try:

From 994f7c2443b062c3c2b958a37aec061ee2f1ae7f Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 10 Aug 2022 22:31:15 +0700
Subject: [PATCH 15/20] use user provided thumb for offline import

---
 tubearchivist/home/src/index/filesystem.py | 9 +++++++--
 tubearchivist/home/src/index/video.py      | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 85e1c953..ac498303 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -476,8 +476,13 @@ class ManualImport:
         video.check_subtitles()
         video.upload_to_es()
 
-        url = video.json_data["vid_thumb_url"]
-        ThumbManager(video_id).download_video_thumb(url)
+        if video.offline_import and self.current_video["thumb"]:
+            old_path = self.current_video["thumb"]
+            new_path = ThumbManager(video_id).vid_thumb_path(absolute=True)
+            shutil.move(old_path, new_path, copy_function=shutil.copyfile)
+        else:
+            url = video.json_data["vid_thumb_url"]
+            ThumbManager(video_id).download_video_thumb(url)
 
         return video.json_data
 
diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index 48c3eeb4..a2b54970 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -425,6 +425,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         self.channel_id = False
         self.video_overwrites = video_overwrites
         self.es_path = f"{self.index_name}/_doc/{youtube_id}"
+        self.offline_import = False
 
     def build_json(self, youtube_meta_overwrite=False, media_path=False):
         """build json dict of video"""
@@ -434,6 +435,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
 
         if not self.youtube_meta:
             self.youtube_meta = youtube_meta_overwrite
+            self.offline_import = True
 
         self._process_youtube_meta()
         self._add_channel()

From 31061c0eb05b660f2fb06d715b9a607326678f49 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 10 Aug 2022 22:38:17 +0700
Subject: [PATCH 16/20] handle offline import without info.json fail

---
 tubearchivist/home/src/index/filesystem.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index ac498303..13a0ae5a 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -473,6 +473,10 @@ class ManualImport:
             youtube_meta_overwrite=self._get_info_json(),
             media_path=self.current_video["media"],
         )
+        if not video.json_data:
+            print(f"{video_id}: manual import failed, and no metadata found.")
+            raise ValueError
+
         video.check_subtitles()
         video.upload_to_es()
 

From c26ef012c15b0388c2874ba04f196cb74fbb8cdb Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 11 Aug 2022 11:10:58 +0700
Subject: [PATCH 17/20] raise ValueError when not matching media files

---
 tubearchivist/home/src/index/filesystem.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 13a0ae5a..38d790b8 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -250,6 +250,10 @@ class ImportFolderScanner:
     def process_videos(self):
         """loop through all videos"""
         for current_video in self.to_import:
+            if not current_video["media"]:
+                print(f"{current_video}: no matching media file found.")
+                raise ValueError
+
             self._detect_youtube_id(current_video)
             self._dump_thumb(current_video)
             self._convert_thumb(current_video)

From 0891c49620ef442c80037a36c45d13c7123267cd Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 11 Aug 2022 19:57:00 +0700
Subject: [PATCH 18/20] implement offline channel import

---
 tubearchivist/home/src/index/channel.py    | 51 +++++++++++++++++++---
 tubearchivist/home/src/index/filesystem.py | 12 ++++-
 tubearchivist/home/src/index/video.py      |  5 ++-
 3 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py
index d41449ae..34e4adc7 100644
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@@ -173,30 +173,71 @@ class YoutubeChannel(YouTubeItem):
         self.es_path = f"{self.index_name}/_doc/{youtube_id}"
         self.all_playlists = False
 
-    def build_json(self, upload=False):
+    def build_json(self, upload=False, fallback=False):
         """get from es or from youtube"""
         self.get_from_es()
         if self.json_data:
             return
 
-        self.get_from_youtube()
+        self.get_from_youtube(fallback)
+
         if upload:
             self.upload_to_es()
         return
 
-    def get_from_youtube(self):
+    def get_from_youtube(self, fallback=False):
         """use bs4 to scrape channel about page"""
         self.json_data = ChannelScraper(self.youtube_id).get_json()
+
+        if not self.json_data and fallback:
+            self._video_fallback(fallback)
+
         self.get_channel_art()
 
+    def _video_fallback(self, fallback):
+        """use video metadata as fallback"""
+        print(f"{self.youtube_id}: fallback to video metadata")
+        self.json_data = {
+            "channel_active": False,
+            "channel_last_refresh": int(datetime.now().strftime("%s")),
+            "channel_subs": fallback.get("channel_follower_count", 0),
+            "channel_name": fallback["uploader"],
+            "channel_banner_url": False,
+            "channel_tvart_url": False,
+            "channel_id": self.youtube_id,
+            "channel_subscribed": False,
+            "channel_description": False,
+            "channel_thumb_url": False,
+            "channel_views": 0,
+        }
+        self._info_json_fallback()
+
+    def _info_json_fallback(self):
+        """read channel info.json for additional metadata"""
+        info_json = os.path.join(
+            self.config["application"]["cache_dir"],
+            "import",
+            f"{self.youtube_id}.info.json",
+        )
+        if os.path.exists(info_json):
+            print(f"{self.youtube_id}: read info.json file")
+            with open(info_json, "r", encoding="utf-8") as f:
+                content = json.loads(f.read())
+
+            self.json_data.update(
+                {
+                    "channel_subs": content["channel_follower_count"],
+                    "channel_description": content["description"],
+                }
+            )
+
     def get_channel_art(self):
         """download channel art for new channels"""
-        channel_id = self.youtube_id
         urls = (
             self.json_data["channel_thumb_url"],
             self.json_data["channel_banner_url"],
         )
-        ThumbManager(channel_id, item_type="channel").download(urls)
+        ThumbManager(self.youtube_id, item_type="channel").download(urls)
 
     def sync_to_videos(self):
         """sync new channel_dict to all videos of channel"""
diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 38d790b8..3689fed8 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -467,7 +467,7 @@ class ManualImport:
         """run all"""
         json_data = self.index_metadata()
         self._move_to_archive(json_data)
-        self._cleanup()
+        self._cleanup(json_data)
 
     def index_metadata(self):
         """get metadata from yt or json"""
@@ -517,7 +517,7 @@ class ManualImport:
         new_path = os.path.join(channel_folder, file)
         shutil.move(old_path, new_path, copy_function=shutil.copyfile)
 
-    def _cleanup(self):
+    def _cleanup(self, json_data):
         """cleanup leftover files"""
         if os.path.exists(self.current_video["metadata"]):
             os.remove(self.current_video["metadata"])
@@ -529,6 +529,14 @@ class ManualImport:
             if os.path.exists(subtitle_file):
                 os.remove(subtitle_file)
 
+        channel_info = os.path.join(
+            self.config["application"]["cache_dir"],
+            "import",
+            f"{json_data['channel']['channel_id']}.info.json",
+        )
+        if os.path.exists(channel_info):
+            os.remove(channel_info)
+
 
 def scan_filesystem():
     """grouped function to delete and update index"""
diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index a2b54970..d7d8b983 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -492,7 +492,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
     def _add_channel(self):
         """add channel dict to video json_data"""
         channel = ta_channel.YoutubeChannel(self.channel_id)
-        channel.build_json(upload=True)
+        channel.build_json(upload=True, fallback=self.youtube_meta)
         self.json_data.update({"channel": channel.json_data})
 
     def _add_stats(self):
@@ -500,13 +500,14 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         # likes
         like_count = self.youtube_meta.get("like_count", 0)
         dislike_count = self.youtube_meta.get("dislike_count", 0)
+        average_rating = self.youtube_meta.get("average_rating", 0)
         self.json_data.update(
             {
                 "stats": {
                     "view_count": self.youtube_meta["view_count"],
                     "like_count": like_count,
                     "dislike_count": dislike_count,
-                    "average_rating": self.youtube_meta["average_rating"],
+                    "average_rating": average_rating,
                 }
             }
         )

From ececc3cedddcf058b74fcba957810e6df30612d9 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 11 Aug 2022 19:57:12 +0700
Subject: [PATCH 19/20] bump yt-dlp

---
 tubearchivist/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt
index 42e4656b..f9729822 100644
--- a/tubearchivist/requirements.txt
+++ b/tubearchivist/requirements.txt
@@ -10,4 +10,4 @@ requests==2.28.1
 ryd-client==0.0.3
 uWSGI==2.0.20
 whitenoise==6.2.0
-yt_dlp==2022.7.18
+yt_dlp==2022.8.8

From 980b99783bf1355d2d68913b3d2996c7994b0025 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Fri, 12 Aug 2022 05:35:47 +0700
Subject: [PATCH 20/20] add sponsor info

---
 README.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/README.md b/README.md
index 1a65628f..7fb417c1 100644
--- a/README.md
+++ b/README.md
@@ -220,3 +220,12 @@ Second best way to support the development is to provide for caffeinated beverag
 * [Paypal.me](https://paypal.me/bbilly1) for a one time coffee
 * [Paypal Subscription](https://www.paypal.com/webapps/billing/plans/subscribe?plan_id=P-03770005GR991451KMFGVPMQ) for a monthly coffee
 * [ko-fi.com](https://ko-fi.com/bbilly1) for an alternative platform
+
+
+## Sponsor
+Big thank you to [DigitalOcean](https://www.digitalocean.com/) for generously donating credit for the tubearchivist.com VPS and build server.
+<p>
+  <a href="https://www.digitalocean.com/">
+    <img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/PoweredByDO/DO_Powered_by_Badge_blue.svg" width="201px">
+  </a>
+</p>