offline import, #build

Changed: - added offline import for videos and channels - thumbnail manager rewrite
2025-06-30 14:21:09 +00:00 · 2022-08-12 05:36:54 +07:00 · 2022-08-12 05:36:54 +07:00 · c39ce61b2c
commit c39ce61b2c
parent 13018192f3 980b99783b
13 changed files with 712 additions and 404 deletions
--- a/README.md
+++ b/README.md
@ -220,3 +220,12 @@ Second best way to support the development is to provide for caffeinated beverag
 * [Paypal.me](https://paypal.me/bbilly1) for a one time coffee
 * [Paypal Subscription](https://www.paypal.com/webapps/billing/plans/subscribe?plan_id=P-03770005GR991451KMFGVPMQ) for a monthly coffee
 * [ko-fi.com](https://ko-fi.com/bbilly1) for an alternative platform
+
+
+## Sponsor
+Big thank you to [DigitalOcean](https://www.digitalocean.com/) for generously donating credit for the tubearchivist.com VPS and build server.
+<p>
+  <a href="https://www.digitalocean.com/">
+    <img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/PoweredByDO/DO_Powered_by_Badge_blue.svg" width="201px">
+  </a>
+</p>
--- a/tubearchivist/api/src/search_processor.py
+++ b/tubearchivist/api/src/search_processor.py
@ -74,7 +74,7 @@ class SearchProcess:
        media_url = urllib.parse.quote(video_dict["media_url"])
        vid_last_refresh = date_praser(video_dict["vid_last_refresh"])
        published = date_praser(video_dict["published"])
-        vid_thumb_url = ThumbManager().vid_thumb_path(video_id)
+        vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
        channel = self._process_channel(video_dict["channel"])

        if "subtitles" in video_dict:
@ -113,7 +113,7 @@ class SearchProcess:
    def _process_download(self, download_dict):
        """run on single download item"""
        video_id = download_dict["youtube_id"]
-        vid_thumb_url = ThumbManager().vid_thumb_path(video_id)
+        vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
        published = date_praser(download_dict["published"])

        download_dict.update(
--- a/tubearchivist/home/src/download/queue.py
+++ b/tubearchivist/home/src/download/queue.py
@ -161,10 +161,7 @@ class PendingList(PendingIndex):
            self._parse_channel(entry["url"])
        elif entry["type"] == "playlist":
            self._parse_playlist(entry["url"])
-            new_thumbs = PlaylistSubscription().process_url_str(
-                [entry], subscribed=False
-            )
-            ThumbManager().download_playlist(new_thumbs)
+            PlaylistSubscription().process_url_str([entry], subscribed=False)
        else:
            raise ValueError(f"invalid url_type: {entry}")

@ -198,7 +195,6 @@ class PendingList(PendingIndex):
        self.get_channels()
        bulk_list = []

-        thumb_handler = ThumbManager()
        for idx, youtube_id in enumerate(self.missing_videos):
            video_details = self.get_youtube_details(youtube_id)
            if not video_details:
@ -209,8 +205,9 @@ class PendingList(PendingIndex):
            bulk_list.append(json.dumps(action))
            bulk_list.append(json.dumps(video_details))

-            thumb_needed = [(youtube_id, video_details["vid_thumb_url"])]
-            thumb_handler.download_vid(thumb_needed)
+            url = video_details["vid_thumb_url"]
+            ThumbManager(youtube_id).download_video_thumb(url)
+
            self._notify_add(idx)

        if bulk_list:
--- a/tubearchivist/home/src/download/subscriptions.py
+++ b/tubearchivist/home/src/download/subscriptions.py
@ -5,6 +5,7 @@ Functionality:
 """

 from home.src.download import queue  # partial import
+from home.src.download.thumbnails import ThumbManager
 from home.src.download.yt_dlp_base import YtWrap
 from home.src.es.connect import IndexPaginate
 from home.src.index.channel import YoutubeChannel
@ -129,11 +130,9 @@ class PlaylistSubscription:
        all_indexed = IndexPaginate("ta_video", data).get_results()
        all_youtube_ids = [i["youtube_id"] for i in all_indexed]

-        new_thumbs = []
        for idx, playlist in enumerate(new_playlists):
-            url_type = playlist["type"]
            playlist_id = playlist["url"]
-            if not url_type == "playlist":
+            if not playlist["type"] == "playlist":
                print(f"{playlist_id} not a playlist, skipping...")
                continue

@ -144,8 +143,11 @@ class PlaylistSubscription:
            playlist_h.upload_to_es()
            playlist_h.add_vids_to_playlist()
            self.channel_validate(playlist_h.json_data["playlist_channel_id"])
-            thumb = playlist_h.json_data["playlist_thumbnail"]
-            new_thumbs.append((playlist_id, thumb))
+
+            url = playlist_h.json_data["playlist_thumbnail"]
+            thumb = ThumbManager(playlist_id, item_type="playlist")
+            thumb.download_playlist_thumb(url)
+
            # notify
            message = {
                "status": "message:subplaylist",
@ -157,8 +159,6 @@ class PlaylistSubscription:
                "message:subplaylist", message=message, expire=True
            )

-        return new_thumbs
-
    @staticmethod
    def channel_validate(channel_id):
        """make sure channel of playlist is there"""
--- a/tubearchivist/home/src/download/thumbnails.py
+++ b/tubearchivist/home/src/download/thumbnails.py
@ -6,136 +6,66 @@ functionality:

 import base64
 import os
-from collections import Counter
 from io import BytesIO
 from time import sleep

 import requests
 from home.src.download import queue  # partial import
-from home.src.download import subscriptions  # partial import
+from home.src.es.connect import IndexPaginate
 from home.src.ta.config import AppConfig
-from home.src.ta.helper import ignore_filelist
-from home.src.ta.ta_redis import RedisArchivist
 from mutagen.mp4 import MP4, MP4Cover
 from PIL import Image, ImageFile, ImageFilter

 ImageFile.LOAD_TRUNCATED_IMAGES = True


-class ThumbManager:
-    """handle thumbnails related functions"""
+class ThumbManagerBase:
+    """base class for thumbnail management"""

    CONFIG = AppConfig().config
-    MEDIA_DIR = CONFIG["application"]["videos"]
    CACHE_DIR = CONFIG["application"]["cache_dir"]
    VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
    CHANNEL_DIR = os.path.join(CACHE_DIR, "channels")
    PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists")

-    def get_all_thumbs(self):
-        """get all video artwork already downloaded"""
-        all_thumb_folders = ignore_filelist(os.listdir(self.VIDEO_DIR))
-        all_thumbs = []
-        for folder in all_thumb_folders:
-            folder_path = os.path.join(self.VIDEO_DIR, folder)
-            if os.path.isfile(folder_path):
-                self.update_path(folder)
-                all_thumbs.append(folder_path)
-                continue
-                # raise exemption here in a future version
-                # raise FileExistsError("video cache dir has files inside")
+    def __init__(self, item_id, item_type, fallback=False):
+        self.item_id = item_id
+        self.item_type = item_type
+        self.fallback = fallback

-            all_folder_thumbs = ignore_filelist(os.listdir(folder_path))
-            all_thumbs.extend(all_folder_thumbs)
+    def download_raw(self, url):
+        """download thumbnail for video"""
+        if not url:
+            return self.get_fallback()

-        return all_thumbs
+        for i in range(3):
+            try:
+                response = requests.get(url, stream=True)
+                if response.ok:
+                    return Image.open(response.raw)
+                if response.status_code == 404:
+                    return self.get_fallback()

-    def update_path(self, file_name):
-        """reorganize thumbnails into folders as update path from v0.0.5"""
-        folder_name = file_name[0].lower()
-        folder_path = os.path.join(self.VIDEO_DIR, folder_name)
-        old_file = os.path.join(self.VIDEO_DIR, file_name)
-        new_file = os.path.join(folder_path, file_name)
-        os.makedirs(folder_path, exist_ok=True)
-        os.rename(old_file, new_file)
+            except ConnectionError:
+                print(f"{self.item_id}: retry thumbnail download {url}")
+                sleep((i + 1) ** i)

-    def get_needed_thumbs(self, missing_only=False):
-        """get a list of all missing thumbnails"""
-        all_thumbs = self.get_all_thumbs()
+        return False

-        pending = queue.PendingList()
-        pending.get_download()
-        pending.get_indexed()
+    def get_fallback(self):
+        """get fallback thumbnail if not available"""
+        if self.fallback:
+            img_raw = Image.open(self.fallback)
+            return img_raw

-        needed_thumbs = []
-        for video in pending.all_videos:
-            youtube_id = video["youtube_id"]
-            thumb_url = video["vid_thumb_url"]
-            if missing_only:
-                if youtube_id + ".jpg" not in all_thumbs:
-                    needed_thumbs.append((youtube_id, thumb_url))
-            else:
-                needed_thumbs.append((youtube_id, thumb_url))
-
-        for video in pending.all_pending + pending.all_ignored:
-            youtube_id = video["youtube_id"]
-            thumb_url = video["vid_thumb_url"]
-            if missing_only:
-                if youtube_id + ".jpg" not in all_thumbs:
-                    needed_thumbs.append((youtube_id, thumb_url))
-            else:
-                needed_thumbs.append((youtube_id, thumb_url))
-
-        return needed_thumbs
-
-    def get_missing_channels(self):
-        """get all channel artwork"""
-        all_channel_art = os.listdir(self.CHANNEL_DIR)
-        files = [i[0:24] for i in all_channel_art]
-        cached_channel_ids = [k for (k, v) in Counter(files).items() if v > 1]
-        channel_sub = subscriptions.ChannelSubscription()
-        channels = channel_sub.get_channels(subscribed_only=False)
-
-        missing_channels = []
-        for channel in channels:
-            channel_id = channel["channel_id"]
-            if channel_id not in cached_channel_ids:
-                channel_banner = channel["channel_banner_url"]
-                channel_thumb = channel["channel_thumb_url"]
-                missing_channels.append(
-                    (channel_id, channel_thumb, channel_banner)
-                )
-
-        return missing_channels
-
-    def get_missing_playlists(self):
-        """get all missing playlist artwork"""
-        all_downloaded = ignore_filelist(os.listdir(self.PLAYLIST_DIR))
-        all_ids_downloaded = [i.replace(".jpg", "") for i in all_downloaded]
-        playlist_sub = subscriptions.PlaylistSubscription()
-        playlists = playlist_sub.get_playlists(subscribed_only=False)
-
-        missing_playlists = []
-        for playlist in playlists:
-            playlist_id = playlist["playlist_id"]
-            if playlist_id not in all_ids_downloaded:
-                playlist_thumb = playlist["playlist_thumbnail"]
-                missing_playlists.append((playlist_id, playlist_thumb))
-
-        return missing_playlists
-
-    def get_raw_img(self, img_url, thumb_type):
-        """get raw image from youtube and handle 404"""
-        try:
-            app_root = self.CONFIG["application"]["app_root"]
-        except KeyError:
-            # lazy keyerror fix to not have to deal with a strange startup
-            # racing contition between the threads in HomeConfig.ready()
-            app_root = "/app"
+        app_root = self.CONFIG["application"]["app_root"]
        default_map = {
            "video": os.path.join(
                app_root, "static/img/default-video-thumb.jpg"
            ),
+            "playlist": os.path.join(
+                app_root, "static/img/default-video-thumb.jpg"
+            ),
            "icon": os.path.join(
                app_root, "static/img/default-channel-icon.jpg"
            ),
@ -143,116 +73,134 @@ class ThumbManager:
                app_root, "static/img/default-channel-banner.jpg"
            ),
        }
-        if img_url:
-            try:
-                response = requests.get(img_url, stream=True)
-            except ConnectionError:
-                sleep(5)
-                response = requests.get(img_url, stream=True)
-            if not response.ok and not response.status_code == 404:
-                print("retry thumbnail download for " + img_url)
-                sleep(5)
-                response = requests.get(img_url, stream=True)
-        else:
-            response = False
-        if not response or response.status_code == 404:
-            # use default
-            img_raw = Image.open(default_map[thumb_type])
-        else:
-            # use response
-            img_obj = response.raw
-            img_raw = Image.open(img_obj)
+
+        img_raw = Image.open(default_map[self.item_type])

        return img_raw

-    def download_vid(self, missing_thumbs, notify=True):
-        """download all missing thumbnails from list"""
-        print(f"downloading {len(missing_thumbs)} thumbnails")
-        for idx, (youtube_id, thumb_url) in enumerate(missing_thumbs):
-            folder_path = os.path.join(self.VIDEO_DIR, youtube_id[0].lower())
-            thumb_path = os.path.join(
-                self.CACHE_DIR, self.vid_thumb_path(youtube_id)
-            )

-            os.makedirs(folder_path, exist_ok=True)
-            img_raw = self.get_raw_img(thumb_url, "video")
+class ThumbManager(ThumbManagerBase):
+    """handle thumbnails related functions"""

-            width, height = img_raw.size
-            if not width / height == 16 / 9:
-                new_height = width / 16 * 9
-                offset = (height - new_height) / 2
-                img_raw = img_raw.crop((0, offset, width, height - offset))
-            img_raw.convert("RGB").save(thumb_path)
+    def __init__(self, item_id, item_type="video", fallback=False):
+        super().__init__(item_id, item_type, fallback=fallback)

-            progress = f"{idx + 1}/{len(missing_thumbs)}"
-            if notify:
-                mess_dict = {
-                    "status": "message:add",
-                    "level": "info",
-                    "title": "Processing Videos",
-                    "message": "Downloading Thumbnails, Progress: " + progress,
-                }
-                if idx + 1 == len(missing_thumbs):
-                    expire = 4
-                else:
-                    expire = True
+    def download(self, url):
+        """download thumbnail"""
+        print(f"{self.item_id}: download {self.item_type} thumbnail")
+        if self.item_type == "video":
+            self.download_video_thumb(url)
+        elif self.item_type == "channel":
+            self.download_channel_art(url)
+        elif self.item_type == "playlist":
+            self.download_playlist_thumb(url)

-                RedisArchivist().set_message(
-                    "message:add", mess_dict, expire=expire
-                )
+    def delete(self):
+        """delete thumbnail file"""
+        print(f"{self.item_id}: delete {self.item_type} thumbnail")
+        if self.item_type == "video":
+            self.delete_video_thumb()
+        elif self.item_type == "channel":
+            self.delete_channel_thumb()
+        elif self.item_type == "playlist":
+            self.delete_playlist_thumb()

-            if idx + 1 % 25 == 0:
-                print("thumbnail progress: " + progress)
+    def download_video_thumb(self, url, skip_existing=False):
+        """pass url for video thumbnail"""
+        folder_path = os.path.join(self.VIDEO_DIR, self.item_id[0].lower())
+        thumb_path = self.vid_thumb_path(absolute=True)

-    def download_chan(self, missing_channels):
-        """download needed artwork for channels"""
-        print(f"downloading {len(missing_channels)} channel artwork")
-        for channel in missing_channels:
-            channel_id, channel_thumb, channel_banner = channel
+        if skip_existing and os.path.exists(thumb_path):
+            return

-            thumb_path = os.path.join(
-                self.CHANNEL_DIR, channel_id + "_thumb.jpg"
-            )
-            img_raw = self.get_raw_img(channel_thumb, "icon")
-            img_raw.convert("RGB").save(thumb_path)
+        os.makedirs(folder_path, exist_ok=True)
+        img_raw = self.download_raw(url)
+        width, height = img_raw.size

-            banner_path = os.path.join(
-                self.CHANNEL_DIR, channel_id + "_banner.jpg"
-            )
-            img_raw = self.get_raw_img(channel_banner, "banner")
-            img_raw.convert("RGB").save(banner_path)
+        if not width / height == 16 / 9:
+            new_height = width / 16 * 9
+            offset = (height - new_height) / 2
+            img_raw = img_raw.crop((0, offset, width, height - offset))

-            mess_dict = {
-                "status": "message:download",
-                "level": "info",
-                "title": "Processing Channels",
-                "message": "Downloading Channel Art.",
-            }
-            key = "message:download"
-            RedisArchivist().set_message(key, mess_dict, expire=True)
+        img_raw.convert("RGB").save(thumb_path)

-    def download_playlist(self, missing_playlists):
-        """download needed artwork for playlists"""
-        print(f"downloading {len(missing_playlists)} playlist artwork")
-        for playlist in missing_playlists:
-            playlist_id, playlist_thumb_url = playlist
-            thumb_path = os.path.join(self.PLAYLIST_DIR, playlist_id + ".jpg")
-            img_raw = self.get_raw_img(playlist_thumb_url, "video")
-            img_raw.convert("RGB").save(thumb_path)
+    def vid_thumb_path(self, absolute=False):
+        """build expected path for video thumbnail from youtube_id"""
+        folder_name = self.item_id[0].lower()
+        folder_path = os.path.join("videos", folder_name)
+        thumb_path = os.path.join(folder_path, f"{self.item_id}.jpg")
+        if absolute:
+            thumb_path = os.path.join(self.CACHE_DIR, thumb_path)

-            mess_dict = {
-                "status": "message:download",
-                "level": "info",
-                "title": "Processing Playlists",
-                "message": "Downloading Playlist Art.",
-            }
-            key = "message:download"
-            RedisArchivist().set_message(key, mess_dict, expire=True)
+        return thumb_path

-    def get_base64_blur(self, youtube_id):
+    def download_channel_art(self, urls, skip_existing=False):
+        """pass tuple of channel thumbnails"""
+        channel_thumb, channel_banner = urls
+        self._download_channel_thumb(channel_thumb, skip_existing)
+        self._download_channel_banner(channel_banner, skip_existing)
+
+    def _download_channel_thumb(self, channel_thumb, skip_existing):
+        """download channel thumbnail"""
+
+        thumb_path = os.path.join(
+            self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg"
+        )
+        self.item_type = "icon"
+
+        if skip_existing and os.path.exists(thumb_path):
+            return
+
+        img_raw = self.download_raw(channel_thumb)
+        img_raw.convert("RGB").save(thumb_path)
+
+    def _download_channel_banner(self, channel_banner, skip_existing):
+        """download channel banner"""
+
+        banner_path = os.path.join(
+            self.CHANNEL_DIR, self.item_id + "_banner.jpg"
+        )
+        self.item_type = "banner"
+        if skip_existing and os.path.exists(banner_path):
+            return
+
+        img_raw = self.download_raw(channel_banner)
+        img_raw.convert("RGB").save(banner_path)
+
+    def download_playlist_thumb(self, url, skip_existing=False):
+        """pass thumbnail url"""
+        thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg")
+        if skip_existing and os.path.exists(thumb_path):
+            return
+
+        img_raw = self.download_raw(url)
+        img_raw.convert("RGB").save(thumb_path)
+
+    def delete_video_thumb(self):
+        """delete video thumbnail if exists"""
+        thumb_path = self.vid_thumb_path()
+        to_delete = os.path.join(self.CACHE_DIR, thumb_path)
+        if os.path.exists(to_delete):
+            os.remove(to_delete)
+
+    def delete_channel_thumb(self):
+        """delete all artwork of channel"""
+        thumb = os.path.join(self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg")
+        banner = os.path.join(self.CHANNEL_DIR, f"{self.item_id}_banner.jpg")
+        if os.path.exists(thumb):
+            os.remove(thumb)
+        if os.path.exists(banner):
+            os.remove(banner)
+
+    def delete_playlist_thumb(self):
+        """delete playlist thumbnail"""
+        thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg")
+        if os.path.exists(thumb_path):
+            os.remove(thumb_path)
+
+    def get_vid_base64_blur(self):
        """return base64 encoded placeholder"""
-        img_path = self.vid_thumb_path(youtube_id)
-        file_path = os.path.join(self.CACHE_DIR, img_path)
+        file_path = os.path.join(self.CACHE_DIR, self.vid_thumb_path())
        img_raw = Image.open(file_path)
        img_raw.thumbnail((img_raw.width // 20, img_raw.height // 20))
        img_blur = img_raw.filter(ImageFilter.BLUR)
@ -264,40 +212,109 @@ class ThumbManager:

        return data_url

-    @staticmethod
-    def vid_thumb_path(youtube_id):
-        """build expected path for video thumbnail from youtube_id"""
-        folder_name = youtube_id[0].lower()
-        folder_path = os.path.join("videos", folder_name)
-        thumb_path = os.path.join(folder_path, youtube_id + ".jpg")
-        return thumb_path

-    def delete_vid_thumb(self, youtube_id):
-        """delete video thumbnail if exists"""
-        thumb_path = self.vid_thumb_path(youtube_id)
-        to_delete = os.path.join(self.CACHE_DIR, thumb_path)
-        if os.path.exists(to_delete):
-            os.remove(to_delete)
+class ValidatorCallback:
+    """handle callback validate thumbnails page by page"""

-    def delete_chan_thumb(self, channel_id):
-        """delete all artwork of channel"""
-        thumb = os.path.join(self.CHANNEL_DIR, channel_id + "_thumb.jpg")
-        banner = os.path.join(self.CHANNEL_DIR, channel_id + "_banner.jpg")
-        if os.path.exists(thumb):
-            os.remove(thumb)
-        if os.path.exists(banner):
-            os.remove(banner)
+    def __init__(self, source, index_name):
+        self.source = source
+        self.index_name = index_name

-    def cleanup_downloaded(self):
-        """find downloaded thumbnails without video indexed"""
-        all_thumbs = self.get_all_thumbs()
-        all_indexed = self.get_needed_thumbs()
-        all_needed_thumbs = [i[0] + ".jpg" for i in all_indexed]
-        for thumb in all_thumbs:
-            if thumb not in all_needed_thumbs:
-                # cleanup
-                youtube_id = thumb.rstrip(".jpg")
-                self.delete_vid_thumb(youtube_id)
+    def run(self):
+        """run the task for page"""
+        print(f"{self.index_name}: validate artwork")
+        if self.index_name == "ta_video":
+            self._validate_videos()
+        elif self.index_name == "ta_channel":
+            self._validate_channels()
+        elif self.index_name == "ta_playlist":
+            self._validate_playlists()
+
+    def _validate_videos(self):
+        """check if video thumbnails are correct"""
+        for video in self.source:
+            url = video["_source"]["vid_thumb_url"]
+            handler = ThumbManager(video["_source"]["youtube_id"])
+            handler.download_video_thumb(url, skip_existing=True)
+
+    def _validate_channels(self):
+        """check if all channel artwork is there"""
+        for channel in self.source:
+            urls = (
+                channel["_source"]["channel_thumb_url"],
+                channel["_source"]["channel_banner_url"],
+            )
+            handler = ThumbManager(channel["_source"]["channel_id"])
+            handler.download_channel_art(urls, skip_existing=True)
+
+    def _validate_playlists(self):
+        """check if all playlist artwork is there"""
+        for playlist in self.source:
+            url = playlist["_source"]["playlist_thumbnail"]
+            handler = ThumbManager(playlist["_source"]["playlist_id"])
+            handler.download_playlist_thumb(url, skip_existing=True)
+
+
+class ThumbValidator:
+    """validate thumbnails"""
+
+    def download_missing(self):
+        """download all missing artwork"""
+        self.download_missing_videos()
+        self.download_missing_channels()
+        self.download_missing_playlists()
+
+    def download_missing_videos(self):
+        """get all missing video thumbnails"""
+        data = {
+            "query": {"term": {"active": {"value": True}}},
+            "sort": [{"youtube_id": {"order": "asc"}}],
+            "_source": ["vid_thumb_url", "youtube_id"],
+        }
+        paginate = IndexPaginate(
+            "ta_video", data, size=5000, callback=ValidatorCallback
+        )
+        _ = paginate.get_results()
+
+    def download_missing_channels(self):
+        """get all missing channel thumbnails"""
+        data = {
+            "query": {"term": {"channel_active": {"value": True}}},
+            "sort": [{"channel_id": {"order": "asc"}}],
+            "_source": {
+                "excludes": ["channel_description", "channel_overwrites"]
+            },
+        }
+        paginate = IndexPaginate(
+            "ta_channel", data, callback=ValidatorCallback
+        )
+        _ = paginate.get_results()
+
+    def download_missing_playlists(self):
+        """get all missing playlist artwork"""
+        data = {
+            "query": {"term": {"playlist_active": {"value": True}}},
+            "sort": [{"playlist_id": {"order": "asc"}}],
+            "_source": ["playlist_id", "playlist_thumbnail"],
+        }
+        paginate = IndexPaginate(
+            "ta_playlist", data, callback=ValidatorCallback
+        )
+        _ = paginate.get_results()
+
+
+class ThumbFilesystem:
+    """filesystem tasks for thumbnails"""
+
+    CONFIG = AppConfig().config
+    CACHE_DIR = CONFIG["application"]["cache_dir"]
+    MEDIA_DIR = CONFIG["application"]["videos"]
+    VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
+
+    def sync(self):
+        """embed thumbnails to mediafiles"""
+        video_list = self.get_thumb_list()
+        self._embed_thumbs(video_list)

    def get_thumb_list(self):
        """get list of mediafiles and matching thumbnails"""
@ -307,10 +324,10 @@ class ThumbManager:

        video_list = []
        for video in pending.all_videos:
-            youtube_id = video["youtube_id"]
+            video_id = video["youtube_id"]
            media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
            thumb_path = os.path.join(
-                self.CACHE_DIR, self.vid_thumb_path(youtube_id)
+                self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path()
            )
            video_list.append(
                {
@ -322,7 +339,7 @@ class ThumbManager:
        return video_list

    @staticmethod
-    def write_all_thumbs(video_list):
+    def _embed_thumbs(video_list):
        """rewrite the thumbnail into media file"""

        counter = 1
@ -340,15 +357,3 @@ class ThumbManager:
            if counter % 50 == 0:
                print(f"thumbnail write progress {counter}/{len(video_list)}")
            counter = counter + 1
-
-
-def validate_thumbnails():
-    """check if all thumbnails are there and organized correctly"""
-    handler = ThumbManager()
-    thumbs_to_download = handler.get_needed_thumbs(missing_only=True)
-    handler.download_vid(thumbs_to_download)
-    missing_channels = handler.get_missing_channels()
-    handler.download_chan(missing_channels)
-    missing_playlists = handler.get_missing_playlists()
-    handler.download_playlist(missing_playlists)
-    handler.cleanup_downloaded()
--- a/tubearchivist/home/src/frontend/searching.py
+++ b/tubearchivist/home/src/frontend/searching.py
@ -119,7 +119,7 @@ class SearchHandler:

        if "vid_thumb_url" in hit_keys:
            youtube_id = hit["source"]["youtube_id"]
-            thumb_path = ThumbManager().vid_thumb_path(youtube_id)
+            thumb_path = ThumbManager(youtube_id).vid_thumb_path()
            hit["source"]["vid_thumb_url"] = thumb_path

        if "channel_last_refresh" in hit_keys:
@ -138,7 +138,7 @@ class SearchHandler:

        if "subtitle_fragment_id" in hit_keys:
            youtube_id = hit["source"]["youtube_id"]
-            thumb_path = ThumbManager().vid_thumb_path(youtube_id)
+            thumb_path = ThumbManager(youtube_id).vid_thumb_path()
            hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}"

        return hit
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@ -173,30 +173,71 @@ class YoutubeChannel(YouTubeItem):
        self.es_path = f"{self.index_name}/_doc/{youtube_id}"
        self.all_playlists = False

-    def build_json(self, upload=False):
+    def build_json(self, upload=False, fallback=False):
        """get from es or from youtube"""
        self.get_from_es()
        if self.json_data:
            return

-        self.get_from_youtube()
+        self.get_from_youtube(fallback)
+
        if upload:
            self.upload_to_es()
        return

-    def get_from_youtube(self):
+    def get_from_youtube(self, fallback=False):
        """use bs4 to scrape channel about page"""
        self.json_data = ChannelScraper(self.youtube_id).get_json()
+
+        if not self.json_data and fallback:
+            self._video_fallback(fallback)
+
        self.get_channel_art()

+    def _video_fallback(self, fallback):
+        """use video metadata as fallback"""
+        print(f"{self.youtube_id}: fallback to video metadata")
+        self.json_data = {
+            "channel_active": False,
+            "channel_last_refresh": int(datetime.now().strftime("%s")),
+            "channel_subs": fallback.get("channel_follower_count", 0),
+            "channel_name": fallback["uploader"],
+            "channel_banner_url": False,
+            "channel_tvart_url": False,
+            "channel_id": self.youtube_id,
+            "channel_subscribed": False,
+            "channel_description": False,
+            "channel_thumb_url": False,
+            "channel_views": 0,
+        }
+        self._info_json_fallback()
+
+    def _info_json_fallback(self):
+        """read channel info.json for additional metadata"""
+        info_json = os.path.join(
+            self.config["application"]["cache_dir"],
+            "import",
+            f"{self.youtube_id}.info.json",
+        )
+        if os.path.exists(info_json):
+            print(f"{self.youtube_id}: read info.json file")
+            with open(info_json, "r", encoding="utf-8") as f:
+                content = json.loads(f.read())
+
+            self.json_data.update(
+                {
+                    "channel_subs": content["channel_follower_count"],
+                    "channel_description": content["description"],
+                }
+            )
+
    def get_channel_art(self):
        """download channel art for new channels"""
-        channel_id = self.youtube_id
-        channel_thumb = self.json_data["channel_thumb_url"]
-        channel_banner = self.json_data["channel_banner_url"]
-        ThumbManager().download_chan(
-            [(channel_id, channel_thumb, channel_banner)]
+        urls = (
+            self.json_data["channel_thumb_url"],
+            self.json_data["channel_banner_url"],
        )
+        ThumbManager(self.youtube_id, item_type="channel").download(urls)

    def sync_to_videos(self):
        """sync new channel_dict to all videos of channel"""
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@ -12,13 +12,16 @@ import shutil
 import subprocess

 from home.src.download.queue import PendingList
-from home.src.download.yt_dlp_handler import VideoDownloader
+from home.src.download.thumbnails import ThumbManager
 from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
-from home.src.index.video import index_new_video
+from home.src.index.video import YoutubeVideo, index_new_video
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import clean_string, ignore_filelist
 from home.src.ta.ta_redis import RedisArchivist
+from PIL import Image, ImageFile
+
+ImageFile.LOAD_TRUNCATED_IMAGES = True


 class FilesystemScanner:
@ -157,63 +160,140 @@ class FilesystemScanner:
            _, _ = ElasticWrap(path).delete()


-class ManualImport:
-    """import and indexing existing video files"""
+class ImportFolderScanner:
+    """import and indexing existing video files
+    - identify all media files belonging to a video
+    - identify youtube id
+    - convert if needed
+    """

    CONFIG = AppConfig().config
    CACHE_DIR = CONFIG["application"]["cache_dir"]
    IMPORT_DIR = os.path.join(CACHE_DIR, "import")

+    EXT_MAP = {
+        "media": [".mp4", ".mkv", ".webm"],
+        "metadata": [".json"],
+        "thumb": [".jpg", ".png", ".webp"],
+        "subtitle": [".vtt"],
+    }
+
    def __init__(self):
-        self.identified = self.import_folder_parser()
+        self.to_import = False

-    def import_folder_parser(self):
-        """detect files in import folder"""
-        import_files = os.listdir(self.IMPORT_DIR)
-        to_import = ignore_filelist(import_files)
-        to_import.sort()
-        video_files = [i for i in to_import if not i.endswith(".json")]
+    def scan(self):
+        """scan and match media files"""
+        all_files = self.get_all_files()
+        self.match_files(all_files)
+        self.process_videos()

-        identified = []
+        return self.to_import

-        for file_path in video_files:
+    def get_all_files(self):
+        """get all files in /import"""
+        rel_paths = ignore_filelist(os.listdir(self.IMPORT_DIR))
+        all_files = [os.path.join(self.IMPORT_DIR, i) for i in rel_paths]
+        all_files.sort()

-            file_dict = {"video_file": file_path}
-            file_name, _ = os.path.splitext(file_path)
-
-            matching_json = [
-                i
-                for i in to_import
-                if i.startswith(file_name) and i.endswith(".json")
-            ]
-            if matching_json:
-                json_file = matching_json[0]
-                youtube_id = self.extract_id_from_json(json_file)
-                file_dict.update({"json_file": json_file})
-            else:
-                youtube_id = self.extract_id_from_filename(file_name)
-                file_dict.update({"json_file": False})
-
-            file_dict.update({"youtube_id": youtube_id})
-            identified.append(file_dict)
-
-        return identified
+        return all_files

    @staticmethod
-    def extract_id_from_filename(file_name):
+    def _get_template():
+        """base dict for video"""
+        return {
+            "media": False,
+            "video_id": False,
+            "metadata": False,
+            "thumb": False,
+            "subtitle": [],
+        }
+
+    def match_files(self, all_files):
+        """loop through all files, join what matches
+
+        all_files: list of file paths, files belonging to the same video
+        need to follow each other as grouping only compares with the
+        previous base name
+        result is stored in self.to_import as a list of video dicts
+        """
+        self.to_import = []
+
+        current_video = self._get_template()
+        last_base = False
+
+        for file_path in all_files:
+            # strip up to two extensions to build the key files are grouped by
+            # NOTE(review): the second splitext also cuts at a dot inside the
+            # title of a media file, such a file would not share its base
+            # name with its .info.json - confirm
+            base_name_raw, ext = os.path.splitext(file_path)
+            base_name, _ = os.path.splitext(base_name_raw)
+
+            # skip files with an extension not listed in EXT_MAP
+            key, file_path = self._detect_type(file_path, ext)
+            if not key or not file_path:
+                continue
+
+            if base_name != last_base:
+                # base name changed: store what was collected so far, this is
+                # skipped for the very first file where last_base is False
+                # and happens whether or not a media file was matched
+                if last_base:
+                    self.to_import.append(current_video)
+
+                current_video = self._get_template()
+                last_base = base_name
+
+            # a video can have several subtitle files, everything else is
+            # a single path per key
+            if key == "subtitle":
+                current_video["subtitle"].append(file_path)
+            else:
+                current_video[key] = file_path
+
+        # the last collected video is only stored if it has a media file
+        if current_video.get("media"):
+            self.to_import.append(current_video)
+
+    def _detect_type(self, file_path, ext):
+        """map file extension to its EXT_MAP category
+
+        returns (category, file_path) for a known extension,
+        (False, False) otherwise
+        """
+        category = next(
+            (name for name, known in self.EXT_MAP.items() if ext in known),
+            False,
+        )
+        if category is False:
+            return False, False
+
+        return category, file_path
+
+    def process_videos(self):
+        """prepare and import every matched video
+
+        raises ValueError for an entry without media file
+        """
+        for video in self.to_import:
+            if not video["media"]:
+                print(f"{video}: no matching media file found.")
+                raise ValueError
+
+            # preparation steps, order matters: thumb is dumped from the
+            # original media file before the video gets converted
+            for step in (
+                self._detect_youtube_id,
+                self._dump_thumb,
+                self._convert_thumb,
+                self._convert_video,
+            ):
+                step(video)
+
+            ManualImport(video, self.CONFIG).run()
+
+    def _detect_youtube_id(self, current_video):
+        """find video id from filename or json
+
+        tries the media file name first and the matched json file second,
+        stores the result in current_video["video_id"]
+        raises ValueError if neither source yields an id
+        """
+        print(current_video)
+        youtube_id = self._extract_id_from_filename(current_video["media"])
+
+        # metadata stays False when no json file was matched, passing that
+        # on would fail with a TypeError instead of the ValueError below
+        metadata = current_video["metadata"]
+        if not youtube_id and metadata:
+            youtube_id = self._extract_id_from_json(metadata)
+
+        if not youtube_id:
+            print(current_video["media"])
+            raise ValueError("failed to find video id")
+
+        current_video["video_id"] = youtube_id
+
+    @staticmethod
+    def _extract_id_from_filename(file_name):
        """
        look at the file name for the youtube id
        expects filename ending in [<youtube_id>].<ext>
        """
-        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
+        base_name, _ = os.path.splitext(file_name)
+        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", base_name)
        if id_search:
            youtube_id = id_search.group(1)
            return youtube_id

-        print("failed to extract youtube id for: " + file_name)
-        raise Exception
+        print(f"id extraction failed from filename: {file_name}")

-    def extract_id_from_json(self, json_file):
+        return False
+
+    def _extract_id_from_json(self, json_file):
        """open json file and extract id"""
        json_path = os.path.join(self.CACHE_DIR, "import", json_file)
        with open(json_path, "r", encoding="utf-8") as f:
@ -223,66 +303,239 @@ class ManualImport:

        return youtube_id

-    def process_import(self):
-        """go through identified media files"""
+    def _dump_thumb(self, current_video):
+        """extract embedded thumb before converting
+
+        does nothing if a thumbnail file was already matched, otherwise
+        tries to dump the cover embedded in a mkv or mp4 media file and
+        stores the path of the dumped file in current_video["thumb"]
+        """
+        if current_video["thumb"]:
+            return

-        all_videos_added = []
+        media_path = current_video["media"]
+        _, ext = os.path.splitext(media_path)

-        for media_file in self.identified:
-            json_file = media_file["json_file"]
-            video_file = media_file["video_file"]
-            youtube_id = media_file["youtube_id"]
+        new_path = False
+        if ext == ".mkv":
+            idx, thumb_type = self._get_mkv_thumb_stream(media_path)
+            # idx 0 is the first attachment and valid, only False means
+            # that no cover was found
+            if idx is not False:
+                new_path = self.dump_mpv_thumb(media_path, idx, thumb_type)

-            video_path = os.path.join(self.CACHE_DIR, "import", video_file)
+        elif ext == ".mp4":
+            thumb_type = self.get_mp4_thumb_type(media_path)
+            if thumb_type:
+                new_path = self.dump_mp4_thumb(media_path, thumb_type)

-            self.move_to_cache(video_path, youtube_id)
+        if new_path:
+            current_video["thumb"] = new_path

-            # identify and archive
-            vid_dict = index_new_video(youtube_id)
-            VideoDownloader([youtube_id]).move_to_archive(vid_dict)
-            youtube_id = vid_dict["youtube_id"]
-            thumb_url = vid_dict["vid_thumb_url"]
-            all_videos_added.append((youtube_id, thumb_url))
+    def _get_mkv_thumb_stream(self, media_path):
+        """get stream idx of thumbnail for mkv files"""
+        streams = self._get_streams(media_path)
+        attachments = [
+            i for i in streams["streams"] if i["codec_type"] == "attachment"
+        ]

-            # cleanup
-            if os.path.exists(video_path):
-                os.remove(video_path)
-            if json_file:
-                json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-                os.remove(json_path)
+        for idx, stream in enumerate(attachments):
+            tags = stream["tags"]
+            if "mimetype" in tags and tags["filename"].startswith("cover"):
+                _, ext = os.path.splitext(tags["filename"])
+                return idx, ext

-        return all_videos_added
+        return False, False

-    def move_to_cache(self, video_path, youtube_id):
-        """move identified video file to cache, convert to mp4"""
-        file_name = os.path.split(video_path)[-1]
-        video_file, ext = os.path.splitext(file_name)
+    @staticmethod
+    def dump_mpv_thumb(media_path, idx, thumb_type):
+        """write cover to disk for mkv
+
+        media_path: path of the mkv file
+        idx: index of the cover among the attachment streams
+        thumb_type: file extension of the cover including the leading dot
+        returns the path ffmpeg is asked to write the cover to, ffmpeg
+        failures are not raised as the call runs with check=False
+        """
+        # splitext removes exactly the extension, str.rstrip would strip any
+        # trailing run of the characters in ".mkv" and eat into the file name
+        base_path, _ = os.path.splitext(media_path)
+        new_path = f"{base_path}{thumb_type}"
+        subprocess.run(
+            [
+                "ffmpeg",
+                "-v",
+                "quiet",
+                f"-dump_attachment:t:{idx}",
+                new_path,
+                "-i",
+                media_path,
+            ],
+            check=False,
+        )

-        # make sure youtube_id is in filename
-        if youtube_id not in video_file:
-            video_file = f"{video_file}_{youtube_id}"
+        return new_path

-        # move, convert if needed
+    def get_mp4_thumb_type(self, media_path):
+        """detect file type of embedded thumbnail
+
+        returns the codec name of the first png or jpg stream found in
+        media_path, False if there is none
+        """
+        streams = self._get_streams(media_path)
+
+        for stream in streams["streams"]:
+            # don't fail on stream entries without a codec_name key
+            codec_name = stream.get("codec_name")
+            if codec_name in ["png", "jpg"]:
+                return codec_name
+
+        # NOTE(review): ffprobe presumably reports embedded jpeg covers as
+        # "mjpeg", which would never match "jpg" above - confirm
+        return False
+
+    def _convert_thumb(self, current_video):
+        """convert all thumbnails to jpg
+
+        does nothing without thumbnail or if the file already ends in .jpg,
+        otherwise writes a jpg next to the original, deletes the original
+        and points current_video["thumb"] to the new file
+        """
+        thumb_path = current_video["thumb"]
+        if not thumb_path:
+            return
+
+        base_path, ext = os.path.splitext(thumb_path)
+        if ext == ".jpg":
+            return
+
+        new_path = f"{base_path}.jpg"
+        # close the source image before deleting the file it is read from
+        with Image.open(thumb_path) as img_raw:
+            img_raw.convert("RGB").save(new_path)
+
+        os.remove(thumb_path)
+        current_video["thumb"] = new_path
+
+    @staticmethod
+    def _get_streams(media_path):
+        """probe media_path with ffprobe, return the parsed json output
+
+        raises subprocess.CalledProcessError if ffprobe exits non zero
+        """
+        options = ["-v", "error", "-show_streams", "-print_format", "json"]
+        probe = subprocess.run(
+            ["ffprobe", *options, media_path],
+            capture_output=True,
+            check=True,
+        )
+
+        return json.loads(probe.stdout.decode())
+
+    @staticmethod
+    def dump_mp4_thumb(media_path, thumb_type):
+        """save cover to disk
+
+        media_path: path of the mp4 file
+        thumb_type: file extension of the cover without leading dot
+        returns the path of the dumped cover, next to the media file
+        raises subprocess.CalledProcessError if ffmpeg exits non zero
+        """
+        # splitext removes exactly the extension, str.rstrip would strip any
+        # trailing run of the characters in ".mp4" and eat into the file name
+        base_path, _ = os.path.splitext(media_path)
+        new_path = f"{base_path}.{thumb_type}"
+
+        # presumably maps all video streams and then unmaps the regular
+        # ones again so only the attached cover is copied - confirm
+        subprocess.run(
+            [
+                "ffmpeg",
+                "-i",
+                media_path,
+                "-map",
+                "0:v",
+                "-map",
+                "-0:V",
+                "-c",
+                "copy",
+                new_path,
+            ],
+            check=True,
+        )
+
+        return new_path
+
+    def _convert_video(self, current_video):
+        """convert if needed"""
+        current_path = current_video["media"]
+        base_path, ext = os.path.splitext(current_path)
        if ext == ".mp4":
-            new_file = video_file + ext
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            shutil.move(video_path, dest_path, copy_function=shutil.copyfile)
+            return
+
+        new_path = base_path + ".mp4"
+        subprocess.run(
+            [
+                "ffmpeg",
+                "-i",
+                current_path,
+                new_path,
+                "-loglevel",
+                "warning",
+                "-stats",
+            ],
+            check=True,
+        )
+        current_video["media"] = new_path
+        os.remove(current_path)
+
+
+class ManualImport:
+    """import single identified video"""
+
+    def __init__(self, current_video, config):
+        # dict describing a single video, the keys read by this class are
+        # media, video_id, metadata, thumb and subtitle
+        self.current_video = current_video
+        # application config, read for the cache_dir and videos paths
+        self.config = config
+
+    def run(self):
+        """run all
+
+        index the metadata, move the media file to the archive and
+        remove leftover files of the video from the import folder
+        """
+        json_data = self.index_metadata()
+        self._move_to_archive(json_data)
+        self._cleanup(json_data)
+
+    def index_metadata(self):
+        """get metadata from yt or json"""
+        video_id = self.current_video["video_id"]
+        video = YoutubeVideo(video_id)
+        video.build_json(
+            youtube_meta_overwrite=self._get_info_json(),
+            media_path=self.current_video["media"],
+        )
+        if not video.json_data:
+            print(f"{video_id}: manual import failed, and no metadata found.")
+            raise ValueError
+
+        video.check_subtitles()
+        video.upload_to_es()
+
+        if video.offline_import and self.current_video["thumb"]:
+            old_path = self.current_video["thumb"]
+            new_path = ThumbManager(video_id).vid_thumb_path(absolute=True)
+            shutil.move(old_path, new_path, copy_function=shutil.copyfile)
        else:
-            print(f"processing with ffmpeg: {video_file}")
-            new_file = video_file + ".mp4"
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            subprocess.run(
-                [
-                    "ffmpeg",
-                    "-i",
-                    video_path,
-                    dest_path,
-                    "-loglevel",
-                    "warning",
-                    "-stats",
-                ],
-                check=True,
-            )
+            url = video.json_data["vid_thumb_url"]
+            ThumbManager(video_id).download_video_thumb(url)
+
+        return video.json_data
+
+    def _get_info_json(self):
+        """load the matched info json file, False if there is none"""
+        metadata_path = self.current_video["metadata"]
+        if not metadata_path:
+            return False
+
+        with open(metadata_path, "r", encoding="utf-8") as json_file:
+            return json.loads(json_file.read())
+
+    def _move_to_archive(self, json_data):
+        """move the media file to its channel folder in the archive
+
+        folder and file name are taken from json_data["media_url"],
+        the channel folder is created if it doesn't exist
+        """
+        archive_root = self.config["application"]["videos"]
+        channel_name, file_name = os.path.split(json_data["media_url"])
+
+        target_folder = os.path.join(archive_root, channel_name)
+        if not os.path.exists(target_folder):
+            os.makedirs(target_folder)
+
+        shutil.move(
+            self.current_video["media"],
+            os.path.join(target_folder, file_name),
+            copy_function=shutil.copyfile,
+        )
+
+    def _cleanup(self, json_data):
+        """cleanup leftover files
+
+        removes the matched metadata, thumbnail and subtitle files and the
+        info.json of the channel from the import folder, if they exist
+        """
+        matched = [
+            self.current_video["metadata"],
+            self.current_video["thumb"],
+            *self.current_video["subtitle"],
+        ]
+        for file_path in matched:
+            # metadata and thumb are False if no such file was matched,
+            # os.path.exists(False) would check file descriptor 0 and
+            # os.remove(False) then fails with a TypeError
+            if file_path and os.path.exists(file_path):
+                os.remove(file_path)
+
+        channel_info = os.path.join(
+            self.config["application"]["cache_dir"],
+            "import",
+            f"{json_data['channel']['channel_id']}.info.json",
+        )
+        if os.path.exists(channel_info):
+            os.remove(channel_info)


 def scan_filesystem():
--- a/tubearchivist/home/src/index/playlist.py
+++ b/tubearchivist/home/src/index/playlist.py
@ -41,7 +41,6 @@ class YoutubePlaylist(YouTubeItem):
            self.process_youtube_meta()
            self.get_entries()
            self.json_data["playlist_entries"] = self.all_members
-            self.get_playlist_art()
            self.json_data["playlist_subscribed"] = subscribed

    def process_youtube_meta(self):
@ -81,12 +80,10 @@ class YoutubePlaylist(YouTubeItem):

        self.all_members = all_members

-    @staticmethod
-    def get_playlist_art():
+    def get_playlist_art(self):
        """download artwork of playlist"""
-        thumbnails = ThumbManager()
-        missing_playlists = thumbnails.get_missing_playlists()
-        thumbnails.download_playlist(missing_playlists)
+        url = self.json_data["playlist_thumbnail"]
+        ThumbManager(self.youtube_id, item_type="playlist").download(url)

    def add_vids_to_playlist(self):
        """sync the playlist id to videos"""
@ -145,17 +142,15 @@ class YoutubePlaylist(YouTubeItem):
            previous_item = False
        else:
            previous_item = all_entries[current_idx - 1]
-            prev_thumb = ThumbManager().vid_thumb_path(
-                previous_item["youtube_id"]
-            )
-            previous_item["vid_thumb"] = prev_thumb
+            prev_id = previous_item["youtube_id"]
+            previous_item["vid_thumb"] = ThumbManager(prev_id).vid_thumb_path()

        if current_idx == len(all_entries) - 1:
            next_item = False
        else:
            next_item = all_entries[current_idx + 1]
-            next_thumb = ThumbManager().vid_thumb_path(next_item["youtube_id"])
-            next_item["vid_thumb"] = next_thumb
+            next_id = next_item["youtube_id"]
+            next_item["vid_thumb"] = ThumbManager(next_id).vid_thumb_path()

        self.nav = {
            "playlist_meta": {
--- a/tubearchivist/home/src/index/reindex.py
+++ b/tubearchivist/home/src/index/reindex.py
@ -181,10 +181,10 @@ class Reindex:

        video.upload_to_es()

-        thumb_handler = ThumbManager()
-        thumb_handler.delete_vid_thumb(youtube_id)
-        to_download = (youtube_id, video.json_data["vid_thumb_url"])
-        thumb_handler.download_vid([to_download], notify=False)
+        thumb_handler = ThumbManager(youtube_id)
+        thumb_handler.delete_video_thumb()
+        thumb_handler.download_video_thumb(video.json_data["vid_thumb_url"])
+
        return

    @staticmethod
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@ -425,18 +425,23 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
        self.channel_id = False
        self.video_overwrites = video_overwrites
        self.es_path = f"{self.index_name}/_doc/{youtube_id}"
+        self.offline_import = False

-    def build_json(self):
+    def build_json(self, youtube_meta_overwrite=False, media_path=False):
        """build json dict of video"""
        self.get_from_youtube()
-        if not self.youtube_meta:
+        if not self.youtube_meta and not youtube_meta_overwrite:
            return

+        if not self.youtube_meta:
+            self.youtube_meta = youtube_meta_overwrite
+            self.offline_import = True
+
        self._process_youtube_meta()
        self._add_channel()
        self._add_stats()
        self.add_file_path()
-        self.add_player()
+        self.add_player(media_path)
        if self.config["downloads"]["integrate_ryd"]:
            self._get_ryd_stats()

@ -487,7 +492,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
    def _add_channel(self):
        """add channel dict to video json_data"""
        channel = ta_channel.YoutubeChannel(self.channel_id)
-        channel.build_json(upload=True)
+        channel.build_json(upload=True, fallback=self.youtube_meta)
        self.json_data.update({"channel": channel.json_data})

    def _add_stats(self):
@ -495,13 +500,14 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
        # likes
        like_count = self.youtube_meta.get("like_count", 0)
        dislike_count = self.youtube_meta.get("dislike_count", 0)
+        average_rating = self.youtube_meta.get("average_rating", 0)
        self.json_data.update(
            {
                "stats": {
                    "view_count": self.youtube_meta["view_count"],
                    "like_count": like_count,
                    "dislike_count": dislike_count,
-                    "average_rating": self.youtube_meta["average_rating"],
+                    "average_rating": average_rating,
                }
            }
        )
@ -518,8 +524,28 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):

        raise FileNotFoundError

-    def add_player(self):
+    def add_player(self, media_path=False):
        """add player information for new videos"""
+        vid_path = self._get_vid_path(media_path)
+
+        duration_handler = DurationConverter()
+        duration = duration_handler.get_sec(vid_path)
+        duration_str = duration_handler.get_str(duration)
+        self.json_data.update(
+            {
+                "player": {
+                    "watched": False,
+                    "duration": duration,
+                    "duration_str": duration_str,
+                }
+            }
+        )
+
+    def _get_vid_path(self, media_path=False):
+        """get path of media file"""
+        if media_path:
+            return media_path
+
        try:
            # when indexing from download task
            vid_path = self.build_dl_cache_path()
@ -535,18 +561,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
            else:
                raise FileNotFoundError("could not find video file") from err

-        duration_handler = DurationConverter()
-        duration = duration_handler.get_sec(vid_path)
-        duration_str = duration_handler.get_str(duration)
-        self.json_data.update(
-            {
-                "player": {
-                    "watched": False,
-                    "duration": duration,
-                    "duration_str": duration_str,
-                }
-            }
-        )
+        return vid_path

    def add_file_path(self):
        """build media_url for where file will be located"""
--- a/tubearchivist/home/tasks.py
+++ b/tubearchivist/home/tasks.py
@ -15,12 +15,12 @@ from home.src.download.subscriptions import (
    ChannelSubscription,
    PlaylistSubscription,
 )
-from home.src.download.thumbnails import ThumbManager, validate_thumbnails
+from home.src.download.thumbnails import ThumbFilesystem, ThumbValidator
 from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.index_setup import backup_all_indexes, restore_from_backup
 from home.src.index.channel import YoutubeChannel
 from home.src.index.filesystem import (
-    ManualImport,
+    ImportFolderScanner,
    reindex_old_documents,
    scan_filesystem,
 )
@ -150,10 +150,7 @@ def run_manual_import():
    try:
        have_lock = my_lock.acquire(blocking=False)
        if have_lock:
-            import_handler = ManualImport()
-            if import_handler.identified:
-                all_videos_added = import_handler.process_import()
-                ThumbManager().download_vid(all_videos_added)
+            ImportFolderScanner().scan()
        else:
            print("Did not acquire lock form import.")

@ -204,21 +201,19 @@ def kill_dl(task_id):
 def rescan_filesystem():
    """check the media folder for mismatches"""
    scan_filesystem()
-    validate_thumbnails()
+    ThumbValidator().download_missing()


@shared_task(name="thumbnail_check")
 def thumbnail_check():
    """validate thumbnails"""
-    validate_thumbnails()
+    ThumbValidator().download_missing()


@shared_task
 def re_sync_thumbs():
    """sync thumbnails to mediafiles"""
-    handler = ThumbManager()
-    video_list = handler.get_thumb_list()
-    handler.write_all_thumbs(video_list)
+    ThumbFilesystem().sync()


@shared_task
@ -229,9 +224,7 @@ def subscribe_to(url_str):
    for item in to_subscribe_list:
        to_sub_id = item["url"]
        if item["type"] == "playlist":
-            new_thumbs = PlaylistSubscription().process_url_str([item])
-            if new_thumbs:
-                ThumbManager().download_playlist(new_thumbs)
+            PlaylistSubscription().process_url_str([item])
            continue

        if item["type"] == "video":
--- a/tubearchivist/requirements.txt
+++ b/tubearchivist/requirements.txt
@ -1,6 +1,7 @@
 beautifulsoup4==4.11.1
 celery==5.2.7
-Django==4.0.6
+Django==4.1
+django-auth-ldap==4.1.0
 django-cors-headers==3.13.0
 djangorestframework==3.13.1
 Pillow==9.2.0
@ -9,5 +10,4 @@ requests==2.28.1
 ryd-client==0.0.3
 uWSGI==2.0.20
 whitenoise==6.2.0
-yt_dlp==2022.7.18
-django-auth-ldap==4.1.0
+yt_dlp==2022.8.8