major refactor, #build

Changes:
- merges the new restructured and split-up modules (import-path changes are sketched below)
- merges the refactored channel, video and playlist index classes
- merges code cleanup and readability improvements
simon 2022-01-27 16:08:16 +07:00
commit 8591c44ef2
45 changed files with 3080 additions and 2981 deletions
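Most of the diff below is the move from the flat home/src/ package to topical sub-packages (home/src/download, home/src/es, home/src/index, home/src/ta). As an orientation aid only, and not part of the commit itself, a minimal Python sketch of how import lines change, using the paths from the API views diff below:

# old layout (before this commit):
# from home.src.config import AppConfig
# from home.src.helper import UrlListParser
# from home.src.thumbnails import ThumbManager

# new layout (after this commit):
from home.src.download.thumbnails import ThumbManager
from home.src.ta.config import AppConfig
from home.src.ta.helper import UrlListParser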

View File

@@ -9,7 +9,6 @@ jobs:
 - run: pip install --upgrade pip wheel
 - run: pip install bandit black codespell flake8 flake8-bugbear
   flake8-comprehensions isort
-- run: bandit --recursive --skip B105,B108,B404,B603,B607 .
 - run: black --check --diff --line-length 79 .
 - run: codespell
 - run: flake8 . --count --max-complexity=12 --max-line-length=79

.gitignore vendored
View File

@@ -3,3 +3,6 @@ __pycache__
 # django testing db
 db.sqlite3
+# vscode custom conf
+.vscode

View File

@ -1,5 +0,0 @@
{
"python.linting.pylintEnabled": true,
"python.linting.pycodestyleEnabled": false,
"python.linting.enabled": true
}

View File

@@ -1,6 +1,6 @@
 # build the tube archivist image from default python slim image
-FROM python:3.10.1-slim-bullseye
+FROM python:3.10.2-slim-bullseye
 ARG TARGETPLATFORM
 ENV PYTHONUNBUFFERED 1
@@ -35,12 +35,12 @@ COPY ./tubearchivist/requirements.txt /requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt --src /usr/local/src
 # copy config files
-COPY nginx.conf /etc/nginx/conf.d/
+COPY docker_assets/nginx.conf /etc/nginx/conf.d/
 # copy application into container
 COPY ./tubearchivist /app
-COPY ./run.sh /app
-COPY ./uwsgi.ini /app
+COPY ./docker_assets/run.sh /app
+COPY ./docker_assets/uwsgi.ini /app
 # volumes
 VOLUME /cache

View File

@@ -85,9 +85,7 @@ function validate {
 fi
 echo "run validate on $check_path"
-echo "running bandit"
-bandit --recursive --skip B105,B108,B404,B603,B607 "$check_path"
 echo "running black"
 black --diff --color --check -l 79 "$check_path"
 echo "running codespell"

View File

@@ -1,11 +1,6 @@
 #!/bin/bash
 # startup script inside the container for tubearchivist
-# check environment
-if [[ -z "$DJANGO_DEBUG" ]]; then
-export DJANGO_DEBUG=False
-fi
 if [[ -z "$ELASTIC_USER" ]]; then
 export ELASTIC_USER=elastic
 fi

View File

@@ -1,9 +1,9 @@
 """all API views"""
 import requests
-from home.src.config import AppConfig
-from home.src.helper import UrlListParser
-from home.src.thumbnails import ThumbManager
+from home.src.download.thumbnails import ThumbManager
+from home.src.ta.config import AppConfig
+from home.src.ta.helper import UrlListParser
 from home.tasks import extrac_dl, subscribe_to
 from rest_framework.authentication import (
 SessionAuthentication,

View File

@@ -15,7 +15,7 @@ from os import environ, path
 from pathlib import Path
 from corsheaders.defaults import default_headers
-from home.src.config import AppConfig
+from home.src.ta.config import AppConfig
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 BASE_DIR = Path(__file__).resolve().parent.parent

View File

@@ -3,9 +3,9 @@
 import os
 from django.apps import AppConfig
-from home.src.config import AppConfig as ArchivistConfig
-from home.src.helper import RedisArchivist
-from home.src.index_management import index_check
+from home.src.es.index_setup import index_check
+from home.src.ta.config import AppConfig as ArchivistConfig
+from home.src.ta.ta_redis import RedisArchivist
 def sync_redis_state():

View File

@ -1,802 +0,0 @@
"""
Functionality:
- handle the download queue
- manage subscriptions to channels
- manage subscriptions to playlists
- downloading videos
"""
import json
import os
import shutil
from datetime import datetime
from time import sleep
import requests
import yt_dlp
from home.src.config import AppConfig
from home.src.helper import (
DurationConverter,
RedisArchivist,
RedisQueue,
clean_string,
ignore_filelist,
)
from home.src.index import (
IndexPaginate,
YoutubeChannel,
YoutubePlaylist,
YoutubeVideo,
index_new_video,
)
class PendingList:
"""manage the pending videos list"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
VIDEOS = CONFIG["application"]["videos"]
def __init__(self):
self.all_channel_ids = False
self.all_downloaded = False
self.missing_from_playlists = []
def parse_url_list(self, youtube_ids):
"""extract youtube ids from list"""
missing_videos = []
for entry in youtube_ids:
# notify
mess_dict = {
"status": "message:add",
"level": "info",
"title": "Adding to download queue.",
"message": "Extracting lists",
}
RedisArchivist().set_message("message:add", mess_dict)
# extract
url = entry["url"]
url_type = entry["type"]
if url_type == "video":
missing_videos.append(url)
elif url_type == "channel":
video_results = ChannelSubscription().get_last_youtube_videos(
url, limit=False
)
youtube_ids = [i[0] for i in video_results]
missing_videos = missing_videos + youtube_ids
elif url_type == "playlist":
self.missing_from_playlists.append(entry)
video_results = YoutubePlaylist(url).get_entries()
youtube_ids = [i["youtube_id"] for i in video_results]
missing_videos = missing_videos + youtube_ids
return missing_videos
def add_to_pending(self, missing_videos, ignore=False):
"""build the bulk json data from pending"""
# check if channel is indexed
channel_handler = ChannelSubscription()
all_indexed = channel_handler.get_channels(subscribed_only=False)
self.all_channel_ids = [i["channel_id"] for i in all_indexed]
# check if already there
self.all_downloaded = self.get_all_downloaded()
bulk_list, all_videos_added = self.build_bulk(missing_videos, ignore)
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request)
raise ValueError("failed to add video to download queue")
return all_videos_added
def build_bulk(self, missing_videos, ignore=False):
"""build the bulk lists"""
bulk_list = []
all_videos_added = []
for idx, youtube_id in enumerate(missing_videos):
# check if already downloaded
if youtube_id in self.all_downloaded:
continue
video = self.get_youtube_details(youtube_id)
# skip on download error
if not video:
continue
channel_indexed = video["channel_id"] in self.all_channel_ids
video["channel_indexed"] = channel_indexed
if ignore:
video["status"] = "ignore"
else:
video["status"] = "pending"
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(video))
all_videos_added.append((youtube_id, video["vid_thumb_url"]))
# notify
progress = f"{idx + 1}/{len(missing_videos)}"
mess_dict = {
"status": "message:add",
"level": "info",
"title": "Adding new videos to download queue.",
"message": "Progress: " + progress,
}
if idx + 1 == len(missing_videos):
RedisArchivist().set_message(
"message:add", mess_dict, expire=4
)
else:
RedisArchivist().set_message("message:add", mess_dict)
if idx + 1 % 25 == 0:
print("adding to queue progress: " + progress)
return bulk_list, all_videos_added
@staticmethod
def get_youtube_details(youtube_id):
"""get details from youtubedl for single pending video"""
obs = {
"default_search": "ytsearch",
"quiet": True,
"check_formats": "selected",
"noplaylist": True,
"writethumbnail": True,
"simulate": True,
}
try:
vid = yt_dlp.YoutubeDL(obs).extract_info(youtube_id)
except yt_dlp.utils.DownloadError:
print("failed to extract info for: " + youtube_id)
return False
# stop if video is streaming live now
if vid["is_live"]:
return False
# parse response
seconds = vid["duration"]
duration_str = DurationConverter.get_str(seconds)
if duration_str == "NA":
print(f"skip extracting duration for: {youtube_id}")
upload_date = vid["upload_date"]
upload_dt = datetime.strptime(upload_date, "%Y%m%d")
published = upload_dt.strftime("%Y-%m-%d")
# build dict
youtube_details = {
"youtube_id": youtube_id,
"channel_name": vid["channel"],
"vid_thumb_url": vid["thumbnail"],
"title": vid["title"],
"channel_id": vid["channel_id"],
"duration": duration_str,
"published": published,
"timestamp": int(datetime.now().strftime("%s")),
}
return youtube_details
@staticmethod
def get_all_pending():
"""get a list of all pending videos in ta_download"""
data = {
"query": {"match_all": {}},
"sort": [{"timestamp": {"order": "asc"}}],
}
all_results = IndexPaginate("ta_download", data).get_results()
all_pending = []
all_ignore = []
for result in all_results:
if result["status"] == "pending":
all_pending.append(result)
elif result["status"] == "ignore":
all_ignore.append(result)
return all_pending, all_ignore
@staticmethod
def get_all_indexed():
"""get a list of all videos indexed"""
data = {
"query": {"match_all": {}},
"sort": [{"published": {"order": "desc"}}],
}
all_indexed = IndexPaginate("ta_video", data).get_results()
return all_indexed
def get_all_downloaded(self):
"""get a list of all videos in archive"""
channel_folders = os.listdir(self.VIDEOS)
all_channel_folders = ignore_filelist(channel_folders)
all_downloaded = []
for channel_folder in all_channel_folders:
channel_path = os.path.join(self.VIDEOS, channel_folder)
videos = os.listdir(channel_path)
all_videos = ignore_filelist(videos)
youtube_vids = [i[9:20] for i in all_videos]
for youtube_id in youtube_vids:
all_downloaded.append(youtube_id)
return all_downloaded
def delete_from_pending(self, youtube_id):
"""delete the youtube_id from ta_download"""
url = f"{self.ES_URL}/ta_download/_doc/{youtube_id}"
response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
def delete_pending(self, status):
"""delete download queue based on status value"""
data = {"query": {"term": {"status": {"value": status}}}}
payload = json.dumps(data)
url = self.ES_URL + "/ta_download/_delete_by_query"
headers = {"Content-type": "application/json"}
response = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not response.ok:
print(response.text)
def ignore_from_pending(self, ignore_list):
"""build the bulk query string"""
stamp = int(datetime.now().strftime("%s"))
bulk_list = []
for youtube_id in ignore_list:
action = {"update": {"_id": youtube_id, "_index": "ta_download"}}
source = {"doc": {"status": "ignore", "timestamp": stamp}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request)
raise ValueError("failed to set video to ignore")
class ChannelSubscription:
"""manage the list of channels subscribed"""
def __init__(self):
config = AppConfig().config
self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.channel_size = config["subscriptions"]["channel_size"]
@staticmethod
def get_channels(subscribed_only=True):
"""get a list of all channels subscribed to"""
data = {
"sort": [{"channel_name.keyword": {"order": "asc"}}],
}
if subscribed_only:
data["query"] = {"term": {"channel_subscribed": {"value": True}}}
else:
data["query"] = {"match_all": {}}
all_channels = IndexPaginate("ta_channel", data).get_results()
return all_channels
def get_last_youtube_videos(self, channel_id, limit=True):
"""get a list of last videos from channel"""
url = f"https://www.youtube.com/channel/{channel_id}/videos"
obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
if limit:
obs["playlistend"] = self.channel_size
chan = yt_dlp.YoutubeDL(obs).extract_info(url, download=False)
last_videos = [(i["id"], i["title"]) for i in chan["entries"]]
return last_videos
def find_missing(self):
"""add missing videos from subscribed channels to pending"""
all_channels = self.get_channels()
pending_handler = PendingList()
all_pending, all_ignore = pending_handler.get_all_pending()
all_ids = [i["youtube_id"] for i in all_ignore + all_pending]
all_downloaded = pending_handler.get_all_downloaded()
to_ignore = all_ids + all_downloaded
missing_videos = []
for idx, channel in enumerate(all_channels):
channel_id = channel["channel_id"]
last_videos = self.get_last_youtube_videos(channel_id)
for video in last_videos:
if video[0] not in to_ignore:
missing_videos.append(video[0])
# notify
message = {
"status": "message:rescan",
"level": "info",
"title": "Scanning channels: Looking for new videos.",
"message": f"Progress: {idx + 1}/{len(all_channels)}",
}
if idx + 1 == len(all_channels):
RedisArchivist().set_message(
"message:rescan", message=message, expire=4
)
else:
RedisArchivist().set_message("message:rescan", message=message)
return missing_videos
def change_subscribe(self, channel_id, channel_subscribed):
"""subscribe or unsubscribe from channel and update"""
if not isinstance(channel_subscribed, bool):
print("invalid status, should be bool")
return
headers = {"Content-type": "application/json"}
channel_handler = YoutubeChannel(channel_id)
channel_dict = channel_handler.channel_dict
channel_dict["channel_subscribed"] = channel_subscribed
if channel_subscribed:
# handle subscribe
url = self.es_url + "/ta_channel/_doc/" + channel_id
payload = json.dumps(channel_dict)
print(channel_dict)
else:
url = self.es_url + "/ta_channel/_update/" + channel_id
payload = json.dumps({"doc": channel_dict})
# update channel
request = requests.post(
url, data=payload, headers=headers, auth=self.es_auth
)
if not request.ok:
print(request.text)
raise ValueError("failed change subscribe status")
# sync to videos
channel_handler.sync_to_videos()
if channel_handler.source == "scraped":
channel_handler.get_channel_art()
class PlaylistSubscription:
"""manage the playlist download functionality"""
def __init__(self):
self.config = AppConfig().config
@staticmethod
def get_playlists(subscribed_only=True):
"""get a list of all active playlists"""
data = {
"sort": [{"playlist_channel.keyword": {"order": "desc"}}],
}
data["query"] = {
"bool": {"must": [{"term": {"playlist_active": {"value": True}}}]}
}
if subscribed_only:
data["query"]["bool"]["must"].append(
{"term": {"playlist_subscribed": {"value": True}}}
)
all_playlists = IndexPaginate("ta_playlist", data).get_results()
return all_playlists
def process_url_str(self, new_playlists, subscribed=True):
"""process playlist subscribe form url_str"""
all_indexed = PendingList().get_all_indexed()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
new_thumbs = []
for idx, playlist in enumerate(new_playlists):
url_type = playlist["type"]
playlist_id = playlist["url"]
if not url_type == "playlist":
print(f"{playlist_id} not a playlist, skipping...")
continue
playlist_h = YoutubePlaylist(
playlist_id, all_youtube_ids=all_youtube_ids
)
if not playlist_h.get_es_playlist():
playlist_h.get_playlist_dict()
playlist_h.playlist_dict["playlist_subscribed"] = subscribed
playlist_h.upload_to_es()
playlist_h.add_vids_to_playlist()
thumb = playlist_h.playlist_dict["playlist_thumbnail"]
new_thumbs.append((playlist_id, thumb))
self.channel_validate(playlist_h)
else:
self.change_subscribe(playlist_id, subscribe_status=True)
# notify
message = {
"status": "message:subplaylist",
"level": "info",
"title": "Subscribing to Playlists",
"message": f"Processing {idx + 1} of {len(new_playlists)}",
}
RedisArchivist().set_message(
"message:subplaylist", message=message
)
return new_thumbs
@staticmethod
def channel_validate(playlist_handler):
"""make sure channel of playlist is there"""
channel_id = playlist_handler.playlist_dict["playlist_channel_id"]
channel_handler = YoutubeChannel(channel_id)
if channel_handler.source == "scraped":
channel_handler.channel_dict["channel_subscribed"] = False
channel_handler.upload_to_es()
channel_handler.get_channel_art()
def change_subscribe(self, playlist_id, subscribe_status):
"""change the subscribe status of a playlist"""
es_url = self.config["application"]["es_url"]
es_auth = self.config["application"]["es_auth"]
playlist_handler = YoutubePlaylist(playlist_id)
playlist_handler.get_playlist_dict()
subed_now = playlist_handler.playlist_dict["playlist_subscribed"]
if subed_now == subscribe_status:
# status already as expected, do nothing
return False
# update subscribed status
headers = {"Content-type": "application/json"}
url = f"{es_url}/ta_playlist/_update/{playlist_id}"
payload = json.dumps(
{"doc": {"playlist_subscribed": subscribe_status}}
)
response = requests.post(
url, data=payload, headers=headers, auth=es_auth
)
if not response.ok:
print(response.text)
raise ValueError("failed to change subscribe status")
return True
@staticmethod
def get_to_ignore():
"""get all youtube_ids already downloaded or ignored"""
pending_handler = PendingList()
all_pending, all_ignore = pending_handler.get_all_pending()
all_ids = [i["youtube_id"] for i in all_ignore + all_pending]
all_downloaded = pending_handler.get_all_downloaded()
to_ignore = all_ids + all_downloaded
return to_ignore
def find_missing(self):
"""find videos in subscribed playlists not downloaded yet"""
all_playlists = [i["playlist_id"] for i in self.get_playlists()]
to_ignore = self.get_to_ignore()
missing_videos = []
counter = 1
for playlist_id in all_playlists:
size_limit = self.config["subscriptions"]["channel_size"]
playlist_handler = YoutubePlaylist(playlist_id)
playlist = playlist_handler.update_playlist()
if not playlist:
playlist_handler.deactivate()
continue
if size_limit:
playlist_entries = playlist["playlist_entries"][:size_limit]
else:
playlist_entries = playlist["playlist_entries"]
all_missing = [i for i in playlist_entries if not i["downloaded"]]
message = {
"status": "message:rescan",
"level": "info",
"title": "Scanning playlists: Looking for new videos.",
"message": f"Progress: {counter}/{len(all_playlists)}",
}
RedisArchivist().set_message("message:rescan", message=message)
for video in all_missing:
youtube_id = video["youtube_id"]
if youtube_id not in to_ignore:
missing_videos.append(youtube_id)
counter = counter + 1
return missing_videos
class VideoDownloader:
"""
handle the video download functionality
if not initiated with list, take from queue
"""
def __init__(self, youtube_id_list=False):
self.youtube_id_list = youtube_id_list
self.config = AppConfig().config
self.channels = set()
def run_queue(self):
"""setup download queue in redis loop until no more items"""
queue = RedisQueue("dl_queue")
limit_queue = self.config["downloads"]["limit_count"]
if limit_queue:
queue.trim(limit_queue - 1)
while True:
youtube_id = queue.get_next()
if not youtube_id:
break
try:
self.dl_single_vid(youtube_id)
except yt_dlp.utils.DownloadError:
print("failed to download " + youtube_id)
continue
vid_dict = index_new_video(youtube_id)
self.channels.add(vid_dict["channel"]["channel_id"])
self.move_to_archive(vid_dict)
self.delete_from_pending(youtube_id)
autodelete_days = self.config["downloads"]["autodelete_days"]
if autodelete_days:
print(f"auto delete older than {autodelete_days} days")
self.auto_delete_watched(autodelete_days)
@staticmethod
def add_pending():
"""add pending videos to download queue"""
mess_dict = {
"status": "message:download",
"level": "info",
"title": "Looking for videos to download",
"message": "Scanning your download queue.",
}
RedisArchivist().set_message("message:download", mess_dict)
all_pending, _ = PendingList().get_all_pending()
to_add = [i["youtube_id"] for i in all_pending]
if not to_add:
# there is nothing pending
print("download queue is empty")
mess_dict = {
"status": "message:download",
"level": "error",
"title": "Download queue is empty",
"message": "Add some videos to the queue first.",
}
RedisArchivist().set_message("message:download", mess_dict)
return
queue = RedisQueue("dl_queue")
queue.add_list(to_add)
@staticmethod
def progress_hook(response):
"""process the progress_hooks from yt_dlp"""
# title
path = os.path.split(response["filename"])[-1][12:]
filename = os.path.splitext(os.path.splitext(path)[0])[0]
filename_clean = filename.replace("_", " ")
title = "Downloading: " + filename_clean
# message
try:
percent = response["_percent_str"]
size = response["_total_bytes_str"]
speed = response["_speed_str"]
eta = response["_eta_str"]
message = f"{percent} of {size} at {speed} - time left: {eta}"
except KeyError:
message = "processing"
mess_dict = {
"status": "message:download",
"level": "info",
"title": title,
"message": message,
}
RedisArchivist().set_message("message:download", mess_dict)
def build_obs(self):
"""build obs dictionary for yt-dlp"""
obs = {
"default_search": "ytsearch",
"merge_output_format": "mp4",
"restrictfilenames": True,
"outtmpl": (
self.config["application"]["cache_dir"]
+ "/download/"
+ self.config["application"]["file_template"]
),
"progress_hooks": [self.progress_hook],
"noprogress": True,
"quiet": True,
"continuedl": True,
"retries": 3,
"writethumbnail": False,
"noplaylist": True,
"check_formats": "selected",
}
if self.config["downloads"]["format"]:
obs["format"] = self.config["downloads"]["format"]
if self.config["downloads"]["limit_speed"]:
obs["ratelimit"] = self.config["downloads"]["limit_speed"] * 1024
throttle = self.config["downloads"]["throttledratelimit"]
if throttle:
obs["throttledratelimit"] = throttle * 1024
postprocessors = []
if self.config["downloads"]["add_metadata"]:
postprocessors.append(
{
"key": "FFmpegMetadata",
"add_chapters": True,
"add_metadata": True,
}
)
if self.config["downloads"]["add_thumbnail"]:
postprocessors.append(
{
"key": "EmbedThumbnail",
"already_have_thumbnail": True,
}
)
obs["writethumbnail"] = True
obs["postprocessors"] = postprocessors
return obs
def dl_single_vid(self, youtube_id):
"""download single video"""
dl_cache = self.config["application"]["cache_dir"] + "/download/"
obs = self.build_obs()
# check if already in cache to continue from there
all_cached = ignore_filelist(os.listdir(dl_cache))
for file_name in all_cached:
if youtube_id in file_name:
obs["outtmpl"] = os.path.join(dl_cache, file_name)
with yt_dlp.YoutubeDL(obs) as ydl:
try:
ydl.download([youtube_id])
except yt_dlp.utils.DownloadError:
print("retry failed download: " + youtube_id)
sleep(10)
ydl.download([youtube_id])
if obs["writethumbnail"]:
# webp files don't get cleaned up automatically
all_cached = ignore_filelist(os.listdir(dl_cache))
to_clean = [i for i in all_cached if not i.endswith(".mp4")]
for file_name in to_clean:
file_path = os.path.join(dl_cache, file_name)
os.remove(file_path)
def move_to_archive(self, vid_dict):
"""move downloaded video from cache to archive"""
videos = self.config["application"]["videos"]
host_uid = self.config["application"]["HOST_UID"]
host_gid = self.config["application"]["HOST_GID"]
channel_name = clean_string(vid_dict["channel"]["channel_name"])
# make archive folder with correct permissions
new_folder = os.path.join(videos, channel_name)
if not os.path.exists(new_folder):
os.makedirs(new_folder)
if host_uid and host_gid:
os.chown(new_folder, host_uid, host_gid)
# find real filename
cache_dir = self.config["application"]["cache_dir"]
all_cached = ignore_filelist(os.listdir(cache_dir + "/download/"))
for file_str in all_cached:
if vid_dict["youtube_id"] in file_str:
old_file = file_str
old_file_path = os.path.join(cache_dir, "download", old_file)
new_file_path = os.path.join(videos, vid_dict["media_url"])
# move media file and fix permission
shutil.move(old_file_path, new_file_path)
if host_uid and host_gid:
os.chown(new_file_path, host_uid, host_gid)
def delete_from_pending(self, youtube_id):
"""delete downloaded video from pending index if its there"""
es_url = self.config["application"]["es_url"]
es_auth = self.config["application"]["es_auth"]
url = f"{es_url}/ta_download/_doc/{youtube_id}"
response = requests.delete(url, auth=es_auth)
if not response.ok and not response.status_code == 404:
print(response.text)
def add_subscribed_channels(self):
"""add all channels subscribed to refresh"""
all_subscribed = PlaylistSubscription().get_playlists()
if not all_subscribed:
return
channel_ids = [i["playlist_channel_id"] for i in all_subscribed]
for channel_id in channel_ids:
self.channels.add(channel_id)
return
def validate_playlists(self):
"""look for playlist needing to update"""
print("sync playlists")
self.add_subscribed_channels()
all_indexed = PendingList().get_all_indexed()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
for id_c, channel_id in enumerate(self.channels):
playlists = YoutubeChannel(channel_id).get_indexed_playlists()
all_playlist_ids = [i["playlist_id"] for i in playlists]
for id_p, playlist_id in enumerate(all_playlist_ids):
playlist_handler = YoutubePlaylist(
playlist_id, all_youtube_ids=all_youtube_ids
)
playlist_dict = playlist_handler.update_playlist()
if not playlist_dict:
playlist_handler.deactivate()
continue
playlist_handler.add_vids_to_playlist()
# notify
title = (
"Processing playlists for channels: "
+ f"{id_c + 1}/{len(self.channels)}"
)
message = f"Progress: {id_p + 1}/{len(all_playlist_ids)}"
mess_dict = {
"status": "message:download",
"level": "info",
"title": title,
"message": message,
}
if id_p + 1 == len(all_playlist_ids):
RedisArchivist().set_message(
"message:download", mess_dict, expire=4
)
else:
RedisArchivist().set_message("message:download", mess_dict)
@staticmethod
def auto_delete_watched(autodelete_days):
"""delete watched videos after x days"""
now = int(datetime.now().strftime("%s"))
now_lte = now - autodelete_days * 24 * 60 * 60
data = {
"query": {"range": {"player.watched_date": {"lte": now_lte}}},
"sort": [{"player.watched_date": {"order": "asc"}}],
}
all_to_delete = IndexPaginate("ta_video", data).get_results()
all_youtube_ids = [i["youtube_id"] for i in all_to_delete]
if not all_youtube_ids:
return
for youtube_id in all_youtube_ids:
print(f"autodelete {youtube_id}")
YoutubeVideo(youtube_id).delete_media_file()
print("add deleted to ignore list")
pending_handler = PendingList()
pending_handler.add_to_pending(all_youtube_ids, ignore=True)

View File

@ -0,0 +1,263 @@
"""
Functionality:
- handle download queue
- linked with the ta_download index
"""
import json
import os
from datetime import datetime
import requests
import yt_dlp
from home.src.download.subscriptions import ChannelSubscription
from home.src.es.connect import IndexPaginate
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig
from home.src.ta.helper import DurationConverter, ignore_filelist
from home.src.ta.ta_redis import RedisArchivist
class PendingList:
"""manage the pending videos list"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
VIDEOS = CONFIG["application"]["videos"]
def __init__(self):
self.all_channel_ids = False
self.all_downloaded = False
self.missing_from_playlists = []
def parse_url_list(self, youtube_ids):
"""extract youtube ids from list"""
missing_videos = []
for entry in youtube_ids:
# notify
mess_dict = {
"status": "message:add",
"level": "info",
"title": "Adding to download queue.",
"message": "Extracting lists",
}
RedisArchivist().set_message("message:add", mess_dict)
# extract
url = entry["url"]
url_type = entry["type"]
if url_type == "video":
missing_videos.append(url)
elif url_type == "channel":
video_results = ChannelSubscription().get_last_youtube_videos(
url, limit=False
)
youtube_ids = [i[0] for i in video_results]
missing_videos = missing_videos + youtube_ids
elif url_type == "playlist":
self.missing_from_playlists.append(entry)
playlist = YoutubePlaylist(url)
playlist.build_json()
video_results = playlist.json_data.get("playlist_entries")
youtube_ids = [i["youtube_id"] for i in video_results]
missing_videos = missing_videos + youtube_ids
return missing_videos
def add_to_pending(self, missing_videos, ignore=False):
"""build the bulk json data from pending"""
# check if channel is indexed
channel_handler = ChannelSubscription()
all_indexed = channel_handler.get_channels(subscribed_only=False)
self.all_channel_ids = [i["channel_id"] for i in all_indexed]
# check if already there
self.all_downloaded = self.get_all_downloaded()
bulk_list, all_videos_added = self.build_bulk(missing_videos, ignore)
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request)
raise ValueError("failed to add video to download queue")
return all_videos_added
def build_bulk(self, missing_videos, ignore=False):
"""build the bulk lists"""
bulk_list = []
all_videos_added = []
for idx, youtube_id in enumerate(missing_videos):
# check if already downloaded
if youtube_id in self.all_downloaded:
continue
video = self.get_youtube_details(youtube_id)
# skip on download error
if not video:
continue
channel_indexed = video["channel_id"] in self.all_channel_ids
video["channel_indexed"] = channel_indexed
if ignore:
video["status"] = "ignore"
else:
video["status"] = "pending"
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(video))
all_videos_added.append((youtube_id, video["vid_thumb_url"]))
# notify
progress = f"{idx + 1}/{len(missing_videos)}"
mess_dict = {
"status": "message:add",
"level": "info",
"title": "Adding new videos to download queue.",
"message": "Progress: " + progress,
}
if idx + 1 == len(missing_videos):
RedisArchivist().set_message(
"message:add", mess_dict, expire=4
)
else:
RedisArchivist().set_message("message:add", mess_dict)
if idx + 1 % 25 == 0:
print("adding to queue progress: " + progress)
return bulk_list, all_videos_added
@staticmethod
def get_youtube_details(youtube_id):
"""get details from youtubedl for single pending video"""
obs = {
"default_search": "ytsearch",
"quiet": True,
"check_formats": "selected",
"noplaylist": True,
"writethumbnail": True,
"simulate": True,
}
try:
vid = yt_dlp.YoutubeDL(obs).extract_info(youtube_id)
except yt_dlp.utils.DownloadError:
print("failed to extract info for: " + youtube_id)
return False
# stop if video is streaming live now
if vid["is_live"]:
return False
# parse response
seconds = vid["duration"]
duration_str = DurationConverter.get_str(seconds)
if duration_str == "NA":
print(f"skip extracting duration for: {youtube_id}")
upload_date = vid["upload_date"]
upload_dt = datetime.strptime(upload_date, "%Y%m%d")
published = upload_dt.strftime("%Y-%m-%d")
# build dict
youtube_details = {
"youtube_id": youtube_id,
"channel_name": vid["channel"],
"vid_thumb_url": vid["thumbnail"],
"title": vid["title"],
"channel_id": vid["channel_id"],
"duration": duration_str,
"published": published,
"timestamp": int(datetime.now().strftime("%s")),
}
return youtube_details
@staticmethod
def get_all_pending():
"""get a list of all pending videos in ta_download"""
data = {
"query": {"match_all": {}},
"sort": [{"timestamp": {"order": "asc"}}],
}
all_results = IndexPaginate("ta_download", data).get_results()
all_pending = []
all_ignore = []
for result in all_results:
if result["status"] == "pending":
all_pending.append(result)
elif result["status"] == "ignore":
all_ignore.append(result)
return all_pending, all_ignore
@staticmethod
def get_all_indexed():
"""get a list of all videos indexed"""
data = {
"query": {"match_all": {}},
"sort": [{"published": {"order": "desc"}}],
}
all_indexed = IndexPaginate("ta_video", data).get_results()
return all_indexed
def get_all_downloaded(self):
"""get a list of all videos in archive"""
channel_folders = os.listdir(self.VIDEOS)
all_channel_folders = ignore_filelist(channel_folders)
all_downloaded = []
for channel_folder in all_channel_folders:
channel_path = os.path.join(self.VIDEOS, channel_folder)
videos = os.listdir(channel_path)
all_videos = ignore_filelist(videos)
youtube_vids = [i[9:20] for i in all_videos]
for youtube_id in youtube_vids:
all_downloaded.append(youtube_id)
return all_downloaded
def delete_from_pending(self, youtube_id):
"""delete the youtube_id from ta_download"""
url = f"{self.ES_URL}/ta_download/_doc/{youtube_id}"
response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
def delete_pending(self, status):
"""delete download queue based on status value"""
data = {"query": {"term": {"status": {"value": status}}}}
payload = json.dumps(data)
url = self.ES_URL + "/ta_download/_delete_by_query"
headers = {"Content-type": "application/json"}
response = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not response.ok:
print(response.text)
def ignore_from_pending(self, ignore_list):
"""build the bulk query string"""
stamp = int(datetime.now().strftime("%s"))
bulk_list = []
for youtube_id in ignore_list:
action = {"update": {"_id": youtube_id, "_index": "ta_download"}}
source = {"doc": {"status": "ignore", "timestamp": stamp}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request)
raise ValueError("failed to set video to ignore")

View File

@ -0,0 +1,214 @@
"""
Functionality:
- handle channel subscriptions
- handle playlist subscriptions
"""
import yt_dlp
from home.src.download import queue # partial import
from home.src.es.connect import IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
class ChannelSubscription:
"""manage the list of channels subscribed"""
def __init__(self):
config = AppConfig().config
self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.channel_size = config["subscriptions"]["channel_size"]
@staticmethod
def get_channels(subscribed_only=True):
"""get a list of all channels subscribed to"""
data = {
"sort": [{"channel_name.keyword": {"order": "asc"}}],
}
if subscribed_only:
data["query"] = {"term": {"channel_subscribed": {"value": True}}}
else:
data["query"] = {"match_all": {}}
all_channels = IndexPaginate("ta_channel", data).get_results()
return all_channels
def get_last_youtube_videos(self, channel_id, limit=True):
"""get a list of last videos from channel"""
url = f"https://www.youtube.com/channel/{channel_id}/videos"
obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
if limit:
obs["playlistend"] = self.channel_size
chan = yt_dlp.YoutubeDL(obs).extract_info(url, download=False)
last_videos = [(i["id"], i["title"]) for i in chan["entries"]]
return last_videos
def find_missing(self):
"""add missing videos from subscribed channels to pending"""
all_channels = self.get_channels()
pending_handler = queue.PendingList()
all_pending, all_ignore = pending_handler.get_all_pending()
all_ids = [i["youtube_id"] for i in all_ignore + all_pending]
all_downloaded = pending_handler.get_all_downloaded()
to_ignore = all_ids + all_downloaded
missing_videos = []
for idx, channel in enumerate(all_channels):
channel_id = channel["channel_id"]
last_videos = self.get_last_youtube_videos(channel_id)
for video in last_videos:
if video[0] not in to_ignore:
missing_videos.append(video[0])
# notify
message = {
"status": "message:rescan",
"level": "info",
"title": "Scanning channels: Looking for new videos.",
"message": f"Progress: {idx + 1}/{len(all_channels)}",
}
if idx + 1 == len(all_channels):
RedisArchivist().set_message(
"message:rescan", message=message, expire=4
)
else:
RedisArchivist().set_message("message:rescan", message=message)
return missing_videos
@staticmethod
def change_subscribe(channel_id, channel_subscribed):
"""subscribe or unsubscribe from channel and update"""
channel = YoutubeChannel(channel_id)
channel.build_json()
channel.json_data["channel_subscribed"] = channel_subscribed
channel.upload_to_es()
channel.sync_to_videos()
class PlaylistSubscription:
"""manage the playlist download functionality"""
def __init__(self):
self.config = AppConfig().config
@staticmethod
def get_playlists(subscribed_only=True):
"""get a list of all active playlists"""
data = {
"sort": [{"playlist_channel.keyword": {"order": "desc"}}],
}
data["query"] = {
"bool": {"must": [{"term": {"playlist_active": {"value": True}}}]}
}
if subscribed_only:
data["query"]["bool"]["must"].append(
{"term": {"playlist_subscribed": {"value": True}}}
)
all_playlists = IndexPaginate("ta_playlist", data).get_results()
return all_playlists
def process_url_str(self, new_playlists, subscribed=True):
"""process playlist subscribe form url_str"""
all_indexed = queue.PendingList().get_all_indexed()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
new_thumbs = []
for idx, playlist in enumerate(new_playlists):
url_type = playlist["type"]
playlist_id = playlist["url"]
if not url_type == "playlist":
print(f"{playlist_id} not a playlist, skipping...")
continue
playlist_h = YoutubePlaylist(playlist_id)
playlist_h.all_youtube_ids = all_youtube_ids
playlist_h.build_json()
playlist_h.json_data["playlist_subscribed"] = subscribed
playlist_h.upload_to_es()
playlist_h.add_vids_to_playlist()
self.channel_validate(playlist_h.json_data["playlist_channel_id"])
thumb = playlist_h.json_data["playlist_thumbnail"]
new_thumbs.append((playlist_id, thumb))
# notify
message = {
"status": "message:subplaylist",
"level": "info",
"title": "Subscribing to Playlists",
"message": f"Processing {idx + 1} of {len(new_playlists)}",
}
RedisArchivist().set_message(
"message:subplaylist", message=message
)
return new_thumbs
@staticmethod
def channel_validate(channel_id):
"""make sure channel of playlist is there"""
channel = YoutubeChannel(channel_id)
channel.build_json()
@staticmethod
def change_subscribe(playlist_id, subscribe_status):
"""change the subscribe status of a playlist"""
playlist = YoutubePlaylist(playlist_id)
playlist.build_json()
playlist.json_data["playlist_subscribed"] = subscribe_status
playlist.upload_to_es()
@staticmethod
def get_to_ignore():
"""get all youtube_ids already downloaded or ignored"""
pending_handler = queue.PendingList()
all_pending, all_ignore = pending_handler.get_all_pending()
all_ids = [i["youtube_id"] for i in all_ignore + all_pending]
all_downloaded = pending_handler.get_all_downloaded()
to_ignore = all_ids + all_downloaded
return to_ignore
def find_missing(self):
"""find videos in subscribed playlists not downloaded yet"""
all_playlists = [i["playlist_id"] for i in self.get_playlists()]
to_ignore = self.get_to_ignore()
missing_videos = []
for idx, playlist_id in enumerate(all_playlists):
size_limit = self.config["subscriptions"]["channel_size"]
playlist = YoutubePlaylist(playlist_id)
playlist.update_playlist()
if not playlist:
playlist.deactivate()
continue
playlist_entries = playlist.json_data["playlist_entries"]
if size_limit:
del playlist_entries[size_limit:]
all_missing = [i for i in playlist_entries if not i["downloaded"]]
message = {
"status": "message:rescan",
"level": "info",
"title": "Scanning playlists: Looking for new videos.",
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
}
RedisArchivist().set_message("message:rescan", message=message)
for video in all_missing:
youtube_id = video["youtube_id"]
if youtube_id not in to_ignore:
missing_videos.append(youtube_id)
return missing_videos
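home/src/download/subscriptions.py above pulls in the queue module as a whole (marked "# partial import" in the source), and both find_missing() methods return plain lists of youtube_ids. A hypothetical sketch of how the pieces fit together, not part of the commit:

# hypothetical usage sketch, not part of the commit
missing = ChannelSubscription().find_missing()
missing += PlaylistSubscription().find_missing()
# hand the collected ids to the queue module for indexing into ta_download
queue.PendingList().add_to_pending(missing)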

View File

@@ -1,16 +1,19 @@
 """
 functionality:
 - handle download and caching for thumbnails
+- check for missing thumbnails
 """
 import os
 from collections import Counter
 from time import sleep
-import home.src.download as download
 import requests
-from home.src.config import AppConfig
-from home.src.helper import RedisArchivist, ignore_filelist
+from home.src.download import queue  # partial import
+from home.src.download import subscriptions  # partial import
+from home.src.ta.config import AppConfig
+from home.src.ta.helper import ignore_filelist
+from home.src.ta.ta_redis import RedisArchivist
 from mutagen.mp4 import MP4, MP4Cover
 from PIL import Image
@@ -55,8 +58,8 @@ class ThumbManager:
 def get_needed_thumbs(self, missing_only=False):
 """get a list of all missing thumbnails"""
 all_thumbs = self.get_all_thumbs()
-all_indexed = download.PendingList().get_all_indexed()
-all_in_queue, all_ignored = download.PendingList().get_all_pending()
+all_indexed = queue.PendingList().get_all_indexed()
+all_in_queue, all_ignored = queue.PendingList().get_all_pending()
 needed_thumbs = []
 for video in all_indexed:
@@ -84,9 +87,8 @@ class ThumbManager:
 all_channel_art = os.listdir(self.CHANNEL_DIR)
 files = [i[0:24] for i in all_channel_art]
 cached_channel_ids = [k for (k, v) in Counter(files).items() if v > 1]
-channels = download.ChannelSubscription().get_channels(
-subscribed_only=False
-)
+channel_sub = subscriptions.ChannelSubscription()
+channels = channel_sub.get_channels(subscribed_only=False)
 missing_channels = []
 for channel in channels:
@@ -104,10 +106,8 @@ class ThumbManager:
 """get all missing playlist artwork"""
 all_downloaded = ignore_filelist(os.listdir(self.PLAYLIST_DIR))
 all_ids_downloaded = [i.replace(".jpg", "") for i in all_downloaded]
-playlists = download.PlaylistSubscription().get_playlists(
-subscribed_only=False
-)
+playlist_sub = subscriptions.PlaylistSubscription()
+playlists = playlist_sub.get_playlists(subscribed_only=False)
 missing_playlists = []
 for playlist in playlists:
@@ -276,7 +276,7 @@ class ThumbManager:
 def get_thumb_list(self):
 """get list of mediafiles and matching thumbnails"""
-all_indexed = download.PendingList().get_all_indexed()
+all_indexed = queue.PendingList().get_all_indexed()
 video_list = []
 for video in all_indexed:
 youtube_id = video["youtube_id"]

View File

@ -0,0 +1,313 @@
"""
functionality:
- handle yt_dlp
- build options and post processor
- download video files
- move to archive
"""
import os
import shutil
from datetime import datetime
from time import sleep
import requests
import yt_dlp
from home.src.download.queue import PendingList
from home.src.download.subscriptions import PlaylistSubscription
from home.src.es.connect import IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video import YoutubeVideo, index_new_video
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
class VideoDownloader:
"""
handle the video download functionality
if not initiated with list, take from queue
"""
def __init__(self, youtube_id_list=False):
self.obs = False
self.youtube_id_list = youtube_id_list
self.config = AppConfig().config
self._build_obs()
self.channels = set()
def run_queue(self):
"""setup download queue in redis loop until no more items"""
queue = RedisQueue("dl_queue")
limit_queue = self.config["downloads"]["limit_count"]
if limit_queue:
queue.trim(limit_queue - 1)
while True:
youtube_id = queue.get_next()
if not youtube_id:
break
try:
self._dl_single_vid(youtube_id)
except yt_dlp.utils.DownloadError:
print("failed to download " + youtube_id)
continue
vid_dict = index_new_video(youtube_id)
self.channels.add(vid_dict["channel"]["channel_id"])
self.move_to_archive(vid_dict)
self._delete_from_pending(youtube_id)
autodelete_days = self.config["downloads"]["autodelete_days"]
if autodelete_days:
print(f"auto delete older than {autodelete_days} days")
self.auto_delete_watched(autodelete_days)
@staticmethod
def add_pending():
"""add pending videos to download queue"""
mess_dict = {
"status": "message:download",
"level": "info",
"title": "Looking for videos to download",
"message": "Scanning your download queue.",
}
RedisArchivist().set_message("message:download", mess_dict)
all_pending, _ = PendingList().get_all_pending()
to_add = [i["youtube_id"] for i in all_pending]
if not to_add:
# there is nothing pending
print("download queue is empty")
mess_dict = {
"status": "message:download",
"level": "error",
"title": "Download queue is empty",
"message": "Add some videos to the queue first.",
}
RedisArchivist().set_message("message:download", mess_dict)
return
queue = RedisQueue("dl_queue")
queue.add_list(to_add)
@staticmethod
def _progress_hook(response):
"""process the progress_hooks from yt_dlp"""
# title
path = os.path.split(response["filename"])[-1][12:]
filename = os.path.splitext(os.path.splitext(path)[0])[0]
filename_clean = filename.replace("_", " ")
title = "Downloading: " + filename_clean
# message
try:
percent = response["_percent_str"]
size = response["_total_bytes_str"]
speed = response["_speed_str"]
eta = response["_eta_str"]
message = f"{percent} of {size} at {speed} - time left: {eta}"
except KeyError:
message = "processing"
mess_dict = {
"status": "message:download",
"level": "info",
"title": title,
"message": message,
}
RedisArchivist().set_message("message:download", mess_dict)
def _build_obs(self):
"""collection to build all obs passed to yt-dlp"""
self._build_obs_basic()
self._build_obs_user()
self._build_obs_postprocessors()
def _build_obs_basic(self):
"""initial obs"""
self.obs = {
"default_search": "ytsearch",
"merge_output_format": "mp4",
"restrictfilenames": True,
"outtmpl": (
self.config["application"]["cache_dir"]
+ "/download/"
+ self.config["application"]["file_template"]
),
"progress_hooks": [self._progress_hook],
"noprogress": True,
"quiet": True,
"continuedl": True,
"retries": 3,
"writethumbnail": False,
"noplaylist": True,
"check_formats": "selected",
}
def _build_obs_user(self):
"""build user customized options"""
if self.config["downloads"]["format"]:
self.obs["format"] = self.config["downloads"]["format"]
if self.config["downloads"]["limit_speed"]:
self.obs["ratelimit"] = (
self.config["downloads"]["limit_speed"] * 1024
)
throttle = self.config["downloads"]["throttledratelimit"]
if throttle:
self.obs["throttledratelimit"] = throttle * 1024
def _build_obs_postprocessors(self):
"""add postprocessor to obs"""
postprocessors = []
if self.config["downloads"]["add_metadata"]:
postprocessors.append(
{
"key": "FFmpegMetadata",
"add_chapters": True,
"add_metadata": True,
}
)
if self.config["downloads"]["add_thumbnail"]:
postprocessors.append(
{
"key": "EmbedThumbnail",
"already_have_thumbnail": True,
}
)
self.obs["writethumbnail"] = True
self.obs["postprocessors"] = postprocessors
def _dl_single_vid(self, youtube_id):
"""download single video"""
dl_cache = self.config["application"]["cache_dir"] + "/download/"
# check if already in cache to continue from there
all_cached = ignore_filelist(os.listdir(dl_cache))
for file_name in all_cached:
if youtube_id in file_name:
self.obs["outtmpl"] = os.path.join(dl_cache, file_name)
with yt_dlp.YoutubeDL(self.obs) as ydl:
try:
ydl.download([youtube_id])
except yt_dlp.utils.DownloadError:
print("retry failed download: " + youtube_id)
sleep(10)
ydl.download([youtube_id])
if self.obs["writethumbnail"]:
# webp files don't get cleaned up automatically
all_cached = ignore_filelist(os.listdir(dl_cache))
to_clean = [i for i in all_cached if not i.endswith(".mp4")]
for file_name in to_clean:
file_path = os.path.join(dl_cache, file_name)
os.remove(file_path)
def move_to_archive(self, vid_dict):
"""move downloaded video from cache to archive"""
videos = self.config["application"]["videos"]
host_uid = self.config["application"]["HOST_UID"]
host_gid = self.config["application"]["HOST_GID"]
channel_name = clean_string(vid_dict["channel"]["channel_name"])
# make archive folder with correct permissions
new_folder = os.path.join(videos, channel_name)
if not os.path.exists(new_folder):
os.makedirs(new_folder)
if host_uid and host_gid:
os.chown(new_folder, host_uid, host_gid)
# find real filename
cache_dir = self.config["application"]["cache_dir"]
all_cached = ignore_filelist(os.listdir(cache_dir + "/download/"))
for file_str in all_cached:
if vid_dict["youtube_id"] in file_str:
old_file = file_str
old_file_path = os.path.join(cache_dir, "download", old_file)
new_file_path = os.path.join(videos, vid_dict["media_url"])
# move media file and fix permission
shutil.move(old_file_path, new_file_path)
if host_uid and host_gid:
os.chown(new_file_path, host_uid, host_gid)
def _delete_from_pending(self, youtube_id):
"""delete downloaded video from pending index if its there"""
es_url = self.config["application"]["es_url"]
es_auth = self.config["application"]["es_auth"]
url = f"{es_url}/ta_download/_doc/{youtube_id}"
response = requests.delete(url, auth=es_auth)
if not response.ok and not response.status_code == 404:
print(response.text)
def _add_subscribed_channels(self):
"""add all channels subscribed to refresh"""
all_subscribed = PlaylistSubscription().get_playlists()
if not all_subscribed:
return
channel_ids = [i["playlist_channel_id"] for i in all_subscribed]
for channel_id in channel_ids:
self.channels.add(channel_id)
return
def validate_playlists(self):
"""look for playlist needing to update"""
print("sync playlists")
self._add_subscribed_channels()
all_indexed = PendingList().get_all_indexed()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
for id_c, channel_id in enumerate(self.channels):
playlists = YoutubeChannel(channel_id).get_indexed_playlists()
all_playlist_ids = [i["playlist_id"] for i in playlists]
for id_p, playlist_id in enumerate(all_playlist_ids):
playlist = YoutubePlaylist(playlist_id)
playlist.all_youtube_ids = all_youtube_ids
playlist.build_json(scrape=True)
if not playlist.json_data:
playlist.deactivate()
playlist.add_vids_to_playlist()
playlist.upload_to_es()
# notify
title = (
"Processing playlists for channels: "
+ f"{id_c + 1}/{len(self.channels)}"
)
message = f"Progress: {id_p + 1}/{len(all_playlist_ids)}"
mess_dict = {
"status": "message:download",
"level": "info",
"title": title,
"message": message,
}
if id_p + 1 == len(all_playlist_ids):
RedisArchivist().set_message(
"message:download", mess_dict, expire=4
)
else:
RedisArchivist().set_message("message:download", mess_dict)
@staticmethod
def auto_delete_watched(autodelete_days):
"""delete watched videos after x days"""
now = int(datetime.now().strftime("%s"))
now_lte = now - autodelete_days * 24 * 60 * 60
data = {
"query": {"range": {"player.watched_date": {"lte": now_lte}}},
"sort": [{"player.watched_date": {"order": "asc"}}],
}
all_to_delete = IndexPaginate("ta_video", data).get_results()
all_youtube_ids = [i["youtube_id"] for i in all_to_delete]
if not all_youtube_ids:
return
for youtube_id in all_youtube_ids:
print(f"autodelete {youtube_id}")
YoutubeVideo(youtube_id).delete_media_file()
print("add deleted to ignore list")
pending_handler = PendingList()
pending_handler.add_to_pending(all_youtube_ids, ignore=True)
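The VideoDownloader in home/src/download/yt_dlp_handler.py above now builds its yt-dlp options once in __init__ via _build_obs() instead of per download, and its internal helpers carry an underscore prefix. A hypothetical sketch of the public entry points, not part of the commit:

# hypothetical usage sketch, not part of the commit
downloader = VideoDownloader()
downloader.add_pending()  # push all ids with status "pending" onto the redis dl_queue
downloader.run_queue()    # download, index and archive until the queue is empty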

View File

View File

@ -0,0 +1,148 @@
"""
functionality:
- wrapper around requests to call elastic search
- reusable search_after to extract total index
"""
import json
import requests
from home.src.ta.config import AppConfig
class ElasticWrap:
"""makes all calls to elastic search
returns response json and status code tuple
"""
def __init__(self, path, config=False):
self.url = False
self.auth = False
self.path = path
self.config = config
self._get_config()
def _get_config(self):
"""add config if not passed"""
if not self.config:
self.config = AppConfig().config
es_url = self.config["application"]["es_url"]
self.auth = self.config["application"]["es_auth"]
self.url = f"{es_url}/{self.path}"
def get(self, data=False):
"""get data from es"""
if data:
response = requests.get(self.url, json=data, auth=self.auth)
else:
response = requests.get(self.url, auth=self.auth)
if not response.ok:
print(response.text)
return response.json(), response.status_code
def post(self, data=False, ndjson=False):
"""post data to es"""
if ndjson:
headers = {"Content-type": "application/x-ndjson"}
payload = data
else:
headers = {"Content-type": "application/json"}
payload = json.dumps(data)
if data:
response = requests.post(
self.url, data=payload, headers=headers, auth=self.auth
)
else:
response = requests.post(self.url, headers=headers, auth=self.auth)
if not response.ok:
print(response.text)
return response.json(), response.status_code
def put(self, data, refresh=False):
"""put data to es"""
if refresh:
self.url = f"{self.url}/?refresh=true"
response = requests.put(f"{self.url}", json=data, auth=self.auth)
if not response.ok:
print(response.text)
print(data)
raise ValueError("failed to add item to index")
return response.json(), response.status_code
def delete(self, data=False):
"""delete document from es"""
if data:
response = requests.delete(self.url, json=data, auth=self.auth)
else:
response = requests.delete(self.url, auth=self.auth)
if not response.ok:
print(response.text)
return response.json(), response.status_code
class IndexPaginate:
"""use search_after to go through whole index"""
DEFAULT_SIZE = 500
def __init__(self, index_name, data, size=False):
self.index_name = index_name
self.data = data
self.pit_id = False
self.size = size
def get_results(self):
"""get all results"""
self.get_pit()
self.validate_data()
all_results = self.run_loop()
self.clean_pit()
return all_results
def get_pit(self):
"""get pit for index"""
path = f"{self.index_name}/_pit?keep_alive=10m"
response, _ = ElasticWrap(path).post()
self.pit_id = response["id"]
def validate_data(self):
"""add pit and size to data"""
if "sort" not in self.data.keys():
print(self.data)
raise ValueError("missing sort key in data")
size = self.size or self.DEFAULT_SIZE
self.data["size"] = size
self.data["pit"] = {"id": self.pit_id, "keep_alive": "10m"}
def run_loop(self):
"""loop through results until last hit"""
all_results = []
while True:
response, _ = ElasticWrap("_search").get(data=self.data)
all_hits = response["hits"]["hits"]
if all_hits:
for hit in all_hits:
source = hit["_source"]
search_after = hit["sort"]
all_results.append(source)
# update search_after with last hit data
self.data["search_after"] = search_after
else:
break
return all_results
def clean_pit(self):
"""delete pit from elastic search"""
data = {"id": self.pit_id}
ElasticWrap("_pit").delete(data=data)

View File

@ -0,0 +1,274 @@
{
"index_config": [{
"index_name": "channel",
"expected_map": {
"channel_id": {
"type": "keyword"
},
"channel_name": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": false,
"max_shingle_size": 3
}
}
},
"channel_banner_url": {
"type": "keyword",
"index": false
},
"channel_thumb_url": {
"type": "keyword",
"index": false
},
"channel_description": {
"type": "text"
},
"channel_last_refresh": {
"type": "date",
"format": "epoch_second"
}
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {
"type": "custom",
"filter": ["lowercase"]
}
}
},
"number_of_replicas": "0"
}
},
{
"index_name": "video",
"expected_map": {
"vid_thumb_url": {
"type": "text",
"index": false
},
"date_downloaded": {
"type": "date"
},
"channel": {
"properties": {
"channel_id": {
"type": "keyword"
},
"channel_name": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": false,
"max_shingle_size": 3
}
}
},
"channel_banner_url": {
"type": "keyword",
"index": false
},
"channel_thumb_url": {
"type": "keyword",
"index": false
},
"channel_description": {
"type": "text"
},
"channel_last_refresh": {
"type": "date",
"format": "epoch_second"
}
}
},
"description": {
"type": "text"
},
"media_url": {
"type": "keyword",
"index": false
},
"tags": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"title": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": false,
"max_shingle_size": 3
}
}
},
"vid_last_refresh": {
"type": "date"
},
"youtube_id": {
"type": "keyword"
},
"published": {
"type": "date"
},
"playlist": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
}
}
}
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {
"type": "custom",
"filter": ["lowercase"]
}
}
},
"number_of_replicas": "0"
}
},
{
"index_name": "download",
"expected_map": {
"timestamp": {
"type": "date"
},
"channel_id": {
"type": "keyword"
},
"channel_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
}
}
},
"status": {
"type": "keyword"
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
}
}
},
"vid_thumb_url": {
"type": "keyword"
},
"youtube_id": {
"type": "keyword"
}
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {
"type": "custom",
"filter": ["lowercase"]
}
}
},
"number_of_replicas": "0"
}
},
{
"index_name": "playlist",
"expected_map": {
"playlist_id": {
"type": "keyword"
},
"playlist_description": {
"type": "text"
},
"playlist_name": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": false,
"max_shingle_size": 3
}
}
},
"playlist_channel": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower"
}
}
},
"playlist_channel_id": {
"type": "keyword"
},
"playlist_thumbnail": {
"type": "keyword"
},
"playlist_last_refresh": {
"type": "date"
}
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {
"type": "custom",
"filter": ["lowercase"]
}
}
},
"number_of_replicas": "0"
}
}
]
}
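
A sketch of how one entry of this mapping file translates into an index creation request; the actual create call lives in ElasticIndex (not shown in this hunk), so the ElasticWrap usage here is an assumption:

import json
from home.src.es.connect import ElasticWrap

with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f:
    index_config = json.load(f)["index_config"]

for index in index_config:
    body = {
        "settings": index["expected_set"],
        "mappings": {"properties": index["expected_map"]},
    }
    # PUT ta_<index_name> creates the index with the expected settings/mapping
    ElasticWrap(f"ta_{index['index_name']}").put(body)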

View File

@ -1,9 +1,8 @@
 """
-Functionality:
-- initial elastic search setup
-- index configuration is represented in INDEX_CONFIG
-- index mapping and settings validation
-- backup and restore
+functionality:
+- setup elastic index at first start
+- verify and update index mapping and settings if needed
+- backup and restore metadata
 """

 import json
@ -12,213 +11,8 @@ import zipfile
 from datetime import datetime

 import requests
-from home.src.config import AppConfig
-from home.src.helper import ignore_filelist
+from home.src.ta.config import AppConfig
+from home.src.ta.helper import ignore_filelist
# expected mapping and settings
INDEX_CONFIG = [
{
"index_name": "channel",
"expected_map": {
"channel_id": {
"type": "keyword",
},
"channel_name": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": False,
"max_shingle_size": 3,
},
},
},
"channel_banner_url": {"type": "keyword", "index": False},
"channel_thumb_url": {"type": "keyword", "index": False},
"channel_description": {"type": "text"},
"channel_last_refresh": {"type": "date", "format": "epoch_second"},
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {"type": "custom", "filter": ["lowercase"]}
}
},
"number_of_replicas": "0",
},
},
{
"index_name": "video",
"expected_map": {
"vid_thumb_url": {"type": "text", "index": False},
"date_downloaded": {"type": "date"},
"channel": {
"properties": {
"channel_id": {
"type": "keyword",
},
"channel_name": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": False,
"max_shingle_size": 3,
},
},
},
"channel_banner_url": {"type": "keyword", "index": False},
"channel_thumb_url": {"type": "keyword", "index": False},
"channel_description": {"type": "text"},
"channel_last_refresh": {
"type": "date",
"format": "epoch_second",
},
}
},
"description": {"type": "text"},
"media_url": {"type": "keyword", "index": False},
"tags": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
},
},
"title": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": False,
"max_shingle_size": 3,
},
},
},
"vid_last_refresh": {"type": "date"},
"youtube_id": {"type": "keyword"},
"published": {"type": "date"},
"playlist": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
}
},
},
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {"type": "custom", "filter": ["lowercase"]}
}
},
"number_of_replicas": "0",
},
},
{
"index_name": "download",
"expected_map": {
"timestamp": {"type": "date"},
"channel_id": {"type": "keyword"},
"channel_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
}
},
},
"status": {"type": "keyword"},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
}
},
},
"vid_thumb_url": {"type": "keyword"},
"youtube_id": {"type": "keyword"},
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {"type": "custom", "filter": ["lowercase"]}
}
},
"number_of_replicas": "0",
},
},
{
"index_name": "playlist",
"expected_map": {
"playlist_id": {"type": "keyword"},
"playlist_description": {"type": "text"},
"playlist_name": {
"type": "text",
"analyzer": "english",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
},
"search_as_you_type": {
"type": "search_as_you_type",
"doc_values": False,
"max_shingle_size": 3,
},
},
},
"playlist_channel": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "to_lower",
}
},
},
"playlist_channel_id": {"type": "keyword"},
"playlist_thumbnail": {"type": "keyword"},
"playlist_last_refresh": {"type": "date"},
},
"expected_set": {
"analysis": {
"normalizer": {
"to_lower": {"type": "custom", "filter": ["lowercase"]}
}
},
"number_of_replicas": "0",
},
},
]
 class ElasticIndex:
@ -602,48 +396,21 @@ class ElasticBackup:
         os.remove(file_path)

-def get_available_backups():
-    """return dict of available backups for settings view"""
-    backup_handler = ElasticBackup(INDEX_CONFIG, reason=False)
-    all_backup_files = backup_handler.get_all_backup_files()
-    return all_backup_files
-
-
-def backup_all_indexes(reason):
-    """backup all es indexes to disk"""
-    backup_handler = ElasticBackup(INDEX_CONFIG, reason)
-    for index in backup_handler.index_config:
-        index_name = index["index_name"]
-        if not backup_handler.index_exists(index_name):
-            continue
-        all_results = backup_handler.get_all_documents(index_name)
-        file_content = backup_handler.build_bulk(all_results)
-        backup_handler.write_es_json(file_content, index_name)
-        backup_handler.write_ta_json(all_results, index_name)
-    backup_handler.zip_it()
-    if reason == "auto":
-        backup_handler.rotate_backup()
-
-
-def restore_from_backup(filename):
-    """restore indexes from backup file"""
-    # delete
-    index_check(force_restore=True)
-    # recreate
-    backup_handler = ElasticBackup(INDEX_CONFIG, reason=False)
-    zip_content = backup_handler.unpack_zip_backup(filename)
-    backup_handler.restore_json_files(zip_content)
+def get_mapping():
+    """read index_mapping.json and get expected mapping and settings"""
+    with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f:
+        index_config = json.load(f).get("index_config")
+
+    return index_config


 def index_check(force_restore=False):
     """check if all indexes are created and have correct mapping"""
     backed_up = False
+    index_config = get_mapping()
-    for index in INDEX_CONFIG:
+    for index in index_config:
         index_name = index["index_name"]
         expected_map = index["expected_map"]
         expected_set = index["expected_set"]
@ -675,3 +442,42 @@ def index_check(force_restore=False):
         # else all good
         print(f"ta_{index_name} index is created and up to date...")

+
+def get_available_backups():
+    """return dict of available backups for settings view"""
+    index_config = get_mapping()
+    backup_handler = ElasticBackup(index_config, reason=False)
+    all_backup_files = backup_handler.get_all_backup_files()
+    return all_backup_files
+
+
+def backup_all_indexes(reason):
+    """backup all es indexes to disk"""
+    index_config = get_mapping()
+    backup_handler = ElasticBackup(index_config, reason)
+    for index in backup_handler.index_config:
+        index_name = index["index_name"]
+        if not backup_handler.index_exists(index_name):
+            continue
+        all_results = backup_handler.get_all_documents(index_name)
+        file_content = backup_handler.build_bulk(all_results)
+        backup_handler.write_es_json(file_content, index_name)
+        backup_handler.write_ta_json(all_results, index_name)
+    backup_handler.zip_it()
+    if reason == "auto":
+        backup_handler.rotate_backup()
+
+
+def restore_from_backup(filename):
+    """restore indexes from backup file"""
+    # delete
+    index_check(force_restore=True)
+    # recreate
+    index_config = get_mapping()
+    backup_handler = ElasticBackup(index_config, reason=False)
+    zip_content = backup_handler.unpack_zip_backup(filename)
+    backup_handler.restore_json_files(zip_content)
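
A minimal sketch of how these module-level helpers are driven; the module import name and the backup file name are assumptions:

from home.src.es import index_setup  # assumed module name

index_setup.index_check()                      # create or update all ta_* indexes
print(index_setup.get_available_backups())     # list backup zip files on disk
index_setup.backup_all_indexes(reason="auto")  # write a new backup and rotate old ones
index_setup.restore_from_backup("<backup file>.zip")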

View File

@ -4,19 +4,18 @@ Functionality:
 - called via user input
 """

-from home.src.download import (
-    ChannelSubscription,
-    PendingList,
-    PlaylistSubscription,
-)
-from home.src.helper import RedisArchivist, RedisQueue, UrlListParser
-from home.src.index import (
-    WatchState,
-    YoutubeChannel,
-    YoutubePlaylist,
-    YoutubeVideo,
-)
-from home.src.searching import SearchForm
+from home.src.download.queue import PendingList
+from home.src.download.subscriptions import (
+    ChannelSubscription,
+    PlaylistSubscription,
+)
+from home.src.frontend.searching import SearchForm
+from home.src.frontend.watched import WatchState
+from home.src.index.channel import YoutubeChannel
+from home.src.index.playlist import YoutubePlaylist
+from home.src.index.video import YoutubeVideo
+from home.src.ta.helper import UrlListParser
+from home.src.ta.ta_redis import RedisArchivist, RedisQueue
 from home.tasks import (
     download_pending,
     download_single,
@ -306,7 +305,7 @@ class PostData:
         playlist_dict = self.exec_val
         playlist_id = playlist_dict["playlist-id"]
         playlist_action = playlist_dict["playlist-action"]
-        print(f"delete {playlist_action} from playlist {playlist_id}")
+        print(f"{playlist_id}: delete playlist {playlist_action}")
         if playlist_action == "metadata":
             YoutubePlaylist(playlist_id).delete_metadata()
         elif playlist_action == "all":

View File

@ -6,36 +6,26 @@ Functionality:
 - calculate pagination values
 """

-import math
 import urllib.parse
 from datetime import datetime

-import requests
-from home.src.config import AppConfig
-from home.src.helper import RedisArchivist
-from home.src.thumbnails import ThumbManager
+from home.src.download.thumbnails import ThumbManager
+from home.src.es.connect import ElasticWrap
+from home.src.ta.config import AppConfig


 class SearchHandler:
     """search elastic search"""

-    CONFIG = AppConfig().config
-    CACHE_DIR = CONFIG["application"]["cache_dir"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
-
-    def __init__(self, url, data):
+    def __init__(self, path, config, data=False):
         self.max_hits = None
-        self.url = url
+        self.path = path
+        self.config = config
         self.data = data

     def get_data(self):
         """get the data"""
-        if self.data:
-            response = requests.get(
-                self.url, json=self.data, auth=self.ES_AUTH
-            ).json()
-        else:
-            response = requests.get(self.url, auth=self.ES_AUTH).json()
+        response, _ = ElasticWrap(self.path, config=self.config).get(self.data)

         if "hits" in response.keys():
             self.max_hits = response["hits"]["total"]["value"]
@ -153,11 +143,10 @@ class SearchForm:
     """build query from search form data"""

     CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]

     def multi_search(self, search_query):
         """searching through index"""
-        url = self.ES_URL + "/ta_video,ta_channel,ta_playlist/_search"
+        path = "ta_video,ta_channel,ta_playlist/_search"
         data = {
             "size": 30,
             "query": {
@ -184,7 +173,7 @@ class SearchForm:
                 }
             },
         }
-        look_up = SearchHandler(url, data)
+        look_up = SearchHandler(path, config=self.CONFIG, data=data)
         search_results = look_up.get_data()
         all_results = self.build_results(search_results)
@ -212,62 +201,3 @@ class SearchForm:
         }
         return all_results
class Pagination:
"""
figure out the pagination based on page size and total_hits
"""
def __init__(self, page_get, user_id, search_get=False):
self.user_id = user_id
self.page_size = self.get_page_size()
self.page_get = page_get
self.search_get = search_get
self.pagination = self.first_guess()
def get_page_size(self):
"""get default or user modified page_size"""
key = f"{self.user_id}:page_size"
page_size = RedisArchivist().get_message(key)["status"]
if not page_size:
config = AppConfig().config
page_size = config["archive"]["page_size"]
return page_size
def first_guess(self):
"""build first guess before api call"""
page_get = self.page_get
if page_get in [0, 1]:
page_from = 0
prev_pages = False
elif page_get > 1:
page_from = (page_get - 1) * self.page_size
prev_pages = [
i for i in range(page_get - 1, page_get - 6, -1) if i > 1
]
prev_pages.reverse()
pagination = {
"page_size": self.page_size,
"page_from": page_from,
"prev_pages": prev_pages,
"current_page": page_get,
}
if self.search_get:
pagination.update({"search_get": self.search_get})
return pagination
def validate(self, total_hits):
"""validate pagination with total_hits after making api call"""
page_get = self.page_get
max_pages = math.ceil(total_hits / self.page_size)
if page_get < max_pages and max_pages > 1:
self.pagination["last_page"] = max_pages
else:
self.pagination["last_page"] = False
next_pages = [
i for i in range(page_get + 1, page_get + 6) if 1 < i < max_pages
]
self.pagination["next_pages"] = next_pages
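
A short sketch of the new SearchHandler call pattern introduced above, using a hypothetical query body:

from home.src.frontend.searching import SearchHandler
from home.src.ta.config import AppConfig

config = AppConfig().config
data = {"size": 30, "query": {"match": {"title": "linux"}}}
look_up = SearchHandler("ta_video/_search", config=config, data=data)
results = look_up.get_data()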

View File

@ -0,0 +1,128 @@
"""
functionality:
- handle watched state for videos, channels and playlists
"""
import json
from datetime import datetime
import requests
from home.src.ta.config import AppConfig
from home.src.ta.helper import UrlListParser
class WatchState:
"""handle watched checkbox for videos and channels"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
HEADERS = {"Content-type": "application/json"}
def __init__(self, youtube_id):
self.youtube_id = youtube_id
self.stamp = int(datetime.now().strftime("%s"))
def mark_as_watched(self):
"""update es with new watched value"""
url_type = self.dedect_type()
if url_type == "video":
self.mark_vid_watched()
elif url_type == "channel":
self.mark_channel_watched()
elif url_type == "playlist":
self.mark_playlist_watched()
print(f"marked {self.youtube_id} as watched")
def mark_as_unwatched(self):
"""revert watched state to false"""
url_type = self.dedect_type()
if url_type == "video":
self.mark_vid_watched(revert=True)
print(f"revert {self.youtube_id} as unwatched")
def dedect_type(self):
"""find youtube id type"""
print(self.youtube_id)
url_process = UrlListParser(self.youtube_id).process_list()
url_type = url_process[0]["type"]
return url_type
def mark_vid_watched(self, revert=False):
"""change watched status of single video"""
url = self.ES_URL + "/ta_video/_update/" + self.youtube_id
data = {
"doc": {"player": {"watched": True, "watched_date": self.stamp}}
}
if revert:
data["doc"]["player"]["watched"] = False
payload = json.dumps(data)
request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
raise ValueError("failed to mark video as watched")
def mark_channel_watched(self):
"""change watched status of every video in channel"""
data = {
"query": {
"bool": {
"must": [
{
"term": {
"channel.channel_id": {
"value": self.youtube_id
}
}
},
{"term": {"player.watched": {"value": False}}},
]
}
},
"script": {
"source": "ctx._source.player['watched'] = true",
"lang": "painless",
},
}
payload = json.dumps(data)
url = f"{self.ES_URL}/ta_video/_update_by_query"
request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
raise ValueError("failed to mark channel as watched")
def mark_playlist_watched(self):
"""change watched state of all videos in playlist"""
data = {
"query": {
"bool": {
"must": [
{
"term": {
"playlist.keyword": {"value": self.youtube_id}
}
},
{"term": {"player.watched": {"value": False}}},
]
}
},
"script": {
"source": "ctx._source.player['watched'] = true",
"lang": "painless",
},
}
payload = json.dumps(data)
url = f"{self.ES_URL}/ta_video/_update_by_query"
request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
raise ValueError("failed to mark playlist as watched")
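
A minimal usage sketch for the class above; the ids are placeholders:

from home.src.frontend.watched import WatchState

WatchState("<video, channel or playlist id>").mark_as_watched()
WatchState("<video id>").mark_as_unwatched()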

View File

@ -1,970 +0,0 @@
"""
Functionality:
- index new videos into elastisearch
- extract video info with yt_dlp
- scrape youtube channel page if needed
"""
import json
import os
import re
from datetime import datetime
from time import sleep
import requests
import yt_dlp
from bs4 import BeautifulSoup
from home.src.config import AppConfig
from home.src.helper import DurationConverter, UrlListParser, clean_string
from home.src.thumbnails import ThumbManager
from ryd_client import ryd_client
class YoutubeChannel:
"""represents a single youtube channel"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
CACHE_DIR = CONFIG["application"]["cache_dir"]
VIDEOS = CONFIG["application"]["videos"]
def __init__(self, channel_id):
self.channel_id = channel_id
self.json_data = None
self.source = None
self.channel_dict = self.build_channel_dict()
def build_channel_dict(self, scrape=False):
"""combine the dicts build from extracted json payload"""
if scrape:
channel_dict = False
else:
channel_dict = self.get_es_channel()
if not channel_dict:
print("scrape data from youtube")
self.scrape_channel()
channel_dict = self.parse_channel_main()
channel_dict.update(self.parse_channel_meta())
self.source = "scraped"
return channel_dict
def get_es_channel(self):
"""get from elastic search first if possible"""
channel_id = self.channel_id
url = f"{self.ES_URL}/ta_channel/_doc/{channel_id}"
response = requests.get(url, auth=self.ES_AUTH)
if response.ok:
channel_source = response.json()["_source"]
self.source = "elastic"
return channel_source
return False
def scrape_channel(self):
"""scrape channel page for additional infos"""
channel_id = self.channel_id
url = f"https://www.youtube.com/channel/{channel_id}/about?hl=en"
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
response = requests.get(url, cookies=cookies, auth=self.ES_AUTH)
if response.ok:
channel_page = response.text
else:
print(f"failed to extract channel info for: {channel_id}")
raise ConnectionError
soup = BeautifulSoup(channel_page, "html.parser")
# load script into json
all_scripts = soup.find("body").find_all("script")
for script in all_scripts:
if "var ytInitialData = " in str(script):
script_content = str(script)
break
# extract payload
script_content = script_content.split("var ytInitialData = ")[1]
json_raw = script_content.rstrip(";</script>")
json_data = json.loads(json_raw)
# add to self
self.json_data = json_data
def parse_channel_main(self):
"""extract maintab values from scraped channel json data"""
main_tab = self.json_data["header"]["c4TabbedHeaderRenderer"]
channel_name = main_tab["title"]
last_refresh = int(datetime.now().strftime("%s"))
# channel_subs
try:
sub_text_simple = main_tab["subscriberCountText"]["simpleText"]
sub_text = sub_text_simple.split(" ")[0]
if sub_text[-1] == "K":
channel_subs = int(float(sub_text.replace("K", "")) * 1000)
elif sub_text[-1] == "M":
channel_subs = int(float(sub_text.replace("M", "")) * 1000000)
elif int(sub_text) >= 0:
channel_subs = int(sub_text)
else:
message = f"{sub_text} not dealt with"
print(message)
except KeyError:
channel_subs = 0
# banner
try:
all_banners = main_tab["banner"]["thumbnails"]
banner = sorted(all_banners, key=lambda k: k["width"])[-1]["url"]
except KeyError:
banner = False
# build and return dict
main_channel_dict = {
"channel_active": True,
"channel_last_refresh": last_refresh,
"channel_subs": channel_subs,
"channel_banner_url": banner,
"channel_name": channel_name,
"channel_id": self.channel_id,
}
return main_channel_dict
def parse_channel_meta(self):
"""extract meta tab values from channel payload"""
# meta tab
json_data = self.json_data
meta_tab = json_data["metadata"]["channelMetadataRenderer"]
description = meta_tab["description"]
all_thumbs = meta_tab["avatar"]["thumbnails"]
thumb_url = sorted(all_thumbs, key=lambda k: k["width"])[-1]["url"]
# stats tab
renderer = "twoColumnBrowseResultsRenderer"
all_tabs = json_data["contents"][renderer]["tabs"]
for tab in all_tabs:
if "tabRenderer" in tab.keys():
if tab["tabRenderer"]["title"] == "About":
about_tab = tab["tabRenderer"]["content"][
"sectionListRenderer"
]["contents"][0]["itemSectionRenderer"]["contents"][0][
"channelAboutFullMetadataRenderer"
]
break
try:
channel_views_text = about_tab["viewCountText"]["simpleText"]
channel_views = int(re.sub(r"\D", "", channel_views_text))
except KeyError:
channel_views = 0
meta_channel_dict = {
"channel_description": description,
"channel_thumb_url": thumb_url,
"channel_views": channel_views,
}
return meta_channel_dict
def get_channel_art(self):
"""download channel art for new channels"""
channel_id = self.channel_id
channel_thumb = self.channel_dict["channel_thumb_url"]
channel_banner = self.channel_dict["channel_banner_url"]
ThumbManager().download_chan(
[(channel_id, channel_thumb, channel_banner)]
)
def upload_to_es(self):
"""upload channel data to elastic search"""
url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}"
response = requests.put(url, json=self.channel_dict, auth=self.ES_AUTH)
print(f"added {self.channel_id} to es")
if not response.ok:
print(response.text)
raise ValueError("failed to add channel to index")
def sync_to_videos(self):
"""sync new channel_dict to all videos of channel"""
headers = {"Content-type": "application/json"}
channel_id = self.channel_id
# add ingest pipeline
processors = []
for field, value in self.channel_dict.items():
line = {"set": {"field": "channel." + field, "value": value}}
processors.append(line)
data = {"description": channel_id, "processors": processors}
payload = json.dumps(data)
url = self.ES_URL + "/_ingest/pipeline/" + channel_id
request = requests.put(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
# apply pipeline
data = {"query": {"match": {"channel.channel_id": channel_id}}}
payload = json.dumps(data)
url = self.ES_URL + "/ta_video/_update_by_query?pipeline=" + channel_id
request = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
def get_folder_path(self):
"""get folder where media files get stored"""
channel_name = self.channel_dict["channel_name"]
folder_name = clean_string(channel_name)
folder_path = os.path.join(self.VIDEOS, folder_name)
return folder_path
def delete_es_videos(self):
"""delete all channel documents from elasticsearch"""
headers = {"Content-type": "application/json"}
data = {
"query": {
"term": {"channel.channel_id": {"value": self.channel_id}}
}
}
payload = json.dumps(data)
url = self.ES_URL + "/ta_video/_delete_by_query"
response = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not response.ok:
print(response.text)
def delete_playlists(self):
"""delete all indexed playlist from es"""
all_playlists = self.get_indexed_playlists()
for playlist in all_playlists:
playlist_id = playlist["playlist_id"]
YoutubePlaylist(playlist_id).delete_metadata()
def delete_channel(self):
"""delete channel and all videos"""
print(f"deleting {self.channel_id} and all matching media files")
folder_path = self.get_folder_path()
print("delete all media files")
try:
all_videos = os.listdir(folder_path)
for video in all_videos:
video_path = os.path.join(folder_path, video)
os.remove(video_path)
os.rmdir(folder_path)
except FileNotFoundError:
print(f"no videos found for {folder_path}")
ThumbManager().delete_chan_thumb(self.channel_id)
print("delete indexed playlists")
self.delete_playlists()
print("delete indexed videos")
self.delete_es_videos()
url = self.ES_URL + "/ta_channel/_doc/" + self.channel_id
response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
def get_all_playlists(self):
"""get all playlists owned by this channel"""
url = (
f"https://www.youtube.com/channel/{self.channel_id}"
+ "/playlists?view=1&sort=dd&shelf_id=0"
)
obs = {
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
return all_entries
def get_indexed_playlists(self):
"""get all indexed playlists from channel"""
data = {
"query": {
"term": {"playlist_channel_id": {"value": self.channel_id}}
},
"sort": [{"playlist_channel.keyword": {"order": "desc"}}],
}
all_playlists = IndexPaginate("ta_playlist", data).get_results()
return all_playlists
class YoutubeVideo:
"""represents a single youtube video"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
CACHE_DIR = CONFIG["application"]["cache_dir"]
VIDEOS = CONFIG["application"]["videos"]
def __init__(self, youtube_id):
self.youtube_id = youtube_id
self.channel_id = None
self.vid_dict = None
def get_vid_dict(self):
"""wrapper to loop around yt_dlp to retry on failure"""
print(f"get video data for {self.youtube_id}")
vid_dict = False
for i in range(3):
try:
vid_dict = self.get_youtubedl_vid_data()
except KeyError as e:
print(e)
sleep((i + 1) ** 2)
continue
else:
break
self.vid_dict = vid_dict
if self.CONFIG["downloads"]["integrate_ryd"]:
self.get_ryd_stats()
def get_youtubedl_vid_data(self):
"""parse youtubedl extract info"""
youtube_id = self.youtube_id
obs = {
"quiet": True,
"default_search": "ytsearch",
"skip_download": True,
"check_formats": "selected",
"noplaylist": True,
}
try:
vid = yt_dlp.YoutubeDL(obs).extract_info(youtube_id)
except (
yt_dlp.utils.ExtractorError,
yt_dlp.utils.DownloadError,
):
print("failed to get info for " + youtube_id)
return False
# extract
self.channel_id = vid["channel_id"]
upload_date = vid["upload_date"]
upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
published = upload_date_time.strftime("%Y-%m-%d")
last_refresh = int(datetime.now().strftime("%s"))
# likes
try:
like_count = vid["like_count"]
except KeyError:
like_count = 0
try:
dislike_count = vid["dislike_count"]
except KeyError:
dislike_count = 0
# build dicts
stats = {
"view_count": vid["view_count"],
"like_count": like_count,
"dislike_count": dislike_count,
"average_rating": vid["average_rating"],
}
vid_basic = {
"title": vid["title"],
"description": vid["description"],
"category": vid["categories"],
"vid_thumb_url": vid["thumbnail"],
"tags": vid["tags"],
"published": published,
"stats": stats,
"vid_last_refresh": last_refresh,
"date_downloaded": last_refresh,
"youtube_id": youtube_id,
"active": True,
"channel": False,
}
return vid_basic
def add_player(self, missing_vid):
"""add player information for new videos"""
cache_path = self.CACHE_DIR + "/download/"
videos = self.VIDEOS
if missing_vid:
# coming from scan_filesystem
channel_name, file_name, _ = missing_vid
vid_path = os.path.join(videos, channel_name, file_name)
else:
# coming from VideoDownload
all_cached = os.listdir(cache_path)
for file_cached in all_cached:
if self.youtube_id in file_cached:
vid_path = os.path.join(cache_path, file_cached)
break
duration_handler = DurationConverter()
duration = duration_handler.get_sec(vid_path)
duration_str = duration_handler.get_str(duration)
player = {
"watched": False,
"duration": duration,
"duration_str": duration_str,
}
self.vid_dict["player"] = player
def build_file_path(self, channel_name):
"""build media_url from where file will be located"""
clean_channel_name = clean_string(channel_name)
timestamp = self.vid_dict["published"].replace("-", "")
youtube_id = self.vid_dict["youtube_id"]
title = self.vid_dict["title"]
clean_title = clean_string(title)
filename = f"{timestamp}_{youtube_id}_{clean_title}.mp4"
media_url = os.path.join(clean_channel_name, filename)
self.vid_dict["media_url"] = media_url
def get_es_data(self):
"""get current data from elastic search"""
url = self.ES_URL + "/ta_video/_doc/" + self.youtube_id
response = requests.get(url, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
es_vid_dict = json.loads(response.text)
return es_vid_dict
def upload_to_es(self):
"""upload video data to elastic search"""
url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}/?refresh=true"
response = requests.put(url, json=self.vid_dict, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
raise ValueError("failed to add video to index")
def deactivate(self):
"""deactivate document on extractor error"""
youtube_id = self.youtube_id
headers = {"Content-type": "application/json"}
url = f"{self.ES_URL}/ta_video/_update/{youtube_id}"
data = {"script": "ctx._source.active = false"}
json_str = json.dumps(data)
response = requests.post(
url, data=json_str, headers=headers, auth=self.ES_AUTH
)
print(f"deactivated {youtube_id}")
if not response.ok:
print(response.text)
def delete_media_file(self):
"""delete video file, meta data, thumbnails"""
# delete media file
es_vid_dict = self.get_es_data()
media_url = es_vid_dict["_source"]["media_url"]
print(f"delete {media_url} from file system")
to_delete = os.path.join(self.VIDEOS, media_url)
os.remove(to_delete)
# delete from index
url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}"
response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
# delete thumbs from cache
ThumbManager().delete_vid_thumb(self.youtube_id)
def get_ryd_stats(self):
"""get optional stats from returnyoutubedislikeapi.com"""
try:
print(f"get ryd stats for: {self.youtube_id}")
result = ryd_client.get(self.youtube_id)
except requests.exceptions.ConnectionError:
print(f"failed to query ryd api, skipping {self.youtube_id}")
return False
if result["status"] == 404:
return False
dislikes = {
"dislike_count": result["dislikes"],
"average_rating": result["rating"],
}
self.vid_dict["stats"].update(dislikes)
return True
class YoutubePlaylist:
"""represent a single playlist on YouTube"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
def __init__(self, playlist_id, all_youtube_ids=False):
self.playlist_id = playlist_id
self.stamp = int(datetime.now().strftime("%s"))
self.all_youtube_ids = all_youtube_ids
self.playlist_dict = False
def get_playlist_dict(self, scrape=False):
"""get data from es or youtube"""
print(f"get playlist with id {self.playlist_id}")
if scrape:
playlist_dict = self.get_youtube_playlist()
if not playlist_dict:
return False
playlist_dict["playlist_entries"] = self.get_entries()
else:
playlist_dict = self.get_es_playlist()
if not playlist_dict:
playlist_dict = self.get_youtube_playlist()
playlist_dict["playlist_entries"] = self.get_entries()
self.playlist_dict = playlist_dict
return True
def get_youtube_playlist(self):
"""get meta data dict from youtube"""
url = "https://www.youtube.com/playlist?list=" + self.playlist_id
obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
"playlistend": 0,
}
try:
playlist = yt_dlp.YoutubeDL(obs).extract_info(url, download=False)
except (
yt_dlp.utils.ExtractorError,
yt_dlp.utils.DownloadError,
):
print("failed to get info for " + self.playlist_id)
return False
playlist_es = {
"playlist_id": self.playlist_id,
"playlist_active": True,
"playlist_subscribed": False,
"playlist_name": playlist["title"],
"playlist_channel": playlist["channel"],
"playlist_channel_id": playlist["channel_id"],
"playlist_thumbnail": playlist["thumbnails"][-1]["url"],
"playlist_description": playlist["description"] or False,
"playlist_last_refresh": self.stamp,
}
return playlist_es
def get_es_playlist(self):
"""get indexed data from es"""
url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
response = requests.get(url, auth=self.ES_AUTH)
if response.ok:
return json.loads(response.text)["_source"]
return False
def get_entries(self, playlistend=False):
"""get all videos in playlist"""
url = "https://www.youtube.com/playlist?list=" + self.playlist_id
obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
if playlistend:
obs["playlistend"] = playlistend
try:
playlist = yt_dlp.YoutubeDL(obs).extract_info(url, download=False)
except (
yt_dlp.utils.ExtractorError,
yt_dlp.utils.DownloadError,
):
print("failed to get plealist entries for " + self.playlist_id)
return False
all_members = []
for idx, entry in enumerate(playlist["entries"]):
uploader = entry["uploader"]
youtube_id = entry["id"]
if self.all_youtube_ids:
downloaded = youtube_id in self.all_youtube_ids
else:
downloaded = False
if not uploader:
continue
to_append = {
"youtube_id": youtube_id,
"title": entry["title"],
"uploader": uploader,
"idx": idx,
"downloaded": downloaded,
}
all_members.append(to_append)
return all_members
def upload_to_es(self):
"""add playlist to es with its entries"""
playlist = self.playlist_dict
url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
response = requests.put(url, json=playlist, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
raise ValueError("failed to add playlist to index")
def add_vids_to_playlist(self):
"""sync the playlist id to videos"""
playlist_dict = self.playlist_dict
script = (
'if (!ctx._source.containsKey("playlist")) '
+ "{ctx._source.playlist = [params.playlist]} "
+ "else if (!ctx._source.playlist.contains(params.playlist)) "
+ "{ctx._source.playlist.add(params.playlist)} "
+ "else {ctx.op = 'none'}"
)
bulk_list = []
for entry in playlist_dict["playlist_entries"]:
youtube_id = entry["youtube_id"]
action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
source = {
"script": {
"source": script,
"lang": "painless",
"params": {"playlist": self.playlist_id},
}
}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
response = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not response.ok:
print(response.text)
def update_playlist(self):
"""update metadata for playlist with data from YouTube"""
subscribed = self.get_es_playlist()["playlist_subscribed"]
self.get_playlist_dict(scrape=True)
if not self.playlist_dict:
# return false to deactivate
return False
self.playlist_dict["playlist_subscribed"] = subscribed
self.upload_to_es()
return self.playlist_dict
def build_nav(self, youtube_id):
"""find next and previous in playlist of a given youtube_id"""
all_entries_available = self.playlist_dict["playlist_entries"]
all_entries = [i for i in all_entries_available if i["downloaded"]]
current = [i for i in all_entries if i["youtube_id"] == youtube_id]
# stop if not found or playlist of 1
if not current or not len(all_entries) > 1:
return False
current_idx = all_entries.index(current[0])
if current_idx == 0:
previous_item = False
else:
previous_item = all_entries[current_idx - 1]
prev_thumb = ThumbManager().vid_thumb_path(
previous_item["youtube_id"]
)
previous_item["vid_thumb"] = prev_thumb
if current_idx == len(all_entries) - 1:
next_item = False
else:
next_item = all_entries[current_idx + 1]
next_thumb = ThumbManager().vid_thumb_path(next_item["youtube_id"])
next_item["vid_thumb"] = next_thumb
nav = {
"playlist_meta": {
"current_idx": current[0]["idx"],
"playlist_id": self.playlist_id,
"playlist_name": self.playlist_dict["playlist_name"],
"playlist_channel": self.playlist_dict["playlist_channel"],
},
"playlist_previous": previous_item,
"playlist_next": next_item,
}
return nav
def delete_metadata(self):
"""delete metadata for playlist"""
script = (
"ctx._source.playlist.removeAll("
+ "Collections.singleton(params.playlist)) "
)
data = {
"query": {
"term": {"playlist.keyword": {"value": self.playlist_id}}
},
"script": {
"source": script,
"lang": "painless",
"params": {"playlist": self.playlist_id},
},
}
payload = json.dumps(data)
url = f"{self.ES_URL}/ta_video/_update_by_query"
headers = {"Content-type": "application/json"}
response = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not response.ok:
print(response.text)
self.delete_playlist()
def delete_videos_playlist(self):
"""delete playlist with all videos"""
print(f"delete playlist {self.playlist_id} with all videos")
self.get_playlist_dict()
all_youtube_id = [
i["youtube_id"]
for i in self.playlist_dict["playlist_entries"]
if i["downloaded"]
]
for youtube_id in all_youtube_id:
YoutubeVideo(youtube_id).delete_media_file()
self.delete_playlist()
def delete_playlist(self):
"""delete only playlist document"""
url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok:
print(response.text)
def deactivate(self):
"""deactivate document on extractor error"""
headers = {"Content-type": "application/json"}
url = f"{self.ES_URL}/ta_playlist/_update/{self.playlist_id}"
data = {"script": "ctx._source.playlist_active = false"}
json_str = json.dumps(data)
response = requests.post(
url, data=json_str, headers=headers, auth=self.ES_AUTH
)
print(f"deactivated {self.playlist_id}")
if not response.ok:
print(response.text)
class WatchState:
"""handle watched checkbox for videos and channels"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
HEADERS = {"Content-type": "application/json"}
def __init__(self, youtube_id):
self.youtube_id = youtube_id
self.stamp = int(datetime.now().strftime("%s"))
def mark_as_watched(self):
"""update es with new watched value"""
url_type = self.dedect_type()
if url_type == "video":
self.mark_vid_watched()
elif url_type == "channel":
self.mark_channel_watched()
elif url_type == "playlist":
self.mark_playlist_watched()
print(f"marked {self.youtube_id} as watched")
def mark_as_unwatched(self):
"""revert watched state to false"""
url_type = self.dedect_type()
if url_type == "video":
self.mark_vid_watched(revert=True)
print(f"revert {self.youtube_id} as unwatched")
def dedect_type(self):
"""find youtube id type"""
print(self.youtube_id)
url_process = UrlListParser(self.youtube_id).process_list()
url_type = url_process[0]["type"]
return url_type
def mark_vid_watched(self, revert=False):
"""change watched status of single video"""
url = self.ES_URL + "/ta_video/_update/" + self.youtube_id
data = {
"doc": {"player": {"watched": True, "watched_date": self.stamp}}
}
if revert:
data["doc"]["player"]["watched"] = False
payload = json.dumps(data)
request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
raise ValueError("failed to mark video as watched")
def mark_channel_watched(self):
"""change watched status of every video in channel"""
data = {
"query": {
"bool": {
"must": [
{
"term": {
"channel.channel_id": {
"value": self.youtube_id
}
}
},
{"term": {"player.watched": {"value": False}}},
]
}
},
"script": {
"source": "ctx._source.player['watched'] = true",
"lang": "painless",
},
}
payload = json.dumps(data)
url = f"{self.ES_URL}/ta_video/_update_by_query"
request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
raise ValueError("failed mark channel as watched")
def mark_playlist_watched(self):
"""change watched state of all videos in playlist"""
data = {
"query": {
"bool": {
"must": [
{
"term": {
"playlist.keyword": {"value": self.youtube_id}
}
},
{"term": {"player.watched": {"value": False}}},
]
}
},
"script": {
"source": "ctx._source.player['watched'] = true",
"lang": "painless",
},
}
payload = json.dumps(data)
url = f"{self.ES_URL}/ta_video/_update_by_query"
request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
raise ValueError("failed mark playlist as watched")
class IndexPaginate:
"""use search_after to go through whole index"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
HEADERS = {"Content-type": "application/json"}
DEFAULT_SIZE = 500
def __init__(self, index_name, data, size=False):
self.index_name = index_name
self.data = data
self.pit_id = False
self.size = size
def get_results(self):
"""get all results"""
self.get_pit()
self.validate_data()
all_results = self.run_loop()
self.clean_pit()
return all_results
def get_pit(self):
"""get pit for index"""
url = f"{self.ES_URL}/{self.index_name}/_pit?keep_alive=10m"
response = requests.post(url, auth=self.ES_AUTH)
json_data = json.loads(response.text)
self.pit_id = json_data["id"]
def validate_data(self):
"""add pit and size to data"""
if "sort" not in self.data.keys():
print(self.data)
raise ValueError("missing sort key in data")
size = self.size or self.DEFAULT_SIZE
self.data["size"] = size
self.data["pit"] = {"id": self.pit_id, "keep_alive": "10m"}
def run_loop(self):
"""loop through results until last hit"""
query_str = json.dumps(self.data)
url = self.ES_URL + "/_search"
all_results = []
while True:
response = requests.get(
url, data=query_str, headers=self.HEADERS, auth=self.ES_AUTH
)
json_data = json.loads(response.text)
all_hits = json_data["hits"]["hits"]
if all_hits:
for hit in all_hits:
source = hit["_source"]
search_after = hit["sort"]
all_results.append(source)
# update search_after with last hit data
self.data["search_after"] = search_after
query_str = json.dumps(self.data)
else:
break
return all_results
def clean_pit(self):
"""delete pit from elastic search"""
query_str = json.dumps({"id": self.pit_id})
requests.delete(
self.ES_URL + "/_pit",
data=query_str,
headers=self.HEADERS,
auth=self.ES_AUTH,
)
def index_new_video(youtube_id, missing_vid=False):
"""combine video and channel classes for new video index"""
vid_handler = YoutubeVideo(youtube_id)
vid_handler.get_vid_dict()
if not vid_handler.vid_dict:
raise ValueError("failed to get metadata for " + youtube_id)
channel_handler = YoutubeChannel(vid_handler.channel_id)
# add filepath to vid_dict
channel_name = channel_handler.channel_dict["channel_name"]
vid_handler.build_file_path(channel_name)
# add channel and player to video
vid_handler.add_player(missing_vid)
vid_handler.vid_dict["channel"] = channel_handler.channel_dict
# add new channel to es
if channel_handler.source == "scraped":
channel_handler.channel_dict["channel_subscribed"] = False
channel_handler.upload_to_es()
channel_handler.get_channel_art()
# upload video to es
vid_handler.upload_to_es()
# return vid_dict for further processing
return vid_handler.vid_dict

View File

View File

@ -0,0 +1,266 @@
"""
functionality:
- get metadata from youtube for a channel
- index and update in es
"""
import json
import os
import re
from datetime import datetime
import requests
import yt_dlp
from bs4 import BeautifulSoup
from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.generic import YouTubeItem
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.helper import clean_string
class ChannelScraper:
"""custom scraper using bs4 to scrape channel about page
will be able to be integrated into yt-dlp
once #2237 and #2350 are merged upstream
"""
def __init__(self, channel_id):
self.channel_id = channel_id
self.soup = False
self.yt_json = False
self.json_data = False
def get_json(self):
"""main method to return channel dict"""
self.get_soup()
self._extract_yt_json()
self._parse_channel_main()
self._parse_channel_meta()
return self.json_data
def get_soup(self):
"""return soup from youtube"""
print(f"{self.channel_id}: scrape channel data from youtube")
url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
response = requests.get(url, cookies=cookies)
if response.ok:
channel_page = response.text
else:
print(f"{self.channel_id}: failed to extract channel info")
raise ConnectionError
self.soup = BeautifulSoup(channel_page, "html.parser")
def _extract_yt_json(self):
"""parse soup and get ytInitialData json"""
all_scripts = self.soup.find("body").find_all("script")
for script in all_scripts:
if "var ytInitialData = " in str(script):
script_content = str(script)
break
# extract payload
script_content = script_content.split("var ytInitialData = ")[1]
json_raw = script_content.rstrip(";</script>")
self.yt_json = json.loads(json_raw)
def _parse_channel_main(self):
"""extract maintab values from scraped channel json data"""
main_tab = self.yt_json["header"]["c4TabbedHeaderRenderer"]
# build and return dict
self.json_data = {
"channel_active": True,
"channel_last_refresh": int(datetime.now().strftime("%s")),
"channel_subs": self._get_channel_subs(main_tab),
"channel_name": main_tab["title"],
"channel_banner_url": self._get_thumbnails(main_tab, "banner"),
"channel_tvart_url": self._get_thumbnails(main_tab, "tvBanner"),
"channel_id": self.channel_id,
"channel_subscribed": False,
}
@staticmethod
def _get_thumbnails(main_tab, thumb_name):
"""extract banner url from main_tab"""
try:
all_banners = main_tab[thumb_name]["thumbnails"]
banner = sorted(all_banners, key=lambda k: k["width"])[-1]["url"]
except KeyError:
banner = False
return banner
@staticmethod
def _get_channel_subs(main_tab):
"""process main_tab to get channel subs as int"""
try:
sub_text_simple = main_tab["subscriberCountText"]["simpleText"]
sub_text = sub_text_simple.split(" ")[0]
if sub_text[-1] == "K":
channel_subs = int(float(sub_text.replace("K", "")) * 1000)
elif sub_text[-1] == "M":
channel_subs = int(float(sub_text.replace("M", "")) * 1000000)
elif int(sub_text) >= 0:
channel_subs = int(sub_text)
else:
message = f"{sub_text} not dealt with"
print(message)
except KeyError:
channel_subs = 0
return channel_subs
def _parse_channel_meta(self):
"""extract meta tab values from channel payload"""
# meta tab
meta_tab = self.yt_json["metadata"]["channelMetadataRenderer"]
all_thumbs = meta_tab["avatar"]["thumbnails"]
thumb_url = sorted(all_thumbs, key=lambda k: k["width"])[-1]["url"]
# stats tab
renderer = "twoColumnBrowseResultsRenderer"
all_tabs = self.yt_json["contents"][renderer]["tabs"]
for tab in all_tabs:
if "tabRenderer" in tab.keys():
if tab["tabRenderer"]["title"] == "About":
about_tab = tab["tabRenderer"]["content"][
"sectionListRenderer"
]["contents"][0]["itemSectionRenderer"]["contents"][0][
"channelAboutFullMetadataRenderer"
]
break
try:
channel_views_text = about_tab["viewCountText"]["simpleText"]
channel_views = int(re.sub(r"\D", "", channel_views_text))
except KeyError:
channel_views = 0
self.json_data.update(
{
"channel_description": meta_tab["description"],
"channel_thumb_url": thumb_url,
"channel_views": channel_views,
}
)
class YoutubeChannel(YouTubeItem):
"""represents a single youtube channel"""
es_path = False
index_name = "ta_channel"
yt_base = "https://www.youtube.com/channel/"
def __init__(self, youtube_id):
super().__init__(youtube_id)
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
def build_json(self, upload=False):
"""get from es or from youtube"""
self.get_from_es()
if self.json_data:
return
self.get_from_youtube()
if upload:
self.upload_to_es()
return
def get_from_youtube(self):
"""use bs4 to scrape channel about page"""
self.json_data = ChannelScraper(self.youtube_id).get_json()
self.get_channel_art()
def get_channel_art(self):
"""download channel art for new channels"""
channel_id = self.youtube_id
channel_thumb = self.json_data["channel_thumb_url"]
channel_banner = self.json_data["channel_banner_url"]
ThumbManager().download_chan(
[(channel_id, channel_thumb, channel_banner)]
)
def sync_to_videos(self):
"""sync new channel_dict to all videos of channel"""
# add ingest pipeline
processors = []
for field, value in self.json_data.items():
line = {"set": {"field": "channel." + field, "value": value}}
processors.append(line)
data = {"description": self.youtube_id, "processors": processors}
ingest_path = f"_ingest/pipeline/{self.youtube_id}"
_, _ = ElasticWrap(ingest_path).put(data)
# apply pipeline
data = {"query": {"match": {"channel.channel_id": self.youtube_id}}}
update_path = f"ta_video/_update_by_query?pipeline={self.youtube_id}"
_, _ = ElasticWrap(update_path).post(data)
def get_folder_path(self):
"""get folder where media files get stored"""
channel_name = self.json_data["channel_name"]
folder_name = clean_string(channel_name)
folder_path = os.path.join(self.app_conf["videos"], folder_name)
return folder_path
def delete_es_videos(self):
"""delete all channel documents from elasticsearch"""
data = {
"query": {
"term": {"channel.channel_id": {"value": self.youtube_id}}
}
}
_, _ = ElasticWrap("ta_video/_delete_by_query").post(data)
def delete_playlists(self):
"""delete all indexed playlist from es"""
all_playlists = self.get_indexed_playlists()
for playlist in all_playlists:
playlist_id = playlist["playlist_id"]
YoutubePlaylist(playlist_id).delete_metadata()
def delete_channel(self):
"""delete channel and all videos"""
print(f"{self.youtube_id}: delete channel")
self.get_from_es()
folder_path = self.get_folder_path()
print(f"{self.youtube_id}: delete all media files")
try:
all_videos = os.listdir(folder_path)
for video in all_videos:
video_path = os.path.join(folder_path, video)
os.remove(video_path)
os.rmdir(folder_path)
except FileNotFoundError:
print(f"no videos found for {folder_path}")
print(f"{self.youtube_id}: delete indexed playlists")
self.delete_playlists()
print(f"{self.youtube_id}: delete indexed videos")
self.delete_es_videos()
self.del_in_es()
def get_all_playlists(self):
"""get all playlists owned by this channel"""
url = (
f"https://www.youtube.com/channel/{self.youtube_id}"
+ "/playlists?view=1&sort=dd&shelf_id=0"
)
obs = {
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
return all_entries
def get_indexed_playlists(self):
"""get all indexed playlists from channel"""
data = {
"query": {
"term": {"playlist_channel_id": {"value": self.youtube_id}}
},
"sort": [{"playlist_channel.keyword": {"order": "desc"}}],
}
all_playlists = IndexPaginate("ta_playlist", data).get_results()
return all_playlists
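
A minimal usage sketch for the refactored channel class; the channel id is a placeholder:

from home.src.index.channel import YoutubeChannel

channel = YoutubeChannel("<channel_id>")
channel.build_json(upload=True)  # es lookup first, falls back to scraping youtube
print(channel.json_data["channel_name"])
channel.sync_to_videos()         # push refreshed metadata to all indexed videos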

View File

@ -0,0 +1,325 @@
"""
Functionality:
- reindexing old documents
- syncing updated values between indexes
- scan the filesystem to delete or index
"""
import json
import os
import re
import shutil
import subprocess
from datetime import datetime
import requests
from home.src.download.queue import PendingList
from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.index.reindex import Reindex
from home.src.index.video import index_new_video
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisArchivist
class FilesystemScanner:
"""handle scanning and fixing from filesystem"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
VIDEOS = CONFIG["application"]["videos"]
def __init__(self):
self.all_downloaded = self.get_all_downloaded()
self.all_indexed = self.get_all_indexed()
self.mismatch = None
self.to_rename = None
self.to_index = None
self.to_delete = None
def get_all_downloaded(self):
"""get a list of all video files downloaded"""
channels = os.listdir(self.VIDEOS)
all_channels = ignore_filelist(channels)
all_channels.sort()
all_downloaded = []
for channel_name in all_channels:
channel_path = os.path.join(self.VIDEOS, channel_name)
videos = os.listdir(channel_path)
all_videos = ignore_filelist(videos)
for video in all_videos:
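            # media filename convention: <YYYYMMDD>_<11 char youtube_id>_<title>.mp4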
youtube_id = video[9:20]
all_downloaded.append((channel_name, video, youtube_id))
return all_downloaded
@staticmethod
def get_all_indexed():
"""get a list of all indexed videos"""
index_handler = PendingList()
all_indexed_raw = index_handler.get_all_indexed()
all_indexed = []
for video in all_indexed_raw:
youtube_id = video["youtube_id"]
media_url = video["media_url"]
published = video["published"]
title = video["title"]
all_indexed.append((youtube_id, media_url, published, title))
return all_indexed
def list_comarison(self):
"""compare the lists to figure out what to do"""
self.find_unindexed()
self.find_missing()
self.find_bad_media_url()
def find_unindexed(self):
"""find video files without a matching document indexed"""
all_indexed_ids = [i[0] for i in self.all_indexed]
to_index = []
for downloaded in self.all_downloaded:
if downloaded[2] not in all_indexed_ids:
to_index.append(downloaded)
self.to_index = to_index
def find_missing(self):
"""find indexed videos without matching media file"""
all_downloaded_ids = [i[2] for i in self.all_downloaded]
to_delete = []
for video in self.all_indexed:
youtube_id = video[0]
if youtube_id not in all_downloaded_ids:
to_delete.append(video)
self.to_delete = to_delete
def find_bad_media_url(self):
"""rename media files not matching the indexed title"""
to_fix = []
to_rename = []
for downloaded in self.all_downloaded:
channel, filename, downloaded_id = downloaded
# find in indexed
for indexed in self.all_indexed:
indexed_id, media_url, published, title = indexed
if indexed_id == downloaded_id:
# found it
title_c = clean_string(title)
pub = published.replace("-", "")
expected_filename = f"{pub}_{indexed_id}_{title_c}.mp4"
new_url = os.path.join(channel, expected_filename)
if expected_filename != filename:
# file to rename
to_rename.append(
(channel, filename, expected_filename)
)
if media_url != new_url:
# media_url to update in es
to_fix.append((indexed_id, new_url))
break
self.mismatch = to_fix
self.to_rename = to_rename
def rename_files(self):
"""rename media files as identified by find_bad_media_url"""
for bad_filename in self.to_rename:
channel, filename, expected_filename = bad_filename
print(f"renaming [{filename}] to [{expected_filename}]")
old_path = os.path.join(self.VIDEOS, channel, filename)
new_path = os.path.join(self.VIDEOS, channel, expected_filename)
os.rename(old_path, new_path)
def send_mismatch_bulk(self):
"""build bulk update"""
bulk_list = []
for video_mismatch in self.mismatch:
youtube_id, media_url = video_mismatch
print(f"{youtube_id}: fixing media url {media_url}")
action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
source = {"doc": {"media_url": media_url}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
# make the call
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
def delete_from_index(self):
"""find indexed but deleted mediafile"""
for indexed in self.to_delete:
youtube_id = indexed[0]
print(f"deleting {youtube_id} from index")
url = self.ES_URL + "/ta_video/_doc/" + youtube_id
request = requests.delete(url, auth=self.ES_AUTH)
if not request.ok:
print(request.text)
class ManualImport:
"""import and indexing existing video files"""
CONFIG = AppConfig().config
CACHE_DIR = CONFIG["application"]["cache_dir"]
IMPORT_DIR = os.path.join(CACHE_DIR, "import")
def __init__(self):
self.identified = self.import_folder_parser()
def import_folder_parser(self):
"""detect files in import folder"""
import_files = os.listdir(self.IMPORT_DIR)
to_import = ignore_filelist(import_files)
to_import.sort()
video_files = [i for i in to_import if not i.endswith(".json")]
identified = []
for file_path in video_files:
file_dict = {"video_file": file_path}
file_name, _ = os.path.splitext(file_path)
matching_json = [
i
for i in to_import
if i.startswith(file_name) and i.endswith(".json")
]
if matching_json:
json_file = matching_json[0]
youtube_id = self.extract_id_from_json(json_file)
file_dict.update({"json_file": json_file})
else:
youtube_id = self.extract_id_from_filename(file_name)
file_dict.update({"json_file": False})
file_dict.update({"youtube_id": youtube_id})
identified.append(file_dict)
return identified
@staticmethod
def extract_id_from_filename(file_name):
"""
look at the file name for the youtube id
expects filename ending in [<youtube_id>].<ext>
"""
id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
if id_search:
youtube_id = id_search.group(1)
return youtube_id
print("failed to extract youtube id for: " + file_name)
raise Exception
def extract_id_from_json(self, json_file):
"""open json file and extract id"""
json_path = os.path.join(self.CACHE_DIR, "import", json_file)
with open(json_path, "r", encoding="utf-8") as f:
json_content = f.read()
youtube_id = json.loads(json_content)["id"]
return youtube_id
def process_import(self):
"""go through identified media files"""
all_videos_added = []
for media_file in self.identified:
json_file = media_file["json_file"]
video_file = media_file["video_file"]
youtube_id = media_file["youtube_id"]
video_path = os.path.join(self.CACHE_DIR, "import", video_file)
self.move_to_cache(video_path, youtube_id)
# identify and archive
vid_dict = index_new_video(youtube_id)
VideoDownloader([youtube_id]).move_to_archive(vid_dict)
youtube_id = vid_dict["youtube_id"]
thumb_url = vid_dict["vid_thumb_url"]
all_videos_added.append((youtube_id, thumb_url))
# cleanup
if os.path.exists(video_path):
os.remove(video_path)
if json_file:
json_path = os.path.join(self.CACHE_DIR, "import", json_file)
os.remove(json_path)
return all_videos_added
def move_to_cache(self, video_path, youtube_id):
"""move identified video file to cache, convert to mp4"""
file_name = os.path.split(video_path)[-1]
video_file, ext = os.path.splitext(file_name)
# make sure youtube_id is in filename
if youtube_id not in video_file:
video_file = f"{video_file}_{youtube_id}"
# move, convert if needed
if ext == ".mp4":
new_file = video_file + ext
dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
shutil.move(video_path, dest_path)
else:
print(f"processing with ffmpeg: {video_file}")
new_file = video_file + ".mp4"
dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
subprocess.run(
[
"ffmpeg",
"-i",
video_path,
dest_path,
"-loglevel",
"warning",
"-stats",
],
check=True,
)
def scan_filesystem():
"""grouped function to delete and update index"""
filesystem_handler = FilesystemScanner()
filesystem_handler.list_comarison()
if filesystem_handler.to_rename:
print("renaming files")
filesystem_handler.rename_files()
if filesystem_handler.mismatch:
print("fixing media urls in index")
filesystem_handler.send_mismatch_bulk()
if filesystem_handler.to_delete:
print("delete metadata from index")
filesystem_handler.delete_from_index()
if filesystem_handler.to_index:
print("index new videos")
for missing_vid in filesystem_handler.to_index:
youtube_id = missing_vid[2]
index_new_video(youtube_id)
def reindex_old_documents():
"""daily refresh of old documents"""
# continue if needed
reindex_handler = Reindex()
reindex_handler.check_outdated()
reindex_handler.reindex()
# set timestamp
now = int(datetime.now().strftime("%s"))
RedisArchivist().set_message("last_reindex", now, expire=False)

View File

@ -0,0 +1,142 @@
"""
functionality:
- generic base class to inherit from for video, channel and playlist
"""
import math
import yt_dlp
from home.src.es.connect import ElasticWrap
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
class YouTubeItem:
"""base class for youtube"""
es_path = False
index_name = False
yt_base = False
yt_obs = {
"quiet": True,
"default_search": "ytsearch",
"skip_download": True,
"check_formats": "selected",
"noplaylist": True,
}
def __init__(self, youtube_id):
self.youtube_id = youtube_id
self.config = False
self.app_conf = False
self.youtube_meta = False
self.json_data = False
self._get_conf()
def _get_conf(self):
"""read user conf"""
self.config = AppConfig().config
self.app_conf = self.config["application"]
def get_from_youtube(self):
"""use yt-dlp to get meta data from youtube"""
print(f"{self.youtube_id}: get metadata from youtube")
try:
yt_item = yt_dlp.YoutubeDL(self.yt_obs)
response = yt_item.extract_info(self.yt_base + self.youtube_id)
except (
yt_dlp.utils.ExtractorError,
yt_dlp.utils.DownloadError,
):
print(f"{self.youtube_id}: failed to get info from youtube")
            self.youtube_meta = False
            return
        self.youtube_meta = response
def get_from_es(self):
"""get indexed data from elastic search"""
print(f"{self.youtube_id}: get metadata from es")
response, _ = ElasticWrap(f"{self.es_path}").get()
source = response.get("_source")
self.json_data = source
def upload_to_es(self):
"""add json_data to elastic"""
_, _ = ElasticWrap(self.es_path).put(self.json_data, refresh=True)
def deactivate(self):
"""deactivate document in es"""
        key_match = {
            "ta_video": "active",
            "ta_channel": "channel_active",
            "ta_playlist": "playlist_active",
        }
update_path = f"{self.index_name}/_update/{self.youtube_id}"
data = {
"script": f"ctx._source.{key_match.get(self.index_name)} = false"
}
_, _ = ElasticWrap(update_path).post(data)
def del_in_es(self):
"""delete item from elastic search"""
print(f"{self.youtube_id}: delete from es")
_, _ = ElasticWrap(self.es_path).delete()
class Pagination:
"""
figure out the pagination based on page size and total_hits
"""
def __init__(self, page_get, user_id, search_get=False):
self.user_id = user_id
self.page_size = self.get_page_size()
self.page_get = page_get
self.search_get = search_get
self.pagination = self.first_guess()
def get_page_size(self):
"""get default or user modified page_size"""
key = f"{self.user_id}:page_size"
page_size = RedisArchivist().get_message(key)["status"]
if not page_size:
config = AppConfig().config
page_size = config["archive"]["page_size"]
return page_size
def first_guess(self):
"""build first guess before api call"""
page_get = self.page_get
if page_get in [0, 1]:
page_from = 0
prev_pages = False
elif page_get > 1:
page_from = (page_get - 1) * self.page_size
prev_pages = [
i for i in range(page_get - 1, page_get - 6, -1) if i > 1
]
prev_pages.reverse()
pagination = {
"page_size": self.page_size,
"page_from": page_from,
"prev_pages": prev_pages,
"current_page": page_get,
}
if self.search_get:
pagination.update({"search_get": self.search_get})
return pagination
def validate(self, total_hits):
"""validate pagination with total_hits after making api call"""
page_get = self.page_get
max_pages = math.ceil(total_hits / self.page_size)
if page_get < max_pages and max_pages > 1:
self.pagination["last_page"] = max_pages
else:
self.pagination["last_page"] = False
next_pages = [
i for i in range(page_get + 1, page_get + 6) if 1 < i < max_pages
]
self.pagination["next_pages"] = next_pages

View File

@ -0,0 +1,205 @@
"""
functionality:
- get metadata from youtube for a playlist
- index and update in es
"""
import json
from datetime import datetime
from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap
from home.src.index.generic import YouTubeItem
from home.src.index.video import YoutubeVideo
class YoutubePlaylist(YouTubeItem):
"""represents a single youtube playlist"""
es_path = False
index_name = "ta_playlist"
yt_obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
yt_base = "https://www.youtube.com/playlist?list="
def __init__(self, youtube_id):
super().__init__(youtube_id)
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
self.all_members = False
self.nav = False
self.all_youtube_ids = []
def build_json(self, scrape=False):
"""collection to create json_data"""
if not scrape:
self.get_from_es()
if scrape or not self.json_data:
self.get_from_youtube()
self.process_youtube_meta()
self.get_entries()
self.json_data["playlist_entries"] = self.all_members
self.get_playlist_art()
def process_youtube_meta(self):
"""extract relevant fields from youtube"""
self.json_data = {
"playlist_id": self.youtube_id,
"playlist_active": True,
"playlist_subscribed": False,
"playlist_name": self.youtube_meta["title"],
"playlist_channel": self.youtube_meta["channel"],
"playlist_channel_id": self.youtube_meta["channel_id"],
"playlist_thumbnail": self.youtube_meta["thumbnails"][-1]["url"],
"playlist_description": self.youtube_meta["description"] or False,
"playlist_last_refresh": int(datetime.now().strftime("%s")),
}
def get_entries(self, playlistend=False):
"""get all videos in playlist"""
if playlistend:
# implement playlist end
print(playlistend)
all_members = []
for idx, entry in enumerate(self.youtube_meta["entries"]):
if self.all_youtube_ids:
downloaded = entry["id"] in self.all_youtube_ids
else:
downloaded = False
if not entry["uploader"]:
continue
to_append = {
"youtube_id": entry["id"],
"title": entry["title"],
"uploader": entry["uploader"],
"idx": idx,
"downloaded": downloaded,
}
all_members.append(to_append)
self.all_members = all_members
@staticmethod
def get_playlist_art():
"""download artwork of playlist"""
thumbnails = ThumbManager()
missing_playlists = thumbnails.get_missing_playlists()
thumbnails.download_playlist(missing_playlists)
def add_vids_to_playlist(self):
"""sync the playlist id to videos"""
script = (
'if (!ctx._source.containsKey("playlist")) '
+ "{ctx._source.playlist = [params.playlist]} "
+ "else if (!ctx._source.playlist.contains(params.playlist)) "
+ "{ctx._source.playlist.add(params.playlist)} "
+ "else {ctx.op = 'none'}"
)
bulk_list = []
for entry in self.json_data["playlist_entries"]:
video_id = entry["youtube_id"]
action = {"update": {"_id": video_id, "_index": "ta_video"}}
source = {
"script": {
"source": script,
"lang": "painless",
"params": {"playlist": self.youtube_id},
}
}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
ElasticWrap("_bulk").post(query_str, ndjson=True)
def update_playlist(self):
"""update metadata for playlist with data from YouTube"""
self.get_from_es()
subscribed = self.json_data["playlist_subscribed"]
self.get_from_youtube()
if not self.json_data:
# return false to deactivate
return False
self.json_data["playlist_subscribed"] = subscribed
self.upload_to_es()
return True
def build_nav(self, youtube_id):
"""find next and previous in playlist of a given youtube_id"""
all_entries_available = self.json_data["playlist_entries"]
all_entries = [i for i in all_entries_available if i["downloaded"]]
current = [i for i in all_entries if i["youtube_id"] == youtube_id]
# stop if not found or playlist of 1
if not current or not len(all_entries) > 1:
return
current_idx = all_entries.index(current[0])
if current_idx == 0:
previous_item = False
else:
previous_item = all_entries[current_idx - 1]
prev_thumb = ThumbManager().vid_thumb_path(
previous_item["youtube_id"]
)
previous_item["vid_thumb"] = prev_thumb
if current_idx == len(all_entries) - 1:
next_item = False
else:
next_item = all_entries[current_idx + 1]
next_thumb = ThumbManager().vid_thumb_path(next_item["youtube_id"])
next_item["vid_thumb"] = next_thumb
self.nav = {
"playlist_meta": {
"current_idx": current[0]["idx"],
"playlist_id": self.youtube_id,
"playlist_name": self.json_data["playlist_name"],
"playlist_channel": self.json_data["playlist_channel"],
},
"playlist_previous": previous_item,
"playlist_next": next_item,
}
return
def delete_metadata(self):
"""delete metadata for playlist"""
script = (
"ctx._source.playlist.removeAll("
+ "Collections.singleton(params.playlist)) "
)
data = {
"query": {
"term": {"playlist.keyword": {"value": self.youtube_id}}
},
"script": {
"source": script,
"lang": "painless",
"params": {"playlist": self.youtube_id},
},
}
_, _ = ElasticWrap("ta_video/_update_by_query").post(data)
self.del_in_es()
def delete_videos_playlist(self):
"""delete playlist with all videos"""
print(f"{self.youtube_id}: delete playlist")
self.get_from_es()
all_youtube_id = [
i["youtube_id"]
for i in self.json_data["playlist_entries"]
if i["downloaded"]
]
for youtube_id in all_youtube_id:
YoutubeVideo(youtube_id).delete_media_file()
self.delete_metadata()
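A minimal usage sketch for the class above, roughly mirroring how the playlist indexing task later in this diff drives it; the playlist id and the list of already indexed video ids are placeholders:

from home.src.index.playlist import YoutubePlaylist

all_indexed_ids = []  # placeholder: youtube ids of already downloaded videos
playlist = YoutubePlaylist("PL0000000000000000000000000000000000")
playlist.all_youtube_ids = all_indexed_ids
playlist.build_json(scrape=True)       # scrape from youtube and build json_data
if playlist.json_data:
    playlist.upload_to_es()            # write the ta_playlist document
    playlist.add_vids_to_playlist()    # painless bulk update on ta_video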

View File

@ -0,0 +1,271 @@
"""
functionality:
- periodically refresh documents
- index and update in es
"""
import json
from datetime import datetime
from math import ceil
from time import sleep
import requests
from home.src.download.queue import PendingList
from home.src.download.subscriptions import ChannelSubscription
from home.src.download.thumbnails import ThumbManager
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video import YoutubeVideo
from home.src.ta.config import AppConfig
from home.src.ta.helper import get_total_hits
class Reindex:
"""check for outdated documents and refresh data from youtube"""
def __init__(self):
# config
config = AppConfig().config
self.sleep_interval = config["downloads"]["sleep_interval"]
self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.refresh_interval = config["scheduler"]["check_reindex_days"]
self.integrate_ryd = config["downloads"]["integrate_ryd"]
# scan
self.all_youtube_ids = False
self.all_channel_ids = False
self.all_playlist_ids = False
def get_daily(self):
"""get daily refresh values"""
total_videos = get_total_hits(
"ta_video", self.es_url, self.es_auth, "active"
)
video_daily = ceil(total_videos / self.refresh_interval * 1.2)
total_channels = get_total_hits(
"ta_channel", self.es_url, self.es_auth, "channel_active"
)
channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
total_playlists = get_total_hits(
"ta_playlist", self.es_url, self.es_auth, "playlist_active"
)
playlist_daily = ceil(total_playlists / self.refresh_interval * 1.2)
return (video_daily, channel_daily, playlist_daily)
def get_outdated_vids(self, size):
"""get daily videos to refresh"""
headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_lte = now - self.refresh_interval * 24 * 60 * 60
data = {
"size": size,
"query": {
"bool": {
"must": [
{"match": {"active": True}},
{"range": {"vid_last_refresh": {"lte": now_lte}}},
]
}
},
"sort": [{"vid_last_refresh": {"order": "asc"}}],
"_source": False,
}
query_str = json.dumps(data)
url = self.es_url + "/ta_video/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_youtube_ids
def get_unrated_vids(self):
"""get all videos without rating if ryd integration is enabled"""
headers = {"Content-type": "application/json"}
data = {
"size": 200,
"query": {
"bool": {
"must_not": [{"exists": {"field": "stats.average_rating"}}]
}
},
}
query_str = json.dumps(data)
url = self.es_url + "/ta_video/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
missing_rating = [i["_id"] for i in response_dict["hits"]["hits"]]
self.all_youtube_ids = self.all_youtube_ids + missing_rating
def get_outdated_channels(self, size):
"""get daily channels to refresh"""
headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_lte = now - self.refresh_interval * 24 * 60 * 60
data = {
"size": size,
"query": {
"bool": {
"must": [
{"match": {"channel_active": True}},
{"range": {"channel_last_refresh": {"lte": now_lte}}},
]
}
},
"sort": [{"channel_last_refresh": {"order": "asc"}}],
"_source": False,
}
query_str = json.dumps(data)
url = self.es_url + "/ta_channel/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_channel_ids
def get_outdated_playlists(self, size):
"""get daily outdated playlists to refresh"""
headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_lte = now - self.refresh_interval * 24 * 60 * 60
data = {
"size": size,
"query": {
"bool": {
"must": [
{"match": {"playlist_active": True}},
{"range": {"playlist_last_refresh": {"lte": now_lte}}},
]
}
},
"sort": [{"playlist_last_refresh": {"order": "asc"}}],
"_source": False,
}
query_str = json.dumps(data)
url = self.es_url + "/ta_playlist/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
all_playlist_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_playlist_ids
def check_outdated(self):
"""add missing vids and channels"""
video_daily, channel_daily, playlist_daily = self.get_daily()
self.all_youtube_ids = self.get_outdated_vids(video_daily)
self.all_channel_ids = self.get_outdated_channels(channel_daily)
self.all_playlist_ids = self.get_outdated_playlists(playlist_daily)
if self.integrate_ryd:
self.get_unrated_vids()
def rescrape_all_channels(self):
"""sync new data from channel to all matching videos"""
sleep_interval = self.sleep_interval
channel_sub_handler = ChannelSubscription()
all_channels = channel_sub_handler.get_channels(subscribed_only=False)
all_channel_ids = [i["channel_id"] for i in all_channels]
for channel_id in all_channel_ids:
channel = YoutubeChannel(channel_id)
subscribed = channel.json_data["channel_subscribed"]
channel.get_from_youtube()
channel.json_data["channel_subscribed"] = subscribed
channel.upload_to_es()
channel.sync_to_videos()
if sleep_interval:
sleep(sleep_interval)
@staticmethod
def reindex_single_video(youtube_id):
"""refresh data for single video"""
video = YoutubeVideo(youtube_id)
# read current state
video.get_from_es()
player = video.json_data["player"]
date_downloaded = video.json_data["date_downloaded"]
channel_dict = video.json_data["channel"]
playlist = video.json_data.get("playlist")
# get new
video.build_json()
if not video.json_data:
            video.deactivate()
            return
        # add back
video.json_data["player"] = player
video.json_data["date_downloaded"] = date_downloaded
video.json_data["channel"] = channel_dict
if playlist:
video.json_data["playlist"] = playlist
video.upload_to_es()
thumb_handler = ThumbManager()
thumb_handler.delete_vid_thumb(youtube_id)
to_download = (youtube_id, video.json_data["vid_thumb_url"])
thumb_handler.download_vid([to_download], notify=False)
@staticmethod
def reindex_single_channel(channel_id):
"""refresh channel data and sync to videos"""
channel = YoutubeChannel(channel_id)
channel.get_from_es()
subscribed = channel.json_data["channel_subscribed"]
channel.get_from_youtube()
channel.json_data["channel_subscribed"] = subscribed
channel.upload_to_es()
channel.sync_to_videos()
@staticmethod
def reindex_single_playlist(playlist_id, all_indexed_ids):
"""refresh playlist data"""
playlist = YoutubePlaylist(playlist_id)
playlist.get_from_es()
subscribed = playlist.json_data["playlist_subscribed"]
playlist.all_youtube_ids = all_indexed_ids
playlist.build_json(scrape=True)
if not playlist.json_data:
playlist.deactivate()
return
playlist.json_data["playlist_subscribed"] = subscribed
playlist.upload_to_es()
return
def reindex(self):
"""reindex what's needed"""
# videos
print(f"reindexing {len(self.all_youtube_ids)} videos")
for youtube_id in self.all_youtube_ids:
self.reindex_single_video(youtube_id)
if self.sleep_interval:
sleep(self.sleep_interval)
# channels
print(f"reindexing {len(self.all_channel_ids)} channels")
for channel_id in self.all_channel_ids:
self.reindex_single_channel(channel_id)
if self.sleep_interval:
sleep(self.sleep_interval)
# playlist
print(f"reindexing {len(self.all_playlist_ids)} playlists")
if self.all_playlist_ids:
all_indexed = PendingList().get_all_indexed()
all_indexed_ids = [i["youtube_id"] for i in all_indexed]
for playlist_id in self.all_playlist_ids:
self.reindex_single_playlist(playlist_id, all_indexed_ids)
if self.sleep_interval:
sleep(self.sleep_interval)
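The get_daily values above follow a simple formula: total active documents divided by check_reindex_days, padded by 20 percent, presumably so the whole index still cycles through within the refresh interval. A quick numeric sketch with made-up totals:

from math import ceil

refresh_interval = 90   # check_reindex_days, illustrative value
total_videos = 2000     # active ta_video documents, illustrative value
video_daily = ceil(total_videos / refresh_interval * 1.2)
print(video_daily)      # 27 videos re-scraped per daily run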

View File

@ -0,0 +1,175 @@
"""
functionality:
- get metadata from youtube for a video
- index and update in es
"""
import os
from datetime import datetime
import requests
from home.src.index import channel as ta_channel
from home.src.index.generic import YouTubeItem
from home.src.ta.helper import DurationConverter, clean_string
from ryd_client import ryd_client
class YoutubeVideo(YouTubeItem):
"""represents a single youtube video"""
es_path = False
index_name = "ta_video"
yt_base = "https://www.youtube.com/watch?v="
def __init__(self, youtube_id):
super().__init__(youtube_id)
self.channel_id = False
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
def build_json(self):
"""build json dict of video"""
self.get_from_youtube()
if not self.youtube_meta:
return
self._process_youtube_meta()
self._add_channel()
self._add_stats()
self.add_file_path()
self.add_player()
if self.config["downloads"]["integrate_ryd"]:
self._get_ryd_stats()
return
def _process_youtube_meta(self):
"""extract relevant fields from youtube"""
# extract
self.channel_id = self.youtube_meta["channel_id"]
upload_date = self.youtube_meta["upload_date"]
upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
published = upload_date_time.strftime("%Y-%m-%d")
last_refresh = int(datetime.now().strftime("%s"))
# build json_data basics
self.json_data = {
"title": self.youtube_meta["title"],
"description": self.youtube_meta["description"],
"category": self.youtube_meta["categories"],
"vid_thumb_url": self.youtube_meta["thumbnail"],
"tags": self.youtube_meta["tags"],
"published": published,
"vid_last_refresh": last_refresh,
"date_downloaded": last_refresh,
"youtube_id": self.youtube_id,
"active": True,
}
def _add_channel(self):
"""add channel dict to video json_data"""
channel = ta_channel.YoutubeChannel(self.channel_id)
channel.build_json(upload=True)
self.json_data.update({"channel": channel.json_data})
def _add_stats(self):
"""add stats dicst to json_data"""
# likes
like_count = self.youtube_meta.get("like_count", 0)
dislike_count = self.youtube_meta.get("dislike_count", 0)
self.json_data.update(
{
"stats": {
"view_count": self.youtube_meta["view_count"],
"like_count": like_count,
"dislike_count": dislike_count,
"average_rating": self.youtube_meta["average_rating"],
}
}
)
def build_dl_cache_path(self):
"""find video path in dl cache"""
cache_dir = self.app_conf["cache_dir"]
cache_path = f"{cache_dir}/download/"
all_cached = os.listdir(cache_path)
for file_cached in all_cached:
if self.youtube_id in file_cached:
vid_path = os.path.join(cache_path, file_cached)
return vid_path
return False
def add_player(self):
"""add player information for new videos"""
try:
# when indexing from download task
vid_path = self.build_dl_cache_path()
except FileNotFoundError:
# when reindexing
base = self.app_conf["videos"]
vid_path = os.path.join(base, self.json_data["media_url"])
duration_handler = DurationConverter()
duration = duration_handler.get_sec(vid_path)
duration_str = duration_handler.get_str(duration)
self.json_data.update(
{
"player": {
"watched": False,
"duration": duration,
"duration_str": duration_str,
}
}
)
def add_file_path(self):
"""build media_url for where file will be located"""
channel_name = self.json_data["channel"]["channel_name"]
clean_channel_name = clean_string(channel_name)
timestamp = self.json_data["published"].replace("-", "")
youtube_id = self.json_data["youtube_id"]
title = self.json_data["title"]
clean_title = clean_string(title)
filename = f"{timestamp}_{youtube_id}_{clean_title}.mp4"
media_url = os.path.join(clean_channel_name, filename)
self.json_data["media_url"] = media_url
def delete_media_file(self):
"""delete video file, meta data"""
self.get_from_es()
video_base = self.app_conf["videos"]
media_url = self.json_data["media_url"]
print(f"{self.youtube_id}: delete {media_url} from file system")
to_delete = os.path.join(video_base, media_url)
os.remove(to_delete)
self.del_in_es()
def _get_ryd_stats(self):
"""get optional stats from returnyoutubedislikeapi.com"""
try:
print(f"{self.youtube_id}: get ryd stats")
result = ryd_client.get(self.youtube_id)
except requests.exceptions.ConnectionError:
print(f"{self.youtube_id}: failed to query ryd api, skipping")
return False
if result["status"] == 404:
return False
dislikes = {
"dislike_count": result["dislikes"],
"average_rating": result["rating"],
}
self.json_data["stats"].update(dislikes)
return True
def index_new_video(youtube_id):
"""combined classes to create new video in index"""
video = YoutubeVideo(youtube_id)
video.build_json()
if not video.json_data:
raise ValueError("failed to get metadata for " + youtube_id)
video.upload_to_es()
return video.json_data
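A short usage sketch for index_new_video as defined above; the video id is a placeholder and the ValueError branch covers videos whose metadata can no longer be fetched:

from home.src.index.video import index_new_video

try:
    json_data = index_new_video("dQw4w9WgXcQ")  # placeholder video id
except ValueError:
    json_data = None  # metadata lookup failed, nothing was indexed
if json_data:
    # media_url follows <channel>/<published>_<youtube_id>_<title>.mp4
    print(json_data["media_url"])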

View File

@ -1,600 +0,0 @@
"""
Functionality:
- reindexing old documents
- syncing updated values between indexes
- scan the filesystem to delete or index
"""
import json
import os
import re
import shutil
import subprocess
from datetime import datetime
from math import ceil
from time import sleep
import requests
from home.src.config import AppConfig
from home.src.download import ChannelSubscription, PendingList, VideoDownloader
from home.src.helper import (
RedisArchivist,
clean_string,
get_total_hits,
ignore_filelist,
)
from home.src.index import (
YoutubeChannel,
YoutubePlaylist,
YoutubeVideo,
index_new_video,
)
from home.src.thumbnails import ThumbManager
class Reindex:
"""check for outdated documents and refresh data from youtube"""
def __init__(self):
# config
config = AppConfig().config
self.sleep_interval = config["downloads"]["sleep_interval"]
self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.refresh_interval = config["scheduler"]["check_reindex_days"]
self.integrate_ryd = config["downloads"]["integrate_ryd"]
# scan
self.all_youtube_ids = False
self.all_channel_ids = False
self.all_playlist_ids = False
def get_daily(self):
"""get daily refresh values"""
total_videos = get_total_hits(
"ta_video", self.es_url, self.es_auth, "active"
)
video_daily = ceil(total_videos / self.refresh_interval * 1.2)
total_channels = get_total_hits(
"ta_channel", self.es_url, self.es_auth, "channel_active"
)
channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
total_playlists = get_total_hits(
"ta_playlist", self.es_url, self.es_auth, "playlist_active"
)
playlist_daily = ceil(total_playlists / self.refresh_interval * 1.2)
return (video_daily, channel_daily, playlist_daily)
def get_outdated_vids(self, size):
"""get daily videos to refresh"""
headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_lte = now - self.refresh_interval * 24 * 60 * 60
data = {
"size": size,
"query": {
"bool": {
"must": [
{"match": {"active": True}},
{"range": {"vid_last_refresh": {"lte": now_lte}}},
]
}
},
"sort": [{"vid_last_refresh": {"order": "asc"}}],
"_source": False,
}
query_str = json.dumps(data)
url = self.es_url + "/ta_video/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_youtube_ids
def get_unrated_vids(self):
"""get all videos without rating if ryd integration is enabled"""
headers = {"Content-type": "application/json"}
data = {
"size": 200,
"query": {
"bool": {
"must_not": [{"exists": {"field": "stats.average_rating"}}]
}
},
}
query_str = json.dumps(data)
url = self.es_url + "/ta_video/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
missing_rating = [i["_id"] for i in response_dict["hits"]["hits"]]
self.all_youtube_ids = self.all_youtube_ids + missing_rating
def get_outdated_channels(self, size):
"""get daily channels to refresh"""
headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_lte = now - self.refresh_interval * 24 * 60 * 60
data = {
"size": size,
"query": {
"bool": {
"must": [
{"match": {"channel_active": True}},
{"range": {"channel_last_refresh": {"lte": now_lte}}},
]
}
},
"sort": [{"channel_last_refresh": {"order": "asc"}}],
"_source": False,
}
query_str = json.dumps(data)
url = self.es_url + "/ta_channel/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_channel_ids
def get_outdated_playlists(self, size):
"""get daily outdated playlists to refresh"""
headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_lte = now - self.refresh_interval * 24 * 60 * 60
data = {
"size": size,
"query": {
"bool": {
"must": [
{"match": {"playlist_active": True}},
{"range": {"playlist_last_refresh": {"lte": now_lte}}},
]
}
},
"sort": [{"playlist_last_refresh": {"order": "asc"}}],
"_source": False,
}
query_str = json.dumps(data)
url = self.es_url + "/ta_playlist/_search"
response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
all_playlist_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_playlist_ids
def check_outdated(self):
"""add missing vids and channels"""
video_daily, channel_daily, playlist_daily = self.get_daily()
self.all_youtube_ids = self.get_outdated_vids(video_daily)
self.all_channel_ids = self.get_outdated_channels(channel_daily)
self.all_playlist_ids = self.get_outdated_playlists(playlist_daily)
if self.integrate_ryd:
self.get_unrated_vids()
def rescrape_all_channels(self):
"""sync new data from channel to all matching videos"""
sleep_interval = self.sleep_interval
channel_sub_handler = ChannelSubscription()
all_channels = channel_sub_handler.get_channels(subscribed_only=False)
all_channel_ids = [i["channel_id"] for i in all_channels]
counter = 1
for channel_id in all_channel_ids:
channel_index = YoutubeChannel(channel_id)
subscribed = channel_index.channel_dict["channel_subscribed"]
channel_index.channel_dict = channel_index.build_channel_dict(
scrape=True
)
channel_index.channel_dict["channel_subscribed"] = subscribed
channel_index.upload_to_es()
channel_index.sync_to_videos()
counter = counter + 1
if sleep_interval:
sleep(sleep_interval)
@staticmethod
def reindex_single_video(youtube_id):
"""refresh data for single video"""
vid_handler = YoutubeVideo(youtube_id)
vid_handler.get_vid_dict()
if not vid_handler.vid_dict:
# stop if deactivated
vid_handler.deactivate()
return
es_vid_dict = vid_handler.get_es_data()
player = es_vid_dict["_source"]["player"]
date_downloaded = es_vid_dict["_source"]["date_downloaded"]
channel_dict = es_vid_dict["_source"]["channel"]
channel_name = channel_dict["channel_name"]
try:
playlist = es_vid_dict["_source"]["playlist"]
except KeyError:
playlist = False
vid_handler.build_file_path(channel_name)
# add to vid_dict
vid_handler.vid_dict["player"] = player
vid_handler.vid_dict["date_downloaded"] = date_downloaded
vid_handler.vid_dict["channel"] = channel_dict
if playlist:
vid_handler.vid_dict["playlist"] = playlist
# update
vid_handler.upload_to_es()
thumb_handler = ThumbManager()
thumb_handler.delete_vid_thumb(youtube_id)
to_download = (youtube_id, vid_handler.vid_dict["vid_thumb_url"])
thumb_handler.download_vid([to_download], notify=False)
@staticmethod
def reindex_single_channel(channel_id):
"""refresh channel data and sync to videos"""
channel_handler = YoutubeChannel(channel_id)
subscribed = channel_handler.channel_dict["channel_subscribed"]
channel_handler.channel_dict = channel_handler.build_channel_dict(
scrape=True
)
channel_handler.channel_dict["channel_subscribed"] = subscribed
# update
channel_handler.upload_to_es()
channel_handler.sync_to_videos()
thumb_handler = ThumbManager()
thumb_handler.delete_chan_thumb(channel_id)
channel_thumb = channel_handler.channel_dict["channel_thumb_url"]
channel_banner = channel_handler.channel_dict["channel_banner_url"]
to_download = (channel_id, channel_thumb, channel_banner)
thumb_handler.download_chan([to_download])
@staticmethod
def reindex_single_playlist(playlist_id, all_indexed_ids):
"""refresh playlist data"""
playlist_handler = YoutubePlaylist(
playlist_id, all_youtube_ids=all_indexed_ids
)
playlist = playlist_handler.update_playlist()
if not playlist:
playlist_handler.deactivate()
return
playlist_thumbnail = (playlist_id, playlist["playlist_thumbnail"])
thumb_handler = ThumbManager()
thumb_handler.download_playlist([playlist_thumbnail])
return
def reindex(self):
"""reindex what's needed"""
# videos
print(f"reindexing {len(self.all_youtube_ids)} videos")
for youtube_id in self.all_youtube_ids:
self.reindex_single_video(youtube_id)
if self.sleep_interval:
sleep(self.sleep_interval)
# channels
print(f"reindexing {len(self.all_channel_ids)} channels")
for channel_id in self.all_channel_ids:
self.reindex_single_channel(channel_id)
if self.sleep_interval:
sleep(self.sleep_interval)
# playlist
print(f"reindexing {len(self.all_playlist_ids)} playlists")
if self.all_playlist_ids:
all_indexed = PendingList().get_all_indexed()
all_indexed_ids = [i["youtube_id"] for i in all_indexed]
for playlist_id in self.all_playlist_ids:
self.reindex_single_playlist(playlist_id, all_indexed_ids)
if self.sleep_interval:
sleep(self.sleep_interval)
class FilesystemScanner:
"""handle scanning and fixing from filesystem"""
CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
VIDEOS = CONFIG["application"]["videos"]
def __init__(self):
self.all_downloaded = self.get_all_downloaded()
self.all_indexed = self.get_all_indexed()
self.mismatch = None
self.to_rename = None
self.to_index = None
self.to_delete = None
def get_all_downloaded(self):
"""get a list of all video files downloaded"""
channels = os.listdir(self.VIDEOS)
all_channels = ignore_filelist(channels)
all_channels.sort()
all_downloaded = []
for channel_name in all_channels:
channel_path = os.path.join(self.VIDEOS, channel_name)
videos = os.listdir(channel_path)
all_videos = ignore_filelist(videos)
for video in all_videos:
youtube_id = video[9:20]
all_downloaded.append((channel_name, video, youtube_id))
return all_downloaded
@staticmethod
def get_all_indexed():
"""get a list of all indexed videos"""
index_handler = PendingList()
all_indexed_raw = index_handler.get_all_indexed()
all_indexed = []
for video in all_indexed_raw:
youtube_id = video["youtube_id"]
media_url = video["media_url"]
published = video["published"]
title = video["title"]
all_indexed.append((youtube_id, media_url, published, title))
return all_indexed
def list_comarison(self):
"""compare the lists to figure out what to do"""
self.find_unindexed()
self.find_missing()
self.find_bad_media_url()
def find_unindexed(self):
"""find video files without a matching document indexed"""
all_indexed_ids = [i[0] for i in self.all_indexed]
to_index = []
for downloaded in self.all_downloaded:
if downloaded[2] not in all_indexed_ids:
to_index.append(downloaded)
self.to_index = to_index
def find_missing(self):
"""find indexed videos without matching media file"""
all_downloaded_ids = [i[2] for i in self.all_downloaded]
to_delete = []
for video in self.all_indexed:
youtube_id = video[0]
if youtube_id not in all_downloaded_ids:
to_delete.append(video)
self.to_delete = to_delete
def find_bad_media_url(self):
"""rename media files not matching the indexed title"""
to_fix = []
to_rename = []
for downloaded in self.all_downloaded:
channel, filename, downloaded_id = downloaded
# find in indexed
for indexed in self.all_indexed:
indexed_id, media_url, published, title = indexed
if indexed_id == downloaded_id:
# found it
title_c = clean_string(title)
pub = published.replace("-", "")
expected_filename = f"{pub}_{indexed_id}_{title_c}.mp4"
new_url = os.path.join(channel, expected_filename)
if expected_filename != filename:
# file to rename
to_rename.append(
(channel, filename, expected_filename)
)
if media_url != new_url:
# media_url to update in es
to_fix.append((indexed_id, new_url))
break
self.mismatch = to_fix
self.to_rename = to_rename
def rename_files(self):
"""rename media files as identified by find_bad_media_url"""
for bad_filename in self.to_rename:
channel, filename, expected_filename = bad_filename
print(f"renaming [{filename}] to [{expected_filename}]")
old_path = os.path.join(self.VIDEOS, channel, filename)
new_path = os.path.join(self.VIDEOS, channel, expected_filename)
os.rename(old_path, new_path)
def send_mismatch_bulk(self):
"""build bulk update"""
bulk_list = []
for video_mismatch in self.mismatch:
youtube_id, media_url = video_mismatch
print(f"{youtube_id}: fixing media url {media_url}")
action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
source = {"doc": {"media_url": media_url}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
# make the call
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok:
print(request.text)
def delete_from_index(self):
"""find indexed but deleted mediafile"""
for indexed in self.to_delete:
youtube_id = indexed[0]
print(f"deleting {youtube_id} from index")
url = self.ES_URL + "/ta_video/_doc/" + youtube_id
request = requests.delete(url, auth=self.ES_AUTH)
if not request.ok:
print(request.text)
class ManualImport:
"""import and indexing existing video files"""
CONFIG = AppConfig().config
CACHE_DIR = CONFIG["application"]["cache_dir"]
IMPORT_DIR = os.path.join(CACHE_DIR, "import")
def __init__(self):
self.identified = self.import_folder_parser()
def import_folder_parser(self):
"""detect files in import folder"""
import_files = os.listdir(self.IMPORT_DIR)
to_import = ignore_filelist(import_files)
to_import.sort()
video_files = [i for i in to_import if not i.endswith(".json")]
identified = []
for file_path in video_files:
file_dict = {"video_file": file_path}
file_name, _ = os.path.splitext(file_path)
matching_json = [
i
for i in to_import
if i.startswith(file_name) and i.endswith(".json")
]
if matching_json:
json_file = matching_json[0]
youtube_id = self.extract_id_from_json(json_file)
file_dict.update({"json_file": json_file})
else:
youtube_id = self.extract_id_from_filename(file_name)
file_dict.update({"json_file": False})
file_dict.update({"youtube_id": youtube_id})
identified.append(file_dict)
return identified
@staticmethod
def extract_id_from_filename(file_name):
"""
look at the file name for the youtube id
expects filename ending in [<youtube_id>].<ext>
"""
id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
if id_search:
youtube_id = id_search.group(1)
return youtube_id
print("failed to extract youtube id for: " + file_name)
raise Exception
def extract_id_from_json(self, json_file):
"""open json file and extract id"""
json_path = os.path.join(self.CACHE_DIR, "import", json_file)
with open(json_path, "r", encoding="utf-8") as f:
json_content = f.read()
youtube_id = json.loads(json_content)["id"]
return youtube_id
def process_import(self):
"""go through identified media files"""
all_videos_added = []
for media_file in self.identified:
json_file = media_file["json_file"]
video_file = media_file["video_file"]
youtube_id = media_file["youtube_id"]
video_path = os.path.join(self.CACHE_DIR, "import", video_file)
self.move_to_cache(video_path, youtube_id)
# identify and archive
vid_dict = index_new_video(youtube_id)
VideoDownloader([youtube_id]).move_to_archive(vid_dict)
youtube_id = vid_dict["youtube_id"]
thumb_url = vid_dict["vid_thumb_url"]
all_videos_added.append((youtube_id, thumb_url))
# cleanup
if os.path.exists(video_path):
os.remove(video_path)
if json_file:
json_path = os.path.join(self.CACHE_DIR, "import", json_file)
os.remove(json_path)
return all_videos_added
def move_to_cache(self, video_path, youtube_id):
"""move identified video file to cache, convert to mp4"""
file_name = os.path.split(video_path)[-1]
video_file, ext = os.path.splitext(file_name)
# make sure youtube_id is in filename
if youtube_id not in video_file:
video_file = f"{video_file}_{youtube_id}"
# move, convert if needed
if ext == ".mp4":
new_file = video_file + ext
dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
shutil.move(video_path, dest_path)
else:
print(f"processing with ffmpeg: {video_file}")
new_file = video_file + ".mp4"
dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
subprocess.run(
[
"ffmpeg",
"-i",
video_path,
dest_path,
"-loglevel",
"warning",
"-stats",
],
check=True,
)
def scan_filesystem():
"""grouped function to delete and update index"""
filesystem_handler = FilesystemScanner()
filesystem_handler.list_comarison()
if filesystem_handler.to_rename:
print("renaming files")
filesystem_handler.rename_files()
if filesystem_handler.mismatch:
print("fixing media urls in index")
filesystem_handler.send_mismatch_bulk()
if filesystem_handler.to_delete:
print("delete metadata from index")
filesystem_handler.delete_from_index()
if filesystem_handler.to_index:
print("index new videos")
for missing_vid in filesystem_handler.to_index:
youtube_id = missing_vid[2]
index_new_video(youtube_id, missing_vid=missing_vid)
def reindex_old_documents():
"""daily refresh of old documents"""
# continue if needed
reindex_handler = Reindex()
reindex_handler.check_outdated()
reindex_handler.reindex()
# set timestamp
now = int(datetime.now().strftime("%s"))
RedisArchivist().set_message("last_reindex", now, expire=False)

View File

View File

@ -2,7 +2,6 @@
Functionality: Functionality:
- read and write config - read and write config
- load config variables into redis - load config variables into redis
- needs to be a separate module to avoid circular import
""" """
import json import json
@ -10,7 +9,7 @@ import os
import re import re
from celery.schedules import crontab from celery.schedules import crontab
from home.src.helper import RedisArchivist from home.src.ta.ta_redis import RedisArchivist
class AppConfig: class AppConfig:
@ -39,8 +38,7 @@ class AppConfig:
def get_config_file(self): def get_config_file(self):
"""read the defaults from config.json""" """read the defaults from config.json"""
with open("home/config.json", "r", encoding="utf-8") as f: with open("home/config.json", "r", encoding="utf-8") as f:
config_str = f.read() config_file = json.load(f)
config_file = json.loads(config_str)
config_file["application"].update(self.get_config_env()) config_file["application"].update(self.get_config_env())

View File

@ -4,14 +4,12 @@ Loose collection of helper functions
""" """
import json import json
import os
import re import re
import string import string
import subprocess import subprocess
import unicodedata import unicodedata
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
import redis
import requests import requests
import yt_dlp import yt_dlp
@ -149,153 +147,6 @@ class UrlListParser:
return channel_id return channel_id
class RedisArchivist:
"""collection of methods to interact with redis"""
REDIS_HOST = os.environ.get("REDIS_HOST")
REDIS_PORT = os.environ.get("REDIS_PORT") or 6379
NAME_SPACE = "ta:"
CHANNELS = [
"download",
"add",
"rescan",
"subchannel",
"subplaylist",
"playlistscan",
"setting",
]
def __init__(self):
self.redis_connection = redis.Redis(
host=self.REDIS_HOST, port=self.REDIS_PORT
)
def set_message(self, key, message, expire=True):
"""write new message to redis"""
self.redis_connection.execute_command(
"JSON.SET", self.NAME_SPACE + key, ".", json.dumps(message)
)
if expire:
if isinstance(expire, bool):
secs = 20
else:
secs = expire
self.redis_connection.execute_command(
"EXPIRE", self.NAME_SPACE + key, secs
)
def get_message(self, key):
"""get message dict from redis"""
reply = self.redis_connection.execute_command(
"JSON.GET", self.NAME_SPACE + key
)
if reply:
json_str = json.loads(reply)
else:
json_str = {"status": False}
return json_str
def del_message(self, key):
"""delete key from redis"""
response = self.redis_connection.execute_command(
"DEL", self.NAME_SPACE + key
)
return response
def get_lock(self, lock_key):
"""handle lock for task management"""
redis_lock = self.redis_connection.lock(self.NAME_SPACE + lock_key)
return redis_lock
def get_progress(self):
"""get a list of all progress messages"""
all_messages = []
for channel in self.CHANNELS:
key = "message:" + channel
reply = self.redis_connection.execute_command(
"JSON.GET", self.NAME_SPACE + key
)
if reply:
json_str = json.loads(reply)
all_messages.append(json_str)
return all_messages
@staticmethod
def monitor_cache_dir(cache_dir):
"""
look at download cache dir directly as alternative progress info
"""
dl_cache = os.path.join(cache_dir, "download")
all_cache_file = os.listdir(dl_cache)
cache_file = ignore_filelist(all_cache_file)
if cache_file:
filename = cache_file[0][12:].replace("_", " ").split(".")[0]
mess_dict = {
"status": "message:download",
"level": "info",
"title": "Downloading: " + filename,
"message": "",
}
else:
return False
return mess_dict
class RedisQueue:
"""dynamically interact with the download queue in redis"""
REDIS_HOST = os.environ.get("REDIS_HOST")
REDIS_PORT = os.environ.get("REDIS_PORT")
NAME_SPACE = "ta:"
if not REDIS_PORT:
REDIS_PORT = 6379
def __init__(self, key):
self.key = self.NAME_SPACE + key
self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)
def get_all(self):
"""return all elements in list"""
result = self.conn.execute_command("LRANGE", self.key, 0, -1)
all_elements = [i.decode() for i in result]
return all_elements
def add_list(self, to_add):
"""add list to queue"""
self.conn.execute_command("RPUSH", self.key, *to_add)
def add_priority(self, to_add):
"""add single video to front of queue"""
self.clear_item(to_add)
self.conn.execute_command("LPUSH", self.key, to_add)
def get_next(self):
"""return next element in the queue, False if none"""
result = self.conn.execute_command("LPOP", self.key)
if not result:
return False
next_element = result.decode()
return next_element
def clear(self):
"""delete list from redis"""
self.conn.execute_command("DEL", self.key)
def clear_item(self, to_clear):
"""remove single item from list if it's there"""
self.conn.execute_command("LREM", self.key, 0, to_clear)
def trim(self, size):
"""trim the queue based on settings amount"""
self.conn.execute_command("LTRIM", self.key, 0, size)
class DurationConverter: class DurationConverter:
""" """
using ffmpeg to get and parse duration from filepath using ffmpeg to get and parse duration from filepath

View File

@ -0,0 +1,158 @@
"""
functionality:
- interact with redis
- hold temporary download queue in redis
"""
import json
import os
import redis
from home.src.ta.helper import ignore_filelist
class RedisArchivist:
"""collection of methods to interact with redis"""
REDIS_HOST = os.environ.get("REDIS_HOST")
REDIS_PORT = os.environ.get("REDIS_PORT") or 6379
NAME_SPACE = "ta:"
CHANNELS = [
"download",
"add",
"rescan",
"subchannel",
"subplaylist",
"playlistscan",
"setting",
]
def __init__(self):
self.redis_connection = redis.Redis(
host=self.REDIS_HOST, port=self.REDIS_PORT
)
def set_message(self, key, message, expire=True):
"""write new message to redis"""
self.redis_connection.execute_command(
"JSON.SET", self.NAME_SPACE + key, ".", json.dumps(message)
)
if expire:
if isinstance(expire, bool):
secs = 20
else:
secs = expire
self.redis_connection.execute_command(
"EXPIRE", self.NAME_SPACE + key, secs
)
def get_message(self, key):
"""get message dict from redis"""
reply = self.redis_connection.execute_command(
"JSON.GET", self.NAME_SPACE + key
)
if reply:
json_str = json.loads(reply)
else:
json_str = {"status": False}
return json_str
def del_message(self, key):
"""delete key from redis"""
response = self.redis_connection.execute_command(
"DEL", self.NAME_SPACE + key
)
return response
def get_lock(self, lock_key):
"""handle lock for task management"""
redis_lock = self.redis_connection.lock(self.NAME_SPACE + lock_key)
return redis_lock
def get_progress(self):
"""get a list of all progress messages"""
all_messages = []
for channel in self.CHANNELS:
key = "message:" + channel
reply = self.redis_connection.execute_command(
"JSON.GET", self.NAME_SPACE + key
)
if reply:
json_str = json.loads(reply)
all_messages.append(json_str)
return all_messages
@staticmethod
def monitor_cache_dir(cache_dir):
"""
look at download cache dir directly as alternative progress info
"""
dl_cache = os.path.join(cache_dir, "download")
all_cache_file = os.listdir(dl_cache)
cache_file = ignore_filelist(all_cache_file)
if cache_file:
filename = cache_file[0][12:].replace("_", " ").split(".")[0]
mess_dict = {
"status": "message:download",
"level": "info",
"title": "Downloading: " + filename,
"message": "",
}
else:
return False
return mess_dict
class RedisQueue:
"""dynamically interact with the download queue in redis"""
REDIS_HOST = os.environ.get("REDIS_HOST")
REDIS_PORT = os.environ.get("REDIS_PORT")
NAME_SPACE = "ta:"
if not REDIS_PORT:
REDIS_PORT = 6379
def __init__(self, key):
self.key = self.NAME_SPACE + key
self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)
def get_all(self):
"""return all elements in list"""
result = self.conn.execute_command("LRANGE", self.key, 0, -1)
all_elements = [i.decode() for i in result]
return all_elements
def add_list(self, to_add):
"""add list to queue"""
self.conn.execute_command("RPUSH", self.key, *to_add)
def add_priority(self, to_add):
"""add single video to front of queue"""
self.clear_item(to_add)
self.conn.execute_command("LPUSH", self.key, to_add)
def get_next(self):
"""return next element in the queue, False if none"""
result = self.conn.execute_command("LPOP", self.key)
if not result:
return False
next_element = result.decode()
return next_element
def clear(self):
"""delete list from redis"""
self.conn.execute_command("DEL", self.key)
def clear_item(self, to_clear):
"""remove single item from list if it's there"""
self.conn.execute_command("LREM", self.key, 0, to_clear)
def trim(self, size):
"""trim the queue based on settings amount"""
self.conn.execute_command("LTRIM", self.key, 0, size)

View File

@ -10,22 +10,24 @@ import os
import home.apps as startup_apps import home.apps as startup_apps
from celery import Celery, shared_task from celery import Celery, shared_task
from home.src.config import AppConfig, ScheduleBuilder from home.src.download.queue import PendingList
from home.src.download import ( from home.src.download.subscriptions import (
ChannelSubscription, ChannelSubscription,
PendingList,
PlaylistSubscription, PlaylistSubscription,
VideoDownloader,
) )
from home.src.helper import RedisArchivist, RedisQueue, UrlListParser from home.src.download.thumbnails import ThumbManager, validate_thumbnails
from home.src.index import YoutubeChannel, YoutubePlaylist from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.index_management import backup_all_indexes, restore_from_backup from home.src.es.index_setup import backup_all_indexes, restore_from_backup
from home.src.reindex import ( from home.src.index.channel import YoutubeChannel
from home.src.index.filesystem import (
ManualImport, ManualImport,
reindex_old_documents, reindex_old_documents,
scan_filesystem, scan_filesystem,
) )
from home.src.thumbnails import ThumbManager, validate_thumbnails from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig, ScheduleBuilder
from home.src.ta.helper import UrlListParser
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
CONFIG = AppConfig().config CONFIG = AppConfig().config
REDIS_HOST = os.environ.get("REDIS_HOST") REDIS_HOST = os.environ.get("REDIS_HOST")
@ -266,17 +268,16 @@ def subscribe_to(url_str):
@shared_task @shared_task
def index_channel_playlists(channel_id): def index_channel_playlists(channel_id):
"""add all playlists of channel to index""" """add all playlists of channel to index"""
channel_handler = YoutubeChannel(channel_id) channel = YoutubeChannel(channel_id)
channel_name = channel_handler.channel_dict["channel_name"]
# notify # notify
mess_dict = { mess_dict = {
"status": "message:playlistscan", "status": "message:playlistscan",
"level": "info", "level": "info",
"title": "Looking for playlists", "title": "Looking for playlists",
"message": f'Scanning channel "{channel_name}" in progress', "message": f'Scanning channel "{channel.youtube_id}" in progress',
} }
RedisArchivist().set_message("message:playlistscan", mess_dict) RedisArchivist().set_message("message:playlistscan", mess_dict)
all_playlists = channel_handler.get_all_playlists() all_playlists = channel.get_all_playlists()
if not all_playlists: if not all_playlists:
print(f"no playlists found for channel {channel_id}") print(f"no playlists found for channel {channel_id}")
@ -295,28 +296,29 @@ def index_channel_playlists(channel_id):
} }
RedisArchivist().set_message("message:playlistscan", mess_dict) RedisArchivist().set_message("message:playlistscan", mess_dict)
print("add playlist: " + playlist_title) print("add playlist: " + playlist_title)
playlist_handler = YoutubePlaylist(
playlist_id, all_youtube_ids=all_youtube_ids playlist = YoutubePlaylist(playlist_id)
) playlist.all_youtube_ids = all_youtube_ids
playlist_handler.get_playlist_dict() playlist.build_json()
if not playlist_handler.playlist_dict:
if not playlist.json_data:
# skip if not available # skip if not available
continue continue
# don't add if no videos downloaded # don't add if no videos downloaded
downloaded = [ downloaded = [
i i
for i in playlist_handler.playlist_dict["playlist_entries"] for i in playlist.json_data["playlist_entries"]
if i["downloaded"] if i["downloaded"]
] ]
if not downloaded: if not downloaded:
continue continue
playlist_handler.upload_to_es()
playlist_handler.add_vids_to_playlist() playlist.upload_to_es()
playlist.add_vids_to_playlist()
if all_playlists: if all_playlists:
handler = ThumbManager() playlist.get_playlist_art()
missing_playlists = handler.get_missing_playlists()
handler.download_playlist(missing_playlists)
return return

View File

@ -69,7 +69,7 @@
<img src="{% static 'img/icon-gear.svg' %}" alt="gear-icon" title="Settings"> <img src="{% static 'img/icon-gear.svg' %}" alt="gear-icon" title="Settings">
</a> </a>
<a href="{% url 'logout' %}"> <a href="{% url 'logout' %}">
<img src="{% static 'img/icon-exit.svg' %}" alt="exit-icon" title="Logout"> <img class="alert-hover" src="{% static 'img/icon-exit.svg' %}" alt="exit-icon" title="Logout">
</a> </a>
</div> </div>
</div> </div>

View File

@ -1,7 +1,7 @@
""" """
Functionality: Functionality:
- all views for home app - all views for home app
- process post data received from frontend via ajax - holds base classes to inherit from
""" """
import json import json
@ -14,7 +14,9 @@ from django.contrib.auth.forms import AuthenticationForm
from django.http import JsonResponse from django.http import JsonResponse
from django.shortcuts import redirect, render from django.shortcuts import redirect, render
from django.views import View from django.views import View
from home.forms import ( from home.src.es.index_setup import get_available_backups
from home.src.frontend.api_calls import PostData
from home.src.frontend.forms import (
AddToQueueForm, AddToQueueForm,
ApplicationSettingsForm, ApplicationSettingsForm,
CustomAuthForm, CustomAuthForm,
@ -24,12 +26,12 @@ from home.forms import (
SubscribeToPlaylistForm, SubscribeToPlaylistForm,
UserSettingsForm, UserSettingsForm,
) )
from home.src.config import AppConfig, ScheduleBuilder from home.src.frontend.searching import SearchHandler
from home.src.frontend import PostData from home.src.index.generic import Pagination
from home.src.helper import RedisArchivist, UrlListParser from home.src.index.playlist import YoutubePlaylist
from home.src.index import YoutubePlaylist from home.src.ta.config import AppConfig, ScheduleBuilder
from home.src.index_management import get_available_backups from home.src.ta.helper import UrlListParser
from home.src.searching import Pagination, SearchHandler from home.src.ta.ta_redis import RedisArchivist
from home.tasks import extrac_dl, subscribe_to from home.tasks import extrac_dl, subscribe_to
from rest_framework.authtoken.models import Token from rest_framework.authtoken.models import Token
@ -169,8 +171,7 @@ class ArchivistResultsView(ArchivistViewConfig):
def single_lookup(self, es_path): def single_lookup(self, es_path):
"""retrieve a single item from url""" """retrieve a single item from url"""
es_url = self.default_conf["application"]["es_url"] search = SearchHandler(es_path, config=self.default_conf)
search = SearchHandler(f"{es_url}/{es_path}", data=False)
result = search.get_data()[0]["source"] result = search.get_data()[0]["source"]
return result return result
@ -189,8 +190,9 @@ class ArchivistResultsView(ArchivistViewConfig):
def find_results(self): def find_results(self):
"""add results and pagination to context""" """add results and pagination to context"""
url = self.default_conf["application"]["es_url"] + self.es_search search = SearchHandler(
search = SearchHandler(url, self.data) self.es_search, config=self.default_conf, data=self.data
)
self.context["results"] = search.get_data() self.context["results"] = search.get_data()
self.pagination_handler.validate(search.max_hits) self.pagination_handler.validate(search.max_hits)
self.context["max_hits"] = search.max_hits self.context["max_hits"] = search.max_hits
@ -203,7 +205,7 @@ class HomeView(ArchivistResultsView):
""" """
view_origin = "home" view_origin = "home"
es_search = "/ta_video/_search" es_search = "ta_video/_search"
def get(self, request): def get(self, request):
"""handle get requests""" """handle get requests"""
@ -284,7 +286,7 @@ class DownloadView(ArchivistResultsView):
""" """
view_origin = "downloads" view_origin = "downloads"
es_search = "/ta_download/_search" es_search = "ta_download/_search"
def get(self, request): def get(self, request):
"""handle get request""" """handle get request"""
@ -346,7 +348,7 @@ class ChannelIdView(ArchivistResultsView):
""" """
view_origin = "home" view_origin = "home"
es_search = "/ta_video/_search" es_search = "ta_video/_search"
def get(self, request, channel_id): def get(self, request, channel_id):
"""get request""" """get request"""
@ -395,7 +397,7 @@ class ChannelView(ArchivistResultsView):
""" """
view_origin = "channel" view_origin = "channel"
es_search = "/ta_channel/_search" es_search = "ta_channel/_search"
def get(self, request): def get(self, request):
"""handle get request""" """handle get request"""
@ -445,7 +447,7 @@ class PlaylistIdView(ArchivistResultsView):
""" """
view_origin = "home" view_origin = "home"
es_search = "/ta_video/_search" es_search = "ta_video/_search"
def get(self, request, playlist_id): def get(self, request, playlist_id):
"""handle get request""" """handle get request"""
@ -521,7 +523,7 @@ class PlaylistView(ArchivistResultsView):
""" """
view_origin = "playlist" view_origin = "playlist"
es_search = "/ta_playlist/_search" es_search = "ta_playlist/_search"
def get(self, request): def get(self, request):
"""handle get request""" """handle get request"""
@ -592,9 +594,9 @@ class VideoView(View):
def get(self, request, video_id): def get(self, request, video_id):
"""get single video""" """get single video"""
es_url, colors, cast = self.read_config(user_id=request.user.id) colors, cast = self.read_config(user_id=request.user.id)
url = f"{es_url}/ta_video/_doc/{video_id}" path = f"ta_video/_doc/{video_id}"
look_up = SearchHandler(url, None) look_up = SearchHandler(path, config=False)
video_hit = look_up.get_data() video_hit = look_up.get_data()
video_data = video_hit[0]["source"] video_data = video_hit[0]["source"]
try: try:
@ -624,11 +626,11 @@ class VideoView(View):
"""build playlist nav if available""" """build playlist nav if available"""
all_navs = [] all_navs = []
for playlist_id in playlists: for playlist_id in playlists:
handler = YoutubePlaylist(playlist_id) playlist = YoutubePlaylist(playlist_id)
handler.get_playlist_dict() playlist.get_from_es()
nav = handler.build_nav(video_id) playlist.build_nav(video_id)
if nav: if playlist.nav:
all_navs.append(nav) all_navs.append(playlist.nav)
return all_navs return all_navs
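Where the indexing task rebuilds playlist data with build_json(), this view hunk only reads what is already in the index and then works with attributes on the object. A small sketch of that read path, assuming get_from_es(), build_nav() and the nav attribute work as used here; the helper name is mine.

# Sketch only: the read path used for playlist navigation, as opposed to the
# build_json() path used during indexing; the IDs passed in are placeholders.
from home.src.index.playlist import YoutubePlaylist

def build_playlist_navs(playlist_ids, video_id):
    """collect the nav block of every playlist containing video_id"""
    all_navs = []
    for playlist_id in playlist_ids:
        playlist = YoutubePlaylist(playlist_id)
        playlist.get_from_es()        # load the already indexed document
        playlist.build_nav(video_id)  # populates playlist.nav, no return value
        if playlist.nav:
            all_navs.append(playlist.nav)

    return all_navs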
@ -636,10 +638,9 @@ class VideoView(View):
def read_config(user_id): def read_config(user_id):
"""read config file""" """read config file"""
config_handler = AppConfig(user_id) config_handler = AppConfig(user_id)
es_url = config_handler.config["application"]["es_url"]
cast = config_handler.config["application"]["enable_cast"] cast = config_handler.config["application"]["enable_cast"]
colors = config_handler.colors colors = config_handler.colors
return es_url, colors, cast return colors, cast
@staticmethod @staticmethod
def star_creator(rating): def star_creator(rating):

View File

@ -1,12 +1,12 @@
beautifulsoup4==4.10.0 beautifulsoup4==4.10.0
celery==5.2.3 celery==5.2.3
django-cors-headers==3.11.0
Django==4.0.1 Django==4.0.1
django-cors-headers==3.11.0
djangorestframework==3.13.1 djangorestframework==3.13.1
Pillow==9.0.0 Pillow==9.0.0
redis==4.1.0 redis==4.1.1
requests==2.27.1 requests==2.27.1
ryd-client==0.0.3 ryd-client==0.0.3
uWSGI==2.0.20 uWSGI==2.0.20
whitenoise==5.3.0 whitenoise==5.3.0
yt_dlp==2021.12.27 yt_dlp==2022.1.21

View File

@ -286,6 +286,10 @@ button:hover {
--connected-color: var(--accent-font-light); --connected-color: var(--accent-font-light);
} }
.alert-hover:hover {
filter: var(--img-filter-error);
}
/* top of page */ /* top of page */
.title-bar { .title-bar {
padding-top: 30px; padding-top: 30px;

View File

@ -9,15 +9,15 @@
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="500" width="210mm"
height="500" height="210mm"
viewBox="0 0 132.29197 132.29167" viewBox="0 0 210 210"
version="1.1" version="1.1"
id="svg1303" id="svg1566"
inkscape:version="0.92.4 (5da689c313, 2019-01-14)" inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
sodipodi:docname="Icons_exit.svg"> sodipodi:docname="Icons_exit 05.svg">
<defs <defs
id="defs1297" /> id="defs1560" />
<sodipodi:namedview <sodipodi:namedview
id="base" id="base"
pagecolor="#ffffff" pagecolor="#ffffff"
@ -25,20 +25,19 @@
borderopacity="1.0" borderopacity="1.0"
inkscape:pageopacity="0.0" inkscape:pageopacity="0.0"
inkscape:pageshadow="2" inkscape:pageshadow="2"
inkscape:zoom="0.66442107" inkscape:zoom="0.35355339"
inkscape:cx="161.45413" inkscape:cx="963.7258"
inkscape:cy="207.61753" inkscape:cy="291.01609"
inkscape:document-units="mm" inkscape:document-units="mm"
inkscape:current-layer="layer1" inkscape:current-layer="layer1"
showgrid="false" showgrid="false"
units="px" inkscape:window-width="1920"
inkscape:window-width="1169" inkscape:window-height="1009"
inkscape:window-height="893" inkscape:window-x="-8"
inkscape:window-x="729" inkscape:window-y="-8"
inkscape:window-y="13" inkscape:window-maximized="1" />
inkscape:window-maximized="0" />
<metadata <metadata
id="metadata1300"> id="metadata1563">
<rdf:RDF> <rdf:RDF>
<cc:Work <cc:Work
rdf:about=""> rdf:about="">
@ -53,15 +52,24 @@
inkscape:label="Ebene 1" inkscape:label="Ebene 1"
inkscape:groupmode="layer" inkscape:groupmode="layer"
id="layer1" id="layer1"
transform="translate(0,-164.70764)"> transform="translate(0,-87)">
<g <path
id="g855" style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:2.35654187;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:markers fill stroke"
transform="matrix(1.9016362,0,0,1.9016362,-197.93838,-58.9418)"> d="M 106.49932,87.901069 C 49.504302,87.900974 3.3006913,134.10459 3.3007713,191.0996 c 0,0.30098 0.003,0.60131 0.005,0.90167 v 0 c -0.003,0.29952 -0.006,0.59901 -0.006,0.89912 -8e-5,56.99502 46.2035307,103.19865 103.1985287,103.19854 23.01714,-0.0773 45.34783,-7.84709 63.44155,-22.07425 0,0 9.01874,-8.71006 2.40579,-16.41737 -6.61297,-7.70731 -19.11222,0.3185 -19.11222,0.3185 -13.60985,9.81394 -29.95596,15.11012 -46.73512,15.14236 -44.275428,0 -80.167758,-35.89234 -80.167758,-80.16778 0,-0.30097 0.003,-0.60148 0.006,-0.90166 h -5.2e-4 c -0.003,-0.29934 -0.006,-0.59901 -0.006,-0.89913 0,-44.27545 35.89234,-80.16777 80.167778,-80.16777 16.77916,0.0322 33.12527,5.32843 46.73512,15.14236 0,0 12.49925,8.02581 19.11222,0.3185 6.61295,-7.70732 -2.4058,-16.41739 -2.4058,-16.41739 C 151.84561,95.74815 129.51494,87.97828 106.4978,87.901069 Z m 54.30959,56.450221 -12.13663,11.69622 20.15864,20.93332 -93.932488,-1.4899 c -9.22763,-0.17349 -16.77655,6.07423 -16.92587,14.00904 l 0.002,0.002 c -0.0149,1.82673 -0.0235,3.40102 0,4.99598 l -0.002,0.002 c 0.14932,7.93483 7.69824,14.18254 16.92587,14.00905 l 93.932488,-1.48991 -20.15864,20.93333 12.13663,11.69622 34.0585,-35.35536 11.82982,-12.29208 h 0.003 l -9.9e-4,-0.002 9.9e-4,-9.9e-4 h -0.003 l -11.82982,-12.29208 z"
<path id="path1405"
inkscape:connector-curvature="0" inkscape:connector-curvature="0"
id="rect1208" sodipodi:nodetypes="cccccccsccsccsccscccccccccccccccccccccc" />
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0;stroke-linecap:round;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:markers fill stroke" <path
d="m 124.57603,151.92962 c -0.0433,2.30016 2.0751,4.19245 4.75007,4.24278 l 30.26401,0.43007 -6.00195,5.78023 3.43246,3.56154 10.2778,-9.9006 0.002,0.002 3.5183,-3.3908 -3.42991,-3.564 -9.8737,-10.24989 -3.51834,3.39083 5.84388,6.06803 -30.35875,-0.43185 c -2.67494,-0.0503 -4.86301,1.76094 -4.90629,4.06112 z m -17.65039,-32.01644 v 64.95883 h 7.44347 v -58.27707 h 26.3896 v 18.5229 h 7.44296 v -25.20466 z m 33.83307,39.75416 v 25.20467 h 7.44296 v -25.20467 z" /> style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:2.39729571;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:markers fill stroke"
</g> d="m 506.57967,92.503023 c -57.98068,-1e-4 -104.98336,47.002567 -104.98326,104.983257 1.9e-4,57.98049 47.00276,104.98284 104.98326,104.98273 23.42489,-0.0758 46.15146,-7.98387 57.83458,-18.08923 11.68313,-10.10537 12.15613,-18.62993 7.38675,-23.04107 v -0.002 c -4.7711,-4.41269 -12.38099,-1.9587 -17.69245,2.25103 -13.83538,9.99805 -30.45915,15.40285 -47.52888,15.4528 -45.04116,0 -81.55421,-36.51305 -81.5542,-81.55419 0,-45.04114 36.51307,-81.5542 81.5542,-81.5542 17.06933,0.0328 33.21884,5.19482 43.16812,12.86758 9.94929,7.67275 17.33418,9.17607 22.1053,4.76338 v -0.002 c 4.77116,-4.41278 5.55882,-12.9887 -0.73482,-18.60197 -18.40654,-14.47308 -41.1234,-22.377337 -64.5386,-22.455877 z m 55.24881,57.426467 -12.34652,11.8985 20.50728,21.29534 -95.55697,-1.51567 c -9.38721,-0.17649 -17.06669,6.17929 -17.21858,14.25133 l 0.003,0.002 c -0.15192,8.07203 7.28245,14.71295 16.66978,14.88953 l 95.22519,1.50947 -21.06332,20.28455 12.04579,12.49846 36.06808,-34.74464 0.005,0.005 12.34654,-11.89954 -12.03701,-12.50724 z m 35.17874,98.71801 0.69918,0.67386 c 0.13539,-0.22412 0.26991,-0.44874 0.4036,-0.67386 z"
id="path1405-6"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccccccsczccccccccccccccccccccccc" />
<path
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:2.39729571;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:markers fill stroke"
d="m 740.89945,94.730897 c -57.98068,-9.6e-5 -104.98334,47.002563 -104.98325,104.983253 1.9e-4,57.98049 47.00276,104.98284 104.98325,104.98274 23.42488,-0.0758 46.15145,-7.98387 64.5635,-22.46581 l -17.03461,-16.41553 c -13.83537,9.99805 -30.45916,15.40285 -47.52889,15.4528 -45.04113,0 -81.55419,-36.51306 -81.55419,-81.5542 0,-45.04114 36.51306,-81.55419 81.55419,-81.55419 17.06934,0.0328 33.69814,5.42058 47.54336,15.40423 l 16.99534,-16.3773 c -18.40663,-14.4732 -41.12349,-22.377447 -64.5387,-22.455993 z m 55.24882,57.426473 -12.34653,11.8985 20.50728,21.29534 -95.55696,-1.51567 c -9.38721,-0.17649 -17.06668,6.17928 -17.21858,14.25132 l 0.002,0.002 c -0.1519,8.07203 7.28245,14.71295 16.66978,14.88953 l 95.22519,1.50947 -21.06332,20.28455 12.04578,12.49846 36.06808,-34.74465 0.005,0.005 12.34653,-11.89953 -12.03699,-12.50725 z m 35.17873,98.718 0.69919,0.67386 c 0.13538,-0.22412 0.26991,-0.44874 0.40359,-0.67386 z"
id="path1405-9"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccccsccccccccccccccccccccccc" />
</g> </g>
</svg> </svg>

Before: 2.5 KiB  |  After: 6.0 KiB