tubearchivist/tubearchivist/home/src/index/reindex.py

"""
functionality:
- periodically refresh documents
- index and update in es
"""

import os
import shutil
from datetime import datetime
from math import ceil
from time import sleep

from home.src.download.queue import PendingList
from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import CookieHandler
from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.es.connect import ElasticWrap
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video import YoutubeVideo
from home.src.ta.config import AppConfig


class Reindex:
    """check for outdated documents and refresh data from youtube"""

    # map each index to the field that marks a document as still active
    MATCH_FIELD = {
        "ta_video": "active",
        "ta_channel": "channel_active",
        "ta_playlist": "playlist_active",
    }
    # overfetch factor, queue slightly more per day than strictly needed
    MULTIPLY = 1.2

    def __init__(self):
        # config
        self.now = int(datetime.now().timestamp())  # epoch seconds
        self.config = AppConfig().config
        self.interval = self.config["scheduler"]["check_reindex_days"]
        # scan
        self.all_youtube_ids = False
        self.all_channel_ids = False
        self.all_playlist_ids = False

    def check_cookie(self):
        """validate cookie if enabled"""
        if self.config["downloads"]["cookie_import"]:
            valid = CookieHandler(self.config).validate()
            if not valid:
                # invalid cookie, nothing more to do here
                return

    def _get_daily(self):
        """get daily refresh values"""
        total_videos = self._get_total_hits("ta_video")
        video_daily = ceil(total_videos / self.interval * self.MULTIPLY)
        if video_daily >= 10000:
            # stay below the ES default max_result_window of 10000
            video_daily = 9999

        total_channels = self._get_total_hits("ta_channel")
        channel_daily = ceil(total_channels / self.interval * self.MULTIPLY)
        total_playlists = self._get_total_hits("ta_playlist")
        playlist_daily = ceil(total_playlists / self.interval * self.MULTIPLY)

        return (video_daily, channel_daily, playlist_daily)
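
    # Quota math example: with 1000 indexed videos and check_reindex_days
    # set to 90, ceil(1000 / 90 * 1.2) = 14 videos are queued per day; the
    # 1.2 headroom lets an accumulated backlog drain over time.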

    def _get_total_hits(self, index):
        """get total hits from index"""
        match_field = self.MATCH_FIELD[index]
        path = f"{index}/_search?filter_path=hits.total"
        data = {"query": {"match": {match_field: True}}}
        response, _ = ElasticWrap(path).post(data=data)
        total_hits = response["hits"]["total"]["value"]
        return total_hits
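
    # With filter_path=hits.total, Elasticsearch trims the response down to
    # the counter, e.g. {"hits": {"total": {"value": 42, "relation": "eq"}}},
    # which is what the ["hits"]["total"]["value"] lookup unpacks.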

    def _get_unrated_vids(self):
        """get max 200 videos without rating if ryd integration is enabled"""
        data = {
            "size": 200,
            "query": {
                "bool": {
                    "must_not": [{"exists": {"field": "stats.average_rating"}}]
                }
            },
        }
        response, _ = ElasticWrap("ta_video/_search").get(data=data)
        missing_rating = [i["_id"] for i in response["hits"]["hits"]]
        self.all_youtube_ids = self.all_youtube_ids + missing_rating

    def _get_outdated_vids(self, size):
        """get daily videos to refresh"""
        now_lte = self.now - self.interval * 24 * 60 * 60
        must_list = [
            {"match": {"active": True}},
            {"range": {"vid_last_refresh": {"lte": now_lte}}},
        ]
        data = {
            "size": size,
            "query": {"bool": {"must": must_list}},
            "sort": [{"vid_last_refresh": {"order": "asc"}}],
            "_source": False,
        }
        response, _ = ElasticWrap("ta_video/_search").get(data=data)
        all_youtube_ids = [i["_id"] for i in response["hits"]["hits"]]
        return all_youtube_ids
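
    # Cutoff example: with check_reindex_days set to 90, now_lte lies
    # 90 * 24 * 60 * 60 = 7776000 seconds in the past; "_source": False
    # keeps the response to bare document ids, sorted oldest refresh first.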

    def _get_outdated_channels(self, size):
        """get daily channels to refresh"""
        now_lte = self.now - self.interval * 24 * 60 * 60
        must_list = [
            {"match": {"channel_active": True}},
            {"range": {"channel_last_refresh": {"lte": now_lte}}},
        ]
        data = {
            "size": size,
            "query": {"bool": {"must": must_list}},
            "sort": [{"channel_last_refresh": {"order": "asc"}}],
            "_source": False,
        }
        response, _ = ElasticWrap("ta_channel/_search").get(data=data)
        all_channel_ids = [i["_id"] for i in response["hits"]["hits"]]
        return all_channel_ids

    def _get_outdated_playlists(self, size):
        """get daily outdated playlists to refresh"""
        now_lte = self.now - self.interval * 24 * 60 * 60
        must_list = [
            {"match": {"playlist_active": True}},
            {"range": {"playlist_last_refresh": {"lte": now_lte}}},
        ]
        data = {
            "size": size,
            "query": {"bool": {"must": must_list}},
            "sort": [{"playlist_last_refresh": {"order": "asc"}}],
            "_source": False,
        }
        response, _ = ElasticWrap("ta_playlist/_search").get(data=data)
        all_playlist_ids = [i["_id"] for i in response["hits"]["hits"]]
        return all_playlist_ids

    def check_outdated(self):
        """fill the id lists with outdated documents due for refresh"""
        video_daily, channel_daily, playlist_daily = self._get_daily()
        self.all_youtube_ids = self._get_outdated_vids(video_daily)
        self.all_channel_ids = self._get_outdated_channels(channel_daily)
        self.all_playlist_ids = self._get_outdated_playlists(playlist_daily)

        integrate_ryd = self.config["downloads"]["integrate_ryd"]
        if integrate_ryd:
            self._get_unrated_vids()
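
    # The Return YouTube Dislike ids are appended to the daily video list,
    # so videos without a rating get refreshed ahead of their schedule.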

    @staticmethod
    def _reindex_single_video(youtube_id):
        """refresh data for single video"""
        video = YoutubeVideo(youtube_id)

        # read current state
        video.get_from_es()
        player = video.json_data["player"]
        date_downloaded = video.json_data["date_downloaded"]
        channel_dict = video.json_data["channel"]
        playlist = video.json_data.get("playlist")
        subtitles = video.json_data.get("subtitles")

        # get new
        video.build_json()
        if not video.youtube_meta:
            # no longer available upstream, mark inactive, keep local copy
            video.deactivate()
            return

        video.delete_subtitles(subtitles=subtitles)
        video.check_subtitles()

        # add back local state the remote metadata doesn't know about
        video.json_data["player"] = player
        video.json_data["date_downloaded"] = date_downloaded
        video.json_data["channel"] = channel_dict
        if playlist:
            video.json_data["playlist"] = playlist

        video.upload_to_es()

        # refresh the thumbnail from the new vid_thumb_url
        thumb_handler = ThumbManager()
        thumb_handler.delete_vid_thumb(youtube_id)
        to_download = (youtube_id, video.json_data["vid_thumb_url"])
        thumb_handler.download_vid([to_download], notify=False)

    @staticmethod
    def _reindex_single_channel(channel_id):
        """refresh channel data and sync to videos"""
        channel = YoutubeChannel(channel_id)
        channel.get_from_es()
        subscribed = channel.json_data["channel_subscribed"]
        overwrites = channel.json_data.get("channel_overwrites", False)
        channel.get_from_youtube()

        # carry over local state the remote scrape doesn't include
        channel.json_data["channel_subscribed"] = subscribed
        if overwrites:
            channel.json_data["channel_overwrites"] = overwrites

        channel.upload_to_es()
        channel.sync_to_videos()

    @staticmethod
    def _reindex_single_playlist(playlist_id, all_indexed_ids):
        """refresh playlist data"""
        playlist = YoutubePlaylist(playlist_id)
        playlist.get_from_es()
        subscribed = playlist.json_data["playlist_subscribed"]
        playlist.all_youtube_ids = all_indexed_ids
        playlist.build_json(scrape=True)
        if not playlist.json_data:
            playlist.deactivate()
            return

        playlist.json_data["playlist_subscribed"] = subscribed
        playlist.upload_to_es()

    def reindex(self):
        """reindex what's needed"""
        sleep_interval = self.config["downloads"]["sleep_interval"]

        # videos
        print(f"reindexing {len(self.all_youtube_ids)} videos")
        for youtube_id in self.all_youtube_ids:
            try:
                self._reindex_single_video(youtube_id)
            except FileNotFoundError:
                # media file moved by a channel rename, fix path and retry
                ChannelUrlFixer(youtube_id, self.config).run()
                self._reindex_single_video(youtube_id)
            if sleep_interval:
                sleep(sleep_interval)

        # channels
        print(f"reindexing {len(self.all_channel_ids)} channels")
        for channel_id in self.all_channel_ids:
            self._reindex_single_channel(channel_id)
            if sleep_interval:
                sleep(sleep_interval)

        # playlists
        print(f"reindexing {len(self.all_playlist_ids)} playlists")
        if self.all_playlist_ids:
            handler = PendingList()
            handler.get_download()
            handler.get_indexed()
            all_indexed_ids = [i["youtube_id"] for i in handler.all_videos]
            for playlist_id in self.all_playlist_ids:
                self._reindex_single_playlist(playlist_id, all_indexed_ids)
                if sleep_interval:
                    sleep(sleep_interval)


class ChannelUrlFixer:
    """fix not matching channel names in reindex"""

    def __init__(self, youtube_id, config):
        self.youtube_id = youtube_id
        self.config = config
        self.video = False

    def run(self):
        """check and run if needed"""
        print(f"{self.youtube_id}: failed to build channel path, try to fix.")
        video_path_is, video_folder_is = self.get_as_is()
        if not os.path.exists(video_path_is):
            print(f"giving up reindex, media file missing: {self.video.json_data}")
            raise ValueError

        _, video_folder_should = self.get_as_should()
        if video_folder_is != video_folder_should:
            self.process(video_path_is)
        else:
            print(f"{self.youtube_id}: skip channel url fixer")

    def get_as_is(self):
        """get video object as is"""
        self.video = YoutubeVideo(self.youtube_id)
        self.video.get_from_es()
        video_path_is = os.path.join(
            self.config["application"]["videos"],
            self.video.json_data["media_url"],
        )
        video_folder_is = os.path.split(video_path_is)[0]

        return video_path_is, video_folder_is

    def get_as_should(self):
        """add fresh metadata from remote"""
        self.video.get_from_youtube()
        self.video.add_file_path()
        video_path_should = os.path.join(
            self.config["application"]["videos"],
            self.video.json_data["media_url"],
        )
        video_folder_should = os.path.split(video_path_should)[0]

        return video_path_should, video_folder_should

    def process(self, video_path_is):
        """fix filepath"""
        print(f"{self.youtube_id}: fixing channel rename.")
        cache_dir = self.config["application"]["cache_dir"]
        new_file_path = os.path.join(
            cache_dir, "download", self.youtube_id + ".mp4"
        )
        # stage the file in the download cache, then rearchive it under the
        # new channel folder and update media_url in the index
        shutil.move(video_path_is, new_file_path)
        VideoDownloader().move_to_archive(self.video.json_data)
        self.video.update_media_url()
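

# A minimal driver, as a sketch only: the real scheduler hook is assumed to
# live outside this file, and this function name is hypothetical.
def example_reindex_run():
    """illustrative: refresh everything that is due, oldest first"""
    handler = Reindex()
    handler.check_cookie()
    handler.check_outdated()
    handler.reindex()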