"""
Functionality:
- index new videos into elastisearch
- extract video info with yt_dlp
- scrape youtube channel page if needed
"""

import json
import os
import re
from datetime import datetime
from time import sleep

import requests
import yt_dlp as youtube_dl
from bs4 import BeautifulSoup
from home.src.config import AppConfig
from home.src.helper import DurationConverter, clean_string


class YoutubeChannel:
    """represents a single youtube channel"""

    CONFIG = AppConfig().config
    ES_URL = CONFIG["application"]["es_url"]
    CACHE_DIR = CONFIG["application"]["cache_dir"]

    def __init__(self, channel_id):
        self.channel_id = channel_id
        self.json_data = None
        self.source = None
        self.channel_dict = self.build_channel_dict()

    def build_channel_dict(self, scrape=False):
        """combine the dicts build from extracted json payload"""
        if scrape:
            channel_dict = False
        else:
            channel_dict = self.get_es_channel()
        if not channel_dict:
            print("scrape data from youtube")
            self.scrape_channel()
            channel_dict = self.parse_channel_main()
            channel_dict.update(self.parse_channel_meta())
            self.source = "scraped"
        return channel_dict

    def get_es_channel(self):
        """get from elastic search first if possible"""
        channel_id = self.channel_id
        url = f"{self.ES_URL}/ta_channel/_doc/{channel_id}"
        response = requests.get(url)
        if response.ok:
            channel_source = response.json()["_source"]
            self.source = "elastic"
            return channel_source
        return False

    def scrape_channel(self):
        """scrape channel page for additional infos"""
        channel_id = self.channel_id
        url = f"https://www.youtube.com/channel/{channel_id}/about?hl=en"
        cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
        response = requests.get(url, cookies=cookies)
        if response.ok:
            channel_page = response.text
        else:
            print(f"failed to extract channel info for: {channel_id}")
            raise ConnectionError
        soup = BeautifulSoup(channel_page, "html.parser")
        # load script into json
        all_scripts = soup.find("body").find_all("script")
        for script in all_scripts:
            if "var ytInitialData = " in str(script):
                script_content = str(script)
                break
        # extract payload
        script_content = script_content.split("var ytInitialData = ")[1]
        json_raw = script_content.rstrip(";</script>")
        json_data = json.loads(json_raw)
        # add to self
        self.json_data = json_data

    def parse_channel_main(self):
        """extract maintab values from scraped channel json data"""
        main_tab = self.json_data["header"]["c4TabbedHeaderRenderer"]
        channel_name = main_tab["title"]
        last_refresh = int(datetime.now().strftime("%s"))
        # channel_subs
        try:
            sub_text_simple = main_tab["subscriberCountText"]["simpleText"]
            sub_text = sub_text_simple.split(" ")[0]
            if sub_text[-1] == "K":
                channel_subs = int(float(sub_text.replace("K", "")) * 1000)
            elif sub_text[-1] == "M":
                channel_subs = int(float(sub_text.replace("M", "")) * 1000000)
            elif int(sub_text) >= 0:
                channel_subs = int(sub_text)
            else:
                message = f"{sub_text} not dealt with"
                print(message)
        except KeyError:
            channel_subs = 0
        # banner
        try:
            all_banners = main_tab["banner"]["thumbnails"]
            banner = sorted(all_banners, key=lambda k: k["width"])[-1]["url"]
        except KeyError:
            banner = False
        # build and return dict
        main_channel_dict = {
            "channel_active": True,
            "channel_last_refresh": last_refresh,
            "channel_subs": channel_subs,
            "channel_banner_url": banner,
            "channel_name": channel_name,
            "channel_id": self.channel_id,
        }
        return main_channel_dict

    def parse_channel_meta(self):
        """extract meta tab values from channel payload"""
        # meta tab
        json_data = self.json_data
        meta_tab = json_data["metadata"]["channelMetadataRenderer"]
        description = meta_tab["description"]
        all_thumbs = meta_tab["avatar"]["thumbnails"]
        thumb_url = sorted(all_thumbs, key=lambda k: k["width"])[-1]["url"]
        # stats tab
        renderer = "twoColumnBrowseResultsRenderer"
        all_tabs = json_data["contents"][renderer]["tabs"]
        for tab in all_tabs:
            if "tabRenderer" in tab.keys():
                if tab["tabRenderer"]["title"] == "About":
                    about_tab = tab["tabRenderer"]["content"][
                        "sectionListRenderer"
                    ]["contents"][0]["itemSectionRenderer"]["contents"][0][
                        "channelAboutFullMetadataRenderer"
                    ]
                    break
        try:
            channel_views_text = about_tab["viewCountText"]["simpleText"]
            channel_views = int(re.sub(r"\D", "", channel_views_text))
        except KeyError:
            channel_views = 0

        meta_channel_dict = {
            "channel_description": description,
            "channel_thumb_url": thumb_url,
            "channel_views": channel_views,
        }

        return meta_channel_dict

    def upload_to_es(self):
        """upload channel data to elastic search"""
        url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}"
        response = requests.put(url, json=self.channel_dict)
        print(f"added {self.channel_id} to es")
        if not response.ok:
            print(response.text)

    def clear_cache(self):
        """delete banner and thumb from cache if available"""
        channel_cache = os.path.join(self.CACHE_DIR, "channels")
        thumb = os.path.join(channel_cache, self.channel_id + "_thumb.jpg")
        banner = os.path.join(channel_cache, self.channel_id + "_banner.jpg")
        if os.path.exists(thumb):
            os.remove(thumb)
        if os.path.exists(banner):
            os.remove(banner)

    def sync_to_videos(self):
        """sync new channel_dict to all videos of channel"""
        headers = {"Content-type": "application/json"}
        channel_id = self.channel_id
        # add ingest pipeline
        processors = []
        for field, value in self.channel_dict.items():
            line = {"set": {"field": "channel." + field, "value": value}}
            processors.append(line)
        data = {"description": channel_id, "processors": processors}
        payload = json.dumps(data)
        url = self.ES_URL + "/_ingest/pipeline/" + channel_id
        request = requests.put(url, data=payload, headers=headers)
        if not request.ok:
            print(request.text)
        # apply pipeline
        data = {"query": {"match": {"channel.channel_id": channel_id}}}
        payload = json.dumps(data)
        url = self.ES_URL + "/ta_video/_update_by_query?pipeline=" + channel_id
        request = requests.post(url, data=payload, headers=headers)
        if not request.ok:
            print(request.text)

    def get_total_hits(self):
        """get total channels indexed"""
        headers = {"Content-type": "application/json"}
        data = {"query": {"match_all": {}}}
        payload = json.dumps(data)
        url = f"{self.ES_URL}/ta_channel/_search?filter_path=hits.total"
        request = requests.post(url, data=payload, headers=headers)
        if not request.ok:
            print(request.text)
        total_hits = json.loads(request.text)["hits"]["total"]["value"]
        return total_hits


class YoutubeVideo:
    """represents a single youtube video"""

    CONFIG = AppConfig().config
    ES_URL = CONFIG["application"]["es_url"]
    CACHE_DIR = CONFIG["application"]["cache_dir"]
    VIDEOS = CONFIG["application"]["videos"]

    def __init__(self, youtube_id):
        self.youtube_id = youtube_id
        self.channel_id = None
        self.vid_dict = self.get_wrapper()

    def get_wrapper(self):
        """wrapper to loop around youtube_dl to retry on failure"""
        print(f"get video data for {self.youtube_id}")
        for i in range(3):
            try:
                vid_dict = self.get_youtubedl_vid_data()
            except KeyError as e:
                print(e)
                sleep((i + 1) ** 2)
                continue
            else:
                break

        return vid_dict

    def get_youtubedl_vid_data(self):
        """parse youtubedl extract info"""
        youtube_id = self.youtube_id
        obs = {
            "quiet": True,
            "default_search": "ytsearch",
            "skip_download": True,
        }
        try:
            vid = youtube_dl.YoutubeDL(obs).extract_info(youtube_id)
        except (
            youtube_dl.utils.ExtractorError,
            youtube_dl.utils.DownloadError,
        ):
            print("failed to get info for " + youtube_id)
            return False
        # extract
        self.channel_id = vid["channel_id"]
        upload_date = vid["upload_date"]
        upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
        published = upload_date_time.strftime("%Y-%m-%d")
        last_refresh = int(datetime.now().strftime("%s"))
        # likes
        try:
            like_count = vid["like_count"]
        except KeyError:
            like_count = 0
        try:
            dislike_count = vid["dislike_count"]
        except KeyError:
            dislike_count = 0
        # build dicts
        stats = {
            "view_count": vid["view_count"],
            "like_count": like_count,
            "dislike_count": dislike_count,
            "average_rating": vid["average_rating"],
        }
        vid_basic = {
            "title": vid["title"],
            "description": vid["description"],
            "category": vid["categories"],
            "vid_thumb_url": vid["thumbnail"],
            "tags": vid["tags"],
            "published": published,
            "stats": stats,
            "vid_last_refresh": last_refresh,
            "date_downloaded": last_refresh,
            "youtube_id": youtube_id,
            "active": True,
            "channel": False,
        }

        return vid_basic

    def add_player(self, missing_vid):
        """add player information for new videos"""
        cache_path = self.CACHE_DIR + "/download/"
        videos = self.VIDEOS

        if missing_vid:
            # coming from scan_filesystem
            channel_name, file_name, _ = missing_vid
            vid_path = os.path.join(videos, channel_name, file_name)
        else:
            # coming from VideoDownload
            all_cached = os.listdir(cache_path)
            for file_cached in all_cached:
                if self.youtube_id in file_cached:
                    vid_path = os.path.join(cache_path, file_cached)
                    break

        duration_handler = DurationConverter()
        duration = duration_handler.get_sec(vid_path)
        duration_str = duration_handler.get_str(duration)
        player = {
            "watched": False,
            "duration": duration,
            "duration_str": duration_str,
        }
        self.vid_dict["player"] = player

    def build_file_path(self, channel_name):
        """build media_url from where file will be located"""
        clean_channel_name = clean_string(channel_name)
        timestamp = self.vid_dict["published"].replace("-", "")
        youtube_id = self.vid_dict["youtube_id"]
        title = self.vid_dict["title"]
        clean_title = clean_string(title)
        filename = f"{timestamp}_{youtube_id}_{clean_title}.mp4"
        media_url = os.path.join(clean_channel_name, filename)
        self.vid_dict["media_url"] = media_url

    def get_es_data(self):
        """get current data from elastic search"""
        url = self.ES_URL + "/ta_video/_doc/" + self.youtube_id
        response = requests.get(url)
        if not response.ok:
            print(response.text)
        es_vid_dict = json.loads(response.text)
        return es_vid_dict

    def upload_to_es(self):
        """upload channel data to elastic search"""
        url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}"
        response = requests.put(url, json=self.vid_dict)
        if not response.ok:
            print(response.text)

    def delete_cache(self):
        """delete thumbnail from cache if exist"""
        video_cache = os.path.join(self.CACHE_DIR, "videos")
        thumb = os.path.join(video_cache, self.youtube_id + ".jpg")
        if os.path.exists(thumb):
            os.remove(thumb)

    def deactivate(self):
        """deactivate document on extractor error"""
        youtube_id = self.youtube_id
        headers = {"Content-type": "application/json"}
        url = f"{self.ES_URL}/ta_video/_update/{youtube_id}"
        data = {"script": "ctx._source.active = false"}
        json_str = json.dumps(data)
        response = requests.post(url, data=json_str, headers=headers)
        print(f"deactivated {youtube_id}")
        if not response.ok:
            print(response.text)


def index_new_video(youtube_id, missing_vid=False):
    """combine video and channel classes for new video index"""
    vid_handler = YoutubeVideo(youtube_id)
    if not vid_handler.vid_dict:
        raise ValueError("failed to get metadata for " + youtube_id)

    channel_handler = YoutubeChannel(vid_handler.channel_id)
    # add filepath to vid_dict
    channel_name = channel_handler.channel_dict["channel_name"]
    vid_handler.build_file_path(channel_name)
    # add channel and player to video
    vid_handler.add_player(missing_vid)
    vid_handler.vid_dict["channel"] = channel_handler.channel_dict
    # add new channel to es
    if channel_handler.source == "scraped":
        channel_handler.channel_dict["channel_subscribed"] = False
        channel_handler.upload_to_es()
    # upload video to es
    vid_handler.upload_to_es()
    # return vid_dict for further processing
    return vid_handler.vid_dict