tubearchivist/tubearchivist/home/src/index/channel.py

"""
functionality:
- get metadata from youtube for a channel
- index and update in es
"""
import json
import os
import re
from datetime import datetime
import requests
from bs4 import BeautifulSoup
from home.src.download import queue # partial import
from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.generic import YouTubeItem
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.helper import clean_string, requests_headers
from home.src.ta.ta_redis import RedisArchivist


class ChannelScraper:
    """custom scraper using bs4 to scrape channel about page
    can be integrated into yt-dlp once #2237 and #2350
    are merged upstream
    """

    def __init__(self, channel_id):
        self.channel_id = channel_id
        self.soup = False
        self.yt_json = False
        self.json_data = False

    def get_json(self):
        """main method to return channel dict"""
        self.get_soup()
        self._extract_yt_json()
        self._parse_channel_main()
        self._parse_channel_meta()
        return self.json_data

    def get_soup(self):
        """return soup from youtube"""
        print(f"{self.channel_id}: scrape channel data from youtube")
        url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
        # pre-set consent cookie skips the eu consent interstitial
        cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
        response = requests.get(
            url, cookies=cookies, headers=requests_headers()
        )
        if response.ok:
            channel_page = response.text
        else:
            print(f"{self.channel_id}: failed to extract channel info")
            raise ConnectionError(f"failed to load channel page: {url}")

        self.soup = BeautifulSoup(channel_page, "html.parser")

    def _extract_yt_json(self):
        """parse soup and get ytInitialData json"""
        script_content = False
        all_scripts = self.soup.find("body").find_all("script")
        for script in all_scripts:
            if "var ytInitialData = " in str(script):
                script_content = str(script)
                break

        if not script_content:
            raise ValueError(f"{self.channel_id}: ytInitialData not found")

        # extract payload between the assignment and the closing script tag,
        # str.rstrip strips a character set, not a suffix, so split instead
        json_raw = script_content.split("var ytInitialData = ")[1]
        json_raw = json_raw.split(";</script>")[0]
        self.yt_json = json.loads(json_raw)

    def _parse_channel_main(self):
        """extract maintab values from scraped channel json data"""
        main_tab = self.yt_json["header"]["c4TabbedHeaderRenderer"]
        # build and return dict
        self.json_data = {
            "channel_active": True,
            "channel_last_refresh": int(datetime.now().timestamp()),
            "channel_subs": self._get_channel_subs(main_tab),
            "channel_name": main_tab["title"],
            "channel_banner_url": self._get_thumbnails(main_tab, "banner"),
            "channel_tvart_url": self._get_thumbnails(main_tab, "tvBanner"),
            "channel_id": self.channel_id,
            "channel_subscribed": False,
        }

    @staticmethod
    def _get_thumbnails(main_tab, thumb_name):
        """extract banner or tvbanner url from main_tab"""
        try:
            all_banners = main_tab[thumb_name]["thumbnails"]
            # biggest available resolution wins
            banner = sorted(all_banners, key=lambda k: k["width"])[-1]["url"]
        except KeyError:
            banner = False

        return banner

    @staticmethod
    def _get_channel_subs(main_tab):
        """process main_tab to get channel subs as int"""
        channel_subs = 0
        try:
            sub_text_simple = main_tab["subscriberCountText"]["simpleText"]
            # e.g. "1.23M subscribers" -> "1.23M" -> 1230000
            sub_text = sub_text_simple.split(" ")[0]
            if sub_text[-1] == "K":
                channel_subs = int(float(sub_text.replace("K", "")) * 1000)
            elif sub_text[-1] == "M":
                channel_subs = int(float(sub_text.replace("M", "")) * 1000000)
            elif int(sub_text) >= 0:
                channel_subs = int(sub_text)
            else:
                # unknown suffix, keep the zero default
                print(f"{sub_text} not dealt with")
        except (KeyError, ValueError):
            # sub count hidden or in an unexpected format
            channel_subs = 0

        return channel_subs

    def _parse_channel_meta(self):
        """extract meta tab values from channel payload"""
        # meta tab
        meta_tab = self.yt_json["metadata"]["channelMetadataRenderer"]
        all_thumbs = meta_tab["avatar"]["thumbnails"]
        thumb_url = sorted(all_thumbs, key=lambda k: k["width"])[-1]["url"]
        # stats tab, empty dict if no about tab is found
        about_tab = {}
        renderer = "twoColumnBrowseResultsRenderer"
        all_tabs = self.yt_json["contents"][renderer]["tabs"]
        for tab in all_tabs:
            if "tabRenderer" in tab:
                if tab["tabRenderer"]["title"] == "About":
                    about_tab = tab["tabRenderer"]["content"][
                        "sectionListRenderer"
                    ]["contents"][0]["itemSectionRenderer"]["contents"][0][
                        "channelAboutFullMetadataRenderer"
                    ]
                    break

        try:
            channel_views_text = about_tab["viewCountText"]["simpleText"]
            channel_views = int(re.sub(r"\D", "", channel_views_text))
        except KeyError:
            channel_views = 0

        self.json_data.update(
            {
                "channel_description": meta_tab["description"],
                "channel_thumb_url": thumb_url,
                "channel_views": channel_views,
            }
        )
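
# usage sketch for debugging the scraper in isolation, the channel id below
# is a hypothetical placeholder, any real channel id works:
#
#     scraper = ChannelScraper("UCxxxxxxxxxxxxxxxxxxxxxxxx")
#     channel_dict = scraper.get_json()
#     print(channel_dict["channel_name"], channel_dict["channel_subs"])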


class YoutubeChannel(YouTubeItem):
    """represents a single youtube channel"""

    es_path = False
    index_name = "ta_channel"
    yt_base = "https://www.youtube.com/channel/"
    msg = "message:playlistscan"

    def __init__(self, youtube_id):
        super().__init__(youtube_id)
        self.es_path = f"{self.index_name}/_doc/{youtube_id}"
        self.all_playlists = False

    def build_json(self, upload=False):
        """get from es or from youtube"""
        self.get_from_es()
        if self.json_data:
            return

        self.get_from_youtube()
        if upload:
            self.upload_to_es()

    def get_from_youtube(self):
        """use bs4 to scrape channel about page"""
        self.json_data = ChannelScraper(self.youtube_id).get_json()
        self.get_channel_art()

    def get_channel_art(self):
        """download channel art for new channels"""
        channel_id = self.youtube_id
        channel_thumb = self.json_data["channel_thumb_url"]
        channel_banner = self.json_data["channel_banner_url"]
        ThumbManager().download_chan(
            [(channel_id, channel_thumb, channel_banner)]
        )

    def sync_to_videos(self):
        """sync new channel_dict to all videos of channel"""
        # add ingest pipeline
        processors = []
        for field, value in self.json_data.items():
            line = {"set": {"field": "channel." + field, "value": value}}
            processors.append(line)
        data = {"description": self.youtube_id, "processors": processors}
        ingest_path = f"_ingest/pipeline/{self.youtube_id}"
        _, _ = ElasticWrap(ingest_path).put(data)
        # apply pipeline
        data = {"query": {"match": {"channel.channel_id": self.youtube_id}}}
        update_path = f"ta_video/_update_by_query?pipeline={self.youtube_id}"
        _, _ = ElasticWrap(update_path).post(data)
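
    # the pipeline built in sync_to_videos above is a list of "set"
    # processors, one per channel field, e.g. for a hypothetical
    # two-field channel dict the PUT body would look like:
    #
    #     {
    #         "description": "UCxxxxxxxxxxxxxxxxxxxxxxxx",
    #         "processors": [
    #             {"set": {"field": "channel.channel_name", "value": "foo"}},
    #             {"set": {"field": "channel.channel_subs", "value": 0}},
    #         ],
    #     }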

    def get_folder_path(self):
        """get folder where media files get stored"""
        channel_name = self.json_data["channel_name"]
        folder_name = clean_string(channel_name)
        if len(folder_name) <= 3:
            # fall back to channel id
            folder_name = self.json_data["channel_id"]
        folder_path = os.path.join(self.app_conf["videos"], folder_name)
        return folder_path

    def delete_es_videos(self):
        """delete all channel documents from elasticsearch"""
        data = {
            "query": {
                "term": {"channel.channel_id": {"value": self.youtube_id}}
            }
        }
        _, _ = ElasticWrap("ta_video/_delete_by_query").post(data)

    def delete_playlists(self):
        """delete all indexed playlists from es"""
        all_playlists = self.get_indexed_playlists()
        for playlist in all_playlists:
            playlist_id = playlist["playlist_id"]
            YoutubePlaylist(playlist_id).delete_metadata()

    def delete_channel(self):
        """delete channel and all videos"""
        print(f"{self.youtube_id}: delete channel")
        self.get_from_es()
        folder_path = self.get_folder_path()
        print(f"{self.youtube_id}: delete all media files")
        try:
            all_videos = os.listdir(folder_path)
            for video in all_videos:
                video_path = os.path.join(folder_path, video)
                os.remove(video_path)
            os.rmdir(folder_path)
        except FileNotFoundError:
            print(f"no videos found for {folder_path}")

        print(f"{self.youtube_id}: delete indexed playlists")
        self.delete_playlists()
        print(f"{self.youtube_id}: delete indexed videos")
        self.delete_es_videos()
        self.del_in_es()

    def index_channel_playlists(self):
        """add all playlists of channel to index"""
        print(f"{self.youtube_id}: index all playlists")
        self.get_from_es()
        channel_name = self.json_data["channel_name"]
        mess_dict = {
            "status": self.msg,
            "level": "info",
            "title": "Looking for playlists",
            "message": f"{channel_name}: Scanning channel in progress",
        }
        RedisArchivist().set_message(self.msg, mess_dict, expire=True)
        self.get_all_playlists()
        if not self.all_playlists:
            print(f"{self.youtube_id}: no playlists found.")
            return

        all_youtube_ids = self.get_all_video_ids()
        for idx, playlist in enumerate(self.all_playlists):
            self._notify_single_playlist(idx, playlist)
            self._index_single_playlist(playlist, all_youtube_ids)

    def _notify_single_playlist(self, idx, playlist):
        """send notification"""
        channel_name = self.json_data["channel_name"]
        mess_dict = {
            "status": self.msg,
            "level": "info",
            "title": f"{channel_name}: Scanning channel for playlists",
            "message": f"Progress: {idx + 1}/{len(self.all_playlists)}",
        }
        RedisArchivist().set_message(self.msg, mess_dict, expire=True)
        print(f"add playlist: {playlist[1]}")

    @staticmethod
    def _index_single_playlist(playlist, all_youtube_ids):
        """add single playlist if needed"""
        playlist = YoutubePlaylist(playlist[0])
        playlist.all_youtube_ids = all_youtube_ids
        playlist.build_json()
        if not playlist.json_data:
            return

        entries = playlist.json_data["playlist_entries"]
        downloaded = [i for i in entries if i["downloaded"]]
        if not downloaded:
            return

        playlist.upload_to_es()
        playlist.add_vids_to_playlist()
        playlist.get_playlist_art()

    @staticmethod
    def get_all_video_ids():
        """get all video ids to match playlists against"""
        handler = queue.PendingList()
        handler.get_download()
        handler.get_indexed()
        all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
        return all_youtube_ids

    def get_all_playlists(self):
        """get all playlists owned by this channel"""
        url = (
            f"https://www.youtube.com/channel/{self.youtube_id}"
            + "/playlists?view=1&sort=dd&shelf_id=0"
        )
        obs = {"skip_download": True, "extract_flat": True}
        playlists = YtWrap(obs, self.config).extract(url)
        all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
        self.all_playlists = all_entries

    def get_indexed_playlists(self):
        """get all indexed playlists from channel"""
        data = {
            "query": {
                "term": {"playlist_channel_id": {"value": self.youtube_id}}
            },
            "sort": [{"playlist_channel.keyword": {"order": "desc"}}],
        }
        all_playlists = IndexPaginate("ta_playlist", data).get_results()
        return all_playlists

    def get_overwrites(self):
        """get all per channel overwrites"""
        return self.json_data.get("channel_overwrites", False)

    def set_overwrites(self, overwrites):
        """set per channel overwrites"""
        valid_keys = [
            "download_format",
            "autodelete_days",
            "index_playlists",
            "integrate_sponsorblock",
        ]
        to_write = self.json_data.get("channel_overwrites", {})
        for key, value in overwrites.items():
            if key not in valid_keys:
                raise ValueError(f"invalid overwrite key: {key}")
            if value == "disable":
                # explicitly false, overrules the global setting
                to_write[key] = False
                continue
            if value in [0, "0"]:
                # reset, fall back to the global setting
                to_write.pop(key, None)
                continue
            if value == "1":
                to_write[key] = True
                continue
            if value:
                to_write.update({key: value})

        self.json_data["channel_overwrites"] = to_write


def channel_overwrites(channel_id, overwrites):
    """apply a collection of overwrites to settings per channel"""
    channel = YoutubeChannel(channel_id)
    channel.build_json()
    channel.set_overwrites(overwrites)
    channel.upload_to_es()
    channel.sync_to_videos()
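
# usage sketch, the channel id and values are hypothetical placeholders,
# "1", "0" and "disable" follow the conventions handled in set_overwrites:
#
#     channel_overwrites(
#         "UCxxxxxxxxxxxxxxxxxxxxxxxx",
#         {
#             "download_format": "bestvideo[height<=1080]+bestaudio/best",
#             "index_playlists": "1",  # enable playlist indexing
#             "integrate_sponsorblock": "disable",  # explicitly off
#             "autodelete_days": 0,  # reset to the global default
#         },
#     )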