From 039a49387c829a47bb4850900a8112fc5865592a Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 6 Jul 2024 16:50:29 +0200 Subject: [PATCH] remove workarounds for merged channel extraction failure --- tubearchivist/home/src/index/channel.py | 60 +------------------------ 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 43b32234..d55e46c4 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -6,46 +6,16 @@ functionality: import json import os -import re from datetime import datetime -import requests from home.src.download.thumbnails import ThumbManager from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.generic import YouTubeItem from home.src.index.playlist import YoutubePlaylist -from home.src.ta.helper import requests_headers from home.src.ta.settings import EnvironmentSettings -def banner_extractor(channel_id: str) -> dict[str, str] | None: - """workaround for new channel renderer, upstream #9893""" - url = f"https://www.youtube.com/channel/{channel_id}?hl=en" - cookies = {"SOCS": "CAI"} - response = requests.get( - url, cookies=cookies, headers=requests_headers(), timeout=30 - ) - if not response.ok: - return None - - matched_urls = re.findall( - r'"(https://yt3.googleusercontent.com/[^"]+=w(\d{3,4})-fcrop64[^"]*)"', - response.text, - ) - if not matched_urls: - return None - - sorted_urls = sorted(matched_urls, key=lambda x: int(x[1]), reverse=True) - banner = sorted_urls[0][0] - channel_art_fallback = { - "channel_banner_url": banner, - "channel_tvart_url": banner.split("-fcrop64")[0], - } - - return channel_art_fallback - - class YoutubeChannel(YouTubeItem): """represents a single youtube channel""" @@ -87,7 +57,7 @@ class YoutubeChannel(YouTubeItem): "channel_id": self.youtube_id, "channel_last_refresh": int(datetime.now().timestamp()), "channel_name": self.youtube_meta["uploader"], - "channel_subs": self._extract_follower_count(), + "channel_subs": self.youtube_meta.get("channel_follower_count", 0), "channel_subscribed": False, "channel_tags": self._parse_tags(self.youtube_meta.get("tags")), "channel_banner_url": self._get_banner_art(), @@ -95,34 +65,6 @@ class YoutubeChannel(YouTubeItem): "channel_tvart_url": self._get_tv_art(), "channel_views": self.youtube_meta.get("view_count") or 0, } - self._inject_fallback() - - def _inject_fallback(self): - """fallback channel art work, workaround for upstream #9893""" - if self.json_data["channel_banner_url"]: - return - - print(f"{self.youtube_id}: attempt art fallback extraction") - fallback = banner_extractor(self.youtube_id) - if fallback: - print(f"{self.youtube_id}: fallback succeeded: {fallback}") - self.json_data.update(fallback) - - def _extract_follower_count(self) -> int: - """workaround for upstream #9893, extract subs from first video""" - subs = self.youtube_meta.get("channel_follower_count") - if subs is not None: - return subs - - entries = self.youtube_meta.get("entries", []) - if entries: - first_entry = entries[0] - if isinstance(first_entry, dict): - subs_entry = first_entry.get("channel_follower_count") - if subs_entry is not None: - return subs_entry - - return 0 def _parse_tags(self, tags): """parse channel tags"""