From 0891c49620ef442c80037a36c45d13c7123267cd Mon Sep 17 00:00:00 2001
From: simon
Date: Thu, 11 Aug 2022 19:57:00 +0700
Subject: [PATCH] implement offline channel import

When the channel scraper returns no data, build the channel document
from the metadata of the video being imported instead. If a
CHANNEL_ID.info.json file exists in the import folder of the cache
dir, its subscriber count and description overwrite the video derived
defaults. The file is removed during cleanup of the manual import.

Keys missing from the video metadata or from info.json do not abort
the import, the fallback defaults are kept. A missing average_rating
on a video defaults to 0.

---
NOTE(review): line structure and indentation were reconstructed from a
whitespace-collapsed copy of this patch; verify the context lines
against the target tree before applying. The blob ids on the index
lines are those of the original commit.

 tubearchivist/home/src/index/channel.py    | 54 +++++++++++++++++++---
 tubearchivist/home/src/index/filesystem.py | 12 ++++-
 tubearchivist/home/src/index/video.py      |  5 ++-
 3 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py
index d41449ae..34e4adc7 100644
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@@ -173,30 +173,74 @@ class YoutubeChannel(YouTubeItem):
         self.es_path = f"{self.index_name}/_doc/{youtube_id}"
         self.all_playlists = False
 
-    def build_json(self, upload=False):
+    def build_json(self, upload=False, fallback=False):
         """get from es or from youtube"""
         self.get_from_es()
         if self.json_data:
             return
 
-        self.get_from_youtube()
+        self.get_from_youtube(fallback)
+
         if upload:
             self.upload_to_es()
         return
 
-    def get_from_youtube(self):
+    def get_from_youtube(self, fallback=False):
         """use bs4 to scrape channel about page"""
         self.json_data = ChannelScraper(self.youtube_id).get_json()
+
+        if not self.json_data and fallback:
+            self._video_fallback(fallback)
+
         self.get_channel_art()
 
+    def _video_fallback(self, fallback):
+        """use video metadata as fallback"""
+        print(f"{self.youtube_id}: fallback to video metadata")
+        self.json_data = {
+            "channel_active": False,
+            "channel_last_refresh": int(datetime.now().timestamp()),
+            "channel_subs": fallback.get("channel_follower_count", 0),
+            "channel_name": fallback.get("uploader", self.youtube_id),
+            "channel_banner_url": False,
+            "channel_tvart_url": False,
+            "channel_id": self.youtube_id,
+            "channel_subscribed": False,
+            "channel_description": False,
+            "channel_thumb_url": False,
+            "channel_views": 0,
+        }
+        self._info_json_fallback()
+
+    def _info_json_fallback(self):
+        """read channel info.json for additional metadata"""
+        info_json = os.path.join(
+            self.config["application"]["cache_dir"],
+            "import",
+            f"{self.youtube_id}.info.json",
+        )
+        if not os.path.exists(info_json):
+            return
+
+        print(f"{self.youtube_id}: read info.json file")
+        with open(info_json, "r", encoding="utf-8") as f:
+            content = json.loads(f.read())
+
+        # keep the video derived defaults for keys missing in info.json
+        for key, source in (
+            ("channel_subs", "channel_follower_count"),
+            ("channel_description", "description"),
+        ):
+            if content.get(source) is not None:
+                self.json_data[key] = content[source]
+
     def get_channel_art(self):
         """download channel art for new channels"""
-        channel_id = self.youtube_id
         urls = (
             self.json_data["channel_thumb_url"],
             self.json_data["channel_banner_url"],
         )
-        ThumbManager(channel_id, item_type="channel").download(urls)
+        ThumbManager(self.youtube_id, item_type="channel").download(urls)
 
     def sync_to_videos(self):
         """sync new channel_dict to all videos of channel"""
diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py
index 38d790b8..3689fed8 100644
--- a/tubearchivist/home/src/index/filesystem.py
+++ b/tubearchivist/home/src/index/filesystem.py
@@ -467,7 +467,7 @@ class ManualImport:
         """run all"""
         json_data = self.index_metadata()
         self._move_to_archive(json_data)
-        self._cleanup()
+        self._cleanup(json_data)
 
     def index_metadata(self):
         """get metadata from yt or json"""
@@ -517,7 +517,7 @@ class ManualImport:
         new_path = os.path.join(channel_folder, file)
         shutil.move(old_path, new_path, copy_function=shutil.copyfile)
 
-    def _cleanup(self):
+    def _cleanup(self, json_data):
         """cleanup leftover files"""
         if os.path.exists(self.current_video["metadata"]):
             os.remove(self.current_video["metadata"])
@@ -529,6 +529,14 @@ class ManualImport:
             if os.path.exists(subtitle_file):
                 os.remove(subtitle_file)
 
+        channel_info = os.path.join(
+            self.config["application"]["cache_dir"],
+            "import",
+            f"{json_data['channel']['channel_id']}.info.json",
+        )
+        if os.path.exists(channel_info):
+            os.remove(channel_info)
+
 
 def scan_filesystem():
     """grouped function to delete and update index"""
diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index a2b54970..d7d8b983 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -492,7 +492,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
     def _add_channel(self):
         """add channel dict to video json_data"""
         channel = ta_channel.YoutubeChannel(self.channel_id)
-        channel.build_json(upload=True)
+        channel.build_json(upload=True, fallback=self.youtube_meta)
         self.json_data.update({"channel": channel.json_data})
 
     def _add_stats(self):
@@ -500,13 +500,14 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         # likes
         like_count = self.youtube_meta.get("like_count", 0)
         dislike_count = self.youtube_meta.get("dislike_count", 0)
+        average_rating = self.youtube_meta.get("average_rating", 0)
         self.json_data.update(
             {
                 "stats": {
                     "view_count": self.youtube_meta["view_count"],
                     "like_count": like_count,
                     "dislike_count": dislike_count,
-                    "average_rating": self.youtube_meta["average_rating"],
+                    "average_rating": average_rating,
                 }
             }
         )