From 35a5eda36e226f5f68f2849e8b0005bf8c0c4bce Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 11 Oct 2021 16:03:25 +0700 Subject: [PATCH] downloading channel artwork after scraping --- tubearchivist/home/src/index.py | 11 +++++++++++ tubearchivist/home/src/thumbnails.py | 23 +++++++++++++++++------ tubearchivist/home/tasks.py | 2 +- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tubearchivist/home/src/index.py b/tubearchivist/home/src/index.py index 8adaecd..ead7921 100644 --- a/tubearchivist/home/src/index.py +++ b/tubearchivist/home/src/index.py @@ -16,6 +16,7 @@ import yt_dlp as youtube_dl from bs4 import BeautifulSoup from home.src.config import AppConfig from home.src.helper import DurationConverter, clean_string, process_url_list +from home.src.thumbnails import ThumbManager class YoutubeChannel: @@ -153,6 +154,15 @@ class YoutubeChannel: return meta_channel_dict + def get_channel_art(self): + """download channel art for new channels""" + channel_id = self.channel_id + channel_thumb = self.channel_dict["channel_thumb_url"] + channel_banner = self.channel_dict["channel_banner_url"] + ThumbManager().download_chan( + [(channel_id, channel_thumb, channel_banner)] + ) + def upload_to_es(self): """upload channel data to elastic search""" url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}" @@ -514,6 +524,7 @@ def index_new_video(youtube_id, missing_vid=False): if channel_handler.source == "scraped": channel_handler.channel_dict["channel_subscribed"] = False channel_handler.upload_to_es() + channel_handler.get_channel_art() # upload video to es vid_handler.upload_to_es() # return vid_dict for further processing diff --git a/tubearchivist/home/src/thumbnails.py b/tubearchivist/home/src/thumbnails.py index a350e94..b4be8a8 100644 --- a/tubearchivist/home/src/thumbnails.py +++ b/tubearchivist/home/src/thumbnails.py @@ -5,9 +5,9 @@ functionality: import os +import home.src.download as download import requests from home.src.config import AppConfig -from home.src.download import ChannelSubscription, PendingList from home.src.helper import RedisArchivist, ignore_filelist from PIL import Image @@ -50,8 +50,8 @@ class ThumbManager: def get_missing_thumbs(self): """get a list of all missing thumbnails""" all_thumbs = self.get_all_thumbs() - all_indexed = PendingList().get_all_indexed() - all_in_queue, all_ignored = PendingList().get_all_pending() + all_indexed = download.PendingList().get_all_indexed() + all_in_queue, all_ignored = download.PendingList().get_all_pending() missing_thumbs = [] for video in all_indexed: @@ -72,12 +72,14 @@ class ThumbManager: """get all channel artwork""" all_channel_art = os.listdir(self.CHANNEL_DIR) cached_channel_ids = {i[0:24] for i in all_channel_art} - channels = ChannelSubscription().get_channels(subscribed_only=False) + channels = download.ChannelSubscription().get_channels( + subscribed_only=False + ) missing_channels = [] for channel in channels: channel_id = channel["channel_id"] - if not channel_id in cached_channel_ids: + if channel_id not in cached_channel_ids: channel_banner = channel["channel_banner_url"] channel_thumb = channel["channel_thumb_url"] missing_channels.append( @@ -120,7 +122,8 @@ class ThumbManager: """download needed artwork for channels""" print(f"downloading {len(missing_channels)} channel artwork") for channel in missing_channels: - channel_id, channel_thumb, channel_banner = channel + print(channel) + channel_id, channel_thumb, channel_banner = channel thumb_path = os.path.join( self.CHANNEL_DIR, channel_id + "_thumb.jpg" @@ -137,6 +140,14 @@ class ThumbManager: with open(banner_path, "wb") as f: f.write(img_raw) + mess_dict = { + "status": "pending", + "level": "info", + "title": "Adding to download queue.", + "message": "Downloading Channel Art...", + } + RedisArchivist().set_message("progress:download", mess_dict) + @staticmethod def vid_thumb_path(youtube_id): """build expected path for video thumbnail from youtube_id""" diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 7346304..9c6c9e7 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -92,7 +92,7 @@ def extrac_dl(youtube_ids): pending_handler = PendingList() missing_videos = pending_handler.parse_url_list(youtube_ids) all_videos_added = pending_handler.add_to_pending(missing_videos) - ThumbManager().download_missing(all_videos_added) + ThumbManager().download_vid(all_videos_added) @shared_task