downloading channel artwork after scraping

This commit is contained in:
simon 2021-10-11 16:03:25 +07:00
parent 567e9d473b
commit 35a5eda36e
3 changed files with 29 additions and 7 deletions

View File

@ -16,6 +16,7 @@ import yt_dlp as youtube_dl
from bs4 import BeautifulSoup
from home.src.config import AppConfig
from home.src.helper import DurationConverter, clean_string, process_url_list
from home.src.thumbnails import ThumbManager
class YoutubeChannel:
@ -153,6 +154,15 @@ class YoutubeChannel:
return meta_channel_dict
def get_channel_art(self):
    """fetch thumbnail and banner artwork for this channel

    reads the thumb and banner urls from self.channel_dict and passes
    them, together with self.channel_id, to ThumbManager.download_chan
    as a one-item list
    """
    # tuple order is (channel_id, thumb url, banner url)
    artwork = (
        self.channel_id,
        self.channel_dict["channel_thumb_url"],
        self.channel_dict["channel_banner_url"],
    )
    ThumbManager().download_chan([artwork])
def upload_to_es(self):
"""upload channel data to elastic search"""
url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}"
@ -514,6 +524,7 @@ def index_new_video(youtube_id, missing_vid=False):
if channel_handler.source == "scraped":
channel_handler.channel_dict["channel_subscribed"] = False
channel_handler.upload_to_es()
channel_handler.get_channel_art()
# upload video to es
vid_handler.upload_to_es()
# return vid_dict for further processing

View File

@ -5,9 +5,9 @@ functionality:
import os
import home.src.download as download
import requests
from home.src.config import AppConfig
from home.src.download import ChannelSubscription, PendingList
from home.src.helper import RedisArchivist, ignore_filelist
from PIL import Image
@ -50,8 +50,8 @@ class ThumbManager:
def get_missing_thumbs(self):
"""get a list of all missing thumbnails"""
all_thumbs = self.get_all_thumbs()
all_indexed = PendingList().get_all_indexed()
all_in_queue, all_ignored = PendingList().get_all_pending()
all_indexed = download.PendingList().get_all_indexed()
all_in_queue, all_ignored = download.PendingList().get_all_pending()
missing_thumbs = []
for video in all_indexed:
@ -72,12 +72,14 @@ class ThumbManager:
"""get all channel artwork"""
all_channel_art = os.listdir(self.CHANNEL_DIR)
cached_channel_ids = {i[0:24] for i in all_channel_art}
channels = ChannelSubscription().get_channels(subscribed_only=False)
channels = download.ChannelSubscription().get_channels(
subscribed_only=False
)
missing_channels = []
for channel in channels:
channel_id = channel["channel_id"]
if not channel_id in cached_channel_ids:
if channel_id not in cached_channel_ids:
channel_banner = channel["channel_banner_url"]
channel_thumb = channel["channel_thumb_url"]
missing_channels.append(
@ -120,7 +122,8 @@ class ThumbManager:
"""download needed artwork for channels"""
print(f"downloading {len(missing_channels)} channel artwork")
for channel in missing_channels:
channel_id, channel_thumb, channel_banner = channel
print(channel)
channel_id, channel_thumb, channel_banner = channel
thumb_path = os.path.join(
self.CHANNEL_DIR, channel_id + "_thumb.jpg"
@ -137,6 +140,14 @@ class ThumbManager:
with open(banner_path, "wb") as f:
f.write(img_raw)
mess_dict = {
"status": "pending",
"level": "info",
"title": "Adding to download queue.",
"message": "Downloading Channel Art...",
}
RedisArchivist().set_message("progress:download", mess_dict)
@staticmethod
def vid_thumb_path(youtube_id):
"""build expected path for video thumbnail from youtube_id"""

View File

@ -92,7 +92,7 @@ def extrac_dl(youtube_ids):
pending_handler = PendingList()
missing_videos = pending_handler.parse_url_list(youtube_ids)
all_videos_added = pending_handler.add_to_pending(missing_videos)
ThumbManager().download_missing(all_videos_added)
ThumbManager().download_vid(all_videos_added)
@shared_task