From 91a9477bba1eb4fa6bea3f534fe633d3f8a56f8c Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 21 Mar 2022 17:59:21 +0700 Subject: [PATCH] refactor index_channel_playlists task, move to YoutubeChannel class --- tubearchivist/home/src/index/channel.py | 64 ++++++++++++++++++++++++- tubearchivist/home/tasks.py | 48 +------------------ 2 files changed, 63 insertions(+), 49 deletions(-) diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 10da4f4..5991157 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -12,11 +12,13 @@ from datetime import datetime import requests import yt_dlp from bs4 import BeautifulSoup +from home.src.download import queue # partial import from home.src.download.thumbnails import ThumbManager from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.generic import YouTubeItem from home.src.index.playlist import YoutubePlaylist from home.src.ta.helper import clean_string +from home.src.ta.ta_redis import RedisArchivist class ChannelScraper: @@ -153,6 +155,7 @@ class YoutubeChannel(YouTubeItem): def __init__(self, youtube_id): super().__init__(youtube_id) self.es_path = f"{self.index_name}/_doc/{youtube_id}" + self.all_playlists = False def build_json(self, upload=False): """get from es or from youtube""" @@ -241,6 +244,64 @@ class YoutubeChannel(YouTubeItem): self.delete_es_videos() self.del_in_es() + def index_channel_playlists(self): + """add all playlists of channel to index""" + mess_dict = { + "status": "message:playlistscan", + "level": "info", + "title": "Looking for playlists", + "message": f'Scanning channel "{self.youtube_id}" in progress', + } + RedisArchivist().set_message("message:playlistscan", mess_dict) + self.get_all_playlists() + if not self.all_playlists: + print(f"{self.youtube_id}: no playlists found.") + return + + all_youtube_ids = self.get_all_video_ids() + for idx, playlist in enumerate(self.all_playlists): + self.notify_single_playlist(idx, playlist) + self.index_single_playlist(playlist, all_youtube_ids) + + def notify_single_playlist(self, idx, playlist): + """send notification""" + mess_dict = { + "status": "message:playlistscan", + "level": "info", + "title": "Scanning channel for playlists", + "message": f"Progress: {idx + 1}/{len(self.all_playlists)}", + } + RedisArchivist().set_message("message:playlistscan", mess_dict) + print("add playlist: " + playlist[1]) + + @staticmethod + def index_single_playlist(playlist, all_youtube_ids): + """add single playlist if needed""" + playlist = YoutubePlaylist(playlist[0]) + playlist.all_youtube_ids = all_youtube_ids + playlist.build_json() + if not playlist.json_data: + return + + entries = playlist.json_data["playlist_entries"] + downloaded = [i for i in entries if i["downloaded"]] + if not downloaded: + return + + playlist.upload_to_es() + playlist.add_vids_to_playlist() + playlist.get_playlist_art() + + @staticmethod + def get_all_video_ids(): + """match all playlists with videos""" + handler = queue.PendingList() + handler.get_download() + handler.get_indexed() + all_youtube_ids = [i["youtube_id"] for i in handler.all_videos] + + return all_youtube_ids + def get_all_playlists(self): """get all playlists owned by this channel""" url = ( @@ -254,8 +315,7 @@ class YoutubeChannel(YouTubeItem): } playlists = yt_dlp.YoutubeDL(obs).extract_info(url) all_entries = [(i["id"], i["title"]) for i in playlists["entries"]] - - return all_entries + self.all_playlists = all_entries def get_indexed_playlists(self): """get all indexed playlists from channel""" diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 6d89983..b0f86fd 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -24,7 +24,6 @@ from home.src.index.filesystem import ( reindex_old_documents, scan_filesystem, ) -from home.src.index.playlist import YoutubePlaylist from home.src.ta.config import AppConfig, ScheduleBuilder from home.src.ta.helper import UrlListParser from home.src.ta.ta_redis import RedisArchivist, RedisQueue @@ -268,52 +267,7 @@ def index_channel_playlists(channel_id): "message": f'Scanning channel "{channel.youtube_id}" in progress', } RedisArchivist().set_message("message:playlistscan", mess_dict) - all_playlists = channel.get_all_playlists() - - if not all_playlists: - print(f"no playlists found for channel {channel_id}") - return - - handler = PendingList() - handler.get_download() - handler.get_indexed() - all_youtube_ids = [i["youtube_id"] for i in handler.all_videos] - - for idx, (playlist_id, playlist_title) in enumerate(all_playlists): - # notify - mess_dict = { - "status": "message:playlistscan", - "level": "info", - "title": "Scanning channel for playlists", - "message": f"Progress: {idx + 1}/{len(all_playlists)}", - } - RedisArchivist().set_message("message:playlistscan", mess_dict) - print("add playlist: " + playlist_title) - - playlist = YoutubePlaylist(playlist_id) - playlist.all_youtube_ids = all_youtube_ids - playlist.build_json() - - if not playlist.json_data: - # skip if not available - continue - - # don't add if no videos downloaded - downloaded = [ - i - for i in playlist.json_data["playlist_entries"] - if i["downloaded"] - ] - if not downloaded: - continue - - playlist.upload_to_es() - playlist.add_vids_to_playlist() - - if all_playlists: - playlist.get_playlist_art() - - return + channel.index_channel_playlists() try: