From 5ef9c5b0594df4a6ba4adbacd32a91597f3ff08a Mon Sep 17 00:00:00 2001
From: simon
Date: Wed, 10 Nov 2021 17:55:34 +0700
Subject: [PATCH] download playlist artwork

---
 tubearchivist/home/apps.py           |  9 +++-
 tubearchivist/home/src/download.py   | 62 ++++++++++++++++++++++++++++
 tubearchivist/home/src/thumbnails.py | 38 +++++++++++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/tubearchivist/home/apps.py b/tubearchivist/home/apps.py
index a036900..f17ff3a 100644
--- a/tubearchivist/home/apps.py
+++ b/tubearchivist/home/apps.py
@@ -18,7 +18,14 @@ def sync_redis_state():
 
 def make_folders():
     """make needed cache folders here so docker doesn't mess it up"""
-    folders = ["download", "channels", "videos", "import", "backup"]
+    folders = [
+        "download",
+        "channels",
+        "videos",
+        "playlists",
+        "import",
+        "backup",
+    ]
     config = ArchivistConfig().config
     cache_dir = config["application"]["cache_dir"]
     for folder in folders:
diff --git a/tubearchivist/home/src/download.py b/tubearchivist/home/src/download.py
index f7c2e0d..0963bb0 100644
--- a/tubearchivist/home/src/download.py
+++ b/tubearchivist/home/src/download.py
@@ -445,6 +445,68 @@ class ChannelSubscription:
             channel_handler.get_channel_art()
 
 
+class PlaylistSubscription:
+    """manage the playlist download functionality"""
+
+    def __init__(self):
+        config = AppConfig().config
+        self.es_url = config["application"]["es_url"]
+        self.es_auth = config["application"]["es_auth"]
+        self.channel_size = config["subscriptions"]["channel_size"]
+
+    def get_playlists(self, subscribed_only=True):
+        """get a list of all playlists"""
+        headers = {"Content-type": "application/json"}
+        # get PIT ID
+        url = self.es_url + "/ta_playlist/_pit?keep_alive=1m"
+        response = requests.post(url, auth=self.es_auth)
+        json_data = json.loads(response.text)
+        pit_id = json_data["id"]
+        # query
+        if subscribed_only:
+            data = {
+                "query": {"term": {"playlist_subscribed": {"value": True}}},
+                "size": 50,
+                "pit": {"id": pit_id, "keep_alive": "1m"},
+                "sort": [{"playlist_channel.keyword": {"order": "desc"}}],
+            }
+        else:
+            data = {
+                "query": {"match_all": {}},
+                "size": 50,
+                "pit": {"id": pit_id, "keep_alive": "1m"},
+                "sort": [{"playlist_channel.keyword": {"order": "desc"}}],
+            }
+        query_str = json.dumps(data)
+        url = self.es_url + "/_search"
+        all_playlists = []
+        while True:
+            response = requests.get(
+                url, data=query_str, headers=headers, auth=self.es_auth
+            )
+            json_data = json.loads(response.text)
+            all_hits = json_data["hits"]["hits"]
+            if all_hits:
+                for hit in all_hits:
+                    source = hit["_source"]
+                    search_after = hit["sort"]
+                    all_playlists.append(source)
+                # update search_after with last hit data
+                data["search_after"] = search_after
+                query_str = json.dumps(data)
+            else:
+                break
+        # clean up PIT
+        query_str = json.dumps({"id": pit_id})
+        requests.delete(
+            self.es_url + "/_pit",
+            data=query_str,
+            headers=headers,
+            auth=self.es_auth,
+        )
+        return all_playlists
+
+
 class VideoDownloader:
     """
     handle the video download functionality
diff --git a/tubearchivist/home/src/thumbnails.py b/tubearchivist/home/src/thumbnails.py
index 017b9a8..5c3c7a5 100644
--- a/tubearchivist/home/src/thumbnails.py
+++ b/tubearchivist/home/src/thumbnails.py
@@ -23,6 +23,7 @@ class ThumbManager:
     CACHE_DIR = CONFIG["application"]["cache_dir"]
     VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
     CHANNEL_DIR = os.path.join(CACHE_DIR, "channels")
+    PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists")
 
     def get_all_thumbs(self):
         """get all video artwork already downloaded"""
@@ -99,6 +100,24 @@ class ThumbManager:
 
         return missing_channels
 
+    def get_missing_playlists(self):
+        """get all missing playlist artwork"""
+        all_downloaded = ignore_filelist(os.listdir(self.PLAYLIST_DIR))
+        all_ids_downloaded = [i.replace(".jpg", "") for i in all_downloaded]
+
+        playlists = download.PlaylistSubscription().get_playlists(
+            subscribed_only=False
+        )
+
+        missing_playlists = []
+        for playlist in playlists:
+            playlist_id = playlist["playlist_id"]
+            if playlist_id not in all_ids_downloaded:
+                playlist_thumb = playlist["playlist_thumbnail"]
+                missing_playlists.append((playlist_id, playlist_thumb))
+
+        return missing_playlists
+
     def get_raw_img(self, img_url, thumb_type):
         """get raw image from youtube and handle 404"""
         try:
@@ -201,6 +220,23 @@ class ThumbManager:
             }
             RedisArchivist().set_message("progress:download", mess_dict)
 
+    def download_playlist(self, missing_playlists):
+        """download needed artwork for playlists"""
+        print(f"downloading {len(missing_playlists)} playlist artwork")
+        for playlist in missing_playlists:
+            playlist_id, playlist_thumb_url = playlist
+            thumb_path = os.path.join(self.PLAYLIST_DIR, playlist_id + ".jpg")
+            img_raw = self.get_raw_img(playlist_thumb_url, "video")
+            img_raw.convert("RGB").save(thumb_path)
+
+            mess_dict = {
+                "status": "pending",
+                "level": "info",
+                "title": "Adding to download queue.",
+                "message": "Downloading Playlist Art...",
+            }
+            RedisArchivist().set_message("progress:download", mess_dict)
+
     @staticmethod
     def vid_thumb_path(youtube_id):
         """build expected path for video thumbnail from youtube_id"""
@@ -285,4 +321,6 @@ def validate_thumbnails():
     handler.download_vid(thumbs_to_download)
     missing_channels = handler.get_missing_channels()
     handler.download_chan(missing_channels)
+    missing_playlists = handler.get_missing_playlists()
+    handler.download_playlist(missing_playlists)
     handler.cleanup_downloaded()