From c499a130da6a8bae09755f5f937fa3be60b92fac Mon Sep 17 00:00:00 2001
From: simon
Date: Thu, 11 Nov 2021 17:56:29 +0700
Subject: [PATCH] add sync playlist meta data and position to video index

---
Review notes (text between the three-dash line and the diff is not part of
the commit message):
* get_playlist_dict() stores the result on self.playlist_dict and still
  returns it, so callers relying on the return value keep working.
* upload_to_es() loads the playlist on demand when get_playlist_dict() was
  not called first.
* the duplicated _bulk request code is shared in _post_bulk(), which skips
  the request when there are no entries to update.
* hunk sizes and the diffstat are recalculated for these amendments, the
  blob ids on the index lines are unchanged from the original patch.

 tubearchivist/home/src/index.py            | 109 ++++++++++++++++++++-
 tubearchivist/home/src/index_management.py |  15 +++
 2 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/tubearchivist/home/src/index.py b/tubearchivist/home/src/index.py
index 9022f26..b6595c9 100644
--- a/tubearchivist/home/src/index.py
+++ b/tubearchivist/home/src/index.py
@@ -443,18 +443,23 @@ class YoutubePlaylist:
     def __init__(self, playlist_id):
         self.playlist_id = playlist_id
         self.stamp = int(datetime.now().strftime("%s"))
+        self.playlist_dict = False
 
     def get_playlist_dict(self, scrape=False):
         """get data from es or youtube"""
         print(f"get playlist with id {self.playlist_id}")
+
         if scrape:
             playlist_dict = self.get_youtube_playlist()
+            playlist_dict["playlist_entries"] = self.get_entries()
         else:
             playlist_dict = self.get_es_playlist()
             if not playlist_dict:
                 playlist_dict = self.get_youtube_playlist()
+                playlist_dict["playlist_entries"] = self.get_entries()
 
+        self.playlist_dict = playlist_dict
         return playlist_dict
 
     def get_youtube_playlist(self):
         """get meta data dict from youtube"""
@@ -488,7 +493,7 @@ class YoutubePlaylist:
         url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
         response = requests.get(url, auth=self.ES_AUTH)
         if response.ok:
-            return json.loads(response.text)
+            return json.loads(response.text)["_source"]
 
         return False
 
@@ -524,14 +529,110 @@ class YoutubePlaylist:
 
     def upload_to_es(self):
         """add playlist to es with its entries"""
-        playlist = self.get_playlist_dict()
-        playlist["playlist_entries"] = self.get_entries()
-
+        playlist = self.playlist_dict or self.get_playlist_dict()
         url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
         response = requests.put(url, json=playlist, auth=self.ES_AUTH)
         if not response.ok:
             print(response.text)
 
+    def _post_bulk(self, updates):
+        """send partial doc updates for ta_video to the es bulk api
+
+        updates: list of (youtube_id, doc) tuples, skipped if empty
+        """
+        bulk_list = []
+        for youtube_id, doc in updates:
+            action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
+            bulk_list.append(json.dumps(action))
+            bulk_list.append(json.dumps({"doc": doc}))
+
+        if not bulk_list:
+            # es rejects a bulk request without any actions
+            return
+
+        # add last newline
+        bulk_list.append("\n")
+        query_str = "\n".join(bulk_list)
+
+        headers = {"Content-type": "application/x-ndjson"}
+        url = self.ES_URL + "/_bulk"
+        response = requests.post(
+            url, data=query_str, headers=headers, auth=self.ES_AUTH
+        )
+        if not response.ok:
+            print(response.text)
+
+    def add_vids_to_playlist(self):
+        """sync the playlist dict to video dict"""
+        print("sync playlist meta data for " + self.playlist_id)
+        playlist_dict = self.playlist_dict
+        all_entries = playlist_dict["playlist_entries"]
+
+        to_add = {
+            key: val
+            for key, val in playlist_dict.items()
+            if key != "playlist_entries"
+        }
+        updates = [
+            (entry["youtube_id"], {"playlist": to_add})
+            for entry in all_entries
+        ]
+        self._post_bulk(updates)
+
+    def playlist_position(self):
+        """sync playlist_position to video dict"""
+        all_entries = self.playlist_dict["playlist_entries"]
+
+        updates = []
+        for idx, entry in enumerate(all_entries):
+            position = self.get_position_dict(all_entries, idx)
+            doc = {"playlist_position": position}
+            updates.append((entry["youtube_id"], doc))
+
+        self._post_bulk(updates)
+
+    @staticmethod
+    def get_position_dict(all_entries, idx):
+        """get previous and next videos in playlist"""
+        playlist_position = {"playlist_index": idx}
+        if idx == 0:
+            playlist_position.update(
+                {
+                    "playlist_prev_id": False,
+                    "playlist_prev_title": False,
+                    "playlist_prev_channel_name": False,
+                }
+            )
+        else:
+            prev_vid = all_entries[idx - 1]
+            playlist_position.update(
+                {
+                    "playlist_prev_id": prev_vid["youtube_id"],
+                    "playlist_prev_title": prev_vid["title"],
+                    "playlist_prev_channel_name": prev_vid["uploader"],
+                }
+            )
+
+        if idx == len(all_entries) - 1:
+            playlist_position.update(
+                {
+                    "playlist_next_id": False,
+                    "playlist_next_title": False,
+                    "playlist_next_channel_name": False,
+                }
+            )
+        else:
+            next_vid = all_entries[idx + 1]
+            playlist_position.update(
+                {
+                    "playlist_next_id": next_vid["youtube_id"],
+                    "playlist_next_title": next_vid["title"],
+                    "playlist_next_channel_name": next_vid["uploader"],
+                }
+            )
+
+        return playlist_position
+
 
 class WatchState:
     """handle watched checkbox for videos and channels"""
diff --git a/tubearchivist/home/src/index_management.py b/tubearchivist/home/src/index_management.py
index a7046b2..0c87055 100644
--- a/tubearchivist/home/src/index_management.py
+++ b/tubearchivist/home/src/index_management.py
@@ -106,6 +106,21 @@ INDEX_CONFIG = [
             "vid_last_refresh": {"type": "date"},
             "youtube_id": {"type": "keyword"},
             "published": {"type": "date"},
+            "playlist": {
+                "properties": {
+                    "playlist_description": {"type": "text"},
+                }
+            },
+            "playlist_position": {
+                "properties": {
+                    "playlist_next_id": {"type": "keyword"},
+                    "playlist_next_title": {"type": "text"},
+                    "playlist_next_channel_name": {"type": "text"},
+                    "playlist_prev_id": {"type": "keyword"},
+                    "playlist_prev_title": {"type": "text"},
+                    "playlist_prev_channel_name": {"type": "text"},
+                }
+            },
         },
         "expected_set": {
             "analysis": {