add sync playlist meta data and position to video index

This commit is contained in:
simon 2021-11-11 17:56:29 +07:00
parent 824ba35c14
commit c499a130da
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
2 changed files with 119 additions and 5 deletions

View File

@ -443,18 +443,22 @@ class YoutubePlaylist:
def __init__(self, playlist_id):
    """store the playlist id and set up the instance state

    playlist_id: id of the playlist this instance works on
    """
    self.playlist_id = playlist_id
    # timestamp() is portable, strftime("%s") is a platform specific
    # extension that is not available on every system
    self.stamp = int(datetime.now().timestamp())
    # False until get_playlist_dict has stored the playlist meta data
    self.playlist_dict = False
def get_playlist_dict(self, scrape=False):
    """load the playlist meta data and store it on self.playlist_dict

    With scrape=True the data always comes from youtube. Otherwise the
    es document is used, with youtube as fallback when the es lookup
    returns nothing usable. Data coming from youtube gets its
    playlist_entries added from get_entries.
    """
    print(f"get playlist with id {self.playlist_id}")
    # without an es lookup the value stays falsy, which sends the
    # scrape case down the youtube path below
    stored = False if scrape else self.get_es_playlist()
    if stored:
        self.playlist_dict = stored
        return

    scraped = self.get_youtube_playlist()
    scraped["playlist_entries"] = self.get_entries()
    self.playlist_dict = scraped
def get_youtube_playlist(self): def get_youtube_playlist(self):
"""get meta data dict from youtube""" """get meta data dict from youtube"""
@ -488,7 +492,7 @@ class YoutubePlaylist:
url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}" url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
response = requests.get(url, auth=self.ES_AUTH) response = requests.get(url, auth=self.ES_AUTH)
if response.ok: if response.ok:
return json.loads(response.text) return json.loads(response.text)["_source"]
return False return False
@ -524,14 +528,109 @@ class YoutubePlaylist:
def upload_to_es(self):
    """index self.playlist_dict as document in ta_playlist

    The playlist id is used as document id. The response body is
    printed when es does not answer with a success status.
    """
    document = self.playlist_dict
    doc_url = f"{self.ES_URL}/ta_playlist/_doc/{self.playlist_id}"
    result = requests.put(doc_url, json=document, auth=self.ES_AUTH)
    if result.ok:
        return
    print(result.text)
def add_vids_to_playlist(self):
    """sync the playlist meta data to every video of the playlist

    Copies all keys of self.playlist_dict except playlist_entries into
    the playlist field of each entry's ta_video document, using a
    single bulk update request. The response body is printed when es
    does not answer with a success status. Does nothing for a playlist
    without entries.
    """
    print("sync playlist meta data for " + self.playlist_id)
    playlist_dict = self.playlist_dict
    all_entries = playlist_dict["playlist_entries"]
    if not all_entries:
        # es rejects a bulk request that carries no action
        return

    to_add = {
        key: val
        for key, val in playlist_dict.items()
        if key != "playlist_entries"
    }
    # the partial doc is the same for every video, serialize it once
    source = json.dumps({"doc": {"playlist": to_add}})
    bulk_list = []
    for entry in all_entries:
        youtube_id = entry["youtube_id"]
        action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
        bulk_list.append(json.dumps(action))
        bulk_list.append(source)
    # add last newline
    bulk_list.append("\n")
    query_str = "\n".join(bulk_list)
    headers = {"Content-type": "application/x-ndjson"}
    url = self.ES_URL + "/_bulk"
    response = requests.post(
        url, data=query_str, headers=headers, auth=self.ES_AUTH
    )
    if not response.ok:
        print(response.text)
def playlist_position(self):
    """sync playlist_position to every video of the playlist

    Sends one bulk update that stores, for each playlist entry, the
    dict built by get_position_dict in the playlist_position field of
    the matching ta_video document. The response body is printed when
    es does not answer with a success status. Does nothing for a
    playlist without entries.
    """
    all_entries = self.playlist_dict["playlist_entries"]
    if not all_entries:
        # es rejects a bulk request that carries no action
        return

    bulk_list = []
    for idx, entry in enumerate(all_entries):
        youtube_id = entry["youtube_id"]
        playlist_position = self.get_position_dict(all_entries, idx)
        action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
        source = {"doc": {"playlist_position": playlist_position}}
        bulk_list.append(json.dumps(action))
        bulk_list.append(json.dumps(source))
    # add last newline
    bulk_list.append("\n")
    query_str = "\n".join(bulk_list)
    headers = {"Content-type": "application/x-ndjson"}
    url = self.ES_URL + "/_bulk"
    response = requests.post(
        url, data=query_str, headers=headers, auth=self.ES_AUTH
    )
    if not response.ok:
        print(response.text)
@staticmethod
def get_position_dict(all_entries, idx):
"""get previous and next videos in playlist"""
playlist_position = {"playlist_index": idx}
if idx == 0:
playlist_position.update(
{
"playlist_prev_id": False,
"playlist_prev_title": False,
"playlist_prev_channel_name": False,
}
)
else:
prev_vid = all_entries[idx - 1]
playlist_position.update(
{
"playlist_prev_id": prev_vid["youtube_id"],
"playlist_prev_title": prev_vid["title"],
"playlist_prev_channel_name": prev_vid["uploader"],
}
)
if idx == len(all_entries) - 1:
playlist_position.update(
{
"playlist_next_id": False,
"playlist_next_title": False,
"playlist_next_channel_name": False,
}
)
else:
next_vid = all_entries[idx + 1]
playlist_position.update(
{
"playlist_next_id": next_vid["youtube_id"],
"playlist_next_title": next_vid["title"],
"playlist_next_channel_name": next_vid["uploader"],
}
)
return playlist_position
class WatchState: class WatchState:
"""handle watched checkbox for videos and channels""" """handle watched checkbox for videos and channels"""

View File

@ -106,6 +106,21 @@ INDEX_CONFIG = [
"vid_last_refresh": {"type": "date"}, "vid_last_refresh": {"type": "date"},
"youtube_id": {"type": "keyword"}, "youtube_id": {"type": "keyword"},
"published": {"type": "date"}, "published": {"type": "date"},
"playlist": {
"properties": {
"playlist_description": {"type": "text"},
}
},
"playlist_position": {
"properties": {
"playlist_next_id": {"type": "keyword"},
"playlist_next_title": {"type": "text"},
"playlist_next_channel_name": {"type": "text"},
"playlist_prev_id": {"type": "keyword"},
"playlist_prev_title": {"type": "text"},
"playlist_prev_channel_name": {"type": "text"},
}
},
}, },
"expected_set": { "expected_set": {
"analysis": { "analysis": {