From 8f5311ff04f37ae0ae4701854fbda46ee2e0fa70 Mon Sep 17 00:00:00 2001
From: simon
Date: Fri, 18 Nov 2022 11:33:06 +0700
Subject: [PATCH] delete comments when deleting channel

---
 tubearchivist/home/src/es/index_mapping.json |  3 +++
 tubearchivist/home/src/index/channel.py      | 12 +++++++++++-
 tubearchivist/home/src/index/comments.py     |  7 +++++--
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json
index 79f1d81..7270563 100644
--- a/tubearchivist/home/src/es/index_mapping.json
+++ b/tubearchivist/home/src/es/index_mapping.json
@@ -473,6 +473,9 @@
                 "comment_last_refresh": {
                     "type": "date"
                 },
+                "comment_channel_id": {
+                    "type": "keyword"
+                },
                 "comment_comments": {
                     "properties": {
                         "comment_id": {
diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py
index 7a554a9..7108d43 100644
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@@ -50,7 +50,7 @@ class ChannelScraper:
         url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
         cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
         response = requests.get(
-            url, cookies=cookies, headers=requests_headers()
+            url, cookies=cookies, headers=requests_headers(), timeout=10
         )
         if response.ok:
             channel_page = response.text
@@ -275,6 +275,15 @@ class YoutubeChannel(YouTubeItem):
         }
         _, _ = ElasticWrap("ta_video/_delete_by_query").post(data)
 
+    def delete_es_comments(self):
+        """delete all comments from this channel"""
+        data = {
+            "query": {
+                "term": {"comment_channel_id": {"value": self.youtube_id}}
+            }
+        }
+        _, _ = ElasticWrap("ta_comment/_delete_by_query").post(data)
+
     def delete_playlists(self):
         """delete all indexed playlist from es"""
         all_playlists = self.get_indexed_playlists()
@@ -301,6 +310,7 @@ class YoutubeChannel(YouTubeItem):
         self.delete_playlists()
         print(f"{self.youtube_id}: delete indexed videos")
         self.delete_es_videos()
+        self.delete_es_comments()
         self.del_in_es()
 
     def index_channel_playlists(self):
diff --git a/tubearchivist/home/src/index/comments.py b/tubearchivist/home/src/index/comments.py
index 3ad7c9e..0ac2436 100644
--- a/tubearchivist/home/src/index/comments.py
+++ b/tubearchivist/home/src/index/comments.py
@@ -31,12 +31,13 @@ class Comments:
         if not self.is_activated:
             return
 
-        comments_raw = self.get_yt_comments()
+        comments_raw, channel_id = self.get_yt_comments()
         self.format_comments(comments_raw)
 
         self.json_data = {
             "youtube_id": self.youtube_id,
             "comment_last_refresh": int(datetime.now().strftime("%s")),
+            "comment_channel_id": channel_id,
             "comment_comments": self.comments_format,
         }
 
@@ -75,7 +76,8 @@ class Comments:
         yt_obs = self.build_yt_obs()
         info_json = YtWrap(yt_obs).extract(self.youtube_id)
         comments_raw = info_json.get("comments")
-        return comments_raw
+        channel_id = info_json.get("channel_id")
+        return comments_raw, channel_id
 
     def format_comments(self, comments_raw):
         """process comments to match format"""
@@ -141,6 +143,7 @@ class Comments:
 
     def reindex_comments(self):
         """update comments from youtube"""
+        self.check_config()
         if not self.is_activated:
             return
 