delete comments when deleting channel

This commit is contained in:
simon 2022-11-18 11:33:06 +07:00
parent e590d456a1
commit 8f5311ff04
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
3 changed files with 19 additions and 3 deletions

View File

@ -473,6 +473,9 @@
"comment_last_refresh": { "comment_last_refresh": {
"type": "date" "type": "date"
}, },
"comment_channel_id": {
"type": "keyword"
},
"comment_comments": { "comment_comments": {
"properties": { "properties": {
"comment_id": { "comment_id": {

View File

@ -50,7 +50,7 @@ class ChannelScraper:
url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en" url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"} cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
response = requests.get( response = requests.get(
url, cookies=cookies, headers=requests_headers() url, cookies=cookies, headers=requests_headers(), timeout=10
) )
if response.ok: if response.ok:
channel_page = response.text channel_page = response.text
@ -275,6 +275,15 @@ class YoutubeChannel(YouTubeItem):
} }
_, _ = ElasticWrap("ta_video/_delete_by_query").post(data) _, _ = ElasticWrap("ta_video/_delete_by_query").post(data)
def delete_es_comments(self):
    """remove the indexed comments that belong to this channel

    Posts a delete-by-query request against ta_comment, matching
    documents whose comment_channel_id equals self.youtube_id.
    """
    # term filter: exact match on the channel id stored with each comment doc
    channel_filter = {"comment_channel_id": {"value": self.youtube_id}}
    payload = {"query": {"term": channel_filter}}
    # the two values returned by post() are unpacked and discarded
    _, _ = ElasticWrap("ta_comment/_delete_by_query").post(payload)
def delete_playlists(self): def delete_playlists(self):
"""delete all indexed playlist from es""" """delete all indexed playlist from es"""
all_playlists = self.get_indexed_playlists() all_playlists = self.get_indexed_playlists()
@ -301,6 +310,7 @@ class YoutubeChannel(YouTubeItem):
self.delete_playlists() self.delete_playlists()
print(f"{self.youtube_id}: delete indexed videos") print(f"{self.youtube_id}: delete indexed videos")
self.delete_es_videos() self.delete_es_videos()
self.delete_es_comments()
self.del_in_es() self.del_in_es()
def index_channel_playlists(self): def index_channel_playlists(self):

View File

@ -31,12 +31,13 @@ class Comments:
if not self.is_activated: if not self.is_activated:
return return
comments_raw = self.get_yt_comments() comments_raw, channel_id = self.get_yt_comments()
self.format_comments(comments_raw) self.format_comments(comments_raw)
self.json_data = { self.json_data = {
"youtube_id": self.youtube_id, "youtube_id": self.youtube_id,
"comment_last_refresh": int(datetime.now().strftime("%s")), "comment_last_refresh": int(datetime.now().strftime("%s")),
"comment_channel_id": channel_id,
"comment_comments": self.comments_format, "comment_comments": self.comments_format,
} }
@ -75,7 +76,8 @@ class Comments:
yt_obs = self.build_yt_obs() yt_obs = self.build_yt_obs()
info_json = YtWrap(yt_obs).extract(self.youtube_id) info_json = YtWrap(yt_obs).extract(self.youtube_id)
comments_raw = info_json.get("comments") comments_raw = info_json.get("comments")
return comments_raw channel_id = info_json.get("channel_id")
return comments_raw, channel_id
def format_comments(self, comments_raw): def format_comments(self, comments_raw):
"""process comments to match format""" """process comments to match format"""
@ -141,6 +143,7 @@ class Comments:
def reindex_comments(self): def reindex_comments(self):
"""update comments from youtube""" """update comments from youtube"""
self.check_config()
if not self.is_activated: if not self.is_activated:
return return