delete comments when deleting channel

This commit is contained in:
simon 2022-11-18 11:33:06 +07:00
parent e590d456a1
commit 8f5311ff04
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
3 changed files with 19 additions and 3 deletions

View File

@ -473,6 +473,9 @@
"comment_last_refresh": {
"type": "date"
},
"comment_channel_id": {
"type": "keyword"
},
"comment_comments": {
"properties": {
"comment_id": {

View File

@ -50,7 +50,7 @@ class ChannelScraper:
url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
response = requests.get(
url, cookies=cookies, headers=requests_headers()
url, cookies=cookies, headers=requests_headers(), timeout=10
)
if response.ok:
channel_page = response.text
@ -275,6 +275,15 @@ class YoutubeChannel(YouTubeItem):
}
_, _ = ElasticWrap("ta_video/_delete_by_query").post(data)
def delete_es_comments(self):
    """remove every comment document indexed for this channel"""
    # match comment documents by the channel id stored alongside them
    channel_term = {"comment_channel_id": {"value": self.youtube_id}}
    query = {"query": {"term": channel_term}}
    _, _ = ElasticWrap("ta_comment/_delete_by_query").post(query)
def delete_playlists(self):
"""delete all indexed playlist from es"""
all_playlists = self.get_indexed_playlists()
@ -301,6 +310,7 @@ class YoutubeChannel(YouTubeItem):
self.delete_playlists()
print(f"{self.youtube_id}: delete indexed videos")
self.delete_es_videos()
self.delete_es_comments()
self.del_in_es()
def index_channel_playlists(self):

View File

@ -31,12 +31,13 @@ class Comments:
if not self.is_activated:
return
comments_raw = self.get_yt_comments()
comments_raw, channel_id = self.get_yt_comments()
self.format_comments(comments_raw)
self.json_data = {
"youtube_id": self.youtube_id,
"comment_last_refresh": int(datetime.now().strftime("%s")),
"comment_channel_id": channel_id,
"comment_comments": self.comments_format,
}
@ -75,7 +76,8 @@ class Comments:
yt_obs = self.build_yt_obs()
info_json = YtWrap(yt_obs).extract(self.youtube_id)
comments_raw = info_json.get("comments")
return comments_raw
channel_id = info_json.get("channel_id")
return comments_raw, channel_id
def format_comments(self, comments_raw):
"""process comments to match format"""
@ -141,6 +143,7 @@ class Comments:
def reindex_comments(self):
"""update comments from youtube"""
self.check_config()
if not self.is_activated:
return