reindex comments

This commit is contained in:
simon 2022-11-18 10:31:09 +07:00
parent b5ceb264ec
commit a5be5a719b
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
3 changed files with 32 additions and 5 deletions

View File

@ -147,7 +147,7 @@ class DownloadPostProcess:
return
for video_id in self.download.videos:
comment = Comments(video_id)
comment = Comments(video_id, config=self.download.config)
comment.build_json()
comment.upload_comments()

View File

@ -20,12 +20,17 @@ class Comments:
self.es_path = f"ta_comment/_doc/{youtube_id}"
self.json_data = False
self.config = config
self.is_activated = False
self.comments_format = False
def build_json(self):
"""build json document for es"""
print(f"{self.youtube_id}: get comments")
self._check_config()
if not self.is_activated:
return
comments_raw = self.get_yt_comments()
self.format_comments(comments_raw)
@ -40,6 +45,8 @@ class Comments:
if not self.config:
self.config = AppConfig().config
self.is_activated = bool(self.config["downloads"]["comment_max"])
def build_yt_obs(self):
"""
get extractor config
@ -109,6 +116,9 @@ class Comments:
def upload_comments(self):
"""upload comments to es"""
if not self.is_activated:
return
_, _ = ElasticWrap(self.es_path).put(self.json_data)
vid_path = f"ta_video/_update/{self.youtube_id}"
@ -117,7 +127,7 @@ class Comments:
def delete_comments(self):
"""delete comments from es"""
_, _ = ElasticWrap(self.es_path).delete()
_, _ = ElasticWrap(self.es_path).delete(refresh=True)
def get_es_comments(self):
"""get comments from ES"""
@ -126,4 +136,19 @@ class Comments:
print(f"comments: not found {self.youtube_id}")
return False
return response
return response.get("_source")
def reindex_comments(self):
    """update comments from youtube

    Rebuilds the comment document via build_json and replaces the copy
    stored in ES. Does nothing when comments are not activated, and keeps
    the stored document when the rebuild produced no formatted comments
    while ES still holds some.
    """
    # is_activated is initialised to False and build_json runs
    # _check_config before reading it; do the same here, otherwise this
    # guard bails out on every freshly constructed instance.
    self._check_config()
    if not self.is_activated:
        return

    self.build_json()
    es_comments = self.get_es_comments()

    # get_es_comments returns False when no document was found, so only
    # look up the stored comments when something actually came back.
    stored = es_comments.get("comment_comments") if es_comments else None
    if not self.comments_format and stored:
        # don't overwrite comments in es
        return

    self.delete_comments()
    self.upload_comments()

View File

@ -16,6 +16,7 @@ from home.src.download.yt_dlp_base import CookieHandler
from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.es.connect import ElasticWrap
from home.src.index.channel import YoutubeChannel
from home.src.index.comments import Comments
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video import YoutubeVideo
from home.src.ta.config import AppConfig
@ -147,8 +148,7 @@ class Reindex:
if integrate_ryd:
self._get_unrated_vids()
@staticmethod
def _reindex_single_video(youtube_id):
def _reindex_single_video(self, youtube_id):
"""refresh data for single video"""
video = YoutubeVideo(youtube_id)
@ -182,6 +182,8 @@ class Reindex:
thumb_handler.delete_video_thumb()
thumb_handler.download_video_thumb(video.json_data["vid_thumb_url"])
Comments(youtube_id, config=self.config).reindex_comments()
return
@staticmethod