From 6641db3e7e5b3a96b8d974d5ce455592dc8ea8dc Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 13 Feb 2023 12:42:55 +0700 Subject: [PATCH] skip empty comment, #429 --- tubearchivist/home/src/index/comments.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tubearchivist/home/src/index/comments.py b/tubearchivist/home/src/index/comments.py index f383708..e9ec0d9 100644 --- a/tubearchivist/home/src/index/comments.py +++ b/tubearchivist/home/src/index/comments.py @@ -109,12 +109,20 @@ class Comments: if comments_raw: for comment in comments_raw: cleaned_comment = self.clean_comment(comment) + if not cleaned_comment: + continue + comments.append(cleaned_comment) self.comments_format = comments def clean_comment(self, comment): """parse metadata from comment for indexing""" + if not comment.get("text"): + # comment text can be empty + print(f"{self.youtube_id}: Failed to extract text, {comment}") + return False + time_text_datetime = datetime.utcfromtimestamp(comment["timestamp"]) if time_text_datetime.hour == 0 and time_text_datetime.minute == 0: