configure comments extraction

This commit is contained in:
simon 2022-11-18 08:59:04 +07:00
parent 11c122cba3
commit 172ced7129
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
4 changed files with 43 additions and 5 deletions

View File

@ -27,6 +27,8 @@
"subtitle": false,
"subtitle_source": false,
"subtitle_index": false,
"comment_max": false,
"comment_sort": "top",
"cookie_import": false,
"throttledratelimit": false,
"integrate_ryd": false,

View File

@ -92,6 +92,12 @@ class ApplicationSettingsForm(forms.Form):
("1", "enable subtitle index"),
]
COMMENT_SORT_CHOICES = [
("", "-- change comments sort settings --"),
("top", "sort comments by top"),
("new", "sort comments by new"),
]
COOKIE_IMPORT_CHOICES = [
("", "-- change cookie settings"),
("0", "disable cookie"),
@ -120,6 +126,10 @@ class ApplicationSettingsForm(forms.Form):
downloads_subtitle_index = forms.ChoiceField(
widget=forms.Select, choices=SUBTITLE_INDEX_CHOICES, required=False
)
downloads_comment_max = forms.CharField(required=False)
downloads_comment_sort = forms.ChoiceField(
widget=forms.Select, choices=COMMENT_SORT_CHOICES, required=False
)
downloads_cookie_import = forms.ChoiceField(
widget=forms.Select, choices=COOKIE_IMPORT_CHOICES, required=False
)

View File

@ -9,19 +9,21 @@ from datetime import datetime
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap
from home.src.ta.config import AppConfig
class Comments:
"""hold all comments functionality"""
def __init__(self, youtube_id):
def __init__(self, youtube_id, config=False):
self.youtube_id = youtube_id
self.es_path = f"ta_comment/_doc/{youtube_id}"
self.max_comments = "all,100,all,30"
self.json_data = False
self.config = config
def build_json(self):
"""build json document for es"""
self._check_config()
comments_raw = self.get_yt_comments()
comments_format = self.format_comments(comments_raw)
@ -31,13 +33,19 @@ class Comments:
"comment_comments": comments_format,
}
def _check_config(self):
    """make sure self.config is populated, reading AppConfig if needed"""
    if self.config:
        # a config was already attached, nothing to load
        return

    self.config = AppConfig().config
def build_yt_obs(self):
"""
get extractor config
max-comments,max-parents,max-replies,max-replies-per-thread
"""
max_comments_list = [i.strip() for i in self.max_comments.split(",")]
comment_sort = "top"
max_comments = self.config["downloads"]["comment_max"]
max_comments_list = [i.strip() for i in max_comments.split(",")]
comment_sort = self.config["downloads"]["comment_sort"]
yt_obs = {
"skip_download": True,
@ -55,7 +63,7 @@ class Comments:
def get_yt_comments(self):
"""get comments from youtube"""
print(f"comments: get comments with format {self.max_comments}")
print("comments: get comments")
yt_obs = self.build_yt_obs()
info_json = YtWrap(yt_obs).extract(self.youtube_id)
comments_raw = info_json.get("comments")

View File

@ -114,6 +114,24 @@
{{ app_form.downloads_subtitle_index }}
</div>
</div>
<div class="settings-group">
<h2 id="comments">Comments</h2>
<div class="settings-item">
<p>Download and index comments: <span class="settings-current">{{ config.downloads.comment_max }}</span><br>
<i>Follow the yt-dlp max_comments documentation, <a href="https://github.com/yt-dlp/yt-dlp#youtube" target="_blank">max-comments,max-parents,max-replies,max-replies-per-thread</a>:</i></p>
<p>Example configurations:</p>
<ul>
<li><span class="settings-current">all,100,all,30</span>: Get 100 max-parents and 30 max-replies-per-thread.</li>
<li><span class="settings-current">1000,all,all,50</span>: Get a total of 1000 comments overall, with up to 50 replies per thread.</li>
</ul>
{{ app_form.downloads_comment_max }}
</div>
<div class="settings-item">
<p>Selected comment sort method: <span class="settings-current">{{ config.downloads.comment_sort }}</span><br>
<i>Select the order in which comments are sorted when downloading:</i><br>
{{ app_form.downloads_comment_sort }}</p>
</div>
</div>
<div class="settings-group">
<h2 id="format">Cookie</h2>
<div class="settings-item">