extend ryd to refresh when missing average_rating

This commit is contained in:
simon 2022-01-05 14:43:24 +07:00
parent 7a1f77e548
commit c85be45846
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
2 changed files with 26 additions and 2 deletions

View File

@ -42,6 +42,7 @@ class Reindex:
self.es_url = config["application"]["es_url"] self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"] self.es_auth = config["application"]["es_auth"]
self.refresh_interval = config["scheduler"]["check_reindex_days"] self.refresh_interval = config["scheduler"]["check_reindex_days"]
self.integrate_ryd = config["downloads"]["integrate_ryd"]
# scan # scan
self.all_youtube_ids = False self.all_youtube_ids = False
self.all_channel_ids = False self.all_channel_ids = False
@ -92,6 +93,27 @@ class Reindex:
all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]] all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_youtube_ids return all_youtube_ids
def get_unrated_vids(self):
    """add videos without an average rating to the refresh list

    Searches the ta_video index for documents that have no
    stats.average_rating field and appends their ids to
    self.all_youtube_ids, skipping ids that are already queued.
    Returns nothing; on a failed request the response body is printed
    and self.all_youtube_ids is left untouched.
    """
    headers = {"Content-type": "application/json"}
    # NOTE(review): no "size" is set, so Elasticsearch returns only its
    # default page of hits per call - confirm this cap is intended.
    data = {
        "query": {
            "bool": {
                "must_not": [{"exists": {"field": "stats.average_rating"}}]
            }
        }
    }
    query_str = json.dumps(data)
    url = self.es_url + "/ta_video/_search"
    response = requests.get(
        url, data=query_str, headers=headers, auth=self.es_auth
    )
    if not response.ok:
        # an error payload has no hits to read, bail out instead of
        # failing with a KeyError further down
        print(response.text)
        return

    response_dict = json.loads(response.text)
    missing_rating = [i["_id"] for i in response_dict["hits"]["hits"]]

    # all_youtube_ids starts out as False until the outdated scan has run
    queued = self.all_youtube_ids or []
    seen = set(queued)
    new_ids = []
    for youtube_id in missing_rating:
        # a video can be both outdated and unrated, refresh it only once
        if youtube_id not in seen:
            seen.add(youtube_id)
            new_ids.append(youtube_id)

    self.all_youtube_ids = queued + new_ids
def get_outdated_channels(self, size): def get_outdated_channels(self, size):
"""get daily channels to refresh""" """get daily channels to refresh"""
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
@ -156,6 +178,8 @@ class Reindex:
self.all_youtube_ids = self.get_outdated_vids(video_daily) self.all_youtube_ids = self.get_outdated_vids(video_daily)
self.all_channel_ids = self.get_outdated_channels(channel_daily) self.all_channel_ids = self.get_outdated_channels(channel_daily)
self.all_playlist_ids = self.get_outdated_playlists(playlist_daily) self.all_playlist_ids = self.get_outdated_playlists(playlist_daily)
if self.integrate_ryd:
self.get_unrated_vids()
def rescrape_all_channels(self): def rescrape_all_channels(self):
"""sync new data from channel to all matching videos""" """sync new data from channel to all matching videos"""

View File

@ -98,8 +98,8 @@
<div class="settings-group"> <div class="settings-group">
<h2 id="integrations">Integrations</h2> <h2 id="integrations">Integrations</h2>
<div class="settings-item"> <div class="settings-item">
<p>Integrate with <a href="https://returnyoutubedislike.com/">returnyoutubedislike.com</a>: <span class="settings-current">{{ config.downloads.integrate_ryd }}</span></p> <p>Integrate with <a href="https://returnyoutubedislike.com/">returnyoutubedislike.com</a> to get dislikes and average ratings back: <span class="settings-current">{{ config.downloads.integrate_ryd }}</span></p>
<i>Get dislikes and average ratings back.</i><br> <i>Before activating this, make sure you have set a scraping sleep interval of at least 3 seconds to avoid rate-limiting issues.</i><br>
{{ app_form.downloads_integrate_ryd }} {{ app_form.downloads_integrate_ryd }}
</div> </div>
</div> </div>