Reindex shorts and streams, #build

Changed:
- Match existing videos with new video type
- Randomize version check schedule per installation
This commit is contained in:
simon 2023-01-12 10:53:53 +07:00
commit 1471fce2c3
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
5 changed files with 113 additions and 15 deletions

View File

@ -52,6 +52,6 @@
"thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
"run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
"run_backup_rotate": 5,
"version_check": {"minute": "0", "hour": "11", "day_of_week": "*"}
"version_check": "rand-d"
}
}

View File

@ -381,6 +381,17 @@ class YoutubeChannel(YouTubeItem):
return all_youtube_ids
def get_channel_videos(self):
    """return youtube_id and vid_type of all indexed videos of channel"""
    # term filter on the channel id this object was built for
    channel_filter = {"channel.channel_id": {"value": self.youtube_id}}
    query = {
        "query": {"term": channel_filter},
        # only the two fields needed to compare video types
        "_source": ["youtube_id", "vid_type"],
    }
    return IndexPaginate("ta_video", query).get_results()
def get_all_playlists(self):
"""get all playlists owned by this channel"""
url = (

View File

@ -4,12 +4,14 @@ functionality:
- index and update in es
"""
import json
import os
import shutil
from datetime import datetime
from time import sleep
from home.src.download.queue import PendingList
from home.src.download.subscriptions import ChannelSubscription
from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import CookieHandler
from home.src.download.yt_dlp_handler import VideoDownloader
@ -307,6 +309,8 @@ class Reindex(ReindexBase):
channel.upload_to_es()
channel.sync_to_videos()
ChannelFullScan(channel_id).scan()
def _reindex_single_playlist(self, playlist_id):
"""refresh playlist data"""
self._get_all_videos()
@ -473,3 +477,74 @@ class ChannelUrlFixer:
shutil.move(video_path_is, new_path, copy_function=shutil.copyfile)
VideoDownloader().move_to_archive(self.video.json_data)
self.video.update_media_url()
class ChannelFullScan:
    """
    update from v0.3.0 to v0.3.1
    full scan of channel to fix vid_type mismatch

    compares the vid_type of every locally indexed video of the channel
    with the type reported by the remote channel listing and sends a bulk
    update for all documents that differ
    """

    def __init__(self, channel_id):
        self.channel_id = channel_id
        # list of {"video_id": ..., "vid_type": ...} dicts, filled by scan()
        self.to_update = []

    def scan(self):
        """match local with remote"""
        print(f"{self.channel_id}: start full scan")
        all_local_videos = self._get_all_local()
        all_remote_videos = self._get_all_remote()

        # index the remote entries by video id once instead of walking the
        # whole remote list for every local video, setdefault keeps the
        # first entry per id
        remote_types = {}
        for remote_video in all_remote_videos or []:
            remote_types.setdefault(remote_video[0], remote_video[-1])

        self.to_update = []
        for video in all_local_videos:
            video_id = video["youtube_id"]
            if video_id not in remote_types:
                print(f"{video_id}: no remote match found")
                continue

            # last item of the remote entry exposes the type via .value
            expected_type = remote_types[video_id].value
            # .get: a document without vid_type counts as a mismatch
            # and gets fixed instead of aborting the scan with a KeyError
            if video.get("vid_type") != expected_type:
                self.to_update.append(
                    {
                        "video_id": video_id,
                        "vid_type": expected_type,
                    }
                )

        self.update()

    def _get_all_remote(self):
        """get all channel videos"""
        sub = ChannelSubscription()
        # limit=False to request the channel listing without a limit
        all_remote_videos = sub.get_last_youtube_videos(
            self.channel_id, limit=False
        )

        return all_remote_videos

    def _get_all_local(self):
        """get all local indexed channel_videos"""
        channel = YoutubeChannel(self.channel_id)
        all_local_videos = channel.get_channel_videos()

        return all_local_videos

    def update(self):
        """build bulk query for updates"""
        if not self.to_update:
            print(f"{self.channel_id}: nothing to update")
            return

        print(f"{self.channel_id}: fixing {len(self.to_update)} videos")
        bulk_list = []
        for video in self.to_update:
            # one action line followed by one partial document line
            action = {
                "update": {"_id": video.get("video_id"), "_index": "ta_video"}
            }
            source = {"doc": {"vid_type": video.get("vid_type")}}
            bulk_list.append(json.dumps(action))
            bulk_list.append(json.dumps(source))

        # add last newline
        bulk_list.append("\n")
        data = "\n".join(bulk_list)
        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)

View File

@ -7,6 +7,7 @@ Functionality:
import json
import os
import re
from random import randint
import requests
from celery.schedules import crontab
@ -117,6 +118,15 @@ class AppConfig:
self.config["application"]["colors"] = colors
return colors
@staticmethod
def _build_rand_daily():
"""build random daily schedule per installation"""
return {
"minute": randint(0, 59),
"hour": randint(0, 23),
"day_of_week": "*",
}
def load_new_defaults(self):
"""check config.json for missing defaults"""
default_config = self.get_config_file()
@ -140,6 +150,9 @@ class AppConfig:
# missing nested values
for sub_key, sub_value in value.items():
if sub_key not in redis_config[key].keys():
if sub_value == "rand-d":
sub_value = self._build_rand_daily()
redis_config[key].update({sub_key: sub_value})
needs_update = True
@ -256,19 +269,18 @@ class ScheduleBuilder:
if not item_conf:
continue
minute = item_conf["minute"]
hour = item_conf["hour"]
day_of_week = item_conf["day_of_week"]
schedule_name = f"schedule_{schedule_item}"
to_add = {
schedule_name: {
"task": schedule_item,
"schedule": crontab(
minute=minute, hour=hour, day_of_week=day_of_week
),
schedule_dict.update(
{
f"schedule_{schedule_item}": {
"task": schedule_item,
"schedule": crontab(
minute=item_conf["minute"],
hour=item_conf["hour"],
day_of_week=item_conf["day_of_week"],
),
}
}
}
schedule_dict.update(to_add)
)
return schedule_dict

View File

@ -5,9 +5,9 @@ django-auth-ldap==4.1.0
django-cors-headers==3.13.0
djangorestframework==3.14.0
Pillow==9.4.0
redis==4.4.0
redis==4.4.2
requests==2.28.1
ryd-client==0.0.6
uWSGI==2.0.21
whitenoise==6.3.0
yt_dlp==2023.1.2
yt_dlp==2023.1.6