refactor index_channel_playlists task, move to YoutubeChannel class

This commit is contained in:
simon 2022-03-21 17:59:21 +07:00
parent c9399f61d0
commit 91a9477bba
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
2 changed files with 63 additions and 49 deletions

View File

@ -12,11 +12,13 @@ from datetime import datetime
import requests
import yt_dlp
from bs4 import BeautifulSoup
from home.src.download import queue # partial import
from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.generic import YouTubeItem
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.helper import clean_string
from home.src.ta.ta_redis import RedisArchivist
class ChannelScraper:
@ -153,6 +155,7 @@ class YoutubeChannel(YouTubeItem):
def __init__(self, youtube_id):
super().__init__(youtube_id)
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
self.all_playlists = False
def build_json(self, upload=False):
"""get from es or from youtube"""
@ -241,6 +244,64 @@ class YoutubeChannel(YouTubeItem):
self.delete_es_videos()
self.del_in_es()
def index_channel_playlists(self):
"""add all playlists of channel to index"""
mess_dict = {
"status": "message:playlistscan",
"level": "info",
"title": "Looking for playlists",
"message": f'Scanning channel "{self.youtube_id}" in progress',
}
RedisArchivist().set_message("message:playlistscan", mess_dict)
self.get_all_playlists()
if not self.all_playlists:
print(f"{self.youtube_id}: no playlists found.")
return
all_youtube_ids = self.get_all_video_ids()
for idx, playlist in enumerate(self.all_playlists):
self.notify_single_playlist(idx, playlist)
self.index_single_playlist(playlist, all_youtube_ids)
def notify_single_playlist(self, idx, playlist):
"""send notification"""
mess_dict = {
"status": "message:playlistscan",
"level": "info",
"title": "Scanning channel for playlists",
"message": f"Progress: {idx + 1}/{len(self.all_playlists)}",
}
RedisArchivist().set_message("message:playlistscan", mess_dict)
print("add playlist: " + playlist[1])
@staticmethod
def index_single_playlist(playlist, all_youtube_ids):
"""add single playlist if needed"""
playlist = YoutubePlaylist(playlist[0])
playlist.all_youtube_ids = all_youtube_ids
playlist.build_json()
if not playlist.json_data:
return
entries = playlist.json_data["playlist_entries"]
downloaded = [i for i in entries if i["downloaded"]]
if not downloaded:
return
playlist.upload_to_es()
playlist.add_vids_to_playlist()
playlist.get_playlist_art()
@staticmethod
def get_all_video_ids():
"""match all playlists with videos"""
handler = queue.PendingList()
handler.get_download()
handler.get_indexed()
all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
return all_youtube_ids
def get_all_playlists(self):
"""get all playlists owned by this channel"""
url = (
@ -254,8 +315,7 @@ class YoutubeChannel(YouTubeItem):
}
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
return all_entries
self.all_playlists = all_entries
def get_indexed_playlists(self):
"""get all indexed playlists from channel"""

View File

@ -24,7 +24,6 @@ from home.src.index.filesystem import (
reindex_old_documents,
scan_filesystem,
)
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig, ScheduleBuilder
from home.src.ta.helper import UrlListParser
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
@ -268,52 +267,7 @@ def index_channel_playlists(channel_id):
"message": f'Scanning channel "{channel.youtube_id}" in progress',
}
RedisArchivist().set_message("message:playlistscan", mess_dict)
all_playlists = channel.get_all_playlists()
if not all_playlists:
print(f"no playlists found for channel {channel_id}")
return
handler = PendingList()
handler.get_download()
handler.get_indexed()
all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
for idx, (playlist_id, playlist_title) in enumerate(all_playlists):
# notify
mess_dict = {
"status": "message:playlistscan",
"level": "info",
"title": "Scanning channel for playlists",
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
}
RedisArchivist().set_message("message:playlistscan", mess_dict)
print("add playlist: " + playlist_title)
playlist = YoutubePlaylist(playlist_id)
playlist.all_youtube_ids = all_youtube_ids
playlist.build_json()
if not playlist.json_data:
# skip if not available
continue
# don't add if no videos downloaded
downloaded = [
i
for i in playlist.json_data["playlist_entries"]
if i["downloaded"]
]
if not downloaded:
continue
playlist.upload_to_es()
playlist.add_vids_to_playlist()
if all_playlists:
playlist.get_playlist_art()
return
channel.index_channel_playlists()
try: