refactor index_channel_playlists task, move to YoutubeChannel class

This commit is contained in:
simon 2022-03-21 17:59:21 +07:00
parent c9399f61d0
commit 91a9477bba
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
2 changed files with 63 additions and 49 deletions

View File

@ -12,11 +12,13 @@ from datetime import datetime
import requests import requests
import yt_dlp import yt_dlp
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from home.src.download import queue # partial import
from home.src.download.thumbnails import ThumbManager from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.generic import YouTubeItem from home.src.index.generic import YouTubeItem
from home.src.index.playlist import YoutubePlaylist from home.src.index.playlist import YoutubePlaylist
from home.src.ta.helper import clean_string from home.src.ta.helper import clean_string
from home.src.ta.ta_redis import RedisArchivist
class ChannelScraper: class ChannelScraper:
@ -153,6 +155,7 @@ class YoutubeChannel(YouTubeItem):
def __init__(self, youtube_id): def __init__(self, youtube_id):
super().__init__(youtube_id) super().__init__(youtube_id)
self.es_path = f"{self.index_name}/_doc/{youtube_id}" self.es_path = f"{self.index_name}/_doc/{youtube_id}"
self.all_playlists = False
def build_json(self, upload=False): def build_json(self, upload=False):
"""get from es or from youtube""" """get from es or from youtube"""
@ -241,6 +244,64 @@ class YoutubeChannel(YouTubeItem):
self.delete_es_videos() self.delete_es_videos()
self.del_in_es() self.del_in_es()
def index_channel_playlists(self):
    """scan this channel for playlists and index every one found

    Sends progress notifications through redis under the
    "message:playlistscan" key; bails out early when the channel
    has no playlists.
    """
    RedisArchivist().set_message(
        "message:playlistscan",
        {
            "status": "message:playlistscan",
            "level": "info",
            "title": "Looking for playlists",
            "message": f'Scanning channel "{self.youtube_id}" in progress',
        },
    )
    self.get_all_playlists()
    if self.all_playlists:
        all_youtube_ids = self.get_all_video_ids()
        for idx, playlist in enumerate(self.all_playlists):
            self.notify_single_playlist(idx, playlist)
            self.index_single_playlist(playlist, all_youtube_ids)
    else:
        print(f"{self.youtube_id}: no playlists found.")
def notify_single_playlist(self, idx, playlist):
    """send a per-playlist progress notification through redis

    idx: zero-based position in self.all_playlists
    playlist: (playlist_id, playlist_title) tuple
    """
    progress = f"Progress: {idx + 1}/{len(self.all_playlists)}"
    message = {
        "status": "message:playlistscan",
        "level": "info",
        "title": "Scanning channel for playlists",
        "message": progress,
    }
    RedisArchivist().set_message("message:playlistscan", message)
    print("add playlist: " + playlist[1])
@staticmethod
def index_single_playlist(playlist, all_youtube_ids):
    """add single playlist if needed

    playlist: (playlist_id, playlist_title) tuple as produced by
        get_all_playlists
    all_youtube_ids: list of youtube ids to match playlist entries
        against

    Skips playlists that can't be built or that contain no
    downloaded videos; otherwise uploads the playlist to ES,
    links its videos and fetches its artwork.
    """
    # bind the YoutubePlaylist object to its own name instead of
    # rebinding (shadowing) the incoming *playlist* tuple parameter
    playlist_handler = YoutubePlaylist(playlist[0])
    playlist_handler.all_youtube_ids = all_youtube_ids
    playlist_handler.build_json()
    if not playlist_handler.json_data:
        # playlist not available, skip
        return

    entries = playlist_handler.json_data["playlist_entries"]
    downloaded = [i for i in entries if i["downloaded"]]
    if not downloaded:
        # don't index playlists without any downloaded videos
        return

    playlist_handler.upload_to_es()
    playlist_handler.add_vids_to_playlist()
    playlist_handler.get_playlist_art()
@staticmethod
def get_all_video_ids():
    """collect youtube ids of all videos known to the archive

    Pulls both the download queue and the index via PendingList
    and returns their youtube_id values as a flat list.
    """
    pending = queue.PendingList()
    pending.get_download()
    pending.get_indexed()
    return [video["youtube_id"] for video in pending.all_videos]
def get_all_playlists(self): def get_all_playlists(self):
"""get all playlists owned by this channel""" """get all playlists owned by this channel"""
url = ( url = (
@ -254,8 +315,7 @@ class YoutubeChannel(YouTubeItem):
} }
playlists = yt_dlp.YoutubeDL(obs).extract_info(url) playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]] all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
self.all_playlists = all_entries
return all_entries
def get_indexed_playlists(self): def get_indexed_playlists(self):
"""get all indexed playlists from channel""" """get all indexed playlists from channel"""

View File

@ -24,7 +24,6 @@ from home.src.index.filesystem import (
reindex_old_documents, reindex_old_documents,
scan_filesystem, scan_filesystem,
) )
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig, ScheduleBuilder from home.src.ta.config import AppConfig, ScheduleBuilder
from home.src.ta.helper import UrlListParser from home.src.ta.helper import UrlListParser
from home.src.ta.ta_redis import RedisArchivist, RedisQueue from home.src.ta.ta_redis import RedisArchivist, RedisQueue
@ -268,52 +267,7 @@ def index_channel_playlists(channel_id):
"message": f'Scanning channel "{channel.youtube_id}" in progress', "message": f'Scanning channel "{channel.youtube_id}" in progress',
} }
RedisArchivist().set_message("message:playlistscan", mess_dict) RedisArchivist().set_message("message:playlistscan", mess_dict)
all_playlists = channel.get_all_playlists() channel.index_channel_playlists()
if not all_playlists:
print(f"no playlists found for channel {channel_id}")
return
handler = PendingList()
handler.get_download()
handler.get_indexed()
all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
for idx, (playlist_id, playlist_title) in enumerate(all_playlists):
# notify
mess_dict = {
"status": "message:playlistscan",
"level": "info",
"title": "Scanning channel for playlists",
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
}
RedisArchivist().set_message("message:playlistscan", mess_dict)
print("add playlist: " + playlist_title)
playlist = YoutubePlaylist(playlist_id)
playlist.all_youtube_ids = all_youtube_ids
playlist.build_json()
if not playlist.json_data:
# skip if not available
continue
# don't add if no videos downloaded
downloaded = [
i
for i in playlist.json_data["playlist_entries"]
if i["downloaded"]
]
if not downloaded:
continue
playlist.upload_to_es()
playlist.add_vids_to_playlist()
if all_playlists:
playlist.get_playlist_art()
return
try: try: