mirror of
https://github.com/tubearchivist/tubearchivist-frontend.git
synced 2024-12-22 18:00:17 +00:00
refactor index_channel_playlists task, move to YoutubeChannel class
This commit is contained in:
parent
c9399f61d0
commit
91a9477bba
@ -12,11 +12,13 @@ from datetime import datetime
|
||||
import requests
|
||||
import yt_dlp
|
||||
from bs4 import BeautifulSoup
|
||||
from home.src.download import queue # partial import
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.generic import YouTubeItem
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.ta.helper import clean_string
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class ChannelScraper:
|
||||
@ -153,6 +155,7 @@ class YoutubeChannel(YouTubeItem):
|
||||
def __init__(self, youtube_id):
    """initialise the channel item and its elasticsearch document path"""
    super().__init__(youtube_id)
    # document location: <index_name>/_doc/<youtube_id>
    document_path = f"{self.index_name}/_doc/{youtube_id}"
    self.es_path = document_path
    # placeholder, starts out as False until a playlist list is stored
    self.all_playlists = False
|
||||
|
||||
def build_json(self, upload=False):
|
||||
"""get from es or from youtube"""
|
||||
@ -241,6 +244,64 @@ class YoutubeChannel(YouTubeItem):
|
||||
self.delete_es_videos()
|
||||
self.del_in_es()
|
||||
|
||||
def index_channel_playlists(self):
    """scan this channel for playlists and index every one found

    Publishes a start notification, loads the playlists through
    get_all_playlists(), then notifies and indexes playlist by playlist.
    Returns early, after printing a note, when nothing was found.
    """
    scan_started = {
        "status": "message:playlistscan",
        "level": "info",
        "title": "Looking for playlists",
        "message": f'Scanning channel "{self.youtube_id}" in progress',
    }
    RedisArchivist().set_message("message:playlistscan", scan_started)

    self.get_all_playlists()
    if self.all_playlists:
        # fetch the video ids once and share them with every playlist
        known_video_ids = self.get_all_video_ids()
        for position, channel_playlist in enumerate(self.all_playlists):
            self.notify_single_playlist(position, channel_playlist)
            self.index_single_playlist(channel_playlist, known_video_ids)
    else:
        print(f"{self.youtube_id}: no playlists found.")
|
||||
|
||||
def notify_single_playlist(self, idx, playlist):
    """publish scan progress for one playlist and print its title

    idx is the zero based position within self.all_playlists,
    playlist is indexed as a sequence, item 1 is printed as the name.
    """
    total = len(self.all_playlists)
    progress = {
        "status": "message:playlistscan",
        "level": "info",
        "title": "Scanning channel for playlists",
        # counter shown to the user is one based
        "message": f"Progress: {idx + 1}/{total}",
    }
    RedisArchivist().set_message("message:playlistscan", progress)
    print("add playlist: " + playlist[1])
|
||||
|
||||
@staticmethod
def index_single_playlist(playlist, all_youtube_ids):
    """index one playlist, skip if unavailable or nothing is downloaded

    playlist is indexed as a sequence, item 0 is used as the playlist id.
    all_youtube_ids is handed to the YoutubePlaylist before build_json().
    """
    handler = YoutubePlaylist(playlist[0])
    handler.all_youtube_ids = all_youtube_ids
    handler.build_json()
    if not handler.json_data:
        # no json data was built, nothing to index
        return

    # walk every entry, count the ones flagged as downloaded
    downloaded_count = sum(
        1
        for entry in handler.json_data["playlist_entries"]
        if entry["downloaded"]
    )
    if downloaded_count == 0:
        return

    handler.upload_to_es()
    handler.add_vids_to_playlist()
    handler.get_playlist_art()
|
||||
|
||||
@staticmethod
def get_all_video_ids():
    """return the youtube_id of every video held by the pending list

    Runs get_download() and get_indexed() on a fresh queue.PendingList
    and reads the ids from its all_videos attribute afterwards.
    """
    pending = queue.PendingList()
    pending.get_download()
    pending.get_indexed()
    return [video["youtube_id"] for video in pending.all_videos]
|
||||
|
||||
def get_all_playlists(self):
|
||||
"""get all playlists owned by this channel"""
|
||||
url = (
|
||||
@ -254,8 +315,7 @@ class YoutubeChannel(YouTubeItem):
|
||||
}
|
||||
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
|
||||
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
|
||||
|
||||
return all_entries
|
||||
self.all_playlists = all_entries
|
||||
|
||||
def get_indexed_playlists(self):
|
||||
"""get all indexed playlists from channel"""
|
||||
|
@ -24,7 +24,6 @@ from home.src.index.filesystem import (
|
||||
reindex_old_documents,
|
||||
scan_filesystem,
|
||||
)
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.ta.config import AppConfig, ScheduleBuilder
|
||||
from home.src.ta.helper import UrlListParser
|
||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||
@ -268,52 +267,7 @@ def index_channel_playlists(channel_id):
|
||||
"message": f'Scanning channel "{channel.youtube_id}" in progress',
|
||||
}
|
||||
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
||||
all_playlists = channel.get_all_playlists()
|
||||
|
||||
if not all_playlists:
|
||||
print(f"no playlists found for channel {channel_id}")
|
||||
return
|
||||
|
||||
handler = PendingList()
|
||||
handler.get_download()
|
||||
handler.get_indexed()
|
||||
all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
|
||||
|
||||
for idx, (playlist_id, playlist_title) in enumerate(all_playlists):
|
||||
# notify
|
||||
mess_dict = {
|
||||
"status": "message:playlistscan",
|
||||
"level": "info",
|
||||
"title": "Scanning channel for playlists",
|
||||
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
|
||||
}
|
||||
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
||||
print("add playlist: " + playlist_title)
|
||||
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
playlist.all_youtube_ids = all_youtube_ids
|
||||
playlist.build_json()
|
||||
|
||||
if not playlist.json_data:
|
||||
# skip if not available
|
||||
continue
|
||||
|
||||
# don't add if no videos downloaded
|
||||
downloaded = [
|
||||
i
|
||||
for i in playlist.json_data["playlist_entries"]
|
||||
if i["downloaded"]
|
||||
]
|
||||
if not downloaded:
|
||||
continue
|
||||
|
||||
playlist.upload_to_es()
|
||||
playlist.add_vids_to_playlist()
|
||||
|
||||
if all_playlists:
|
||||
playlist.get_playlist_art()
|
||||
|
||||
return
|
||||
channel.index_channel_playlists()
|
||||
|
||||
|
||||
try:
|
||||
|
Loading…
Reference in New Issue
Block a user