# tubearchivist/home/src/index/generic.py
"""
functionality:
- generic base class to inherit from for video, channel and playlist
"""
import math
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
class YouTubeItem:
    """Generic base class for indexed YouTube items.

    Video, channel and playlist indexers inherit from this. Subclasses
    override es_path, index_name and yt_base; instances hold the raw
    yt-dlp metadata (youtube_meta) and the Elasticsearch document
    (json_data).
    """

    # overridden by subclasses: ES document path, ES index name and the
    # YouTube URL prefix the item id gets appended to
    es_path = False
    index_name = False
    yt_obs = {
        # metadata only, never download media
        "skip_download": True,
        "noplaylist": True,
    }
    yt_base = False

    def __init__(self, youtube_id):
        self.youtube_id = youtube_id
        self.config = AppConfig().config
        self.app_conf = self.config["application"]
        # populated lazily by get_from_youtube / get_from_es
        self.youtube_meta = False
        self.json_data = False

    def get_from_youtube(self):
        """use yt-dlp to get meta data from youtube"""
        print(f"{self.youtube_id}: get metadata from youtube")
        url = self.yt_base + self.youtube_id
        self.youtube_meta = YtWrap(self.yt_obs, self.config).extract(url)

    def get_from_es(self):
        """get indexed data from elastic search"""
        print(f"{self.youtube_id}: get metadata from es")
        # pass es_path directly, consistent with upload_to_es/del_in_es;
        # the f-string wrapper here was redundant
        response, _ = ElasticWrap(self.es_path).get()
        self.json_data = response.get("_source")

    def upload_to_es(self):
        """add json_data to elastic"""
        _, _ = ElasticWrap(self.es_path).put(self.json_data, refresh=True)

    def deactivate(self):
        """deactivate document in es instead of deleting it"""
        print(f"{self.youtube_id}: deactivate document")
        # each index stores its active flag under a different field name
        key_match = {
            "ta_video": "active",
            "ta_channel": "channel_active",
            "ta_playlist": "playlist_active",
        }
        path = f"{self.index_name}/_update/{self.youtube_id}?refresh=true"
        data = {
            "script": f"ctx._source.{key_match.get(self.index_name)} = false"
        }
        _, _ = ElasticWrap(path).post(data)

    def del_in_es(self):
        """delete item from elastic search"""
        print(f"{self.youtube_id}: delete from es")
        _, _ = ElasticWrap(self.es_path).delete(refresh=True)
2022-01-22 15:13:37 +00:00
class Pagination:
    """
    figure out the pagination based on page size and total_hits
    """

    def __init__(self, request):
        self.request = request
        self.page_get = False
        self.params = False
        self.get_params()
        self.page_size = self.get_page_size()
        self.pagination = self.first_guess()

    def get_params(self):
        """process url query parameters"""
        query_dict = self.request.GET.copy()
        # requested page number, defaults to the first page
        self.page_get = int(query_dict.get("page", 0))
        # keep the remaining query params to rebuild pagination links
        _ = query_dict.pop("page", False)
        self.params = query_dict.urlencode()

    def get_page_size(self):
        """get default or user modified page_size"""
        key = f"{self.request.user.id}:page_size"
        page_size = RedisArchivist().get_message(key)["status"]
        if not page_size:
            # no per-user override, fall back to application default
            config = AppConfig().config
            page_size = config["archive"]["page_size"]

        return page_size

    def first_guess(self):
        """build first guess before api call"""
        page_get = self.page_get
        if page_get > 1:
            page_from = (page_get - 1) * self.page_size
            # up to four previous pages, ascending order
            prev_pages = [
                i for i in range(page_get - 1, page_get - 6, -1) if i > 1
            ]
            prev_pages.reverse()
        else:
            # covers 0, 1 and any out-of-range negative page value;
            # previously a negative ?page raised UnboundLocalError
            page_from = 0
            prev_pages = False

        pagination = {
            "page_size": self.page_size,
            "page_from": page_from,
            "prev_pages": prev_pages,
            "current_page": page_get,
            "max_hits": False,
            "params": self.params,
        }

        return pagination

    def validate(self, total_hits):
        """validate pagination with total_hits after making api call"""
        page_get = self.page_get
        max_pages = math.ceil(total_hits / self.page_size)
        if total_hits >= 10000:
            # es returns maximal 10000 results
            self.pagination["max_hits"] = True
            max_pages = max_pages - 1

        if page_get < max_pages and max_pages > 1:
            self.pagination["last_page"] = max_pages
        else:
            self.pagination["last_page"] = False

        # up to five upcoming pages, excluding first and last page
        next_pages = [
            i for i in range(page_get + 1, page_get + 6) if 1 < i < max_pages
        ]
        self.pagination["next_pages"] = next_pages
        self.pagination["total_hits"] = total_hits