tubearchivist/tubearchivist/home/src/searching.py

"""
Functionality:
- handle search to populate results to view
- cache youtube video thumbnails and channel artwork
- parse values in hit_cleanup for frontend
- calculate pagination values
"""

import math
import urllib.parse
from datetime import datetime

from home.src.config import AppConfig
from home.src.es import ElasticWrap
from home.src.helper import RedisArchivist
from home.src.thumbnails import ThumbManager


class SearchHandler:
    """search elastic search"""

    def __init__(self, path, config, data=False):
        self.max_hits = None
        self.path = path
        self.config = config
        self.data = data

    def get_data(self):
        """query es and clean up the returned hits"""
        response, _ = ElasticWrap(self.path, config=self.config).get(self.data)

        if "hits" in response.keys():
            self.max_hits = response["hits"]["total"]["value"]
            return_value = response["hits"]["hits"]
        else:
            # simulate list for single result to reuse rest of class
            return_value = [response]

        # stop if empty
        if not return_value:
            return False

        all_videos = []
        all_channels = []
        for idx, hit in enumerate(return_value):
            return_value[idx] = self.hit_cleanup(hit)
            if hit["_index"] == "ta_video":
                video_dict, channel_dict = self.vid_cache_link(hit)
                if video_dict not in all_videos:
                    all_videos.append(video_dict)
                if channel_dict not in all_channels:
                    all_channels.append(channel_dict)
            elif hit["_index"] == "ta_channel":
                channel_dict = self.channel_cache_link(hit)
                if channel_dict not in all_channels:
                    all_channels.append(channel_dict)

        return return_value

    @staticmethod
    def vid_cache_link(hit):
        """collect video and channel thumbnail links for the cache"""
        vid_thumb = hit["source"]["vid_thumb_url"]
        youtube_id = hit["source"]["youtube_id"]
        channel_id_hit = hit["source"]["channel"]["channel_id"]
        chan_thumb = hit["source"]["channel"]["channel_thumb_url"]
        try:
            chan_banner = hit["source"]["channel"]["channel_banner_url"]
        except KeyError:
            chan_banner = False

        video_dict = {"youtube_id": youtube_id, "vid_thumb": vid_thumb}
        channel_dict = {
            "channel_id": channel_id_hit,
            "chan_thumb": chan_thumb,
            "chan_banner": chan_banner,
        }
        return video_dict, channel_dict

    @staticmethod
    def channel_cache_link(hit):
        """build channel thumb links"""
        channel_id_hit = hit["source"]["channel_id"]
        chan_thumb = hit["source"]["channel_thumb_url"]
        try:
            chan_banner = hit["source"]["channel_banner_url"]
        except KeyError:
            chan_banner = False

        channel_dict = {
            "channel_id": channel_id_hit,
            "chan_thumb": chan_thumb,
            "chan_banner": chan_banner,
        }
        return channel_dict

    @staticmethod
    def hit_cleanup(hit):
        """clean up and parse data from a single hit"""
        # rename _source: django templates can not access keys
        # with a leading underscore
        hit["source"] = hit.pop("_source")
        hit_keys = hit["source"].keys()
        if "media_url" in hit_keys:
            parsed_url = urllib.parse.quote(hit["source"]["media_url"])
            hit["source"]["media_url"] = parsed_url

        if "published" in hit_keys:
            published = hit["source"]["published"]
            date_pub = datetime.strptime(published, "%Y-%m-%d")
            date_str = datetime.strftime(date_pub, "%d %b, %Y")
            hit["source"]["published"] = date_str

        if "vid_last_refresh" in hit_keys:
            vid_last_refresh = hit["source"]["vid_last_refresh"]
            date_refresh = datetime.fromtimestamp(vid_last_refresh)
            date_str = datetime.strftime(date_refresh, "%d %b, %Y")
            hit["source"]["vid_last_refresh"] = date_str

        if "playlist_last_refresh" in hit_keys:
            playlist_last_refresh = hit["source"]["playlist_last_refresh"]
            date_refresh = datetime.fromtimestamp(playlist_last_refresh)
            date_str = datetime.strftime(date_refresh, "%d %b, %Y")
            hit["source"]["playlist_last_refresh"] = date_str

        if "vid_thumb_url" in hit_keys:
            youtube_id = hit["source"]["youtube_id"]
            thumb_path = ThumbManager().vid_thumb_path(youtube_id)
            hit["source"]["vid_thumb_url"] = thumb_path

        if "channel_last_refresh" in hit_keys:
            refreshed = hit["source"]["channel_last_refresh"]
            date_refresh = datetime.fromtimestamp(refreshed)
            date_str = datetime.strftime(date_refresh, "%d %b, %Y")
            hit["source"]["channel_last_refresh"] = date_str

        if "channel" in hit_keys:
            channel_keys = hit["source"]["channel"].keys()
            if "channel_last_refresh" in channel_keys:
                refreshed = hit["source"]["channel"]["channel_last_refresh"]
                date_refresh = datetime.fromtimestamp(refreshed)
                date_str = datetime.strftime(date_refresh, "%d %b, %Y")
                hit["source"]["channel"]["channel_last_refresh"] = date_str

        return hit
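
# A minimal usage sketch, assuming a valid config dict and an existing video
# document; the doc path format follows ElasticWrap and "<youtube_id>" is a
# placeholder, not part of this module:
#   config = AppConfig().config
#   handler = SearchHandler("ta_video/_doc/<youtube_id>", config=config)
#   results = handler.get_data()  # list of cleaned-up hits, or False if empty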


class SearchForm:
    """build query from search form data"""

    CONFIG = AppConfig().config

    def multi_search(self, search_query):
        """searching through index"""
        path = "ta_video,ta_channel,ta_playlist/_search"
        # bool_prefix with the _2gram/_3gram subfields gives
        # search-as-you-type style matching across all three indexes
        data = {
            "size": 30,
            "query": {
                "multi_match": {
                    "query": search_query,
                    "type": "bool_prefix",
                    "operator": "and",
                    "fuzziness": "auto",
                    "fields": [
                        "category",
                        "channel_description",
                        "channel_name._2gram",
                        "channel_name._3gram",
                        "channel_name.search_as_you_type",
                        "playlist_description",
                        "playlist_name._2gram",
                        "playlist_name._3gram",
                        "playlist_name.search_as_you_type",
                        "tags",
                        "title._2gram",
                        "title._3gram",
                        "title.search_as_you_type",
                    ],
                }
            },
        }
        look_up = SearchHandler(path, config=self.CONFIG, data=data)
        search_results = look_up.get_data()
        all_results = self.build_results(search_results)

        return {"results": all_results}

    @staticmethod
    def build_results(search_results):
        """build the all_results dict"""
        video_results = []
        channel_results = []
        playlist_results = []
        if search_results:
            for result in search_results:
                if result["_index"] == "ta_video":
                    video_results.append(result)
                elif result["_index"] == "ta_channel":
                    channel_results.append(result)
                elif result["_index"] == "ta_playlist":
                    playlist_results.append(result)

        all_results = {
            "video_results": video_results,
            "channel_results": channel_results,
            "playlist_results": playlist_results,
        }

        return all_results
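
# Usage sketch, assuming the form passes a plain query string:
#   all_results = SearchForm().multi_search("some search term")
#   videos = all_results["results"]["video_results"]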


class Pagination:
    """
    figure out the pagination based on page size and total_hits
    """

    def __init__(self, page_get, user_id, search_get=False):
        self.user_id = user_id
        self.page_size = self.get_page_size()
        self.page_get = page_get
        self.search_get = search_get
        self.pagination = self.first_guess()

    def get_page_size(self):
        """get default or user modified page_size"""
        key = f"{self.user_id}:page_size"
        page_size = RedisArchivist().get_message(key)["status"]
        if not page_size:
            config = AppConfig().config
            page_size = config["archive"]["page_size"]

        return page_size

    def first_guess(self):
        """build first guess before api call"""
        page_get = self.page_get
        if page_get in [0, 1]:
            page_from = 0
            prev_pages = False
        elif page_get > 1:
            page_from = (page_get - 1) * self.page_size
            # up to five previous pages, filtered and flipped
            # into ascending order
            prev_pages = [
                i for i in range(page_get - 1, page_get - 6, -1) if i > 1
            ]
            prev_pages.reverse()

        pagination = {
            "page_size": self.page_size,
            "page_from": page_from,
            "prev_pages": prev_pages,
            "current_page": page_get,
        }
        if self.search_get:
            pagination.update({"search_get": self.search_get})

        return pagination

    def validate(self, total_hits):
        """validate pagination with total_hits after making api call"""
        page_get = self.page_get
        max_pages = math.ceil(total_hits / self.page_size)

        if page_get < max_pages and max_pages > 1:
            self.pagination["last_page"] = max_pages
        else:
            self.pagination["last_page"] = False

        next_pages = [
            i for i in range(page_get + 1, page_get + 6) if 1 < i < max_pages
        ]
        self.pagination["next_pages"] = next_pages
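
# Usage sketch, assuming total_hits is known after the es call:
#   pagination_handler = Pagination(page_get=2, user_id=0)
#   ... run the search using pagination_handler.pagination["page_from"] ...
#   pagination_handler.validate(total_hits=150)
#   last_page = pagination_handler.pagination["last_page"]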