"""
Functionality:
- handle search to populate results to view
- cache youtube video thumbnails and channel artwork
- parse values in hit_cleanup for frontend
- calculate pagination values
"""

import math
import os
import urllib.parse
from datetime import datetime

import requests
from PIL import Image

from home.src.config import AppConfig


class SearchHandler:
    """Query Elasticsearch and prepare the hits for the frontend.

    Runs the search request, cleans every hit up for template use and,
    unless disabled, downloads any missing video thumbnails and channel
    artwork into the local cache directory.
    """

    CONFIG = AppConfig().config
    CACHE_DIR = CONFIG['application']['cache_dir']

    def __init__(self, url, data, cache=True):
        # total hit count from ES, populated by get_data()
        self.max_hits = None
        self.url = url
        # request body; falsy means a plain GET without a payload
        self.data = data
        # set False to skip thumbnail/artwork cache validation
        self.cache = cache

    def get_data(self):
        """Execute the search and return the cleaned-up list of hits.

        Returns False when the query matched nothing. Side effects:
        sets self.max_hits and, when self.cache is True, downloads any
        missing thumbnails/banners via the cache helpers.
        """
        # NOTE(review): no timeout on these requests — a stalled ES node
        # will block the caller indefinitely; consider timeout=...
        if self.data:
            response = requests.get(self.url, json=self.data).json()
        else:
            response = requests.get(self.url).json()

        if 'hits' in response:
            self.max_hits = response['hits']['total']['value']
            return_value = response['hits']['hits']
        else:
            # simulate list for single result to reuse rest of class
            return_value = [response]

        # stop if empty
        if not return_value:
            return False

        all_videos = []
        all_channels = []
        for idx, hit in enumerate(return_value):
            # hit_cleanup mutates hit in place (renames _source -> source),
            # so the cache-link helpers below see the cleaned structure
            return_value[idx] = self.hit_cleanup(hit)
            if hit['_index'] == 'ta_video':
                video_dict, channel_dict = self.vid_cache_link(hit)
                if video_dict not in all_videos:
                    all_videos.append(video_dict)
                if channel_dict not in all_channels:
                    all_channels.append(channel_dict)
            elif hit['_index'] == 'ta_channel':
                channel_dict = self.channel_cache_link(hit)
                if channel_dict not in all_channels:
                    all_channels.append(channel_dict)
        if self.cache:
            # validate cache
            self.cache_dl_vids(all_videos)
            self.cache_dl_chan(all_channels)

        return return_value

    @staticmethod
    def vid_cache_link(hit):
        """Extract thumbnail and artwork URLs from a video hit.

        Returns a (video_dict, channel_dict) pair consumed by the cache
        download helpers; chan_banner is False when the channel has no
        banner set.
        """
        source = hit['source']
        channel = source['channel']
        video_dict = {
            'youtube_id': source['youtube_id'],
            'vid_thumb': source['vid_thumb_url'],
        }
        channel_dict = {
            'channel_id': channel['channel_id'],
            'chan_thumb': channel['channel_thumb_url'],
            # not every channel document carries a banner URL
            'chan_banner': channel.get('channel_banner_url', False),
        }
        return video_dict, channel_dict

    @staticmethod
    def channel_cache_link(hit):
        """Build the artwork link dict for a channel hit.

        chan_banner is False when the channel has no banner set.
        """
        source = hit['source']
        channel_dict = {
            'channel_id': source['channel_id'],
            'chan_thumb': source['channel_thumb_url'],
            'chan_banner': source.get('channel_banner_url', False),
        }
        return channel_dict

    def cache_dl_vids(self, all_videos):
        """Download missing video thumbnails into the cache.

        Thumbnails that are not already 16:9 are center-cropped to that
        aspect ratio before saving.
        """
        vid_cache = os.path.join(self.CACHE_DIR, 'videos')
        all_vid_cached = os.listdir(vid_cache)
        # videos
        for video_dict in all_videos:
            youtube_id = video_dict['youtube_id']
            if youtube_id + '.jpg' not in all_vid_cached:
                cache_path = os.path.join(vid_cache, youtube_id + '.jpg')
                thumb_url = video_dict['vid_thumb']
                img_raw = requests.get(thumb_url, stream=True).raw
                img = Image.open(img_raw)
                width, height = img.size
                # NOTE(review): exact float compare on the aspect ratio is
                # fragile for off-by-one pixel sizes — consider math.isclose
                if width / height != 16 / 9:
                    # crop equal bars off top and bottom to reach 16:9
                    new_height = width / 16 * 9
                    offset = (height - new_height) / 2
                    img = img.crop((0, offset, width, height - offset))
                img.save(cache_path)

    def cache_dl_chan(self, all_channels):
        """Download missing channel thumbs and banners into the cache."""
        chan_cache = os.path.join(self.CACHE_DIR, 'channels')
        all_chan_cached = os.listdir(chan_cache)
        for channel_dict in all_channels:
            channel_id_cache = channel_dict['channel_id']
            channel_banner_url = channel_dict['chan_banner']
            channel_banner = channel_id_cache + '_banner.jpg'
            channel_thumb_url = channel_dict['chan_thumb']
            channel_thumb = channel_id_cache + '_thumb.jpg'
            # thumb
            if channel_thumb_url and channel_thumb not in all_chan_cached:
                cache_path = os.path.join(chan_cache, channel_thumb)
                img_raw = requests.get(channel_thumb_url, stream=True).content
                with open(cache_path, 'wb') as f:
                    f.write(img_raw)
            # banner
            if channel_banner_url and channel_banner not in all_chan_cached:
                cache_path = os.path.join(chan_cache, channel_banner)
                img_raw = requests.get(channel_banner_url, stream=True).content
                with open(cache_path, 'wb') as f:
                    f.write(img_raw)

    @staticmethod
    def hit_cleanup(hit):
        """Clean up and parse a single hit in place for the frontend.

        Renames the ES '_source' key to 'source', URL-quotes media_url
        and reformats the date fields to '%d %b, %Y' display strings.
        Returns the mutated hit.
        """
        hit['source'] = hit.pop('_source')
        hit_keys = hit['source'].keys()
        if 'media_url' in hit_keys:
            parsed_url = urllib.parse.quote(hit['source']['media_url'])
            hit['source']['media_url'] = parsed_url

        if 'published' in hit_keys:
            published = hit['source']['published']
            date_pub = datetime.strptime(published, "%Y-%m-%d")
            date_str = datetime.strftime(date_pub, "%d %b, %Y")
            hit['source']['published'] = date_str

        if 'vid_last_refresh' in hit_keys:
            vid_last_refresh = hit['source']['vid_last_refresh']
            date_refresh = datetime.fromtimestamp(vid_last_refresh)
            date_str = datetime.strftime(date_refresh, "%d %b, %Y")
            hit['source']['vid_last_refresh'] = date_str

        if 'channel_last_refresh' in hit_keys:
            refreshed = hit['source']['channel_last_refresh']
            date_refresh = datetime.fromtimestamp(refreshed)
            date_str = datetime.strftime(date_refresh, "%d %b, %Y")
            hit['source']['channel_last_refresh'] = date_str

        if 'channel' in hit_keys:
            channel_keys = hit['source']['channel'].keys()
            if 'channel_last_refresh' in channel_keys:
                refreshed = hit['source']['channel']['channel_last_refresh']
                date_refresh = datetime.fromtimestamp(refreshed)
                date_str = datetime.strftime(date_refresh, "%d %b, %Y")
                hit['source']['channel']['channel_last_refresh'] = date_str

        return hit
class Pagination:
    """Figure out the pagination based on page size and total_hits.

    first_guess() builds the pagination dict before the search request
    is made; validate() completes it once total_hits is known.
    """

    def __init__(self, page_get, search_get=False):
        config = AppConfig().config
        self.page_size = config['archive']['page_size']
        self.page_get = page_get
        self.search_get = search_get
        self.pagination = self.first_guess()

    def first_guess(self):
        """Build first guess before api call.

        Returns the pagination dict with page_size, page_from,
        prev_pages (list of up to five page numbers, or False on the
        first page) and current_page; search_get is included when set.
        """
        page_get = self.page_get
        if page_get > 1:
            page_from = (page_get - 1) * self.page_size
            # up to five previous page links, ascending order
            prev_pages = [
                i for i in range(page_get - 1, page_get - 6, -1) if i > 1
            ]
            prev_pages.reverse()
        else:
            # page 0/1 — and, fixed here, anything below — is the first
            # page; the original left page_from unbound for negatives
            page_from = 0
            prev_pages = False
        pagination = {
            "page_size": self.page_size,
            "page_from": page_from,
            "prev_pages": prev_pages,
            "current_page": page_get,
        }
        if self.search_get:
            pagination.update({"search_get": self.search_get})
        return pagination

    def validate(self, total_hits):
        """Validate pagination with total_hits after making api call.

        Adds last_page (max page number, or False when already there or
        only one page exists) and next_pages (up to five following page
        numbers) to self.pagination.
        """
        page_get = self.page_get
        max_pages = math.ceil(total_hits / self.page_size)
        # only link the last page when there is somewhere further to go
        if page_get < max_pages and max_pages > 1:
            self.pagination['last_page'] = max_pages
        else:
            self.pagination['last_page'] = False
        next_pages = [
            i for i in range(page_get + 1, page_get + 6) if 1 < i < max_pages
        ]

        self.pagination['next_pages'] = next_pages