"""
Loose collection of helper functions
- don't import AppConfig class here to avoid circular imports
"""
import json
import os
import re
import string
import subprocess
import unicodedata
import redis
import requests
REDIS_HOST = os.environ.get("REDIS_HOST")
def get_total_hits(index, es_url, match_field):
    """Return the number of docs in *index* where *match_field* is true.

    Queries elasticsearch at *es_url* and relies on filter_path to trim
    the response down to the total hit count.
    """
    headers = {"Content-type": "application/json"}
    data = {"query": {"match": {match_field: True}}}
    payload = json.dumps(data)
    url = f"{es_url}/{index}/_search?filter_path=hits.total"
    # timeout guards against hanging forever on an unresponsive ES node
    response = requests.post(url, data=payload, headers=headers, timeout=10)
    if not response.ok:
        print(response.text)
    # use the already-parsed body instead of re-parsing response.text
    total_hits = response.json()["hits"]["total"]["value"]
    return total_hits
def clean_string(file_name):
    """Reduce *file_name* to a safe ASCII-only string."""
    allowed = set("-_.() " + string.ascii_letters + string.digits)
    # decompose accented characters so the base letter survives the
    # ASCII round-trip, then drop everything non-ASCII
    decomposed = unicodedata.normalize("NFKD", file_name)
    ascii_text = decomposed.encode("ASCII", "ignore").decode().strip()
    # keep whitelisted characters only, then collapse runs of spaces
    filtered = "".join(ch for ch in ascii_text if ch in allowed)
    return re.sub(r"[ ]{2,}", " ", filtered)
def process_url_list(url_str):
    """Parse newline-separated urls in url_str[0] into youtube ids.

    Returns a list of {"url": id, "type": kind} dicts; raises
    ValueError for anything that doesn't look like a known id.
    """
    strip_tokens = ["watch?v=", "playlist?list="]
    # the length of the remaining id determines what kind of link it is
    type_by_length = {11: "video", 24: "channel", 34: "playlist"}
    youtube_ids = []
    for url in re.split("\n+", url_str[0]):
        candidate = url.strip().strip("/").split("/")[-1]
        for token in strip_tokens:
            candidate = candidate.replace(token, "")
        youtube_id = candidate.split("&")[0]
        link_type = type_by_length.get(len(youtube_id))
        if link_type is None:
            # unable to parse
            raise ValueError("not a valid url: " + url)
        youtube_ids.append({"url": youtube_id, "type": link_type})
    return youtube_ids
def set_message(key, message, expire=True):
    """Write *message* as a JSON document under *key* in redis.

    When *expire* is truthy the key is a short-lived status update
    and goes away after 20 seconds.
    """
    conn = redis.Redis(host=REDIS_HOST)
    conn.execute_command("JSON.SET", key, ".", json.dumps(message))
    if expire:
        conn.execute_command("EXPIRE", key, 20)
def get_message(key):
    """Read the JSON document stored under *key* from redis.

    Returns a falsy-status dict when the key doesn't exist.
    """
    conn = redis.Redis(host=REDIS_HOST)
    reply = conn.execute_command("JSON.GET", key)
    if not reply:
        return {"status": False}
    return json.loads(reply)
def get_dl_message(cache_dir):
    """Return the latest download progress message, if available.

    Prefers the redis "progress:download" key, falls back to
    inspecting the download cache dir, and returns a falsy-status
    dict when neither yields anything.
    """
    redis_connection = redis.Redis(host=REDIS_HOST)
    reply = redis_connection.execute_command("JSON.GET", "progress:download")
    if reply:
        return json.loads(reply)
    # use the walrus-captured result directly — the original branch
    # called monitor_cache_dir a second time, scanning the dir twice
    if cache_message := monitor_cache_dir(cache_dir):
        return cache_message
    return {"status": False}
def get_lock(lock_key):
    """Return a redis lock object on *lock_key* for task management."""
    return redis.Redis(host=REDIS_HOST).lock(lock_key)
def monitor_cache_dir(cache_dir):
    """
    look at download cache dir directly as alternative progress info
    """
    dl_cache = os.path.join(cache_dir, "download")
    entries = os.listdir(dl_cache)
    if not entries:
        # nothing downloading right now
        return False
    # drop the first 12 chars of the filename (presumably a timestamp
    # prefix — confirm against the downloader), swap underscores for
    # spaces and cut the extension to get a display title
    filename = entries[0][12:].replace("_", " ").split(".")[0]
    return {
        "status": "downloading",
        "level": "info",
        "title": "Downloading: " + filename,
        "message": "",
    }
class DurationConverter:
    """
    using ffmpeg to get and parse duration from filepath
    """

    @staticmethod
    def get_sec(file_path):
        """Probe *file_path* with ffprobe, return duration in whole seconds."""
        probe = subprocess.run(
            [
                "ffprobe",
                "-v",
                "error",
                "-show_entries",
                "format=duration",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                file_path,
            ],
            capture_output=True,
            check=True,
        )
        return int(float(probe.stdout.decode().strip()))

    @staticmethod
    def get_str(duration_sec):
        """Format *duration_sec* as a zero-padded [HH:]MM:SS string."""
        hours, remainder = divmod(duration_sec, 3600)
        minutes, secs = divmod(remainder, 60)
        pieces = []
        if hours:
            # hours appear only when nonzero
            pieces.append(str(hours).zfill(2))
        # minutes and seconds are always shown, padded to two digits
        pieces.append(str(minutes).zfill(2))
        pieces.append(str(secs).zfill(2))
        return ":".join(pieces)