"""
Loose collection of helper functions
- don't import AppConfig class here to avoid circular imports
"""
import json
import os
import re
import string
import subprocess
import unicodedata
import redis
import requests
REDIS_HOST = os.environ.get("REDIS_HOST")
def get_total_hits(index, es_url, match_field):
    """Return the number of docs in *index* where *match_field* is true.

    Queries elasticsearch at *es_url* and relies on filter_path to trim
    the response down to the total hit count.
    """
    headers = {"Content-type": "application/json"}
    data = {"query": {"match": {match_field: True}}}
    payload = json.dumps(data)
    url = f"{es_url}/{index}/_search?filter_path=hits.total"
    # timeout guards against hanging forever on an unresponsive ES node
    response = requests.post(url, data=payload, headers=headers, timeout=10)
    if not response.ok:
        print(response.text)
    # use the already-parsed body instead of re-parsing response.text
    total_hits = response.json()["hits"]["total"]["value"]
    return total_hits
def clean_string(file_name):
    """Reduce *file_name* to a safe ASCII-only string."""
    allowed = set("-_.() " + string.ascii_letters + string.digits)
    # decompose accented characters so the base letter survives the
    # ASCII round-trip, then drop everything non-ASCII
    decomposed = unicodedata.normalize("NFKD", file_name)
    ascii_text = decomposed.encode("ASCII", "ignore").decode().strip()
    # keep whitelisted characters only, then collapse runs of spaces
    filtered = "".join(ch for ch in ascii_text if ch in allowed)
    return re.sub(r"[ ]{2,}", " ", filtered)
def process_url_list(url_str):
    """Parse newline-separated urls in url_str[0] into youtube ids.

    Returns a list of {"url": id, "type": kind} dicts; raises
    ValueError for anything that doesn't look like a known id.
    """
    strip_tokens = ["watch?v=", "playlist?list="]
    # the length of the remaining id determines what kind of link it is
    type_by_length = {11: "video", 24: "channel", 34: "playlist"}
    youtube_ids = []
    for url in re.split("\n+", url_str[0]):
        candidate = url.strip().strip("/").split("/")[-1]
        for token in strip_tokens:
            candidate = candidate.replace(token, "")
        youtube_id = candidate.split("&")[0]
        link_type = type_by_length.get(len(youtube_id))
        if link_type is None:
            # unable to parse
            raise ValueError("not a valid url: " + url)
        youtube_ids.append({"url": youtube_id, "type": link_type})
    return youtube_ids
def set_message(key, message, expire=True):
    """Write *message* as a JSON document under *key* in redis.

    When *expire* is truthy the key is a short-lived status update
    and goes away after 20 seconds.
    """
    conn = redis.Redis(host=REDIS_HOST)
    conn.execute_command("JSON.SET", key, ".", json.dumps(message))
    if expire:
        conn.execute_command("EXPIRE", key, 20)
def get_message(key):
    """Read the JSON document stored under *key* from redis.

    Returns a falsy-status dict when the key doesn't exist.
    """
    conn = redis.Redis(host=REDIS_HOST)
    reply = conn.execute_command("JSON.GET", key)
    if not reply:
        return {"status": False}
    return json.loads(reply)
def get_dl_message(cache_dir):
    """Return the latest download progress message, if available.

    Prefers the redis "progress:download" key, falls back to
    inspecting the download cache dir, and returns a falsy-status
    dict when neither yields anything.
    """
    redis_connection = redis.Redis(host=REDIS_HOST)
    reply = redis_connection.execute_command("JSON.GET", "progress:download")
    if reply:
        return json.loads(reply)
    # use the walrus-captured result directly — the original branch
    # called monitor_cache_dir a second time, scanning the dir twice
    if cache_message := monitor_cache_dir(cache_dir):
        return cache_message
    return {"status": False}
def get_lock(lock_key):
    """Return a redis lock object on *lock_key* for task management."""
    return redis.Redis(host=REDIS_HOST).lock(lock_key)
def monitor_cache_dir(cache_dir):
    """
    look at download cache dir directly as alternative progress info
    """
    dl_cache = os.path.join(cache_dir, "download")
    entries = os.listdir(dl_cache)
    if not entries:
        # nothing downloading right now
        return False
    # drop the first 12 chars of the filename (presumably a timestamp
    # prefix — confirm against the downloader), swap underscores for
    # spaces and cut the extension to get a display title
    filename = entries[0][12:].replace("_", " ").split(".")[0]
    return {
        "status": "downloading",
        "level": "info",
        "title": "Downloading: " + filename,
        "message": "",
    }
class DurationConverter:
    """
    using ffmpeg to get and parse duration from filepath
    """

    @staticmethod
    def get_sec(file_path):
        """Probe *file_path* with ffprobe, return duration in whole seconds."""
        probe = subprocess.run(
            [
                "ffprobe",
                "-v",
                "error",
                "-show_entries",
                "format=duration",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                file_path,
            ],
            capture_output=True,
            check=True,
        )
        return int(float(probe.stdout.decode().strip()))

    @staticmethod
    def get_str(duration_sec):
        """Format *duration_sec* as a zero-padded [HH:]MM:SS string."""
        hours, remainder = divmod(duration_sec, 3600)
        minutes, secs = divmod(remainder, 60)
        pieces = []
        if hours:
            # hours appear only when nonzero
            pieces.append(str(hours).zfill(2))
        # minutes and seconds are always shown, padded to two digits
        pieces.append(str(minutes).zfill(2))
        pieces.append(str(secs).zfill(2))
        return ":".join(pieces)