From d6f679784ea3847df67c4b06f6923008c93d3e72 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 20 May 2024 13:17:11 +0200 Subject: [PATCH 01/29] fix logout django 5 needing post request --- tubearchivist/home/templates/home/base.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/templates/home/base.html b/tubearchivist/home/templates/home/base.html index 63e22865..2bfdef8f 100644 --- a/tubearchivist/home/templates/home/base.html +++ b/tubearchivist/home/templates/home/base.html @@ -60,7 +60,10 @@ gear-icon - + + exit-icon From 758038f93d85eb48096574f29603d5e471b3a5cf Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 20 May 2024 13:34:45 +0200 Subject: [PATCH 02/29] fix sync_to_videos in channel reindex --- tubearchivist/home/src/index/reindex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index 1db975e5..51563998 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -375,6 +375,7 @@ class Reindex(ReindexBase): channel.json_data["channel_overwrites"] = overwrites channel.upload_to_es() + channel.sync_to_videos() ChannelFullScan(channel_id).scan() self.processed["channels"] += 1 From 1bcb4f1f7c5a929d01da85db4e328dd8b95b9a10 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 12:16:30 +0200 Subject: [PATCH 03/29] remove unused CONFIG init --- tubearchivist/home/celery.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tubearchivist/home/celery.py b/tubearchivist/home/celery.py index 9f65795b..1f3369f8 100644 --- a/tubearchivist/home/celery.py +++ b/tubearchivist/home/celery.py @@ -3,10 +3,8 @@ import os from celery import Celery -from home.src.ta.config import AppConfig from home.src.ta.settings import EnvironmentSettings -CONFIG = AppConfig().config REDIS_HOST = EnvironmentSettings.REDIS_HOST REDIS_PORT = EnvironmentSettings.REDIS_PORT From ae89f47072852dc9e417cc18edc99ebb3c46fed6 
Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 12:18:25 +0200 Subject: [PATCH 04/29] make application testable --- Dockerfile | 2 +- tubearchivist/config/settings.py | 4 +++- tubearchivist/requirements-dev.txt | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7e8e0d14..33a6acff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,7 @@ RUN apt-get clean && apt-get -y update && apt-get -y install --no-install-recomm RUN if [ "$INSTALL_DEBUG" ] ; then \ apt-get -y update && apt-get -y install --no-install-recommends \ vim htop bmon net-tools iputils-ping procps \ - && pip install --user ipython \ + && pip install --user ipython pytest pytest-django \ ; fi # make folders diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index ac41eaa0..b81e3b49 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -33,7 +33,9 @@ SECRET_KEY = PW_HASH.hexdigest() # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = bool(environ.get("DJANGO_DEBUG")) -ALLOWED_HOSTS, CSRF_TRUSTED_ORIGINS = ta_host_parser(environ["TA_HOST"]) +ALLOWED_HOSTS, CSRF_TRUSTED_ORIGINS = ta_host_parser( + environ.get("TA_HOST", "localhost") +) # Application definition diff --git a/tubearchivist/requirements-dev.txt b/tubearchivist/requirements-dev.txt index 33c81ec8..02778fba 100644 --- a/tubearchivist/requirements-dev.txt +++ b/tubearchivist/requirements-dev.txt @@ -5,4 +5,6 @@ flake8 isort pylint pylint-django +pytest +pytest-django types-requests From 3e3e3ae78e5e61f2548604ea87f3f69fce4773cd Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 13:05:40 +0200 Subject: [PATCH 05/29] add tests for urlparser --- tubearchivist/home/tests/test_ta/__init__.py | 0 .../home/tests/test_ta/test_urlparser.py | 128 ++++++++++++++++++ tubearchivist/home/tests/test_views.py | 42 ------ 3 files changed, 128 insertions(+), 42 deletions(-) create mode 100644 tubearchivist/home/tests/test_ta/__init__.py create mode 100644 tubearchivist/home/tests/test_ta/test_urlparser.py delete mode 100644 tubearchivist/home/tests/test_views.py diff --git a/tubearchivist/home/tests/test_ta/__init__.py b/tubearchivist/home/tests/test_ta/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tubearchivist/home/tests/test_ta/test_urlparser.py b/tubearchivist/home/tests/test_ta/test_urlparser.py new file mode 100644 index 00000000..21e5a31c --- /dev/null +++ b/tubearchivist/home/tests/test_ta/test_urlparser.py @@ -0,0 +1,128 @@ +"""tests for url parser""" + +import pytest +from home.src.ta.urlparser import Parser + +# video id parsing +VIDEO_URL_IN = [ + "7DKv5H5Frt0", + "https://www.youtube.com/watch?v=7DKv5H5Frt0", + "https://www.youtube.com/watch?v=7DKv5H5Frt0&t=113&feature=shared", + "https://www.youtube.com/watch?v=7DKv5H5Frt0&list=PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5&index=1&pp=iAQB", # noqa: E501 + "https://youtu.be/7DKv5H5Frt0", +] +VIDEO_OUT = [{"type": "video", "url": "7DKv5H5Frt0", "vid_type": 
"unknown"}] +VIDEO_TEST_CASES = [(i, VIDEO_OUT) for i in VIDEO_URL_IN] + +# shorts id parsing +SHORTS_URL_IN = [ + "https://www.youtube.com/shorts/YG3-Pw3rixU", + "https://youtube.com/shorts/YG3-Pw3rixU?feature=shared", +] +SHORTS_OUT = [{"type": "video", "url": "YG3-Pw3rixU", "vid_type": "shorts"}] +SHORTS_TEST_CASES = [(i, SHORTS_OUT) for i in SHORTS_URL_IN] + +# channel id parsing +CHANNEL_URL_IN = [ + "UCBa659QWEk1AI4Tg--mrJ2A", + "@TomScottGo", + "https://www.youtube.com/channel/UCBa659QWEk1AI4Tg--mrJ2A", + "https://www.youtube.com/@TomScottGo", +] +CHANNEL_OUT = [ + { + "type": "channel", + "url": "UCBa659QWEk1AI4Tg--mrJ2A", + "vid_type": "unknown", + } +] +CHANNEL_TEST_CASES = [(i, CHANNEL_OUT) for i in CHANNEL_URL_IN] + +# channel vid type parsing +CHANNEL_VID_TYPES = [ + ( + "https://www.youtube.com/@IBRACORP/videos", + [ + { + "type": "channel", + "url": "UC7aW7chIafJG6ECYAd3N5uQ", + "vid_type": "videos", + } + ], + ), + ( + "https://www.youtube.com/@IBRACORP/shorts", + [ + { + "type": "channel", + "url": "UC7aW7chIafJG6ECYAd3N5uQ", + "vid_type": "shorts", + } + ], + ), + ( + "https://www.youtube.com/@IBRACORP/streams", + [ + { + "type": "channel", + "url": "UC7aW7chIafJG6ECYAd3N5uQ", + "vid_type": "streams", + } + ], + ), +] + +# playlist id parsing +PLAYLIST_URL_IN = [ + "PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5", + "https://www.youtube.com/playlist?list=PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5", +] +PLAYLIST_OUT = [ + { + "type": "playlist", + "url": "PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5", + "vid_type": "unknown", + } +] +PLAYLIST_TEST_CASES = [(i, PLAYLIST_OUT) for i in PLAYLIST_URL_IN] + +# personal playlists +EXPECTED_WL = [{"type": "playlist", "url": "WL", "vid_type": "unknown"}] +EXPECTED_LL = [{"type": "playlist", "url": "LL", "vid_type": "unknown"}] +PERSONAL_PLAYLISTS_TEST_CASES = [ + ("WL", EXPECTED_WL), + ("https://www.youtube.com/playlist?list=WL", EXPECTED_WL), + ("LL", EXPECTED_LL), + ("https://www.youtube.com/playlist?list=LL", EXPECTED_LL), +] + +# 
collect tests expected to pass +PASSTING_TESTS = [] +PASSTING_TESTS.extend(VIDEO_TEST_CASES) +PASSTING_TESTS.extend(SHORTS_TEST_CASES) +PASSTING_TESTS.extend(CHANNEL_TEST_CASES) +PASSTING_TESTS.extend(CHANNEL_VID_TYPES) +PASSTING_TESTS.extend(PLAYLIST_TEST_CASES) +PASSTING_TESTS.extend(PERSONAL_PLAYLISTS_TEST_CASES) + + +@pytest.mark.parametrize("url_str, expected_result", PASSTING_TESTS) +def test_passing_parse(url_str, expected_result): + """test parser""" + parser = Parser(url_str) + parsed = parser.parse() + assert parsed == expected_result + + +INVALID_IDS_ERRORS = [ + "aaaaa", + "https://www.youtube.com/playlist?list=AAAA", +] + + +@pytest.mark.parametrize("invalid_value", INVALID_IDS_ERRORS) +def test_utility_class_init_raises_value_error(invalid_value): + """test for invalid IDs""" + with pytest.raises(ValueError, match="not a valid id_str"): + parser = Parser(invalid_value) + parser.parse() diff --git a/tubearchivist/home/tests/test_views.py b/tubearchivist/home/tests/test_views.py deleted file mode 100644 index 0d3bd23f..00000000 --- a/tubearchivist/home/tests/test_views.py +++ /dev/null @@ -1,42 +0,0 @@ -"""All test classes""" - -from django.test import TestCase - - -class URLTests(TestCase): - """test if all expected URL are there""" - - def test_home_view(self): - """check homepage""" - response = self.client.get("/") - self.assertEqual(response.status_code, 200) - - def test_about_view(self): - """check about page""" - response = self.client.get("/about/") - self.assertEqual(response.status_code, 200) - - def test_downloads_view(self): - """check downloads page""" - response = self.client.get("/downloads/") - self.assertEqual(response.status_code, 200) - - def test_channel_view(self): - """check channel page""" - response = self.client.get("/channel/") - self.assertEqual(response.status_code, 200) - - def test_settings_view(self): - """check settings page""" - response = self.client.get("/settings/") - self.assertEqual(response.status_code, 200) - - 
def test_progress_view(self): - """check ajax progress endpoint""" - response = self.client.get("/downloads/progress/") - self.assertEqual(response.status_code, 200) - - def test_process_view(self): - """check process ajax endpoint""" - response = self.client.get("/process/") - self.assertEqual(response.status_code, 200) From c9285ce3a11db01b2b79ef5656f54bc841c57dd0 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 13:06:40 +0200 Subject: [PATCH 06/29] bump requests --- tubearchivist/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index be4b3e94..843d6d46 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -7,8 +7,7 @@ django-cors-headers==4.3.1 djangorestframework==3.15.1 Pillow==10.3.0 redis==5.0.4 -requests==2.31.0 +requests==2.32.1 ryd-client==0.0.6 uWSGI==2.0.25.1 whitenoise==6.6.0 -yt-dlp @ git+https://github.com/bbilly1/yt-dlp@54b823be28f396608349cca69d52eb4c4b72b8b0 From d6b5521c1187587c6a90eb91cf554eaf21041431 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 16:22:59 +0200 Subject: [PATCH 07/29] fix naming --- tubearchivist/home/tests/test_ta/test_urlparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/tests/test_ta/test_urlparser.py b/tubearchivist/home/tests/test_ta/test_urlparser.py index 21e5a31c..a75f5ab3 100644 --- a/tubearchivist/home/tests/test_ta/test_urlparser.py +++ b/tubearchivist/home/tests/test_ta/test_urlparser.py @@ -121,7 +121,7 @@ INVALID_IDS_ERRORS = [ @pytest.mark.parametrize("invalid_value", INVALID_IDS_ERRORS) -def test_utility_class_init_raises_value_error(invalid_value): +def test_invalid_ids(invalid_value): """test for invalid IDs""" with pytest.raises(ValueError, match="not a valid id_str"): parser = Parser(invalid_value) From 819d16629620dbe319f8dccd49dceb29565f2dc5 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 16:35:43 
+0200 Subject: [PATCH 08/29] update actions version --- .github/workflows/lint_python.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 2b2c0930..11f7f8bf 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -4,11 +4,13 @@ jobs: lint_python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: - python-version: '3.10' - - run: pip install --upgrade pip wheel - - run: pip install bandit black codespell flake8 flake8-bugbear - flake8-comprehensions isort requests - - run: ./deploy.sh validate + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r tubearchivist/requirements-dev.txt + - name: Run Linter + run: ./deploy.sh validate From ca1864843aa55fac56e5d28255d40a155fae71e2 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 16:41:37 +0200 Subject: [PATCH 09/29] fix install repo dependencies --- .github/workflows/lint_python.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 11f7f8bf..6f0d34e5 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -8,7 +8,11 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Install dependencies + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y libldap2-dev libsasl2-dev libssl-dev + - name: Install python dependencies run: | python -m pip install --upgrade pip pip install -r tubearchivist/requirements-dev.txt From 91f9351502b33274a354969258babba15ffc3751 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 16:51:06 +0200 Subject: [PATCH 10/29] limit action run on path glob --- 
.github/workflows/lint_js.yml | 14 ++++++++++---- .github/workflows/lint_python.yml | 10 +++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/workflows/lint_js.yml b/.github/workflows/lint_js.yml index 73038422..de2650b3 100644 --- a/.github/workflows/lint_js.yml +++ b/.github/workflows/lint_js.yml @@ -1,16 +1,22 @@ name: lint_js -on: [pull_request, push] +on: + push: + paths: + - '**/*.js' + pull_request: + paths: + - '**/*.js' jobs: check: name: lint_js runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 with: - node-version: '16' + node-version: '22' - run: npm ci - run: npm run lint - run: npm run format -- --check diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 6f0d34e5..ee410697 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -1,5 +1,13 @@ name: lint_python -on: [pull_request, push] + +on: + push: + paths: + - '**/*.py' + pull_request: + paths: + - '**/*.py' + jobs: lint_python: runs-on: ubuntu-latest From 873e7d1e8dfee0269e78db0528fde2c3adb7a622 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 17:41:08 +0200 Subject: [PATCH 11/29] add gcc to runner --- .github/workflows/lint_python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index ee410697..2084c520 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -19,7 +19,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y libldap2-dev libsasl2-dev libssl-dev + sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev - name: Install python dependencies run: | python -m pip install --upgrade pip From f9eff28da788819c7ef8294795555010529d9d6d Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 19:05:49 +0200 
Subject: [PATCH 12/29] replace deprecated STATICFILES_STORAGE --- tubearchivist/config/settings.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index b81e3b49..8ec78889 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -243,7 +243,11 @@ USE_TZ = True STATIC_URL = "/static/" STATICFILES_DIRS = (str(BASE_DIR.joinpath("static")),) STATIC_ROOT = str(BASE_DIR.joinpath("staticfiles")) -STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage" +STORAGES = { + "staticfiles": { + "BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage", + }, +} # Default primary key field type # https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field From e2253fc63b94c83e8c5db255ce138ae0dce06152 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 19:08:29 +0200 Subject: [PATCH 13/29] switch to official yt-dlp commit --- tubearchivist/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index 843d6d46..ac8782cb 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -11,3 +11,4 @@ requests==2.32.1 ryd-client==0.0.6 uWSGI==2.0.25.1 whitenoise==6.6.0 +yt-dlp @ git+https://github.com/yt-dlp/yt-dlp@8e15177b4113c355989881e4e030f695a9b59c3a From 8a2040aa265e05c381406747e54e8931168a9b35 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 19:08:45 +0200 Subject: [PATCH 14/29] ignore pytest cache --- deploy.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy.sh b/deploy.sh index a3620bb0..6087b03f 100755 --- a/deploy.sh +++ b/deploy.sh @@ -50,6 +50,7 @@ function sync_test { --exclude ".gitignore" \ --exclude "**/cache" \ --exclude "**/__pycache__/" \ + --exclude "**/.pytest_cache/" \ --exclude ".venv" \ --exclude "db.sqlite3" \ --exclude ".mypy_cache" \ From 
cfa0cabd41dc1cd07285cf3313859a37659d8008 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 19:10:22 +0200 Subject: [PATCH 15/29] add unit tests workflow --- .github/workflows/unit_tests.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/unit_tests.yml diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml new file mode 100644 index 00000000..692398a0 --- /dev/null +++ b/.github/workflows/unit_tests.yml @@ -0,0 +1,30 @@ +name: python_unit_tests + +on: + push: + paths: + - '**/*.py' + pull_request: + paths: + - '**/*.py' + +jobs: + unit-tests: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r tubearchivist/requirements-dev.txt + + - name: Run unit tests + run: pytest tubearchivist From 30c20b41d9efa82989897c24910047e73e849024 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 20:14:40 +0200 Subject: [PATCH 16/29] test and better handle invalid domains --- tubearchivist/api/views.py | 14 +++++++------- tubearchivist/home/src/ta/urlparser.py | 4 ++++ .../home/tests/test_ta/test_urlparser.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 1daca9dc..6009568c 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -688,14 +688,14 @@ class DownloadApiListView(ApiBaseView): pending = [i["youtube_id"] for i in to_add if i["status"] == "pending"] url_str = " ".join(pending) - try: - youtube_ids = Parser(url_str).parse() - except ValueError: - message = f"failed to parse: {url_str}" - print(message) - return Response({"message": message}, status=400) + # try: + # youtube_ids = Parser(url_str).parse() + # except ValueError: + # message = 
f"failed to parse: {url_str}" + # print(message) + # return Response({"message": message}, status=400) - extrac_dl.delay(youtube_ids, auto_start=auto_start) + extrac_dl.delay(url_str, auto_start=auto_start) return Response(data) diff --git a/tubearchivist/home/src/ta/urlparser.py b/tubearchivist/home/src/ta/urlparser.py index 743af988..924961ac 100644 --- a/tubearchivist/home/src/ta/urlparser.py +++ b/tubearchivist/home/src/ta/urlparser.py @@ -42,6 +42,10 @@ class Parser: youtube_id = parsed.path.strip("/") return self._validate_expected(youtube_id, "video") + if "youtube.com" not in parsed.netloc: + message = f"invalid domain: {parsed.netloc}" + raise ValueError(message) + query_parsed = parse_qs(parsed.query) if "v" in query_parsed: # video from v query str diff --git a/tubearchivist/home/tests/test_ta/test_urlparser.py b/tubearchivist/home/tests/test_ta/test_urlparser.py index a75f5ab3..f30f1e6b 100644 --- a/tubearchivist/home/tests/test_ta/test_urlparser.py +++ b/tubearchivist/home/tests/test_ta/test_urlparser.py @@ -117,6 +117,7 @@ def test_passing_parse(url_str, expected_result): INVALID_IDS_ERRORS = [ "aaaaa", "https://www.youtube.com/playlist?list=AAAA", + "https://www.youtube.com/channel/UC9-y-6csu5WGm29I7Jiwpn", ] @@ -126,3 +127,17 @@ def test_invalid_ids(invalid_value): with pytest.raises(ValueError, match="not a valid id_str"): parser = Parser(invalid_value) parser.parse() + + +INVALID_DOMAINS = [ + "https://vimeo.com/32001208", + "https://peertube.tv/w/8RiJE2j2nw569FVgPNjDt7", +] + + +@pytest.mark.parametrize("invalid_value", INVALID_DOMAINS) +def test_invalid_domains(invalid_value): + """raise error on none YT domains""" + parser = Parser(invalid_value) + with pytest.raises(ValueError, match="invalid domain"): + parser.parse() From d3103ccc300d54024f9f0bc583fe5ef6a1fd737a Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 20:17:47 +0200 Subject: [PATCH 17/29] install missing system dependencies --- .github/workflows/unit_tests.yml | 5 +++++ 1 
file changed, 5 insertions(+) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 692398a0..331aea27 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -21,6 +21,11 @@ jobs: with: python-version: '3.11' + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev + - name: Install dependencies run: | python -m pip install --upgrade pip From d7818871a441face93b23da173fdc722ed4eaeed Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 20:18:27 +0200 Subject: [PATCH 18/29] clean up --- tubearchivist/api/views.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 6009568c..898c721d 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -688,13 +688,6 @@ class DownloadApiListView(ApiBaseView): pending = [i["youtube_id"] for i in to_add if i["status"] == "pending"] url_str = " ".join(pending) - # try: - # youtube_ids = Parser(url_str).parse() - # except ValueError: - # message = f"failed to parse: {url_str}" - # print(message) - # return Response({"message": message}, status=400) - extrac_dl.delay(url_str, auto_start=auto_start) return Response(data) From 40beb35f3497387b9da51736a117c5af114d1c6b Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 20:54:04 +0200 Subject: [PATCH 19/29] fix typo, better date parsing error --- tubearchivist/api/src/search_processor.py | 12 ++++++------ tubearchivist/home/src/ta/helper.py | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py index 9f77b05e..a7891c96 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -7,7 +7,7 @@ Functionality: import urllib.parse from home.src.download.thumbnails import ThumbManager -from home.src.ta.helper 
import date_praser, get_duration_str +from home.src.ta.helper import date_parser, get_duration_str from home.src.ta.settings import EnvironmentSettings @@ -67,7 +67,7 @@ class SearchProcess: """run on single channel""" channel_id = channel_dict["channel_id"] art_base = f"/cache/channels/{channel_id}" - date_str = date_praser(channel_dict["channel_last_refresh"]) + date_str = date_parser(channel_dict["channel_last_refresh"]) channel_dict.update( { "channel_last_refresh": date_str, @@ -83,8 +83,8 @@ class SearchProcess: """run on single video dict""" video_id = video_dict["youtube_id"] media_url = urllib.parse.quote(video_dict["media_url"]) - vid_last_refresh = date_praser(video_dict["vid_last_refresh"]) - published = date_praser(video_dict["published"]) + vid_last_refresh = date_parser(video_dict["vid_last_refresh"]) + published = date_parser(video_dict["published"]) vid_thumb_url = ThumbManager(video_id).vid_thumb_path() channel = self._process_channel(video_dict["channel"]) @@ -109,7 +109,7 @@ class SearchProcess: def _process_playlist(playlist_dict): """run on single playlist dict""" playlist_id = playlist_dict["playlist_id"] - playlist_last_refresh = date_praser( + playlist_last_refresh = date_parser( playlist_dict["playlist_last_refresh"] ) playlist_dict.update( @@ -125,7 +125,7 @@ class SearchProcess: """run on single download item""" video_id = download_dict["youtube_id"] vid_thumb_url = ThumbManager(video_id).vid_thumb_path() - published = date_praser(download_dict["published"]) + published = date_parser(download_dict["published"]) download_dict.update( { diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index 05e1ef0e..ea2eb999 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -93,12 +93,14 @@ def requests_headers() -> dict[str, str]: return {"User-Agent": template} -def date_praser(timestamp: int | str) -> str: +def date_parser(timestamp: int | str) -> str: """return formatted 
date string""" if isinstance(timestamp, int): date_obj = datetime.fromtimestamp(timestamp) elif isinstance(timestamp, str): date_obj = datetime.strptime(timestamp, "%Y-%m-%d") + else: + raise TypeError(f"invalid timestamp: {timestamp}") return date_obj.date().isoformat() From 7111fedd5cf2d654f3f283de4516c387b7d0b2b6 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 20:55:36 +0200 Subject: [PATCH 20/29] fix redirect with is_shorts --- tubearchivist/home/src/ta/helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index ea2eb999..88c6b50d 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -140,8 +140,9 @@ def get_mapping() -> dict: def is_shorts(youtube_id: str) -> bool: """check if youtube_id is a shorts video, bot not it it's not a shorts""" shorts_url = f"https://www.youtube.com/shorts/{youtube_id}" + cookies = {"SOCS": "CAI"} response = requests.head( - shorts_url, headers=requests_headers(), timeout=10 + shorts_url, cookies=cookies, headers=requests_headers(), timeout=10 ) return response.status_code == 200 From 241e76c17d3c39c7e7cd22c61718c656233752f2 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 21:08:24 +0200 Subject: [PATCH 21/29] cleaner get_duration_str without zfill on first part --- tubearchivist/home/src/ta/helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index 88c6b50d..767b4a00 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -186,6 +186,8 @@ def get_duration_str(seconds: int) -> str: unit_count, seconds = divmod(seconds, unit_seconds) duration_parts.append(f"{unit_count:02}{unit_label}") + duration_parts[0] = duration_parts[0].lstrip("0") + return " ".join(duration_parts) From a1e3512bab4d87dc5e614aeb4973e5329079afed Mon Sep 17 00:00:00 2001 From: Simon Date: 
Tue, 21 May 2024 21:10:21 +0200 Subject: [PATCH 22/29] add test_helper --- .../home/tests/test_ta/test_helper.py | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 tubearchivist/home/tests/test_ta/test_helper.py diff --git a/tubearchivist/home/tests/test_ta/test_helper.py b/tubearchivist/home/tests/test_ta/test_helper.py new file mode 100644 index 00000000..23035643 --- /dev/null +++ b/tubearchivist/home/tests/test_ta/test_helper.py @@ -0,0 +1,105 @@ +"""tests for helper functions""" + +import pytest +from home.src.ta.helper import ( + date_parser, + get_duration_str, + is_shorts, + randomizor, + time_parser, +) + + +def test_randomizor_with_positive_length(): + """test randomizer""" + length = 10 + result = randomizor(length) + assert len(result) == length + assert result.isalnum() + + +def test_date_parser_with_int(): + """unix timestamp""" + timestamp = 1621539600 # May 21, 2021 + expected_date = "2021-05-20" + assert date_parser(timestamp) == expected_date + + +def test_date_parser_with_str(): + """iso timestamp""" + date_str = "2021-05-21" + expected_date = "2021-05-21" + assert date_parser(date_str) == expected_date + + +def test_date_parser_with_invalid_input(): + """invalid type""" + invalid_input = [1621539600] + with pytest.raises(TypeError): + date_parser(invalid_input) + + +def test_date_parser_with_invalid_string_format(): + """invalid date string""" + invalid_date_str = "21/05/2021" # Invalid format + with pytest.raises(ValueError): + date_parser(invalid_date_str) + + +def test_time_parser_with_numeric_string(): + """as number""" + timestamp = "100" + expected_seconds = 100 + assert time_parser(timestamp) == expected_seconds + + +def test_time_parser_with_hh_mm_ss_format(): + """to seconds""" + timestamp = "01:00:00" + expected_seconds = 3600.0 + assert time_parser(timestamp) == expected_seconds + + +def test_time_parser_with_empty_string(): + """handle empty""" + timestamp = "" + assert time_parser(timestamp) is False + + 
+def test_time_parser_with_invalid_format(): + """not enough to unpack""" + timestamp = "01:00" + with pytest.raises(ValueError): + time_parser(timestamp) + + +def test_time_parser_with_non_numeric_input(): + """non numeric""" + timestamp = "1a:00:00" + with pytest.raises(ValueError): + time_parser(timestamp) + + +def test_is_shorts(): + """is shorts id""" + youtube_id = "YG3-Pw3rixU" + assert is_shorts(youtube_id) + + +def test_is_not_shorts(): + """is not shorts id""" + youtube_id = "Ogr9kbypSNg" + assert is_shorts(youtube_id) is False + + +def test_get_duration_str(): + """only seconds""" + assert get_duration_str(None) == "NA" + assert get_duration_str(5) == "5s" + assert get_duration_str(10) == "10s" + assert get_duration_str(500) == "8m 20s" + assert get_duration_str(1000) == "16m 40s" + assert get_duration_str(5000) == "1h 23m 20s" + assert get_duration_str(500000) == "5d 18h 53m 20s" + assert get_duration_str(5000000) == "57d 20h 53m 20s" + assert get_duration_str(50000000) == "1y 213d 16h 53m 20s" From ed3183b42c98c280966621e0fd99fe5407328cb2 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 21:17:45 +0200 Subject: [PATCH 23/29] remove comments --- tubearchivist/home/tests/test_ta/test_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubearchivist/home/tests/test_ta/test_helper.py b/tubearchivist/home/tests/test_ta/test_helper.py index 23035643..109f4e56 100644 --- a/tubearchivist/home/tests/test_ta/test_helper.py +++ b/tubearchivist/home/tests/test_ta/test_helper.py @@ -20,7 +20,7 @@ def test_randomizor_with_positive_length(): def test_date_parser_with_int(): """unix timestamp""" - timestamp = 1621539600 # May 21, 2021 + timestamp = 1621539600 expected_date = "2021-05-20" assert date_parser(timestamp) == expected_date @@ -41,7 +41,7 @@ def test_date_parser_with_invalid_input(): def test_date_parser_with_invalid_string_format(): """invalid date string""" - invalid_date_str = "21/05/2021" # Invalid format + 
invalid_date_str = "21/05/2021" with pytest.raises(ValueError): date_parser(invalid_date_str) From 7a265231bc4afd497a17f2944f607c1cb7ed977d Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 21:26:09 +0200 Subject: [PATCH 24/29] cache pip --- .github/workflows/lint_python.yml | 7 +++++++ .github/workflows/unit_tests.yml | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 2084c520..e8996dd1 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -16,6 +16,13 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.11' + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- - name: Install system dependencies run: | sudo apt-get update diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 331aea27..e2964fc0 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -21,6 +21,14 @@ jobs: with: python-version: '3.11' + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install system dependencies run: | sudo apt-get update From 200f5ed5a2aaaa3da8a975f2226525b4b9b161ca Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 21:54:20 +0200 Subject: [PATCH 25/29] add apt caching, better structuring --- .github/workflows/lint_python.yml | 29 +++++++++++++++++++++++------ .github/workflows/unit_tests.yml | 20 +++++++++++++++----- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index e8996dd1..79819ec7 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -12,10 +12,29 @@ jobs: lint_python: 
runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + + - name: Cache apt packages + uses: actions/cache@v4 + with: + path: | + /var/cache/apt + /var/lib/apt + key: ${{ runner.os }}-apt-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-apt- + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev + + - name: Set up Python + uses: actions/setup-python@v5 with: python-version: '3.11' + - name: Cache pip uses: actions/cache@v4 with: @@ -23,13 +42,11 @@ jobs: key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev + - name: Install python dependencies run: | python -m pip install --upgrade pip pip install -r tubearchivist/requirements-dev.txt + - name: Run Linter run: ./deploy.sh validate diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index e2964fc0..8a36a562 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -16,6 +16,21 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache apt packages + uses: actions/cache@v4 + with: + path: | + /var/cache/apt + /var/lib/apt + key: ${{ runner.os }}-apt-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-apt- + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev + - name: Set up Python uses: actions/setup-python@v5 with: @@ -29,11 +44,6 @@ jobs: restore-keys: | ${{ runner.os }}-pip- - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev - - name: Install dependencies run: | python 
-m pip install --upgrade pip From a201d43bfdac29ce6e051cea1f9f2dbdeb7d8cdc Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 22:15:48 +0200 Subject: [PATCH 26/29] chdir for tests, test reading mapping --- tubearchivist/home/tests/conftest.py | 11 +++++++++++ tubearchivist/home/tests/test_ta/test_helper.py | 8 ++++++++ 2 files changed, 19 insertions(+) create mode 100644 tubearchivist/home/tests/conftest.py diff --git a/tubearchivist/home/tests/conftest.py b/tubearchivist/home/tests/conftest.py new file mode 100644 index 00000000..5e5cb7f4 --- /dev/null +++ b/tubearchivist/home/tests/conftest.py @@ -0,0 +1,11 @@ +"""test configs""" + +import os + +import pytest + + +@pytest.fixture(scope="session", autouse=True) +def change_test_dir(request): + """change directory to project folder""" + os.chdir(request.config.rootdir / "tubearchivist") diff --git a/tubearchivist/home/tests/test_ta/test_helper.py b/tubearchivist/home/tests/test_ta/test_helper.py index 109f4e56..7c1c4bbf 100644 --- a/tubearchivist/home/tests/test_ta/test_helper.py +++ b/tubearchivist/home/tests/test_ta/test_helper.py @@ -4,6 +4,7 @@ import pytest from home.src.ta.helper import ( date_parser, get_duration_str, + get_mapping, is_shorts, randomizor, time_parser, @@ -80,6 +81,13 @@ def test_time_parser_with_non_numeric_input(): time_parser(timestamp) +def test_get_mapping(): + """test mapping""" + index_config = get_mapping() + assert isinstance(index_config, list) + assert all(isinstance(i, dict) for i in index_config) + + def test_is_shorts(): """is shorts id""" youtube_id = "YG3-Pw3rixU" From 87e3814751393e0943279bf6f689dee11a63dfed Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 22:20:47 +0200 Subject: [PATCH 27/29] fix locked files caching --- .github/workflows/lint_python.yml | 5 +++++ .github/workflows/unit_tests.yml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 79819ec7..b8b552ec 100644 
--- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -24,6 +24,11 @@ jobs: key: ${{ runner.os }}-apt-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-apt- + before-save: | + sudo rm -f /var/cache/apt/archives/lock + sudo rm -f /var/lib/apt/lists/lock + sudo rm -rf /var/cache/apt/archives/partial + sudo rm -rf /var/lib/apt/lists/partial - name: Install system dependencies run: | diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 8a36a562..bb75ac30 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -25,6 +25,11 @@ jobs: key: ${{ runner.os }}-apt-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-apt- + before-save: | + sudo rm -f /var/cache/apt/archives/lock + sudo rm -f /var/lib/apt/lists/lock + sudo rm -rf /var/cache/apt/archives/partial + sudo rm -rf /var/lib/apt/lists/partial - name: Install system dependencies run: | From 9fe0aa85d0f61b838a8062efc313e6f0e5f494ea Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 22:23:07 +0200 Subject: [PATCH 28/29] add invalid watch url --- tubearchivist/home/tests/test_ta/test_urlparser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubearchivist/home/tests/test_ta/test_urlparser.py b/tubearchivist/home/tests/test_ta/test_urlparser.py index f30f1e6b..6d4ab2f4 100644 --- a/tubearchivist/home/tests/test_ta/test_urlparser.py +++ b/tubearchivist/home/tests/test_ta/test_urlparser.py @@ -118,6 +118,7 @@ INVALID_IDS_ERRORS = [ "aaaaa", "https://www.youtube.com/playlist?list=AAAA", "https://www.youtube.com/channel/UC9-y-6csu5WGm29I7Jiwpn", + "https://www.youtube.com/watch?v=CK3_zarXkw", ] From d4165327df93d2f02406fea504df5ddad3be36fc Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 21 May 2024 22:29:55 +0200 Subject: [PATCH 29/29] remove apt cache --- .github/workflows/lint_python.yml | 15 --------------- .github/workflows/unit_tests.yml | 15 --------------- 2 files changed, 
30 deletions(-) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index b8b552ec..0aae9597 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -15,21 +15,6 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: Cache apt packages - uses: actions/cache@v4 - with: - path: | - /var/cache/apt - /var/lib/apt - key: ${{ runner.os }}-apt-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-apt- - before-save: | - sudo rm -f /var/cache/apt/archives/lock - sudo rm -f /var/lib/apt/lists/lock - sudo rm -rf /var/cache/apt/archives/partial - sudo rm -rf /var/lib/apt/lists/partial - - name: Install system dependencies run: | sudo apt-get update diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index bb75ac30..33f3bd5f 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -16,21 +16,6 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: Cache apt packages - uses: actions/cache@v4 - with: - path: | - /var/cache/apt - /var/lib/apt - key: ${{ runner.os }}-apt-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-apt- - before-save: | - sudo rm -f /var/cache/apt/archives/lock - sudo rm -f /var/lib/apt/lists/lock - sudo rm -rf /var/cache/apt/archives/partial - sudo rm -rf /var/lib/apt/lists/partial - - name: Install system dependencies run: | sudo apt-get update