From b2bb7ea28e2d20a09e4c7daf94549213020ec7b8 Mon Sep 17 00:00:00 2001 From: Igor Rzegocki Date: Thu, 21 Sep 2023 22:23:44 +0200 Subject: [PATCH 1/4] Support wider spectrum of ElasticSearch configurations --- README.md | 2 + .../management/commands/ta_connection.py | 18 +++++++- tubearchivist/home/src/es/connect.py | 44 ++++++++++++++++--- tubearchivist/home/src/es/snapshot.py | 4 +- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 9bad431..954df49 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ Take a look at the example [docker-compose.yml](https://github.com/tubearchivist | TA_PORT | Overwrite Nginx port | Optional | | TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional | | ES_URL | URL That ElasticSearch runs on | Optional | +| ES_VERIFY_SSL | Verify ElasticSearch SSL certificate, everything other than `false` defaults to `true` | Optional | +| ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional | | HOST_GID | Allow TA to own the video files instead of container user | Optional | | HOST_UID | Allow TA to own the video files instead of container user | Optional | | ELASTIC_USER | Change the default ElasticSearch user | Optional | diff --git a/tubearchivist/config/management/commands/ta_connection.py b/tubearchivist/config/management/commands/ta_connection.py index 23cc14d..4e0f59a 100644 --- a/tubearchivist/config/management/commands/ta_connection.py +++ b/tubearchivist/config/management/commands/ta_connection.py @@ -3,6 +3,7 @@ Functionality: - check that all connections are working """ +from os import environ from time import sleep import requests @@ -132,7 +133,19 @@ class Command(BaseCommand): """check that path.repo var is set""" self.stdout.write("[5] check ES path.repo env var") response, _ = ElasticWrap("_nodes/_all/settings").get() + snaphost_roles = [ + "data", + "data_cold", + "data_content", + "data_frozen", + "data_hot", + "data_warm", + "master", + ] for node in response["nodes"].values(): + if not (set(node["roles"]) & set(snaphost_roles)): + continue + if node["settings"]["path"].get("repo"): self.stdout.write( self.style.SUCCESS(" ✓ path.repo env var is set") @@ -142,7 +155,10 @@ class Command(BaseCommand): message = ( " 🗙 path.repo env var not found. " + "set the following env var to the ES container:\n" - + " path.repo=/usr/share/elasticsearch/data/snapshot" + + " path.repo=" + + environ.get( + "ES_SNAPSHOT_DIR", "/usr/share/elasticsearch/data/snapshot" + ), ) self.stdout.write(self.style.ERROR(f"{message}")) sleep(60) diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index b526cf4..7d3b79a 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -9,6 +9,7 @@ import json import os import requests +import urllib3 class ElasticWrap: @@ -19,19 +20,33 @@ class ElasticWrap: ES_URL: str = str(os.environ.get("ES_URL")) ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD")) ES_USER: str = str(os.environ.get("ELASTIC_USER") or "elastic") + ES_VERIFY_SSL: str = str(os.environ.get("ES_VERIFY_SSL") or "true") def __init__(self, path): self.url = f"{self.ES_URL}/{path}" self.auth = (self.ES_USER, self.ES_PASS) + self.verify_ssl = self.ES_VERIFY_SSL != "false" + + if not self.verify_ssl: + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get(self, data=False, timeout=10, print_error=True): """get data from es""" if data: response = requests.get( - self.url, json=data, auth=self.auth, timeout=timeout + self.url, + json=data, + auth=self.auth, + timeout=timeout, + verify=self.verify_ssl, ) else: - response = requests.get(self.url, auth=self.auth, timeout=timeout) + response = requests.get( + self.url, + auth=self.auth, + timeout=timeout, + verify=self.verify_ssl, + ) if print_error and not response.ok: print(response.text) @@ -48,10 +63,19 @@ class ElasticWrap: if data: response = requests.post( - self.url, data=payload, headers=headers, auth=self.auth + self.url, + data=payload, + headers=headers, + auth=self.auth, + verify=self.verify_ssl, ) else: - response = requests.post(self.url, headers=headers, auth=self.auth) + response = requests.post( + self.url, + headers=headers, + auth=self.auth, + verify=self.verify_ssl, + ) if not response.ok: print(response.text) @@ -62,7 +86,9 @@ class ElasticWrap: """put data to es""" if refresh: self.url = f"{self.url}/?refresh=true" - response = requests.put(f"{self.url}", json=data, auth=self.auth) + response = requests.put( + f"{self.url}", json=data, auth=self.auth, verify=self.verify_ssl + ) if not response.ok: print(response.text) print(data) @@ -75,9 +101,13 @@ class ElasticWrap: if refresh: self.url = f"{self.url}/?refresh=true" if data: - response = requests.delete(self.url, json=data, auth=self.auth) + response = requests.delete( + self.url, json=data, auth=self.auth, verify=self.verify_ssl + ) else: - response = requests.delete(self.url, auth=self.auth) + response = requests.delete( + self.url, auth=self.auth, verify=self.verify_ssl + ) if not response.ok: print(response.text) diff --git a/tubearchivist/home/src/es/snapshot.py b/tubearchivist/home/src/es/snapshot.py index 6d6563c..15fc82c 100644 --- a/tubearchivist/home/src/es/snapshot.py +++ b/tubearchivist/home/src/es/snapshot.py @@ -19,7 +19,9 @@ class ElasticSnapshot: REPO_SETTINGS = { "compress": "true", "chunk_size": "1g", - "location": "/usr/share/elasticsearch/data/snapshot", + "location": environ.get( + "ES_SNAPSHOT_DIR", "/usr/share/elasticsearch/data/snapshot" + ), } POLICY = "ta_daily" From 892e81c185020e24402ffdb1ec21b524e0e8f7ec Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 20:35:14 +0700 Subject: [PATCH 2/4] refactor ElasticWrap dynamic kwargs --- tubearchivist/home/src/es/connect.py | 132 ++++++++++++++++----------- 1 file changed, 79 insertions(+), 53 deletions(-) diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index 7d3b79a..afbdc61 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -7,6 +7,7 @@ functionality: import json import os +from typing import Any import requests import urllib3 @@ -20,75 +21,93 @@ class ElasticWrap: ES_URL: str = str(os.environ.get("ES_URL")) ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD")) ES_USER: str = str(os.environ.get("ELASTIC_USER") or "elastic") - ES_VERIFY_SSL: str = str(os.environ.get("ES_VERIFY_SSL") or "true") + ES_DISABLE_VERIFY_SSL: bool = bool(os.environ.get("ES_DISABLE_VERIFY_SSL")) - def __init__(self, path): - self.url = f"{self.ES_URL}/{path}" - self.auth = (self.ES_USER, self.ES_PASS) - self.verify_ssl = self.ES_VERIFY_SSL != "false" + def __init__(self, path: str): + self.url: str = f"{self.ES_URL}/{path}" + self.auth: tuple[str, str] = (self.ES_USER, self.ES_PASS) - if not self.verify_ssl: + if not self.ES_DISABLE_VERIFY_SSL: urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - def get(self, data=False, timeout=10, print_error=True): + def get( + self, + data: bool | dict = False, + timeout: int = 10, + print_error: bool = True, + ) -> tuple[dict, int]: """get data from es""" + + kwargs: dict[str, Any] = { + "auth": self.auth, + "timeout": timeout, + } + + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + if data: - response = requests.get( - self.url, - json=data, - auth=self.auth, - timeout=timeout, - verify=self.verify_ssl, - ) - else: - response = requests.get( - self.url, - auth=self.auth, - timeout=timeout, - verify=self.verify_ssl, - ) + kwargs["json"] = data + + response = requests.get(self.url, **kwargs) + if print_error and not response.ok: print(response.text) return response.json(), response.status_code - def post(self, data=False, ndjson=False): + def post( + self, data: bool | dict = False, ndjson: bool = False + ) -> tuple[dict, int]: """post data to es""" - if ndjson: - headers = {"Content-type": "application/x-ndjson"} - payload = data - else: - headers = {"Content-type": "application/json"} - payload = json.dumps(data) - if data: - response = requests.post( - self.url, - data=payload, - headers=headers, - auth=self.auth, - verify=self.verify_ssl, + kwargs: dict[str, Any] = {"auth": self.auth} + + if ndjson and data: + kwargs.update( + { + "headers": {"Content-type": "application/x-ndjson"}, + "data": data, + } ) - else: - response = requests.post( - self.url, - headers=headers, - auth=self.auth, - verify=self.verify_ssl, + elif data: + kwargs.update( + { + "headers": {"Content-type": "application/json"}, + "data": json.dumps(data), + } ) + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + + response = requests.post(self.url, **kwargs) + if not response.ok: print(response.text) return response.json(), response.status_code - def put(self, data, refresh=False): + def put( + self, + data: bool | dict = False, + refresh: bool = False, + ) -> tuple[dict, Any]: """put data to es""" + if refresh: self.url = f"{self.url}/?refresh=true" - response = requests.put( - f"{self.url}", json=data, auth=self.auth, verify=self.verify_ssl - ) + + kwargs: dict[str, Any] = { + "json": data, + "auth": self.auth, + } + + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + + response = requests.put(self.url, **kwargs) + if not response.ok: print(response.text) print(data) @@ -96,18 +115,25 @@ class ElasticWrap: return response.json(), response.status_code - def delete(self, data=False, refresh=False): + def delete( + self, + data: bool | dict = False, + refresh: bool = False, + ) -> tuple[dict, Any]: """delete document from es""" + if refresh: self.url = f"{self.url}/?refresh=true" + + kwargs: dict[str, Any] = {"auth": self.auth} + if data: - response = requests.delete( - self.url, json=data, auth=self.auth, verify=self.verify_ssl - ) - else: - response = requests.delete( - self.url, auth=self.auth, verify=self.verify_ssl - ) + kwargs["json"] = data + + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + + response = requests.delete(self.url, **kwargs) if not response.ok: print(response.text) From dcbd8d2a55570ba6fc46474eed243160407009e0 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 20:42:26 +0700 Subject: [PATCH 3/4] update ES_DISABLE_VERIFY_SSL readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 954df49..4756258 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Take a look at the example [docker-compose.yml](https://github.com/tubearchivist | TA_PORT | Overwrite Nginx port | Optional | | TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional | | ES_URL | URL That ElasticSearch runs on | Optional | -| ES_VERIFY_SSL | Verify ElasticSearch SSL certificate, everything other than `false` defaults to `true` | Optional | +| ES_DISABLE_VERIFY_SSL | Disable ElasticSearch SSL certificate verification | Optional | | ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional | | HOST_GID | Allow TA to own the video files instead of container user | Optional | | HOST_UID | Allow TA to own the video files instead of container user | Optional | From 6abec9401bfaf4ee4b773cb1e41aa943d5f9a59e Mon Sep 17 00:00:00 2001 From: Igor Rzegocki Date: Sat, 23 Sep 2023 15:00:46 +0200 Subject: [PATCH 4/4] Mute urlib3 ssl warnings when SSL verification is disabled --- tubearchivist/home/src/es/connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index afbdc61..43e2f6e 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -27,7 +27,7 @@ class ElasticWrap: self.url: str = f"{self.ES_URL}/{path}" self.auth: tuple[str, str] = (self.ES_USER, self.ES_PASS) - if not self.ES_DISABLE_VERIFY_SSL: + if self.ES_DISABLE_VERIFY_SSL: urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get(