Merge pull request #551 from ajgon/fix/elasticsearch-ha

Support elasticsearch clusters
This commit is contained in:
Simon 2023-09-25 12:28:04 +07:00 committed by GitHub
commit 0e767e2f84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 105 additions and 29 deletions

View File

@ -59,6 +59,8 @@ Take a look at the example [docker-compose.yml](https://github.com/tubearchivist
| TA_PORT | Overwrite Nginx port | Optional | | TA_PORT | Overwrite Nginx port | Optional |
| TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional | | TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional |
| ES_URL | URL That ElasticSearch runs on | Optional | | ES_URL | URL That ElasticSearch runs on | Optional |
| ES_DISABLE_VERIFY_SSL | Disable ElasticSearch SSL certificate verification | Optional |
| ES_SNAPSHOT_DIR | Custom path where ElasticSearch stores snapshots for master/data nodes | Optional |
| HOST_GID | Allow TA to own the video files instead of container user | Optional | | HOST_GID | Allow TA to own the video files instead of container user | Optional |
| HOST_UID | Allow TA to own the video files instead of container user | Optional | | HOST_UID | Allow TA to own the video files instead of container user | Optional |
| ELASTIC_USER | Change the default ElasticSearch user | Optional | | ELASTIC_USER | Change the default ElasticSearch user | Optional |

View File

@ -3,6 +3,7 @@ Functionality:
- check that all connections are working - check that all connections are working
""" """
from os import environ
from time import sleep from time import sleep
import requests import requests
@ -132,7 +133,19 @@ class Command(BaseCommand):
"""check that path.repo var is set""" """check that path.repo var is set"""
self.stdout.write("[5] check ES path.repo env var") self.stdout.write("[5] check ES path.repo env var")
response, _ = ElasticWrap("_nodes/_all/settings").get() response, _ = ElasticWrap("_nodes/_all/settings").get()
snaphost_roles = [
"data",
"data_cold",
"data_content",
"data_frozen",
"data_hot",
"data_warm",
"master",
]
for node in response["nodes"].values(): for node in response["nodes"].values():
if not (set(node["roles"]) & set(snaphost_roles)):
continue
if node["settings"]["path"].get("repo"): if node["settings"]["path"].get("repo"):
self.stdout.write( self.stdout.write(
self.style.SUCCESS(" ✓ path.repo env var is set") self.style.SUCCESS(" ✓ path.repo env var is set")
@ -142,7 +155,10 @@ class Command(BaseCommand):
message = ( message = (
" 🗙 path.repo env var not found. " " 🗙 path.repo env var not found. "
+ "set the following env var to the ES container:\n" + "set the following env var to the ES container:\n"
+ " path.repo=/usr/share/elasticsearch/data/snapshot" + " path.repo="
+ environ.get(
"ES_SNAPSHOT_DIR", "/usr/share/elasticsearch/data/snapshot"
),
) )
self.stdout.write(self.style.ERROR(f"{message}")) self.stdout.write(self.style.ERROR(f"{message}"))
sleep(60) sleep(60)

View File

@ -7,8 +7,10 @@ functionality:
import json import json
import os import os
from typing import Any
import requests import requests
import urllib3
class ElasticWrap: class ElasticWrap:
@ -19,50 +21,93 @@ class ElasticWrap:
ES_URL: str = str(os.environ.get("ES_URL")) ES_URL: str = str(os.environ.get("ES_URL"))
ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD")) ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD"))
ES_USER: str = str(os.environ.get("ELASTIC_USER") or "elastic") ES_USER: str = str(os.environ.get("ELASTIC_USER") or "elastic")
ES_DISABLE_VERIFY_SSL: bool = bool(os.environ.get("ES_DISABLE_VERIFY_SSL"))
def __init__(self, path): def __init__(self, path: str):
self.url = f"{self.ES_URL}/{path}" self.url: str = f"{self.ES_URL}/{path}"
self.auth = (self.ES_USER, self.ES_PASS) self.auth: tuple[str, str] = (self.ES_USER, self.ES_PASS)
def get(self, data=False, timeout=10, print_error=True): if self.ES_DISABLE_VERIFY_SSL:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def get(
self,
data: bool | dict = False,
timeout: int = 10,
print_error: bool = True,
) -> tuple[dict, int]:
"""get data from es""" """get data from es"""
kwargs: dict[str, Any] = {
"auth": self.auth,
"timeout": timeout,
}
if self.ES_DISABLE_VERIFY_SSL:
kwargs["verify"] = False
if data: if data:
response = requests.get( kwargs["json"] = data
self.url, json=data, auth=self.auth, timeout=timeout
) response = requests.get(self.url, **kwargs)
else:
response = requests.get(self.url, auth=self.auth, timeout=timeout)
if print_error and not response.ok: if print_error and not response.ok:
print(response.text) print(response.text)
return response.json(), response.status_code return response.json(), response.status_code
def post(self, data=False, ndjson=False): def post(
self, data: bool | dict = False, ndjson: bool = False
) -> tuple[dict, int]:
"""post data to es""" """post data to es"""
if ndjson:
headers = {"Content-type": "application/x-ndjson"}
payload = data
else:
headers = {"Content-type": "application/json"}
payload = json.dumps(data)
if data: kwargs: dict[str, Any] = {"auth": self.auth}
response = requests.post(
self.url, data=payload, headers=headers, auth=self.auth if ndjson and data:
kwargs.update(
{
"headers": {"Content-type": "application/x-ndjson"},
"data": data,
}
) )
else: elif data:
response = requests.post(self.url, headers=headers, auth=self.auth) kwargs.update(
{
"headers": {"Content-type": "application/json"},
"data": json.dumps(data),
}
)
if self.ES_DISABLE_VERIFY_SSL:
kwargs["verify"] = False
response = requests.post(self.url, **kwargs)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
return response.json(), response.status_code return response.json(), response.status_code
def put(self, data, refresh=False): def put(
self,
data: bool | dict = False,
refresh: bool = False,
) -> tuple[dict, Any]:
"""put data to es""" """put data to es"""
if refresh: if refresh:
self.url = f"{self.url}/?refresh=true" self.url = f"{self.url}/?refresh=true"
response = requests.put(f"{self.url}", json=data, auth=self.auth)
kwargs: dict[str, Any] = {
"json": data,
"auth": self.auth,
}
if self.ES_DISABLE_VERIFY_SSL:
kwargs["verify"] = False
response = requests.put(self.url, **kwargs)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
print(data) print(data)
@ -70,14 +115,25 @@ class ElasticWrap:
return response.json(), response.status_code return response.json(), response.status_code
def delete(self, data=False, refresh=False): def delete(
self,
data: bool | dict = False,
refresh: bool = False,
) -> tuple[dict, Any]:
"""delete document from es""" """delete document from es"""
if refresh: if refresh:
self.url = f"{self.url}/?refresh=true" self.url = f"{self.url}/?refresh=true"
kwargs: dict[str, Any] = {"auth": self.auth}
if data: if data:
response = requests.delete(self.url, json=data, auth=self.auth) kwargs["json"] = data
else:
response = requests.delete(self.url, auth=self.auth) if self.ES_DISABLE_VERIFY_SSL:
kwargs["verify"] = False
response = requests.delete(self.url, **kwargs)
if not response.ok: if not response.ok:
print(response.text) print(response.text)

View File

@ -19,7 +19,9 @@ class ElasticSnapshot:
REPO_SETTINGS = { REPO_SETTINGS = {
"compress": "true", "compress": "true",
"chunk_size": "1g", "chunk_size": "1g",
"location": "/usr/share/elasticsearch/data/snapshot", "location": environ.get(
"ES_SNAPSHOT_DIR", "/usr/share/elasticsearch/data/snapshot"
),
} }
POLICY = "ta_daily" POLICY = "ta_daily"