From 18462e9ea721f095cc83e658275178a3acf4a7fd Mon Sep 17 00:00:00 2001
From: Jenny Danzmayr
Date: Sat, 23 Dec 2023 23:31:38 +0100
Subject: [PATCH] improved health checks for tile cache server

Replace the single /check endpoint with separate probes:

* /health and /health/live call get_cache_package() and answer 200 OK.
* /health/ready answers 200 OK while the timestamp stored under
  'cache_package_last_successful_check' is newer than three reload
  intervals; otherwise (or when memcached raises pylibmc.Error) it answers
  500 Internal Server Error with a plain-text reason.

The timestamp is written whenever the cache package is confirmed unmodified
or freshly saved. The container HEALTHCHECK now polls /health/ready.
---
 docker/tileserver.dockerfile |  2 +-
 src/c3nav/tileserver/wsgi.py | 31 ++++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/docker/tileserver.dockerfile b/docker/tileserver.dockerfile
index a8149b70..8062240b 100644
--- a/docker/tileserver.dockerfile
+++ b/docker/tileserver.dockerfile
@@ -77,7 +77,7 @@ ENV C3NAV_DEBUG="" \
 USER c3nav
 WORKDIR /app
 EXPOSE 8000 5000
-HEALTHCHECK --start-period=10s --interval=10s --timeout=1s CMD curl -f http://localhost:8000/check || exit 1
+HEALTHCHECK --start-period=10s --interval=10s --timeout=1s CMD curl -f http://localhost:8000/health/ready || exit 1
 CMD ["/app/env/bin/uwsgi", "--master", \
      "--wsgi", "c3nav.tileserver.wsgi", \
      "--pythonpath", "/app/src", \
diff --git a/src/c3nav/tileserver/wsgi.py b/src/c3nav/tileserver/wsgi.py
index 2de7fd2c..0aa247a1 100644
--- a/src/c3nav/tileserver/wsgi.py
+++ b/src/c3nav/tileserver/wsgi.py
@@ -120,6 +120,7 @@ class TileServer:
                 if self.cache_package is not None:
                     logger.debug('Not modified.')
                     cache['cache_package_filename'] = self.cache_package_filename
+                    cache.set('cache_package_last_successful_check', time.time())
                     return True
                 logger.error('Unexpected not modified.')
                 return False
@@ -147,6 +148,7 @@ class TileServer:
             with open(self.cache_package_filename, 'wb') as f:
                 pickle.dump(self.cache_package, f)
             cache.set('cache_package_filename', self.cache_package_filename)
+            cache.set('cache_package_last_successful_check', time.time())
         except Exception as e:
             self.cache_package_etag = None
             logger.error('Saving pickled package failed: %s' % e)
@@ -173,7 +175,7 @@ class TileServer:
                                   ('ETag', etag)])
         return [data]
 
-    def check_response(self, start_response):
+    def liveness_check_response(self, start_response):
         self.get_cache_package()
         text = b'OK'
         start_response('200 OK', [self.get_date_header(),
@@ -181,6 +183,26 @@ class TileServer:
                                   ('Content-Length', str(len(text)))])
         return [text]
 
+    def readiness_check_response(self, start_response):
+        text = b'OK'
+        error = False
+        try:
+            last_check = self.cache.get('cache_package_last_successful_check')
+        except pylibmc.Error:
+            error = True
+            text = b'memcached error'
+        else:
+            if last_check is None or last_check <= (time.time() - self.reload_interval * 3):
+                error = True
+                if last_check:
+                    text = f'last successful cache package check was {time.time() - last_check}s ago.'.encode('utf-8')
+                else:
+                    text = b'last successful cache package check is unknown'
+        start_response('500 Internal Server Error' if error else '200 OK', [self.get_date_header(),
+                                                                            ('Content-Type', 'text/plain'),
+                                                                            ('Content-Length', str(len(text)))])
+        return [text]
+
     def get_cache_package(self):
         try:
             cache_package_filename = self.cache.get('cache_package_filename')
@@ -207,8 +229,11 @@ class TileServer:
 
     def __call__(self, env, start_response):
         path_info = env['PATH_INFO']
-        if path_info == '/check':
-            return self.check_response(start_response)
+        if path_info == '/health' or path_info == '/health/live':
+            return self.liveness_check_response(start_response)
+
+        if path_info == '/health/ready':
+            return self.readiness_check_response(start_response)
 
         match = self.path_regex.match(path_info)
         if match is None: