improved health checks for tile cache server

This commit is contained in:
Jenny Danzmayr 2023-12-23 23:31:38 +01:00
parent 59e02a90af
commit 18462e9ea7
2 changed files with 29 additions and 4 deletions

View file

@ -77,7 +77,7 @@ ENV C3NAV_DEBUG="" \
USER c3nav USER c3nav
WORKDIR /app WORKDIR /app
EXPOSE 8000 5000 EXPOSE 8000 5000
HEALTHCHECK --start-period=10s --interval=10s --timeout=1s CMD curl -f http://localhost:8000/check || exit 1 HEALTHCHECK --start-period=10s --interval=10s --timeout=1s CMD curl -f http://localhost:8000/health/ready || exit 1
CMD ["/app/env/bin/uwsgi", "--master", \ CMD ["/app/env/bin/uwsgi", "--master", \
"--wsgi", "c3nav.tileserver.wsgi", \ "--wsgi", "c3nav.tileserver.wsgi", \
"--pythonpath", "/app/src", \ "--pythonpath", "/app/src", \

View file

@ -120,6 +120,7 @@ class TileServer:
if self.cache_package is not None: if self.cache_package is not None:
logger.debug('Not modified.') logger.debug('Not modified.')
cache['cache_package_filename'] = self.cache_package_filename cache['cache_package_filename'] = self.cache_package_filename
cache.set('cache_package_last_successful_check', time.time())
return True return True
logger.error('Unexpected not modified.') logger.error('Unexpected not modified.')
return False return False
@ -147,6 +148,7 @@ class TileServer:
with open(self.cache_package_filename, 'wb') as f: with open(self.cache_package_filename, 'wb') as f:
pickle.dump(self.cache_package, f) pickle.dump(self.cache_package, f)
cache.set('cache_package_filename', self.cache_package_filename) cache.set('cache_package_filename', self.cache_package_filename)
cache.set('cache_package_last_successful_check', time.time())
except Exception as e: except Exception as e:
self.cache_package_etag = None self.cache_package_etag = None
logger.error('Saving pickled package failed: %s' % e) logger.error('Saving pickled package failed: %s' % e)
@ -173,7 +175,7 @@ class TileServer:
('ETag', etag)]) ('ETag', etag)])
return [data] return [data]
def check_response(self, start_response): def liveness_check_response(self, start_response):
self.get_cache_package() self.get_cache_package()
text = b'OK' text = b'OK'
start_response('200 OK', [self.get_date_header(), start_response('200 OK', [self.get_date_header(),
@ -181,6 +183,26 @@ class TileServer:
('Content-Length', str(len(text)))]) ('Content-Length', str(len(text)))])
return [text] return [text]
def readiness_check_response(self, start_response):
    """
    Answer the readiness probe (routed from ``/health/ready``).

    Reads the ``cache_package_last_successful_check`` timestamp from the
    cache and reports the server as ready only if that timestamp exists and
    is newer than three reload intervals.

    :param start_response: WSGI ``start_response`` callable.
    :return: single-item list with the plain-text response body; ``b'OK'``
             when ready, otherwise a short description of the problem.
    """
    error = False
    text = b'OK'
    try:
        last_check = self.cache.get('cache_package_last_successful_check')
    except pylibmc.Error:
        # The cache backend itself is unreachable, so readiness cannot be determined.
        error = True
        text = b'memcached error'
    else:
        # Sample the clock once so the reported age matches the age that was compared.
        now = time.time()
        if last_check is None:
            error = True
            text = b'last successful cache package check is unknown'
        elif last_check <= now - self.reload_interval * 3:
            error = True
            text = f'last successful cache package check was {now - last_check}s ago.'.encode('utf-8')

    # The status line needs a reason phrase that matches the code; "500 OK" is contradictory.
    status = '500 Internal Server Error' if error else '200 OK'
    start_response(status, [self.get_date_header(),
                            ('Content-Type', 'text/plain'),
                            ('Content-Length', str(len(text)))])
    return [text]
def get_cache_package(self): def get_cache_package(self):
try: try:
cache_package_filename = self.cache.get('cache_package_filename') cache_package_filename = self.cache.get('cache_package_filename')
@ -207,8 +229,11 @@ class TileServer:
def __call__(self, env, start_response): def __call__(self, env, start_response):
path_info = env['PATH_INFO'] path_info = env['PATH_INFO']
if path_info == '/check': if path_info == '/health' or path_info == '/health/live':
return self.check_response(start_response) return self.liveness_check_response(start_response)
if path_info == '/health/ready':
return self.readiness_check_response(start_response)
match = self.path_regex.match(path_info) match = self.path_regex.match(path_info)
if match is None: if match is None: