gnocchixyz · leo-naeka · Oct 4, 2018 · jd · Oct 8, 2018 · jd
diff --git a/gnocchi/incoming/__init__.py b/gnocchi/incoming/__init__.py
@@ -257,6 +257,10 @@ def finish_sack_processing(sack):
         """Mark sack processing has finished."""
         pass
 
+    @staticmethod
+    def get_health_status():  # concrete incoming drivers override this
+        raise exceptions.NotImplementedError
+
 
 @utils.retry_on_exception_and_log("Unable to initialize incoming driver")
 def get_driver(conf):

diff --git a/gnocchi/incoming/ceph.py b/gnocchi/incoming/ceph.py
@@ -22,6 +22,7 @@
 import six
 
 from gnocchi.common import ceph
+from gnocchi.status import get_ceph_health_status
 from gnocchi import incoming
 
 rados = ceph.rados
@@ -232,3 +233,6 @@ def process_measures_for_sack(self, sack):
                 self.ioctx.remove_omap_keys(op, tuple(processed_keys))
                 self.ioctx.operate_write_op(op, str(sack),
                                             flags=self.OMAP_WRITE_FLAGS)
+
+    def get_health_status(self):  # Ceph probe lives in gnocchi.status
+        return get_ceph_health_status(self)
diff --git a/gnocchi/incoming/file.py b/gnocchi/incoming/file.py
@@ -24,6 +24,7 @@
 import numpy
 import six
 
+from gnocchi.status import get_file_health_status
 from gnocchi import incoming
 from gnocchi import utils
 
@@ -206,3 +207,6 @@ def process_measures_for_sack(self, sack):
 
         for metric_id, files in six.iteritems(processed_files):
             self._delete_measures_files_for_metric(metric_id, files)
+
+    def get_health_status(self):  # file probe lives in gnocchi.status
+        return get_file_health_status(self)
diff --git a/gnocchi/incoming/redis.py b/gnocchi/incoming/redis.py
@@ -20,6 +20,7 @@
 import six
 
 from gnocchi.common import redis
+from gnocchi.status import get_redis_health_status
 from gnocchi import incoming
 
 
@@ -193,3 +194,6 @@ def finish_sack_processing(self, sack):
         # Delete the sack key which handles no data but is used to get a SET
         # notification in iter_on_sacks_to_process
         self._client.delete(str(sack))
+
+    def get_health_status(self):  # Redis probe lives in gnocchi.status
+        return get_redis_health_status(self)
diff --git a/gnocchi/incoming/s3.py b/gnocchi/incoming/s3.py
@@ -23,6 +23,7 @@
 import numpy
 
 from gnocchi.common import s3
+from gnocchi.status import get_s3_health_status
 from gnocchi import incoming
 
 boto3 = s3.boto3
@@ -188,3 +189,6 @@ def process_measures_for_sack(self, sack):
 
         # Now clean objects
         s3.bulk_delete(self.s3, self._bucket_name_measures, files)
+
+    def get_health_status(self):  # S3 probe lives in gnocchi.status
+        return get_s3_health_status(self)
diff --git a/gnocchi/incoming/swift.py b/gnocchi/incoming/swift.py
@@ -21,6 +21,7 @@
 import six
 
 from gnocchi.common import swift
+from gnocchi.status import get_swift_health_status
 from gnocchi import incoming
 from gnocchi import utils
 
@@ -144,3 +145,6 @@ def process_measures_for_sack(self, sack):
         yield measures
 
         swift.bulk_delete(self.swift, sack_name, files)
+
+    def get_health_status(self):  # Swift probe lives in gnocchi.status
+        return get_swift_health_status(self)
diff --git a/gnocchi/indexer/__init__.py b/gnocchi/indexer/__init__.py
@@ -471,3 +471,7 @@ def get_resource_attributes_schemas():
     @staticmethod
     def get_resource_type_schema():
         raise exceptions.NotImplementedError
+
+    @staticmethod
+    def get_health_status():  # concrete indexer drivers override this
+        raise exceptions.NotImplementedError
diff --git a/gnocchi/indexer/sqlalchemy.py b/gnocchi/indexer/sqlalchemy.py
@@ -49,6 +49,7 @@
 from gnocchi import indexer
 from gnocchi.indexer import sqlalchemy_base as base
 from gnocchi.indexer import sqlalchemy_types as types
+from gnocchi.status import get_sqlalchemy_health_status
 from gnocchi import resource_type
 from gnocchi import utils
 
@@ -1174,6 +1175,9 @@ def _build_sort_keys(sorts, unique_keys):
 
         return sort_keys, sort_dirs
 
+    def get_health_status(self):  # SQL probe lives in gnocchi.status
+        return get_sqlalchemy_health_status(self)
+
 
 def _operator_in(field_name, value):
     # Do not generate empty IN comparison

diff --git a/gnocchi/rest/api.py b/gnocchi/rest/api.py
@@ -2087,36 +2087,61 @@ class StatusController(rest.RestController):
     @pecan.expose('json')
     def get(details=True):
         enforce("get status", {})
-        try:
-            members_req = pecan.request.coordinator.get_members(
-                metricd.MetricProcessor.GROUP_ID)
-        except tooz.NotImplemented:
-            members_req = None
+
+        # Probe each backend driver (incoming/indexer/storage)
+        response_data = {
+            'incoming': {
+                'status': pecan.request.incoming.get_health_status(),
+            },
+            'indexer': {
+                'status': pecan.request.indexer.get_health_status(),
+            },
+            'storage': {
+                'status': pecan.request.storage.get_health_status(),
+            }
+        }
+
+        # Always return the details, but answer 503 instead of 200
+        # whenever a component reports a falsy is_available
+        pecan.response.status = 200 if all([
+            component['status']['is_available']
+            for component in response_data.values()]) else 503
+
+        # Add the incoming measures report under the 'storage' key
         try:
             report = pecan.request.incoming.measures_report(
-                strtobool("details", details))
+                strtobool('details', details))
         except incoming.ReportGenerationError:
-            abort(503, 'Unable to generate status. Please retry.')
-        report_dict = {"storage": {"summary": report['summary']}}
+            abort(503, "Unable to generate status. Please retry.")
+        response_data['storage']['summary'] = report['summary']
         if 'details' in report:
-            report_dict["storage"]["measures_to_process"] = report['details']
-        report_dict['metricd'] = {}
-        if members_req:
+            response_data['storage']['measures_to_process'] = report['details']
+
+        # Add metricd status (left empty on tooz.NotImplemented)
+        try:
+            members_req = pecan.request.coordinator.get_members(
+                metricd.MetricProcessor.GROUP_ID)
+        except tooz.NotImplemented:
+            response_data['metricd'] = {
+                'processors': None,
+                'statistics': {}
+            }
+        else:
             members = members_req.get()
             caps = [
                 pecan.request.coordinator.get_member_capabilities(
                     metricd.MetricProcessor.GROUP_ID, member)
                 for member in members
             ]
-            report_dict['metricd']['processors'] = members
-            report_dict['metricd']['statistics'] = {
-                member: cap.get()
-                for member, cap in six.moves.zip(members, caps)
+            response_data['metricd'] = {
+                'processors': members,
+                'statistics': {
+                    member: cap.get()
+                    for member, cap in six.moves.zip(members, caps)
+                }
             }
-        else:
-            report_dict['metricd']['processors'] = None
-            report_dict['metricd']['statistics'] = {}
-        return report_dict
+
+        return response_data
 
 
 class MetricsBatchController(object):

diff --git a/gnocchi/status.py b/gnocchi/status.py
@@ -0,0 +1,108 @@
+# -*- coding:Utf-8 -*-
+from __future__ import unicode_literals
+
+from collections import OrderedDict
+import six
+
+
+def get_ceph_health_status(driver):
+    """Report ceph availability for ``driver``.
+
+    The result always carries ``name`` and ``is_available``. The output
+    of ``rados.get_cluster_stats()`` is added under ``stats`` on success;
+    the exception text is added under ``error`` when that call fails.
+    """
+    status = OrderedDict([('name', driver.__class__.__name__)])
+    try:
+        cluster_stats = driver.rados.get_cluster_stats()
+    except Exception as exc:
+        status['is_available'] = False
+        status['error'] = six.text_type(exc)
+        return status
+    status['is_available'] = True
+    status['stats'] = cluster_stats
+    return status
+
+
+def get_file_health_status(driver):
+    """Report the file driver as available.
+
+    No probe is performed: ``is_available`` is unconditionally True.
+    """
+    status = OrderedDict([('name', driver.__class__.__name__)])
+    status['is_available'] = True
+    return status
+
+
+def get_redis_health_status(driver):
+    """Report redis availability for ``driver``.
+
+    The result always carries ``name`` and ``is_available``. The output
+    of ``_client.info()`` is added under ``info`` on success; the
+    exception text is added under ``error`` when that call fails.
+    """
+    status = OrderedDict([('name', driver.__class__.__name__)])
+    try:
+        server_info = driver._client.info()
+    except Exception as exc:
+        status['is_available'] = False
+        status['error'] = six.text_type(exc)
+        return status
+    status['is_available'] = True
+    status['info'] = server_info
+    return status
+
+
+def get_s3_health_status(driver):
+    """Return s3 status.
+
+    Probes `_bucket_name_measures` (incoming) or `_bucket_name` (storage).
+    """
+    response = OrderedDict([('name', driver.__class__.__name__)])
+    bucket = getattr(driver, '_bucket_name_measures', None)
+    try:
+        driver.s3.list_objects_v2(
+            Bucket=bucket or driver._bucket_name, Prefix='/')
+    except Exception as e:
+        response['is_available'] = False
+        response['error'] = six.text_type(e)
+    else:
+        response['is_available'] = True
+    return response
+
+
+def get_sqlalchemy_health_status(driver):
+    """Report sqlalchemy availability for ``driver``.
+
+    Runs ``SELECT 1`` in an independent reader session; the exception
+    text is reported under ``error`` if that fails.
+    """
+    status = OrderedDict([('name', driver.__class__.__name__)])
+    try:
+        with driver.facade.independent_reader() as session:
+            session.execute('SELECT 1')
+    except Exception as exc:
+        status['is_available'] = False
+        status['error'] = six.text_type(exc)
+        return status
+    status['is_available'] = True
+    return status
+
+
+def get_swift_health_status(driver):
+    """Report swift availability for ``driver``.
+
+    The result always carries ``name`` and ``is_available``. The output
+    of ``swift.head_account()`` is added under ``info`` on success; the
+    exception text is added under ``error`` when that call fails.
+    """
+    status = OrderedDict([('name', driver.__class__.__name__)])
+    try:
+        account_info = driver.swift.head_account()
+    except Exception as exc:
+        status['is_available'] = False
+        status['error'] = six.text_type(exc)
+        return status
+    status['is_available'] = True
+    status['info'] = account_info
+    return status
diff --git a/gnocchi/storage/__init__.py b/gnocchi/storage/__init__.py
@@ -699,3 +699,7 @@ def _map_compute_splits_operations(bound_timeserie):
         with self.statistics.time("raw measures store"):
             self._store_unaggregated_timeseries(new_boundts)
         self.statistics["raw measures store"] += len(new_boundts)
+
+    @staticmethod
+    def get_health_status():  # concrete storage drivers override this
+        raise NotImplementedError
diff --git a/gnocchi/storage/ceph.py b/gnocchi/storage/ceph.py
@@ -21,6 +21,7 @@
 
 from gnocchi import carbonara
 from gnocchi.common import ceph
+from gnocchi.status import get_ceph_health_status
 from gnocchi import storage
 from gnocchi import utils
 
@@ -232,3 +233,6 @@ def _get_object_content(self, name):
             content += data
             offset += len(data)
         return content
+
+    def get_health_status(self):  # Ceph probe lives in gnocchi.status
+        return get_ceph_health_status(self)
diff --git a/gnocchi/storage/file.py b/gnocchi/storage/file.py
@@ -28,6 +28,7 @@
 from oslo_config import cfg
 import six
 
+from gnocchi.status import get_file_health_status
 from gnocchi import carbonara
 from gnocchi import storage
 from gnocchi import utils
@@ -247,3 +248,6 @@ def _get_splits_unbatched(self, metric, key, aggregation, version=3):
             if e.errno == errno.ENOENT:
                 return
             raise
+
+    def get_health_status(self):  # file probe lives in gnocchi.status
+        return get_file_health_status(self)
diff --git a/gnocchi/storage/redis.py b/gnocchi/storage/redis.py
@@ -19,6 +19,7 @@
 
 from gnocchi import carbonara
 from gnocchi.common import redis
+from gnocchi.status import get_redis_health_status
 from gnocchi import storage
 from gnocchi import utils
 
@@ -193,3 +194,6 @@ def _get_splits(self, metrics_aggregations_keys, version=3):
             results[metric][aggregation] = result
 
         return results
+
+    def get_health_status(self):  # Redis probe lives in gnocchi.status
+        return get_redis_health_status(self)
diff --git a/gnocchi/storage/s3.py b/gnocchi/storage/s3.py
@@ -20,6 +20,7 @@
 
 from gnocchi import carbonara
 from gnocchi.common import s3
+from gnocchi.status import get_s3_health_status
 from gnocchi import storage
 from gnocchi import utils
 
@@ -242,3 +243,6 @@ def _store_unaggregated_timeseries_unbatched(
             Bucket=self._bucket_name,
             Key=self._build_unaggregated_timeserie_path(metric, version),
             Body=data)
+
+    def get_health_status(self):  # S3 probe lives in gnocchi.status
+        return get_s3_health_status(self)
diff --git a/gnocchi/storage/swift.py b/gnocchi/storage/swift.py
@@ -21,6 +21,7 @@
 
 from gnocchi import carbonara
 from gnocchi.common import swift
+from gnocchi.status import get_swift_health_status
 from gnocchi import storage
 from gnocchi import utils
 
@@ -221,3 +222,6 @@ def _store_unaggregated_timeseries_unbatched(
             self._container_name(metric),
             self._build_unaggregated_timeserie_path(version),
             data)
+
+    def get_health_status(self):  # Swift probe lives in gnocchi.status
+        return get_swift_health_status(self)