1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  package org.apache.hadoop.hbase;
19  
20  import org.apache.commons.logging.Log;
21  import org.apache.commons.logging.LogFactory;
22  import org.apache.hadoop.conf.Configuration;
23  import org.apache.hadoop.hbase.HealthChecker.HealthCheckerExitStatus;
24  import org.apache.hadoop.util.StringUtils;
25  
26  
27  
28  
29  public class HealthCheckChore extends ScheduledChore {
30    private static Log LOG = LogFactory.getLog(HealthCheckChore.class);
31    private HealthChecker healthChecker;
32    private Configuration config;
33    private int threshold;
34    private int numTimesUnhealthy = 0;
35    private long failureWindow;
36    private long startWindow;
37  
38    public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
39      super("HealthChecker", stopper, sleepTime);
40      LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
41      this.config = conf;
42      String healthCheckScript = this.config.get(HConstants.HEALTH_SCRIPT_LOC);
43      long scriptTimeout = this.config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT,
44        HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
45      healthChecker = new HealthChecker();
46      healthChecker.init(healthCheckScript, scriptTimeout);
47      this.threshold = config.getInt(HConstants.HEALTH_FAILURE_THRESHOLD,
48        HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
49      this.failureWindow = (long)this.threshold * (long)sleepTime;
50    }
51  
52    @Override
53    protected void chore() {
54      HealthReport report = healthChecker.checkHealth();
55      boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS);
56      if (!isHealthy) {
57        boolean needToStop = decideToStop();
58        if (needToStop) {
59          getStopper().stop(
60            "The  node reported unhealthy " + threshold + " number of times consecutively.");
61        }
62        
63        LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : "
64            + report.getHealthReport());
65      }
66    }
67  
68    private boolean decideToStop() {
69      boolean stop = false;
70      if (numTimesUnhealthy == 0) {
71        
72        
73        numTimesUnhealthy++;
74        startWindow = System.currentTimeMillis();
75      } else {
76        if ((System.currentTimeMillis() - startWindow) < failureWindow) {
77          numTimesUnhealthy++;
78          if (numTimesUnhealthy == threshold) {
79            stop = true;
80          } else {
81            stop = false;
82          }
83        } else {
84          
85          numTimesUnhealthy = 1;
86          startWindow = System.currentTimeMillis();
87          stop = false;
88        }
89      }
90      return stop;
91    }
92  
93  }