1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  package org.apache.hadoop.hbase.util;
19  
20  import java.lang.management.GarbageCollectorMXBean;
21  import java.lang.management.ManagementFactory;
22  import java.util.List;
23  import java.util.Map;
24  import java.util.Set;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.metrics.JvmPauseMonitorSource;
30  import org.apache.hadoop.conf.Configuration;
31  
32  import com.google.common.base.Joiner;
33  import com.google.common.base.Preconditions;
34  import com.google.common.base.Stopwatch;
35  import com.google.common.collect.Lists;
36  import com.google.common.collect.Maps;
37  import com.google.common.collect.Sets;
38  
39  
40  
41  
42  
43  
44  
45  
46  
47  
48  
49  
50  
51  @InterfaceAudience.Private
52  public class JvmPauseMonitor {
53    private static final Log LOG = LogFactory.getLog(JvmPauseMonitor.class);
54  
55    
56    private static final long SLEEP_INTERVAL_MS = 500;
57    
58    
59    private final long warnThresholdMs;
60    public static final String WARN_THRESHOLD_KEY =
61        "jvm.pause.warn-threshold.ms";
62    private static final long WARN_THRESHOLD_DEFAULT = 10000;
63    
64    
65    private final long infoThresholdMs;
66    public static final String INFO_THRESHOLD_KEY =
67        "jvm.pause.info-threshold.ms";
68    private static final long INFO_THRESHOLD_DEFAULT = 1000;
69  
70    private Thread monitorThread;
71    private volatile boolean shouldRun = true;
72    private JvmPauseMonitorSource metricsSource;
73  
74    public JvmPauseMonitor(Configuration conf) {
75      this(conf, null);
76    }
77  
78    public JvmPauseMonitor(Configuration conf, JvmPauseMonitorSource metricsSource) {
79      this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, WARN_THRESHOLD_DEFAULT);
80      this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, INFO_THRESHOLD_DEFAULT);
81      this.metricsSource = metricsSource;
82    }
83    
84    public void start() {
85      Preconditions.checkState(monitorThread == null, "Already started");
86      monitorThread = new Thread(new Monitor());
87      monitorThread.setDaemon(true);
88      monitorThread.setName("JvmPauseMonitor");
89      monitorThread.start();
90    }
91  
92    public void stop() {
93      shouldRun = false;
94      monitorThread.interrupt();
95      try {
96        monitorThread.join();
97      } catch (InterruptedException e) {
98        Thread.currentThread().interrupt();
99      }
100   }
101   
102   private String formatMessage(long extraSleepTime, List<String> gcDiffs) {
103     String ret = "Detected pause in JVM or host machine (eg GC): " + "pause of approximately "
104         + extraSleepTime + "ms\n";
105     if (gcDiffs.isEmpty()) {
106       ret += "No GCs detected";
107     } else {
108       ret += Joiner.on("\n").join(gcDiffs);
109     }
110     return ret;
111   }
112   
113   private Map<String, GcTimes> getGcTimes() {
114     Map<String, GcTimes> map = Maps.newHashMap();
115     List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
116     for (GarbageCollectorMXBean gcBean : gcBeans) {
117       map.put(gcBean.getName(), new GcTimes(gcBean));
118     }
119     return map;
120   }
121 
122   private static class GcTimes {
123     private GcTimes(GarbageCollectorMXBean gcBean) {
124       gcCount = gcBean.getCollectionCount();
125       gcTimeMillis = gcBean.getCollectionTime();
126     }
127 
128     private GcTimes(long count, long time) {
129       this.gcCount = count;
130       this.gcTimeMillis = time;
131     }
132 
133     private GcTimes subtract(GcTimes other) {
134       return new GcTimes(this.gcCount - other.gcCount, this.gcTimeMillis - other.gcTimeMillis);
135     }
136 
137     @Override
138     public String toString() {
139       return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
140     }
141 
142     private long gcCount;
143     private long gcTimeMillis;
144   }
145 
146   private class Monitor implements Runnable {
147     @Override
148     public void run() {
149       Stopwatch sw = new Stopwatch();
150       Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
151       while (shouldRun) {
152         sw.reset().start();
153         try {
154           Thread.sleep(SLEEP_INTERVAL_MS);
155         } catch (InterruptedException ie) {
156           return;
157         }
158 
159         long extraSleepTime = sw.elapsedMillis() - SLEEP_INTERVAL_MS;
160         Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
161 
162         if (extraSleepTime > infoThresholdMs) {
163           Set<String> gcBeanNames = Sets.intersection(gcTimesAfterSleep.keySet(),
164             gcTimesBeforeSleep.keySet());
165           List<String> gcDiffs = Lists.newArrayList();
166           for (String name : gcBeanNames) {
167             GcTimes diff = gcTimesAfterSleep.get(name).subtract(gcTimesBeforeSleep.get(name));
168             if (diff.gcCount != 0) {
169               gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff.toString());
170             }
171           }
172 
173           updateMetrics(extraSleepTime, !gcDiffs.isEmpty());
174 
175           if (extraSleepTime > warnThresholdMs) {
176             LOG.warn(formatMessage(extraSleepTime, gcDiffs));
177           } else {
178             LOG.info(formatMessage(extraSleepTime, gcDiffs));
179           }
180         }
181         gcTimesBeforeSleep = gcTimesAfterSleep;
182       }
183     }
184   }
185 
186   public void updateMetrics(long sleepTime, boolean gcDetected) {
187     if (metricsSource != null) {
188       if (sleepTime > warnThresholdMs) {
189         metricsSource.incWarnThresholdExceeded(1);
190       } else {
191         metricsSource.incInfoThresholdExceeded(1);
192       }
193       if (gcDetected) {
194         metricsSource.updatePauseTimeWithGc(sleepTime);
195       } else {
196         metricsSource.updatePauseTimeWithoutGc(sleepTime);
197       }
198     }
199   }
200 
201   public JvmPauseMonitorSource getMetricsSource() {
202     return metricsSource;
203   }
204 
205   public void setMetricsSource(JvmPauseMonitorSource metricsSource) {
206     this.metricsSource = metricsSource;
207   }
208 
209   
210 
211 
212 
213 
214 
215 
216   public static void main(String []args) throws Exception {
217     new JvmPauseMonitor(new Configuration()).start();
218     List<String> list = Lists.newArrayList();
219     int i = 0;
220     while (true) {
221       list.add(String.valueOf(i++));
222     }
223   }
224 }