1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package org.apache.hadoop.hbase.chaos.monkies;
20
21 import org.apache.hadoop.hbase.Stoppable;
22
23 /**
 * A utility to inject faults in a running cluster.
25 * <p>
 * ChaosMonkey defines Actions and Policies. Actions are sequences of events, like
 * <ul>
 * <li>Select a random server to kill</li>
 * <li>Sleep for 5 sec</li>
 * <li>Start the server on the same host</li>
 * </ul>
 * Actions can also be complex events, like rolling restart of all of the servers.
31 * <p>
32 * Policies on the other hand are responsible for executing the actions based on a strategy.
33 * The default policy is to execute a random action every minute based on predefined action
34 * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
35 * policy can be active at any time.
36 * <p>
37 * Chaos monkey can be run from the command line, or can be invoked from integration tests.
38 * See {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use
39 * chaos monkey for code examples.
40 * <p>
 * The ChaosMonkey class is inspired by Netflix's tool of the same name:
 * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html
43 */
44 public abstract class ChaosMonkey implements Stoppable {
45 public abstract void start() throws Exception;
46
47 @Override
48 public abstract void stop(String why);
49
50 @Override
51 public abstract boolean isStopped();
52
53 public abstract void waitForStop() throws InterruptedException;
54
55 /**
56 * Returns whether the CM does destructive actions (killing servers) so that a cluster restore
57 * is needed after CM is stopped. Otherwise cluster will be left as it is
58 */
59 public abstract boolean isDestructive();
60 }