1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.zookeeper;
20
21 import java.util.List;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.HRegionInfo;
28 import org.apache.hadoop.hbase.RegionTransition;
29 import org.apache.hadoop.hbase.ServerName;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.executor.EventType;
32 import org.apache.zookeeper.AsyncCallback;
33 import org.apache.zookeeper.KeeperException;
34 import org.apache.zookeeper.KeeperException.Code;
35 import org.apache.zookeeper.data.Stat;
36
37 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be
38 // about zk and bytes only.
39
40 /**
41 * Utility class for doing region assignment in ZooKeeper. This class extends
42 * stuff done in {@link ZKUtil} to cover specific assignment operations.
43 * <p>
44 * Contains only static methods and constants.
45 * <p>
46 * Used by both the Master and RegionServer.
47 * <p>
48 * All valid transitions outlined below:
49 * <p>
50 * <b>MASTER</b>
51 * <ol>
52 * <li>
53 * Master creates an unassigned node as OFFLINE.
54 * - Cluster startup and table enabling.
55 * </li>
56 * <li>
57 * Master forces an existing unassigned node to OFFLINE.
58 * - RegionServer failure.
59 * - Allows transitions from all states to OFFLINE.
60 * </li>
61 * <li>
 *   Master deletes an unassigned node that was in an OPENED state.
 *   - Normal region transitions. Besides cluster startup, no other deletions
 *     of unassigned nodes are allowed.
65 * </li>
66 * <li>
67 * Master deletes all unassigned nodes regardless of state.
68 * - Cluster startup before any assignment happens.
69 * </li>
70 * </ol>
71 * <p>
72 * <b>REGIONSERVER</b>
73 * <ol>
74 * <li>
75 * RegionServer creates an unassigned node as CLOSING.
76 * - All region closes will do this in response to a CLOSE RPC from Master.
77 * - A node can never be transitioned to CLOSING, only created.
78 * </li>
79 * <li>
80 * RegionServer transitions an unassigned node from CLOSING to CLOSED.
81 * - Normal region closes. CAS operation.
82 * </li>
83 * <li>
84 * RegionServer transitions an unassigned node from OFFLINE to OPENING.
85 * - All region opens will do this in response to an OPEN RPC from the Master.
86 * - Normal region opens. CAS operation.
87 * </li>
88 * <li>
89 * RegionServer transitions an unassigned node from OPENING to OPENED.
90 * - Normal region opens. CAS operation.
91 * </li>
92 * </ol>
93 */
94 @InterfaceAudience.Private
95 public class ZKAssign {
96 private static final Log LOG = LogFactory.getLog(ZKAssign.class);
97
98 /**
99 * Gets the full path node name for the unassigned node for the specified
100 * region.
101 * @param zkw zk reference
102 * @param regionName region name
103 * @return full path node name
104 */
105 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
106 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
107 }
108
109 /**
110 * Gets the region name from the full path node name of an unassigned node.
111 * @param path full zk path
112 * @return region name
113 */
114 public static String getRegionName(ZooKeeperWatcher zkw, String path) {
115 return path.substring(zkw.assignmentZNode.length()+1);
116 }
117
118 // Master methods
119
120 /**
121 * Creates a new unassigned node in the OFFLINE state for the specified region.
122 *
123 * <p>Does not transition nodes from other states. If a node already exists
124 * for this region, a {@link org.apache.zookeeper.KeeperException.NodeExistsException}
125 * will be thrown.
126 *
127 * <p>Sets a watcher on the unassigned region node if the method is successful.
128 *
129 * <p>This method should only be used during cluster startup and the enabling
130 * of a table.
131 *
132 * @param zkw zk reference
133 * @param region region to be created as offline
134 * @param serverName server transition will happen on
135 * @throws KeeperException if unexpected zookeeper exception
136 * @throws KeeperException.NodeExistsException if node already exists
137 */
138 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
139 ServerName serverName)
140 throws KeeperException, KeeperException.NodeExistsException {
141 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
142 }
143
144 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
145 ServerName serverName, final EventType event)
146 throws KeeperException, KeeperException.NodeExistsException {
147 LOG.debug(zkw.prefix("Creating unassigned node " +
148 region.getEncodedName() + " in OFFLINE state"));
149 RegionTransition rt =
150 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
151 String node = getNodeName(zkw, region.getEncodedName());
152 ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
153 }
154
155 /**
156 * Creates an unassigned node in the OFFLINE state for the specified region.
157 * <p>
158 * Runs asynchronously. Depends on no pre-existing znode.
159 *
160 * <p>Sets a watcher on the unassigned region node.
161 *
162 * @param zkw zk reference
163 * @param region region to be created as offline
164 * @param serverName server transition will happen on
165 * @param cb
166 * @param ctx
167 * @throws KeeperException if unexpected zookeeper exception
168 * @throws KeeperException.NodeExistsException if node already exists
169 */
170 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
171 HRegionInfo region, ServerName serverName,
172 final AsyncCallback.StringCallback cb, final Object ctx)
173 throws KeeperException {
174 LOG.debug(zkw.prefix("Async create of unassigned node " +
175 region.getEncodedName() + " with OFFLINE state"));
176 RegionTransition rt =
177 RegionTransition.createRegionTransition(
178 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
179 String node = getNodeName(zkw, region.getEncodedName());
180 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
181 }
182
183 /**
184 * Creates or force updates an unassigned node to the OFFLINE state for the
185 * specified region.
186 * <p>
187 * Attempts to create the node but if it exists will force it to transition to
188 * and OFFLINE state.
189 *
190 * <p>Sets a watcher on the unassigned region node if the method is
191 * successful.
192 *
193 * <p>This method should be used when assigning a region.
194 *
195 * @param zkw zk reference
196 * @param region region to be created as offline
197 * @param serverName server transition will happen on
198 * @return the version of the znode created in OFFLINE state, -1 if
199 * unsuccessful.
200 * @throws KeeperException if unexpected zookeeper exception
201 * @throws KeeperException.NodeExistsException if node already exists
202 */
203 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
204 HRegionInfo region, ServerName serverName) throws KeeperException {
205 LOG.debug(zkw.prefix("Creating (or updating) unassigned node " +
206 region.getEncodedName() + " with OFFLINE state"));
207 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
208 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
209 byte [] data = rt.toByteArray();
210 String node = getNodeName(zkw, region.getEncodedName());
211 zkw.sync(node);
212 int version = ZKUtil.checkExists(zkw, node);
213 if (version == -1) {
214 return ZKUtil.createAndWatch(zkw, node, data);
215 } else {
216 boolean setData = false;
217 try {
218 setData = ZKUtil.setData(zkw, node, data, version);
219 // Setdata throws KeeperException which aborts the Master. So we are
220 // catching it here.
221 // If just before setting the znode to OFFLINE if the RS has made any
222 // change to the
223 // znode state then we need to return -1.
224 } catch (KeeperException kpe) {
225 LOG.info("Version mismatch while setting the node to OFFLINE state.");
226 return -1;
227 }
228 if (!setData) {
229 return -1;
230 } else {
231 // We successfully forced to OFFLINE, reset watch and handle if
232 // the state changed in between our set and the watch
233 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
234 rt = getRegionTransition(bytes);
235 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
236 // state changed, need to process
237 return -1;
238 }
239 }
240 }
241 return version + 1;
242 }
243
244 /**
245 * Deletes an existing unassigned node that is in the OPENED state for the
246 * specified region.
247 *
248 * <p>If a node does not already exist for this region, a
249 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
250 *
251 * <p>No watcher is set whether this succeeds or not.
252 *
253 * <p>Returns false if the node was not in the proper state but did exist.
254 *
255 * <p>This method is used during normal region transitions when a region
256 * finishes successfully opening. This is the Master acknowledging completion
257 * of the specified regions transition.
258 *
259 * @param zkw zk reference
260 * @param encodedRegionName opened region to be deleted from zk
261 * @param sn the expected region transition target server name
262 * @throws KeeperException if unexpected zookeeper exception
263 * @throws KeeperException.NoNodeException if node does not exist
264 */
265 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
266 String encodedRegionName, ServerName sn)
267 throws KeeperException, KeeperException.NoNodeException {
268 return deleteNode(zkw, encodedRegionName,
269 EventType.RS_ZK_REGION_OPENED, sn);
270 }
271
272 /**
273 * Deletes an existing unassigned node that is in the OFFLINE state for the
274 * specified region.
275 *
276 * <p>If a node does not already exist for this region, a
277 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
278 *
279 * <p>No watcher is set whether this succeeds or not.
280 *
281 * <p>Returns false if the node was not in the proper state but did exist.
282 *
283 * <p>This method is used during master failover when the regions on an RS
284 * that has died are all set to OFFLINE before being processed.
285 *
286 * @param zkw zk reference
287 * @param encodedRegionName closed region to be deleted from zk
288 * @param sn the expected region transition target server name
289 * @throws KeeperException if unexpected zookeeper exception
290 * @throws KeeperException.NoNodeException if node does not exist
291 */
292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
293 String encodedRegionName, ServerName sn)
294 throws KeeperException, KeeperException.NoNodeException {
295 return deleteNode(zkw, encodedRegionName,
296 EventType.M_ZK_REGION_OFFLINE, sn);
297 }
298
299 /**
300 * Deletes an existing unassigned node that is in the CLOSED state for the
301 * specified region.
302 *
303 * <p>If a node does not already exist for this region, a
304 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
305 *
306 * <p>No watcher is set whether this succeeds or not.
307 *
308 * <p>Returns false if the node was not in the proper state but did exist.
309 *
310 * <p>This method is used during table disables when a region finishes
311 * successfully closing. This is the Master acknowledging completion
312 * of the specified regions transition to being closed.
313 *
314 * @param zkw zk reference
315 * @param encodedRegionName closed region to be deleted from zk
316 * @param sn the expected region transition target server name
317 * @throws KeeperException if unexpected zookeeper exception
318 * @throws KeeperException.NoNodeException if node does not exist
319 */
320 public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
321 String encodedRegionName, ServerName sn)
322 throws KeeperException, KeeperException.NoNodeException {
323 return deleteNode(zkw, encodedRegionName,
324 EventType.RS_ZK_REGION_CLOSED, sn);
325 }
326
327 /**
328 * Deletes an existing unassigned node that is in the CLOSING state for the
329 * specified region.
330 *
331 * <p>If a node does not already exist for this region, a
332 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
333 *
334 * <p>No watcher is set whether this succeeds or not.
335 *
336 * <p>Returns false if the node was not in the proper state but did exist.
337 *
338 * <p>This method is used during table disables when a region finishes
339 * successfully closing. This is the Master acknowledging completion
340 * of the specified regions transition to being closed.
341 *
342 * @param zkw zk reference
343 * @param region closing region to be deleted from zk
344 * @param sn the expected region transition target server name
345 * @throws KeeperException if unexpected zookeeper exception
346 * @throws KeeperException.NoNodeException if node does not exist
347 */
348 public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
349 HRegionInfo region, ServerName sn)
350 throws KeeperException, KeeperException.NoNodeException {
351 String encodedRegionName = region.getEncodedName();
352 return deleteNode(zkw, encodedRegionName,
353 EventType.M_ZK_REGION_CLOSING, sn);
354 }
355
356 /**
357 * Deletes an existing unassigned node that is in the specified state for the
358 * specified region.
359 *
360 * <p>If a node does not already exist for this region, a
361 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
362 *
363 * <p>No watcher is set whether this succeeds or not.
364 *
365 * <p>Returns false if the node was not in the proper state but did exist.
366 *
367 * <p>This method is used when a region finishes opening/closing.
368 * The Master acknowledges completion
369 * of the specified regions transition to being closed/opened.
370 *
371 * @param zkw zk reference
372 * @param encodedRegionName region to be deleted from zk
373 * @param expectedState state region must be in for delete to complete
374 * @param sn the expected region transition target server name
375 * @throws KeeperException if unexpected zookeeper exception
376 * @throws KeeperException.NoNodeException if node does not exist
377 */
378 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
379 EventType expectedState, ServerName sn)
380 throws KeeperException, KeeperException.NoNodeException {
381 return deleteNode(zkw, encodedRegionName, expectedState, sn, -1);
382 }
383
384 /**
385 * Deletes an existing unassigned node that is in the specified state for the
386 * specified region.
387 *
388 * <p>If a node does not already exist for this region, a
389 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
390 *
391 * <p>No watcher is set whether this succeeds or not.
392 *
393 * <p>Returns false if the node was not in the proper state but did exist.
394 *
395 * <p>This method is used when a region finishes opening/closing.
396 * The Master acknowledges completion
397 * of the specified regions transition to being closed/opened.
398 *
399 * @param zkw zk reference
400 * @param encodedRegionName region to be deleted from zk
401 * @param expectedState state region must be in for delete to complete
402 * @param expectedVersion of the znode that is to be deleted.
403 * If expectedVersion need not be compared while deleting the znode
404 * pass -1
405 * @throws KeeperException if unexpected zookeeper exception
406 * @throws KeeperException.NoNodeException if node does not exist
407 */
408 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
409 EventType expectedState, int expectedVersion)
410 throws KeeperException, KeeperException.NoNodeException {
411 return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion);
412 }
413
414 /**
415 * Deletes an existing unassigned node that is in the specified state for the
416 * specified region.
417 *
418 * <p>If a node does not already exist for this region, a
419 * {@link org.apache.zookeeper.KeeperException.NoNodeException} will be thrown.
420 *
421 * <p>No watcher is set whether this succeeds or not.
422 *
423 * <p>Returns false if the node was not in the proper state but did exist.
424 *
425 * <p>This method is used when a region finishes opening/closing.
426 * The Master acknowledges completion
427 * of the specified regions transition to being closed/opened.
428 *
429 * @param zkw zk reference
430 * @param encodedRegionName region to be deleted from zk
431 * @param expectedState state region must be in for delete to complete
432 * @param serverName the expected region transition target server name
433 * @param expectedVersion of the znode that is to be deleted.
434 * If expectedVersion need not be compared while deleting the znode
435 * pass -1
436 * @throws KeeperException if unexpected zookeeper exception
437 * @throws KeeperException.NoNodeException if node does not exist
438 */
439 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
440 EventType expectedState, ServerName serverName, int expectedVersion)
441 throws KeeperException, KeeperException.NoNodeException {
442 if (LOG.isTraceEnabled()) {
443 LOG.trace(zkw.prefix("Deleting existing unassigned " +
444 "node " + encodedRegionName + " in expected state " + expectedState));
445 }
446 String node = getNodeName(zkw, encodedRegionName);
447 zkw.sync(node);
448 Stat stat = new Stat();
449 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
450 if (bytes == null) {
451 // If it came back null, node does not exist.
452 throw KeeperException.create(Code.NONODE);
453 }
454 RegionTransition rt = getRegionTransition(bytes);
455 EventType et = rt.getEventType();
456 if (!et.equals(expectedState)) {
457 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
458 expectedState + " state but node is in " + et + " state"));
459 return false;
460 }
461 // Verify the server transition happens on is not changed
462 if (serverName != null && !rt.getServerName().equals(serverName)) {
463 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName
464 + " with target " + serverName + " but node has " + rt.getServerName()));
465 return false;
466 }
467 if (expectedVersion != -1
468 && stat.getVersion() != expectedVersion) {
469 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
470 " the expected one. Got a version mismatch");
471 return false;
472 }
473 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
474 LOG.warn(zkw.prefix("Attempting to delete " +
475 "unassigned node " + encodedRegionName + " in " + expectedState +
476 " state but after verifying state, we got a version mismatch"));
477 return false;
478 }
479 LOG.debug(zkw.prefix("Deleted unassigned node " +
480 encodedRegionName + " in expected state " + expectedState));
481 return true;
482 }
483
484 /**
485 * Deletes all unassigned nodes regardless of their state.
486 *
487 * <p>No watchers are set.
488 *
489 * <p>This method is used by the Master during cluster startup to clear out
490 * any existing state from other cluster runs.
491 *
492 * @param zkw zk reference
493 * @throws KeeperException if unexpected zookeeper exception
494 */
495 public static void deleteAllNodes(ZooKeeperWatcher zkw)
496 throws KeeperException {
497 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
498 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
499 }
500
501 /**
502 * Creates a new unassigned node in the CLOSING state for the specified
503 * region.
504 *
505 * <p>Does not transition nodes from any states. If a node already exists
506 * for this region, a {@link org.apache.zookeeper.KeeperException.NodeExistsException}
507 * will be thrown.
508 *
509 * <p>If creation is successful, returns the version number of the CLOSING
510 * node created.
511 *
512 * <p>Set a watch.
513 *
514 * <p>This method should only be used by a Master when initiating a
515 * close of a region before sending a close request to the region server.
516 *
517 * @param zkw zk reference
518 * @param region region to be created as closing
519 * @param serverName server transition will happen on
520 * @return version of node after transition, -1 if unsuccessful transition
521 * @throws KeeperException if unexpected zookeeper exception
522 * @throws KeeperException.NodeExistsException if node already exists
523 */
524 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
525 ServerName serverName)
526 throws KeeperException, KeeperException.NodeExistsException {
527 LOG.debug(zkw.prefix("Creating unassigned node " +
528 region.getEncodedName() + " in a CLOSING state"));
529 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
530 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
531 String node = getNodeName(zkw, region.getEncodedName());
532 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
533 }
534
535 // RegionServer methods
536
537 /**
538 * Transitions an existing unassigned node for the specified region which is
539 * currently in the CLOSING state to be in the CLOSED state.
540 *
541 * <p>Does not transition nodes from other states. If for some reason the
542 * node could not be transitioned, the method returns -1. If the transition
543 * is successful, the version of the node after transition is returned.
544 *
545 * <p>This method can fail and return false for three different reasons:
546 * <ul><li>Unassigned node for this region does not exist</li>
547 * <li>Unassigned node for this region is not in CLOSING state</li>
548 * <li>After verifying CLOSING state, update fails because of wrong version
549 * (someone else already transitioned the node)</li>
550 * </ul>
551 *
552 * <p>Does not set any watches.
553 *
554 * <p>This method should only be used by a RegionServer when initiating a
555 * close of a region after receiving a CLOSE RPC from the Master.
556 *
557 * @param zkw zk reference
558 * @param region region to be transitioned to closed
559 * @param serverName server transition happens on
560 * @return version of node after transition, -1 if unsuccessful transition
561 * @throws KeeperException if unexpected zookeeper exception
562 */
563 public static int transitionNodeClosed(ZooKeeperWatcher zkw,
564 HRegionInfo region, ServerName serverName, int expectedVersion)
565 throws KeeperException {
566 return transitionNode(zkw, region, serverName,
567 EventType.M_ZK_REGION_CLOSING,
568 EventType.RS_ZK_REGION_CLOSED, expectedVersion);
569 }
570
571 /**
572 * Transitions an existing unassigned node for the specified region which is
573 * currently in the OFFLINE state to be in the OPENING state.
574 *
575 * <p>Does not transition nodes from other states. If for some reason the
576 * node could not be transitioned, the method returns -1. If the transition
577 * is successful, the version of the node written as OPENING is returned.
578 *
579 * <p>This method can fail and return -1 for three different reasons:
580 * <ul><li>Unassigned node for this region does not exist</li>
581 * <li>Unassigned node for this region is not in OFFLINE state</li>
582 * <li>After verifying OFFLINE state, update fails because of wrong version
583 * (someone else already transitioned the node)</li>
584 * </ul>
585 *
586 * <p>Does not set any watches.
587 *
588 * <p>This method should only be used by a RegionServer when initiating an
589 * open of a region after receiving an OPEN RPC from the Master.
590 *
591 * @param zkw zk reference
592 * @param region region to be transitioned to opening
593 * @param serverName server transition happens on
594 * @return version of node after transition, -1 if unsuccessful transition
595 * @throws KeeperException if unexpected zookeeper exception
596 */
597 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
598 HRegionInfo region, ServerName serverName)
599 throws KeeperException {
600 return transitionNodeOpening(zkw, region, serverName,
601 EventType.M_ZK_REGION_OFFLINE);
602 }
603
604 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
605 HRegionInfo region, ServerName serverName, final EventType beginState)
606 throws KeeperException {
607 return transitionNode(zkw, region, serverName, beginState,
608 EventType.RS_ZK_REGION_OPENING, -1);
609 }
610
611 /**
612 * Confirm an existing unassigned node for the specified region which is
613 * currently in the OPENING state to be still in the OPENING state on
614 * the specified server.
615 *
616 * <p>If for some reason the check fails, the method returns -1. Otherwise,
617 * the version of the node (same as the expected version) is returned.
618 *
619 * <p>This method can fail and return -1 for three different reasons:
620 * <ul><li>Unassigned node for this region does not exist</li>
621 * <li>Unassigned node for this region is not in OPENING state</li>
622 * <li>After verifying OPENING state, the server name or the version of the
623 * doesn't match)</li>
624 * </ul>
625 *
626 * <p>Does not set any watches.
627 *
628 * <p>This method should only be used by a RegionServer when initiating an
629 * open of a region after receiving an OPEN RPC from the Master.
630 *
631 * @param zkw zk reference
632 * @param region region to be transitioned to opening
633 * @param serverName server transition happens on
634 * @return version of node after transition, -1 if unsuccessful transition
635 * @throws KeeperException if unexpected zookeeper exception
636 */
637 public static int confirmNodeOpening(ZooKeeperWatcher zkw,
638 HRegionInfo region, ServerName serverName, int expectedVersion)
639 throws KeeperException {
640
641 String encoded = region.getEncodedName();
642 if(LOG.isDebugEnabled()) {
643 LOG.debug(zkw.prefix("Attempting to retransition opening state of node " +
644 HRegionInfo.prettyPrint(encoded)));
645 }
646
647 String node = getNodeName(zkw, encoded);
648 zkw.sync(node);
649
650 // Read existing data of the node
651 Stat stat = new Stat();
652 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
653 if (existingBytes == null) {
654 // Node no longer exists. Return -1. It means unsuccessful transition.
655 return -1;
656 }
657 RegionTransition rt = getRegionTransition(existingBytes);
658
659 // Verify it is the expected version
660 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
661 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
662 "unassigned node for " + encoded + " failed, " +
663 "the node existed but was version " + stat.getVersion() +
664 " not the expected version " + expectedVersion));
665 return -1;
666 }
667
668 // Verify it is in expected state
669 EventType et = rt.getEventType();
670 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
671 String existingServer = (rt.getServerName() == null)
672 ? "<unknown>" : rt.getServerName().toString();
673 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
674 + encoded + " failed, the node existed but was in the state " + et +
675 " set by the server " + existingServer));
676 return -1;
677 }
678
679 return expectedVersion;
680 }
681
682 /**
683 * Transitions an existing unassigned node for the specified region which is
684 * currently in the OPENING state to be in the OPENED state.
685 *
686 * <p>Does not transition nodes from other states. If for some reason the
687 * node could not be transitioned, the method returns -1. If the transition
688 * is successful, the version of the node after transition is returned.
689 *
690 * <p>This method can fail and return false for three different reasons:
691 * <ul><li>Unassigned node for this region does not exist</li>
692 * <li>Unassigned node for this region is not in OPENING state</li>
693 * <li>After verifying OPENING state, update fails because of wrong version
694 * (this should never actually happen since an RS only does this transition
695 * following a transition to OPENING. if two RS are conflicting, one would
696 * fail the original transition to OPENING and not this transition)</li>
697 * </ul>
698 *
699 * <p>Does not set any watches.
700 *
701 * <p>This method should only be used by a RegionServer when completing the
702 * open of a region.
703 *
704 * @param zkw zk reference
705 * @param region region to be transitioned to opened
706 * @param serverName server transition happens on
707 * @return version of node after transition, -1 if unsuccessful transition
708 * @throws KeeperException if unexpected zookeeper exception
709 */
710 public static int transitionNodeOpened(ZooKeeperWatcher zkw,
711 HRegionInfo region, ServerName serverName, int expectedVersion)
712 throws KeeperException {
713 return transitionNode(zkw, region, serverName,
714 EventType.RS_ZK_REGION_OPENING,
715 EventType.RS_ZK_REGION_OPENED, expectedVersion);
716 }
717
718 /**
719 *
720 * @param zkw zk reference
721 * @param region region to be closed
722 * @param expectedVersion expected version of the znode
723 * @return true if the znode exists, has the right version and the right state. False otherwise.
724 * @throws KeeperException
725 */
726 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
727 int expectedVersion) throws KeeperException {
728
729 final String encoded = getNodeName(zkw, region.getEncodedName());
730 zkw.sync(encoded);
731
732 // Read existing data of the node
733 Stat stat = new Stat();
734 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
735
736 if (existingBytes == null) {
737 LOG.warn(zkw.prefix("Attempt to check the " +
738 "closing node for " + encoded +
739 ". The node does not exist"));
740 return false;
741 }
742
743 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
744 LOG.warn(zkw.prefix("Attempt to check the " +
745 "closing node for " + encoded +
746 ". The node existed but was version " + stat.getVersion() +
747 " not the expected version " + expectedVersion));
748 return false;
749 }
750
751 RegionTransition rt = getRegionTransition(existingBytes);
752
753 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
754 LOG.warn(zkw.prefix("Attempt to check the " +
755 "closing node for " + encoded +
756 ". The node existed but was in an unexpected state: " + rt.getEventType()));
757 return false;
758 }
759
760 return true;
761 }
762
763 /**
764 * Method that actually performs unassigned node transitions.
765 *
766 * <p>Attempts to transition the unassigned node for the specified region
767 * from the expected state to the state in the specified transition data.
768 *
769 * <p>Method first reads existing data and verifies it is in the expected
770 * state. If the node does not exist or the node is not in the expected
771 * state, the method returns -1. If the transition is successful, the
772 * version number of the node following the transition is returned.
773 *
774 * <p>If the read state is what is expected, it attempts to write the new
775 * state and data into the node. When doing this, it includes the expected
776 * version (determined when the existing state was verified) to ensure that
777 * only one transition is successful. If there is a version mismatch, the
778 * method returns -1.
779 *
780 * <p>If the write is successful, no watch is set and the method returns true.
781 *
782 * @param zkw zk reference
783 * @param region region to be transitioned to opened
784 * @param serverName server transition happens on
785 * @param endState state to transition node to if all checks pass
786 * @param beginState state the node must currently be in to do transition
787 * @param expectedVersion expected version of data before modification, or -1
788 * @return version of node after transition, -1 if unsuccessful transition
789 * @throws KeeperException if unexpected zookeeper exception
790 */
791 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
792 ServerName serverName, EventType beginState, EventType endState,
793 int expectedVersion)
794 throws KeeperException {
795 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
796 }
797
798
799 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
800 ServerName serverName, EventType beginState, EventType endState,
801 int expectedVersion, final byte [] payload)
802 throws KeeperException {
803 String encoded = region.getEncodedName();
804 if(LOG.isDebugEnabled()) {
805 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
806 " from " + beginState.toString() + " to " + endState.toString()));
807 }
808
809 String node = getNodeName(zkw, encoded);
810 zkw.sync(node);
811
812 // Read existing data of the node
813 Stat stat = new Stat();
814 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
815 if (existingBytes == null) {
816 // Node no longer exists. Return -1. It means unsuccessful transition.
817 return -1;
818 }
819
820 // Verify it is the expected version
821 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
822 LOG.warn(zkw.prefix("Attempt to transition the " +
823 "unassigned node for " + encoded +
824 " from " + beginState + " to " + endState + " failed, " +
825 "the node existed but was version " + stat.getVersion() +
826 " not the expected version " + expectedVersion));
827 return -1;
828 }
829
830 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
831 && endState.equals(EventType.RS_ZK_REGION_OPENING)
832 && expectedVersion == -1 && stat.getVersion() != 0) {
833 // the below check ensures that double assignment doesnot happen.
834 // When the node is created for the first time then the expected version
835 // that is passed will be -1 and the version in znode will be 0.
836 // In all other cases the version in znode will be > 0.
837 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
838 + encoded + " from " + beginState + " to " + endState + " failed, "
839 + "the node existed but was version " + stat.getVersion()
840 + " not the expected version " + expectedVersion));
841 return -1;
842 }
843
844 RegionTransition rt = getRegionTransition(existingBytes);
845
846 // Verify the server transition happens on is not changed
847 if (!rt.getServerName().equals(serverName)) {
848 LOG.warn(zkw.prefix("Attempt to transition the " +
849 "unassigned node for " + encoded +
850 " from " + beginState + " to " + endState + " failed, " +
851 "the server that tried to transition was " + serverName +
852 " not the expected " + rt.getServerName()));
853 return -1;
854 }
855
856 // Verify it is in expected state
857 EventType et = rt.getEventType();
858 if (!et.equals(beginState)) {
859 String existingServer = (rt.getServerName() == null)
860 ? "<unknown>" : rt.getServerName().toString();
861 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
862 + " from " + beginState + " to " + endState + " failed, the node existed but"
863 + " was in the state " + et + " set by the server " + existingServer));
864 return -1;
865 }
866
867 // Write new data, ensuring data has not changed since we last read it
868 try {
869 rt = RegionTransition.createRegionTransition(
870 endState, region.getRegionName(), serverName, payload);
871 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
872 LOG.warn(zkw.prefix("Attempt to transition the " +
873 "unassigned node for " + encoded +
874 " from " + beginState + " to " + endState + " failed, " +
875 "the node existed and was in the expected state but then when " +
876 "setting data we got a version mismatch"));
877 return -1;
878 }
879 if(LOG.isDebugEnabled()) {
880 LOG.debug(zkw.prefix("Transitioned node " + encoded +
881 " from " + beginState + " to " + endState));
882 }
883 return stat.getVersion() + 1;
884 } catch (KeeperException.NoNodeException nne) {
885 LOG.warn(zkw.prefix("Attempt to transition the " +
886 "unassigned node for " + encoded +
887 " from " + beginState + " to " + endState + " failed, " +
888 "the node existed and was in the expected state but then when " +
889 "setting data it no longer existed"));
890 return -1;
891 }
892 }
893
894 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
895 try {
896 return RegionTransition.parseFrom(bytes);
897 } catch (DeserializationException e) {
898 // Convert to a zk exception for now. Otherwise have to change API
899 throw ZKUtil.convert(e);
900 }
901 }
902
903 /**
904 * Gets the current data in the unassigned node for the specified region name
905 * or fully-qualified path.
906 *
907 * <p>Returns null if the region does not currently have a node.
908 *
909 * <p>Sets a watch on the node if the node exists.
910 *
911 * @param zkw zk reference
912 * @param pathOrRegionName fully-specified path or region name
913 * @return znode content
914 * @throws KeeperException if unexpected zookeeper exception
915 */
916 public static byte [] getData(ZooKeeperWatcher zkw,
917 String pathOrRegionName)
918 throws KeeperException {
919 String node = getPath(zkw, pathOrRegionName);
920 return ZKUtil.getDataAndWatch(zkw, node);
921 }
922
923 /**
924 * Gets the current data in the unassigned node for the specified region name
925 * or fully-qualified path.
926 *
927 * <p>Returns null if the region does not currently have a node.
928 *
929 * <p>Sets a watch on the node if the node exists.
930 *
931 * @param zkw zk reference
932 * @param pathOrRegionName fully-specified path or region name
933 * @param stat object to populate the version.
934 * @return znode content
935 * @throws KeeperException if unexpected zookeeper exception
936 */
937 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
938 String pathOrRegionName, Stat stat)
939 throws KeeperException {
940 String node = getPath(zkw, pathOrRegionName);
941 return ZKUtil.getDataAndWatch(zkw, node, stat);
942 }
943
944 /**
945 * Gets the current data in the unassigned node for the specified region name
946 * or fully-qualified path.
947 *
948 * <p>Returns null if the region does not currently have a node.
949 *
950 * <p>Does not set a watch.
951 *
952 * @param zkw zk reference
953 * @param pathOrRegionName fully-specified path or region name
954 * @param stat object to store node info into on getData call
955 * @return znode content
956 * @throws KeeperException if unexpected zookeeper exception
957 */
958 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
959 String pathOrRegionName, Stat stat)
960 throws KeeperException {
961 String node = getPath(zkw, pathOrRegionName);
962 return ZKUtil.getDataNoWatch(zkw, node, stat);
963 }
964
965 /**
966 * @param zkw
967 * @param pathOrRegionName
968 * @return Path to znode
969 */
970 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
971 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
972 }
973
974 /**
975 * Get the version of the specified znode
976 * @param zkw zk reference
977 * @param region region's info
978 * @return the version of the znode, -1 if it doesn't exist
979 * @throws KeeperException
980 */
981 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
982 throws KeeperException {
983 String znode = getNodeName(zkw, region.getEncodedName());
984 return ZKUtil.checkExists(zkw, znode);
985 }
986
987 /**
988 * Delete the assignment node regardless of its current state.
989 * <p>
990 * Fail silent even if the node does not exist at all.
991 * @param watcher
992 * @param regionInfo
993 * @throws KeeperException
994 */
995 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
996 HRegionInfo regionInfo)
997 throws KeeperException {
998 String node = getNodeName(watcher, regionInfo.getEncodedName());
999 ZKUtil.deleteNodeFailSilent(watcher, node);
1000 }
1001
1002 /**
1003 * Blocks until there are no node in regions in transition.
1004 * <p>
1005 * Used in testing only.
1006 * @param zkw zk reference
1007 * @throws KeeperException
1008 * @throws InterruptedException
1009 */
1010 public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
1011 throws KeeperException, InterruptedException {
1012 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1013 List<String> znodes =
1014 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1015 if (znodes != null && !znodes.isEmpty()) {
1016 LOG.debug("Waiting on RIT: " + znodes);
1017 }
1018 Thread.sleep(100);
1019 }
1020 }
1021
1022 /**
1023 * Blocks until there is at least one node in regions in transition.
1024 * <p>
1025 * Used in testing only.
1026 * @param zkw zk reference
1027 * @throws KeeperException
1028 * @throws InterruptedException
1029 */
1030 public static void blockUntilRIT(ZooKeeperWatcher zkw)
1031 throws KeeperException, InterruptedException {
1032 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1033 List<String> znodes =
1034 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1035 if (znodes == null || znodes.isEmpty()) {
1036 LOG.debug("No RIT in ZK");
1037 }
1038 Thread.sleep(100);
1039 }
1040 }
1041
1042 /**
1043 * Presume bytes are serialized unassigned data structure
1044 * @param znodeBytes
1045 * @return String of the deserialized znode bytes.
1046 */
1047 static String toString(final byte[] znodeBytes) {
1048 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the
1049 // method in here so RegionTransition does not leak into ZKUtil.
1050 try {
1051 RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1052 return rt.toString();
1053 } catch (DeserializationException e) {
1054 return "";
1055 }
1056 }
1057 }