1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.regionserver;
19
20 import java.io.IOException;
21 import java.util.Collection;
22 import java.util.List;
23 import java.util.Map;
24
25 import org.apache.hadoop.hbase.Cell;
26 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
27 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
28 import org.apache.hadoop.hbase.HRegionInfo;
29 import org.apache.hadoop.hbase.HTableDescriptor;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.hbase.classification.InterfaceStability;
32 import org.apache.hadoop.hbase.client.Append;
33 import org.apache.hadoop.hbase.client.Delete;
34 import org.apache.hadoop.hbase.client.Get;
35 import org.apache.hadoop.hbase.client.Increment;
36 import org.apache.hadoop.hbase.client.IsolationLevel;
37 import org.apache.hadoop.hbase.client.Mutation;
38 import org.apache.hadoop.hbase.client.Put;
39 import org.apache.hadoop.hbase.client.Result;
40 import org.apache.hadoop.hbase.client.RowMutations;
41 import org.apache.hadoop.hbase.client.Scan;
42 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
43 import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
44 import org.apache.hadoop.hbase.filter.ByteArrayComparable;
45 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
46 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
47 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
48 import org.apache.hadoop.hbase.util.Pair;
49 import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay;
50
51 import com.google.protobuf.Message;
52 import com.google.protobuf.RpcController;
53 import com.google.protobuf.Service;
54
55 /**
56 * Regions store data for a certain region of a table. It stores all columns
57 * for each row. A given table consists of one or more Regions.
58 *
59 * <p>An Region is defined by its table and its key extent.
60 *
61 * <p>Locking at the Region level serves only one purpose: preventing the
62 * region from being closed (and consequently split) while other operations
63 * are ongoing. Each row level operation obtains both a row lock and a region
64 * read lock for the duration of the operation. While a scanner is being
65 * constructed, getScanner holds a read lock. If the scanner is successfully
66 * constructed, it holds a read lock until it is closed. A close takes out a
67 * write lock and consequently will block for ongoing operations and will block
68 * new operations from starting while the close is in progress.
69 */
70 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
71 @InterfaceStability.Evolving
72 public interface Region extends ConfigurationObserver {
73
74 ///////////////////////////////////////////////////////////////////////////
75 // Region state
76
77 /** @return region information for this region */
78 HRegionInfo getRegionInfo();
79
80 /** @return table descriptor for this region */
81 HTableDescriptor getTableDesc();
82
83 /** @return true if region is available (not closed and not closing) */
84 boolean isAvailable();
85
86 /** @return true if region is closed */
87 boolean isClosed();
88
89 /** @return True if closing process has started */
90 boolean isClosing();
91
92 /** @return True if region is in recovering state */
93 boolean isRecovering();
94
95 /** @return True if region is read only */
96 boolean isReadOnly();
97
98 /**
99 * Return the list of Stores managed by this region
100 * <p>Use with caution. Exposed for use of fixup utilities.
101 * @return a list of the Stores managed by this region
102 */
103 List<Store> getStores();
104
105 /**
106 * Return the Store for the given family
107 * <p>Use with caution. Exposed for use of fixup utilities.
108 * @return the Store for the given family
109 */
110 Store getStore(byte[] family);
111
112 /** @return list of store file names for the given families */
113 List<String> getStoreFileList(byte [][] columns);
114
115 /**
116 * Check the region's underlying store files, open the files that have not
117 * been opened yet, and remove the store file readers for store files no
118 * longer available.
119 * @throws IOException
120 */
121 boolean refreshStoreFiles() throws IOException;
122
123 /** @return the latest sequence number that was read from storage when this region was opened */
124 long getOpenSeqNum();
125
126 /** @return the max sequence id of flushed data on this region */
127 long getMaxFlushedSeqId();
128
129 /** @return the oldest sequence id found in the store for the given family */
130 public long getOldestSeqIdOfStore(byte[] familyName);
131
132 /**
133 * This can be used to determine the last time all files of this region were major compacted.
134 * @param majorCompactioOnly Only consider HFile that are the result of major compaction
135 * @return the timestamp of the oldest HFile for all stores of this region
136 */
137 long getOldestHfileTs(boolean majorCompactioOnly) throws IOException;
138
139 /**
140 * @return map of column family names to max sequence id that was read from storage when this
141 * region was opened
142 */
143 public Map<byte[], Long> getMaxStoreSeqId();
144
145 /** @return true if loading column families on demand by default */
146 boolean isLoadingCfsOnDemandDefault();
147
148 /** @return readpoint considering given IsolationLevel */
149 long getReadpoint(IsolationLevel isolationLevel);
150
151 /**
152 * @return The earliest time a store in the region was flushed. All
153 * other stores in the region would have been flushed either at, or
154 * after this time.
155 */
156 long getEarliestFlushTimeForAllStores();
157
158 ///////////////////////////////////////////////////////////////////////////
159 // Metrics
160
161 /** @return read requests count for this region */
162 long getReadRequestsCount();
163
164 /**
165 * Update the read request count for this region
166 * @param i increment
167 */
168 void updateReadRequestsCount(long i);
169
170 /** @return write request count for this region */
171 long getWriteRequestsCount();
172
173 /**
174 * Update the write request count for this region
175 * @param i increment
176 */
177 void updateWriteRequestsCount(long i);
178
179 /** @return memstore size for this region, in bytes */
180 long getMemstoreSize();
181
182 /** @return the number of mutations processed bypassing the WAL */
183 long getNumMutationsWithoutWAL();
184
185 /** @return the size of data processed bypassing the WAL, in bytes */
186 long getDataInMemoryWithoutWAL();
187
188 /** @return the number of blocked requests */
189 long getBlockedRequestsCount();
190
191 /** @return the number of checkAndMutate guards that passed */
192 long getCheckAndMutateChecksPassed();
193
194 /** @return the number of failed checkAndMutate guards */
195 long getCheckAndMutateChecksFailed();
196
197 /** @return the MetricsRegion for this region */
198 MetricsRegion getMetrics();
199
200 /** @return the block distribution for all Stores managed by this region */
201 HDFSBlocksDistribution getHDFSBlocksDistribution();
202
203 ///////////////////////////////////////////////////////////////////////////
204 // Locking
205
206 // Region read locks
207
208 /**
209 * Operation enum is used in {@link Region#startRegionOperation} to provide context for
210 * various checks before any region operation begins.
211 */
212 enum Operation {
213 ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
214 REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT
215 }
216
217 /**
218 * This method needs to be called before any public call that reads or
219 * modifies data.
220 * Acquires a read lock and checks if the region is closing or closed.
221 * <p>{@link #closeRegionOperation} MUST then always be called after
222 * the operation has completed, whether it succeeded or failed.
223 * @throws IOException
224 */
225 void startRegionOperation() throws IOException;
226
227 /**
228 * This method needs to be called before any public call that reads or
229 * modifies data.
230 * Acquires a read lock and checks if the region is closing or closed.
231 * <p>{@link #closeRegionOperation} MUST then always be called after
232 * the operation has completed, whether it succeeded or failed.
233 * @param op The operation is about to be taken on the region
234 * @throws IOException
235 */
236 void startRegionOperation(Operation op) throws IOException;
237
238 /**
239 * Closes the region operation lock.
240 * @throws IOException
241 */
242 void closeRegionOperation() throws IOException;
243
244 // Row write locks
245
246 /**
247 * Row lock held by a given thread.
248 * One thread may acquire multiple locks on the same row simultaneously.
249 * The locks must be released by calling release() from the same thread.
250 */
251 public interface RowLock {
252 /**
253 * Release the given lock. If there are no remaining locks held by the current thread
254 * then unlock the row and allow other threads to acquire the lock.
255 * @throws IllegalArgumentException if called by a different thread than the lock owning
256 * thread
257 */
258 void release();
259 }
260
261 /**
262 * Tries to acquire a lock on the given row.
263 * @param waitForLock if true, will block until the lock is available.
264 * Otherwise, just tries to obtain the lock and returns
265 * false if unavailable.
266 * @return the row lock if acquired,
267 * null if waitForLock was false and the lock was not acquired
268 * @throws IOException if waitForLock was true and the lock could not be acquired after waiting
269 */
270 RowLock getRowLock(byte[] row, boolean waitForLock) throws IOException;
271
272 /**
273 * If the given list of row locks is not null, releases all locks.
274 */
275 void releaseRowLocks(List<RowLock> rowLocks);
276
277 ///////////////////////////////////////////////////////////////////////////
278 // Region operations
279
280 /**
281 * Perform one or more append operations on a row.
282 * @param append
283 * @param nonceGroup
284 * @param nonce
285 * @return result of the operation
286 * @throws IOException
287 */
288 Result append(Append append, long nonceGroup, long nonce) throws IOException;
289
290 /**
291 * Perform a batch of mutations.
292 * <p>
293 * Note this supports only Put and Delete mutations and will ignore other types passed.
294 * @param mutations the list of mutations
295 * @param nonceGroup
296 * @param nonce
297 * @return an array of OperationStatus which internally contains the
298 * OperationStatusCode and the exceptionMessage if any.
299 * @throws IOException
300 */
301 OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce)
302 throws IOException;
303
304 /**
305 * Replay a batch of mutations.
306 * @param mutations mutations to replay.
307 * @param replaySeqId
308 * @return an array of OperationStatus which internally contains the
309 * OperationStatusCode and the exceptionMessage if any.
310 * @throws IOException
311 */
312 OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException;
313
314 /**
315 * Atomically checks if a row/family/qualifier value matches the expected val
316 * If it does, it performs the row mutations. If the passed value is null, t
317 * is for the lack of column (ie: non-existence)
318 * @param row to check
319 * @param family column family to check
320 * @param qualifier column qualifier to check
321 * @param compareOp the comparison operator
322 * @param comparator
323 * @param mutation
324 * @param writeToWAL
325 * @return true if mutation was applied, false otherwise
326 * @throws IOException
327 */
328 boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
329 ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException;
330
331 /**
332 * Atomically checks if a row/family/qualifier value matches the expected val
333 * If it does, it performs the row mutations. If the passed value is null, t
334 * is for the lack of column (ie: non-existence)
335 * @param row to check
336 * @param family column family to check
337 * @param qualifier column qualifier to check
338 * @param compareOp the comparison operator
339 * @param comparator
340 * @param mutations
341 * @param writeToWAL
342 * @return true if mutation was applied, false otherwise
343 * @throws IOException
344 */
345 boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
346 ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL)
347 throws IOException;
348
349 /**
350 * Deletes the specified cells/row.
351 * @param delete
352 * @throws IOException
353 */
354 void delete(Delete delete) throws IOException;
355
356 /**
357 * Do a get based on the get parameter.
358 * @param get query parameters
359 * @return result of the operation
360 */
361 Result get(Get get) throws IOException;
362
363 /**
364 * Do a get based on the get parameter.
365 * @param get query parameters
366 * @param withCoprocessor invoke coprocessor or not. We don't want to
367 * always invoke cp.
368 * @return list of cells resulting from the operation
369 */
370 List<Cell> get(Get get, boolean withCoprocessor) throws IOException;
371
372 /**
373 * Return all the data for the row that matches <i>row</i> exactly,
374 * or the one that immediately preceeds it, at or immediately before
375 * <i>ts</i>.
376 * @param row
377 * @param family
378 * @return result of the operation
379 * @throws IOException
380 */
381 Result getClosestRowBefore(byte[] row, byte[] family) throws IOException;
382
383 /**
384 * Return an iterator that scans over the HRegion, returning the indicated
385 * columns and rows specified by the {@link Scan}.
386 * <p>
387 * This Iterator must be closed by the caller.
388 *
389 * @param scan configured {@link Scan}
390 * @return RegionScanner
391 * @throws IOException read exceptions
392 */
393 RegionScanner getScanner(Scan scan) throws IOException;
394
395 /**
396 * Perform one or more increment operations on a row.
397 * @param increment
398 * @param nonceGroup
399 * @param nonce
400 * @return result of the operation
401 * @throws IOException
402 */
403 Result increment(Increment increment, long nonceGroup, long nonce) throws IOException;
404
405 /**
406 * Performs multiple mutations atomically on a single row. Currently
407 * {@link Put} and {@link Delete} are supported.
408 *
409 * @param mutations object that specifies the set of mutations to perform atomically
410 * @throws IOException
411 */
412 void mutateRow(RowMutations mutations) throws IOException;
413
414 /**
415 * Perform atomic mutations within the region.
416 *
417 * @param mutations The list of mutations to perform.
418 * <code>mutations</code> can contain operations for multiple rows.
419 * Caller has to ensure that all rows are contained in this region.
420 * @param rowsToLock Rows to lock
421 * @param nonceGroup Optional nonce group of the operation (client Id)
422 * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
423 * If multiple rows are locked care should be taken that
424 * <code>rowsToLock</code> is sorted in order to avoid deadlocks.
425 * @throws IOException
426 */
427 void mutateRowsWithLocks(Collection<Mutation> mutations, Collection<byte[]> rowsToLock,
428 long nonceGroup, long nonce) throws IOException;
429
430 /**
431 * Performs atomic multiple reads and writes on a given row.
432 *
433 * @param processor The object defines the reads and writes to a row.
434 */
435 void processRowsWithLocks(RowProcessor<?,?> processor) throws IOException;
436
437 /**
438 * Performs atomic multiple reads and writes on a given row.
439 *
440 * @param processor The object defines the reads and writes to a row.
441 * @param nonceGroup Optional nonce group of the operation (client Id)
442 * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
443 */
444 void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce)
445 throws IOException;
446
447 /**
448 * Performs atomic multiple reads and writes on a given row.
449 *
450 * @param processor The object defines the reads and writes to a row.
451 * @param timeout The timeout of the processor.process() execution
452 * Use a negative number to switch off the time bound
453 * @param nonceGroup Optional nonce group of the operation (client Id)
454 * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
455 */
456 void processRowsWithLocks(RowProcessor<?,?> processor, long timeout, long nonceGroup, long nonce)
457 throws IOException;
458
459 /**
460 * Puts some data in the table.
461 * @param put
462 * @throws IOException
463 */
464 void put(Put put) throws IOException;
465
466 /**
467 * Listener class to enable callers of
468 * bulkLoadHFile() to perform any necessary
469 * pre/post processing of a given bulkload call
470 */
471 interface BulkLoadListener {
472
473 /**
474 * Called before an HFile is actually loaded
475 * @param family family being loaded to
476 * @param srcPath path of HFile
477 * @return final path to be used for actual loading
478 * @throws IOException
479 */
480 String prepareBulkLoad(byte[] family, String srcPath) throws IOException;
481
482 /**
483 * Called after a successful HFile load
484 * @param family family being loaded to
485 * @param srcPath path of HFile
486 * @throws IOException
487 */
488 void doneBulkLoad(byte[] family, String srcPath) throws IOException;
489
490 /**
491 * Called after a failed HFile load
492 * @param family family being loaded to
493 * @param srcPath path of HFile
494 * @throws IOException
495 */
496 void failedBulkLoad(byte[] family, String srcPath) throws IOException;
497 }
498
499 /**
500 * Attempts to atomically load a group of hfiles. This is critical for loading
501 * rows with multiple column families atomically.
502 *
503 * @param familyPaths List of Pair<byte[] column family, String hfilePath>
504 * @param bulkLoadListener Internal hooks enabling massaging/preparation of a
505 * file about to be bulk loaded
506 * @param assignSeqId
507 * @return true if successful, false if failed recoverably
508 * @throws IOException if failed unrecoverably.
509 */
510 boolean bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths, boolean assignSeqId,
511 BulkLoadListener bulkLoadListener) throws IOException;
512
513 ///////////////////////////////////////////////////////////////////////////
514 // Coprocessors
515
516 /** @return the coprocessor host */
517 RegionCoprocessorHost getCoprocessorHost();
518
519 /**
520 * Executes a single protocol buffer coprocessor endpoint {@link Service} method using
521 * the registered protocol handlers. {@link Service} implementations must be registered via the
522 * {@link Region#registerService(com.google.protobuf.Service)}
523 * method before they are available.
524 *
525 * @param controller an {@code RpcContoller} implementation to pass to the invoked service
526 * @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
527 * and parameters for the method invocation
528 * @return a protocol buffer {@code Message} instance containing the method's result
529 * @throws IOException if no registered service handler is found or an error
530 * occurs during the invocation
531 * @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service)
532 */
533 Message execService(RpcController controller, CoprocessorServiceCall call) throws IOException;
534
535 /**
536 * Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to
537 * be available for handling
538 * {@link Region#execService(com.google.protobuf.RpcController,
539 * org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall)}} calls.
540 *
541 * <p>
542 * Only a single instance may be registered per region for a given {@link Service} subclass (the
543 * instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}.
544 * After the first registration, subsequent calls with the same service name will fail with
545 * a return value of {@code false}.
546 * </p>
547 * @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
548 * @return {@code true} if the registration was successful, {@code false}
549 * otherwise
550 */
551 boolean registerService(Service instance);
552
553 ///////////////////////////////////////////////////////////////////////////
554 // RowMutation processor support
555
556 /**
557 * Check the collection of families for validity.
558 * @param families
559 * @throws NoSuchColumnFamilyException
560 */
561 void checkFamilies(Collection<byte[]> families) throws NoSuchColumnFamilyException;
562
563 /**
564 * Check the collection of families for valid timestamps
565 * @param familyMap
566 * @param now current timestamp
567 * @throws FailedSanityCheckException
568 */
569 void checkTimestamps(Map<byte[], List<Cell>> familyMap, long now)
570 throws FailedSanityCheckException;
571
572 /**
573 * Prepare a delete for a row mutation processor
574 * @param delete The passed delete is modified by this method. WARNING!
575 * @throws IOException
576 */
577 void prepareDelete(Delete delete) throws IOException;
578
579 /**
580 * Set up correct timestamps in the KVs in Delete object.
581 * <p>Caller should have the row and region locks.
582 * @param mutation
583 * @param familyCellMap
584 * @param now
585 * @throws IOException
586 */
587 void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyCellMap,
588 byte[] now) throws IOException;
589
590 /**
591 * Replace any cell timestamps set to HConstants#LATEST_TIMESTAMP with the
592 * provided current timestamp.
593 * @param values
594 * @param now
595 */
596 void updateCellTimestamps(final Iterable<List<Cell>> values, final byte[] now)
597 throws IOException;
598
599 ///////////////////////////////////////////////////////////////////////////
600 // Flushes, compactions, splits, etc.
601 // Wizards only, please
602
603 interface FlushResult {
604 enum Result {
605 FLUSHED_NO_COMPACTION_NEEDED,
606 FLUSHED_COMPACTION_NEEDED,
607 // Special case where a flush didn't run because there's nothing in the memstores. Used when
608 // bulk loading to know when we can still load even if a flush didn't happen.
609 CANNOT_FLUSH_MEMSTORE_EMPTY,
610 CANNOT_FLUSH
611 }
612
613 /** @return the detailed result code */
614 Result getResult();
615
616 /** @return true if the memstores were flushed, else false */
617 boolean isFlushSucceeded();
618
619 /** @return True if the flush requested a compaction, else false */
620 boolean isCompactionNeeded();
621 }
622
623 /**
624 * Flush the cache.
625 *
626 * <p>When this method is called the cache will be flushed unless:
627 * <ol>
628 * <li>the cache is empty</li>
629 * <li>the region is closed.</li>
630 * <li>a flush is already in progress</li>
631 * <li>writes are disabled</li>
632 * </ol>
633 *
634 * <p>This method may block for some time, so it should not be called from a
635 * time-sensitive thread.
636 * @param force whether we want to force a flush of all stores
637 * @return FlushResult indicating whether the flush was successful or not and if
638 * the region needs compacting
639 *
640 * @throws IOException general io exceptions
641 * @throws DroppedSnapshotException Thrown when abort is required. The caller MUST catch this
642 * exception and MUST abort. Any further operation to the region may cause data loss.
643 * because a snapshot was not properly persisted.
644 */
645 FlushResult flush(boolean force) throws IOException;
646
647 /**
648 * Synchronously compact all stores in the region.
649 * <p>This operation could block for a long time, so don't call it from a
650 * time-sensitive thread.
651 * <p>Note that no locks are taken to prevent possible conflicts between
652 * compaction and splitting activities. The regionserver does not normally compact
653 * and split in parallel. However by calling this method you may introduce
654 * unexpected and unhandled concurrency. Don't do this unless you know what
655 * you are doing.
656 *
657 * @param majorCompaction True to force a major compaction regardless of thresholds
658 * @throws IOException
659 */
660 void compact(final boolean majorCompaction) throws IOException;
661
662 /**
663 * Trigger major compaction on all stores in the region.
664 * <p>
665 * Compaction will be performed asynchronously to this call by the RegionServer's
666 * CompactSplitThread. See also {@link Store#triggerMajorCompaction()}
667 * @throws IOException
668 */
669 void triggerMajorCompaction() throws IOException;
670
671 /**
672 * @return if a given region is in compaction now.
673 */
674 CompactionState getCompactionState();
675
676 /** Wait for all current flushes and compactions of the region to complete */
677 void waitForFlushesAndCompactions();
678
679 }