1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  package org.apache.hadoop.hbase.util.hbck;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collection;
24  import java.util.HashSet;
25  import java.util.List;
26  import java.util.Set;
27  import java.util.concurrent.Callable;
28  import java.util.concurrent.ConcurrentSkipListSet;
29  import java.util.concurrent.ExecutionException;
30  import java.util.concurrent.ExecutorService;
31  import java.util.concurrent.Future;
32  import java.util.concurrent.atomic.AtomicInteger;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FileStatus;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.TableName;
43  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
44  import org.apache.hadoop.hbase.io.hfile.CorruptHFileException;
45  import org.apache.hadoop.hbase.io.hfile.HFile;
46  import org.apache.hadoop.hbase.mob.MobUtils;
47  import org.apache.hadoop.hbase.util.FSUtils;
48  import org.apache.hadoop.hbase.util.FSUtils.FamilyDirFilter;
49  import org.apache.hadoop.hbase.util.FSUtils.HFileFilter;
50  import org.apache.hadoop.hbase.util.FSUtils.RegionDirFilter;
51  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
52  
53  
54  
55  
56  
57  
58  
59  
60  
61  @InterfaceAudience.Private
62  public class HFileCorruptionChecker {
63    private static final Log LOG = LogFactory.getLog(HFileCorruptionChecker.class);
64  
65    final Configuration conf;
66    final FileSystem fs;
67    final CacheConfig cacheConf;
68    final ExecutorService executor;
69    final Set<Path> corrupted = new ConcurrentSkipListSet<Path>();
70    final Set<Path> failures = new ConcurrentSkipListSet<Path>();
71    final Set<Path> quarantined = new ConcurrentSkipListSet<Path>();
72    final Set<Path> missing = new ConcurrentSkipListSet<Path>();
73    final Set<Path> corruptedMobFiles = new ConcurrentSkipListSet<Path>();
74    final Set<Path> failureMobFiles = new ConcurrentSkipListSet<Path>();
75    final Set<Path> missedMobFiles = new ConcurrentSkipListSet<Path>();
76    final Set<Path> quarantinedMobFiles = new ConcurrentSkipListSet<Path>();
77    final boolean inQuarantineMode;
78    final AtomicInteger hfilesChecked = new AtomicInteger();
79    final AtomicInteger mobFilesChecked = new AtomicInteger();
80  
81    public HFileCorruptionChecker(Configuration conf, ExecutorService executor,
82        boolean quarantine) throws IOException {
83      this.conf = conf;
84      this.fs = FileSystem.get(conf);
85      this.cacheConf = new CacheConfig(conf);
86      this.executor = executor;
87      this.inQuarantineMode = quarantine;
88    }
89  
90    
91  
92  
93  
94  
95  
96  
97  
98    protected void checkHFile(Path p) throws IOException {
99      HFile.Reader r = null;
100     try {
101       r = HFile.createReader(fs, p, cacheConf, conf);
102     } catch (CorruptHFileException che) {
103       LOG.warn("Found corrupt HFile " + p, che);
104       corrupted.add(p);
105       if (inQuarantineMode) {
106         Path dest = createQuarantinePath(p);
107         LOG.warn("Quarantining corrupt HFile " + p + " into " + dest);
108         boolean success = fs.mkdirs(dest.getParent());
109         success = success ? fs.rename(p, dest): false;
110         if (!success) {
111           failures.add(p);
112         } else {
113           quarantined.add(dest);
114         }
115       }
116       return;
117     } catch (FileNotFoundException fnfe) {
118       LOG.warn("HFile " + p + " was missing.  Likely removed due to compaction/split?");
119       missing.add(p);
120     } finally {
121       hfilesChecked.addAndGet(1);
122       if (r != null) {
123         r.close(true);
124       }
125     }
126   }
127 
128   
129 
130 
131 
132 
133 
134 
135 
136 
137 
138   Path createQuarantinePath(Path hFile) throws IOException {
139     
140     Path cfDir = hFile.getParent();
141     Path regionDir = cfDir.getParent();
142     Path tableDir = regionDir.getParent();
143 
144     
145     Path corruptBaseDir = new Path(FSUtils.getRootDir(conf), conf.get(
146         "hbase.hfile.quarantine.dir", HConstants.CORRUPT_DIR_NAME));
147     Path corruptTableDir = new Path(corruptBaseDir, tableDir.getName());
148     Path corruptRegionDir = new Path(corruptTableDir, regionDir.getName());
149     Path corruptFamilyDir = new Path(corruptRegionDir, cfDir.getName());
150     Path corruptHfile = new Path(corruptFamilyDir, hFile.getName());
151     return corruptHfile;
152   }
153 
154   
155 
156 
157 
158 
159 
160 
161   protected void checkColFamDir(Path cfDir) throws IOException {
162     FileStatus[] statuses = null;
163     try {
164       statuses = fs.listStatus(cfDir); 
165     } catch (FileNotFoundException fnfe) {
166       
167       LOG.warn("Colfam Directory " + cfDir +
168           " does not exist.  Likely due to concurrent split/compaction. Skipping.");
169       missing.add(cfDir);
170       return;
171     }
172 
173     List<FileStatus> hfs = FSUtils.filterFileStatuses(statuses, new HFileFilter(fs));
174     
175     if (hfs.size() == 0 && !fs.exists(cfDir)) {
176       LOG.warn("Colfam Directory " + cfDir +
177           " does not exist.  Likely due to concurrent split/compaction. Skipping.");
178       missing.add(cfDir);
179       return;
180     }
181     for (FileStatus hfFs : hfs) {
182       Path hf = hfFs.getPath();
183       checkHFile(hf);
184     }
185   }
186 
187   
188 
189 
190 
191 
192 
193 
194   protected void checkMobColFamDir(Path cfDir) throws IOException {
195     FileStatus[] hfs = null;
196     try {
197       hfs = fs.listStatus(cfDir, new HFileFilter(fs)); 
198     } catch (FileNotFoundException fnfe) {
199       
200       LOG.warn("Mob colfam Directory " + cfDir +
201           " does not exist.  Likely the table is deleted. Skipping.");
202       missedMobFiles.add(cfDir);
203       return;
204     }
205 
206     
207     if (hfs.length == 0 && !fs.exists(cfDir)) {
208       LOG.warn("Mob colfam Directory " + cfDir +
209           " does not exist.  Likely the table is deleted. Skipping.");
210       missedMobFiles.add(cfDir);
211       return;
212     }
213     for (FileStatus hfFs : hfs) {
214       Path hf = hfFs.getPath();
215       checkMobFile(hf);
216     }
217   }
218 
219   
220 
221 
222 
223 
224 
225 
226 
227   protected void checkMobFile(Path p) throws IOException {
228     HFile.Reader r = null;
229     try {
230       r = HFile.createReader(fs, p, cacheConf, conf);
231     } catch (CorruptHFileException che) {
232       LOG.warn("Found corrupt mob file " + p, che);
233       corruptedMobFiles.add(p);
234       if (inQuarantineMode) {
235         Path dest = createQuarantinePath(p);
236         LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
237         boolean success = fs.mkdirs(dest.getParent());
238         success = success ? fs.rename(p, dest): false;
239         if (!success) {
240           failureMobFiles.add(p);
241         } else {
242           quarantinedMobFiles.add(dest);
243         }
244       }
245       return;
246     } catch (FileNotFoundException fnfe) {
247       LOG.warn("Mob file " + p + " was missing.  Likely removed due to compaction?");
248       missedMobFiles.add(p);
249     } finally {
250       mobFilesChecked.addAndGet(1);
251       if (r != null) {
252         r.close(true);
253       }
254     }
255   }
256 
257   
258 
259 
260 
261 
262   private void checkMobRegionDir(Path regionDir) throws IOException {
263     if (!fs.exists(regionDir)) {
264       return;
265     }
266     FileStatus[] hfs = null;
267     try {
268       hfs = fs.listStatus(regionDir, new FamilyDirFilter(fs));
269     } catch (FileNotFoundException fnfe) {
270       
271       LOG.warn("Mob directory " + regionDir
272         + " does not exist.  Likely the table is deleted. Skipping.");
273       missedMobFiles.add(regionDir);
274       return;
275     }
276 
277     
278     if (hfs.length == 0 && !fs.exists(regionDir)) {
279       LOG.warn("Mob directory " + regionDir
280         + " does not exist.  Likely the table is deleted. Skipping.");
281       missedMobFiles.add(regionDir);
282       return;
283     }
284     for (FileStatus hfFs : hfs) {
285       Path hf = hfFs.getPath();
286       checkMobColFamDir(hf);
287     }
288   }
289 
290   
291 
292 
293 
294 
295 
296 
297   protected void checkRegionDir(Path regionDir) throws IOException {
298     FileStatus[] statuses = null;
299     try {
300       statuses = fs.listStatus(regionDir);
301     } catch (FileNotFoundException fnfe) {
302       
303       LOG.warn("Region Directory " + regionDir +
304           " does not exist.  Likely due to concurrent split/compaction. Skipping.");
305       missing.add(regionDir);
306       return;
307     }
308 
309     List<FileStatus> cfs = FSUtils.filterFileStatuses(statuses, new FamilyDirFilter(fs));
310     
311     if (cfs.size() == 0 && !fs.exists(regionDir)) {
312       LOG.warn("Region Directory " + regionDir +
313           " does not exist.  Likely due to concurrent split/compaction. Skipping.");
314       missing.add(regionDir);
315       return;
316     }
317 
318     for (FileStatus cfFs : cfs) {
319       Path cfDir = cfFs.getPath();
320       checkColFamDir(cfDir);
321     }
322   }
323 
324   
325 
326 
327 
328 
329 
330 
331   void checkTableDir(Path tableDir) throws IOException {
332     List<FileStatus> rds = FSUtils.listStatusWithStatusFilter(fs, tableDir, new RegionDirFilter(fs));
333     if (rds == null) {
334       if (!fs.exists(tableDir)) {
335         LOG.warn("Table Directory " + tableDir +
336             " does not exist.  Likely due to concurrent delete. Skipping.");
337         missing.add(tableDir);
338       }
339       return;
340     }
341 
342     
343     List<RegionDirChecker> rdcs = new ArrayList<RegionDirChecker>();
344     List<Future<Void>> rdFutures;
345 
346     for (FileStatus rdFs : rds) {
347       Path rdDir = rdFs.getPath();
348       RegionDirChecker work = new RegionDirChecker(rdDir);
349       rdcs.add(work);
350     }
351 
352     
353     rdcs.add(createMobRegionDirChecker(tableDir));
354     
355     try {
356       rdFutures = executor.invokeAll(rdcs);
357     } catch (InterruptedException ie) {
358       Thread.currentThread().interrupt();
359       LOG.warn("Region dirs checking interrupted!", ie);
360       return;
361     }
362 
363     for (int i = 0; i < rdFutures.size(); i++) {
364       Future<Void> f = rdFutures.get(i);
365       try {
366         f.get();
367       } catch (ExecutionException e) {
368         LOG.warn("Failed to quaratine an HFile in regiondir "
369             + rdcs.get(i).regionDir, e.getCause());
370         
371         if (e.getCause() instanceof IOException) {
372           throw (IOException) e.getCause();
373         }
374 
375         
376         if (e.getCause() instanceof RuntimeException) {
377           throw (RuntimeException) e.getCause();
378         }
379 
380         
381         LOG.error("Unexpected exception encountered", e);
382         return; 
383       } catch (InterruptedException ie) {
384         Thread.currentThread().interrupt();
385         LOG.warn("Region dirs check interrupted!", ie);
386         
387         return;
388       }
389     }
390   }
391 
392   
393 
394 
395 
396   private class RegionDirChecker implements Callable<Void> {
397     final Path regionDir;
398 
399     RegionDirChecker(Path regionDir) {
400       this.regionDir = regionDir;
401     }
402 
403     @Override
404     public Void call() throws IOException {
405       checkRegionDir(regionDir);
406       return null;
407     }
408   }
409 
410   
411 
412 
413 
414   private class MobRegionDirChecker extends RegionDirChecker {
415 
416     MobRegionDirChecker(Path regionDir) {
417       super(regionDir);
418     }
419 
420     @Override
421     public Void call() throws IOException {
422       checkMobRegionDir(regionDir);
423       return null;
424     }
425   }
426 
427   
428 
429 
430 
431 
432   private MobRegionDirChecker createMobRegionDirChecker(Path tableDir) {
433     TableName tableName = FSUtils.getTableName(tableDir);
434     Path mobDir = MobUtils.getMobRegionPath(conf, tableName);
435     return new MobRegionDirChecker(mobDir);
436   }
437 
438   
439 
440 
441   public void checkTables(Collection<Path> tables) throws IOException {
442     for (Path t : tables) {
443       checkTableDir(t);
444     }
445   }
446 
447   
448 
449 
450   public Collection<Path> getFailures() {
451     return new HashSet<Path>(failures);
452   }
453 
454   
455 
456 
457   public Collection<Path> getCorrupted() {
458     return new HashSet<Path>(corrupted);
459   }
460 
461   
462 
463 
464   public int getHFilesChecked() {
465     return hfilesChecked.get();
466   }
467 
468   
469 
470 
471   public Collection<Path> getQuarantined() {
472     return new HashSet<Path>(quarantined);
473   }
474 
475   
476 
477 
478 
479   public Collection<Path> getMissing() {
480     return new HashSet<Path>(missing);
481   }
482 
483   
484 
485 
486   public Collection<Path> getFailureMobFiles() {
487     return new HashSet<Path>(failureMobFiles);
488   }
489 
490   
491 
492 
493   public Collection<Path> getCorruptedMobFiles() {
494     return new HashSet<Path>(corruptedMobFiles);
495   }
496 
497   
498 
499 
500   public int getMobFilesChecked() {
501     return mobFilesChecked.get();
502   }
503 
504   
505 
506 
507   public Collection<Path> getQuarantinedMobFiles() {
508     return new HashSet<Path>(quarantinedMobFiles);
509   }
510 
511   
512 
513 
514 
515   public Collection<Path> getMissedMobFiles() {
516     return new HashSet<Path>(missedMobFiles);
517   }
518 
519   
520 
521 
522 
523   public void report(ErrorReporter out) {
524     out.print("Checked " + hfilesChecked.get() + " hfile for corruption");
525     out.print("  HFiles corrupted:                  " + corrupted.size());
526     if (inQuarantineMode) {
527       out.print("    HFiles successfully quarantined: " + quarantined.size());
528       for (Path sq : quarantined) {
529         out.print("      " + sq);
530       }
531       out.print("    HFiles failed quarantine:        " + failures.size());
532       for (Path fq : failures) {
533         out.print("      " + fq);
534       }
535     }
536     out.print("    HFiles moved while checking:     " + missing.size());
537     for (Path mq : missing) {
538       out.print("      " + mq);
539     }
540 
541     String initialState = (corrupted.size() == 0) ? "OK" : "CORRUPTED";
542     String fixedState = (corrupted.size() == quarantined.size()) ? "OK"
543         : "CORRUPTED";
544 
545     
546     if (inQuarantineMode) {
547       out.print("    Mob files successfully quarantined: " + quarantinedMobFiles.size());
548       for (Path sq : quarantinedMobFiles) {
549         out.print("      " + sq);
550       }
551       out.print("    Mob files failed quarantine:        " + failureMobFiles.size());
552       for (Path fq : failureMobFiles) {
553         out.print("      " + fq);
554       }
555     }
556     out.print("    Mob files moved while checking:     " + missedMobFiles.size());
557     for (Path mq : missedMobFiles) {
558       out.print("      " + mq);
559     }
560     String initialMobState = (corruptedMobFiles.size() == 0) ? "OK" : "CORRUPTED";
561     String fixedMobState = (corruptedMobFiles.size() == quarantinedMobFiles.size()) ? "OK"
562         : "CORRUPTED";
563 
564     if (inQuarantineMode) {
565       out.print("Summary: " + initialState + " => " + fixedState);
566       out.print("Mob summary: " + initialMobState + " => " + fixedMobState);
567     } else {
568       out.print("Summary: " + initialState);
569       out.print("Mob summary: " + initialMobState);
570     }
571   }
572 }