001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.util.Time.now; 021 022import java.io.FilterInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.util.Arrays; 026import java.util.EnumMap; 027import java.util.EnumSet; 028import java.util.List; 029 030import org.apache.commons.logging.Log; 031import org.apache.commons.logging.LogFactory; 032import org.apache.hadoop.classification.InterfaceAudience; 033import org.apache.hadoop.classification.InterfaceStability; 034import org.apache.hadoop.fs.FileSystem; 035import org.apache.hadoop.fs.Path; 036import org.apache.hadoop.hdfs.protocol.Block; 037import org.apache.hadoop.hdfs.protocol.HdfsConstants; 038import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 039import org.apache.hadoop.hdfs.protocol.LayoutVersion; 040import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; 041import org.apache.hadoop.hdfs.protocol.LocatedBlock; 042import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; 043import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; 044import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; 045import org.apache.hadoop.hdfs.server.common.Storage; 046import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp; 047import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp; 048import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp; 049import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp; 050import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp; 051import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp; 052import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp; 053import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp; 054import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp; 055import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp; 056import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp; 057import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp; 058import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp; 059import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp; 060import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp; 061import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; 062import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp; 063import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp; 064import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp; 065import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp; 066import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp; 067import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp; 068import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp; 069import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp; 070import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp; 071import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op; 072import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op; 073import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp; 074import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp; 075import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp; 076import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp; 077import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp; 078import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp; 079import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; 080import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp; 081import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; 082import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 083import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 084import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 085import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 086import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 087import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 088import org.apache.hadoop.hdfs.util.ChunkedArrayList; 089import org.apache.hadoop.hdfs.util.Holder; 090import org.apache.jasper.tagplugins.jstl.core.Remove; 091 092import com.google.common.base.Joiner; 093import com.google.common.base.Preconditions; 094 095@InterfaceAudience.Private 096@InterfaceStability.Evolving 097public class FSEditLogLoader { 098 static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName()); 099 static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec 100 private final FSNamesystem fsNamesys; 101 private long lastAppliedTxId; 102 103 public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) { 104 this.fsNamesys = fsNamesys; 105 this.lastAppliedTxId = lastAppliedTxId; 106 } 107 108 /** 109 * Load an edit log, and apply the changes to the in-memory structure 110 * This is where we apply edits that we've been writing to disk all 111 * along. 112 */ 113 long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, 114 MetaRecoveryContext recovery) throws IOException { 115 StartupProgress prog = NameNode.getStartupProgress(); 116 Step step = createStartupProgressStep(edits); 117 prog.beginStep(Phase.LOADING_EDITS, step); 118 fsNamesys.writeLock(); 119 try { 120 long startTime = now(); 121 FSImage.LOG.info("Start loading edits file " + edits.getName()); 122 long numEdits = loadEditRecords(edits, false, 123 expectedStartingTxId, recovery); 124 FSImage.LOG.info("Edits file " + edits.getName() 125 + " of size " + edits.length() + " edits # " + numEdits 126 + " loaded in " + (now()-startTime)/1000 + " seconds"); 127 return numEdits; 128 } finally { 129 edits.close(); 130 fsNamesys.writeUnlock(); 131 prog.endStep(Phase.LOADING_EDITS, step); 132 } 133 } 134 135 long loadEditRecords(EditLogInputStream in, boolean closeOnExit, 136 long expectedStartingTxId, MetaRecoveryContext recovery) 137 throws IOException { 138 FSDirectory fsDir = fsNamesys.dir; 139 140 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = 141 new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class); 142 143 if (LOG.isTraceEnabled()) { 144 LOG.trace("Acquiring write lock to replay edit log"); 145 } 146 147 fsNamesys.writeLock(); 148 fsDir.writeLock(); 149 150 long recentOpcodeOffsets[] = new long[4]; 151 Arrays.fill(recentOpcodeOffsets, -1); 152 153 long expectedTxId = expectedStartingTxId; 154 long numEdits = 0; 155 long lastTxId = in.getLastTxId(); 156 long numTxns = (lastTxId - expectedStartingTxId) + 1; 157 StartupProgress prog = NameNode.getStartupProgress(); 158 Step step = createStartupProgressStep(in); 159 prog.setTotal(Phase.LOADING_EDITS, step, numTxns); 160 Counter counter = prog.getCounter(Phase.LOADING_EDITS, step); 161 long lastLogTime = now(); 162 long lastInodeId = fsNamesys.getLastInodeId(); 163 164 try { 165 while (true) { 166 try { 167 FSEditLogOp op; 168 try { 169 op = in.readOp(); 170 if (op == null) { 171 break; 172 } 173 } catch (Throwable e) { 174 // Handle a problem with our input 175 check203UpgradeFailure(in.getVersion(), e); 176 String errorMessage = 177 formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId); 178 FSImage.LOG.error(errorMessage, e); 179 if (recovery == null) { 180 // We will only try to skip over problematic opcodes when in 181 // recovery mode. 182 throw new EditLogInputException(errorMessage, e, numEdits); 183 } 184 MetaRecoveryContext.editLogLoaderPrompt( 185 "We failed to read txId " + expectedTxId, 186 recovery, "skipping the bad section in the log"); 187 in.resync(); 188 continue; 189 } 190 recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] = 191 in.getPosition(); 192 if (op.hasTransactionId()) { 193 if (op.getTransactionId() > expectedTxId) { 194 MetaRecoveryContext.editLogLoaderPrompt("There appears " + 195 "to be a gap in the edit log. We expected txid " + 196 expectedTxId + ", but got txid " + 197 op.getTransactionId() + ".", recovery, "ignoring missing " + 198 " transaction IDs"); 199 } else if (op.getTransactionId() < expectedTxId) { 200 MetaRecoveryContext.editLogLoaderPrompt("There appears " + 201 "to be an out-of-order edit in the edit log. We " + 202 "expected txid " + expectedTxId + ", but got txid " + 203 op.getTransactionId() + ".", recovery, 204 "skipping the out-of-order edit"); 205 continue; 206 } 207 } 208 try { 209 long inodeId = applyEditLogOp(op, fsDir, in.getVersion(), lastInodeId); 210 if (lastInodeId < inodeId) { 211 lastInodeId = inodeId; 212 } 213 } catch (Throwable e) { 214 LOG.error("Encountered exception on operation " + op, e); 215 MetaRecoveryContext.editLogLoaderPrompt("Failed to " + 216 "apply edit log operation " + op + ": error " + 217 e.getMessage(), recovery, "applying edits"); 218 } 219 // Now that the operation has been successfully decoded and 220 // applied, update our bookkeeping. 221 incrOpCount(op.opCode, opCounts, step, counter); 222 if (op.hasTransactionId()) { 223 lastAppliedTxId = op.getTransactionId(); 224 expectedTxId = lastAppliedTxId + 1; 225 } else { 226 expectedTxId = lastAppliedTxId = expectedStartingTxId; 227 } 228 // log progress 229 if (op.hasTransactionId()) { 230 long now = now(); 231 if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) { 232 long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1; 233 int percent = Math.round((float) deltaTxId / numTxns * 100); 234 LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns 235 + " transactions completed. (" + percent + "%)"); 236 lastLogTime = now; 237 } 238 } 239 numEdits++; 240 } catch (MetaRecoveryContext.RequestStopException e) { 241 MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + 242 in.getPosition() + "/" + in.length()); 243 break; 244 } 245 } 246 } finally { 247 fsNamesys.resetLastInodeId(lastInodeId); 248 if(closeOnExit) { 249 in.close(); 250 } 251 fsDir.writeUnlock(); 252 fsNamesys.writeUnlock(); 253 254 if (LOG.isTraceEnabled()) { 255 LOG.trace("replaying edit log finished"); 256 } 257 258 if (FSImage.LOG.isDebugEnabled()) { 259 dumpOpCounts(opCounts); 260 } 261 } 262 return numEdits; 263 } 264 265 // allocate and update last allocated inode id 266 private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion, 267 long lastInodeId) throws IOException { 268 long inodeId = inodeIdFromOp; 269 270 if (inodeId == INodeId.GRANDFATHER_INODE_ID) { 271 if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { 272 throw new IOException("The layout version " + logVersion 273 + " supports inodeId but gave bogus inodeId"); 274 } 275 inodeId = fsNamesys.allocateNewInodeId(); 276 } else { 277 // need to reset lastInodeId. fsnamesys gets lastInodeId firstly from 278 // fsimage but editlog captures more recent inodeId allocations 279 if (inodeId > lastInodeId) { 280 fsNamesys.resetLastInodeId(inodeId); 281 } 282 } 283 return inodeId; 284 } 285 286 @SuppressWarnings("deprecation") 287 private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, 288 int logVersion, long lastInodeId) throws IOException { 289 long inodeId = INodeId.GRANDFATHER_INODE_ID; 290 if (LOG.isTraceEnabled()) { 291 LOG.trace("replaying edit log: " + op); 292 } 293 final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds(); 294 295 switch (op.opCode) { 296 case OP_ADD: { 297 AddCloseOp addCloseOp = (AddCloseOp)op; 298 if (FSNamesystem.LOG.isDebugEnabled()) { 299 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 300 " numblocks : " + addCloseOp.blocks.length + 301 " clientHolder " + addCloseOp.clientName + 302 " clientMachine " + addCloseOp.clientMachine); 303 } 304 // There three cases here: 305 // 1. OP_ADD to create a new file 306 // 2. OP_ADD to update file blocks 307 // 3. OP_ADD to open file for append 308 309 // See if the file already exists (persistBlocks call) 310 final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path); 311 final INodeFile oldFile = INodeFile.valueOf( 312 iip.getINode(0), addCloseOp.path, true); 313 INodeFile newFile = oldFile; 314 if (oldFile == null) { // this is OP_ADD on a new file (case 1) 315 // versions > 0 support per file replication 316 // get name and replication 317 final short replication = fsNamesys.getBlockManager() 318 .adjustReplication(addCloseOp.replication); 319 assert addCloseOp.blocks.length == 0; 320 321 // add to the file tree 322 inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion, 323 lastInodeId); 324 newFile = fsDir.unprotectedAddFile(inodeId, 325 addCloseOp.path, addCloseOp.permissions, replication, 326 addCloseOp.mtime, addCloseOp.atime, addCloseOp.blockSize, true, 327 addCloseOp.clientName, addCloseOp.clientMachine); 328 fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path); 329 330 // add the op into retry cache if necessary 331 if (toAddRetryCache) { 332 HdfsFileStatus stat = fsNamesys.dir.createFileStatus( 333 HdfsFileStatus.EMPTY_NAME, newFile, null); 334 fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId, 335 addCloseOp.rpcCallId, stat); 336 } 337 } else { // This is OP_ADD on an existing file 338 if (!oldFile.isUnderConstruction()) { 339 // This is case 3: a call to append() on an already-closed file. 340 if (FSNamesystem.LOG.isDebugEnabled()) { 341 FSNamesystem.LOG.debug("Reopening an already-closed file " + 342 "for append"); 343 } 344 LocatedBlock lb = fsNamesys.prepareFileForWrite(addCloseOp.path, 345 oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null, 346 false, iip.getLatestSnapshot(), false); 347 newFile = INodeFile.valueOf(fsDir.getINode(addCloseOp.path), 348 addCloseOp.path, true); 349 350 // add the op into retry cache is necessary 351 if (toAddRetryCache) { 352 fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId, 353 addCloseOp.rpcCallId, lb); 354 } 355 } 356 } 357 // Fall-through for case 2. 358 // Regardless of whether it's a new file or an updated file, 359 // update the block list. 360 361 // Update the salient file attributes. 362 newFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap()); 363 newFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap()); 364 updateBlocks(fsDir, addCloseOp, newFile); 365 break; 366 } 367 case OP_CLOSE: { 368 AddCloseOp addCloseOp = (AddCloseOp)op; 369 370 if (FSNamesystem.LOG.isDebugEnabled()) { 371 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 372 " numblocks : " + addCloseOp.blocks.length + 373 " clientHolder " + addCloseOp.clientName + 374 " clientMachine " + addCloseOp.clientMachine); 375 } 376 377 final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path); 378 final INodeFile oldFile = INodeFile.valueOf(iip.getINode(0), addCloseOp.path); 379 380 // Update the salient file attributes. 381 oldFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap()); 382 oldFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap()); 383 updateBlocks(fsDir, addCloseOp, oldFile); 384 385 // Now close the file 386 if (!oldFile.isUnderConstruction() && 387 logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) { 388 // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE 389 // could show up twice in a row. But after that version, this 390 // should be fixed, so we should treat it as an error. 391 throw new IOException( 392 "File is not under construction: " + addCloseOp.path); 393 } 394 // One might expect that you could use removeLease(holder, path) here, 395 // but OP_CLOSE doesn't serialize the holder. So, remove by path. 396 if (oldFile.isUnderConstruction()) { 397 INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; 398 fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); 399 INodeFile newFile = ucFile.toINodeFile(ucFile.getModificationTime()); 400 fsDir.unprotectedReplaceINodeFile(addCloseOp.path, ucFile, newFile); 401 } 402 break; 403 } 404 case OP_UPDATE_BLOCKS: { 405 UpdateBlocksOp updateOp = (UpdateBlocksOp)op; 406 if (FSNamesystem.LOG.isDebugEnabled()) { 407 FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path + 408 " numblocks : " + updateOp.blocks.length); 409 } 410 INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(updateOp.path), 411 updateOp.path); 412 // Update in-memory data structures 413 updateBlocks(fsDir, updateOp, oldFile); 414 415 if (toAddRetryCache) { 416 fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId); 417 } 418 break; 419 } 420 case OP_ADD_BLOCK: { 421 AddBlockOp addBlockOp = (AddBlockOp) op; 422 String path = addBlockOp.getPath(); 423 if (FSNamesystem.LOG.isDebugEnabled()) { 424 FSNamesystem.LOG.debug(op.opCode + ": " + path + 425 " new block id : " + addBlockOp.getLastBlock().getBlockId()); 426 } 427 INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path); 428 // add the new block to the INodeFile 429 addNewBlock(fsDir, addBlockOp, oldFile); 430 break; 431 } 432 case OP_SET_REPLICATION: { 433 SetReplicationOp setReplicationOp = (SetReplicationOp)op; 434 short replication = fsNamesys.getBlockManager().adjustReplication( 435 setReplicationOp.replication); 436 fsDir.unprotectedSetReplication(setReplicationOp.path, 437 replication, null); 438 break; 439 } 440 case OP_CONCAT_DELETE: { 441 ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op; 442 fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs, 443 concatDeleteOp.timestamp); 444 445 if (toAddRetryCache) { 446 fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId, 447 concatDeleteOp.rpcCallId); 448 } 449 break; 450 } 451 case OP_RENAME_OLD: { 452 RenameOldOp renameOp = (RenameOldOp)op; 453 fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, 454 renameOp.timestamp); 455 456 if (toAddRetryCache) { 457 fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId); 458 } 459 break; 460 } 461 case OP_DELETE: { 462 DeleteOp deleteOp = (DeleteOp)op; 463 fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp); 464 465 if (toAddRetryCache) { 466 fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId); 467 } 468 break; 469 } 470 case OP_MKDIR: { 471 MkdirOp mkdirOp = (MkdirOp)op; 472 inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion, 473 lastInodeId); 474 fsDir.unprotectedMkdir(inodeId, mkdirOp.path, mkdirOp.permissions, 475 mkdirOp.timestamp); 476 break; 477 } 478 case OP_SET_GENSTAMP_V1: { 479 SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op; 480 fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1); 481 break; 482 } 483 case OP_SET_PERMISSIONS: { 484 SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op; 485 fsDir.unprotectedSetPermission(setPermissionsOp.src, 486 setPermissionsOp.permissions); 487 break; 488 } 489 case OP_SET_OWNER: { 490 SetOwnerOp setOwnerOp = (SetOwnerOp)op; 491 fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username, 492 setOwnerOp.groupname); 493 break; 494 } 495 case OP_SET_NS_QUOTA: { 496 SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op; 497 fsDir.unprotectedSetQuota(setNSQuotaOp.src, 498 setNSQuotaOp.nsQuota, 499 HdfsConstants.QUOTA_DONT_SET); 500 break; 501 } 502 case OP_CLEAR_NS_QUOTA: { 503 ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op; 504 fsDir.unprotectedSetQuota(clearNSQuotaOp.src, 505 HdfsConstants.QUOTA_RESET, 506 HdfsConstants.QUOTA_DONT_SET); 507 break; 508 } 509 510 case OP_SET_QUOTA: 511 SetQuotaOp setQuotaOp = (SetQuotaOp)op; 512 fsDir.unprotectedSetQuota(setQuotaOp.src, 513 setQuotaOp.nsQuota, 514 setQuotaOp.dsQuota); 515 break; 516 517 case OP_TIMES: { 518 TimesOp timesOp = (TimesOp)op; 519 520 fsDir.unprotectedSetTimes(timesOp.path, 521 timesOp.mtime, 522 timesOp.atime, true); 523 break; 524 } 525 case OP_SYMLINK: { 526 if (!FileSystem.areSymlinksEnabled()) { 527 throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS"); 528 } 529 SymlinkOp symlinkOp = (SymlinkOp)op; 530 inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion, 531 lastInodeId); 532 fsDir.unprotectedAddSymlink(inodeId, symlinkOp.path, 533 symlinkOp.value, symlinkOp.mtime, 534 symlinkOp.atime, symlinkOp.permissionStatus); 535 536 if (toAddRetryCache) { 537 fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId); 538 } 539 break; 540 } 541 case OP_RENAME: { 542 RenameOp renameOp = (RenameOp)op; 543 fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, 544 renameOp.timestamp, renameOp.options); 545 546 if (toAddRetryCache) { 547 fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId); 548 } 549 break; 550 } 551 case OP_GET_DELEGATION_TOKEN: { 552 GetDelegationTokenOp getDelegationTokenOp 553 = (GetDelegationTokenOp)op; 554 555 fsNamesys.getDelegationTokenSecretManager() 556 .addPersistedDelegationToken(getDelegationTokenOp.token, 557 getDelegationTokenOp.expiryTime); 558 break; 559 } 560 case OP_RENEW_DELEGATION_TOKEN: { 561 RenewDelegationTokenOp renewDelegationTokenOp 562 = (RenewDelegationTokenOp)op; 563 fsNamesys.getDelegationTokenSecretManager() 564 .updatePersistedTokenRenewal(renewDelegationTokenOp.token, 565 renewDelegationTokenOp.expiryTime); 566 break; 567 } 568 case OP_CANCEL_DELEGATION_TOKEN: { 569 CancelDelegationTokenOp cancelDelegationTokenOp 570 = (CancelDelegationTokenOp)op; 571 fsNamesys.getDelegationTokenSecretManager() 572 .updatePersistedTokenCancellation( 573 cancelDelegationTokenOp.token); 574 break; 575 } 576 case OP_UPDATE_MASTER_KEY: { 577 UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op; 578 fsNamesys.getDelegationTokenSecretManager() 579 .updatePersistedMasterKey(updateMasterKeyOp.key); 580 break; 581 } 582 case OP_REASSIGN_LEASE: { 583 ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op; 584 585 Lease lease = fsNamesys.leaseManager.getLease( 586 reassignLeaseOp.leaseHolder); 587 INodeFileUnderConstruction pendingFile = 588 INodeFileUnderConstruction.valueOf( 589 fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path); 590 fsNamesys.reassignLeaseInternal(lease, 591 reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile); 592 break; 593 } 594 case OP_START_LOG_SEGMENT: 595 case OP_END_LOG_SEGMENT: { 596 // no data in here currently. 597 break; 598 } 599 case OP_CREATE_SNAPSHOT: { 600 CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op; 601 String path = fsNamesys.getSnapshotManager().createSnapshot( 602 createSnapshotOp.snapshotRoot, createSnapshotOp.snapshotName); 603 if (toAddRetryCache) { 604 fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId, 605 createSnapshotOp.rpcCallId, path); 606 } 607 break; 608 } 609 case OP_DELETE_SNAPSHOT: { 610 DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op; 611 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 612 List<INode> removedINodes = new ChunkedArrayList<INode>(); 613 fsNamesys.getSnapshotManager().deleteSnapshot( 614 deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName, 615 collectedBlocks, removedINodes); 616 fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks); 617 collectedBlocks.clear(); 618 fsNamesys.dir.removeFromInodeMap(removedINodes); 619 removedINodes.clear(); 620 621 if (toAddRetryCache) { 622 fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId, 623 deleteSnapshotOp.rpcCallId); 624 } 625 break; 626 } 627 case OP_RENAME_SNAPSHOT: { 628 RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op; 629 fsNamesys.getSnapshotManager().renameSnapshot( 630 renameSnapshotOp.snapshotRoot, renameSnapshotOp.snapshotOldName, 631 renameSnapshotOp.snapshotNewName); 632 633 if (toAddRetryCache) { 634 fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId, 635 renameSnapshotOp.rpcCallId); 636 } 637 break; 638 } 639 case OP_ALLOW_SNAPSHOT: { 640 AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op; 641 fsNamesys.getSnapshotManager().setSnapshottable( 642 allowSnapshotOp.snapshotRoot, false); 643 break; 644 } 645 case OP_DISALLOW_SNAPSHOT: { 646 DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op; 647 fsNamesys.getSnapshotManager().resetSnapshottable( 648 disallowSnapshotOp.snapshotRoot); 649 break; 650 } 651 case OP_SET_GENSTAMP_V2: { 652 SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op; 653 fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2); 654 break; 655 } 656 case OP_ALLOCATE_BLOCK_ID: { 657 AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op; 658 fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId); 659 break; 660 } 661 case OP_ADD_CACHE_DIRECTIVE: { 662 AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op; 663 CacheDirectiveInfo result = fsNamesys. 664 getCacheManager().addDirectiveFromEditLog(addOp.directive); 665 if (toAddRetryCache) { 666 Long id = result.getId(); 667 fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id); 668 } 669 break; 670 } 671 case OP_MODIFY_CACHE_DIRECTIVE: { 672 ModifyCacheDirectiveInfoOp modifyOp = 673 (ModifyCacheDirectiveInfoOp) op; 674 fsNamesys.getCacheManager().modifyDirectiveFromEditLog( 675 modifyOp.directive); 676 if (toAddRetryCache) { 677 fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); 678 } 679 break; 680 } 681 case OP_REMOVE_CACHE_DIRECTIVE: { 682 RemoveCacheDirectiveInfoOp removeOp = 683 (RemoveCacheDirectiveInfoOp) op; 684 fsNamesys.getCacheManager().removeDirective(removeOp.id, null); 685 if (toAddRetryCache) { 686 fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); 687 } 688 break; 689 } 690 case OP_ADD_CACHE_POOL: { 691 AddCachePoolOp addOp = (AddCachePoolOp) op; 692 fsNamesys.getCacheManager().addCachePool(addOp.info); 693 if (toAddRetryCache) { 694 fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); 695 } 696 break; 697 } 698 case OP_MODIFY_CACHE_POOL: { 699 ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op; 700 fsNamesys.getCacheManager().modifyCachePool(modifyOp.info); 701 if (toAddRetryCache) { 702 fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); 703 } 704 break; 705 } 706 case OP_REMOVE_CACHE_POOL: { 707 RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op; 708 fsNamesys.getCacheManager().removeCachePool(removeOp.poolName); 709 if (toAddRetryCache) { 710 fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); 711 } 712 break; 713 } 714 default: 715 throw new IOException("Invalid operation read " + op.opCode); 716 } 717 return inodeId; 718 } 719 720 private static String formatEditLogReplayError(EditLogInputStream in, 721 long recentOpcodeOffsets[], long txid) { 722 StringBuilder sb = new StringBuilder(); 723 sb.append("Error replaying edit log at offset " + in.getPosition()); 724 sb.append(". Expected transaction ID was ").append(txid); 725 if (recentOpcodeOffsets[0] != -1) { 726 Arrays.sort(recentOpcodeOffsets); 727 sb.append("\nRecent opcode offsets:"); 728 for (long offset : recentOpcodeOffsets) { 729 if (offset != -1) { 730 sb.append(' ').append(offset); 731 } 732 } 733 } 734 return sb.toString(); 735 } 736 737 /** 738 * Add a new block into the given INodeFile 739 */ 740 private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file) 741 throws IOException { 742 BlockInfo[] oldBlocks = file.getBlocks(); 743 Block pBlock = op.getPenultimateBlock(); 744 Block newBlock= op.getLastBlock(); 745 746 if (pBlock != null) { // the penultimate block is not null 747 Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0); 748 // compare pBlock with the last block of oldBlocks 749 Block oldLastBlock = oldBlocks[oldBlocks.length - 1]; 750 if (oldLastBlock.getBlockId() != pBlock.getBlockId() 751 || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) { 752 throw new IOException( 753 "Mismatched block IDs or generation stamps for the old last block of file " 754 + op.getPath() + ", the old last block is " + oldLastBlock 755 + ", and the block read from editlog is " + pBlock); 756 } 757 758 oldLastBlock.setNumBytes(pBlock.getNumBytes()); 759 if (oldLastBlock instanceof BlockInfoUnderConstruction) { 760 fsNamesys.getBlockManager().forceCompleteBlock( 761 (INodeFileUnderConstruction) file, 762 (BlockInfoUnderConstruction) oldLastBlock); 763 fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock); 764 } 765 } else { // the penultimate block is null 766 Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0); 767 } 768 // add the new block 769 BlockInfo newBI = new BlockInfoUnderConstruction( 770 newBlock, file.getBlockReplication()); 771 fsNamesys.getBlockManager().addBlockCollection(newBI, file); 772 file.addBlock(newBI); 773 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 774 } 775 776 /** 777 * Update in-memory data structures with new block information. 778 * @throws IOException 779 */ 780 private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op, 781 INodeFile file) throws IOException { 782 // Update its block list 783 BlockInfo[] oldBlocks = file.getBlocks(); 784 Block[] newBlocks = op.getBlocks(); 785 String path = op.getPath(); 786 787 // Are we only updating the last block's gen stamp. 788 boolean isGenStampUpdate = oldBlocks.length == newBlocks.length; 789 790 // First, update blocks in common 791 for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) { 792 BlockInfo oldBlock = oldBlocks[i]; 793 Block newBlock = newBlocks[i]; 794 795 boolean isLastBlock = i == newBlocks.length - 1; 796 if (oldBlock.getBlockId() != newBlock.getBlockId() || 797 (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && 798 !(isGenStampUpdate && isLastBlock))) { 799 throw new IOException("Mismatched block IDs or generation stamps, " + 800 "attempting to replace block " + oldBlock + " with " + newBlock + 801 " as block # " + i + "/" + newBlocks.length + " of " + 802 path); 803 } 804 805 oldBlock.setNumBytes(newBlock.getNumBytes()); 806 boolean changeMade = 807 oldBlock.getGenerationStamp() != newBlock.getGenerationStamp(); 808 oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); 809 810 if (oldBlock instanceof BlockInfoUnderConstruction && 811 (!isLastBlock || op.shouldCompleteLastBlock())) { 812 changeMade = true; 813 fsNamesys.getBlockManager().forceCompleteBlock( 814 (INodeFileUnderConstruction)file, 815 (BlockInfoUnderConstruction)oldBlock); 816 } 817 if (changeMade) { 818 // The state or gen-stamp of the block has changed. So, we may be 819 // able to process some messages from datanodes that we previously 820 // were unable to process. 821 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 822 } 823 } 824 825 if (newBlocks.length < oldBlocks.length) { 826 // We're removing a block from the file, e.g. abandonBlock(...) 827 if (!file.isUnderConstruction()) { 828 throw new IOException("Trying to remove a block from file " + 829 path + " which is not under construction."); 830 } 831 if (newBlocks.length != oldBlocks.length - 1) { 832 throw new IOException("Trying to remove more than one block from file " 833 + path); 834 } 835 Block oldBlock = oldBlocks[oldBlocks.length - 1]; 836 boolean removed = fsDir.unprotectedRemoveBlock(path, 837 (INodeFileUnderConstruction) file, oldBlock); 838 if (!removed && !(op instanceof UpdateBlocksOp)) { 839 throw new IOException("Trying to delete non-existant block " + oldBlock); 840 } 841 } else if (newBlocks.length > oldBlocks.length) { 842 // We're adding blocks 843 for (int i = oldBlocks.length; i < newBlocks.length; i++) { 844 Block newBlock = newBlocks[i]; 845 BlockInfo newBI; 846 if (!op.shouldCompleteLastBlock()) { 847 // TODO: shouldn't this only be true for the last block? 848 // what about an old-version fsync() where fsync isn't called 849 // until several blocks in? 850 newBI = new BlockInfoUnderConstruction( 851 newBlock, file.getBlockReplication()); 852 } else { 853 // OP_CLOSE should add finalized blocks. This code path 854 // is only executed when loading edits written by prior 855 // versions of Hadoop. Current versions always log 856 // OP_ADD operations as each block is allocated. 857 newBI = new BlockInfo(newBlock, file.getBlockReplication()); 858 } 859 fsNamesys.getBlockManager().addBlockCollection(newBI, file); 860 file.addBlock(newBI); 861 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 862 } 863 } 864 } 865 866 private static void dumpOpCounts( 867 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) { 868 StringBuilder sb = new StringBuilder(); 869 sb.append("Summary of operations loaded from edit log:\n "); 870 Joiner.on("\n ").withKeyValueSeparator("=").appendTo(sb, opCounts); 871 FSImage.LOG.debug(sb.toString()); 872 } 873 874 private void incrOpCount(FSEditLogOpCodes opCode, 875 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step, 876 Counter counter) { 877 Holder<Integer> holder = opCounts.get(opCode); 878 if (holder == null) { 879 holder = new Holder<Integer>(1); 880 opCounts.put(opCode, holder); 881 } else { 882 holder.held++; 883 } 884 counter.increment(); 885 } 886 887 /** 888 * Throw appropriate exception during upgrade from 203, when editlog loading 889 * could fail due to opcode conflicts. 890 */ 891 private void check203UpgradeFailure(int logVersion, Throwable e) 892 throws IOException { 893 // 0.20.203 version version has conflicting opcodes with the later releases. 894 // The editlog must be emptied by restarting the namenode, before proceeding 895 // with the upgrade. 896 if (Storage.is203LayoutVersion(logVersion) 897 && logVersion != HdfsConstants.LAYOUT_VERSION) { 898 String msg = "During upgrade failed to load the editlog version " 899 + logVersion + " from release 0.20.203. Please go back to the old " 900 + " release and restart the namenode. This empties the editlog " 901 + " and saves the namespace. Resume the upgrade after this step."; 902 throw new IOException(msg, e); 903 } 904 } 905 906 /** 907 * Find the last valid transaction ID in the stream. 908 * If there are invalid or corrupt transactions in the middle of the stream, 909 * validateEditLog will skip over them. 910 * This reads through the stream but does not close it. 911 * 912 * @throws IOException if the stream cannot be read due to an IO error (eg 913 * if the log does not exist) 914 */ 915 static EditLogValidation validateEditLog(EditLogInputStream in) { 916 long lastPos = 0; 917 long lastTxId = HdfsConstants.INVALID_TXID; 918 long numValid = 0; 919 FSEditLogOp op = null; 920 while (true) { 921 lastPos = in.getPosition(); 922 try { 923 if ((op = in.readOp()) == null) { 924 break; 925 } 926 } catch (Throwable t) { 927 FSImage.LOG.warn("Caught exception after reading " + numValid + 928 " ops from " + in + " while determining its valid length." + 929 "Position was " + lastPos, t); 930 in.resync(); 931 FSImage.LOG.warn("After resync, position is " + in.getPosition()); 932 continue; 933 } 934 if (lastTxId == HdfsConstants.INVALID_TXID 935 || op.getTransactionId() > lastTxId) { 936 lastTxId = op.getTransactionId(); 937 } 938 numValid++; 939 } 940 return new EditLogValidation(lastPos, lastTxId, false); 941 } 942 943 static class EditLogValidation { 944 private final long validLength; 945 private final long endTxId; 946 private final boolean hasCorruptHeader; 947 948 EditLogValidation(long validLength, long endTxId, 949 boolean hasCorruptHeader) { 950 this.validLength = validLength; 951 this.endTxId = endTxId; 952 this.hasCorruptHeader = hasCorruptHeader; 953 } 954 955 long getValidLength() { return validLength; } 956 957 long getEndTxId() { return endTxId; } 958 959 boolean hasCorruptHeader() { return hasCorruptHeader; } 960 } 961 962 /** 963 * Stream wrapper that keeps track of the current stream position. 964 * 965 * This stream also allows us to set a limit on how many bytes we can read 966 * without getting an exception. 967 */ 968 public static class PositionTrackingInputStream extends FilterInputStream 969 implements StreamLimiter { 970 private long curPos = 0; 971 private long markPos = -1; 972 private long limitPos = Long.MAX_VALUE; 973 974 public PositionTrackingInputStream(InputStream is) { 975 super(is); 976 } 977 978 private void checkLimit(long amt) throws IOException { 979 long extra = (curPos + amt) - limitPos; 980 if (extra > 0) { 981 throw new IOException("Tried to read " + amt + " byte(s) past " + 982 "the limit at offset " + limitPos); 983 } 984 } 985 986 @Override 987 public int read() throws IOException { 988 checkLimit(1); 989 int ret = super.read(); 990 if (ret != -1) curPos++; 991 return ret; 992 } 993 994 @Override 995 public int read(byte[] data) throws IOException { 996 checkLimit(data.length); 997 int ret = super.read(data); 998 if (ret > 0) curPos += ret; 999 return ret; 1000 } 1001 1002 @Override 1003 public int read(byte[] data, int offset, int length) throws IOException { 1004 checkLimit(length); 1005 int ret = super.read(data, offset, length); 1006 if (ret > 0) curPos += ret; 1007 return ret; 1008 } 1009 1010 @Override 1011 public void setLimit(long limit) { 1012 limitPos = curPos + limit; 1013 } 1014 1015 @Override 1016 public void clearLimit() { 1017 limitPos = Long.MAX_VALUE; 1018 } 1019 1020 @Override 1021 public void mark(int limit) { 1022 super.mark(limit); 1023 markPos = curPos; 1024 } 1025 1026 @Override 1027 public void reset() throws IOException { 1028 if (markPos == -1) { 1029 throw new IOException("Not marked!"); 1030 } 1031 super.reset(); 1032 curPos = markPos; 1033 markPos = -1; 1034 } 1035 1036 public long getPos() { 1037 return curPos; 1038 } 1039 1040 @Override 1041 public long skip(long amt) throws IOException { 1042 long extra = (curPos + amt) - limitPos; 1043 if (extra > 0) { 1044 throw new IOException("Tried to skip " + extra + " bytes past " + 1045 "the limit at offset " + limitPos); 1046 } 1047 long ret = super.skip(amt); 1048 curPos += ret; 1049 return ret; 1050 } 1051 } 1052 1053 public long getLastAppliedTxId() { 1054 return lastAppliedTxId; 1055 } 1056 1057 /** 1058 * Creates a Step used for updating startup progress, populated with 1059 * information from the given edits. The step always includes the log's name. 1060 * If the log has a known length, then the length is included in the step too. 1061 * 1062 * @param edits EditLogInputStream to use for populating step 1063 * @return Step populated with information from edits 1064 * @throws IOException thrown if there is an I/O error 1065 */ 1066 private static Step createStartupProgressStep(EditLogInputStream edits) 1067 throws IOException { 1068 long length = edits.length(); 1069 String name = edits.getCurrentStreamName(); 1070 return length != -1 ? new Step(name, length) : new Step(name); 1071 } 1072}