/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import javax.management.ObjectName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Trash;

import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.RefreshUserMappingsProtocol;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**********************************************************
 * NameNode serves as both directory namespace manager and
 * "inode table" for the Hadoop DFS.  There is a single NameNode
 * running in any DFS deployment.  (Well, except when there
 * is a second backup/failover NameNode, or when using federated NameNodes.)
 *
 * The NameNode controls two critical tables:
 *   1)  filename->blocksequence (namespace)
 *   2)  block->machinelist ("inodes")
 *
 * The first table is stored on disk and is very precious.
 * The second table is rebuilt every time the NameNode comes up.
 *
 * 'NameNode' refers to both this class as well as the 'NameNode server'.
 * The 'FSNamesystem' class actually performs most of the filesystem
 * management.  The majority of the 'NameNode' class itself is concerned
 * with exposing the IPC interface and the HTTP server to the outside world,
 * plus some configuration management.
 *
 * NameNode implements the
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
 * allows clients to ask for DFS services.
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
 * direct use by authors of DFS client code.  End-users should instead use the
 * {@link org.apache.hadoop.fs.FileSystem} class.
 *
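 * <p>For example (an illustrative sketch; the host and path are
 * hypothetical), an end-user reads a file through the generic
 * {@code FileSystem} API rather than speaking ClientProtocol directly:
 * <pre>{@code
 * Configuration conf = new HdfsConfiguration();
 * FileSystem fs = FileSystem.get(
 *     URI.create("hdfs://nn.example.com:8020/"), conf);
 * FSDataInputStream in = fs.open(new Path("/user/alice/data.txt"));
 * }</pre>
 *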
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
 * used by DataNodes that actually store DFS data blocks.  These
 * methods are invoked repeatedly and automatically by all the
 * DataNodes in a DFS deployment.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
 * used by secondary namenodes or rebalancing processes to get partial
 * NameNode state, for example partial blocksMap etc.
 **********************************************************/
@InterfaceAudience.Private
public class NameNode implements NameNodeStatusMXBean {
  static {
    HdfsConfiguration.init();
  }
  
  /**
   * Categories of operations supported by the namenode.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }
  
  /**
   * HDFS configuration can have three types of parameters:
   * <ol>
   * <li>Parameters that are common to all the name services in the cluster.</li>
   * <li>Parameters that are specific to a name service. These keys are suffixed
   * with the nameserviceId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1".</li>
   * <li>Parameters that are specific to a single name node. These keys are suffixed
   * with the nameserviceId and namenodeId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1.namenode1".</li>
   * </ol>
   *
   * The keys in this list are specific to a given namenode, and thus may be
   * configured globally, for a nameservice, or for a specific namenode within
   * a nameservice: without any suffix, with a nameservice suffix, or with a
   * nameservice and namenode suffix. The most specific suffix takes precedence.
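   *
   * <p>For example (hypothetical values), if both
   * {@code dfs.namenode.rpc-address.ns1} and
   * {@code dfs.namenode.rpc-address.ns1.nn1} are set, then namenode
   * {@code nn1} of nameservice {@code ns1} uses the latter, more specific
   * value.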
   */
  public static final String[] NAMENODE_SPECIFIC_KEYS = {
    DFS_NAMENODE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_NAME_DIR_KEY,
    DFS_NAMENODE_EDITS_DIR_KEY,
    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_USER_NAME_KEY,
    DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
    DFS_HA_FENCE_METHODS_KEY,
    DFS_HA_ZKFC_PORT_KEY
  };
  
  /**
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   * @see #NAMENODE_SPECIFIC_KEYS
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };
  
  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | ["
      + StartupOption.CHECKPOINT.getName() + "] | ["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
      + StartupOption.UPGRADE.getName() + "] | ["
      + StartupOption.ROLLBACK.getName() + "] | ["
      + StartupOption.FINALIZE.getName() + "] | ["
      + StartupOption.IMPORT.getName() + "] | ["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
      + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
      + " ] ]";
  
  public long getProtocolVersion(String protocol, 
                                 long clientVersion) throws IOException {
    if (protocol.equals(ClientProtocol.class.getName())) {
      return ClientProtocol.versionID; 
    } else if (protocol.equals(DatanodeProtocol.class.getName())){
      return DatanodeProtocol.versionID;
    } else if (protocol.equals(NamenodeProtocol.class.getName())){
      return NamenodeProtocol.versionID;
    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
      return RefreshAuthorizationPolicyProtocol.versionID;
    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
      return RefreshUserMappingsProtocol.versionID;
    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
      return GetUserMappingsProtocol.versionID;
    } else {
      throw new IOException("Unknown protocol to name node: " + protocol);
    }
  }
    
  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();
  
  protected FSNamesystem namesystem; 
  protected final Configuration conf;
  protected NamenodeRole role;
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected boolean allowStaleStandbyReads;

  
  /** httpServer */
  protected NameNodeHttpServer httpServer;
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;
  
  private NameNodeRpcServer rpcServer;

  private JvmPauseMonitor pauseMonitor;
  private ObjectName nameNodeStatusBeanName;
  
  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  static NameNodeMetrics metrics;
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }
  
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   * 
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }
  
  /**
   * Set the service RPC address configuration property to the given
   * address.
   */
  public static void setServiceAddress(Configuration conf,
                                           String address) {
    LOG.info("Setting ADDRESS " + address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }
  
  /**
   * Fetches the address that services (datanodes, the backup node, and any
   * other non-client connections) should use when connecting to the
   * namenode. If the dedicated service RPC address is not configured,
   * returns the default namenode address when fallback is true, and null
   * otherwise.
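   *
   * <p>For example (hypothetical values): with
   * {@code dfs.namenode.servicerpc-address} unset,
   * {@code getServiceAddress(conf, true)} falls back to the address
   * derived from {@code fs.defaultFS}, while
   * {@code getServiceAddress(conf, false)} returns {@code null}.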
   */
  public static InetSocketAddress getServiceAddress(Configuration conf,
                                                        boolean fallback) {
    String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    if (addr == null || addr.isEmpty()) {
      return fallback ? getAddress(conf) : null;
    }
    return getAddress(addr);
  }

  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }


  /**
   * TODO:FEDERATION
   * @param filesystemURI URI naming the file system
   * @return address of the file system's NameNode
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    String authority = filesystemURI.getAuthority();
    if (authority == null) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
    }
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
        filesystemURI.getScheme())) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME));
    }
    return getAddress(authority);
  }

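  /**
   * Build an hdfs:// URI for the given namenode address; the port is
   * omitted when it equals {@link #DEFAULT_PORT}. For example (hypothetical
   * host), {@code getUri(new InetSocketAddress("nn.example.com", 8020))}
   * yields {@code hdfs://nn.example.com}, while port 9000 yields
   * {@code hdfs://nn.example.com:9000}.
   */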
  public static URI getUri(InetSocketAddress namenode) {
    int port = namenode.getPort();
    String portString = port == DEFAULT_PORT ? "" : (":"+port);
    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 
        + namenode.getHostName()+portString);
  }

  //
  // Common NameNode methods implementation for the active name-node role.
  //
  public NamenodeRole getRole() {
    return role;
  }

  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration, get the address of the service RPC server.
   * Returns null if the service RPC address is not configured.
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }
  
  /** Given a configuration, get the bind host of the service RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getServiceRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /** Given a configuration, get the bind host of the client RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }
   
  /**
   * Modifies the passed configuration to contain the service RPC address
   * setting.
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return  NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }

  /* Optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser, which is synchronized.
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }


  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
  }
  
  /**
   * Initialize name-node.
   * 
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
          intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }
    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    
    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();

    startCommonServices(conf);
  }
  
  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }

  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }
  
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }   
    stopHttpServer();
  }
  
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }
    
    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
  
  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }
  
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
  
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }

  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul> 
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster  
   * upgrade and create a snapshot of the current file system state</li> 
   * <li>{@link StartupOption#RECOVER RECOVER} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the  
   *            cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
   *            previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via the configuration field
   * <tt>dfs.namenode.startup</tt>.
   * 
   * The conf will be modified to reflect the actual ports on which 
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
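   *
   * <p>For example (an illustrative command sequence; the cluster ID is
   * hypothetical), a new namespace is formatted once and then started
   * normally:
   * <pre>{@code
   * $ hdfs namenode -format -clusterid CID-example
   * $ hdfs namenode
   * }</pre>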
   * 
   * @param conf  configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }

  protected NameNode(Configuration conf, NamenodeRole role) 
      throws IOException { 
    this.conf = conf;
    this.role = role;
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState();
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }

  protected HAState createHAState() {
    return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
  }

  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }

  /**
   * Stop all NameNode threads and wait for all to finish.
   */
  public void stop() {
    synchronized(this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
    }
  }

  synchronized boolean isStopRequested() {
    return stopRequested;
  }

  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }
    
  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.dir.fsImage;
  }

  /**
   * @return NameNode RPC address
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }

  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }

  /**
   * @return NameNode service RPC address if configured, the
   *    NameNode RPC address otherwise
   */
  public InetSocketAddress getServiceRpcAddress() {
    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
  }

  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }

  /**
   * @return NameNode HTTPS address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpsAddress() {
    return httpServer.getHttpsAddress();
  }

  /**
   * Verify that configured directories exist, then interactively confirm
   * that formatting is desired for each existing directory, and format them.
   * 
   * @param conf configuration to use
   * @param force format without prompting, even if directories exist
   * @param isInteractive prompt the user before formatting existing directories
   * @return true if formatting was aborted, false otherwise
   * @throws IOException
   */
  private static boolean format(Configuration conf, boolean force,
      boolean isInteractive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    checkAllowFormat(conf);

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }
    
    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
    List<URI> dirsToPrompt = new ArrayList<URI>();
    dirsToPrompt.addAll(nameDirsToFormat);
    dirsToPrompt.addAll(sharedDirs);
    List<URI> editDirsToFormat = 
                 FSNamesystem.getNamespaceEditsDirs(conf);

    // use the cluster ID provided on the command line, if any
    String clusterId = StartupOption.FORMAT.getClusterId();
    if(clusterId == null || clusterId.equals("")) {
      // otherwise generate a new cluster id
      clusterId = NNStorage.newClusterID();
    }
    System.out.println("Formatting using clusterid: " + clusterId);
    
    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
    try {
      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
      fsImage.getEditLog().initJournalsForWrite();

      if (!fsImage.confirmFormat(force, isInteractive)) {
        return true; // aborted
      }

      fsImage.format(fsn, clusterId);
    } catch (IOException ioe) {
      LOG.warn("Encountered exception during format: ", ioe);
      fsImage.close();
      throw ioe;
    }
    return false;
  }

  public static void checkAllowFormat(Configuration conf) throws IOException {
    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
                + " is set to false for this filesystem, so it "
                + "cannot be formatted. You will need to set "
                + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
                + "to true in order to format this filesystem");
    }
  }
  
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }
  
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }

  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
   */
  private static Configuration getConfigurationWithoutSharedEdits(
      Configuration conf)
      throws IOException {
    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
    String editsDirsString = Joiner.on(",").join(editsDirs);

    Configuration confWithoutShared = new Configuration(conf);
    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        editsDirsString);
    return confWithoutShared;
  }

  /**
   * Format a new shared edits dir and copy in enough edit log segments so that
   * the standby NN can start up.
   * 
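   * <p>Typically reached from the command line (illustrative):
   * <pre>{@code
   * $ hdfs namenode -initializeSharedEdits [-force] [-nonInteractive]
   * }</pre>
   *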
   * @param conf configuration
   * @param force format regardless of whether or not the shared edits dir exists
   * @param interactive prompt the user when a dir exists
   * @return true if the command aborts, false otherwise
   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    
    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      LOG.fatal("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    FSImage sharedEditsImage = null;
    try {
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
      
      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
      
      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
      
      sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();
      
      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }
      
      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to initialize
      // the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      if (sharedEditsImage != null) {
        try {
          sharedEditsImage.close();
        }  catch (IOException ioe) {
          LOG.warn("Could not close sharedEditsImage", ioe);
        }
      }
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }

  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();
    
    FSEditLog sourceEditLog = fsns.getFSImage().editLog;
    
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();
    
    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }

  private static boolean finalize(Configuration conf,
                               boolean isConfirmationNeeded
                               ) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
    System.err.print(
        "\"finalize\" will remove the previous state of the file system.\n"
        + "Recent upgrade will become permanent.\n"
        + "Rollback option will not be available anymore.\n");
    if (isConfirmationNeeded) {
      if (!confirmPrompt("Finalize filesystem state?")) {
        System.err.println("Finalize aborted.");
        return true;
      }
    }
    nsys.dir.fsImage.finalizeUpgrade();
    return false;
  }

  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }

  private static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
    StartupOption startOpt = StartupOption.REGULAR;
    for(int i=0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE;
        // might be followed by two args
        if (i + 2 < argsLen
            && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
          i += 2;
          startOpt.setClusterId(args[i]);
        }
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1 ; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.fatal("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
                StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " + 
              "can't understand option \"" + args[i] + "\"");
          }
        }
      } else {
        return null;
      }
    }
    return startOpt;
  }

  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.toString());
  }

  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }

  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.saveNamespace();
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }

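  /**
   * Parse the command-line arguments and create the appropriate type of
   * node: a regular NameNode, a BackupNode for -backup/-checkpoint, or
   * null for one-shot operations (such as -format), which terminate the
   * process when they complete.
   */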
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);
    
    if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) &&
        (startOpt == StartupOption.UPGRADE ||
         startOpt == StartupOption.ROLLBACK ||
         startOpt == StartupOption.FINALIZE)) {
      throw new HadoopIllegalArgumentException("Invalid startup option. " +
          "Cannot perform DFS upgrade with HA enabled.");
    }

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        boolean aborted = finalize(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }

  /**
   * In a federated setup, configuration is set for a group of namenodes and
   * secondary/backup/checkpointer nodes that share a logical nameservice ID.
   * The configuration keys specific to them carry the configured
   * nameserviceId as a suffix.
   * 
   * This method copies the value from the specific key of the form
   * key.nameserviceId to the generic key, to set up the generic
   * configuration. Once this is done, only the generic version of the
   * configuration is read in the rest of the code, for backward
   * compatibility and simpler code changes.
   * 
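   * <p>For example (hypothetical names and address):
   * <pre>{@code
   * Configuration conf = new HdfsConfiguration();
   * conf.set("dfs.namenode.rpc-address.ns1.nn1", "host1:8020");
   * NameNode.initializeGenericKeys(conf, "ns1", "nn1");
   * // The unsuffixed key now carries the most specific value,
   * // and fs.defaultFS has been pointed at it:
   * assert "host1:8020".equals(conf.get("dfs.namenode.rpc-address"));
   * }</pre>
   *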
   * @param conf
   *          Configuration object in which to look up the specific keys and
   *          set the generic values. Note that the conf object is modified.
   * @param nameserviceId name service Id (to distinguish federated NNs)
   * @param namenodeId the namenode ID (to distinguish HA NNs)
   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
   */
  public static void initializeGenericKeys(Configuration conf,
      String nameserviceId, String namenodeId) {
    if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
        (namenodeId != null && !namenodeId.isEmpty())) {
      if (nameserviceId != null) {
        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
      }
      if (namenodeId != null) {
        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
      }
      
      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
          NAMENODE_SPECIFIC_KEYS);
      DFSUtil.setGenericConf(conf, nameserviceId, null,
          NAMESERVICE_SPECIFIC_KEYS);
    }
    
    // If the RPC address is set, use it to (re-)configure the default FS
    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
    }
  }
    
  /** 
   * Get the name service Id for the node.
   * @return name service Id or null if federation is not configured
   */
  protected String getNameServiceId(Configuration conf) {
    return DFSUtil.getNamenodeNameServiceId(conf);
  }
  
  /**
   * Command-line entry point for the NameNode daemon.
   */
  public static void main(String argv[]) throws Exception {
    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
      System.exit(0);
    }

    try {
      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
      NameNode namenode = createNameNode(argv, null);
      if (namenode != null) {
        namenode.join();
      }
    } catch (Throwable e) {
      LOG.fatal("Exception in namenode join", e);
      terminate(1, e);
    }
  }

  synchronized void monitorHealth() 
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op, if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }
  
  synchronized void transitionToActive() 
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }
  
  synchronized void transitionToStandby() 
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }

  synchronized HAServiceStatus getServiceStatus()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    if (state == null) {
      return new HAServiceStatus(HAServiceState.INITIALIZING);
    }
    HAServiceState retState = state.getServiceState();
    HAServiceStatus ret = new HAServiceStatus(retState);
    if (retState == HAServiceState.STANDBY) {
      String safemodeTip = namesystem.getSafeModeTip();
      if (!safemodeTip.isEmpty()) {
        ret.setNotReadyToBecomeActive(
            "The NameNode is in safemode. " +
            safemodeTip);
      } else {
        ret.setReadyToBecomeActive();
      }
    } else if (retState == HAServiceState.ACTIVE) {
      ret.setReadyToBecomeActive();
    } else {
      ret.setNotReadyToBecomeActive("State is " + state);
    }
    return ret;
  }

  synchronized HAServiceState getServiceState() {
    if (state == null) {
      return HAServiceState.INITIALIZING;
    }
    return state.getServiceState();
  }

  /**
   * Register NameNodeStatusMXBean
   */
  private void registerNNSMXBean() {
    nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
  }

  @Override // NameNodeStatusMXBean
  public String getNNRole() {
    String roleStr = "";
    NamenodeRole role = getRole();
    if (null != role) {
      roleStr = role.toString();
    }
    return roleStr;
  }

  @Override // NameNodeStatusMXBean
  public String getState() {
    String servStateStr = "";
    HAServiceState servState = getServiceState();
    if (null != servState) {
      servStateStr = servState.toString();
    }
    return servStateStr;
  }

  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }

  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }

  /**
   * Shutdown the NN immediately in an ungraceful way. Used when it would be
   * unsafe for the NN to continue operating, e.g. during a failed HA state
   * transition.
   * 
   * @param t exception which warrants the shutdown. Printed to the NN log
   *          before exit.
   * @throws ExitException thrown only for testing.
   */
  protected synchronized void doImmediateShutdown(Throwable t)
      throws ExitException {
    String message = "Error encountered requiring NN shutdown. " +
        "Shutting down immediately.";
    try {
      LOG.fatal(message, t);
    } catch (Throwable ignored) {
      // This is unlikely to happen, but there's nothing we can do if it does.
    }
    terminate(1, t);
  }
  
  /**
   * Class used to expose {@link NameNode} as context to {@link HAState}
   */
  protected class NameNodeHAContext implements HAContext {
    @Override
    public void setState(HAState s) {
      state = s;
    }

    @Override
    public HAState getState() {
      return state;
    }

    @Override
    public void startActiveServices() throws IOException {
      try {
        namesystem.startActiveServices();
        startTrashEmptier(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopActiveServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopActiveServices();
        }
        stopTrashEmptier();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void startStandbyServices() throws IOException {
      try {
        namesystem.startStandbyServices(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void prepareToStopStandbyServices() throws ServiceFailedException {
      try {
        namesystem.prepareToStopStandbyServices();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }
    
    @Override
    public void stopStandbyServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopStandbyServices();
        }
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }
    
    @Override
    public void writeLock() {
      namesystem.writeLock();
    }
    
    @Override
    public void writeUnlock() {
      namesystem.writeUnlock();
    }
    
    /** Check if an operation of given category is allowed */
    @Override
    public void checkOperation(final OperationCategory op)
        throws StandbyException {
      state.checkOperation(haContext, op);
    }
    
    @Override
    public boolean allowStaleReads() {
      return allowStaleStandbyReads;
    }

  }
  
  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }

  /**
   * Check that a request to change this node's HA state is valid.
   * In particular, verifies that non-forced requests from the HAAdmin CLI
   * are rejected when automatic failover is enabled, and that requests
   * from the ZK failover controller are rejected when it is disabled.
   *
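   * <p>For example (illustrative), when automatic failover is enabled a
   * plain {@code hdfs haadmin -transitionToActive nn1} is rejected here,
   * while the same request with the {@code --forcemanual} flag is logged
   * and allowed.
   *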
   * @param req the request to check
   * @throws AccessControlException if the request is disallowed
   */
  void checkHaStateChange(StateChangeRequestInfo req)
      throws AccessControlException {
    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
    switch (req.getSource()) {
    case REQUEST_BY_USER:
      if (autoHaEnabled) {
        throw new AccessControlException(
            "Manual HA control for this NameNode is disallowed, because " +
            "automatic HA is enabled.");
      }
      break;
    case REQUEST_BY_USER_FORCED:
      if (autoHaEnabled) {
        LOG.warn("Allowing manual HA control from " +
            Server.getRemoteAddress() +
            " even though automatic HA is enabled, because the user " +
            "specified the force flag");
      }
      break;
    case REQUEST_BY_ZKFC:
      if (!autoHaEnabled) {
        throw new AccessControlException(
            "Request from ZK failover controller at " +
            Server.getRemoteAddress() + " denied since automatic HA " +
            "is not enabled"); 
      }
      break;
    }
  }
}