001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.IOException;
021import java.net.InetAddress;
022import java.net.UnknownHostException;
023import java.util.HashMap;
024import java.util.HashSet;
025import java.util.Iterator;
026import java.util.Map;
027import java.util.TreeMap;
028
029import org.apache.commons.logging.Log;
030import org.apache.commons.logging.LogFactory;
031import org.apache.hadoop.hdfs.protocol.DatanodeID;
032import org.apache.hadoop.util.HostsFileReader;
033
034/**
035 * This class manages the include and exclude files for HDFS.
036 * 
037 * These files control which DataNodes the NameNode expects to see in the
038 * cluster.  Loosely speaking, the include file, if it exists and is not
039 * empty, is a list of everything we expect to see.  The exclude file is 
040 * a list of everything we want to ignore if we do see it.
041 *
042 * Entries may or may not specify a port.  If they don't, we consider
043 * them to apply to every DataNode on that host.  For example, putting 
044 * 192.168.0.100 in the excludes file blacklists both 192.168.0.100:5000 and
045 * 192.168.0.100:6000.  This case comes up in unit tests.
046 *
047 * When reading the hosts files, we try to find the IP address for each
048 * entry.  This is important because it allows us to de-duplicate entries.
049 * If the user specifies a node as foo.bar.com in the include file, but
050 * 192.168.0.100 in the exclude file, we need to realize that these are 
051 * the same node.  Resolving the IP address also allows us to give more
052 * information back to getDatanodeListForReport, which makes the web UI 
053 * look nicer (among other things.)  See HDFS-3934 for more details.
054 *
055 * DNS resolution can be slow.  For this reason, we ONLY do it when (re)reading
056 * the hosts files.  In all other cases, we rely on the cached values either
057 * in the DatanodeID objects, or in HostFileManager#Entry.
058 * We also don't want to be holding locks when doing this.
059 * See HDFS-3990 for more discussion of DNS overheads.
060 * 
061 * Not all entries in the hosts files will have an associated IP address. 
062 * Some entries may be "registration names."  The "registration name" of 
063 * a DataNode is either the actual hostname, or an arbitrary string configured
064 * by dfs.datanode.hostname.  It's possible to add registration names to the
065 * include or exclude files.  If we can't find an IP address associated with
066 * a host file entry, we assume it's a registered hostname and act accordingly.
067 * The "registration name" feature is a little odd and it may be removed in the
068 * future (I hope?)
069 */
070public class HostFileManager {
071  private static final Log LOG = LogFactory.getLog(HostFileManager.class);
072
073  public static class Entry {
074    /**
075     * This what the user put on the line before the colon, or the whole line
076     * if there is no colon.
077     */
078    private final String prefix;
079    
080    /**
081     * This is the port which was specified after the colon.  It is 0 if no
082     * port was given.
083     */
084    private final int port;
085
086    /**
087     * If we can resolve the IP address, this is it.  Otherwise, it is the 
088     * empty string.
089     */
090    private final String ipAddress;
091
092    /**
093     * Parse a hosts file Entry.
094     */
095    static Entry parse(String fileName, String entry) throws IOException {
096      final String prefix;
097      final int port;
098      String ipAddress = "";
099      
100      int idx = entry.indexOf(':');
101      if (-1 == idx) {
102        prefix = entry;
103        port = 0;
104      } else {
105        prefix = entry.substring(0, idx);
106        String portStr = entry.substring(idx + 1);
107        try {
108          port = Integer.valueOf(portStr);
109        } catch (NumberFormatException e) {
110          throw new IOException("unable to parse port number for " +
111              "'" + entry + "'", e);
112        }
113      }
114      try {
115        // Let's see if we can resolve this prefix to an IP address.
116        // This may fail; one example is with a registered hostname
117        // which is not actually a real DNS name.
118        InetAddress addr = InetAddress.getByName(prefix);
119        ipAddress = addr.getHostAddress();
120      } catch (UnknownHostException e) {
121        LOG.info("When reading " + fileName + ", could not look up " +
122            "IP address for " + prefix + ".  We will assume this is a " +
123            "registration name.", e);
124      }
125      return new Entry(prefix, port, ipAddress);
126    }
127
128    public String getIdentifier() {
129      return ipAddress.isEmpty() ? prefix : ipAddress;
130    }
131
132    public Entry(String prefix, int port, String ipAddress) {
133      this.prefix = prefix;
134      this.port = port;
135      this.ipAddress = ipAddress;
136    }
137
138    public String getPrefix() {
139      return prefix;
140    }
141
142    public int getPort() {
143      return port;
144    }
145
146    public String getIpAddress() {
147      return ipAddress;
148    }
149
150    public String toString() {
151      StringBuilder bld = new StringBuilder();
152      bld.append("Entry{").append(prefix).append(", port=").
153          append(port).append(", ipAddress=").append(ipAddress).append("}");
154      return bld.toString();
155    }
156  }
157
158  public static class EntrySet implements Iterable<Entry> {
159    /**
160     * The index.  Each Entry appears in here exactly once.
161     *
162     * It may be indexed by one of:
163     *     ipAddress:port
164     *     ipAddress
165     *     registeredHostname:port
166     *     registeredHostname
167     *     
168     * The different indexing strategies reflect the fact that we may or may
169     * not have a port or IP address for each entry.
170     */
171    TreeMap<String, Entry> index = new TreeMap<String, Entry>();
172
173    public boolean isEmpty() {
174      return index.isEmpty();
175    }
176
177    public Entry find(DatanodeID datanodeID) {
178      Entry entry;
179      int xferPort = datanodeID.getXferPort();
180      assert(xferPort > 0);
181      String datanodeIpAddr = datanodeID.getIpAddr();
182      if (datanodeIpAddr != null) {
183        entry = index.get(datanodeIpAddr + ":" + xferPort);
184        if (entry != null) {
185          return entry;
186        }
187        entry = index.get(datanodeIpAddr);
188        if (entry != null) {
189          return entry;
190        }
191      }
192      String registeredHostName = datanodeID.getHostName();
193      if (registeredHostName != null) {
194        entry = index.get(registeredHostName + ":" + xferPort);
195        if (entry != null) {
196          return entry;
197        }
198        entry = index.get(registeredHostName);
199        if (entry != null) {
200          return entry;
201        }
202      }
203      return null;
204    }
205
206    public Entry find(Entry toFind) {
207      int port = toFind.getPort();
208      if (port != 0) {
209        return index.get(toFind.getIdentifier() + ":" + port);
210      } else {
211        // An Entry with no port matches any entry with the same identifer.
212        // In other words, we treat 0 as "any port."
213        Map.Entry<String, Entry> ceil =
214            index.ceilingEntry(toFind.getIdentifier());
215        if ((ceil != null) &&
216            (ceil.getValue().getIdentifier().equals(
217                toFind.getIdentifier()))) {
218          return ceil.getValue();
219        }
220        return null;
221      }
222    }
223
224    public String toString() {
225      StringBuilder bld = new StringBuilder();
226      
227      bld.append("HostSet(");
228      for (Map.Entry<String, Entry> entry : index.entrySet()) {
229        bld.append("\n\t");
230        bld.append(entry.getKey()).append("->").
231            append(entry.getValue().toString());
232      }
233      bld.append("\n)");
234      return bld.toString();
235    }
236
237    @Override
238    public Iterator<Entry> iterator() {
239      return index.values().iterator();
240    }
241  }
242
243  public static class MutableEntrySet extends EntrySet {
244    public void add(DatanodeID datanodeID) {
245      Entry entry = new Entry(datanodeID.getHostName(),
246          datanodeID.getXferPort(), datanodeID.getIpAddr());
247      index.put(datanodeID.getIpAddr() + ":" + datanodeID.getXferPort(),
248          entry);
249    }
250
251    public void add(Entry entry) {
252      int port = entry.getPort();
253      if (port != 0) {
254        index.put(entry.getIdentifier() + ":" + port, entry);
255      } else {
256        index.put(entry.getIdentifier(), entry);
257      }
258    }
259
260    void readFile(String type, String filename) throws IOException {
261      if (filename.isEmpty()) {
262        return;
263      }
264      HashSet<String> entrySet = new HashSet<String>();
265      HostsFileReader.readFileToSet(type, filename, entrySet);
266      for (String str : entrySet) {
267        Entry entry = Entry.parse(filename, str);
268        add(entry);
269      }
270    }
271  }
272
273  private EntrySet includes = new EntrySet();
274  private EntrySet excludes = new EntrySet();
275
276  public HostFileManager() {
277  }
278
279  public void refresh(String includeFile, String excludeFile)
280      throws IOException {
281    MutableEntrySet newIncludes = new MutableEntrySet();
282    IOException includeException = null;
283    try {
284      newIncludes.readFile("included", includeFile);
285    } catch (IOException e) {
286      includeException = e;
287    }
288    MutableEntrySet newExcludes = new MutableEntrySet();
289    IOException excludeException = null;
290    try {
291      newExcludes.readFile("excluded", excludeFile);
292    } catch (IOException e) {
293      excludeException = e;
294    }
295    synchronized(this) {
296      if (includeException == null) {
297        includes = newIncludes;
298      }
299      if (excludeException == null) {
300        excludes = newExcludes;
301      }
302    }
303    if (includeException == null) {
304      LOG.info("read includes:\n" + newIncludes);
305    } else {
306      LOG.error("failed to read include file '" + includeFile + "'. " +
307          "Continuing to use previous include list.",
308          includeException);
309    }
310    if (excludeException == null) {
311      LOG.info("read excludes:\n" + newExcludes);
312    } else {
313      LOG.error("failed to read exclude file '" + excludeFile + "'." +
314          "Continuing to use previous exclude list.",
315          excludeException);
316    }
317    if (includeException != null) {
318      throw new IOException("error reading hosts file " + includeFile,
319          includeException);
320    }
321    if (excludeException != null) {
322      throw new IOException("error reading exclude file " + excludeFile,
323          excludeException);
324    }
325  }
326
327  public synchronized boolean isIncluded(DatanodeID dn) {
328    if (includes.isEmpty()) {
329      // If the includes list is empty, act as if everything is in the
330      // includes list.
331      return true;
332    } else {
333      return includes.find(dn) != null;
334    }
335  }
336
337  public synchronized boolean isExcluded(DatanodeID dn) {
338    return excludes.find(dn) != null;
339  }
340
341  public synchronized boolean hasIncludes() {
342    return !includes.isEmpty();
343  }
344
345  /**
346   * @return          the includes as an immutable set.
347   */
348  public synchronized EntrySet getIncludes() {
349    return includes;
350  }
351
352  /**
353   * @return          the excludes as an immutable set.
354   */
355  public synchronized EntrySet getExcludes() {
356    return excludes;
357  }
358}