001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.datanode.fsdataset; 019 020import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT; 021import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY; 022import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT; 023import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY; 024 025import java.io.IOException; 026import java.util.ArrayList; 027import java.util.List; 028import java.util.Random; 029 030import org.apache.commons.logging.Log; 031import org.apache.commons.logging.LogFactory; 032import org.apache.hadoop.conf.Configurable; 033import org.apache.hadoop.conf.Configuration; 034import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; 035 036/** 037 * A DN volume choosing policy which takes into account the amount of free 038 * space on each of the available volumes when considering where to assign a 039 * new replica allocation. By default this policy prefers assigning replicas to 040 * those volumes with more available free space, so as to over time balance the 041 * available space of all the volumes within a DN. 042 */ 043public class AvailableSpaceVolumeChoosingPolicy<V extends FsVolumeSpi> 044 implements VolumeChoosingPolicy<V>, Configurable { 045 046 private static final Log LOG = LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class); 047 048 private static final Random RAND = new Random(); 049 050 private long balancedSpaceThreshold = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT; 051 private float balancedPreferencePercent = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT; 052 053 @Override 054 public synchronized void setConf(Configuration conf) { 055 balancedSpaceThreshold = conf.getLong( 056 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY, 057 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT); 058 balancedPreferencePercent = conf.getFloat( 059 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY, 060 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); 061 062 LOG.info("Available space volume choosing policy initialized: " + 063 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY + 064 " = " + balancedSpaceThreshold + ", " + 065 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + 066 " = " + balancedPreferencePercent); 067 068 if (balancedPreferencePercent > 1.0) { 069 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + 070 " is greater than 1.0 but should be in the range 0.0 - 1.0"); 071 } 072 073 if (balancedPreferencePercent < 0.5) { 074 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + 075 " is less than 0.5 so volumes with less available disk space will receive more block allocations"); 076 } 077 } 078 079 @Override 080 public synchronized Configuration getConf() { 081 // Nothing to do. Only added to fulfill the Configurable contract. 082 return null; 083 } 084 085 private VolumeChoosingPolicy<V> roundRobinPolicyBalanced = 086 new RoundRobinVolumeChoosingPolicy<V>(); 087 private VolumeChoosingPolicy<V> roundRobinPolicyHighAvailable = 088 new RoundRobinVolumeChoosingPolicy<V>(); 089 private VolumeChoosingPolicy<V> roundRobinPolicyLowAvailable = 090 new RoundRobinVolumeChoosingPolicy<V>(); 091 092 @Override 093 public synchronized V chooseVolume(List<V> volumes, 094 final long replicaSize) throws IOException { 095 if (volumes.size() < 1) { 096 throw new DiskOutOfSpaceException("No more available volumes"); 097 } 098 099 AvailableSpaceVolumeList volumesWithSpaces = 100 new AvailableSpaceVolumeList(volumes); 101 102 if (volumesWithSpaces.areAllVolumesWithinFreeSpaceThreshold()) { 103 // If they're actually not too far out of whack, fall back on pure round 104 // robin. 105 V volume = roundRobinPolicyBalanced.chooseVolume(volumes, replicaSize); 106 if (LOG.isDebugEnabled()) { 107 LOG.debug("All volumes are within the configured free space balance " + 108 "threshold. Selecting " + volume + " for write of block size " + 109 replicaSize); 110 } 111 return volume; 112 } else { 113 V volume = null; 114 // If none of the volumes with low free space have enough space for the 115 // replica, always try to choose a volume with a lot of free space. 116 long mostAvailableAmongLowVolumes = volumesWithSpaces 117 .getMostAvailableSpaceAmongVolumesWithLowAvailableSpace(); 118 119 List<V> highAvailableVolumes = extractVolumesFromPairs( 120 volumesWithSpaces.getVolumesWithHighAvailableSpace()); 121 List<V> lowAvailableVolumes = extractVolumesFromPairs( 122 volumesWithSpaces.getVolumesWithLowAvailableSpace()); 123 124 float preferencePercentScaler = 125 (highAvailableVolumes.size() * balancedPreferencePercent) + 126 (lowAvailableVolumes.size() * (1 - balancedPreferencePercent)); 127 float scaledPreferencePercent = 128 (highAvailableVolumes.size() * balancedPreferencePercent) / 129 preferencePercentScaler; 130 if (mostAvailableAmongLowVolumes < replicaSize || 131 RAND.nextFloat() < scaledPreferencePercent) { 132 volume = roundRobinPolicyHighAvailable.chooseVolume( 133 highAvailableVolumes, 134 replicaSize); 135 if (LOG.isDebugEnabled()) { 136 LOG.debug("Volumes are imbalanced. Selecting " + volume + 137 " from high available space volumes for write of block size " 138 + replicaSize); 139 } 140 } else { 141 volume = roundRobinPolicyLowAvailable.chooseVolume( 142 lowAvailableVolumes, 143 replicaSize); 144 if (LOG.isDebugEnabled()) { 145 LOG.debug("Volumes are imbalanced. Selecting " + volume + 146 " from low available space volumes for write of block size " 147 + replicaSize); 148 } 149 } 150 return volume; 151 } 152 } 153 154 /** 155 * Used to keep track of the list of volumes we're choosing from. 156 */ 157 private class AvailableSpaceVolumeList { 158 private final List<AvailableSpaceVolumePair> volumes; 159 160 public AvailableSpaceVolumeList(List<V> volumes) throws IOException { 161 this.volumes = new ArrayList<AvailableSpaceVolumePair>(); 162 for (V volume : volumes) { 163 this.volumes.add(new AvailableSpaceVolumePair(volume)); 164 } 165 } 166 167 /** 168 * Check if the available space on all the volumes is roughly equal. 169 * 170 * @param volumes the volumes to check 171 * @return true if all volumes' free space is within the configured threshold, 172 * false otherwise. 173 * @throws IOException 174 * in the event of error checking amount of available space 175 */ 176 public boolean areAllVolumesWithinFreeSpaceThreshold() { 177 long leastAvailable = Long.MAX_VALUE; 178 long mostAvailable = 0; 179 for (AvailableSpaceVolumePair volume : volumes) { 180 leastAvailable = Math.min(leastAvailable, volume.getAvailable()); 181 mostAvailable = Math.max(mostAvailable, volume.getAvailable()); 182 } 183 return (mostAvailable - leastAvailable) < balancedSpaceThreshold; 184 } 185 186 /** 187 * @return the minimum amount of space available on a single volume, 188 * across all volumes. 189 */ 190 private long getLeastAvailableSpace() { 191 long leastAvailable = Long.MAX_VALUE; 192 for (AvailableSpaceVolumePair volume : volumes) { 193 leastAvailable = Math.min(leastAvailable, volume.getAvailable()); 194 } 195 return leastAvailable; 196 } 197 198 /** 199 * @return the maximum amount of space available across volumes with low space. 200 */ 201 public long getMostAvailableSpaceAmongVolumesWithLowAvailableSpace() { 202 long mostAvailable = Long.MIN_VALUE; 203 for (AvailableSpaceVolumePair volume : getVolumesWithLowAvailableSpace()) { 204 mostAvailable = Math.max(mostAvailable, volume.getAvailable()); 205 } 206 return mostAvailable; 207 } 208 209 /** 210 * @return the list of volumes with relatively low available space. 211 */ 212 public List<AvailableSpaceVolumePair> getVolumesWithLowAvailableSpace() { 213 long leastAvailable = getLeastAvailableSpace(); 214 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>(); 215 for (AvailableSpaceVolumePair volume : volumes) { 216 if (volume.getAvailable() <= leastAvailable + balancedSpaceThreshold) { 217 ret.add(volume); 218 } 219 } 220 return ret; 221 } 222 223 /** 224 * @return the list of volumes with a lot of available space. 225 */ 226 public List<AvailableSpaceVolumePair> getVolumesWithHighAvailableSpace() { 227 long leastAvailable = getLeastAvailableSpace(); 228 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>(); 229 for (AvailableSpaceVolumePair volume : volumes) { 230 if (volume.getAvailable() > leastAvailable + balancedSpaceThreshold) { 231 ret.add(volume); 232 } 233 } 234 return ret; 235 } 236 237 } 238 239 /** 240 * Used so that we only check the available space on a given volume once, at 241 * the beginning of {@link AvailableSpaceVolumeChoosingPolicy#chooseVolume(List, long)}. 242 */ 243 private class AvailableSpaceVolumePair { 244 private final V volume; 245 private final long availableSpace; 246 247 public AvailableSpaceVolumePair(V volume) throws IOException { 248 this.volume = volume; 249 this.availableSpace = volume.getAvailable(); 250 } 251 252 public long getAvailable() { 253 return availableSpace; 254 } 255 256 public V getVolume() { 257 return volume; 258 } 259 } 260 261 private List<V> extractVolumesFromPairs(List<AvailableSpaceVolumePair> volumes) { 262 List<V> ret = new ArrayList<V>(); 263 for (AvailableSpaceVolumePair volume : volumes) { 264 ret.add(volume.getVolume()); 265 } 266 return ret; 267 } 268 269}