/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode.fsdataset;

import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;

/**
 * A DN volume choosing policy which takes into account the amount of free
 * space on each of the available volumes when considering where to assign a
 * new replica allocation. By default this policy prefers assigning replicas to
 * those volumes with more available free space, so as to over time balance the
 * available space of all the volumes within a DN.
 *
 * <p>On every call the available space of each candidate volume is sampled
 * once. If the gap between the fullest and the emptiest volume is below the
 * configured threshold, the choice is delegated to a plain
 * {@link RoundRobinVolumeChoosingPolicy}. Otherwise the volumes are split
 * into a "low" group (within the threshold of the smallest available space)
 * and a "high" group (everything else), one of the two groups is picked at
 * random using the configured preference fraction weighted by group size,
 * and a per-group round-robin policy selects the volume.
 *
 * <p>{@link #setConf(Configuration)}, {@link #getConf()} and
 * {@link #chooseVolume(List, long)} are synchronized on this instance.
 */
public class AvailableSpaceVolumeChoosingPolicy<V extends FsVolumeSpi>
    implements VolumeChoosingPolicy<V>, Configurable {

  private static final Log LOG =
      LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class);

  /** Source of randomness for picking between the high and low groups. */
  private final Random random;

  // Each selection path owns its own round-robin delegate instance.
  private final VolumeChoosingPolicy<V> roundRobinPolicyBalanced =
      new RoundRobinVolumeChoosingPolicy<V>();
  private final VolumeChoosingPolicy<V> roundRobinPolicyHighAvailable =
      new RoundRobinVolumeChoosingPolicy<V>();
  private final VolumeChoosingPolicy<V> roundRobinPolicyLowAvailable =
      new RoundRobinVolumeChoosingPolicy<V>();

  /**
   * Largest difference in available space (same unit as
   * {@link FsVolumeSpi#getAvailable()}) for which the volumes are still
   * treated as balanced.
   */
  private long balancedSpaceThreshold =
      DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT;

  /** Preference fraction given to the volumes with high available space. */
  private float balancedPreferencePercent =
      DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;

  /**
   * Creates a policy that draws its random numbers from the given generator.
   *
   * @param random generator consulted when volumes are imbalanced
   */
  AvailableSpaceVolumeChoosingPolicy(Random random) {
    this.random = random;
  }

  /** Creates a policy backed by a freshly constructed {@link Random}. */
  public AvailableSpaceVolumeChoosingPolicy() {
    this(new Random());
  }

  /**
   * Reads the balance threshold and the preference fraction from the given
   * configuration, logs the resulting values, and warns when the fraction is
   * above 1.0 or below 0.5. Out-of-range values are only reported, not
   * corrected.
   *
   * @param conf configuration to read both settings from
   */
  @Override
  public synchronized void setConf(Configuration conf) {
    final String thresholdKey =
        DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY;
    final String fractionKey =
        DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY;

    balancedSpaceThreshold = conf.getLong(thresholdKey,
        DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT);
    balancedPreferencePercent = conf.getFloat(fractionKey,
        DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);

    LOG.info("Available space volume choosing policy initialized: "
        + thresholdKey + " = " + balancedSpaceThreshold + ", "
        + fractionKey + " = " + balancedPreferencePercent);

    if (balancedPreferencePercent > 1.0) {
      LOG.warn("The value of " + fractionKey
          + " is greater than 1.0 but should be in the range 0.0 - 1.0");
    }

    if (balancedPreferencePercent < 0.5) {
      LOG.warn("The value of " + fractionKey
          + " is less than 0.5 so volumes with less available disk space will receive more block allocations");
    }
  }

  /**
   * Exists only to satisfy the {@link Configurable} contract; the
   * configuration passed to {@link #setConf(Configuration)} is not retained.
   *
   * @return always {@code null}
   */
  @Override
  public synchronized Configuration getConf() {
    return null;
  }

  /**
   * Picks the volume that should receive a new replica.
   *
   * @param volumes candidate volumes; must not be empty
   * @param replicaSize size of the replica about to be written
   * @return the selected volume
   * @throws DiskOutOfSpaceException if {@code volumes} is empty
   * @throws IOException if sampling a volume's available space or the
   *         delegate round-robin policy fails
   */
  @Override
  public synchronized V chooseVolume(List<V> volumes,
      final long replicaSize) throws IOException {
    if (volumes.isEmpty()) {
      throw new DiskOutOfSpaceException("No more available volumes");
    }

    final AvailableSpaceVolumeList snapshot =
        new AvailableSpaceVolumeList(volumes);

    if (!snapshot.areAllVolumesWithinFreeSpaceThreshold()) {
      return chooseFromImbalancedVolumes(snapshot, replicaSize);
    }

    // The spread is small enough: plain round robin over every volume.
    final V picked = roundRobinPolicyBalanced.chooseVolume(volumes, replicaSize);
    if (LOG.isDebugEnabled()) {
      LOG.debug("All volumes are within the configured free space balance threshold. Selecting "
          + picked + " for write of block size " + replicaSize);
    }
    return picked;
  }

  /**
   * Selection path used when the volumes are not within the balance
   * threshold of each other. Must be called with this instance's monitor
   * held (it is only invoked from {@link #chooseVolume(List, long)}).
   *
   * @param snapshot available-space snapshot of the candidate volumes
   * @param replicaSize size of the replica about to be written
   * @return the selected volume
   * @throws IOException if the delegate round-robin policy fails
   */
  private V chooseFromImbalancedVolumes(AvailableSpaceVolumeList snapshot,
      long replicaSize) throws IOException {
    final long largestLowGroupSpace =
        snapshot.getMostAvailableSpaceAmongVolumesWithLowAvailableSpace();

    final List<V> highGroup =
        extractVolumesFromPairs(snapshot.getVolumesWithHighAvailableSpace());
    final List<V> lowGroup =
        extractVolumesFromPairs(snapshot.getVolumesWithLowAvailableSpace());

    // Scale the configured preference by the number of volumes in each group
    // to obtain the probability of writing to the high group.
    final float highWeight = highGroup.size() * balancedPreferencePercent;
    final float lowWeight = lowGroup.size() * (1 - balancedPreferencePercent);
    final float highGroupProbability = highWeight / (highWeight + lowWeight);

    // If no low-space volume can hold the replica, always go for the high
    // group; the random draw only happens when the low group is viable.
    final boolean lowGroupTooSmall = largestLowGroupSpace < replicaSize;
    final boolean useHighGroup =
        lowGroupTooSmall || random.nextFloat() < highGroupProbability;

    final V picked;
    final String groupText;
    if (useHighGroup) {
      picked = roundRobinPolicyHighAvailable.chooseVolume(highGroup, replicaSize);
      groupText = " from high available space volumes for write of block size ";
    } else {
      picked = roundRobinPolicyLowAvailable.chooseVolume(lowGroup, replicaSize);
      groupText = " from low available space volumes for write of block size ";
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Volumes are imbalanced. Selecting " + picked + groupText
          + replicaSize);
    }
    return picked;
  }

  /**
   * Snapshot of the candidate volumes together with the available space
   * sampled for each of them when the snapshot was built.
   */
  private class AvailableSpaceVolumeList {
    private final List<AvailableSpaceVolumePair> volumes;

    /**
     * @param volumes volumes to sample
     * @throws IOException if sampling the available space of a volume fails
     */
    public AvailableSpaceVolumeList(List<V> volumes) throws IOException {
      final List<AvailableSpaceVolumePair> sampled =
          new ArrayList<AvailableSpaceVolumePair>();
      for (V candidate : volumes) {
        sampled.add(new AvailableSpaceVolumePair(candidate));
      }
      this.volumes = sampled;
    }

    /**
     * @return true if the difference between the largest and the smallest
     *         sampled available space is strictly below the configured
     *         threshold, false otherwise.
     */
    public boolean areAllVolumesWithinFreeSpaceThreshold() {
      long smallest = Long.MAX_VALUE;
      long largest = 0;
      for (AvailableSpaceVolumePair pair : volumes) {
        final long space = pair.getAvailable();
        if (space < smallest) {
          smallest = space;
        }
        if (space > largest) {
          largest = space;
        }
      }
      return largest - smallest < balancedSpaceThreshold;
    }

    /**
     * @return the smallest sampled available space of any volume, or
     *         {@link Long#MAX_VALUE} when the snapshot is empty.
     */
    private long getLeastAvailableSpace() {
      long smallest = Long.MAX_VALUE;
      for (AvailableSpaceVolumePair pair : volumes) {
        final long space = pair.getAvailable();
        if (space < smallest) {
          smallest = space;
        }
      }
      return smallest;
    }

    /**
     * @return the largest sampled available space among the low-space
     *         volumes, or {@link Long#MIN_VALUE} when there are none.
     */
    public long getMostAvailableSpaceAmongVolumesWithLowAvailableSpace() {
      long largest = Long.MIN_VALUE;
      for (AvailableSpaceVolumePair pair : getVolumesWithLowAvailableSpace()) {
        final long space = pair.getAvailable();
        if (space > largest) {
          largest = space;
        }
      }
      return largest;
    }

    /**
     * @return the volumes whose available space is at most the smallest
     *         available space plus the configured threshold.
     */
    public List<AvailableSpaceVolumePair> getVolumesWithLowAvailableSpace() {
      return selectRelativeToCutoff(false);
    }

    /**
     * @return the volumes whose available space exceeds the smallest
     *         available space plus the configured threshold.
     */
    public List<AvailableSpaceVolumePair> getVolumesWithHighAvailableSpace() {
      return selectRelativeToCutoff(true);
    }

    /**
     * Filters the snapshot against the cutoff "smallest available space plus
     * threshold".
     *
     * @param aboveCutoff true to keep volumes strictly above the cutoff,
     *        false to keep those at or below it
     * @return a new list with the matching pairs, in snapshot order
     */
    private List<AvailableSpaceVolumePair> selectRelativeToCutoff(
        boolean aboveCutoff) {
      final long cutoff = getLeastAvailableSpace() + balancedSpaceThreshold;
      final List<AvailableSpaceVolumePair> selected =
          new ArrayList<AvailableSpaceVolumePair>();
      for (AvailableSpaceVolumePair pair : volumes) {
        if ((pair.getAvailable() > cutoff) == aboveCutoff) {
          selected.add(pair);
        }
      }
      return selected;
    }

  }

  /**
   * Couples a volume with the available space it reported at construction
   * time, so the space is queried only once per
   * {@link AvailableSpaceVolumeChoosingPolicy#chooseVolume(List, long)} call.
   */
  private class AvailableSpaceVolumePair {
    private final V volume;
    private final long availableSpace;

    /**
     * @param volume volume to sample
     * @throws IOException if the volume cannot report its available space
     */
    public AvailableSpaceVolumePair(V volume) throws IOException {
      this.availableSpace = volume.getAvailable();
      this.volume = volume;
    }

    /** @return the available space recorded when this pair was created. */
    public long getAvailable() {
      return availableSpace;
    }

    /** @return the wrapped volume. */
    public V getVolume() {
      return volume;
    }
  }

  /**
   * Unwraps a list of pairs into the volumes they hold, preserving order.
   *
   * @param volumes pairs to unwrap
   * @return a new list containing each pair's volume
   */
  private List<V> extractVolumesFromPairs(List<AvailableSpaceVolumePair> volumes) {
    final List<V> unwrapped = new ArrayList<V>();
    for (AvailableSpaceVolumePair pair : volumes) {
      unwrapped.add(pair.getVolume());
    }
    return unwrapped;
  }

}