001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.datanode; 019 020import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SHARED_FILE_DESCRIPTOR_PATHS; 021import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SHARED_FILE_DESCRIPTOR_PATHS_DEFAULT; 022import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS; 023import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS_DEFAULT; 024 025import java.io.Closeable; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.util.HashMap; 029import java.util.Iterator; 030import java.util.Set; 031 032import org.apache.commons.io.IOUtils; 033import org.apache.commons.logging.Log; 034import org.apache.commons.logging.LogFactory; 035import org.apache.hadoop.conf.Configuration; 036import org.apache.hadoop.fs.InvalidRequestException; 037import org.apache.hadoop.hdfs.ExtendedBlockId; 038import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm; 039import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.ShmId; 040import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot; 041import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; 042import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory; 043import org.apache.hadoop.net.unix.DomainSocket; 044import org.apache.hadoop.net.unix.DomainSocketWatcher; 045 046import com.google.common.base.Preconditions; 047import com.google.common.collect.HashMultimap; 048 049/* 050 * Manages client short-circuit memory segments on the DataNode. 051 * 052 * DFSClients request shared memory segments from the DataNode. The 053 * ShortCircuitRegistry generates and manages these segments. Each segment 054 * has a randomly generated 128-bit ID which uniquely identifies it. The 055 * segments each contain several "slots." 056 * 057 * Before performing a short-circuit read, DFSClients must request a pair of 058 * file descriptors from the DataNode via the REQUEST_SHORT_CIRCUIT_FDS 059 * operation. As part of this operation, DFSClients pass the ID of the shared 060 * memory segment they would like to use to communicate information about this 061 * replica, as well as the slot number within that segment they would like to 062 * use. Slot allocation is always done by the client. 063 * 064 * Slots are used to track the state of the block on the both the client and 065 * datanode. When this DataNode mlocks a block, the corresponding slots for the 066 * replicas are marked as "anchorable". Anchorable blocks can be safely read 067 * without verifying the checksum. This means that BlockReaderLocal objects 068 * using these replicas can skip checksumming. It also means that we can do 069 * zero-copy reads on these replicas (the ZCR interface has no way of 070 * verifying checksums.) 071 * 072 * When a DN needs to munlock a block, it needs to first wait for the block to 073 * be unanchored by clients doing a no-checksum read or a zero-copy read. The 074 * DN also marks the block's slots as "unanchorable" to prevent additional 075 * clients from initiating these operations in the future. 076 * 077 * The counterpart fo this class on the client is {@link DfsClientShmManager}. 078 */ 079public class ShortCircuitRegistry { 080 public static final Log LOG = LogFactory.getLog(ShortCircuitRegistry.class); 081 082 private static final int SHM_LENGTH = 8192; 083 084 private static class RegisteredShm extends ShortCircuitShm 085 implements DomainSocketWatcher.Handler { 086 private final ShortCircuitRegistry registry; 087 088 RegisteredShm(ShmId shmId, FileInputStream stream, 089 ShortCircuitRegistry registry) throws IOException { 090 super(shmId, stream); 091 this.registry = registry; 092 } 093 094 @Override 095 public boolean handle(DomainSocket sock) { 096 synchronized (registry) { 097 synchronized (this) { 098 registry.removeShm(this); 099 } 100 } 101 return true; 102 } 103 } 104 105 public synchronized void removeShm(ShortCircuitShm shm) { 106 if (LOG.isTraceEnabled()) { 107 LOG.debug("removing shm " + shm); 108 } 109 // Stop tracking the shmId. 110 RegisteredShm removedShm = segments.remove(shm.getShmId()); 111 Preconditions.checkState(removedShm == shm, 112 "failed to remove " + shm.getShmId()); 113 // Stop tracking the slots. 114 for (Iterator<Slot> iter = shm.slotIterator(); iter.hasNext(); ) { 115 Slot slot = iter.next(); 116 boolean removed = slots.remove(slot.getBlockId(), slot); 117 Preconditions.checkState(removed); 118 slot.makeInvalid(); 119 } 120 // De-allocate the memory map and close the shared file. 121 shm.free(); 122 } 123 124 /** 125 * Whether or not the registry is enabled. 126 */ 127 private boolean enabled; 128 129 /** 130 * The factory which creates shared file descriptors. 131 */ 132 private final SharedFileDescriptorFactory shmFactory; 133 134 /** 135 * A watcher which sends out callbacks when the UNIX domain socket 136 * associated with a shared memory segment closes. 137 */ 138 private final DomainSocketWatcher watcher; 139 140 private final HashMap<ShmId, RegisteredShm> segments = 141 new HashMap<ShmId, RegisteredShm>(0); 142 143 private final HashMultimap<ExtendedBlockId, Slot> slots = 144 HashMultimap.create(0, 1); 145 146 public ShortCircuitRegistry(Configuration conf) throws IOException { 147 boolean enabled = false; 148 SharedFileDescriptorFactory shmFactory = null; 149 DomainSocketWatcher watcher = null; 150 try { 151 int interruptCheck = conf.getInt( 152 DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS, 153 DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS_DEFAULT); 154 if (interruptCheck <= 0) { 155 throw new IOException( 156 DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS + 157 " was set to " + interruptCheck); 158 } 159 String shmPaths[] = 160 conf.getTrimmedStrings(DFS_DATANODE_SHARED_FILE_DESCRIPTOR_PATHS); 161 if (shmPaths.length == 0) { 162 shmPaths = 163 DFS_DATANODE_SHARED_FILE_DESCRIPTOR_PATHS_DEFAULT.split(","); 164 } 165 shmFactory = SharedFileDescriptorFactory. 166 create("HadoopShortCircuitShm_", shmPaths); 167 String dswLoadingFailure = DomainSocketWatcher.getLoadingFailureReason(); 168 if (dswLoadingFailure != null) { 169 throw new IOException(dswLoadingFailure); 170 } 171 watcher = new DomainSocketWatcher(interruptCheck); 172 enabled = true; 173 if (LOG.isDebugEnabled()) { 174 LOG.debug("created new ShortCircuitRegistry with interruptCheck=" + 175 interruptCheck + ", shmPath=" + shmFactory.getPath()); 176 } 177 } catch (IOException e) { 178 if (LOG.isDebugEnabled()) { 179 LOG.debug("Disabling ShortCircuitRegistry", e); 180 } 181 } finally { 182 this.enabled = enabled; 183 this.shmFactory = shmFactory; 184 this.watcher = watcher; 185 } 186 } 187 188 /** 189 * Process a block mlock event from the FsDatasetCache. 190 * 191 * @param blockId The block that was mlocked. 192 */ 193 public synchronized void processBlockMlockEvent(ExtendedBlockId blockId) { 194 if (!enabled) return; 195 Set<Slot> affectedSlots = slots.get(blockId); 196 for (Slot slot : affectedSlots) { 197 slot.makeAnchorable(); 198 } 199 } 200 201 /** 202 * Mark any slots associated with this blockId as unanchorable. 203 * 204 * @param blockId The block ID. 205 * @return True if we should allow the munlock request. 206 */ 207 public synchronized boolean processBlockMunlockRequest( 208 ExtendedBlockId blockId) { 209 if (!enabled) return true; 210 boolean allowMunlock = true; 211 Set<Slot> affectedSlots = slots.get(blockId); 212 for (Slot slot : affectedSlots) { 213 slot.makeUnanchorable(); 214 if (slot.isAnchored()) { 215 allowMunlock = false; 216 } 217 } 218 return allowMunlock; 219 } 220 221 public static class NewShmInfo implements Closeable { 222 public final ShmId shmId; 223 public final FileInputStream stream; 224 225 NewShmInfo(ShmId shmId, FileInputStream stream) { 226 this.shmId = shmId; 227 this.stream = stream; 228 } 229 230 @Override 231 public void close() throws IOException { 232 stream.close(); 233 } 234 } 235 236 /** 237 * Handle a DFSClient request to create a new memory segment. 238 * 239 * @param clientName Client name as reported by the client. 240 * @param sock The DomainSocket to associate with this memory 241 * segment. When this socket is closed, or the 242 * other side writes anything to the socket, the 243 * segment will be closed. This can happen at any 244 * time, including right after this function returns. 245 * @return A NewShmInfo object. The caller must close the 246 * NewShmInfo object once they are done with it. 247 * @throws IOException If the new memory segment could not be created. 248 */ 249 public NewShmInfo createNewMemorySegment(String clientName, 250 DomainSocket sock) throws IOException { 251 NewShmInfo info = null; 252 RegisteredShm shm = null; 253 ShmId shmId = null; 254 synchronized (this) { 255 if (!enabled) { 256 if (LOG.isTraceEnabled()) { 257 LOG.trace("createNewMemorySegment: ShortCircuitRegistry is " + 258 "not enabled."); 259 } 260 throw new UnsupportedOperationException(); 261 } 262 FileInputStream fis = null; 263 try { 264 do { 265 shmId = ShmId.createRandom(); 266 } while (segments.containsKey(shmId)); 267 fis = shmFactory.createDescriptor(clientName, SHM_LENGTH); 268 shm = new RegisteredShm(shmId, fis, this); 269 } finally { 270 if (shm == null) { 271 IOUtils.closeQuietly(fis); 272 } 273 } 274 info = new NewShmInfo(shmId, fis); 275 segments.put(shmId, shm); 276 } 277 // Drop the registry lock to prevent deadlock. 278 // After this point, RegisteredShm#handle may be called at any time. 279 watcher.add(sock, shm); 280 if (LOG.isTraceEnabled()) { 281 LOG.trace("createNewMemorySegment: created " + info.shmId); 282 } 283 return info; 284 } 285 286 public synchronized void registerSlot(ExtendedBlockId blockId, SlotId slotId, 287 boolean isCached) throws InvalidRequestException { 288 if (!enabled) { 289 if (LOG.isTraceEnabled()) { 290 LOG.trace(this + " can't register a slot because the " + 291 "ShortCircuitRegistry is not enabled."); 292 } 293 throw new UnsupportedOperationException(); 294 } 295 ShmId shmId = slotId.getShmId(); 296 RegisteredShm shm = segments.get(shmId); 297 if (shm == null) { 298 throw new InvalidRequestException("there is no shared memory segment " + 299 "registered with shmId " + shmId); 300 } 301 Slot slot = shm.registerSlot(slotId.getSlotIdx(), blockId); 302 if (isCached) { 303 slot.makeAnchorable(); 304 } else { 305 slot.makeUnanchorable(); 306 } 307 boolean added = slots.put(blockId, slot); 308 Preconditions.checkState(added); 309 if (LOG.isTraceEnabled()) { 310 LOG.trace(this + ": registered " + blockId + " with slot " + 311 slotId + " (isCached=" + isCached + ")"); 312 } 313 } 314 315 public synchronized void unregisterSlot(SlotId slotId) 316 throws InvalidRequestException { 317 if (!enabled) { 318 if (LOG.isTraceEnabled()) { 319 LOG.trace("unregisterSlot: ShortCircuitRegistry is " + 320 "not enabled."); 321 } 322 throw new UnsupportedOperationException(); 323 } 324 ShmId shmId = slotId.getShmId(); 325 RegisteredShm shm = segments.get(shmId); 326 if (shm == null) { 327 throw new InvalidRequestException("there is no shared memory segment " + 328 "registered with shmId " + shmId); 329 } 330 Slot slot = shm.getSlot(slotId.getSlotIdx()); 331 slot.makeInvalid(); 332 shm.unregisterSlot(slotId.getSlotIdx()); 333 slots.remove(slot.getBlockId(), slot); 334 } 335 336 public void shutdown() { 337 synchronized (this) { 338 if (!enabled) return; 339 enabled = false; 340 } 341 IOUtils.closeQuietly(watcher); 342 } 343}