1/********************************************************************
2 * Copyright (c) 2013 - 2014, Pivotal Inc.
3 * All rights reserved.
4 *
5 * Author: Zhanwei Wang
6 ********************************************************************/
7/********************************************************************
8 * 2014 -
9 * open source under Apache License Version 2.0
10 ********************************************************************/
11/**
12 * Licensed to the Apache Software Foundation (ASF) under one
13 * or more contributor license agreements. See the NOTICE file
14 * distributed with this work for additional information
15 * regarding copyright ownership. The ASF licenses this file
16 * to you under the Apache License, Version 2.0 (the
17 * "License"); you may not use this file except in compliance
18 * with the License. You may obtain a copy of the License at
19 *
20 * http://www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an "AS IS" BASIS,
24 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28#ifndef _HDFS_LIBHDFS3_CLIENT_FILESYSTEMIMPL_H_
29#define _HDFS_LIBHDFS3_CLIENT_FILESYSTEMIMPL_H_
30
31#include "BlockLocation.h"
32#include "DirectoryIterator.h"
33#include "FileStatus.h"
34#include "FileSystemInter.h"
35#include "FileSystemKey.h"
36#include "FileSystemStats.h"
37#include "Permission.h"
38#include "server/Namenode.h"
39#include "SessionConfig.h"
40#include "Unordered.h"
41#include "UserInfo.h"
42#include "XmlConfig.h"
43#ifdef MOCK
44#include "NamenodeStub.h"
45#endif
46
47#include <string>
48#include <vector>
49
50namespace Hdfs {
51namespace Internal {
52
53class InputStreamInter;
54class OutputStreamInter;
55
56class FileSystemImpl: public FileSystemInter {
57public:
58 /**
59 * Construct a FileSystemImpl instance.
60 * @param key a key which can be uniquely identify a FileSystemImpl instance.
61 * @param c a configuration objecto used to initialize the instance.
62 */
63 FileSystemImpl(const FileSystemKey & key, const Config & c);
64
65 /**
66 * Destroy a FileSystemBase instance
67 */
68 ~FileSystemImpl();
69
70 /**
71 * Format the path to a absolute canonicalized path.
72 * @param path target path to be hendled.
73 * @return return a absolute canonicalized path.
74 */
75 const std::string getStandardPath(const char * path);
76
77 /**
78 * To get the client unique ID.
79 * @return return the client unique ID.
80 */
81 const char * getClientName();
82
83 /**
84 * Connect to hdfs
85 */
86 void connect();
87
88 /**
89 * disconnect from hdfs
90 */
91 void disconnect();
92
93 /**
94 * To get default number of replication.
95 * @return the default number of replication.
96 */
97 int getDefaultReplication() const;
98
99 /**
100 * To get the default block size.
101 * @return the default block size.
102 */
103 int64_t getDefaultBlockSize() const;
104
105 /**
106 * To get the home directory.
107 * @return home directory.
108 */
109 std::string getHomeDirectory() const;
110
111 /**
112 * To delete a file or directory.
113 * @param path the path to be deleted.
114 * @param recursive if path is a directory, delete the contents recursively.
115 * @return return true if success.
116 */
117 bool deletePath(const char * path, bool recursive);
118
119 /**
120 * To create a directory with given permission.
121 * @param path the directory path which is to be created.
122 * @param permission directory permission.
123 * @return return true if success.
124 */
125 bool mkdir(const char * path, const Permission & permission);
126
127 /**
128 * To create a directory which given permission.
129 * If parent path does not exits, create it.
130 * @param path the directory path which is to be created.
131 * @param permission directory permission.
132 * @return return true if success.
133 */
134 bool mkdirs(const char * path, const Permission & permission);
135
136 /**
137 * To get path information.
138 * @param path the path which information is to be returned.
139 * @return the path information.
140 */
141 FileStatus getFileStatus(const char * path);
142
143 /**
144 * Return an array containing hostnames, offset and size of
145 * portions of the given file.
146 *
147 * This call is most helpful with DFS, where it returns
148 * hostnames of machines that contain the given file.
149 *
150 * The FileSystem will simply return an elt containing 'localhost'.
151 *
152 * @param path path is used to identify an FS since an FS could have
153 * another FS that it could be delegating the call to
154 * @param start offset into the given file
155 * @param len length for which to get locations for
156 */
157 std::vector<BlockLocation> getFileBlockLocations(
158 const char * path, int64_t start, int64_t len);
159
160 /**
161 * list the contents of a directory.
162 * @param path the directory path.
163 * @return Return a iterator to visit all elements in this directory.
164 */
165 DirectoryIterator listDirectory(const char * path, bool needLocation);
166
167 /**
168 * list all the contents of a directory.
169 * @param path The directory path.
170 * @return Return a vector of file informations in the directory.
171 */
172 std::vector<FileStatus> listAllDirectoryItems(const char * path,
173 bool needLocation);
174
175 /**
176 * To set the owner and the group of the path.
177 * username and groupname cannot be empty at the same time.
178 * @param path the path which owner of group is to be changed.
179 * @param username new user name.
180 * @param groupname new group.
181 */
182 void setOwner(const char * path, const char * username,
183 const char * groupname);
184
185 /**
186 * To set the access time or modification time of a path.
187 * @param path the path which access time or modification time is to be changed.
188 * @param mtime new modification time.
189 * @param atime new access time.
190 */
191 void setTimes(const char * path, int64_t mtime, int64_t atime);
192
193 /**
194 * To set the permission of a path.
195 * @param path the path which permission is to be changed.
196 * @param permission new permission.
197 */
198 void setPermission(const char * path, const Permission & permission);
199
200 /**
201 * To set the number of replication.
202 * @param path the path which number of replication is to be changed.
203 * @param replication new number of replication.
204 * @return return true if success.
205 */
206 bool setReplication(const char * path, short replication);
207
208 /**
209 * To rename a path.
210 * @param src old path.
211 * @param dst new path.
212 * @return return true if success.
213 */
214 bool rename(const char * src, const char * dst);
215
216 /**
217 * To set working directory.
218 * @param path new working directory.
219 */
220 void setWorkingDirectory(const char * path);
221
222 /**
223 * To get working directory.
224 * @return working directory.
225 */
226 std::string getWorkingDirectory() const;
227
228 /**
229 * To test if the path exist.
230 * @param path the path which is to be tested.
231 * @return return true if the path exist.
232 */
233 bool exist(const char * path);
234
235 /**
236 * To get the file system status.
237 * @return the file system status.
238 */
239 FileSystemStats getFsStats();
240
241 /**
242 * Truncate the file in the indicated path to the indicated size.
243 * @param path The path to the file to be truncated
244 * @param size The size the file is to be truncated to
245 *
246 * @return true if and client does not need to wait for block recovery,
247 * false if client needs to wait for block recovery.
248 */
249 bool truncate(const char * path, int64_t size);
250
251 /**
252 * Get a valid Delegation Token.
253 *
254 * @param renewer the designated renewer for the token
255 * @return Token
256 * @throws IOException
257 */
258 std::string getDelegationToken(const char * renewer);
259
260 /**
261 * Get a valid Delegation Token using default user as renewer.
262 *
263 * @return Token
264 * @throws IOException
265 */
266 std::string getDelegationToken();
267
268 /**
269 * Renew an existing delegation token.
270 *
271 * @param token delegation token obtained earlier
272 * @return the new expiration time
273 * @throws IOException
274 */
275 int64_t renewDelegationToken(const std::string & token);
276
277 /**
278 * Cancel an existing delegation token.
279 *
280 * @param token delegation token
281 * @throws IOException
282 */
283 void cancelDelegationToken(const std::string & token);
284
285 /**
286 * Get locations of the blocks of the specified file within the specified range.
287 * DataNode locations for each block are sorted by
288 * the proximity to the client.
289 *
290 * The client will then have to contact
291 * one of the indicated DataNodes to obtain the actual data.
292 *
293 * @param src file name
294 * @param offset range start offset
295 * @param length range length
296 * @param lbs output the returned blocks
297 */
298 void getBlockLocations(const std::string & src, int64_t offset,
299 int64_t length, LocatedBlocks & lbs);
300
301 /**
302 * Create a new file entry in the namespace.
303 *
304 * @param src path of the file being created.
305 * @param masked masked permission.
306 * @param flag indicates whether the file should be
307 * overwritten if it already exists or create if it does not exist or append.
308 * @param createParent create missing parent directory if true
309 * @param replication block replication factor.
310 * @param blockSize maximum block size.
311 */
312 void create(const std::string & src, const Permission & masked, int flag,
313 bool createParent, short replication, int64_t blockSize);
314
315 /**
316 * Append to the end of the file.
317 *
318 * @param src path of the file being created.
319 * @return return the last partial block if any
320 */
321 std::pair<shared_ptr<LocatedBlock>, shared_ptr<FileStatus> > append(
322 const std::string& src);
323
324 /**
325 * The client can give up on a block by calling abandonBlock().
326 * The client can then either obtain a new block, or complete or abandon the file.
327 * Any partial writes to the block will be discarded.
328 *
329 * @param b the block to be abandoned.
330 * @param src the file which the block belongs to.
331 */
332 void abandonBlock(const ExtendedBlock & b, const std::string & src);
333
334 /**
335 * A client that wants to write an additional block to the
336 * indicated filename (which must currently be open for writing)
337 * should call addBlock().
338 *
339 * addBlock() allocates a new block and datanodes the block data
340 * should be replicated to.
341 *
342 * addBlock() also commits the previous block by reporting
343 * to the name-node the actual generation stamp and the length
344 * of the block that the client has transmitted to data-nodes.
345 *
346 * @param src the file being created
347 * @param previous previous block
348 * @param excludeNodes a list of nodes that should not be allocated for the current block.
349 * @return return the new block.
350 */
351 shared_ptr<LocatedBlock> addBlock(const std::string & src,
352 const ExtendedBlock * previous,
353 const std::vector<DatanodeInfo> & excludeNodes);
354
355 /**
356 * Get a datanode for an existing pipeline.
357 *
358 * @param src the file being written
359 * @param blk the block being written
360 * @param existings the existing nodes in the pipeline
361 * @param excludes the excluded nodes
362 * @param numAdditionalNodes number of additional datanodes
363 * @return return a new block information which contains new datanode.
364 */
365 shared_ptr<LocatedBlock> getAdditionalDatanode(const std::string & src,
366 const ExtendedBlock & blk,
367 const std::vector<DatanodeInfo> & existings,
368 const std::vector<std::string> & storageIDs,
369 const std::vector<DatanodeInfo> & excludes, int numAdditionalNodes);
370
371 /**
372 * The client is done writing data to the given filename, and would
373 * like to complete it.
374 *
375 * The function returns whether the file has been closed successfully.
376 * If the function returns false, the caller should try again.
377 *
378 * close() also commits the last block of file by reporting
379 * to the name-node the actual generation stamp and the length
380 * of the block that the client has transmitted to data-nodes.
381 *
382 * A call to complete() will not return true until all the file's
383 * blocks have been replicated the minimum number of times. Thus,
384 * DataNode failures may cause a client to call complete() several
385 * times before succeeding.
386 *
387 * @param src the file being written.
388 * @param last last block to be committed.
389 * @return return false if the client should retry.
390 */
391 bool complete(const std::string & src, const ExtendedBlock * last);
392
393 /**
394 * The client wants to report corrupted blocks (blocks with specified
395 * locations on datanodes).
396 * @param blocks Array of located blocks to report
397 */
398 /*void reportBadBlocks(const std::vector<LocatedBlock> & blocks);*/
399
400 /**
401 * Write all metadata for this file into persistent storage.
402 * The file must be currently open for writing.
403 * @param src The const std::string & representation of the path
404 */
405 void fsync(const std::string & src);
406
407 /**
408 * Get a new generation stamp together with an access token for
409 * a block under construction
410 *
411 * This method is called only when a client needs to recover a failed
412 * pipeline or set up a pipeline for appending to a block.
413 *
414 * @param block a block
415 * @return return a located block with a new generation stamp and an access token
416 */
417 shared_ptr<LocatedBlock> updateBlockForPipeline(
418 const ExtendedBlock & block);
419
420 /**
421 * Update a pipeline for a block under construction
422 *
423 * @param oldBlock the old block
424 * @param newBlock the new block containing new generation stamp and length
425 * @param newNodes datanodes in the pipeline
426 */
427 void updatePipeline(const ExtendedBlock & oldBlock,
428 const ExtendedBlock & newBlock,
429 const std::vector<DatanodeInfo> & newNodes,
430 const std::vector<std::string> & storageIDs);
431
432 /**
433 * register the output stream in filespace when it is opened.
434 */
435 void registerOpenedOutputStream();
436
437 /**
438 * unregister the output stream from filespace when it is closed.
439 */
440 bool unregisterOpenedOutputStream();
441
442 /**
443 * Get the configuration used in filesystem.
444 * @return return the configuration instance.
445 */
446 const SessionConfig & getConf() const {
447 return sconf;
448 }
449
450 /**
451 * Get the user used in filesystem.
452 * @return return the user information.
453 */
454 const UserInfo & getUserInfo() const {
455 return user;
456 }
457
458 /**
459 * Get a partial listing of the indicated directory
460 *
461 * @param src the directory name
462 * @param startAfter the name to start listing after encoded in java UTF8
463 * @param needLocation if the FileStatus should contain block locations
464 * @param dl append the returned directories.
465 * @return return true if there are more items.
466 */
467 bool getListing(const std::string & src, const std::string & startAfter,
468 bool needLocation, std::vector<FileStatus> & dl);
469
470 /**
471 * To renew the lease.
472 *
473 * @return return false if the filesystem no long needs to renew lease.
474 */
475 bool renewLease();
476
477 /**
478 * Get the peer cache.
479 *
480 * @return return the peer cache.
481 */
482 PeerCache& getPeerCache() {
483 return *peerCache;
484 }
485
486private:
487 Config conf;
488 FileSystemKey key;
489 int openedOutputStream;
490 mutex mutWorkingDir;
491 Namenode * nn;
492 SessionConfig sconf;
493 shared_ptr<PeerCache> peerCache;
494 std::string clientName;
495 std::string tokenService;
496 std::string workingDir;
497 UserInfo user;
498#ifdef MOCK
499private:
500 Hdfs::Mock::NamenodeStub * stub;
501#endif
502};
503
504}
505}
506
507#endif /* _HDFS_LIBHDFS3_CLIENT_FILESYSTEMIMPL_H_ */
508