The Hadoop Distributed File System (HDFS) is designed to store very large data sets reliably, and to stream those data sets at high bandwidth to user applications. In a large cluster, thousands of servers both host directly attached storage and execute user application tasks. By distributing storage and computation across many servers, the resource can grow with demand while remaining economical at every size. We describe the architecture of HDFS and report on experience using HDFS to manage 25 petabytes of enterprise data at Yahoo!.
%0 Conference Paper
%1 shvachko2010hadoop
%A Shvachko, K.
%A Kuang, Hairong
%A Radia, S.
%A Chansler, R.
%B Mass Storage Systems and Technologies (MSST), 2010 IEEE 26th Symposium on
%D 2010
%K hadoop hdfs ieee master seminar:dfs uni ws1213
%P 1-10
%R 10.1109/MSST.2010.5496972
%T The Hadoop Distributed File System
%U http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=&arnumber=5496972&contentType=Conference+Publications&queryText%3Dthe+hadoop+distributed+file+system
%X The Hadoop Distributed File System (HDFS) is designed to store very large data sets reliably, and to stream those data sets at high bandwidth to user applications. In a large cluster, thousands of servers both host directly attached storage and execute user application tasks. By distributing storage and computation across many servers, the resource can grow with demand while remaining economical at every size. We describe the architecture of HDFS and report on experience using HDFS to manage 25 petabytes of enterprise data at Yahoo!.
@comment{
  Conference paper describing HDFS (DOI 10.1109/MSST.2010.5496972).
  The entry was exported from BibSonomy (see biburl). The fields added-at,
  biburl, description, interhash, intrahash and timestamp are BibSonomy
  bookkeeping rather than standard BibTeX fields; they are kept verbatim so
  the record can still be matched against its source.
  Author given names are initials as exported; expand them if the full names
  are confirmed against the publication.
}
@inproceedings{shvachko2010hadoop,
  author      = {Shvachko, K. and Kuang, Hairong and Radia, S. and Chansler, R.},
  title       = {The {Hadoop} Distributed File System},
  booktitle   = {2010 {IEEE} 26th Symposium on Mass Storage Systems and Technologies ({MSST})},
  year        = {2010},
  month       = may,
  pages       = {1--10},
  doi         = {10.1109/MSST.2010.5496972},
  url         = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=&arnumber=5496972&contentType=Conference+Publications&queryText%3Dthe+hadoop+distributed+file+system},
  abstract    = {The Hadoop Distributed File System (HDFS) is designed to store very large data sets reliably, and to stream those data sets at high bandwidth to user applications. In a large cluster, thousands of servers both host directly attached storage and execute user application tasks. By distributing storage and computation across many servers, the resource can grow with demand while remaining economical at every size. We describe the architecture of HDFS and report on experience using HDFS to manage 25 petabytes of enterprise data at Yahoo!.},
  keywords    = {hadoop hdfs ieee master seminar:dfs uni ws1213},
  description = {IEEE Xplore - The Hadoop Distributed File System},
  added-at    = {2012-12-26T17:17:57.000+0100},
  timestamp   = {2012-12-26T17:17:57.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/21a682fac1165a990bb111a7980a64d6b/telekoma},
  interhash   = {60362a14af3385008e21425ea011d79e},
  intrahash   = {1a682fac1165a990bb111a7980a64d6b},
}