{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T13:09:15Z","timestamp":1769000955545,"version":"3.49.0"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,8]]},"DOI":"10.1109\/ic3.2014.6897216","type":"proceedings-article","created":{"date-parts":[[2014,9,26]],"date-time":"2014-09-26T16:33:47Z","timestamp":1411749227000},"page":"454-460","source":"Crossref","is-referenced-by-count":3,"title":["Leveraging hadoop framework to develop duplication detector and analysis using Mapreduce, Hive and Pig"],"prefix":"10.1109","author":[{"given":"Priyanka","family":"Sethi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Prakash","family":"Kumar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","article-title":"P2CP: A new cloud storage model to enhance performance of cloud services","author":"sun","year":"2011","journal-title":"Proceedings Oj International ConJerence on InJormation Resources Management Conf-iRM"},{"key":"17","first-page":"1","article-title":"Farsite: Federated, available, and reliable storage for an incompletely trusted environment","author":"atul","year":"2002","journal-title":"Proc of the 5th Symposium on Operating Systems Design and Implementation"},{"key":"18","first-page":"48","article-title":"FAB: Building distributed enterprise disk arrays from commodity components","author":"yasushi","year":"2004","journal-title":"Proceedings Oj the II Th International ConJerence on Architectural Support Jor Programming Languages and Operating Systems"},{"key":"15","first-page":"5","article-title":"Ursa minor: Versatile cluster-based storage","author":"michael","year":"2005","journal-title":"Proceedings Oj the 4th ConJerence on USENiX Conference on File and Storage Technologies"},{"key":"16","first-page":"17","article-title":"Scalable performance of the Panasas parallel file system","author":"brent","year":"2008","journal-title":"Proc 5th USENIX conference on File and Storage Technologies"},{"key":"13","first-page":"84","article-title":"Petal: Distributed virtual disks","author":"edward","year":"1996","journal-title":"Proceedings Oj the Seventh International ConJerence on Architectural Support Jor Programming Languages and Operating Systems"},{"key":"14","first-page":"29","article-title":"The google file system","author":"sanjay","year":"2003","journal-title":"Proceedings of the nineteenth ACM symposium on Operating systems principles"},{"key":"11","first-page":"307","article-title":"Ceph: A scalable, high-performance distributed file system","author":"weil","year":"2006","journal-title":"Proc of the 5th Symposium on Operating Systems Design and Implementation"},{"key":"12","first-page":"35","article-title":"RADOS: A scalable, reliable storage service for petabyte-scale storage clusters","author":"sage","year":"2007","journal-title":"Proceedings Oj the 2nd International Workshop on Petascale Data Storage Held in Conjunction with Supercomputing"},{"key":"21","first-page":"269","article-title":"Avoiding the disk bottleneck in the data domain deduplication file system","author":"zhu","year":"2008","journal-title":"Proceedings Oj the 6th USENiX ConJerence on File and Storage Technologies"},{"key":"20","first-page":"111","article-title":"Sparse indexing: Large scale, inline deduplication using sampling and locality","author":"lillibridge","year":"2009","journal-title":"6th USENIX Conference on File and Storage Technologies"},{"key":"22","first-page":"89","article-title":"Venti: A new approach to archival data storage","author":"sean","year":"2002","journal-title":"Proceedings Oj the 4th ConJerence on USENiX Conference on File and Storage Technologies"},{"key":"23","first-page":"101","article-title":"Decentralized deduplication in SAN cluster file systems","author":"austin","year":"2009","journal-title":"Proceedings Oj the 2009 ConJerence on USENiX Annual Technical ConJerence"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1109\/CSCWD.2011.5960097"},{"key":"25","first-page":"197","article-title":"HYDRAstor: A scalable secondary storage","author":"cezary","year":"2009","journal-title":"Proceedings OJ the 7th ConJerence on File and Storage Technologies"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOT.2009.5366623"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2010.5496987"},{"key":"28","first-page":"301","article-title":"Duplicate data elimination in a san file system","author":"hong","year":"2004","journal-title":"Proceedings of the 21st IEEEI12th NASA Goddard ConJerence on Mass Storage Systems and Technologies (MSST)"},{"key":"29","first-page":"430","article-title":"Data deduplication techniques","author":"he","year":"2010","journal-title":"International ConJerence on Future InJormation Technology and Management Engineering (FiTME)"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2008.538"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2013.2241359"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1049\/et.2013.0307"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/AERO.2012.6187357"},{"key":"30","first-page":"13","article-title":"An analysis of compare-by-hash","author":"henson","year":"2003","journal-title":"Proceedings OJ the 9th ConJerence on Hot Topics in Operating Systems"},{"key":"7","year":"1999","journal-title":"Apache Hive TM Hive"},{"key":"6","author":"borthakur","year":"2007","journal-title":"The Hadoop Distributed File System Architecture and Design"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1145\/2351316.2351323"},{"key":"5","author":"white","year":"2012","journal-title":"Hadoop The Definitive Guide 3d Ed"},{"key":"31","first-page":"85","article-title":"Compare-by-hash: A reasoned analysis","author":"black","year":"2006","journal-title":"USENiX Association Proceedings Oj the 2006 USENiX Annual Technical ConJerence"},{"key":"4","year":"1999","journal-title":"Welcome to ApacheTM Hadoop\ufffd! Hadoop"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2012.73"},{"key":"8","year":"1999","journal-title":"Welcome to Apache Pig! Hadoop"}],"event":{"name":"2014 Seventh International Conference on Contemporary Computing (IC3)","location":"Noida, India","start":{"date-parts":[[2014,8,7]]},"end":{"date-parts":[[2014,8,9]]}},"container-title":["2014 Seventh International Conference on Contemporary Computing (IC3)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6892117\/6897132\/06897216.pdf?arnumber=6897216","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,14]],"date-time":"2020-10-14T15:55:24Z","timestamp":1602690924000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/6897216"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,8]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/ic3.2014.6897216","relation":{},"subject":[],"published":{"date-parts":[[2014,8]]}}}