{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T11:17:26Z","timestamp":1730200646599,"version":"3.28.0"},"reference-count":19,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,10]]},"DOI":"10.1109\/bigdata.2015.7363976","type":"proceedings-article","created":{"date-parts":[[2015,12,28]],"date-time":"2015-12-28T16:36:21Z","timestamp":1451320581000},"page":"1967-1971","source":"Crossref","is-referenced-by-count":3,"title":["Optimizing apache nutch for domain specific crawling at large scale"],"prefix":"10.1109","author":[{"given":"Luis A.","family":"Lopez","sequence":"first","affiliation":[]},{"given":"Ruth","family":"Duerr","sequence":"additional","affiliation":[]},{"given":"Siri Jodha Singh","family":"Khalsa","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Building Nutch","volume":"54","author":"mike","year":"2004","journal-title":"Queue 2 2"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1007\/s11704-014-3050-9"},{"year":"2014","author":"joe","article-title":"The Science of Crawl (Part 1): Deduplication of Web Content","key":"ref12"},{"key":"ref13","article-title":"Memex (Domain-Specific Search)","author":"chris","year":"2014","journal-title":"Memex DARPA"},{"key":"ref14","article-title":"The BCube Crawler: Web Scale Data and Service Discovery for EarthCube","author":"lopez","year":"2014","journal-title":"American Geophysical Union Fall Meeting"},{"year":"0","journal-title":"Nutch Logs are stored in s3 \/\/bcube-nutch-test\/logs and crawl data","key":"ref15"},{"year":"0","journal-title":"Nutch JIRA Tickets","key":"ref16"},{"year":"2009","author":"ken","journal-title":"Performance problems with vertical\/focused web crawling","key":"ref17"},{"key":"ref18","article-title":"Crawling The Web for Libre","author":"truslove","year":"2012","journal-title":"abstract #IN11D-1482"},{"year":"0","journal-title":"Alpha-Beta Pruning Algorithm Wikipedia","key":"ref19"},{"key":"ref4","article-title":"The Pagerank Citation Algorithm: Bringing Order to the Web","author":"page","year":"1998","journal-title":"Technical Report Stanford Digital Library Technologies"},{"key":"ref3","first-page":"265","volume":"5","author":"garofalakis","year":"2006","journal-title":"Web Service Discovery Mechanisms Looking for a Needle in a Haystack"},{"key":"ref6","article-title":"Detecting Large-Scale System Problems by Mining Console Logs","author":"wei","year":"2009","journal-title":"Berkeley Technical Report No UCB\/EECS-2009-103"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1145\/1242572.1242726"},{"key":"ref8","article-title":"The HiBench Benchmark Suite: Characterization of the MapReduce-based Data Analysis","author":"shengsheng","year":"2010","journal-title":"2010 IEEE 26th International Conference on Data Engineering Workshops ICDEW"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"1127","DOI":"10.1080\/13658810903514172","article-title":"An active crawler for discovering geospatial. Web services and their distribution pattern - A case study of OGC Web Map Service","volume":"24","author":"wenwen","year":"2010","journal-title":"International Journal of Geographical Information Science"},{"year":"2004","author":"etzioni","journal-title":"Web-Scale Information Extraction in KnowItAll","key":"ref2"},{"key":"ref1","first-page":"527","article-title":"Focused Crawling using Context Graphs","author":"michelangelo","year":"2000","journal-title":"26th International Conference on Very Large Databases VLDB 2000"},{"key":"ref9","first-page":"48","article-title":"An Approach for Identifying URLs Based on Division Score and Link Score in Focused Crawler","volume":"2 3","author":"debashis","year":"2010","journal-title":"International Journal of Computer Applications IJCA"}],"event":{"name":"2015 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2015,10,29]]},"location":"Santa Clara, CA","end":{"date-parts":[[2015,11,1]]}},"container-title":["2015 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7347101\/7363706\/07363976.pdf?arnumber=7363976","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,1,15]],"date-time":"2020-01-15T04:02:08Z","timestamp":1579060928000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7363976\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,10]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/bigdata.2015.7363976","relation":{},"subject":[],"published":{"date-parts":[[2015,10]]}}}