{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T06:04:43Z","timestamp":1725861883968},"publisher-location":"Singapore","reference-count":14,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811020971"},{"type":"electronic","value":"9789811020988"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-981-10-2098-8_26","type":"book-chapter","created":{"date-parts":[[2016,7,29]],"date-time":"2016-07-29T22:56:57Z","timestamp":1469833017000},"page":"217-225","source":"Crossref","is-referenced-by-count":0,"title":["The BBC News Hunter: A Novel Crawler for BBC News"],"prefix":"10.1007","author":[{"given":"Mingxin","family":"Wang","sequence":"first","affiliation":[]},{"given":"Ning","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Boran","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Can","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Yanchun","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Guozhong","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Xiaosong","family":"Han","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,7,31]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhu, L., Li, C.: Discussion about the core of search engine again\u2014web crawler. In: 2011 International Conference on Computer Science and Service System (CSSS), pp. 3188\u20133191. IEEE (2011)","DOI":"10.1109\/CSSS.2011.5972036"},{"key":"26_CR2","first-page":"32","volume":"1","author":"R Khare","year":"2004","unstructured":"Khare, R., Cutting, D., Sitaker, K., Rifkin, A.: Nutch: a flexible and scalable open-source web search engine. Or. State Univ. 1, 32 (2004)","journal-title":"Or. State Univ."},{"issue":"18","key":"26_CR3","doi-asserted-by":"crossref","first-page":"3825","DOI":"10.1016\/j.comnet.2012.10.007","volume":"56","author":"S Brin","year":"2012","unstructured":"Brin, S., Page, L.: Reprint of: the anatomy of a large-scale hypertextual web search engine. Comput. Netw. 56(18), 3825\u20133833 (2012)","journal-title":"Comput. Netw."},{"key":"26_CR4","unstructured":"http:\/\/blog.csdn.net\/chaishen10000\/article\/details\/50776662"},{"key":"26_CR5","unstructured":"Mohr, G., Stack, M., Ranitovic, I., et al.: An Introduction to Heritrix An open source archival quality web crawler. In: IWAW 2004, 4th International Web Archiving Workshop (2004)"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Liu, D.F., Fan, X.S.: Study and application of web crawler algorithm based on heritrix. In: Advanced Materials Research, vol. 219, pp. 1069\u20131072. Trans Tech Publications (2011)","DOI":"10.4028\/www.scientific.net\/AMR.219-220.1069"},{"issue":"3","key":"26_CR7","first-page":"175","volume":"9","author":"HG Kim","year":"2015","unstructured":"Kim, H.G., Lee, J.W., Ban, T.H., Jung, H.K.: A study on distributed crawling-based overhead optimization. Int. J. Softw. Eng. Appl. 9(3), 175\u2013182 (2015)","journal-title":"Int. J. Softw. Eng. Appl."},{"key":"26_CR8","first-page":"30","volume":"3","author":"W Feng","year":"2005","unstructured":"Feng, W., Mao, Z.: The research of web pages information extraction based on Web. J. Luoyang Technol. Coll. 3, 30\u201331 (2005)","journal-title":"J. Luoyang Technol. Coll."},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Chakrabarti, S.: Integrating the document object model with hyperlinks for enhanced topic distillation and information extraction. In: Proceedings of the 10th International Conference on World Wide Web, pp. 211\u2013220. ACM (2001)","DOI":"10.1145\/371920.372054"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Hengru, Z., Chun, C.: Web information extraction technology research based on ajax. In: 2011 International Conference on Business Computing and Global Informatization (BCGIN), pp. 208\u2013211. IEEE (2011)","DOI":"10.1109\/BCGIn.2011.60"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Kovacevic, M., Diligenti, M., Gori, M., Milutinovic, V.: Recognition of common areas in a web page using visual information: a possible application in a page classification. In: Proceedings of the 2002 IEEE International Conference on Data Mining, ICDM 2003, pp. 250\u2013257. IEEE (2002)","DOI":"10.1109\/ICDM.2002.1183910"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Kang, J., Choi, J.: Detecting informative web page blocks for efficient information extraction using visual block segmentation. In: International Symposium on Information Technology Convergence, ISITC 2007, pp. 306\u2013310. IEEE (2007)","DOI":"10.1109\/ISITC.2007.6"},{"issue":"2","key":"26_CR13","doi-asserted-by":"crossref","first-page":"467","DOI":"10.1145\/304181.304223","volume":"28","author":"DW Embley","year":"1999","unstructured":"Embley, D.W., Jiang, Y., Ng, Y.K.: Record-boundary discovery in web documents. ACM SIGMOD Rec. 28(2), 467\u2013478 (1999). ACM","journal-title":"ACM SIGMOD Rec."},{"issue":"3","key":"26_CR14","first-page":"144","volume":"24","author":"XX Zhao","year":"2007","unstructured":"Zhao, X.X., Suo, H.G., Liu, Y.S.: Web content information extraction method based on tag window. Jisuanji Yingyong Yanjiu\/Appl. Res. Comput. 24(3), 144\u2013145 (2007)","journal-title":"Jisuanji Yingyong Yanjiu\/Appl. Res. Comput."}],"container-title":["Communications in Computer and Information Science","Social Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-2098-8_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T15:33:13Z","timestamp":1498318393000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-10-2098-8_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9789811020971","9789811020988"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-2098-8_26","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2016]]}}}