{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T23:42:25Z","timestamp":1725752545890},"publisher-location":"Berlin, Heidelberg","reference-count":28,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642416866"},{"type":"electronic","value":"9783642416873"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-41687-3_19","type":"book-chapter","created":{"date-parts":[[2013,10,17]],"date-time":"2013-10-17T10:46:37Z","timestamp":1382006797000},"page":"196-207","source":"Crossref","is-referenced-by-count":0,"title":["Crawling Data-Intensive Web Sources Using Structure Information"],"prefix":"10.1007","author":[{"given":"Dawid Grzegorz","family":"W\u0119ckowski","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Levene, M.: An Introduction to Search Engines and Web Navigation. John Wiley & Sons (2010)","DOI":"10.1002\/9780470874233"},{"key":"19_CR2","first-page":"107","volume-title":"Proceedings of the Seventh International Conference on World Wide Web 7, WWW7","author":"S. Brin","year":"1998","unstructured":"Brin, S., Page, L.: The anatomy of a large-scale hypertextual web search engine. In: Proceedings of the Seventh International Conference on World Wide Web 7, WWW7, pp. 107\u2013117. Elsevier Science Publishers B. V., Amsterdam (1998)"},{"issue":"11-16","key":"19_CR3","doi-asserted-by":"publisher","first-page":"1623","DOI":"10.1016\/S1389-1286(99)00052-3","volume":"31","author":"S. Chakrabarti","year":"1999","unstructured":"Chakrabarti, S., van den Berg, M., Dom, B.: Focused crawling: a new approach to topic-specific web resource discovery. Comput. Netw.\u00a031(11-16), 1623\u20131640 (1999)","journal-title":"Comput. Netw."},{"key":"19_CR4","unstructured":"Abramowicz, W.: Filtrowanie informacji. Wydawnictwo Uniwersytetu Ekonomicznego w Poznaniu (2008)"},{"key":"19_CR5","first-page":"335","volume-title":"Proceedings of the Sixteenth International Conference on Machine Learning, ICML 1999","author":"J. Rennie","year":"1999","unstructured":"Rennie, J., McCallum, A.: Using reinforcement learning to spider the web efficiently. In: Proceedings of the Sixteenth International Conference on Machine Learning, ICML 1999, pp. 335\u2013343. Morgan Kaufmann Publishers Inc., San Francisco (1999)"},{"key":"19_CR6","first-page":"527","volume-title":"Focused crawling using context graphs. In: Proceedings of the 26th International Conference on Very Large Data Bases, VLDB 2000","author":"M. Diligenti","year":"2000","unstructured":"Diligenti, M., Coetzee, F., Lawrence, S., Giles, C.L., Gori, M.: Focused crawling using context graphs. In: Focused crawling using context graphs. In: Proceedings of the 26th International Conference on Very Large Data Bases, VLDB 2000, pp. 527\u2013534. Morgan Kaufmann Publishers Inc., San Francisco (2000)"},{"key":"19_CR7","first-page":"517","volume-title":"Proceedings of the 11th International Conference on World Wide Web, WWW 2002","author":"T.H. Haveliwala","year":"2002","unstructured":"Haveliwala, T.H.: Topic-sensitive pagerank. In: Proceedings of the 11th International Conference on World Wide Web, WWW 2002, pp. 517\u2013526. ACM, New York (2002)"},{"key":"19_CR8","doi-asserted-by":"publisher","first-page":"1513","DOI":"10.1145\/1871437.1871660","volume-title":"Proceedings of the 19th ACM International Conference on Information and Knowledge Management, CIKM 2010","author":"S. Feng","year":"2010","unstructured":"Feng, S., Zhang, L., Xiong, Y., Yao, C.: Focused crawling using navigational rank. In: Proceedings of the 19th ACM International Conference on Information and Knowledge Management, CIKM 2010, pp. 1513\u20131516. ACM, New York (2010)"},{"key":"19_CR9","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1145\/1774088.1774459","volume-title":"Proceedings of the 2010 ACM Symposium on Applied Computing, SAC 2010","author":"A. Pirkola","year":"2010","unstructured":"Pirkola, A., Talvensaari, T.: Addressing the limited scope problem of focused crawling using a result merging approach. In: Proceedings of the 2010 ACM Symposium on Applied Computing, SAC 2010, pp. 1735\u20131740. ACM, New York (2010)"},{"key":"19_CR10","first-page":"755","volume-title":"Proceedings of the 20th ACM International Conference on Information and Knowledge Management, CIKM 2011","author":"L. Barbosa","year":"2011","unstructured":"Barbosa, L., Bangalore, S.: Focusing on novelty: a crawling strategy to build diverse language models. In: Proceedings of the 20th ACM International Conference on Information and Knowledge Management, CIKM 2011, pp. 755\u2013764. ACM, New York (2011)"},{"key":"19_CR11","unstructured":"Kolari, P., Finin, T., Joshi, A.: SVMs for the blogosphere: Blog identification and splog detection. In: AAAI Spring Symposium on Computational Approaches to Analysing Weblogs (2006)"},{"key":"19_CR12","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1145\/1367497.1367558","volume-title":"Proceedings of the 17th International Conference on World Wide Web, WWW 2008","author":"R. Cai","year":"2008","unstructured":"Cai, R., Yang, J.M., Lai, W., Wang, Y., Zhang, L.: iRobot: an intelligent crawler for web forums. In: Proceedings of the 17th International Conference on World Wide Web, WWW 2008, pp. 447\u2013456. ACM, New York (2008)"},{"key":"19_CR13","doi-asserted-by":"publisher","first-page":"1375","DOI":"10.1145\/1557019.1557166","volume-title":"Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2009","author":"J.M. Yang","year":"2009","unstructured":"Yang, J.M., Cai, R., Wang, C., Huang, H., Zhang, L., Ma, W.Y.: Incorporating site-level knowledge for incremental crawling of web forums: a list-wise strategy. In: Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2009, pp. 1375\u20131384. ACM, New York (2009)"},{"key":"19_CR14","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1145\/2187980.2187985","volume-title":"Proceedings of the 21st International Conference Companion on World Wide Web, WWW 2012 Companion","author":"J. Jiang","year":"2012","unstructured":"Jiang, J., Yu, N., Lin, C.Y.: Focus: learning to crawl web forums. In: Proceedings of the 21st International Conference Companion on World Wide Web, WWW 2012 Companion, pp. 33\u201342. ACM, New York (2012)"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Catanese, S.A., De Meo, P., Ferrara, E., Fiumara, G., Provetti, A.: Crawling facebook for social network analysis purposes. In: Proceedings of the International Conference on Web Intelligence, Mining and Semantics, WIMS 2011, pp. 52:1\u201352:8. ACM, New York (2011)","DOI":"10.1145\/1988688.1988749"},{"key":"19_CR16","doi-asserted-by":"publisher","first-page":"1233","DOI":"10.1145\/2187980.2188266","volume-title":"Proceedings of the 21st International Conference Companion on World Wide Web, WWW 2012 Companion","author":"M. Boanjak","year":"2012","unstructured":"Boanjak, M., Oliveira, E., Martins, J., Mendes Rodrigues, E., Sarmento, L.: Twitterecho: a distributed focused crawler to support open research with twitter data. In: Proceedings of the 21st International Conference Companion on World Wide Web, WWW 2012 Companion, pp. 1233\u20131240. ACM, New York (2012)"},{"key":"19_CR17","first-page":"129","volume-title":"Proceedings of the 27th International Conference on Very Large Data Bases, VLDB 2001","author":"S. Raghavan","year":"2001","unstructured":"Raghavan, S., Garcia-Molina, H.: Crawling the hidden web. In: Proceedings of the 27th International Conference on Very Large Data Bases, VLDB 2001, pp. 129\u2013138. Morgan Kaufmann Publishers Inc., San Francisco (2001)"},{"key":"19_CR18","first-page":"100","volume-title":"Proceedings of the 5th ACM\/IEEE-CS Joint Conference on Digital libraries, JCDL 2005","author":"A. Ntoulas","year":"2005","unstructured":"Ntoulas, A., Zerfos, P., Cho, J.: Downloading textual hidden web content through keyword queries. In: Proceedings of the 5th ACM\/IEEE-CS Joint Conference on Digital libraries, JCDL 2005, pp. 100\u2013109. ACM, New York (2005)"},{"key":"19_CR19","first-page":"47","volume-title":"Proceedings of the 22nd International Conference on Data Engineering, ICDE 2006","author":"P. Wu","year":"2006","unstructured":"Wu, P., Wen, J.R., Liu, H., Ma, W.Y.: Query selection techniques for efficient crawling of structured web sources. In: Proceedings of the 22nd International Conference on Data Engineering, ICDE 2006, p. 47. IEEE Computer Society, Washington, DC (2006)"},{"key":"19_CR20","unstructured":"Barbosa, L., Freire, J.: Searching for hidden-web databases. In: Doan, A., Neven, F., McCann, R., Bex, G.J. (eds.) WebDB, pp. 1\u20136 (2005)"},{"key":"19_CR21","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1145\/1242572.1242632","volume-title":"Proceedings of the 16th International Conference on World Wide Web, WWW 2007","author":"L. Barbosa","year":"2007","unstructured":"Barbosa, L., Freire, J.: An adaptive crawler for locating hidden-web entry points. In: Proceedings of the 16th International Conference on World Wide Web, WWW 2007, pp. 441\u2013450. ACM, New York (2007)"},{"issue":"2","key":"19_CR22","doi-asserted-by":"crossref","first-page":"1241","DOI":"10.14778\/1454159.1454163","volume":"1","author":"J. Madhavan","year":"2008","unstructured":"Madhavan, J., Ko, D., Kot, \u0141., Ganapathy, V., Rasmussen, A., Halevy, A.: Google\u2019s deep web crawl. Proc. VLDB Endow.\u00a01(2), 1241\u20131252 (2008)","journal-title":"Proc. VLDB Endow."},{"key":"19_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"524","DOI":"10.1007\/978-3-642-17616-6_46","volume-title":"Web Information Systems Engineering \u2013 WISE 2010","author":"W. Liu","year":"2010","unstructured":"Liu, W., Xiao, J.: Incremental structured web database crawling via history versions. In: Chen, L., Triantafillou, P., Suel, T. (eds.) WISE 2010. LNCS, vol.\u00a06488, pp. 524\u2013533. Springer, Heidelberg (2010)"},{"key":"19_CR24","unstructured":"Flejter, D.: Semi-Automatic Web Information Extraction. PhD thesis, Pozna\u0144 University of Economics (2011)"},{"issue":"3","key":"19_CR25","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1145\/857166.857170","volume":"3","author":"J. Cho","year":"2003","unstructured":"Cho, J., Garcia-Molina, H.: Estimating frequency of change. ACM Trans. Internet Technol.\u00a03(3), 256\u2013290 (2003)","journal-title":"ACM Trans. Internet Technol."},{"issue":"4","key":"19_CR26","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/12944.12948","volume":"11","author":"B. Boehm","year":"1986","unstructured":"Boehm, B.: A spiral model of software development and enhancement. SIGSOFT Softw. Eng. Notes\u00a011(4), 14\u201324 (1986)","journal-title":"SIGSOFT Softw. Eng. Notes"},{"key":"19_CR27","unstructured":"Kaczmarek, T., W\u0119ckowski, D.G.: Web forums change analysis. In: Proceedings of the 9th International Conference on Web Information Systems and Technologies, Aachen, Germany (2013)"},{"key":"19_CR28","first-page":"200","volume-title":"Proceedings of the 26th International Conference on Very Large Data Bases, VLDB 2000","author":"J. Cho","year":"2000","unstructured":"Cho, J., Garcia-Molina, H.: The evolution of the web and implications for an incremental crawler. In: Proceedings of the 26th International Conference on Very Large Data Bases, VLDB 2000, pp. 200\u2013209. Morgan Kaufmann Publishers Inc., San Francisco (2000)"}],"container-title":["Lecture Notes in Business Information Processing","Business Information Systems Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-41687-3_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,23]],"date-time":"2019-05-23T17:25:53Z","timestamp":1558632353000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-41687-3_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642416866","9783642416873"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-41687-3_19","relation":{},"ISSN":["1865-1348","1865-1356"],"issn-type":[{"type":"print","value":"1865-1348"},{"type":"electronic","value":"1865-1356"}],"subject":[],"published":{"date-parts":[[2013]]}}}