{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T06:52:17Z","timestamp":1760597537189},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2012,9,9]],"date-time":"2012-09-09T00:00:00Z","timestamp":1347148800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2012,12]]},"DOI":"10.1007\/s10115-012-0535-4","type":"journal-article","created":{"date-parts":[[2012,9,8]],"date-time":"2012-09-08T14:52:19Z","timestamp":1347115939000},"page":"707-734","source":"Crossref","is-referenced-by-count":11,"title":["Novel approaches to crawling important pages early"],"prefix":"10.1007","volume":"33","author":[{"given":"Md. Hijbul","family":"Alam","sequence":"first","affiliation":[]},{"given":"JongWoo","family":"Ha","sequence":"additional","affiliation":[]},{"given":"SangKeun","family":"Lee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,9,9]]},"reference":[{"key":"535_CR1","doi-asserted-by":"crossref","unstructured":"Abiteboul S, Preda M, Cobena G (2003) Adaptive on-line page importance computation. In: Proccedings of 12th international conference on world Wide Web. ACM, New York, pp 280\u2013290","DOI":"10.1145\/775152.775192"},{"key":"535_CR2","doi-asserted-by":"crossref","unstructured":"Alam MH, Ha J, Lee S (2009) Fractional pagerank crawler: prioritizing urls efficiently for crawling important pages early. In: Proccedings of 14th international conference on database systems for advanced applications. Springer, Berlin, pp 590\u2013594","DOI":"10.1007\/978-3-642-00887-0_52"},{"issue":"6","key":"535_CR3","doi-asserted-by":"crossref","first-page":"886","DOI":"10.1016\/j.is.2006.09.004","volume":"32","author":"G Almpanidis","year":"2007","unstructured":"Almpanidis G, Kotropoulos C, Pitas I (2007) Combining text and link analysis for focused crawling-an application for vertical search engines. Inf Syst 32(6):886\u2013908","journal-title":"Inf Syst"},{"key":"535_CR4","doi-asserted-by":"crossref","unstructured":"Baeza YR, Castillo C, Marin M, Rodriguez A (2005) Crawling a country: better strategies than breadth-first for Web page ordering. In: Proccedings of special interest tracks and posters of the 14th international conference on world wide web. ACM, New York, pp 864\u2013872","DOI":"10.1145\/1062745.1062768"},{"key":"535_CR5","doi-asserted-by":"crossref","unstructured":"Bai X, Cambazoglu BB, Junqueira FP (2011) Discovering URLs through user feedback. In: Proceedings of the 20th ACM international conference on information and knowledge. ACM, New York, pp 77\u201386","DOI":"10.1145\/2063576.2063592"},{"key":"535_CR6","doi-asserted-by":"crossref","unstructured":"Boldi P, Vigna S (2004) The WebGraph framework I: compression techniques. In: Proccedings of 13th international conference on world wide web. ACM, New York, pp 595\u2013602","DOI":"10.1145\/988672.988752"},{"issue":"1\u20137","key":"535_CR7","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1016\/S0169-7552(98)00110-X","volume":"30","author":"S Brin","year":"1998","unstructured":"Brin S, Page L (1998) The anatomy of a large-scale hypertextual Web search engine. Comput Netw ISDN Syst 30(1\u20137):107\u2013117","journal-title":"Comput Netw ISDN Syst"},{"issue":"3","key":"535_CR8","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1145\/1151087.1151090","volume":"6","author":"M Brinkmeier","year":"2006","unstructured":"Brinkmeier M (2006) Pagerank revisited. ACM Trans Int Technol 6(3):282\u2013301","journal-title":"ACM Trans Int Technol"},{"issue":"2","key":"535_CR9","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1145\/1189702.1189703","volume":"40","author":"C Castillo","year":"2006","unstructured":"Castillo C, Donato D, Becchetti L, Boldi P, Leonardi S, Santini M, Vigna S (2006) A reference collection for web spam. SIGIR Forum 40(2):11\u201324","journal-title":"SIGIR Forum"},{"issue":"11\u201316","key":"535_CR10","doi-asserted-by":"crossref","first-page":"1623","DOI":"10.1016\/S1389-1286(99)00052-3","volume":"31","author":"S Chakrabarti","year":"1999","unstructured":"Chakrabarti S, Van den BM, Dom B (1999) Focused crawling: a new approach to topic-specific web resource discovery. Comput Netw 31(11\u201316):1623\u20131640","journal-title":"Comput Netw"},{"key":"535_CR11","unstructured":"Cho J, Garcia MH (2000) The evolution of the Web and implications for an incremental crawler. In: Proccedings of 26th international conference on very large data bases. Morgan Kaufmann, San Francisco, pp 200\u2013209"},{"issue":"1\u20137","key":"535_CR12","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1016\/S0169-7552(98)00108-1","volume":"30","author":"J Cho","year":"1998","unstructured":"Cho J, Garcia MH, Page L (1998) Efficient crawling through url ordering. Comput Netw ISDN Syst 30(1\u20137):161\u2013172","journal-title":"Comput Netw ISDN Syst"},{"key":"535_CR13","doi-asserted-by":"crossref","unstructured":"Cho J, Roy S, Adams RE (2005) Page quality: in search of an unbiased Web ranking. In: Proccedings of 2005 ACM SIGMOD international conference on management of data. ACM, New York, pp 551\u2013562","DOI":"10.1145\/1066157.1066220"},{"key":"535_CR14","unstructured":"Cho J, Schonfeld U (2007) Rankmass crawler: a crawler with high personalized pagerank coverage guarantee. In: Proccedings of 33rd international conference on very large data bases. VLDB Endowment, pp 375\u2013386"},{"key":"535_CR15","doi-asserted-by":"crossref","unstructured":"Dasgupta A, Ghosh A, Kumar R, Olston C, Pandey S, Tomkins A. (2007) The discoverability of the Web. In: Proccedings of 16th international conference on world wide web. ACM, New York, pp 421\u2013430","DOI":"10.1145\/1242572.1242630"},{"key":"535_CR16","unstructured":"Deerwester S, Dumais ST, Furnas GW, Landauer TK, Harshman R (1988) Indexing by latent semantic analysis. In: Proccedings of 51st Annual Meeting of the American Society for Information, Science, pp 36\u201340"},{"key":"535_CR17","first-page":"1871","volume":"9","author":"R Fan","year":"2008","unstructured":"Fan R, Chang K, Hsieh CJ, Wang XR, Lin C (2008) LIBLINEAR: a library for large linear classification. J Mach Learn Res 9:1871\u20131874","journal-title":"J Mach Learn Res"},{"key":"535_CR18","doi-asserted-by":"crossref","unstructured":"Fetterly D, Craswell N, Vinay V (2009) The impact of crawl policy on web search effectiveness. In: Proccedings of 32nd international ACM SIGIR conference on research and development in information retrieval. ACM, New York, pp 580\u2013587","DOI":"10.1145\/1571941.1572041"},{"issue":"1","key":"535_CR19","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1007\/s10115-011-0426-0","volume":"32","author":"M G\u00e9ry","year":"2012","unstructured":"G\u00e9ry M, Largeron C (2012) BM25t: a BM25 extension for focused information retrieval. Knowl Inf Syst 32(1):217\u2013241","journal-title":"Knowl Inf Syst"},{"key":"535_CR20","doi-asserted-by":"crossref","unstructured":"Groza T, Grimnes G, Handschuh S, Decker S (2011) From raw publications to Linked Data. Knowl Inf Syst (available online). doi: 10.1007\/s10115-011-0473-6","DOI":"10.1007\/s10115-011-0473-6"},{"key":"535_CR21","unstructured":"Gy\u00f6ngyi Z, Garcia MH, Pedersen J (2004) Combating Web spam with trustrank. In: Proccedings of 13th international conference on very large data bases. VLDB Endowment, pp 576\u2013587"},{"key":"535_CR22","doi-asserted-by":"crossref","unstructured":"Kamvar SD, Haveliwala TH, Manning CD, Golub GH (2003) Extrapolation methods for accelerating pagerank computations. In: Proccedings of 12th international conference on world wide web. ACM, New York, pp 261\u2013270","DOI":"10.1145\/775152.775190"},{"issue":"3","key":"535_CR23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1541822.1541823","volume":"3","author":"HT Lee","year":"2009","unstructured":"Lee HT, Leonard D, Wang X, Loguinov D (2009) Irlbot: scaling to 6 billion pages and beyond. ACM Trans Web 3(3):1\u201334","journal-title":"ACM Trans Web"},{"issue":"1","key":"535_CR24","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/s10115-011-0427-z","volume":"32","author":"Z Lin","year":"2012","unstructured":"Lin Z, Lyu M, King I (2012) MatchSim: a novel similarity measure based on maximum neighborhood matching. Knowl Inf Syst 32(1):141\u2013161","journal-title":"Knowl Inf Syst"},{"issue":"3","key":"535_CR25","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1007\/s10115-011-0437-x","volume":"31","author":"JP Mei","year":"2012","unstructured":"Mei JP, Chen L (2012) SumCR: a new subtopic-based extractive approach for text summarization. Knowl Inf Syst 31(3):527\u2013545","journal-title":"Knowl Inf Syst"},{"key":"535_CR26","doi-asserted-by":"crossref","unstructured":"Najork M, Wiener JL (2001) Breadth-first crawling yields high-quality pages. In: Proccedings of 10th international conference on world wide web. ACM, New York, pp 114\u2013118","DOI":"10.1145\/371920.371965"},{"key":"535_CR27","unstructured":"Ntoulas A, Cho J, Olston C (2004) What\u2019s new on the web? The evolution of the web from a search engine perspective. In: Proccedings of 13th international conference on world wide web. ACM, New York, pp 1\u201312"},{"key":"535_CR28","doi-asserted-by":"crossref","unstructured":"Olston C, Pandey S (2008) Recrawl scheduling based on information longevity. In: Proccedings of 17th international conference on world wide web. ACM, New York, pp 437\u2013446","DOI":"10.1145\/1367497.1367557"},{"issue":"2","key":"535_CR29","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/1480506.1480512","volume":"42","author":"A Orlandi","year":"2008","unstructured":"Orlandi A, Vigna S (2008) Compressed collections for simulated crawling. SIGIR Forum 42(2):39\u201344","journal-title":"SIGIR Forum"},{"key":"535_CR30","doi-asserted-by":"crossref","unstructured":"Pandey S, Olston C (2008) Crawl ordering by search impact. In: Proccedings of 1st international conference on web search and data mining. ACM, New York, pp 3\u201314","DOI":"10.1145\/1341531.1341535"},{"issue":"4","key":"535_CR31","doi-asserted-by":"crossref","first-page":"430","DOI":"10.1145\/1095872.1095875","volume":"23","author":"G Pant","year":"2005","unstructured":"Pant G, Srinivasan P (2005) Learning to crawl: comparing classification schemes. ACM Trans Inf Syst 23(4):430\u2013462","journal-title":"ACM Trans Inf Syst"},{"issue":"11","key":"535_CR32","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1971","unstructured":"Salton G, Wong A, Yang CS (1971) A vector space model for automatic indexing. Commun ACM 18(11):613\u2013620","journal-title":"Commun ACM"},{"issue":"2","key":"535_CR33","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s10115-009-0266-3","volume":"25","author":"K Shchekotykhin","year":"2009","unstructured":"Shchekotykhin K, Jannach D, Friedrich G (2009) xCrawl: a high-recall crawling method for Web mining. Knowl Inf Syst 25(2):303\u2013326","journal-title":"Knowl Inf Syst"},{"key":"535_CR34","doi-asserted-by":"crossref","unstructured":"Wan M, Jnsson A, Wang C, Li L, Yang Y (2011) Web user clustering and Web prefetching using Random Indexing with weight functions. Knowl Inf Syst (available online). doi: 10.1007\/s10115-011-0453-x","DOI":"10.1007\/s10115-011-0453-x"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-012-0535-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-012-0535-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-012-0535-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,3]],"date-time":"2019-07-03T16:22:32Z","timestamp":1562170952000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-012-0535-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,9,9]]},"references-count":34,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,12]]}},"alternative-id":["535"],"URL":"https:\/\/doi.org\/10.1007\/s10115-012-0535-4","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,9,9]]}}}