{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T12:05:21Z","timestamp":1751889921262,"version":"3.37.3"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,10,27]],"date-time":"2018-10-27T00:00:00Z","timestamp":1540598400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","award":["ERC 339233"],"award-info":[{"award-number":["ERC 339233"]}],"id":[{"id":"10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["SoBigData (RIA 654024)"],"award-info":[{"award-number":["SoBigData (RIA 654024)"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002347","name":"Bundesministerium f\u00fcr Bildung und Forschung","doi-asserted-by":"publisher","award":["Data4UrbanMobility (02K15A040)"],"award-info":[{"award-number":["Data4UrbanMobility (02K15A040)"]}],"id":[{"id":"10.13039\/501100002347","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010665","name":"H2020 Marie Sklodowska-Curie Actions","doi-asserted-by":"publisher","award":["H2020-MSCA-ITN-2018-812997"],"award-info":[{"award-number":["H2020-MSCA-ITN-2018-812997"]}],"id":[{"id":"10.13039\/100010665","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1007\/s00799-018-0258-6","type":"journal-article","created":{"date-parts":[[2018,10,26]],"date-time":"2018-10-26T22:27:28Z","timestamp":1540592848000},"page":"31-45","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Towards extracting event-centric collections from Web archives"],"prefix":"10.1007","volume":"21","author":[{"given":"Gerhard","family":"Gossen","sequence":"first","affiliation":[]},{"given":"Thomas","family":"Risse","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5134-9072","authenticated-orcid":false,"given":"Elena","family":"Demidova","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,27]]},"reference":[{"key":"258_CR1","doi-asserted-by":"crossref","unstructured":"Aggarwal, C., Al-Garawi, F., Yu, P.S.: Intelligent crawling on the World Wide Web with arbitrary predicates. In: Proceedings of the 10th International World Wide Web Conference, WWW\u201901. pp. 96\u2013105 (2001)","DOI":"10.1145\/371920.371955"},{"issue":"3","key":"258_CR2","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1007\/s00799-016-0183-5","volume":"17","author":"Y AlNoamany","year":"2016","unstructured":"AlNoamany, Y., Weigle, M.C., Nelson, M.L.: Detecting off-topic pages within timemaps in web archives. Int. J. Digit. Libr. 17(3), 203\u2013221 (2016)","journal-title":"Int. J. Digit. Libr."},{"key":"258_CR3","doi-asserted-by":"crossref","unstructured":"AlNoamany, Y., Weigle, M.C., Nelson, M.L.: Generating stories from archived collections. In: Proceedings of the 2017 ACM Web Science Conference, WebSci\u201917, ACM, New York, NY, USA, pp. 309\u2013318 (2017)","DOI":"10.1145\/3091478.3091508"},{"key":"258_CR4","unstructured":"Berberich, K., Bedathur, S.: Temporal Diversification of Search Results. In: Proceedings of the Workshop on Time-Aware Information Access (TAIA 2013) (2013)"},{"key":"258_CR5","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1007\/3-540-45747-X_7","volume-title":"Research and Advanced Technology for Digital Libraries","author":"Donna Bergmark","year":"2002","unstructured":"Bergmark, D., Lagoze, C., Sbityakov, A.: Focused crawls, tunneling, and digital libraries. In: Proceedings of the European Conference on Digital Libraries (ECDL\u201902) (2002)"},{"key":"258_CR6","doi-asserted-by":"crossref","unstructured":"Bouzeghoub, M.: A framework for analysis of data freshness. In: Proceedings of the Workshop on Information Quality in Information Systems (2004)","DOI":"10.1145\/1012453.1012464"},{"key":"258_CR7","doi-asserted-by":"crossref","unstructured":"Brin, S., Page, L.: The anatomy of a large-scale hypertextual Web search engine. In: Proceedings of the Seventh International Conference on World Wide Web 7, WWW7, pp. 107\u2013117 (1998)","DOI":"10.1016\/S0169-7552(98)00110-X"},{"issue":"11\u201316","key":"258_CR8","doi-asserted-by":"publisher","first-page":"1623","DOI":"10.1016\/S1389-1286(99)00052-3","volume":"31","author":"S Chakrabarti","year":"1999","unstructured":"Chakrabarti, S., van den Berg, M., Dom, B.: Focused crawling: a new approach to topic-specific Web resource discovery. Comput. Netw. 31(11\u201316), 1623\u20131640 (1999)","journal-title":"Comput. Netw."},{"key":"258_CR9","doi-asserted-by":"crossref","unstructured":"Costa, M., Couto, F., Silva, M.: Learning temporal-dependent ranking models. In: Proceedings of the SIGIR\u201914 (2014)","DOI":"10.1145\/2600428.2609619"},{"issue":"3","key":"258_CR10","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1007\/s00799-016-0171-9","volume":"18","author":"M Costa","year":"2017","unstructured":"Costa, M., Gomes, D., Silva, M.J.: The evolution of web archiving. Int. J. Digit. Libr. 18(3), 191\u2013205 (2017)","journal-title":"Int. J. Digit. Libr."},{"issue":"3","key":"258_CR11","doi-asserted-by":"publisher","first-page":"433","DOI":"10.3390\/fi6030433","volume":"6","author":"E Demidova","year":"2014","unstructured":"Demidova, E., Barbieri, N., Dietze, S., Funk, A., Holzmann, H., Maynard, D., Papailiou, N., Peters, W., Risse, T., Spiliotopoulos, D.: Analysing and enriching focused semantic web archives for parliament applications. Fut. Intern. 6(3), 433\u2013456 (2014)","journal-title":"Fut. Intern."},{"key":"258_CR12","unstructured":"Diligenti, M., Coetzee, F., Lawrence, S., Giles, C.L., Gori, M.: Focused crawling using context graphs. In: Proceedings of the VLDB\u201900 (2000)"},{"key":"258_CR13","doi-asserted-by":"crossref","unstructured":"Dong, A., Chang, Y., Zheng, Z., Mishne, G., Bai, J., Zhang, R., Buchner, K., Liao, C., Diaz, F.: Towards recency ranking in web search. In: Proceedings of the WSDM\u201910 (2010)","DOI":"10.1145\/1718487.1718490"},{"issue":"12","key":"258_CR14","doi-asserted-by":"publisher","first-page":"1755","DOI":"10.1002\/cpe.2980","volume":"25","author":"H Dong","year":"2013","unstructured":"Dong, H., Hussain, F.K.: SOF: a semi-supervised ontology-learning-based focused crawler. Concurr. Comput. Pract. Exp. 25(12), 1755\u20131770 (2013)","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"258_CR15","doi-asserted-by":"crossref","unstructured":"Ehrig, M., Maedche, A.: Ontology-focused crawling of web documents. In: Proceedings of the ACM SAC (2003)","DOI":"10.1145\/952532.952761"},{"issue":"1","key":"258_CR16","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s00799-016-0207-1","volume":"19","author":"MMG Farag","year":"2018","unstructured":"Farag, M.M.G., Lee, S., Fox, E.A.: Focused crawler for events. Int. J. Digit. Libr. 19(1), 3\u201319 (2018)","journal-title":"Int. J. Digit. Libr."},{"key":"258_CR17","doi-asserted-by":"crossref","unstructured":"Gossen, G., Demidova, E., Risse, T.: iCrawl: Improving the freshness of web collections by integrating social web and focused web crawling. In: Proceedings of the JCDL\u201915 (2015)","DOI":"10.1145\/2756406.2756925"},{"key":"258_CR18","doi-asserted-by":"crossref","unstructured":"Gossen, G., Demidova, E., Risse, T.: The iCrawl Wizard\u2014supporting interactive focused crawl specification. In: Proceedings of the ECIR\u201915 (2015)","DOI":"10.1145\/2756406.2756925"},{"key":"258_CR19","doi-asserted-by":"crossref","unstructured":"Gossen, G., Demidova, E., Risse, T.: Analyzing Web archives through topic and event focused sub-collections. In: Proceedings of the WebSci\u201916, pp. 291\u2013295 (May 2016)","DOI":"10.1145\/2908131.2908175"},{"key":"258_CR20","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1007\/978-3-319-67008-9_10","volume-title":"Research and Advanced Technology for Digital Libraries","author":"Gerhard Gossen","year":"2017","unstructured":"Gossen, G., Demidova, E., Risse, T.: Extracting event-centric document collections from large-scale web archives. In: Proceedings of the 21st International Conference on Theory and Practice of Digital Libraries, TPDL 2017, pp. 116\u2013127 (2017)"},{"key":"258_CR21","unstructured":"Gottschalk, S., Demidova, E.: EventKG: A multilingual event-centric temporal knowledge graph. In: Proceedings of the ESWC 2018, Heraklion, Crete, Greece, June 3\u20137, 2018, pp. 272\u2013287 (2018)"},{"key":"258_CR22","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/978-3-030-00066-0_12","volume-title":"Digital Libraries for Open Knowledge","author":"Simon Gottschalk","year":"2018","unstructured":"Gottschalk, S., Demidova, E., Bernacchi, V., Rogers, R., Demidova, E.: Towards better understanding researcher strategies in cross-lingual event analytics. In: Proceedings of the 22nd International Conference on Theory and Practice of Digital Libraries, TPDL 2018 (2018)"},{"issue":"4","key":"258_CR23","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1023\/A:1019213109274","volume":"2","author":"A Heydon","year":"1999","unstructured":"Heydon, A., Najork, M.: Mercator: a scalable, extensible web crawler. World Wide Web 2(4), 219\u2013229 (1999)","journal-title":"World Wide Web"},{"key":"258_CR24","unstructured":"Holzmann, H., Risse, T.: Accessing web archives from different perspectives with potential synergies. In: Researchers, Practitioners and Their Use of the Archived Web, London (2017). \nhttp:\/\/archivedweb.blogs.sas.ac.uk\/files\/2017\/06\/RESAW2017-HolzmannRisse-Accessing_web_archives_from_different_perspectives_with_potential_synergies.pdf"},{"key":"258_CR25","unstructured":"International Internet Presevation Consortium (IIPC): OpenWayback (2017). \nhttp:\/\/netpreserve.org\/openwayback"},{"issue":"6","key":"258_CR26","doi-asserted-by":"publisher","first-page":"1293","DOI":"10.1109\/TKDE.2012.56","volume":"25","author":"J Jiang","year":"2013","unstructured":"Jiang, J., Song, X., Yu, N., Lin, C.Y.: FoCUS: learning to crawl web forums. IEEE Trans. Knowl. Data Eng. 25(6), 1293\u20131306 (2013)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"258_CR27","doi-asserted-by":"crossref","unstructured":"Kanhabua, N., N\u00f8rv\u00e5g, K.: A comparison of time-aware ranking methods. In: Proceedings of the SIGIR\u201911 (2011)","DOI":"10.1145\/2009916.2010147"},{"key":"258_CR28","unstructured":"Laranjeira, B., Moreira, V., Villavicencio, A., Ramisch, C., Finatto, M.J.: Comparing the quality of focused crawlers and of the translation resources obtained from them. In: Proceedings of the LREC\u201914 (2014)"},{"issue":"2","key":"258_CR29","doi-asserted-by":"publisher","first-page":"167","DOI":"10.3233\/SW-140134","volume":"6","author":"J Lehmann","year":"2015","unstructured":"Lehmann, J., Isele, R., Jakob, M., et al.: DBpedia\u2014a large-scale, multilingual knowledge base extracted from Wikipedia. Semant. Web 6(2), 167\u2013195 (2015)","journal-title":"Semant. Web"},{"key":"258_CR30","unstructured":"Mohr, G., Kimpton, M., Stack, M., Ranitovic, I.: Introduction to Heritrix, an archival quality web crawler. In: Proceedings of the 4th International Web Archiving Workshop (2004)"},{"key":"258_CR31","doi-asserted-by":"crossref","unstructured":"Nguyen, T.N., Kanhabua, N., Nieder\u00e9e, C., Zhu, X.: A time-aware random walk model for finding important documents in web archives. In: Proceedings of the SIGIR\u201915 (2015)","DOI":"10.1145\/2766462.2767832"},{"issue":"4","key":"258_CR32","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1145\/1095872.1095875","volume":"23","author":"G Pant","year":"2005","unstructured":"Pant, G., Srinivasan, P.: Learning to crawl: comparing classification schemes. ACM Trans. Inf. Syst. 23(4), 430\u2013462 (2005)","journal-title":"ACM Trans. Inf. Syst."},{"key":"258_CR33","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/978-3-662-10874-1_7","volume-title":"Web Dynamics","author":"Gautam Pant","year":"2004","unstructured":"Pant, G., Srinivasan, P., Menczer, F.: Crawling the web. In: Web Dynamics. Springer, New York (2004)"},{"key":"258_CR34","first-page":"534","volume-title":"Lecture Notes in Computer Science","author":"Pedro Pereira","year":"2014","unstructured":"Pereira, P., Macedo, J., Craveiro, O., Madeira, H.: Time-aware focused web crawling. In: Proceedings of the ECIR\u201914 (2014)"},{"key":"258_CR35","doi-asserted-by":"crossref","unstructured":"Qin, J., Zhou, Y., Chau, M.: Building domain-specific Web collections for scientific digital libraries. In: Proceedings of the JCDL\u201904 (2004)","DOI":"10.1145\/996350.996383"},{"key":"258_CR36","unstructured":"Risse, T., Demidova, E., Gossen, G.: What do you want to collect from the web? In: Proceedings of the Building Web Observatories Workshop (BWOW) 2014 (2014)"},{"key":"258_CR37","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1016\/j.websem.2015.12.004","volume":"37","author":"M Rospocher","year":"2016","unstructured":"Rospocher, M., et al.: Building event-centric knowledge graphs from news. Web Semant. 37, 132\u2013151 (2016)","journal-title":"Web Semant."},{"key":"258_CR38","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/978-3-319-27932-9_14","volume-title":"Semantic Keyword-based Search on Structured Data Sources","author":"Tarcisio Souza","year":"2015","unstructured":"Souza, T., Demidova, E., Risse, T., Holzmann, H., Gossen, G., Szymanski, J.: Semantic URL analytics to support efficient annotation of large scale web archives. In: Proceedings of the First International KEYSTONE Conference, IKC 2015, Coimbra, Portugal, September 8\u20139, 2015. pp. 153\u2013166 (2015)"},{"key":"258_CR39","doi-asserted-by":"crossref","unstructured":"Vrande\u010di\u0107, D.: Wikidata: A new platform for collaborative data collection. In: Proceedings of the 21st International Conference on World Wide Web. WWW\u201912 Companion, ACM, pp. 1063\u20131064 (2012)","DOI":"10.1145\/2187980.2188242"}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-018-0258-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00799-018-0258-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-018-0258-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,20]],"date-time":"2020-02-20T04:04:14Z","timestamp":1582171454000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00799-018-0258-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,27]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,3]]}},"alternative-id":["258"],"URL":"https:\/\/doi.org\/10.1007\/s00799-018-0258-6","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"type":"print","value":"1432-5012"},{"type":"electronic","value":"1432-1300"}],"subject":[],"published":{"date-parts":[[2018,10,27]]},"assertion":[{"value":"18 January 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}