{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:39:07Z","timestamp":1742913547661,"version":"3.40.3"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031168017"},{"type":"electronic","value":"9783031168024"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-16802-4_19","type":"book-chapter","created":{"date-parts":[[2022,9,14]],"date-time":"2022-09-14T10:05:31Z","timestamp":1663149931000},"page":"245-259","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Robots Still Outnumber Humans in\u00a0Web Archives, But\u00a0Less Than Before"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4748-9176","authenticated-orcid":false,"given":"Himarsha R.","family":"Jayanetti","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6498-7391","authenticated-orcid":false,"given":"Kritika","family":"Garg","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8267-3326","authenticated-orcid":false,"given":"Sawood","family":"Alam","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3749-8116","authenticated-orcid":false,"given":"Michael L.","family":"Nelson","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2787-7166","authenticated-orcid":false,"given":"Michele C.","family":"Weigle","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,15]]},"reference":[{"key":"19_CR1","unstructured":"Alam, S.: AccessLog Parser and CLI (2019). https:\/\/github.com\/oduwsdl\/accesslog-parser"},{"key":"19_CR2","unstructured":"Alam, S., Cartledge, C.L., Nelson, M.L.: Support for various HTTP methods on the web. Technical report. arXiv:1405.2330 (2014)"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Alam, S., Nelson, M.L.: MemGator - a portable concurrent Memento aggregator: cross-platform CLI and server binaries in Go. In: Proceedings of the 16th ACM\/IEEE-CS Joint Conference on Digital Libraries, JCDL 2016, pp. 243\u2013244 (2016)","DOI":"10.1145\/2910896.2925452"},{"key":"19_CR4","doi-asserted-by":"publisher","unstructured":"Alam, S., Weigle, M.C., Nelson, M.L.: Profiling web archival voids for memento routing. In: Proceedings of the 21st ACM\/IEEE-CS Joint Conference on Digital Libraries, JCDL 2021, pp. 150\u2013159 (2021). https:\/\/doi.org\/10.1109\/JCDL52503.2021.00027","DOI":"10.1109\/JCDL52503.2021.00027"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"AlNoamany, Y., AlSum, A., Weigle, M.C., Nelson, M.L.: Who and what links to the Internet Archive. In: Proceedings of Theory and Practice of Digital Libraries (TPDL), pp. 346\u2013357 (2013)","DOI":"10.1007\/978-3-642-40501-3_35"},{"issue":"3","key":"19_CR6","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s00799-014-0111-5","volume":"14","author":"Y AlNoamany","year":"2014","unstructured":"AlNoamany, Y., AlSum, A., Weigle, M.C., Nelson, M.L.: Who and what links to the Internet Archive. Int. J. Digit. Libr. 14(3), 101\u2013115 (2014)","journal-title":"Int. J. Digit. Libr."},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"AlNoamany, Y., Weigle, M.C., Nelson, M.L.: Access patterns for robots and humans in web archives. In: Proceedings of the 13th ACM\/IEEE-CS Joint Conference on Digital Libraries, JCDL 2013, pp. 339\u2013348 (2013)","DOI":"10.1145\/2467696.2467722"},{"key":"19_CR8","unstructured":"Apache HTTP Server: Common Log Format and Combined Log Format (2013). https:\/\/httpd.apache.org\/docs\/trunk\/logs.html"},{"key":"19_CR9","doi-asserted-by":"publisher","unstructured":"Banos, V., Manolopoulos, Y.: A quantitative approach to evaluate Website Archivability using the CLEAR+ method. 17, 119\u2013141 (2016). https:\/\/doi.org\/10.1007\/s00799-015-0144-4","DOI":"10.1007\/s00799-015-0144-4"},{"key":"19_CR10","unstructured":"Berendt, B., Mobasher, B., Spiliopoulou, M., Wiltshire, J.: Measuring the accuracy of sessionizers for web usage analysis. In: Workshop on Web Mining at the First SIAM International Conference on Data Mining, pp. 7\u201314. SIAM, Philadelphia (2001)"},{"key":"19_CR11","unstructured":"Bidelman, E.: Getting Started with Headless Chrome (2018). https:\/\/developer.chrome.com\/blog\/headless-chrome\/"},{"key":"19_CR12","unstructured":"Burkholder, D.: DeviceDetector (2022). https:\/\/github.com\/thinkwelltwd\/device_detector"},{"key":"19_CR13","unstructured":"Castellano, G., Fanelli, A.M., Torsello, M.A.: LODAP: a LOg DAta preprocessor for mining web browsing patterns. In: Proceedings of the 6th WSEAS International Conference on Artificial Intelligence, Knowledge Engineering and Data Bases, pp. 12\u201317 (2007)"},{"key":"19_CR14","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1007\/978-3-540-73400-0_12","volume-title":"Applications of Fuzzy Sets Theory","author":"G Castellano","year":"2007","unstructured":"Castellano, G., Mesto, F., Minunno, M., Torsello, M.A.: Web user profiling using fuzzy clustering. In: Masulli, F., Mitra, S., Pasi, G. (eds.) WILF 2007. LNCS (LNAI), vol. 4578, pp. 94\u2013101. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-73400-0_12"},{"key":"19_CR15","doi-asserted-by":"publisher","unstructured":"Costa, M., Gomes, D., Couto, F.M., Silva, M.J.: A survey of web archive search architectures. In: Proceedings of the Temporal Web Analytics Workshop, TempWeb 2013, pp. 1045\u20131050 (2013). https:\/\/doi.org\/10.1145\/2487788.2488116","DOI":"10.1145\/2487788.2488116"},{"key":"19_CR16","unstructured":"Costa, M., Miranda, J., Cruz, D., Gomes, D.: Query suggestion for web archive search. In: iPRES (2013)"},{"key":"19_CR17","unstructured":"Costa, M., Silva, M.J.: Characterizing search behavior in web archives. In: TWAW (2011)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Fielding, R.T., Reschke, J.F.: Hypertext Transfer Protocol (HTTP\/1.1): Message Syntax and Routing (2014). https:\/\/tools.ietf.org\/html\/rfc7230","DOI":"10.17487\/rfc7230"},{"issue":"1","key":"19_CR19","first-page":"106","volume":"8","author":"D Gomes","year":"2014","unstructured":"Gomes, D., Costa, M.: The importance of web archives for humanities. Int. J. Hum. Arts Comput. 8(1), 106\u2013123 (2014)","journal-title":"Int. J. Hum. Arts Comput."},{"key":"19_CR20","doi-asserted-by":"publisher","unstructured":"Gomes, D., Costa, M., Cruz, D., Miranda, J., Fontes, S.: Creating a billion-scale searchable web archive. In: Proceedings of the Temporal Web Analytics Workshop, TempWeb 2013, pp. 1059\u20131066 (2013). https:\/\/doi.org\/10.1145\/2487788.2488118","DOI":"10.1145\/2487788.2488118"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Gomes, D., Cruz, D., Miranda, J., Costa, M., Fontes, S.: Search the past with the Portuguese web archive. In: Proceedings of the 22nd International Conference on World Wide Web, pp. 321\u2013324 (2013)","DOI":"10.1145\/2487788.2487934"},{"key":"19_CR22","unstructured":"Grcar, M.: User profiling: web usage mining. In: Proceedings of the 7th International Multiconference Information Society IS (2004)"},{"key":"19_CR23","unstructured":"Hidayat, A.: PhantomJS (2011). https:\/\/phantomjs.org\/"},{"issue":"1\u20132","key":"19_CR24","doi-asserted-by":"publisher","first-page":"113","DOI":"10.7227\/ALX.0023","volume":"25","author":"H Hockx-Yu","year":"2014","unstructured":"Hockx-Yu, H.: Access and scholarly use of web archives. Alexandria 25(1\u20132), 113\u2013127 (2014)","journal-title":"Alexandria"},{"key":"19_CR25","unstructured":"Internet Archive: Wayback Machine (2022). https:\/\/web.archive.org\/web\/20220527205606\/web.archive.org\/"},{"key":"19_CR26","unstructured":"Jayanetti, H.: Visualizations for Web Archive Access Log Datasets (2022). https:\/\/observablehq.com\/@himarshaj\/visualizations-for-web-archive-access-log-datasets"},{"key":"19_CR27","unstructured":"Jayanetti, H., Garg, K.: Access Patterns (2022). https:\/\/github.com\/oduwsdl\/access-patterns\/"},{"key":"19_CR28","unstructured":"Jayanetti, H., Garg, K.: Known Bot List (2022). https:\/\/github.com\/oduwsdl\/access-patterns\/tree\/main\/Known_Bot_List"},{"key":"19_CR29","unstructured":"Jones, S.M.: Improving collection understanding for web archives with storytelling: shining light into dark and stormy archives. Ph.D. thesis, Old Dominion University (2021). https:\/\/doi.org\/10.25777\/zts6-v512"},{"key":"19_CR30","unstructured":"Jones, S.M., et al.: The DSA toolkit shines light into dark and stormy archives. Code4Lib J. (2022). https:\/\/journal.code4lib.org\/articles\/16441"},{"key":"19_CR31","doi-asserted-by":"crossref","unstructured":"Koster, M., Illyes, G., Zeller, H., Sassman, L.: Robots Exclusion Protocol (2022). https:\/\/datatracker.ietf.org\/doc\/html\/draft-koster-rep-08","DOI":"10.17487\/RFC9309"},{"key":"19_CR32","unstructured":"Kreymer, I., Rosenthal, D.S.H.: Guest Post: Ilya Kreymer on oldweb.today (2016). https:\/\/blog.dshr.org\/2016\/01\/guest-post-ilya-kreymer-on-oldwebtoday.html"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Kreymer, I., Rosenthal, D.S.H.: Announcing the New OldWeb.today (2020). https:\/\/webrecorder.net\/2020\/12\/23\/new-oldweb-today.html","DOI":"10.1002\/say.30808"},{"key":"19_CR34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19460-3","volume-title":"Web Data Mining: Exploring Hyperlinks, Contents, and Usage Data","author":"B Liu","year":"2011","unstructured":"Liu, B.: Web Data Mining: Exploring Hyperlinks, Contents, and Usage Data, vol. 1. Springer, Cham (2011). https:\/\/doi.org\/10.1007\/978-3-642-19460-3"},{"key":"19_CR35","unstructured":"Mabe, A., et al.: Visualizing Webpage Changes Over Time (2020). http:\/\/arxiv.org\/abs\/2006.02487"},{"key":"19_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/978-3-642-33290-6_22","volume-title":"Theory and Practice of Digital Libraries","author":"L Meneses","year":"2012","unstructured":"Meneses, L., Furuta, R., Shipman, F.: Identifying \u201cSoft 404\u2019\u2019 error pages: analyzing the lexical signatures of documents in distributed collections. In: Zaphiris, P., Buchanan, G., Rasmussen, E., Loizides, F. (eds.) TPDL 2012. LNCS, vol. 7489, pp. 197\u2013208. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33290-6_22"},{"key":"19_CR37","doi-asserted-by":"crossref","unstructured":"Mobasher, B.: Web usage mining. In: Encyclopedia of Data Warehousing and Mining, pp. 1216\u20131220. IGI Global (2005)","DOI":"10.4018\/978-1-59140-557-3.ch229"},{"issue":"8","key":"19_CR38","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1145\/345124.345169","volume":"43","author":"B Mobasher","year":"2000","unstructured":"Mobasher, B., Cooley, R., Srivastava, J.: Automatic personalization based on web usage mining. Commun. ACM 43(8), 142\u2013151 (2000)","journal-title":"Commun. ACM"},{"key":"19_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/3-540-44463-7_15","volume-title":"Electronic Commerce and Web Technologies","author":"B Mobasher","year":"2000","unstructured":"Mobasher, B., Dai, H., Luo, T., Sun, Y., Zhu, J.: Integrating web usage and content mining for more effective personalization. In: Bauknecht, K., Madria, S.K., Pernul, G. (eds.) EC-Web 2000. LNCS, vol. 1875, pp. 165\u2013176. Springer, Heidelberg (2000). https:\/\/doi.org\/10.1007\/3-540-44463-7_15"},{"key":"19_CR40","doi-asserted-by":"crossref","unstructured":"Mughal, M.J.H.: Data mining: web data mining techniques, tools and algorithms: an overview. Int. J. Adv. Comput. Sci. Appl. 9(6) (2018)","DOI":"10.14569\/IJACSA.2018.090630"},{"key":"19_CR41","unstructured":"Newbold, B.: Search Scholarly Materials Preserved in the Internet Archive (2021). https:\/\/blog.archive.org\/2021\/03\/09\/search-scholarly-materials-preserved-in-the-internet-archive\/"},{"issue":"4","key":"19_CR42","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1023\/A:1026238916441","volume":"13","author":"D Pierrakos","year":"2003","unstructured":"Pierrakos, D., Paliouras, G., Papatheodorou, C., Spyropoulos, C.D.: Web usage mining as a tool for personalization: a survey. User Model. User-Adap. Inter. 13(4), 311\u2013372 (2003)","journal-title":"User Model. User-Adap. Inter."},{"issue":"1","key":"19_CR43","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s00799-021-00314-x","volume":"23","author":"B Reyes Ayala","year":"2022","unstructured":"Reyes Ayala, B.: Correspondence as the primary measure of information quality for web archives: a human-centered grounded theory study. Int. J. Digit. Libr. 23(1), 19\u201331 (2022)","journal-title":"Int. J. Digit. Libr."},{"key":"19_CR44","unstructured":"Selenium: Selenium Client Driver (2018). https:\/\/selenium.dev\/selenium\/docs\/api\/py\/"},{"key":"19_CR45","unstructured":"Siregar, E.: Deploying the Memento-Damage Service (2017). https:\/\/ws-dl.blogspot.com\/2017\/11\/2017-11-22-deploying-memento-damage.html"},{"key":"19_CR46","doi-asserted-by":"publisher","unstructured":"Srivastava, J., Cooley, R., Deshpande, M., Tan, P.N.: Web usage mining: discovery and applications of usage patterns from web data. SIGKDD Explor. Newsl. 1(2), 12\u201323 (2000). https:\/\/doi.org\/10.1145\/846183.846188","DOI":"10.1145\/846183.846188"},{"issue":"2","key":"19_CR47","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1145\/846183.846188","volume":"1","author":"J Srivastava","year":"2000","unstructured":"Srivastava, J., Cooley, R., Deshpande, M., Tan, P.: Web usage mining: discovery and applications of usage patterns from web data. SIGKDD Explor. 1(2), 12\u201323 (2000)","journal-title":"SIGKDD Explor."},{"issue":"3","key":"19_CR48","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1016\/j.comnet.2008.09.021","volume":"53","author":"A Stassopoulou","year":"2009","unstructured":"Stassopoulou, A., Dikaiakos, M.D.: Web robot detection: a probabilistic reasoning approach. Comput. Netw. 53(3), 265\u2013278 (2009)","journal-title":"Comput. Netw."},{"key":"19_CR49","doi-asserted-by":"crossref","unstructured":"Van de Sompel, H., Nelson, M.L., Sanderson, R.: HTTP Framework for Time-Based Access to Resource States - Memento (2013). http:\/\/tools.ietf.org\/html\/rfc7089","DOI":"10.17487\/rfc7089"},{"key":"19_CR50","doi-asserted-by":"crossref","unstructured":"Varnagar, C.R., Madhak, N.N., Kodinariya, T.M., Rathod, J.N.: Web usage mining: a review on process, methods and techniques. In: 2013 International Conference on Information Communication and Embedded Systems (ICICES), pp. 40\u201346. IEEE (2013)","DOI":"10.1109\/ICICES.2013.6508399"},{"key":"19_CR51","doi-asserted-by":"publisher","unstructured":"Zaiane, O.: Web usage mining for a better web-based learning environment. Technical report. TR01-05, University of Alberta (2001). https:\/\/doi.org\/10.7939\/R3736M20P","DOI":"10.7939\/R3736M20P"},{"key":"19_CR52","unstructured":"Zeller, H., Harvey, L., Illyes, G.: Formalizing the Robots Exclusion Protocol Specification (2019). https:\/\/webmasters.googleblog.com\/2019\/07\/rep-id.html"}],"container-title":["Lecture Notes in Computer Science","Linking Theory and Practice of Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-16802-4_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T18:42:03Z","timestamp":1710268923000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-16802-4_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031168017","9783031168024"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-16802-4_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"15 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPDL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Theory and Practice of Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Padua","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpdl2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tpdl2022.dei.unipd.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"107","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"15 accelerating innovation papers included in these proceedings","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}