{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T10:05:12Z","timestamp":1766484312663,"version":"3.41.2"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031859595"},{"type":"electronic","value":"9783031859601"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-85960-1_9","type":"book-chapter","created":{"date-parts":[[2025,3,6]],"date-time":"2025-03-06T07:24:29Z","timestamp":1741245869000},"page":"197-214","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Web Crawl Refusals: Insights From Common Crawl"],"prefix":"10.1007","author":[{"given":"Mostafa","family":"Ansar","sequence":"first","affiliation":[]},{"given":"Anna","family":"Sperotto","sequence":"additional","affiliation":[]},{"given":"Ralph","family":"Holz","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,7]]},"reference":[{"key":"9_CR1","unstructured":"Pepyaka webserver. https:\/\/webtechsurvey.com\/technology\/pepyaka. Accessed May 2024"},{"key":"9_CR2","unstructured":"Ablove, A., et al.: Digital discrimination of users in sanctioned states: the case of the cuba embargo. In: 33rd USENIX Security Symposium (USENIX Security 2024), Philadelphia, PA, pp. 3909\u20133926. USENIX Association (2024). https:\/\/www.usenix.org\/conference\/usenixsecurity24\/presentation\/ablove"},{"key":"9_CR3","unstructured":"Afroz, S., Tschantz, M.C., Sajid, S., Qazi, S.A., Javed, M., Paxson, V.: Exploring server-side blocking of regions. arXiv abs\/1805.11606 (2018). https:\/\/api.semanticscholar.org\/CorpusID:44131334"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Ahmad, S.S., Dar, M.D., Zaffar, M.F., Vallina-Rodriguez, N., Nithyanand, R.: Apophanies or epiphanies? How crawlers impact our understanding of the web. In: Proceedings of The Web Conference 2020 (WWW 2020), pp. 271\u2013280 (2020)","DOI":"10.1145\/3366423.3380113"},{"key":"9_CR5","unstructured":"Asghari, H.: pyasn. https:\/\/github.com\/hadiasghari\/pyasn"},{"key":"9_CR6","unstructured":"Center for Applied Internet Data Analysis (CAIDA): AS Organizations Dataset (2024). https:\/\/catalog.caida.org\/dataset\/as_organizations. Accessed May 2024"},{"key":"9_CR7","unstructured":"Common Crawl: November\/december 2023 crawl archive now available. https:\/\/www.commoncrawl.org\/blog\/november-december-2023-crawl-archive-now-available. Accessed May 2024"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Darer, A., Farnan, O., Wright, J.: Automated discovery of internet censorship by web crawling. In: Proceedings of the 10th ACM Conference on Web Science (WebSci 2018), pp. 195\u2013204 (2018)","DOI":"10.1145\/3201064.3201091"},{"key":"9_CR9","doi-asserted-by":"publisher","unstructured":"Fielding, R.T., Nottingham, M.: Additional HTTP Status Codes. RFC 6585 (2012). https:\/\/doi.org\/10.17487\/RFC6585. https:\/\/www.rfc-editor.org\/info\/rfc6585","DOI":"10.17487\/RFC6585"},{"key":"9_CR10","doi-asserted-by":"publisher","unstructured":"Fielding, R.T., Nottingham, M., Reschke, J.: HTTP Semantics. RFC 9110 (2022). https:\/\/doi.org\/10.17487\/RFC9110. https:\/\/www.rfc-editor.org\/info\/rfc9110","DOI":"10.17487\/RFC9110"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Holz, R., Braun, L., Kammenhuber, N., Carle, G.: The SSL landscape - a thorough analysis of the X.509 PKI using active and passive measurements. In: Proceedings of the ACM\/USENIX 11th Annual Internet Measurement Conference (IMC), Berlin, Germany (2011)","DOI":"10.1145\/2068816.2068856"},{"key":"9_CR12","unstructured":"http.dev: HTTP status codes. https:\/\/http.dev\/status"},{"key":"9_CR13","unstructured":"Institute, R.: How many news websites block AI crawlers. Reuters Institute for the Study of Journalism (2023). https:\/\/reutersinstitute.politics.ox.ac.uk\/how-many-news-websites-block-ai-crawlers#:~:text=Examining. Accessed May 2024"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Invernizzi, L., Thomas, K., Kapravelos, A., Comanescu, O., Picod, J.M., Bursztein, E.: Cloak of visibility: detecting when machines browse a different web. In: 2016 IEEE Symposium on Security and Privacy (SP), pp. 743\u2013758 (2016)","DOI":"10.1109\/SP.2016.50"},{"key":"9_CR15","doi-asserted-by":"publisher","unstructured":"Koster, M., Illyes, G., Zeller, H., Sassman, L.: Robots Exclusion Protocol. RFC 9309 (2022). https:\/\/doi.org\/10.17487\/RFC9309. https:\/\/www.rfc-editor.org\/info\/rfc9309","DOI":"10.17487\/RFC9309"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Leonard, D., Loguinov, D.: Demystifying service discovery: implementing an internet-wide scanner. In: Proceedings of the ACM SIGCOMM Conference on Internet Measurement (IMC 2010), pp. 109\u2013122 (2010)","DOI":"10.1145\/1879141.1879156"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"McDonald, A., et al.: 403 forbidden: a global view of CDN geoblocking. In: Proceedings of the Internet Measurement Conference (IMC 2018), pp. 218\u2013230 (2018)","DOI":"10.1145\/3278532.3278552"},{"key":"9_CR18","unstructured":"Nagel, S.: Common crawl: data collection and use cases for NLP (2023). http:\/\/nlpl.eu\/skeikampen23\/nagel.230206.pdf. Accessed May 2024"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Niaki, A.A., et al.: ICLab: a global, longitudinal internet censorship measurement platform. In: 2020 IEEE Symposium on Security and Privacy (SP), pp. 135\u2013151 (2020)","DOI":"10.1109\/SP40000.2020.00014"},{"key":"9_CR20","unstructured":"Tschantz, M.C., Afroz, S., Sajid, S., Qazi, S.A., Javed, M., Paxson, V.: A bestiary of blocking: the motivations and modes behind website unavailability. In: 8th USENIX Workshop on Free and Open Communications on the Internet (FOCI 2018) (2018)"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Vastel, A., Rudametkin, W., Rouvoy, R., Blanc, X.: FP-crawlers: studying the resilience of browser fingerprinting to block crawlers. In: NDSS Workshop on Measurements, Attacks, and Defenses for the Web, MADWeb 2020 (2020)","DOI":"10.14722\/madweb.2020.23010"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Wan, G., et al.: On the origin of scanning: the impact of location on Internet-wide scans. In: Proceedings of the ACM Internet Measurement Conference (IMC 2020), pp. 662\u2013679 (2020)","DOI":"10.1145\/3419394.3424214"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Zeber, D., et al.: The representativeness of automated web crawls as a surrogate for human browsing. In: Proceedings of the Web Conference 2020 (WWW 2020), pp. 167\u2013178 (2020)","DOI":"10.1145\/3366423.3380104"}],"container-title":["Lecture Notes in Computer Science","Passive and Active Measurement"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-85960-1_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T14:08:16Z","timestamp":1753884496000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-85960-1_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031859595","9783031859601"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-85960-1_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"7 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"This work raises no ethical concerns. The CC dataset was ethically created\u00a0[]. We ran fewer than  DNS queries (about  FQDN samples) to identify their NS and PTR records, distributing them over time and sequentially to avoid network or nameserver load.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics"}},{"value":"PAM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Passive and Active Network Measurement","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 March 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 March 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pam2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/udesa.edu.ar\/pam25","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}