{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T01:05:05Z","timestamp":1781226305778,"version":"3.54.1"},"reference-count":332,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Data &amp; Knowledge Engineering"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.datak.2026.102598","type":"journal-article","created":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T15:38:26Z","timestamp":1774885106000},"page":"102598","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A systematic review of web scraping: Techniques, LLM-enhanced approaches, performance metrics, and legal\u2013ethical issues"],"prefix":"10.1016","volume":"164","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4209-727X","authenticated-orcid":false,"given":"Navroz Kaur","family":"Kahlon","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Williamjeet","family":"Singh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.datak.2026.102598_b1","series-title":"International telecommunication union: Statistics","author":"ITU","year":"2023"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b2","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pone.0169658","article-title":"Vigi4Med scraper: A framework for web forum structured data extraction and semantic representation","volume":"12","author":"Audeh","year":"2017","journal-title":"PLoS One"},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b3","doi-asserted-by":"crossref","first-page":"1165","DOI":"10.1016\/j.infsof.2013.01.008","article-title":"Software clone detection: A systematic review","volume":"55","author":"Rattan","year":"2013","journal-title":"Inf. Softw. Technol."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b4","article-title":"A survey of web crawlers for information retrieval","volume":"7","author":"Kumar","year":"2017","journal-title":"Wiley Interdiscip. Rev.: Data Min. Knowl. Discov."},{"key":"10.1016\/j.datak.2026.102598_b5","series-title":"Guidelines for Performing Systematic Literature Reviews in Software Engineering","first-page":"65","author":"Kitchenham","year":"2007"},{"key":"10.1016\/j.datak.2026.102598_b6","series-title":"Web scraping history: The origins of web scraping","author":"Robot","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b7","series-title":"DataHub","year":"2025"},{"key":"10.1016\/j.datak.2026.102598_b8","series-title":"Individuals using the internet","year":"2025"},{"key":"10.1016\/j.datak.2026.102598_b9","unstructured":"What is web scraping? https:\/\/www.webharvy.com\/articles\/what-is-web-scraping.html."},{"key":"10.1016\/j.datak.2026.102598_b10","series-title":"World Conference on Futuristic Trends in Research and Innovation for Social Welfare","article-title":"Web data extraction techniques: A review","author":"Kamanwar","year":"2016"},{"key":"10.1016\/j.datak.2026.102598_b11","series-title":"6th International Conference on Electrical Engineering and Informatics: Sustainable Society Through Digital Innovation","first-page":"1","article-title":"Wrapper approaches for web data extraction: A review","author":"Bin Mohd Azir","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b12","series-title":"1st International Conference on Data Science, Machine Learning and Applications","article-title":"A comparative study of various approaches to adaptive web scraping","author":"Kumar","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b13","unstructured":"S.C.M. de S. Sirisuriya, A Comparative Study on Web Scraping, in: 8th International Research Conference, 2015, pp. 135\u2013140."},{"key":"10.1016\/j.datak.2026.102598_b14","doi-asserted-by":"crossref","unstructured":"R. Diouf, U.D. Thies, U.-s. Michel, Web Scraping : State-of-the-Art and Areas of Application, in: IEEE International Conference on Big Data, Big Data, ISBN: 9781728108582, 2019, pp. 6040\u20136042.","DOI":"10.1109\/BigData47090.2019.9005594"},{"key":"10.1016\/j.datak.2026.102598_b15","series-title":"Web scraping for research: Legal, ethical, institutional, and scientific considerations","first-page":"1","author":"Brown","year":"2024"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b16","first-page":"363","article-title":"An overview on web scraping techniques and tools","volume":"4","author":"Saurkar","year":"2018","journal-title":"Int. J. Futur. Revolut. Comput. Sci. Commun. Eng."},{"issue":"0123456789","key":"10.1016\/j.datak.2026.102598_b17","article-title":"Algorithmic thinking in the public interest: navigating technical, legal, and ethical hurdles to web scraping in the social sciences","author":"Luscombe","year":"2021","journal-title":"Qual. Quant."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b18","doi-asserted-by":"crossref","first-page":"275","DOI":"10.1007\/s11831-021-09593-8","article-title":"An insight on software features supporting software transplantation: A systematic review","volume":"29","author":"Sodhi","year":"2022","journal-title":"Arch. Comput. Methods Eng."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b19","doi-asserted-by":"crossref","first-page":"951","DOI":"10.1007\/s11831-020-09403-7","article-title":"State-of-the-art segmentation techniques and future directions for multiple sclerosis brain lesions","volume":"28","author":"Kaur","year":"2021","journal-title":"Arch. Comput. Methods Eng."},{"key":"10.1016\/j.datak.2026.102598_b20","series-title":"ACM SIGMOD International Conference on Management of Data","first-page":"337","article-title":"Extracting structured data from web pages","author":"Arasu","year":"2003"},{"key":"10.1016\/j.datak.2026.102598_b21","series-title":"5th Asian-Pacific Web Conference on Web Technologies and Applications","article-title":"Extracting content structure for web pages based on visual representation","author":"Cai","year":"2002"},{"key":"10.1016\/j.datak.2026.102598_b22","unstructured":"V. Crescenzi, G. Mecca, P. Merialdo, RoadRunner: Towards automatic data extraction from large web sites, in: VLDB 2001 - Proceedings of 27th International Conference on Very Large Data Bases, ISBN: 1558608044, 2001, pp. 109\u2013118."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b23","first-page":"973","article-title":"Optimized template detection and extraction algorithm for web scraping of dynamic web pages","volume":"13","author":"Gupta","year":"2017","journal-title":"Glob. J. Pure Appl. Math."},{"issue":"11","key":"10.1016\/j.datak.2026.102598_b24","doi-asserted-by":"crossref","first-page":"4355","DOI":"10.1007\/s10489-018-1208-0","article-title":"A novel alignment algorithm for effective web data extraction from singleton-item pages","volume":"48","author":"Yuliana","year":"2018","journal-title":"Appl. Intell."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b25","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1007\/s10489-019-01499-0","article-title":"DCADE: divide and conquer alignment with dynamic encoding for full page data extraction","volume":"50","author":"Yuliana","year":"2020","journal-title":"Appl. Intell."},{"key":"10.1016\/j.datak.2026.102598_b26","series-title":"10th International Conference on World Wide Web","first-page":"681","article-title":"IEPAD: Information extraction based on pattern discovery","author":"Chang","year":"2001"},{"key":"10.1016\/j.datak.2026.102598_b27","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1016\/j.knosys.2012.10.009","article-title":"TEX: An efficient and effective unsupervised web information extractor","volume":"39","author":"Sleiman","year":"2013","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.datak.2026.102598_b28","series-title":"Twenty-Third ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems","article-title":"The lixto data extraction project \u2013 back and forth between theory and practice","author":"Gottlob","year":"2004"},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b29","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1109\/MIS.2004.71","article-title":"OLERA: Semisupervised web-data extraction with visual support","volume":"19","author":"Chang","year":"2004","journal-title":"IEEE Intell. Syst."},{"key":"10.1016\/j.datak.2026.102598_b30","doi-asserted-by":"crossref","unstructured":"K. Simon, G. Lausen, ViPER: Augmenting automatic information extraction with visual preceptions, in: 14th ACM International Conference on Information and Knowledge Management, ISBN: 1595931406, 2005, pp. 381\u2013388.","DOI":"10.1145\/1099554.1099672"},{"key":"10.1016\/j.datak.2026.102598_b31","series-title":"12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"494","article-title":"Simultaneous record detection and attribute labeling in web data extraction","author":"Zhu","year":"2006"},{"key":"10.1016\/j.datak.2026.102598_b32","first-page":"1","article-title":"Deep web data extraction based on visual information processing","author":"Liu","year":"2017","journal-title":"J. Ambient. Intell. Humaniz. Comput."},{"key":"10.1016\/j.datak.2026.102598_b33","series-title":"2018 World Wide Web Conference","first-page":"1095","article-title":"Browserless web data extraction: Challenges and opportunities","author":"Fayzrakhmanov","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b34","series-title":"Asian Conference on Intelligent Information and Database Systems","first-page":"77","article-title":"Robust web data extraction based on unsupervised visual validation","volume":"vol. 11431","author":"Potvin","year":"2019"},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b35","doi-asserted-by":"crossref","first-page":"447","DOI":"10.1109\/TKDE.2009.109","article-title":"ViDE: A vision-based approach for deep web data extraction","volume":"22","author":"Liu","year":"2010","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.datak.2026.102598_b36","unstructured":"R. Baumgartner, S. Flesca, G. Gottlob, Visual web information extraction with lixto, in: VLDB 2001 - Proceedings of 27th International Conference on Very Large Data Bases, ISBN: 1558608044, 2001, pp. 119\u2013128."},{"key":"10.1016\/j.datak.2026.102598_b37","series-title":"13th International Conference on World Wide Web","first-page":"502","article-title":"Automatic web news extraction using tree edit distance","author":"De Reis","year":"2004"},{"issue":"12","key":"10.1016\/j.datak.2026.102598_b38","first-page":"3","article-title":"STAVIES: A system for information extraction from unknown web data sources through automatic web wrapper generation using clustering techniques","volume":"17","author":"Jaffali","year":"2005","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b39","doi-asserted-by":"crossref","first-page":"438","DOI":"10.1007\/s10115-004-0188-z","article-title":"Web data extraction based on structural similarity","volume":"8","author":"Li","year":"2005","journal-title":"Knowl. Inf. Syst."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b40","first-page":"1583","article-title":"Dynamic hierarchical Markov random fields for integrated web data extraction","volume":"9","author":"Zhu","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.datak.2026.102598_b41","series-title":"10th SIAM International Conference on Data Mining","first-page":"930","article-title":"A generalized tree matching algorithm considering nested lists for web data extraction","author":"Jindal","year":"2010"},{"key":"10.1016\/j.datak.2026.102598_b42","series-title":"19th ACM International Conference on Information and Knowledge Management","first-page":"39","article-title":"Automatic extraction of web data records containing user-generated content","author":"Song","year":"2010"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b43","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1109\/TKDE.2009.82","article-title":"FiVaTech: Page-level web data extraction from template pages","volume":"22","author":"Kayed","year":"2010","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.datak.2026.102598_b44","series-title":"34th International ACM SIGIR Conference on Research and Development in Information Retrieval","first-page":"775","article-title":"From one tree to a forest: A unified solution for structured web data extraction","author":"Hao","year":"2011"},{"key":"10.1016\/j.datak.2026.102598_b45","series-title":"WSDM 2017 - Proceedings of the 10th ACM International Conference on Web Search and Data Mining","first-page":"385","article-title":"Synthesis of forgiving data extractors","author":"Omari","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b46","doi-asserted-by":"crossref","first-page":"417","DOI":"10.1016\/j.procs.2019.12.124","article-title":"Web data extraction approach for deep web using WEIDJ","volume":"163","author":"Ahmad Sabri","year":"2019","journal-title":"Procedia Comput. Sci."},{"key":"10.1016\/j.datak.2026.102598_b47","series-title":"IEEE International Conference on Systems, Man and Cybernetics","first-page":"3420","article-title":"Deep web data extraction","author":"Hong","year":"2010"},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b48","doi-asserted-by":"crossref","first-page":"854","DOI":"10.1109\/TSMCC.2010.2089678","article-title":"Data extraction for deep web using WordNet","volume":"41","author":"Hong","year":"2011","journal-title":"IEEE Trans. Syst. Man Cybern. C"},{"key":"10.1016\/j.datak.2026.102598_b49","series-title":"IEEE 28th International Conference on Data Engineering","first-page":"726","article-title":"Automatic extraction of structured web data with domain knowledge","author":"Derouiche","year":"2012"},{"key":"10.1016\/j.datak.2026.102598_b50","series-title":"2018 IEEE International Conference on Communication, Networks and Satellite","first-page":"44","article-title":"Resource description framework generation for tropical disease using web scraping","author":"Amalia","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b51","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2019.106518","article-title":"Effective information retrieval and feature minimization technique for semantic web data","volume":"81","author":"Saravana Kumar","year":"2020","journal-title":"Comput. Electr. Eng."},{"key":"10.1016\/j.datak.2026.102598_b52","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1016\/j.imu.2018.01.003","article-title":"Automated scraping of structured data records from health discussion forums using semantic analysis","volume":"10","author":"Kumaresan","year":"2018","journal-title":"Inform. Med. Unlocked"},{"issue":"14","key":"10.1016\/j.datak.2026.102598_b53","doi-asserted-by":"crossref","first-page":"1845","DOI":"10.14778\/2733085.2733091","article-title":"Diadem","volume":"7","author":"Furche","year":"2014","journal-title":"Proc. VLDB Endow."},{"key":"10.1016\/j.datak.2026.102598_b54","series-title":"International Conference on Sustainable Information Engineering and Technology","first-page":"226","article-title":"Increased information retrieval capabilities om e-commerce websites using scraping techniques","author":"Deborah","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b55","series-title":"16th IEEE International Conference on Machine Learning and Applications","first-page":"125","article-title":"Classification-based adaptive web scraper","author":"Ujwal","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b56","series-title":"IEEE International Conference on Smart Cloud","first-page":"138","article-title":"Cloud based web scraping for big data applications","author":"Chaulagain","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b57","series-title":"8th IEEE Annual Information Technology, Electronics and Mobile Communication Conference","first-page":"680","article-title":"Phishing website detection framework through web scraping and data mining","author":"Park","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b58","series-title":"International Conference on Electrical Engineering and Computer Science","first-page":"385","article-title":"Web scraping techniques to collect weather data in south sumatera","volume":"vol. 17","author":"Purnamasari","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b59","series-title":"A web scraping methodology for bypassing Twitter API restrictions","first-page":"1","author":"Hernandez-Suarez","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b60","doi-asserted-by":"crossref","first-page":"444","DOI":"10.1016\/j.procs.2019.08.237","article-title":"Social media web scraping using social media developers API and regex","volume":"157","author":"Citra","year":"2019","journal-title":"Procedia Comput. Sci."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b61","doi-asserted-by":"crossref","first-page":"41","DOI":"10.4018\/IJWP.2019070103","article-title":"Personalized content extraction and text classification using effective web scraping techniques","volume":"11","author":"Karthikeyan","year":"2019","journal-title":"Int. J. Web Portals"},{"key":"10.1016\/j.datak.2026.102598_b62","doi-asserted-by":"crossref","first-page":"61726","DOI":"10.1109\/ACCESS.2020.2984503","article-title":"A novel web scraping approach using the additional information obtained from web pages","volume":"8","author":"Uzun","year":"2020","journal-title":"IEEE Access"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b63","first-page":"1","article-title":"Smart algorithmic based web crawling and scraping with template autoupdate capabilities","volume":"33","author":"Qudus","year":"2020","journal-title":"Concurr. Comput.: Pr. Exp."},{"key":"10.1016\/j.datak.2026.102598_b64","doi-asserted-by":"crossref","unstructured":"J.G. Thomsen, E. Ernst, C. Brabrand, M. Schwartzbach, WebSelF : A Web Scraping Framework, in: International Conference on Web Engineering, 2012, pp. 347\u2013361.","DOI":"10.1007\/978-3-642-31753-8_28"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b65","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1016\/j.compag.2009.12.006","article-title":"Development of an automated climatic data scraping , filtering and display system","volume":"71","author":"Yang","year":"2010","journal-title":"Comput. Electron. Agric."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b66","first-page":"1027","article-title":"Integration of web scraping, fine-tuning, and data enrichment in a continuous monitoring context via large language model operations","volume":"15","author":"Bodor","year":"2025","journal-title":"Int. J. Electr. Comput. Eng."},{"key":"10.1016\/j.datak.2026.102598_b67","series-title":"LLM-based web data collection for research dataset creation","author":"Berkane","year":"2025"},{"key":"10.1016\/j.datak.2026.102598_b68","series-title":"Leveraging large language models for web scraping","author":"Ahluwalia","year":"2024"},{"key":"10.1016\/j.datak.2026.102598_b69","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1016\/j.knosys.2014.07.007","article-title":"Web data extraction, applications and techniques: A survey","volume":"70","author":"Ferrara","year":"2014","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b70","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1111\/jbl.12120","article-title":"Accessing online data : Web-crawling and information-scraping techniques to automate the assembly of research data","volume":"37","author":"Massimino","year":"2016","journal-title":"J. Bus. Logist."},{"issue":"10","key":"10.1016\/j.datak.2026.102598_b71","doi-asserted-by":"crossref","first-page":"1411","DOI":"10.1109\/TKDE.2006.152","article-title":"A survey of web information extraction systems","volume":"18","author":"De Marco","year":"2006","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"October","key":"10.1016\/j.datak.2026.102598_b72","first-page":"66","article-title":"New indices for text: PAT trees and PAT arrays","author":"Gonnet","year":"1992","journal-title":"Inf. Retr.: Data Struct. Algorithms"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b73","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1108\/EUM0000000007186","article-title":"Overview of okapi projects","volume":"53","author":"Robertson","year":"2018","journal-title":"J. Doc."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b74","doi-asserted-by":"crossref","first-page":"853","DOI":"10.1016\/S0306-4573(02)00084-5","article-title":"Engineering a multi-purpose test collection for web retrieval experiments","volume":"39","author":"Bailey","year":"2003","journal-title":"Inf. Process. Manag."},{"key":"10.1016\/j.datak.2026.102598_b75","series-title":"14th International Conference on World Wide Web","first-page":"66","article-title":"Fully automatic wrapper generation for search engines","author":"Zhao","year":"2005"},{"key":"10.1016\/j.datak.2026.102598_b76","series-title":"European Semantic Web Conference","first-page":"515","article-title":"The personal publication reader: Illustrating web data extraction, personalization and reasoning for the semantic web","author":"Baumgartner","year":"2005"},{"key":"10.1016\/j.datak.2026.102598_b77","series-title":"14th International Conference on World Wide Web","first-page":"76","article-title":"Web data extraction based on partial tree alignment","author":"Zhai","year":"2005"},{"key":"10.1016\/j.datak.2026.102598_b78","series-title":"Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"601","article-title":"Mining data records in web pages","author":"Liu","year":"2003"},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b79","doi-asserted-by":"crossref","first-page":"1109","DOI":"10.1007\/s11280-013-0248-y","article-title":"Information extraction for deep web using repetitive subject pattern","volume":"17","author":"Thamviset","year":"2014","journal-title":"World Wide Web"},{"key":"10.1016\/j.datak.2026.102598_b80","series-title":"24th International Conference on Machine Learning","first-page":"1175","article-title":"Dynamic hierarchical Markov random fields and their application to web data extraction","author":"Zhu","year":"2007"},{"key":"10.1016\/j.datak.2026.102598_b81","series-title":"13th International World Wide Web Conference on Alternate Track Papers & Posters","first-page":"346","article-title":"Testbed for information extraction from deep web","author":"Yamada","year":"2004"},{"key":"10.1016\/j.datak.2026.102598_b82","series-title":"32nd International Conference on Very Large Data Bases","first-page":"989","article-title":"Automatic extraction of dynamic record sections from search engine result pages","author":"Zhao","year":"2006"},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b83","doi-asserted-by":"crossref","first-page":"1544","DOI":"10.1109\/TKDE.2013.161","article-title":"Trinity: On using trinary trees for unsupervised web data extraction","volume":"26","author":"Sleiman","year":"2014","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.datak.2026.102598_b84","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1016\/0020-0190(77)90064-3","article-title":"The tree-to-tree editing problem","volume":"6","author":"Selkow","year":"1977","journal-title":"Inform. Process. Lett."},{"key":"10.1016\/j.datak.2026.102598_b85","series-title":"International Conference on Distributed Computing Systems","first-page":"361","article-title":"A fully automated object extraction system for the world wide web","author":"Buttler","year":"2001"},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b86","first-page":"752","article-title":"Improving performance of DOM in semi-structured data extraction using WEIDJ model","volume":"9","author":"Sabri","year":"2018","journal-title":"Indones. J. Electr. Eng. Comput. Sci."},{"key":"10.1016\/j.datak.2026.102598_b87","series-title":"WordNet: An Electronic Lexical Database","author":"Soergel","year":"1998"},{"key":"10.1016\/j.datak.2026.102598_b88","series-title":"16th International World Wide Web Conference","first-page":"697","article-title":"Yago: A core of semantic knowledge","author":"Suchanek","year":"2007"},{"key":"10.1016\/j.datak.2026.102598_b89","series-title":"14th Conference on Computational Linguistics","first-page":"539","article-title":"Automatic acquisition of hyponyms from large text corpora lexico-syntactic for hyponymy patterns","author":"Heart","year":"1992"},{"key":"10.1016\/j.datak.2026.102598_b90","series-title":"18th International Workshop on Web and Databases","first-page":"13","article-title":"IBEX: Harvesting entities from the web using unique identifiers","author":"Talaika","year":"2015"},{"key":"10.1016\/j.datak.2026.102598_b91","unstructured":"F. Tim, G. Gottlob, G. Grasso, G. Orsi, C. Schallhart, C. Wang, Little Knowledge Rules the Web: Domain-Centric Result Page Extraction, in: 5th International Conference on Web Reasoning and Rule Systems, ISBN: 3540287922, 2005, pp. 61\u201376."},{"key":"10.1016\/j.datak.2026.102598_b92","series-title":"Archiving Relational Databases using a Semantic Web Representation","first-page":"1","author":"Kumar","year":"2007"},{"issue":"February","key":"10.1016\/j.datak.2026.102598_b93","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1016\/j.imu.2018.01.003","article-title":"Informatics in medicine unlocked automated scraping of structured data records from health discussion forums using semantic analysis","volume":"10","author":"Baskaran","year":"2018","journal-title":"Inform. Med. Unlocked"},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b94","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1093\/bib\/bbt026","article-title":"Web scraping technologies in an API world","volume":"15","author":"Glez-Pe\u00f1a","year":"2013","journal-title":"Brief. Bioinform."},{"issue":"September","key":"10.1016\/j.datak.2026.102598_b95","first-page":"117","article-title":"Comparative study of amazon EC2 and microsoft azure cloud architecture","author":"Gandhi","year":"2014","journal-title":"Int. J. Adv. Netw. Appl. (IJANA)"},{"key":"10.1016\/j.datak.2026.102598_b96","unstructured":"HAP parser. https:\/\/html-agility-pack.net\/."},{"key":"10.1016\/j.datak.2026.102598_b97","series-title":"AngleSharp parser","author":"Github","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b98","series-title":"Scrapy","year":"2021"},{"key":"10.1016\/j.datak.2026.102598_b99","unstructured":"K. Reitz, Requests package. https:\/\/docs.python-requests.org\/en\/master\/."},{"key":"10.1016\/j.datak.2026.102598_b100","unstructured":"B. Muthukadan, Selenium. https:\/\/selenium-python.readthedocs.io\/index.html."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b101","doi-asserted-by":"crossref","first-page":"365","DOI":"10.1108\/eb026584","article-title":"Foundation of evaluation","volume":"30","author":"Van Rijsbergen","year":"1974","journal-title":"J. Doc."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b102","doi-asserted-by":"crossref","first-page":"205","DOI":"10.5614\/itbj.ict.res.appl.2021.15.3.1","article-title":"Development of focused crawlers for building large Punjabi news corpus","volume":"15","author":"Mahi","year":"2021","journal-title":"J. ICT Res. Appl."},{"key":"10.1016\/j.datak.2026.102598_b103","series-title":"2017 IEEE 14th International Scientific Conference on Informatics, INFORMATICS 2017 - Proceedings","first-page":"13","article-title":"Deep learning powered automated tool for generating image based datasets","volume":"vol. 2018-Janua","author":"Arsenovic","year":"2018"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b104","article-title":"Designing android gaming news & information application using java-based web scraping technique","volume":"1230","author":"Indra","year":"2019","journal-title":"J. Phys.: Conf. Ser."},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b105","first-page":"0","article-title":"Design of a daily brief business report generator based on web scraping with KNN algorithm","volume":"1345","author":"Lin","year":"2019","journal-title":"J. Phys.: Conf. Ser."},{"key":"10.1016\/j.datak.2026.102598_b106","series-title":"5th International Conference on Computing Engineering and Design","first-page":"1","article-title":"Building corpus in bahasa Indonesia for pornographic indicated website content","author":"Chandra","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b107","series-title":"2019 4th International Conference on Information Technology, Information Systems and Electrical Engineering","first-page":"40","article-title":"The best parameters to select instagram account for endorsement using web scraping","author":"Akrianto","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b108","series-title":"2018 International Conference on Computing, Power and Communication Technologies","first-page":"548","article-title":"Sentiment classification for mobile reviews using KNIME","author":"Chauhan","year":"2019"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b109","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1504\/IJWET.2018.097561","article-title":"Stable web scraping: An approach based on neighbour zone and path similarity of page elements","volume":"13","author":"Gao","year":"2018","journal-title":"Int. J. Web Eng. Technol."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b110","doi-asserted-by":"crossref","first-page":"275","DOI":"10.12928\/telkomnika.v17i1.11613","article-title":"Implementation of web scraping on GitHub task monitoring system","volume":"17","author":"Awangga","year":"2019","journal-title":"Telkomnika (Telecommun. Comput. Electron. Control)"},{"key":"10.1016\/j.datak.2026.102598_b111","series-title":"Proceedings - 2019 IEEE 35th International Conference on Data Engineering Workshops","first-page":"1","article-title":"Recursive stock price prediction with machine learning and web scrapping for specified time period","volume":"vol. 2019-Decem","author":"Maurya","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b112","series-title":"Proceedings - 2019 IEEE 35th International Conference on Data Engineering Workshops","first-page":"96","article-title":"Recommendation of Indian cuisine recipes based on ingredients","author":"Nilesh","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b113","series-title":"2019 International Conference on Computer, Control, Informatics and its Applications: Emerging Trends in Big Data and Artificial Intelligence","first-page":"120","article-title":"Searching region of interest from news website using steepest ascent hill climbing algorithm","author":"Mustika","year":"2019"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b114","first-page":"59","article-title":"Implementation of web scraping to build a web-based instagram account data downloader application","volume":"9","author":"Himawan","year":"2020","journal-title":"Int. J. Inform. Dev."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b115","doi-asserted-by":"crossref","first-page":"501","DOI":"10.1002\/agr.21653","article-title":"Does the distribution of ratings affect online grocery sales? Evidence from amazon","volume":"36","author":"Etumnu","year":"2020","journal-title":"Agribusiness"},{"key":"10.1016\/j.datak.2026.102598_b116","series-title":"Extraction and integration of partially overlapping web sources","first-page":"805","author":"Bronzi","year":"2013"},{"key":"10.1016\/j.datak.2026.102598_b117","series-title":"Proceedings - 2017 IEEE 26th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises","first-page":"18","article-title":"Evaluate a personalized multi agent system through social networks: Web scraping","author":"Trifa","year":"2017"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b118","doi-asserted-by":"crossref","DOI":"10.1016\/j.oooo.2017.05.401","article-title":"Web scraping computer program for the estomato web software: a potential tool for oral medicine practice and research","volume":"124","author":"Zanon","year":"2017","journal-title":"Oral Surg. Oral Med. Oral Pathol. Oral Radiol."},{"issue":"February","key":"10.1016\/j.datak.2026.102598_b119","article-title":"Daylight savings time transitions on football injuries and key performance indicators in the bundesliga: A web-scraping approach","author":"He","year":"2019","journal-title":"Studies"},{"key":"10.1016\/j.datak.2026.102598_b120","series-title":"2019 4th International Conference on System Reliability and Safety","first-page":"116","article-title":"Credibility analysis for available information sources on the web: A review and a contribution","author":"Dongo","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b121","series-title":"Proceedings of 2019 IEEE 18th International Conference on Cognitive Informatics and Cognitive Computing","first-page":"235","article-title":"Web insights for national security: Analysing participative online activity to interpret crises","author":"Cardenas","year":"2019"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b122","doi-asserted-by":"crossref","first-page":"19","DOI":"10.15575\/join.v5i1.548","article-title":"Exploiting web scraping for education news analysis using depth-first search algorithm","volume":"5","author":"Arumi","year":"2020","journal-title":"J. Online Inform."},{"issue":"June","key":"10.1016\/j.datak.2026.102598_b123","first-page":"18","article-title":"Use of artificial intelligence and web scraping methods to retrieve information from the world wide web","volume":"8","author":"Cineca","year":"2016","journal-title":"Int. J. Eng. Res. Appl."},{"key":"10.1016\/j.datak.2026.102598_b124","series-title":"HEALTH-INF 2019 - 12th International Conference on Health Informatics, Proceedings; Part of 12th International Joint Conference on Biomedical Engineering Systems and Technologies","first-page":"319","article-title":"Web scraping online newspaper death notices for the estimation of the local number of deaths","author":"Schnell","year":"2019"},{"issue":"October","key":"10.1016\/j.datak.2026.102598_b125","first-page":"1","article-title":"Web scraping meets survey design: Combining forces","author":"ten Bosch","year":"2018","journal-title":"Bigsurv18 Conf."},{"key":"10.1016\/j.datak.2026.102598_b126","series-title":"Getting Structured Data from the Internet: Running Web Crawlers\/Scrapers on a Big Data Production Scale","author":"Data","year":"2020"},{"key":"10.1016\/j.datak.2026.102598_b127","series-title":"Data wrangling","author":"Altair","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b128","series-title":"2018 3rd International Conference on Information Technology Research","first-page":"1","article-title":"A semantic similarity measure based news posts validation on social media","author":"Chandrathlake","year":"2018"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b129","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1080\/14459795.2020.1801793","article-title":"Defining the key issues discussed by problematic gamblers on web-based forums : a data-driven approach","volume":"21","author":"Bradley","year":"2020","journal-title":"Int. Gambl. Stud."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b130","doi-asserted-by":"crossref","first-page":"320","DOI":"10.1016\/j.jksuci.2018.06.004","article-title":"Firefly algorithm based feature selection for arabic text classification","volume":"32","author":"Larabi Marie-Sainte","year":"2020","journal-title":"J. King Saud Univ. - Comput. Inf. Sci."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b131","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1080\/10410236.2018.1536961","article-title":"Can mobile apps motivate fitness tracking? A study of technological affordances and workout behaviors","volume":"35","author":"Molina","year":"2020","journal-title":"Health Commun."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b132","doi-asserted-by":"crossref","first-page":"695","DOI":"10.1080\/09613218.2021.1908879","article-title":"Investigating the indoor environmental quality of different workplaces through web-scraping and text-mining of Glassdoor reviews","volume":"49","author":"Chinazzo","year":"2021","journal-title":"Build. Res. Inf."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b133","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1007\/s12020-020-02567-z","article-title":"Publication outcome of research presented at the European congress of endocrinology: a web scraping-based analysis and critical appraisal","volume":"72","author":"Saygili","year":"2021","journal-title":"Endocrine"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b134","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1177\/1938965520973587","article-title":"Web scraping for hospitality research: Overview, opportunities, and implications","volume":"62","author":"Han","year":"2021","journal-title":"Cornell Hosp. Q."},{"issue":"12","key":"10.1016\/j.datak.2026.102598_b135","doi-asserted-by":"crossref","first-page":"3350","DOI":"10.1108\/BFJ-02-2019-0081","article-title":"Web scraping for food price research","volume":"121","author":"Hillen","year":"2019","journal-title":"Br. Food J."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b136","first-page":"580","article-title":"A qualitative and quantitative comparison between web scraping and API methods for Twitter credibility analysis","volume":"17","author":"Dongo","year":"2021","journal-title":"Int. J. Web Inf. Syst."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b137","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1080\/17530350.2013.772070","article-title":"Scraping the social? Issues in live social research","volume":"6","author":"Marres","year":"2013","journal-title":"J. Cult. Econ."},{"issue":"00","key":"10.1016\/j.datak.2026.102598_b138","first-page":"1","article-title":"Digital methods in a post-API environment","volume":"00","author":"Perriam","year":"2019","journal-title":"Int. J. Soc. Res. Methodol."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b139","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1093\/phe\/phaa006","article-title":"Scraping the web for public health gains: Ethical considerations from a \u2019big data\u2019 research project on HIV and incarceration","volume":"13","author":"Rennie","year":"2020","journal-title":"Public Health Ethics"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b140","doi-asserted-by":"crossref","first-page":"19","DOI":"10.2753\/JEC1086-4415160402","article-title":"Creating a taxonomy for mobile commerce innovations using social network and cluster analyses","volume":"16","author":"Khansa","year":"2012","journal-title":"Int. J. Electron. Commer."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b141","first-page":"45","article-title":"Assessing innovations in cloud security","volume":"54","author":"Khansa","year":"2014","journal-title":"J. Comput. Inf. Syst."},{"issue":"20","key":"10.1016\/j.datak.2026.102598_b142","doi-asserted-by":"crossref","first-page":"2781","DOI":"10.1002\/sim.6525","article-title":"Publication bias in meta-analyses from the cochrane database of systematic reviews","volume":"34","author":"Kicinski","year":"2015","journal-title":"Stat. Med."},{"key":"10.1016\/j.datak.2026.102598_b143","first-page":"1","article-title":"Seeing the forest for trees : Tools for analyzing faculty research output seeing the forest for trees : Tools for analyzing faculty research output","author":"Frazier","year":"2020","journal-title":"Ser. Rev."},{"issue":"February","key":"10.1016\/j.datak.2026.102598_b144","first-page":"186","article-title":"The use of web-scraping software in searching for grey literature","volume":"11","author":"Haddaway","year":"2016","journal-title":"Grey J."},{"issue":"9\u201310","key":"10.1016\/j.datak.2026.102598_b145","article-title":"Scraping scientific web repositories: Challenges and solutions for automated content extraction","volume":"22","author":"Meschenmoser","year":"2016","journal-title":"D-Lib Mag."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b146","first-page":"1","article-title":"Web scraping scientific repositories for augmented relevant literature search using crisp-dm","volume":"2","author":"El-Din Hassanien","year":"2019","journal-title":"Appl. Syst. Innov."},{"key":"10.1016\/j.datak.2026.102598_b147","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/j.cca.2016.02.019","article-title":"Publication outcome of abstracts presented at the AACC annual meeting","volume":"456","author":"Greene","year":"2016","journal-title":"Clin. Chim. Acta"},{"issue":"10","key":"10.1016\/j.datak.2026.102598_b148","doi-asserted-by":"crossref","first-page":"913","DOI":"10.1080\/10447318.2018.1471570","article-title":"Validity, reliability, and the case for participant-centered research: Reflections on a multi-platform social media study","volume":"34","author":"Jordan","year":"2018","journal-title":"Int. J. Hum.-Comput. Interact."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b149","doi-asserted-by":"crossref","first-page":"678","DOI":"10.1111\/1365-2656.13155","article-title":"Fitness consequences of different migratory strategies in partially migratory populations: A multi-taxa meta-analysis","volume":"89","author":"Buchan","year":"2020","journal-title":"J. Anim. Ecol."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b150","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1002\/leap.1339","article-title":"The relationship and incidence of three editorial notices in PubPeer: Errata, expressions of concern, and retractions","volume":"34","author":"Ortega","year":"2021","journal-title":"Learn. Publ."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b151","doi-asserted-by":"crossref","first-page":"228","DOI":"10.1111\/jems.12399","article-title":"The production economics of economics production","volume":"30","author":"Hu","year":"2021","journal-title":"J. Econ. Manag. Strategy"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b152","doi-asserted-by":"crossref","first-page":"479","DOI":"10.1111\/j.1467-9671.2011.01274.x","article-title":"The development of a web-based demographic data extraction tool for population monitoring","volume":"15","author":"Chow","year":"2011","journal-title":"Trans. GIS"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b153","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1080\/03007766.2015.1061354","article-title":"Talk of heritage: Critical benchmarks and DIY preservationism in progressive rock","volume":"39","author":"Dowd","year":"2016","journal-title":"Popul. Music. Soc."},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b154","doi-asserted-by":"crossref","first-page":"1405","DOI":"10.1080\/13658816.2015.1133820","article-title":"Enabling maps\/location searches on mobile devices: constructing a POI database via focused crawling and information extraction","volume":"30","author":"Chuang","year":"2016","journal-title":"Int. J. Geogr. Inf. Sci."},{"key":"10.1016\/j.datak.2026.102598_b155","article-title":"Natural language processing-based characterization of top-down communication in smart cities for enhancing citizen alignment","volume":"66","author":"Clement","year":"2021","journal-title":"Sustain. Cities Soc."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b156","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1080\/10919392.2020.1776033","article-title":"Institutional isomorphism in organizational cybersecurity: A text analytics approach","volume":"30","author":"Jeyaraj","year":"2020","journal-title":"J. Org. Comput. Electron. Commer."},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b157","doi-asserted-by":"crossref","first-page":"605","DOI":"10.1111\/ropr.12399","article-title":"NEPA and national trends in federal infrastructure siting in the United States","volume":"37","author":"Scott","year":"2020","journal-title":"Rev. Policy Res."},{"issue":"February","key":"10.1016\/j.datak.2026.102598_b158","article-title":"Knowledge acquisition from chemical accident databases using an ontology- based method and natural language processing","volume":"129","author":"Single","year":"2020","journal-title":"Saf. Sci."},{"key":"10.1016\/j.datak.2026.102598_b159","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1016\/j.cageo.2014.10.010","article-title":"CCDST : A free Canadian climate data scraping tool","volume":"75","author":"Bonifacio","year":"2015","journal-title":"Comput. Geosci."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b160","first-page":"247","article-title":"Exploring entrepreneurial legitimacy in reward-based crowdfunding","volume":"16","author":"Frydrych","year":"2014","journal-title":"Ventur. Cap."},{"key":"10.1016\/j.datak.2026.102598_b161","doi-asserted-by":"crossref","first-page":"S449","DOI":"10.1111\/roiw.12141","article-title":"Prices and supply disruptions during natural disasters","volume":"60","author":"Cavallo","year":"2014","journal-title":"Rev. Income Wealth"},{"issue":"10","key":"10.1016\/j.datak.2026.102598_b162","doi-asserted-by":"crossref","first-page":"981","DOI":"10.1080\/09537325.2012.724163","article-title":"Pathways from discovery to commtechnology analysis & strategic management ercialisation : Using web sources to track small and medium-sized enterprise strategies in emerging nanotechnologies","volume":"24","author":"Youtie","year":"2012","journal-title":"Technol. Anal. Strategy Manag."},{"issue":"57","key":"10.1016\/j.datak.2026.102598_b163","doi-asserted-by":"crossref","first-page":"5740","DOI":"10.1080\/00036846.2017.1340575","article-title":"Geographical dispersion of consumer search behaviour","volume":"49","author":"Yilmazkuday","year":"2017","journal-title":"Appl. Econ."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b164","doi-asserted-by":"crossref","first-page":"165","DOI":"10.3233\/sji-150901","article-title":"Web scraping techniques to collect data on consumer electronics and airfares for Italian HICP compilation","volume":"31","author":"Polidoro","year":"2015","journal-title":"Stat. J. IAOS"},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b165","doi-asserted-by":"crossref","first-page":"737","DOI":"10.1111\/rssa.12314","article-title":"Tracking and modelling prices using web-scraped price microdata: Towards automated daily consumer price index forecasting","volume":"181","author":"Powell","year":"2018","journal-title":"J. R. Stat. Soc."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b166","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1111\/rssa.12487","article-title":"A data-driven supply-side approach for estimating cross-border internet purchases within the European union","volume":"183","author":"Meertens","year":"2020","journal-title":"J. R. Stat. Soc. Ser. A"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b167","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1080\/17440572.2019.1569520","article-title":"Size and scope of the tobacco trade on the darkweb","volume":"20","author":"Barrera","year":"2019","journal-title":"Glob. Crime"},{"key":"10.1016\/j.datak.2026.102598_b168","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1016\/j.jretconser.2018.05.002","article-title":"Evaluation of collaborative consumption of food delivery services through web mining techniques","volume":"46","author":"Correa","year":"2019","journal-title":"J. Retail. Consum. Serv."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b169","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1080\/13571516.2020.1838227","article-title":"Competition analysis of the UK intercity coach market: A structural econometric model","volume":"28","author":"Duberga","year":"2021","journal-title":"Int. J. Econ. Bus."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b170","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1002\/agr.21673","article-title":"Online food prices during the COVID-19 pandemic","volume":"37","author":"Hillen","year":"2021","journal-title":"Agribusiness"},{"key":"10.1016\/j.datak.2026.102598_b171","first-page":"1","article-title":"No plant, no problem? Factoryless manufacturing, economic measurement and national manufacturing policies","author":"Coyle","year":"2020","journal-title":"Rev. Int. Political Econ."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b172","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1080\/13504851.2020.1725229","article-title":"The higher price of whiter skin: An analysis of escort services","volume":"28","author":"Campos-Vazquez","year":"2021","journal-title":"Appl. Econ. Lett."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b173","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1080\/08111146.2020.1847067","article-title":"Understanding the share housing sector: A geography of group housing supply in metropolitan sydney","volume":"39","author":"Zhang","year":"2021","journal-title":"Urban Policy Res."},{"issue":"5\u20136","key":"10.1016\/j.datak.2026.102598_b174","doi-asserted-by":"crossref","first-page":"785","DOI":"10.1111\/jbfa.12505","article-title":"Undervaluation and non-financial information: Evidence from voluntary disclosure of CSR news","volume":"48","author":"Benlemlih","year":"2021","journal-title":"J. Bus. Finance Account."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b175","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1177\/0739456X16664789","article-title":"New insights into rental housing markets across the United States: Web scraping and analyzing craigslist rental listings","volume":"37","author":"Boeing","year":"2017","journal-title":"J. Plan. Educ. Res."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b176","doi-asserted-by":"crossref","first-page":"167","DOI":"10.2753\/MIS0742-1222240207","article-title":"A temporary monopolist: Taking advantage of information transparency on the web","volume":"24","author":"Dewan","year":"2007","journal-title":"J. Manage. Inf. Syst."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b177","doi-asserted-by":"crossref","first-page":"547","DOI":"10.1080\/07350015.2015.1110525","article-title":"Distillation of news flow into analysis of stock reactions","volume":"34","author":"Zhang","year":"2016","journal-title":"J. Bus. Econom. Statist."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b178","doi-asserted-by":"crossref","first-page":"417","DOI":"10.1016\/j.jtrangeo.2006.12.002","article-title":"A ticket to ride: Evolving landscapes of air travel accessibility in the United States","volume":"15","author":"Grubesic","year":"2007","journal-title":"J. Transp. Geogr."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b179","first-page":"43","article-title":"A computer aided content analysis of online reviews","volume":"52","author":"Simmons","year":"2011","journal-title":"J. Comput. Inf. Syst."},{"key":"10.1016\/j.datak.2026.102598_b180","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113608","article-title":"An analytical system for evaluating academia units based on metrics provided by academic social network","volume":"159","author":"Wiechetek","year":"2020","journal-title":"Expert Syst. Appl."},{"issue":"July","key":"10.1016\/j.datak.2026.102598_b181","doi-asserted-by":"crossref","first-page":"1852","DOI":"10.1002\/asi.22883","article-title":"Developing metrics to characterize Flickr groups","volume":"64","author":"Liu","year":"2013","journal-title":"J. Am. Soc. Inf. Sci. Technol."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b182","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1080\/08839514.2013.805600","article-title":"Applying semantic technology to business news analysis","volume":"27","author":"Novalija","year":"2013","journal-title":"Appl. Artif. Intell."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b183","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1080\/09505431.2013.846311","article-title":"Images of extreme weather: Symbolising human responses to climate change","volume":"23","author":"Nerlich","year":"2014","journal-title":"Sci. As Cult."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b184","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1080\/01292986.2013.804103","article-title":"Mapping The Kominas\u2019 sociomusical transnation: Punk, diaspora, and digital media","volume":"23","author":"Hsu","year":"2013","journal-title":"Asian J. Commun."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b185","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1080\/01292986.2019.1602916","article-title":"Applying user analytics to uses and effects of social media in China","volume":"29","author":"Zhu","year":"2019","journal-title":"Asian J. Commun."},{"issue":"May","key":"10.1016\/j.datak.2026.102598_b186","article-title":"Scraping and clustering techniques for the characterization of linkedin profiles","author":"Dai","year":"2015","journal-title":"Soc. Inf. Networks"},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b187","doi-asserted-by":"crossref","first-page":"831","DOI":"10.1080\/09644016.2016.1189233","article-title":"Hurricanes and hegemony: A qualitative analysis of micro-level climate change denial discourses","volume":"25","author":"Jacques","year":"2016","journal-title":"Environ. Politics"},{"issue":"July","key":"10.1016\/j.datak.2026.102598_b188","doi-asserted-by":"crossref","first-page":"1852","DOI":"10.1002\/asi.22883","article-title":"Online disclosure of illicit information: Information behaviors in two drug forums","volume":"64","author":"Liu","year":"2013","journal-title":"J. Assoc. Inf. Sci. Technol."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b189","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1080\/12265934.2017.1343155","article-title":"Organizational microblogging for event marketing: A new approach to creative placemaking","volume":"22","author":"Li","year":"2018","journal-title":"Int. J. Urban Sci."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b190","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1080\/12460125.2019.1587133","article-title":"The digital ingredients of donation-based crowdfunding. A data-driven study of Leetchi projects and social campaigns","volume":"27","author":"Sokolova","year":"2018","journal-title":"J. Decis. Syst."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b191","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1080\/10864415.2018.1396110","article-title":"Handling consumer messages on social networking sites: Customer service or privacy infringement?","volume":"22","author":"Demmers","year":"2018","journal-title":"Int. J. Electron. Commer."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b192","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1002\/jee.20299","article-title":"Analysis of social media forums to elicit narratives of graduate engineering student attrition","volume":"109","author":"Berdanier","year":"2020","journal-title":"J. Eng. Educ."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b193","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1080\/17510694.2019.1652025","article-title":"The big bang theory of stardom: The social process of sharing emotional experiences associated online TV actors","volume":"13","author":"Kim","year":"2020","journal-title":"Creative Ind. J."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b194","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/j.smr.2019.01.001","article-title":"Fan response to the identity threat of potential team relocation","volume":"23","author":"Wegner","year":"2020","journal-title":"Sport. Manag. Rev."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b195","doi-asserted-by":"crossref","first-page":"959","DOI":"10.1002\/pits.22244","article-title":"Gone viral: Content characteristics and relative quality of highly shared school psychology-related content on pinterest","volume":"56","author":"Hall","year":"2019","journal-title":"Psychol. Sch."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b196","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1080\/07370024.2019.1685883","article-title":"Technology-facilitated intimate partner abuse: A qualitative analysis of data from online domestic abuse forums","volume":"36","author":"Leitao","year":"2021","journal-title":"Human-Comput. Interact."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b197","first-page":"115","article-title":"Analyzing the depression and suicidal tendencies of people affected by COVID-19\u2019s lockdown using sentiment analysis on social networking websites","volume":"24","author":"Sharma","year":"2021","journal-title":"J. Stat. Manag. Syst."},{"issue":"13","key":"10.1016\/j.datak.2026.102598_b198","doi-asserted-by":"crossref","first-page":"4564","DOI":"10.1002\/cam4.3974","article-title":"Characterizing online crowdfunding campaigns for patients with kidney cancer","volume":"10","author":"Thomas","year":"2021","journal-title":"Cancer Med."},{"issue":"8","key":"10.1016\/j.datak.2026.102598_b199","first-page":"1089","article-title":"Tweeting outside the lines: Normalization and fragmentation as political reporters break from the mainstream","volume":"15","author":"Mour\u00e3o","year":"2020","journal-title":"Journal. Pract."},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b200","doi-asserted-by":"crossref","first-page":"963","DOI":"10.1080\/02650487.2020.1718823","article-title":"You reap where you sow: a trust-based approach to initial seeding for viral advertising","volume":"39","author":"Huh","year":"2020","journal-title":"Int. J. Advert."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b201","doi-asserted-by":"crossref","first-page":"450","DOI":"10.1080\/13032917.2019.1642923","article-title":"\u201cPlease help me die\u201d: applying self-determination theory to understand suicide travel","volume":"30","author":"Yu","year":"2019","journal-title":"Anatolia"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b202","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1111\/weng.12134","article-title":"The grammatical features of English in a Chinese Internet discussion forum","volume":"34","author":"Ai","year":"2015","journal-title":"World Englishes"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b203","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1080\/16522354.2020.1832746","article-title":"Non-journalistic competitors of news media brands on Google and YouTube: From solid competition to a liquid media market","volume":"18","author":"Krebs","year":"2021","journal-title":"J. Media Bus. Stud."},{"key":"10.1016\/j.datak.2026.102598_b204","series-title":"Proceedings of the Association for Information Science and Technology","first-page":"499","article-title":"An analysis on charitable crowdfunding in an authoritarian regime","volume":"Vol. 56","author":"Yang","year":"2019"},{"issue":"14","key":"10.1016\/j.datak.2026.102598_b205","doi-asserted-by":"crossref","first-page":"2044","DOI":"10.1080\/13683500.2020.1806791","article-title":"The influence of foreigners\u2019 buzzing on TripAdvisor ranking of restaurants in Venice: implications for the sustainability of over-touristed heritage cities","volume":"24","author":"Ganzaroli","year":"2021","journal-title":"Curr. Issues Tour."},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b206","doi-asserted-by":"crossref","first-page":"892","DOI":"10.1080\/13683500.2020.1843607","article-title":"Twitter public sentiment dynamics on cruise tourism during the COVID-19 pandemic","volume":"24","author":"Lu","year":"2021","journal-title":"Curr. Issues Tour."},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b207","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1080\/13683500.2020.1735318","article-title":"Exploring the influence of culture on tourist experiences with robots in service delivery environment","volume":"24","author":"Choi","year":"2021","journal-title":"Curr. Issues Tour."},{"key":"10.1016\/j.datak.2026.102598_b208","first-page":"1","article-title":"The influence of cultural origins of visitors when staying in the city that never sleeps","author":"Moro","year":"2020","journal-title":"Tour. Recreat. Res."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b209","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1080\/15022250.2020.1775116","article-title":"Peer-to-peer accommodation in destination life cycle: the case of Nordic countries","volume":"20","author":"Adamiak","year":"2020","journal-title":"Scand. J. Hosp. Tour."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b210","doi-asserted-by":"crossref","first-page":"365","DOI":"10.1080\/15528014.2017.1337390","article-title":"Ethnic foodscapes: Foreign cuisines in the United States","volume":"20","author":"Park","year":"2017","journal-title":"Food Cult. Soc."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b211","first-page":"242","article-title":"The sweet spot in the eye of the beholder? Exploring the sweet sour spots of Asian restaurant menus","volume":"30","author":"Kuo","year":"2020","journal-title":"J. Hosp. Mark. Manag."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b212","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1080\/1528008X.2020.1848747","article-title":"Understanding tourists\u2019 experiences at local markets in Phuket: An analysis of TripAdvisor reviews","volume":"23","author":"Sangkaew","year":"2020","journal-title":"J. Qual. Assur. Hosp. Tour."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b213","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1080\/13504851.2020.1725231","article-title":"What effect the demand for homestays: evidence from Airbnb in China","volume":"28","author":"Jiang","year":"2021","journal-title":"Appl. Econ. Lett."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b214","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1080\/15230406.2016.1139467","article-title":"Design and evaluation of a geovisual analytics system for uncovering patterns in spatio-temporal event data","volume":"44","author":"Robinson","year":"2017","journal-title":"Cartogr. Geogr. Inf. Sci."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b215","doi-asserted-by":"crossref","first-page":"579","DOI":"10.1111\/lsq.12169","article-title":"Men idle, women network: How networks help female legislators succeed","volume":"42","author":"Wojcik","year":"2017","journal-title":"Legis. Stud. Q."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b216","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1080\/08920753.2017.1303694","article-title":"Shaping the coast with permits: Making the state regulatory permitting process transparent with text mining","volume":"45","author":"Hui","year":"2017","journal-title":"Coast. Manag."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b217","doi-asserted-by":"crossref","first-page":"977","DOI":"10.1111\/ssqu.12476","article-title":"Gun talk online: Canadian tools, American values","volume":"99","author":"McLean","year":"2018","journal-title":"Soc. Sci. Q."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b218","doi-asserted-by":"crossref","first-page":"926","DOI":"10.1111\/padm.12602","article-title":"How policy agendas change when autocracies liberalize: The case of Hong Kong, 1975\u20132016","volume":"97","author":"Or","year":"2019","journal-title":"Public Adm."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b219","doi-asserted-by":"crossref","first-page":"797","DOI":"10.1111\/padm.12594","article-title":"Regulating lobbying through voluntary transparency clubs: The connoisseurs\u2019 assessment. Evidence from the European Union","volume":"97","author":"Bunea","year":"2019","journal-title":"Public Adm."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b220","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1002\/polq.12957","article-title":"Beyond likely voters: An event analysis of conservative political outreach","volume":"134","author":"Bautista-Chavez","year":"2019","journal-title":"Political Sci. Q."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b221","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1080\/09644008.2019.1635120","article-title":"Do minority cabinets govern more flexibly and inclusively? Evidence from Germany","volume":"28","author":"Ganghof","year":"2019","journal-title":"Ger. Politics"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b222","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1080\/01402382.2019.1603849","article-title":"Is politics under increasing corporate sway? A longitudinal study on the drivers of corporate access","volume":"43","author":"Aizenberg","year":"2020","journal-title":"West Eur. Politics"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b223","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1080\/2474736X.2019.1605834","article-title":"Simulating pluralism: the language of democracy in hegemonic authoritarianism","volume":"1","author":"Maerz","year":"2019","journal-title":"Political Res. Exch."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b224","doi-asserted-by":"crossref","first-page":"1419","DOI":"10.1111\/rego.12313","article-title":"Does business influence government regulations? New evidence from Canadian impact assessments","volume":"15","author":"Beaulieu-Guay","year":"2021","journal-title":"Regul. Gov."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b225","doi-asserted-by":"crossref","first-page":"426","DOI":"10.1080\/13510347.2020.1714595","article-title":"Scraping the demos. Digitalization, web scraping and the democratic project","volume":"27","author":"Ulbricht","year":"2020","journal-title":"Democratization"},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b226","doi-asserted-by":"crossref","first-page":"764","DOI":"10.1080\/10410236.2018.1434737","article-title":"Emotional self-disclosure in online breast cancer support groups: Examining theme, reciprocity, and linguistic style matching","volume":"34","author":"Malloch","year":"2019","journal-title":"Health Commun."},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b227","doi-asserted-by":"crossref","first-page":"394","DOI":"10.1080\/10810730.2020.1776423","article-title":"Algorithms and health misinformation: A case study of vaccine books on Amazon","volume":"25","author":"Shin","year":"2020","journal-title":"J. Health Commun."},{"issue":"8","key":"10.1016\/j.datak.2026.102598_b228","doi-asserted-by":"crossref","first-page":"1001","DOI":"10.1080\/10410236.2020.1731776","article-title":"From network positions to language use: Understanding the effects of brokerage and closure structures from a linguistic perspective","volume":"36","author":"Chen","year":"2021","journal-title":"Health Commun."},{"issue":"9","key":"10.1016\/j.datak.2026.102598_b229","doi-asserted-by":"crossref","first-page":"1166","DOI":"10.1080\/10410236.2017.1339371","article-title":"\u201cYou see yourself like in a mirror\u201d: The effects of internet-mediated personal networks on body image and eating disorders","volume":"33","author":"Pallotti","year":"2018","journal-title":"Health Commun."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b230","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1080\/20479700.2019.1640971","article-title":"How wired are U.S. hospitals? A study of patient-oriented interactive tools","volume":"13","author":"Huang","year":"2020","journal-title":"Int. J. Healthc. Manag."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b231","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1111\/sltb.12569","article-title":"Using topic modeling to detect and describe self-injurious and related content on a large-scale digital platform","volume":"50","author":"Franz","year":"2020","journal-title":"Suicide Life-Threat. Behav."},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b232","doi-asserted-by":"crossref","first-page":"510","DOI":"10.1080\/13504851.2015.1083936","article-title":"Aging faster in office? the effect of extended service in political office on longevity","volume":"23","author":"Deuchert","year":"2016","journal-title":"Appl. Econ. Lett."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b233","doi-asserted-by":"crossref","first-page":"194","DOI":"10.1080\/07421222.2018.1550547","article-title":"Fostering participant health knowledge and attitudes: An econometric study of a chronic disease-focused online health community","volume":"36","author":"Chen","year":"2019","journal-title":"J. Manage. Inf. Syst."},{"key":"10.1016\/j.datak.2026.102598_b234","doi-asserted-by":"crossref","first-page":"36","DOI":"10.1016\/j.puhe.2020.07.029","article-title":"Retail violations of sales to minors on e-cigarettes and cigars","volume":"187","author":"Dai","year":"2020","journal-title":"Public Health"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b235","first-page":"435","article-title":"A primer on theory-driven web scraping: Automatic extraction of big data from the internet for use in psychological research","volume":"18","author":"Landers","year":"2013","journal-title":"Psychol. Methods"},{"issue":"10","key":"10.1016\/j.datak.2026.102598_b236","doi-asserted-by":"crossref","first-page":"1915","DOI":"10.1002\/ejp.1639","article-title":"Individuals with back and neck pain on medical forums: What do they mention? What do they fear?","volume":"24","author":"Mintz","year":"2020","journal-title":"Eur. J. Pain"},{"issue":"9","key":"10.1016\/j.datak.2026.102598_b237","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1002\/pds.5301","article-title":"A review of the sampling methodology used in studies evaluating the effectiveness of risk minimisation measures in Europe","volume":"30","author":"Jouaville","year":"2021","journal-title":"Pharmacoepidemiol. Drug Safety"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b238","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1080\/20964471.2020.1844934","article-title":"A spatiotemporal data collection of viral cases for COVID-19 rapid response","volume":"5","author":"Sha","year":"2021","journal-title":"Big Earth Data"},{"key":"10.1016\/j.datak.2026.102598_b239","first-page":"1","article-title":"Web scraping in the statistics and data science curriculum : Challenges and opportunities","author":"Dogucu","year":"2020","journal-title":"J. Stat. Educ."},{"issue":"February","key":"10.1016\/j.datak.2026.102598_b240","article-title":"Modern campuses , local connections and unconventional symbols : Promotional practises in the Canadian community college sector","volume":"3883","author":"Milian","year":"2017","journal-title":"Tert. Educ. Manag."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b241","first-page":"685","article-title":"Gather-narrow-extract: A framework for studying local policy variation using web-scraping and natural language processing","volume":"12","author":"Anglin","year":"2019","journal-title":"J. Res. Educ. Eff."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b242","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1080\/0309877X.2016.1206859","article-title":"Symbolic resources and marketing strategies in Ontario higher education: a comparative analysis","volume":"42","author":"Pizarro Milian","year":"2018","journal-title":"J. Furth. High. Educ."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b243","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1080\/09332480.2016.1181966","article-title":"Taking a chance in the classroom: La Quinta is Spanish for \u201cNext to Denny\u2019s\u201d","volume":"29","author":"Rundel","year":"2016","journal-title":"Chance"},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b244","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1080\/09585176.2016.1261719","article-title":"Validating curriculum development using text mining","volume":"28","author":"West","year":"2017","journal-title":"Curric. J."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b245","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1111\/tgis.12301","article-title":"Exploring the anatomy of geographic information systems and technology (GIS&T) textbooks","volume":"22","author":"Frazier","year":"2018","journal-title":"Trans. GIS"},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b246","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1080\/08923647.2019.1610285","article-title":"The structure of the MOOC ecosystem as revealed by course aggregators","volume":"33","author":"Cisel","year":"2019","journal-title":"Am. J. Distance Educ."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b247","first-page":"11","article-title":"On the necessity of multiple university rankings","volume":"13","author":"Angelis","year":"2019","journal-title":"COLLNET J. Sci. Inf. Manag."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b248","doi-asserted-by":"crossref","first-page":"364","DOI":"10.3109\/10826084.2013.841243","article-title":"Are \u201dlegal highs\u201d users satisfied evidence from online customer comments","volume":"49","author":"Bruneel","year":"2014","journal-title":"Subst. Use Misuse"},{"issue":"8","key":"10.1016\/j.datak.2026.102598_b249","doi-asserted-by":"crossref","first-page":"1023","DOI":"10.1080\/01639625.2017.1395669","article-title":"The evolution of the tattoo in defiance of the immutable definition of deviance: current perceptions by law enforcement of tattooed arrestees","volume":"39","author":"Camacho","year":"2018","journal-title":"Deviant Behav."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b250","doi-asserted-by":"crossref","first-page":"704","DOI":"10.1111\/tgis.12630","article-title":"Analyzing relationship between user-generated content and local visual information with augmented reality-based location-based social networks","volume":"24","author":"Liu","year":"2020","journal-title":"Trans. GIS"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b251","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1080\/14789949.2017.1291707","article-title":"Linguistic analysis of chat transcripts from child predator undercover sex stings","volume":"28","author":"Drouin","year":"2017","journal-title":"J. Forensic Psychiatry Psychol."},{"issue":"21","key":"10.1016\/j.datak.2026.102598_b252","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/cpe.4783","article-title":"Evaluating deep learning models for sentiment classification","volume":"30","author":"Ay Karakus","year":"2018","journal-title":"Concurr. Comput.: Pract. Exp."},{"issue":"7","key":"10.1016\/j.datak.2026.102598_b253","doi-asserted-by":"crossref","first-page":"3254","DOI":"10.1111\/gcb.14064","article-title":"Global spread of helminth parasites at the human\u2013domestic animal\u2013wildlife interface","volume":"24","author":"Wells","year":"2018","journal-title":"Global Change Biol."},{"issue":"6","key":"10.1016\/j.datak.2026.102598_b254","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/conl.12601","article-title":"Missing, delayed, and old: The status of ESA recovery plans","volume":"11","author":"Malcom","year":"2018","journal-title":"Conserv. Lett."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b255","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1080\/19312458.2018.1555798","article-title":"Overcoming language barriers: Assessing the potential of machine translation and topic modeling for the comparative analysis of multilingual text Corpora","volume":"13","author":"Reber","year":"2019","journal-title":"Commun. Methods Meas."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b256","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1002\/sam.11372","article-title":"Skills in demand for ICT and statistical occupations: Evidence from web-based job vacancies","volume":"11","author":"Lovaglio","year":"2018","journal-title":"Stat. Anal. Data Min."},{"issue":"10","key":"10.1016\/j.datak.2026.102598_b257","doi-asserted-by":"crossref","first-page":"1977","DOI":"10.1080\/13658816.2018.1470633","article-title":"Spatial mining of migration patterns from web demographics","volume":"32","author":"Chow","year":"2018","journal-title":"Int. J. Geogr. Inf. Sci."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b258","doi-asserted-by":"crossref","first-page":"433","DOI":"10.1111\/jems.12258","article-title":"Unpacking 3GPP standards","volume":"27","author":"Baron","year":"2018","journal-title":"J. Econ. Manag. Strat."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b259","first-page":"5","article-title":"Web scraping and mapping urban data to support urban design decisions","volume":"15","author":"Ensari","year":"2018","journal-title":"A\/Z ITU J. Fac. Archit."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b260","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1080\/19322909.2018.1534635","article-title":"Computer programming for librarians: A study of job postings for library technologists","volume":"13","author":"Gonzales","year":"2019","journal-title":"J. Web Libr."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b261","doi-asserted-by":"crossref","first-page":"494","DOI":"10.1111\/jssr.12599","article-title":"Digital irreligion: Christian deconversion in an online community","volume":"58","author":"Starr","year":"2019","journal-title":"J. Sci. Study Relig."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b262","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1080\/08832323.2018.1520685","article-title":"An investigation of skill requirements for business and data analytics positions: A content analysis of job advertisements","volume":"94","author":"Verma","year":"2019","journal-title":"J. Educ. Bus."},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b263","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1080\/10572252.2019.1634766","article-title":"Peering into the internet abyss: Using big data audience analysis to understand online comments","volume":"29","author":"Gallagher","year":"2020","journal-title":"Tech. Commun. Q."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b264","doi-asserted-by":"crossref","first-page":"390","DOI":"10.1080\/02640414.2019.1702776","article-title":"Searching for momentum in NBA triplets of free throws","volume":"38","author":"Morgulev","year":"2020","journal-title":"J. Sports Sci."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b265","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/nem.2143","article-title":"Detecting and analyzing border gateway protocol blackholing activity","volume":"31","author":"Farasat","year":"2021","journal-title":"Int. J. Netw. Manage."},{"issue":"11","key":"10.1016\/j.datak.2026.102598_b266","doi-asserted-by":"crossref","first-page":"1559","DOI":"10.1080\/17461391.2020.1838621","article-title":"Effect of marathon characteristics and runners\u2019 time category on pacing profile","volume":"21","author":"Oficial-Casado","year":"2020","journal-title":"Eur. J. Sport. Sci."},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b267","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/ajp.23213","article-title":"Open-access platform to synthesize knowledge of ape conservation across sites","volume":"83","author":"Heinicke","year":"2021","journal-title":"Am. J. Primatol."},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b268","doi-asserted-by":"crossref","first-page":"1110","DOI":"10.1111\/poms.13294","article-title":"Cracking sex trafficking: Data analysis, pattern recognition, and path prediction","volume":"30","author":"Keskin","year":"2021","journal-title":"Prod. Oper. Manage."},{"key":"10.1016\/j.datak.2026.102598_b269","doi-asserted-by":"crossref","DOI":"10.1016\/j.dib.2020.106178","article-title":"COVID-19: A scholarly production dataset report for research analysis","volume":"32","author":"Santos","year":"2020","journal-title":"Data Brief"},{"key":"10.1016\/j.datak.2026.102598_b270","series-title":"Python Web Scraping","author":"Jarmul","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b271","series-title":"PyMed-PubMed access through Python","author":"Wobben","year":"2020"},{"key":"10.1016\/j.datak.2026.102598_b272","doi-asserted-by":"crossref","DOI":"10.1016\/j.softx.2019.100263","article-title":"Pybliometrics: Scriptable bibliometrics using a python interface to scopus","volume":"10","author":"Rose","year":"2019","journal-title":"SoftwareX"},{"key":"10.1016\/j.datak.2026.102598_b273","series-title":"Web content extractor","author":"Newprosoft","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b274","series-title":"Cochrane scraper","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b275","series-title":"E-databases","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b276","series-title":"Datacrypto: The dark net crawler and scraper. Software program","author":"Decary-Hetu","year":"2013"},{"key":"10.1016\/j.datak.2026.102598_b277","series-title":"European Conference on Quality in Official Statistics","first-page":"1","article-title":"Use of web scraping and text mining techniques in the istat survey on \u201dinformation and communication technology in enterprises\u201d","author":"Barcaroli","year":"2014"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b278","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1257\/jep.30.2.151","article-title":"The billion prices project: Using online prices for measurement and research","volume":"30","author":"Cavallo","year":"2016","journal-title":"J. Econ. Perspect."},{"key":"10.1016\/j.datak.2026.102598_b279","series-title":"3rd International Conference on New Media","article-title":"The use of web scraping in computer parts and assembly price comparison","author":"Julian","year":"2015"},{"key":"10.1016\/j.datak.2026.102598_b280","series-title":"Scraperwiki","year":"2021"},{"key":"10.1016\/j.datak.2026.102598_b281","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1016\/B978-0-12-397186-9.00007-8","article-title":"Social platforms","author":"Krishnan","year":"2015","journal-title":"Soc. Data Anal."},{"key":"10.1016\/j.datak.2026.102598_b282","series-title":"Airbnb website scraper(version 3.4)","author":"Slee","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b283","series-title":"Beautiful soup documentation","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b284","doi-asserted-by":"crossref","unstructured":"M.R. Mufid, A. Basofi, Risk Diagnosis and Mitigation System of COVID-19 Using Expert System and Web Scraping, in: International Electronics Symposium, IES, 2020, pp. 577\u2013583.","DOI":"10.1109\/IES50839.2020.9231619"},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b285","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1002\/pds.4381","article-title":"Strengthening standards, transparency, and collaboration to support medicine evaluation: Ten years of the European network of centres for pharmacoepidemiology and pharmacovigilance (ENCePP)","volume":"27","author":"Kurz","year":"2018","journal-title":"Pharmacoepidemiol. Drug Safety"},{"key":"10.1016\/j.datak.2026.102598_b286","series-title":"Data miner","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b287","series-title":"Tika in Action","first-page":"257","article-title":"Tika in action","author":"Mattmann","year":"2011"},{"key":"10.1016\/j.datak.2026.102598_b288","unstructured":"S. Bache, H. Wickham, magrittr: A forward-Pipe operator for R. https:\/\/cran.r-project.org\/web\/packages\/magrittr\/magrittr.pdf."},{"key":"10.1016\/j.datak.2026.102598_b289","series-title":"SelectorGadget: Point and select CSS selectors","author":"Cantino","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b290","unstructured":"RapidMiner Studio."},{"key":"10.1016\/j.datak.2026.102598_b291","series-title":"IEEE Frontiers in Education Conference","article-title":"Utilizing web scraping and natural language processing to better inform pedagogical practice","author":"Lunn","year":"2020"},{"issue":"8","key":"10.1016\/j.datak.2026.102598_b292","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3390\/sym13081550","article-title":"Web-browsing application using web scraping technology in korean network separation application","volume":"13","author":"Jung","year":"2021","journal-title":"Symmetry"},{"issue":"5","key":"10.1016\/j.datak.2026.102598_b293","doi-asserted-by":"crossref","first-page":"2795","DOI":"10.1111\/tbed.14133","article-title":"Web-scraping applied to acquire difficult to access animal disease outbreak information, using African Swine Fever in Europe as an example","volume":"68","author":"Leray","year":"2021","journal-title":"Transbound. Emerg. Dis."},{"key":"10.1016\/j.datak.2026.102598_b294","doi-asserted-by":"crossref","first-page":"32498","DOI":"10.1109\/ACCESS.2021.3060623","article-title":"T-CrEO: A twitter credibility analysis framework","volume":"9","author":"Cardinale","year":"2021","journal-title":"IEEE Access"},{"issue":"2","key":"10.1016\/j.datak.2026.102598_b295","doi-asserted-by":"crossref","first-page":"1481","DOI":"10.1007\/s11277-021-08093-z","article-title":"Firefly optimization algorithm based web scraping for web citation extraction","volume":"118","author":"Suganya","year":"2021","journal-title":"Wirel. Pers. Commun."},{"key":"10.1016\/j.datak.2026.102598_b296","doi-asserted-by":"crossref","first-page":"84783","DOI":"10.1109\/ACCESS.2021.3085682","article-title":"COVID-scraper: An open-source toolset for automatically scraping and processing global multi-scale spatiotemporal COVID-19 records","volume":"9","author":"Lan","year":"2021","journal-title":"IEEE Access"},{"issue":"Section 4","key":"10.1016\/j.datak.2026.102598_b297","first-page":"49","article-title":"Towards end-user web scraping for customization","author":"Katongo","year":"2021","journal-title":"ACM Int. Conf. Proceeding Ser."},{"key":"10.1016\/j.datak.2026.102598_b298","series-title":"International Conference on Artificial Intelligence and Soft Computing","first-page":"435","article-title":"FastText and XGBoost content-based classification for employment web scraping","author":"Freire","year":"2020"},{"issue":"509","key":"10.1016\/j.datak.2026.102598_b299","first-page":"49","article-title":"Comparing price indices of clothing and footwear for scanner data and web scraped data","volume":"2019","author":"Chessa","year":"2019","journal-title":"Econ. et Stat."},{"key":"10.1016\/j.datak.2026.102598_b300","doi-asserted-by":"crossref","first-page":"105627","DOI":"10.1109\/ACCESS.2019.2932197","article-title":"A semi-automatic data-scraping method for the public transport domain","volume":"7","author":"Vela","year":"2019","journal-title":"IEEE Access"},{"issue":"11","key":"10.1016\/j.datak.2026.102598_b301","doi-asserted-by":"crossref","first-page":"3383","DOI":"10.1108\/BFJ-07-2019-0522","article-title":"Increasing online shop revenues with web scraping: A case study for the wine sector","volume":"122","author":"Jorge","year":"2020","journal-title":"Br. Food J."},{"key":"10.1016\/j.datak.2026.102598_b302","series-title":"International Conference on Information and Communications Technology (ICOIACT) Discovering","first-page":"554","article-title":"Discovering Indonesian digital workers in online gig economy platforms","author":"Labib Fardany Faisal","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b303","series-title":"31st Annual ACM Symposium on User Interface Software and Technology","first-page":"963","article-title":"Rousillon: Scraping distributed hierarchical web data","author":"Chasins","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b304","series-title":"Encyclopedia of Big Data","first-page":"1","article-title":"Web scraping","author":"Zhao","year":"2020"},{"key":"10.1016\/j.datak.2026.102598_b305","first-page":"1","article-title":"Internet \u2018data scraping\u2019 - A primer for counseling clients","author":"Dreyer","year":"2013","journal-title":"N. Y. Law J."},{"key":"10.1016\/j.datak.2026.102598_b306","unstructured":"V. Krotov, L. Silva, Legality and Ethics of Web Scraping, in: Twenty-Fourth Americas Conference on Information Systems, 2018."},{"key":"10.1016\/j.datak.2026.102598_b307","series-title":"How binding is your browserwrap agreement?","author":"Toto","year":"2016"},{"issue":"4","key":"10.1016\/j.datak.2026.102598_b308","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1080\/10584609.2018.1477506","article-title":"Computational research in the post-API age","volume":"35","author":"Freelon","year":"2018","journal-title":"Political Commun."},{"issue":"3","key":"10.1016\/j.datak.2026.102598_b309","article-title":"What you can scrape and what is right to scrape: A proposal for a tool to collect public facebook data","volume":"6","author":"Mancosu","year":"2020","journal-title":"Soc. Media Soc."},{"issue":"10","key":"10.1016\/j.datak.2026.102598_b310","doi-asserted-by":"crossref","first-page":"1610","DOI":"10.1177\/1049732318776625","article-title":"Online communication settings and the qualitative research process: Acclimating students and novice researchers","volume":"28","author":"Gregory","year":"2018","journal-title":"Qual. Health Res."},{"key":"10.1016\/j.datak.2026.102598_b311","series-title":"Internet research ethics","author":"Buchanan","year":"2021"},{"issue":"3\u20134","key":"10.1016\/j.datak.2026.102598_b312","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1177\/1747016116650720","article-title":"Ethical challenges in online research: Public\/private perceptions","volume":"13","author":"Sugiura","year":"2017","journal-title":"Res. Ethics"},{"issue":"1","key":"10.1016\/j.datak.2026.102598_b313","article-title":"Abusing the computer fraud and abuse act: Why broad interpretations of the CFAA fail","volume":"36","author":"Jensen","year":"2013","journal-title":"Hamline Law Rev."},{"issue":"11","key":"10.1016\/j.datak.2026.102598_b314","doi-asserted-by":"crossref","first-page":"1567","DOI":"10.1080\/1369118X.2019.1627386","article-title":"Overcoming terms of service: A proposal for ethical distributed research","volume":"22","author":"Halavais","year":"2019","journal-title":"Inf. Commun. Soc."},{"key":"10.1016\/j.datak.2026.102598_b315","series-title":"Web scraping and GDPR","author":"Rubio","year":"2020"},{"key":"10.1016\/j.datak.2026.102598_b316","series-title":"Computer Misuse Act, 1990","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b317","series-title":"Legality of data scraping in India","year":"2022"},{"key":"10.1016\/j.datak.2026.102598_b318","first-page":"2","article-title":"Electronic commerce & law report","author":"Commerce","year":"2010","journal-title":"N. Y."},{"key":"10.1016\/j.datak.2026.102598_b319","series-title":"After 10 years, google books is legal","author":"Meyer","year":"2015"},{"key":"10.1016\/j.datak.2026.102598_b320","series-title":"Craigslist Inc. v. 3Taps Inc","author":"Goldman","year":"2013"},{"key":"10.1016\/j.datak.2026.102598_b321","series-title":"Database Right - 77m Ltd. v Ordnance Survey Ltd","author":"Lambert","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b322","series-title":"East Book Company v D.B. Modak","year":"2024"},{"key":"10.1016\/j.datak.2026.102598_b323","unstructured":"HC restrains UK portal Padawan from \u2019lifting\u2019 OLX contents, material. https:\/\/www.dnaindia.com\/business\/report-hc-restrains-uk-portal-padawan-from-lifting-olx-contents-material-2201294."},{"key":"10.1016\/j.datak.2026.102598_b324","series-title":"The Computer Fraud and Abuse Act: Revenue protection weapon for airlines","author":"Nankin","year":"2010"},{"key":"10.1016\/j.datak.2026.102598_b325","series-title":"Facebook v. Power ventures","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b326","series-title":"Victory! ruling in hiQ v. Linkedin protects scraping of public data","author":"Fischer","year":"2019"},{"key":"10.1016\/j.datak.2026.102598_b327","series-title":"D.C. Court: Accessing public information is not a computer crime","author":"Williams","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b328","series-title":"Sandvig v. Session","year":"2018"},{"key":"10.1016\/j.datak.2026.102598_b329","series-title":"Ending data scraping dispute, craigslist reaches $31M settlement with instamotor","author":"Newburger","year":"2017"},{"key":"10.1016\/j.datak.2026.102598_b330","series-title":"Ryanair in another victory against the screenscrapers","author":"Ghrada","year":"2015"},{"key":"10.1016\/j.datak.2026.102598_b331","series-title":"International Conference on Informatics in Secondary Schools - Evolution and Perspectives: The Bridge Between using and Understanding Computers","first-page":"159","article-title":"Algorithmic thinking: The key for understanding computer science","author":"Futschek","year":"2006"},{"key":"10.1016\/j.datak.2026.102598_b332","doi-asserted-by":"crossref","DOI":"10.1109\/ACCESS.2020.3039044","article-title":"Automatically discovering relevant images from web pages","volume":"8","author":"Uzun","year":"2020","journal-title":"IEEE Access"}],"container-title":["Data &amp; Knowledge Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0169023X26000455?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0169023X26000455?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T00:49:09Z","timestamp":1781225349000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0169023X26000455"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":332,"alternative-id":["S0169023X26000455"],"URL":"https:\/\/doi.org\/10.1016\/j.datak.2026.102598","relation":{},"ISSN":["0169-023X"],"issn-type":[{"value":"0169-023X","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A systematic review of web scraping: Techniques, LLM-enhanced approaches, performance metrics, and legal\u2013ethical issues","name":"articletitle","label":"Article Title"},{"value":"Data & Knowledge Engineering","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.datak.2026.102598","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"102598"}}