{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T05:55:35Z","timestamp":1774936535106,"version":"3.50.1"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/bigdata47090.2019.9005594","type":"proceedings-article","created":{"date-parts":[[2020,2,25]],"date-time":"2020-02-25T06:05:34Z","timestamp":1582610734000},"page":"6040-6042","source":"Crossref","is-referenced-by-count":87,"title":["Web Scraping: State-of-the-Art and Areas of Application"],"prefix":"10.1109","author":[{"given":"Rabiyatou","family":"Diouf","sequence":"first","affiliation":[]},{"given":"Edouard Ngor","family":"Sarr","sequence":"additional","affiliation":[]},{"given":"Ousmane","family":"Sall","sequence":"additional","affiliation":[]},{"given":"Babiga","family":"Birregah","sequence":"additional","affiliation":[]},{"given":"Mamadou","family":"Bousso","sequence":"additional","affiliation":[]},{"given":"Seny Ndiaye","family":"Mbaye","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1177\/0739456X16664789"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1080\/17530350.2013.772070"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/223904.223931"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/223904.223929"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/192844.192905"},{"key":"ref30","author":"rich","year":"1998","journal-title":"User modeling via stereotypes"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s13278-017-0442-5"},{"key":"ref36","first-page":"186","article-title":"The use of web-scraping software in searching for grey literature","volume":"11","author":"haddaway","year":"2015","journal-title":"The Journal of Grey"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/245108.245122"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/MIC.2003.1167344"},{"key":"ref28","author":"johnson","year":"2006","journal-title":"Design & Implementation of a Pipeline for High-throughput Enzyme Function Prediction"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IWMC.1994.601243"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2005.99"},{"key":"ref2","author":"rouby","year":"0","journal-title":"Scraping & Crawling"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SNAMS.2018.8554421"},{"key":"ref20","author":"johnson","year":"2006","journal-title":"Design & Implementation of a Pipeline for High-throughput Enzyme Function Prediction"},{"key":"ref22","author":"boag","year":"2002","journal-title":"XQuery 1 0 An XML Query Language"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbt026"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/SNAMS.2018.8554421"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/775181.775182"},{"key":"ref26","first-page":"44","volume":"2","author":"vargiu","year":"2013","journal-title":"Exploiting web scraping in a collaborative filtering-based approach to web advertising Artif Intell Research"},{"key":"ref25","author":"sirisuriya","year":"2015","journal-title":"A Comparative Study on Web Scraping"},{"key":"ref50","year":"2019"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1002\/pmic.200700865"},{"key":"ref53","author":"williams","year":"2008","journal-title":"Using Text-Mining and Crowdsourced Curation to Build a Structure Centric Community for Chemists"},{"key":"ref52","doi-asserted-by":"crossref","first-page":"668d","DOI":"10.1093\/nar\/gkj067","article-title":"DrugBank: a comprehensive resource for in silico drug discovery and exploration","volume":"34","author":"ds","year":"2006","journal-title":"Nucleic Acids Res"},{"key":"ref10","author":"hanretty","year":"2013","journal-title":"Scraping the web for arts and humanities"},{"key":"ref11","author":"manning","year":"1999","journal-title":"Foundations of Statistical Natural Language Processing"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICTACC.2017.43"},{"key":"ref12","year":"2017","journal-title":"Lucas Ou-Yang"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1016\/j.jbi.2010.10.003","article-title":"The Biomedical Resource Ontology (BRO) to enable resource discovery in clinical and translational research","volume":"44","author":"jd","year":"2011","journal-title":"J Biomed Inform"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.imu.2018.01.003"},{"key":"ref15","first-page":"270","article-title":"Distributed systems and automated biodiversity informatics: genomic analysis and geographic visualization of disease evolution","author":"aw","year":"2008","journal-title":"Knowl InfSyst"},{"key":"ref16","year":"2019","journal-title":"Readability"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbt026"},{"key":"ref18","year":"2019"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1186\/1755-8794-3-50","article-title":"Genotator: a disease-agnostic tool for genetic annotation of disease","volume":"3","author":"dp","year":"2010","journal-title":"BMC Med Genomics"},{"key":"ref4","first-page":"186","volume":"11","author":"haddaway","year":"2015","journal-title":"The use of web-scraping software in searching for grey literature"},{"key":"ref3","article-title":"Un mod&#x00E8;le pour la recherche d&#x2019;information sur des documents structur&#x00E9;s","author":"piwowarski","year":"2002","journal-title":"Proceedings of the 6emes journ&#x00E9;es Internationales d'Analyse Statistique des Donnees Textuelles (JADT2002)"},{"key":"ref6","year":"2018","journal-title":"Import io web page"},{"key":"ref5","year":"2018","journal-title":"Webhose io web page"},{"key":"ref8","author":"schrenk","year":"2007","journal-title":"Webbots Spiders and Screen Scrapers"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-55354-2_5"},{"key":"ref49","year":"2019"},{"key":"ref9","author":"chakrabarti","year":"2003","journal-title":"Mining the Web"},{"key":"ref46","year":"2019"},{"key":"ref45","year":"2019"},{"key":"ref48","year":"2019"},{"key":"ref47","year":"2019"},{"key":"ref42","year":"2019"},{"key":"ref41","year":"2019"},{"key":"ref44","year":"2019"},{"key":"ref43","year":"2019"}],"event":{"name":"2019 IEEE International Conference on Big Data (Big Data)","location":"Los Angeles, CA, USA","start":{"date-parts":[[2019,12,9]]},"end":{"date-parts":[[2019,12,12]]}},"container-title":["2019 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8986695\/9005444\/09005594.pdf?arnumber=9005594","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:47:21Z","timestamp":1658094441000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9005594\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/bigdata47090.2019.9005594","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}