{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T07:40:10Z","timestamp":1751096410381,"version":"3.41.0"},"reference-count":84,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3532767","type":"journal-article","created":{"date-parts":[[2025,1,22]],"date-time":"2025-01-22T18:57:45Z","timestamp":1737572265000},"page":"107961-107978","source":"Crossref","is-referenced-by-count":0,"title":["Toward Automatic Dataset Discovery From Scientific Publications"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0848-632X","authenticated-orcid":false,"given":"Sandeep","family":"Kumar","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tirthankar","family":"Ghosal","sequence":"additional","affiliation":[{"name":"National Center for Computational Sciences, Oak Ridge National Laboratory, Oak Ridge, TN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3612-8834","authenticated-orcid":false,"given":"Asif","family":"Ekbal","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1016\/j.ijpe.2014.04.018"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1073\/pnas.2402802121"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.29085\/9781856048910","volume-title":"Why Manage Res. Data","author":"Pryor","year":"2012"},{"key":"ref4","article-title":"Data engineering for everyone","author":"Janapa Reddi","year":"2021","journal-title":"arXiv:2102.11447"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1002\/asi.22634"},{"key":"ref6","article-title":"Automated annotation with generative AI requires validation","author":"Pangakis","year":"2023","journal-title":"arXiv:2306.00176"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1093\/pan\/mps028"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1038\/nature.2013.14416"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1371\/journal.pone.0246099"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1080\/13662716.2020.1792274"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1108\/S0743-41542018000036B009"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1186\/1478-4505-12-34"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1038\/s41562-016-0021"},{"issue":"3","key":"ref14","first-page":"43","article-title":"Peer review in scientific publications: Benefits, critiques, & a survival guide","volume":"25","author":"Kelly","year":"2014","journal-title":"EJIFCC"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1093\/biosci\/bix034"},{"key":"ref16","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. Linguistics, Human Lang. Technol.","author":"Devlin"},{"volume-title":"How Much Data Do We Create Every Day? The Mind-blowing Stats Everyone Should Read","year":"2018","author":"Marr","key":"ref17"},{"volume-title":"How To Manage Complexity and Realize the Value of Big Data","year":"2020","key":"ref18"},{"key":"ref19","first-page":"1","article-title":"The digital universe in 2020: Big data, bigger digital shadows, and biggest growth in the far east","volume":"2013","author":"Gantz","year":"2012","journal-title":"IDC iView, IDC Analyze Future"},{"volume-title":"Report: 80% of Global Datasphere Will Be  Unstructured By 2025","year":"2022","author":"Staff","key":"ref20"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1186\/s40537-019-0254-8"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1007\/978-0-387-39940-9"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1007\/s11192-018-2921-5"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1016\/j.ipm.2005.09.002"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1007\/978-3-540-77094-7_41"},{"year":"1999","author":"Teufel","article-title":"Argumentative zoning: Information extraction from scientific text","key":"ref26"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1007\/978-3-030-45439-5_17"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/eScience.2017.23"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.3389\/frma.2018.00021"},{"key":"ref30","first-page":"329","article-title":"Accurate information extraction from research papers using conditional random fields","volume-title":"Proc. Human Lang. Technol. Conf.","author":"Peng"},{"volume-title":"Citationie: Leveraging the Citation Graph for Scientific Information Extraction","year":"2021","author":"Viswanathan","key":"ref31"},{"key":"ref32","article-title":"Structured information extraction from complex scientific text with fine-tuned large language models","author":"Dunn","year":"2022","journal-title":"arXiv:2212.05238"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1109\/ESEM.2007.62"},{"key":"ref34","first-page":"1","article-title":"A time series is worth 64 words: Long-term forecasting with transformers","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Nie"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1145\/3637528.3672055"},{"doi-asserted-by":"publisher","key":"ref36","DOI":"10.1075\/li.30.1.03nad"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.21437\/ICSLP.2000-131"},{"key":"ref38","first-page":"1","article-title":"Definition, dictionaries and tagger for extended named entity hierarchy","volume-title":"Proc. 4th Int. Conf. Lang. Resour. Eval.","author":"Sekine"},{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.3115\/1699705.1699748"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1186\/s12859-019-3321-4"},{"key":"ref41","first-page":"25","article-title":"Clinical NER and relation extraction using Bi-Char-LSTMs and random forest classifiers","volume-title":"Proc. 1st Int. Workshop Medication Adverse Drug Event Detection, 4 May","author":"Magge"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1093\/database\/baw135"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.18653\/v1\/D17-1279"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.18653\/v1\/S17-2091"},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.3233\/SW-222986"},{"doi-asserted-by":"publisher","key":"ref46","DOI":"10.1007\/978-3-030-45442-5_52"},{"key":"ref47","first-page":"708","article-title":"Large-scale named entity disambiguation based on Wikipedia data","volume-title":"Proc. Joint Conf. Empirical Methods Natural Lang. Process. Comput. Natural Lang. Learn.","author":"Cucerzan"},{"key":"ref48","first-page":"1","article-title":"IIIT Hyderabad at TAC 2009","volume-title":"Proc. 2nd Text Anal. Conf.","author":"Varma"},{"key":"ref49","first-page":"771","article-title":"Collaborative ranking: A case study on entity linking","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Chen"},{"key":"ref50","first-page":"1375","article-title":"Local and global algorithms for disambiguation to Wikipedia","volume-title":"Proc. 49th Annu. Meeting Assoc. Comput. Linguistic","author":"Ratinov"},{"doi-asserted-by":"publisher","key":"ref51","DOI":"10.1109\/TKDE.2017.2730862"},{"key":"ref52","first-page":"782","article-title":"Robust disambiguation of named entities in text","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Hoffart"},{"doi-asserted-by":"publisher","key":"ref53","DOI":"10.24963\/ijcai.2019\/740"},{"key":"ref54","first-page":"675","article-title":"Neural collective entity linking","volume-title":"Proc. 27th Int. Conf. Comput. Linguistic","author":"Cao"},{"doi-asserted-by":"publisher","key":"ref55","DOI":"10.18653\/v1\/2020.emnlp-main.519"},{"doi-asserted-by":"publisher","key":"ref56","DOI":"10.18653\/v1\/P16-1059"},{"doi-asserted-by":"publisher","key":"ref57","DOI":"10.1145\/3366423.3380192"},{"doi-asserted-by":"publisher","key":"ref58","DOI":"10.1007\/s00778-019-00564-x"},{"doi-asserted-by":"publisher","key":"ref59","DOI":"10.1145\/3308558.3313685"},{"doi-asserted-by":"publisher","key":"ref60","DOI":"10.3233\/WEB-170354"},{"doi-asserted-by":"publisher","key":"ref61","DOI":"10.1109\/ICDE.2012.80"},{"doi-asserted-by":"publisher","key":"ref62","DOI":"10.18653\/v1\/P19-1513"},{"doi-asserted-by":"publisher","key":"ref63","DOI":"10.3233\/978-1-61499-649-1-105"},{"doi-asserted-by":"publisher","key":"ref64","DOI":"10.1007\/978-3-642-33290-6_17"},{"doi-asserted-by":"publisher","key":"ref65","DOI":"10.1093\/jamia\/ocx121"},{"key":"ref66","article-title":"The AI2 submission at the rich context competition","volume-title":"Rich Search and Discovery for Research Datasets: Building the Next Generation of Scholarly Infrastructure","author":"King","year":"2020"},{"key":"ref67","article-title":"Rich text competition","volume-title":"Rich Search and Discovery for Research Datasets: Building the Next Generation of Scholarly Infrastructure","author":"Hong","year":"2020"},{"key":"ref68","article-title":"Rich context competition phase 2","volume-title":"Rich Search and Discovery for Research Datasets: Building the Next Generation of Scholarly Infrastructure","author":"Otto","year":"2020"},{"key":"ref69","article-title":"Dice@rich context competition","volume-title":"Rich Search and Discovery for Research Datasets: Building the Next Generation of Scholarly Infrastructure","author":"Ngonga","year":"2020"},{"key":"ref70","article-title":"Simple extraction for social science publications","volume-title":"Rich Search and Discovery for Research Datasets: Building the Next Generation of Scholarly Infrastructure","author":"Prasetyo","year":"2020"},{"key":"ref71","article-title":"Dataset mention extraction in scientific articles using a bilstm-crf model","volume-title":"Rich Search and Discovery for Research Datasets: Building the Next Generation of Scholarly Infrastructure","author":"Zeng","year":"2020"},{"doi-asserted-by":"publisher","key":"ref72","DOI":"10.18653\/v1\/W19-2604"},{"doi-asserted-by":"publisher","key":"ref73","DOI":"10.18653\/v1\/D19-1383"},{"doi-asserted-by":"publisher","key":"ref74","DOI":"10.18653\/v1\/D19-1371"},{"key":"ref75","article-title":"Google\u2019s neural machine translation system: Bridging the gap between human and machine translation","author":"Wu","year":"2016","journal-title":"arXiv:1609.08144"},{"doi-asserted-by":"publisher","key":"ref76","DOI":"10.18653\/v1\/N18-2078"},{"doi-asserted-by":"publisher","key":"ref77","DOI":"10.48550\/ARXIV.1609.02907"},{"key":"ref78","article-title":"Syntax-infused transformer and BERT models for machine translation and natural language understanding","author":"Sundararaman","year":"2019","journal-title":"arXiv:1911.06156"},{"doi-asserted-by":"publisher","key":"ref79","DOI":"10.18653\/v1\/N16-1030"},{"key":"ref80","article-title":"DistilBERT, a distilled version of BERT: Smaller, faster, cheaper and lighter","author":"Sanh","year":"2019","journal-title":"arXiv:1910.01108"},{"volume-title":"Allennlp: A Deep Semantic Natural Language Processing Platform","year":"2018","author":"Gardner","key":"ref81"},{"key":"ref82","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2015","journal-title":"arXiv:1412.6980"},{"volume-title":"Pattern Recognition and Machine Learning","year":"2007","author":"Bishop","key":"ref83"},{"volume-title":"Introduction To Machine Learning With Python: A Guide for Data Scientists","year":"2018","author":"M\u00fcller","key":"ref84"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/10849525.pdf?arnumber=10849525","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T07:12:43Z","timestamp":1751094763000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10849525\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":84,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3532767","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}