{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T09:45:21Z","timestamp":1774691121317,"version":"3.50.1"},"reference-count":28,"publisher":"Ubiquity Press, Ltd.","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,6]]},"DOI":"10.5334\/dsj-2024-057","type":"journal-article","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T05:25:27Z","timestamp":1733462727000},"source":"Crossref","is-referenced-by-count":4,"title":["Semantic Schema Extraction in NoSQL Databases using BERT Embeddings"],"prefix":"10.5334","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5972-0147","authenticated-orcid":false,"given":"Saad","family":"Belefqih","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4688-912X","authenticated-orcid":false,"given":"Ahmed","family":"Zellou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2597-0455","authenticated-orcid":false,"given":"Mouna","family":"Berquedich","sequence":"additional","affiliation":[]}],"member":"3285","reference":[{"key":"key20241206114053_B1","doi-asserted-by":"crossref","first-page":"61","DOI":"10.5220\/0010899000003119","article-title":"\u2018Extraction process of the logical schema of a document-oriented NoSQL database\u2019","volume-title":"Proceedings of the 10th International Conference on Model-Driven Engineering and Software Development","year":"2022"},{"key":"key20241206114053_B2","first-page":"1","article-title":"\u2018A new filtering-based query processing: improving semantic caching efficiency in mediation systems\u2019","year":"2018"},{"issue":"4","key":"key20241206114053_B3","doi-asserted-by":"crossref","first-page":"497","DOI":"10.1007\/s00778-018-0532-7","article-title":"\u2018Parametric schema inference for massive JSON datasets\u2019","volume":"28","year":"2019","journal-title":"The VLDB Journal"},{"key":"key20241206114053_B4","first-page":"133","article-title":"\u2018A workload-driven approach for automatic schema generation for document stores\u2019","year":"2023"},{"key":"key20241206114053_B5","unstructured":"Belefqih, S. (2023) \u2018saadbelefqih\/extractionSchemaNoSQLDb\u2019. Available at: https:\/\/github.com\/saadbelefqih\/extractionSchemaNoSQLDb (Accessed: 17 November 2024)."},{"issue":"8","key":"key20241206114053_B6","first-page":"92","article-title":"\u2018Schema extraction in NoSQL databases: a systematic literature review\u2019","volume":"17","year":"2023","journal-title":"Recent Advances in Computer Science and Communications"},{"key":"key20241206114053_B7","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1109\/ICDEW.2018.00021","article-title":"\u2018Scaling up schema discovery for RDF datasets\u2019","volume-title":"2018 IEEE 34th International Conference on Data Engineering Workshops (ICDEW)","year":"2018"},{"key":"key20241206114053_B8","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1007\/978-81-322-3972-7_19","volume-title":"Fundamentals of Artificial Intelligence","year":"2020"},{"issue":"1","key":"key20241206114053_B9","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1017\/S1351324916000334","article-title":"\u2018Word2Vec\u2019","volume":"23","year":"2017","journal-title":"Natural Language Engineering"},{"issue":"1","key":"key20241206114053_B10","article-title":"\u2018Reactome graph database: efficient access to complex pathway data\u2019","volume":"14","year":"2018","journal-title":"PLoS Computational Biology"},{"key":"key20241206114053_B11","first-page":"175","volume-title":"Anais do Simp\u00f3sio Brasileiro de Banco de Dados (SBBD). Anais do XXXV Simp\u00f3sio Brasileiro de Bancos de Dados","year":"2020"},{"key":"key20241206114053_B12","first-page":"2036","article-title":"\u2018Performance analysis of NoSQL and relational databases with MongoDB and MySQL\u2019","year":"2019","journal-title":"Materials Today: Proceedings"},{"key":"key20241206114053_B13","article-title":"\u2018Extracting JSON schemas with tagged unions\u2019","year":"2023"},{"key":"key20241206114053_B14","doi-asserted-by":"crossref","first-page":"2462","DOI":"10.1109\/BigData.2017.8258204","article-title":"\u2018Uncovering the evolution history of data lakes\u2019","volume-title":"2017 IEEE International Conference on Big Data (Big Data)","year":"2017"},{"issue":"1","key":"key20241206114053_B15","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1186\/s40537-022-00645-9","article-title":"\u2018A universal approach for multi-model schema inference\u2019","volume":"9","year":"2022","journal-title":"Journal of Big Data"},{"key":"key20241206114053_B16","first-page":"228","volume-title":"Quantitative Evaluation of Systems","year":"2015"},{"key":"key20241206114053_B17","doi-asserted-by":"crossref","first-page":"264","DOI":"10.5220\/0010475102640271","article-title":"\u2018A text similarity-based process for extracting JSON conceptual schemas\u2019","volume-title":"Proceedings of the 23rd International Conference on Enterprise Information Systems","year":"2021"},{"key":"key20241206114053_B18","article-title":"\u2018Keeping NoSQL databases up to date \u2013 semantics of evolution operations and their impact on data quality\u2019","year":"2019"},{"key":"key20241206114053_B19","doi-asserted-by":"crossref","first-page":"1532","DOI":"10.3115\/v1\/D14-1162","article-title":"\u2018Glove: global vectors for word representation\u2019","volume-title":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)","year":"2014"},{"key":"key20241206114053_B20","first-page":"1","article-title":"\u2018Towards a data quality assessment in big data\u2019","year":"2020"},{"key":"key20241206114053_B21","unstructured":"Reimers, N. and Gurevych, I. (2019) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks, arXiv.org. Available at: https:\/\/arxiv.org\/abs\/1908.10084v1 (Accessed: 30 December 2023)."},{"key":"key20241206114053_B22","unstructured":"Semantic Textual Similarity Methods, Tools, and Applications: A Survey (2016). Available at: https:\/\/www.scielo.org.mx\/scielo.php?pid=S1405-55462016000400647&script=sci_arttext&tlng=en (Accessed: 31 December 2023)."},{"key":"key20241206114053_B23","first-page":"467","volume-title":"Conceptual Modeling","year":"2015"},{"key":"key20241206114053_B24","doi-asserted-by":"crossref","first-page":"102003","DOI":"10.1016\/j.datak.2022.102003","article-title":"\u2018An embedding driven approach to automatically detect identifiers and references in document stores\u2019","volume":"139","year":"2022","journal-title":"Data & Knowledge Engineering"},{"key":"key20241206114053_B25","article-title":"\u2018Darwin: a data platform for NoSQL schema evolution management and data migration\u2019","year":"2022"},{"key":"key20241206114053_B26","doi-asserted-by":"crossref","first-page":"601","DOI":"10.1109\/ICCSNT.2011.6182030","article-title":"\u2018Hadoop-HBase for large-scale data\u2019","volume-title":"Proceedings of 2011 International Conference on Computer Science and Network Technology","year":"2011"},{"key":"key20241206114053_B27","first-page":"1","article-title":"\u2018Towards a fuzzy mapping for mediation systems\u2019","volume-title":"2012 IEEE International Conference on Complex Systems (ICCS)","year":"2012"},{"key":"key20241206114053_B28","first-page":"64","volume-title":"Computational Collective Intelligence","year":"2018"}],"container-title":["Data Science Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/storage.googleapis.com\/jnl-up-j-dsj-files\/journals\/1\/articles\/1688\/6752d0abc93d2.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T08:02:58Z","timestamp":1761552178000},"score":1,"resource":{"primary":{"URL":"https:\/\/datascience.codata.org\/articles\/10.5334\/dsj-2024-057\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":28,"alternative-id":["10.5334\/dsj-2024-057"],"URL":"https:\/\/doi.org\/10.5334\/dsj-2024-057","relation":{},"ISSN":["1683-1470"],"issn-type":[{"value":"1683-1470","type":"print"}],"subject":[],"published":{"date-parts":[[2024]]},"article-number":"57"}}