{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T19:03:05Z","timestamp":1765479785897,"version":"3.48.0"},"reference-count":22,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3638723","type":"journal-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:44:00Z","timestamp":1764355440000},"page":"205854-205864","source":"Crossref","is-referenced-by-count":0,"title":["A Hybrid Framework for Scalable Data Quality: Comparing PySpark and AI-Powered Validation in Microsoft Fabric"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4501-6248","authenticated-orcid":false,"given":"Dinesh","family":"Eswararaj","sequence":"first","affiliation":[{"name":"Compunnel Software Inc., Irvine, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2829-3198","authenticated-orcid":false,"given":"Vandana","family":"Kollati","sequence":"additional","affiliation":[{"name":"Sogeti, Concord, NC, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6747-191X","authenticated-orcid":false,"given":"Lakshmana Rao","family":"Koppada","sequence":"additional","affiliation":[{"name":"PwC, Union City, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4646-6679","authenticated-orcid":false,"given":"RAM Sekhar","family":"Bodala","sequence":"additional","affiliation":[{"name":"Amtrak, Wilmington, DE, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5320-2065","authenticated-orcid":false,"given":"Ajay Babu","family":"Nellipudi","sequence":"additional","affiliation":[{"name":"Sr. Applications Developer\/Architect, ESRI, Redlands, CA, USA"}]}],"member":"263","reference":[{"volume-title":"Apache Spark Documentation","year":"2025","author":"Found","key":"ref1"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1002\/SERIES1345"},{"key":"ref3","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown"},{"key":"ref5","article-title":"Scaling laws for neural language models","author":"Kaplan","year":"2020","journal-title":"arXiv:2001.08361"},{"volume-title":"Microsoft Fabric Documentation","year":"2025","key":"ref6"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/uic-atc-scalcom-cbdcom-iop-smartworld.2016.0122"},{"volume-title":"Twitter Airline Sentiment","year":"2025","key":"ref8"},{"volume-title":"Synthetic U.S. E-Commerce Reviews","year":"2025","author":"Kumar","key":"ref9"},{"key":"ref10","first-page":"1","article-title":"Spark: Cluster computing with working sets","volume-title":"USENIX Workshop on Hot Topics in Cloud Computing","author":"Zaharia"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.4018\/jdwm.2009070101"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/mis.2009.36"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.2139\/ssrn.2595075"},{"key":"ref14","first-page":"1","article-title":"Quality issues when using big data in official statistics","volume-title":"Proc. Int. Conf. Big Data Stat.","author":"Righi"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.3390\/app9235037"},{"key":"ref16","first-page":"4797","article-title":"Big data analytics: Challenges and opportunities","volume-title":"Proc. IEEE Int. Conf. Big Data","author":"Batra"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/access.2019.2899751"},{"key":"ref18","first-page":"1150","article-title":"The end of an architectural era (it\u2019s time for a complete rewrite)","volume-title":"Proc. 33rd Int. Conf. Very Large Data Bases","author":"Stonebraker"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.5120\/ijca2025924727"},{"volume-title":"Essential PySpark for Scalable Data Analytics: A Beginner\u2019s Guide to Harnessing the Power and Ease of PySpark 3","year":"2021","author":"Nudurupati","key":"ref20"},{"key":"ref21","article-title":"Evaluation of distributed data processing frameworks in hybrid clouds","author":"Ullah","year":"2022","journal-title":"arXiv:2201.01948"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1186\/s40537-025-01118-5"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11271196.pdf?arnumber=11271196","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T18:46:44Z","timestamp":1765478804000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11271196\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3638723","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}