{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T14:57:03Z","timestamp":1776783423124,"version":"3.51.2"},"reference-count":79,"publisher":"Tech Science Press","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.063551","type":"journal-article","created":{"date-parts":[[2025,4,27]],"date-time":"2025-04-27T04:07:37Z","timestamp":1745726857000},"page":"1073-1099","source":"Crossref","is-referenced-by-count":11,"title":["The Future of Artificial Intelligence in the Face of Data Scarcity"],"prefix":"10.32604","volume":"84","author":[{"given":"Hemn Barzan","family":"Abdalla","sequence":"first","affiliation":[]},{"given":"Yulia","family":"Kumar","sequence":"additional","affiliation":[]},{"given":"Jose","family":"Marchena","sequence":"additional","affiliation":[]},{"given":"Stephany","family":"Guzman","sequence":"additional","affiliation":[]},{"given":"Ardalan","family":"Awlla","sequence":"additional","affiliation":[]},{"given":"Mehdi","family":"Gheisari","sequence":"additional","affiliation":[]},{"given":"Maryam","family":"Cheraghy","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","first-page":"292","article-title":"The rise of synthetic data: enhancing AI and machine learning model training to address data scarcity and mitigate privacy risks","volume":"1","author":"Singh","year":"2021","journal-title":"J Artif Intell Res Appl"},{"key":"ref2","unstructured":"Panagiotou E, Qian H, Marx S, Ntoutsi E. Generative AI based augmentation for offshore jacket design: an integrated approach for mixed tabular data generation under data scarcity and imbalance. [cited 2025 Jan 1]. Available from: 10.2139\/ssrn.4703856."},{"key":"ref3","doi-asserted-by":"crossref","first-page":"4175","DOI":"10.1080\/00207543.2020.1758355","article-title":"Reliability assessment of high-quality new products with data scarcity","volume":"59","author":"Zhang","year":"2021","journal-title":"Int J Prod Res"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1007\/s13347-024-00795-z","article-title":"Digital duplicates and the scarcity problem: might AI make us less scarce and therefore less valuable?","volume":"37","author":"Danaher","year":"2024","journal-title":"Philos Technol"},{"key":"ref5","author":"Shen","year":"2023","journal-title":"AI in education: effective machine learning methods to improve data scarcity and knowledge generalization"},{"key":"ref6","unstructured":"Bai J, Alzubaidi L, Wang Q, Kuhl E, Bennamoun M, Gu Y. Utilising physics-guided deep learning to overcome data scarcity. arXiv:2211.15664. 2022."},{"key":"ref7","author":"Kaai","year":"2024","journal-title":"Addressing data scarcity in domain generalization for computer vision applications in image classification"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"264","DOI":"10.3390\/info15050264","article-title":"Addressing data scarcity in the medical domain: a GPT-based approach for synthetic data generation and feature extraction","volume":"15","author":"Sufi","year":"2024","journal-title":"Information"},{"key":"ref9","first-page":"1","article-title":"How to build good AI solutions when data is scarce","volume":"64","author":"Ramakrishnan","year":"2022","journal-title":"MIT Sloan Manag Rev"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"7082","DOI":"10.3390\/app13127082","article-title":"Re-thinking data strategy and integration for artificial intelligence: concepts, opportunities, and challenges","volume":"13","author":"Aldoseri","year":"2023","journal-title":"Appl Sci"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"100778","DOI":"10.1016\/j.coche.2021.100778","article-title":"Audacity of huge: overcoming challenges of data scarcity and data quality for machine learning in computational materials discovery","volume":"36","author":"Nandy","year":"2022","journal-title":"Curr Opin Chem Eng"},{"key":"ref12","unstructured":"OpenAI, Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, et al. GPT-4 technical report. arXiv:2303.08774. 2023."},{"key":"ref13","series-title":"International Conference on Machine Learning","first-page":"265","article-title":"Scaling laws for generative mixed-modal language models","author":"Aghajanyan","year":"2023 Jul 23\u201329"},{"key":"ref14","series-title":"Forty-First International Conference on Machine Learning","article-title":"Position: will we run out of data? Limits of LLM scaling based on human-generated data","author":"Villalobos","year":"2024 Jul 21\u201327"},{"key":"ref15","unstructured":"Villalobos P, Ho A, Sevilla J, Besiroglu T, Heim L, Hobbhahn M. Will we run out of data? Limits of LLM scaling based on human-generated data. arXiv:2211.04325. 2022. doi:10.48550\/arXiv.2211.04325."},{"key":"ref16","first-page":"1","article-title":"PaLM: scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J Mach Learn Res"},{"key":"ref17","first-page":"1","article-title":"Discovering data sets through machine learning: an ensemble approach to uncovering the prevalence of government-funded data sets","volume":"4","author":"Hausen","year":"2024","journal-title":"Harv Data Sci Rev"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"1329","DOI":"10.1007\/s10994-019-05791-5","article-title":"Data scarcity, robustness and extreme multi-label classification","volume":"108","author":"Babbar","year":"2019","journal-title":"Mach Learn"},{"key":"ref19","unstructured":"Larson J, Steven Truitt S. GraphRAG: unlocking LLM discovery on narrative private data. [cited 2025 Jan 15]. Available from: https:\/\/www.microsoft.com\/en-us\/research\/blog\/graphrag-unlocking-llm-discovery-on-narrative-private-data\/."},{"key":"ref20","doi-asserted-by":"crossref","first-page":"102523","DOI":"10.1016\/j.is.2025.102523","article-title":"On the use of trajectory data for tackling data scarcity","volume":"130","author":"Pons","year":"2025","journal-title":"Inf Syst"},{"key":"ref21","series-title":"Proceedings of the 58th Hawaii International Conference on System Sciences","doi-asserted-by":"crossref","DOI":"10.24251\/HICSS.2025.822","article-title":"Not enough data to be fair? Evaluating fairness implications of data scarcity solutions","author":"Karst","year":"2025 Jan 7\u201310"},{"key":"ref22","first-page":"1","article-title":"Data scarcity in recommendation systems: a survey","volume":"3","author":"Chen","year":"2025","journal-title":"ACM Trans Recomm Syst"},{"key":"ref23","doi-asserted-by":"crossref","unstructured":"Khalil M, Vadiee F, Shakya R, Liu Q. Creating artificial students that never existed: leveraging large language models and CTGANs for synthetic data generation. arXiv:2501.01793. 2025. doi:10.1145\/3706468.","DOI":"10.1145\/3706468.3706523"},{"key":"ref24","unstructured":"White J, Madaan P, Shenoy N, Agnihotri A, Sharma M, Doshi J. A case for rejection in low resource ML deployment. arXiv:2208.06359. 2022."},{"key":"ref25","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1016\/j.dsm.2023.06.001","article-title":"Systematic review of data-centric approaches in artificial intelligence and machine learning","volume":"6","author":"Singh","year":"2023","journal-title":"Data Sci Manag"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"24578","DOI":"10.1109\/ACCESS.2022.3154825","article-title":"A mirror environment to produce artificial intelligence training data","volume":"10","author":"Li","year":"2022","journal-title":"IEEE Access"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10618-024-01081-4","article-title":"SynthEval: a framework for detailed utility and privacy evaluation of tabular synthetic data","volume":"39","author":"Lautrup","year":"2025","journal-title":"Data Min Knowl Discov"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"108734","DOI":"10.1016\/j.compbiomed.2024.108734","article-title":"Current strategies to address data scarcity in artificial intelligence-based drug discovery: a comprehensive review","volume":"179","author":"Gangwal","year":"2024","journal-title":"Comput Biol Med"},{"key":"ref29","unstructured":"Niel O. A novel algorithm can generate data to train machine learning models in conditions of extreme scarcity of real world data. arXiv:2305.00987. 2023."},{"key":"ref30","doi-asserted-by":"crossref","first-page":"551","DOI":"10.1109\/TII.2021.3133625","article-title":"On the private data synthesis through deep generative models for data scarcity of industrial Internet of Things","volume":"19","author":"Chen","year":"2023","journal-title":"IEEE Trans Ind Inform"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3502287","article-title":"A systematic review on data scarcity problem in deep learning: solution and applications","volume":"54","author":"Bansal","year":"2022","journal-title":"ACM Comput Surv"},{"key":"ref32","doi-asserted-by":"crossref","unstructured":"Nahid MMH, Bin Hasan S. SafeSynthDP: leveraging large language models for privacy-preserving synthetic data generation using differential privacy. arXiv:2412.20641. 2024.","DOI":"10.32388\/PJIL3E"},{"key":"ref33","series-title":"International Conference on Intelligent Information Hiding and Multimedia Signal Processing","first-page":"347","article-title":"Data augmentation based on topic relevance to enhance text classification in scarcity of training data","author":"Zou","year":"2022 Dec 16\u201318"},{"key":"ref34","unstructured":"Hoang V. Mitigating data scarcity for large language models. arXiv:2302.01806. 2023."},{"key":"ref35","doi-asserted-by":"crossref","first-page":"46","DOI":"10.1186\/s40537-023-00727-2","article-title":"A survey on deep learning tools dealing with data scarcity: definitions, challenges, solutions, tips, and applications","volume":"10","author":"Alzubaidi","year":"2023","journal-title":"J Big Data"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1017\/pan.2023.20","article-title":"Less annotating, more classifying: addressing the data scarcity issue of supervised machine learning with deep transfer learning and BERT-NLI","volume":"32","author":"Laurer","year":"2024","journal-title":"Polit Anal"},{"key":"ref37","series-title":"AHFE, 2022 Conference on Applied Human Factors and Ergonomics International","article-title":"Proposal for the generation of profiles using a synthetic database","author":"Quito","year":"2022 Jul 24\u201328"},{"key":"ref38","author":"Oxford","year":"2024","journal-title":"Data scarcity challenges AI developers globally"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"2575","DOI":"10.1109\/TNNLS.2022.3190451","article-title":"RCT: resource constrained training for edge AI","volume":"35","author":"Huang","year":"2022","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref40","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/brusselsprivacyhub.com\/wp-content\/uploads\/2024\/02\/Personal-Data-Protection-in-Brazil.pdf."},{"key":"ref41","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.novaoneadvisor.com\/report\/artificial-intelligence-market."},{"key":"ref42","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/ourworldindata.org\/data-insights\/investment-in-generative-ai-has-surged-recently."},{"key":"ref43","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/hai.stanford.edu\/news\/ai-index-state-ai-13-charts."},{"key":"ref44","doi-asserted-by":"crossref","first-page":"104137","DOI":"10.1016\/j.yofte.2025.104137","article-title":"Addressing data scarcity in ML-based failure-cause identification in optical networks through generative models","volume":"90","author":"Healy","year":"2025","journal-title":"Opt Fiber Technol"},{"key":"ref45","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.fbcinc.com\/source\/virtualhall_images\/2024_Virtual_Events\/USDA_Innovation\/Databricks\/State_of_Data___AI_Resource.pdf."},{"key":"ref46","unstructured":"Belkada Y, Dettmers T, Pagnoni A, Gugger S, Mangrulkar S. Making LLMs even more accessible with bitsandbytes, 4-bit quantization and QLoRA. [cited 2025 Jan 15]. Available from: https:\/\/huggingface.co\/blog\/4bit-transformers-bitsandbytes."},{"key":"ref47","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1186\/s40537-022-00659-3","article-title":"A brief survey on big data: technologies, terminologies and data-intensive applications","volume":"9","author":"Abdalla","year":"2022","journal-title":"J Big Data"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"2250083","DOI":"10.1142\/S0219649222500836","article-title":"Comprehensive analysis of various big data classification techniques: a challenging overview","volume":"22","author":"Abdalla","year":"2023","journal-title":"J Info Know Mgmt"},{"key":"ref49","series-title":"Proceedings of the 2024 Asia Pacific Conference on Computing Technologies, Communications and Networking","first-page":"60","article-title":"Big data: past, present, and future insights","author":"Abdalla","year":"2024 Jul 26\u201327"},{"key":"ref50","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.sciencealert.com\/the-world-is-running-out-of-data-to-feed-ai-experts-warn."},{"key":"ref51","doi-asserted-by":"crossref","first-page":"186","DOI":"10.1038\/s41746-023-00927-3","article-title":"Harnessing the power of synthetic data in healthcare: innovation, application, and privacy","volume":"6","author":"Giuffr\u00e8","year":"2023","journal-title":"NPJ Digit Med"},{"key":"ref52","unstructured":"Dang H, Mecke L, Lehmann F, Goller S, Buschek D. How to prompt? Opportunities and challenges of zero- and few-shot learning for human-AI interaction in creative applications of generative models. arXiv:2209.01390. 2022."},{"key":"ref53","unstructured":"Yang S, Xiao W, Zhang M, Guo S, Zhao J, Shen F. Image data augmentation for deep learning: A survey. arXiv:2204.08610. 2022."},{"key":"ref54","doi-asserted-by":"crossref","first-page":"1993","DOI":"10.3390\/healthcare10101993","article-title":"IoT-based healthcare-monitoring system towards improving quality of life: a review","volume":"10","author":"Abdulmalek","year":"2022","journal-title":"Healthcare"},{"key":"ref55","doi-asserted-by":"crossref","first-page":"3509","DOI":"10.3390\/electronics13173509","article-title":"A systematic review of synthetic data generation techniques using generative AI","volume":"13","author":"Goyal","year":"2024","journal-title":"Electronics"},{"key":"ref56","doi-asserted-by":"crossref","first-page":"2761","DOI":"10.1007\/s11831-023-09884-2","article-title":"Self-supervised learning: a succinct review","volume":"30","author":"Rani","year":"2023","journal-title":"Arch Comput Methods Eng"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"108487","DOI":"10.1016\/j.ymssp.2021.108487","article-title":"A perspective survey on deep transfer learning for fault diagnosis in industrial scenarios: theories, applications and challenges","volume":"167","author":"Li","year":"2022","journal-title":"Mech Syst Signal Process"},{"key":"ref58","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/technologymagazine.com\/articles\/how-googles-ai-breakthroughs-earned-nobel-recognition."},{"key":"ref59","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/research.google\/blog\/automl-for-large-scale-image-classification-and-object-detection."},{"key":"ref60","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/innovationexchange.mayoclinic.org\/an-introduction-to-federal-funding-for-innovation\/."},{"key":"ref61","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.techmonitor.ai\/risks\/extreme-weather-events\/ibm-weather-and-climate-model?cf-view."},{"key":"ref62","first-page":"251","article-title":"Universities engaging social media users: an investigation of quantitative relationships between universities\u2019 Facebook followers\/interactions and university attributes","volume":"29","author":"Lund","year":"2019","journal-title":"J Mark High Educ"},{"key":"ref63","doi-asserted-by":"crossref","first-page":"3660","DOI":"10.3390\/electronics13183660","article-title":"A critical AI view on autonomous vehicle navigation: the growing danger","volume":"13","author":"Miller","year":"2024","journal-title":"Electronics"},{"key":"ref64","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.europarl.europa.eu\/RegData\/etudes\/STUD\/2020\/641530\/EPRS_STU(2020)641530_EN.pdf."},{"key":"ref65","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/oag.ca.gov\/privacy\/ccpa."},{"key":"ref66","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.twobirds.com\/en\/insights\/2024\/china\/ai-governance-in-china-strategies-initiatives-and-key."},{"key":"ref67","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.hhs.gov\/hipaa\/for-professionals\/privacy\/laws."},{"key":"ref68","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/www.kaggle.com\/datasets\/mlg-ulb\/creditcardfraud\/data."},{"key":"ref69","unstructured":"[cited 2025 Mar 12]. Available from: https:\/\/archive.ics.uci.edu\/dataset\/17\/breast+cancer+wisconsin+diagnostic."},{"key":"ref70","doi-asserted-by":"crossref","first-page":"4991","DOI":"10.3390\/electronics13244991","article-title":"A comprehensive review of AI advancement using testFAILS and testFAILS-2 for the pursuit of AGI","volume":"13","author":"Kumar","year":"2024","journal-title":"Electronics"},{"key":"ref71","unstructured":"Haruni R. Nvidia CEO maps out bold vision: AGI and robotics set to merge. [cited 2025 Jan 15]. Available from: https:\/\/wallstreetpit.com\/120150-nvidia-ceo-maps-out-bold-vision-agi-and-robotics-set-to-merge\/."},{"key":"ref72","unstructured":"Barlow G. Altman predicts artificial superintelligence (AGI) will happen this year. [cited 2025 Jan 15]. Available from: https:\/\/www.techradar.com\/computing\/artificial-intelligence\/sam-altman-predicts-artificial-superintelligence-agi-will-happen-this-year."},{"key":"ref73","unstructured":"Robison K. OpenAI cofounder Ilya Sutskever says the way AI is built is about to change. [cited 2025 Jan 15]. Available from: https:\/\/www.theverge.com\/2024\/12\/13\/24320811\/what-ilya-sutskever-sees-openai-model-data-training."},{"key":"ref74","doi-asserted-by":"crossref","first-page":"3431","DOI":"10.3390\/electronics13173431","article-title":"Bias and cyberbullying detection and data generation using transformer artificial intelligence models and top large language models","volume":"13","author":"Kumar","year":"2024","journal-title":"Electronics"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"10176","DOI":"10.3390\/app142210176","article-title":"The AI-powered evolution of big data","volume":"14","author":"Kumar","year":"2024","journal-title":"Appl Sci"},{"key":"ref76","unstructured":"Edge D, Larson J. LazyGraphRAG: setting a new standard for quality and cost. [cited 2025 Jan 15]. Available from: https:\/\/www.microsoft.com\/en-us\/research\/blog\/lazygraphrag-setting-a-new-standard-for-quality-and-cost\/."},{"key":"ref77","doi-asserted-by":"crossref","unstructured":"Feng Y, Shen A, Hu J, Liang Y, Wang S, Du J. Enhancing few-shot learning with integrated data and GAN model approaches. arXiv:2411.16567. 2024.","DOI":"10.1109\/DSInS64146.2024.10992169"},{"key":"ref78","unstructured":"Wiethof C, Bittner EA. Hybrid intelligence\u2014combining the human in the loop with the computer in the loop: a systematic literature review. [cited 2025 Jan 1]. Available from: https:\/\/www.researchgate.net\/publication\/356209722."},{"key":"ref79","doi-asserted-by":"crossref","first-page":"e1507","DOI":"10.1002\/widm.1507","article-title":"A systematic review of green AI","volume":"13","author":"Verdecchia","year":"2023","journal-title":"Wiley Interdiscip Rev Data Min Knowl Discov"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-84-1\/TSP_CMC_63551\/TSP_CMC_63551.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:38:12Z","timestamp":1763343492000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v84n1\/61743"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":79,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.063551","relation":{},"ISSN":["1546-2226"],"issn-type":[{"value":"1546-2226","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}