{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T20:54:18Z","timestamp":1775163258478,"version":"3.50.1"},"reference-count":63,"publisher":"Ubiquity Press, Ltd.","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,24]]},"DOI":"10.5334\/dsj-2025-027","type":"journal-article","created":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T12:06:29Z","timestamp":1758715589000},"source":"Crossref","is-referenced-by-count":3,"title":["Robust Machine Learning Algorithmic Rules for Detecting Air Pollution in the Lower Parts of the Atmosphere"],"prefix":"10.5334","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1134-547X","authenticated-orcid":false,"given":"Kassim","family":"Mwitondi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7033-6218","authenticated-orcid":false,"given":"Hugo Wai Leung","family":"Mak","sequence":"additional","affiliation":[]}],"member":"3285","reference":[{"key":"key20250924143638_B1","doi-asserted-by":"crossref","first-page":"100184","DOI":"10.1016\/j.sftr.2024.100184","article-title":"\u2018Deciphering the dynamics of human-environment interaction in China: Insights into renewable energy, sustainable consumption patterns, and carbon emissions\u2019","volume":"7","year":"2024","journal-title":"Sustainable Futures"},{"key":"key20250924143638_B2","doi-asserted-by":"crossref","first-page":"140325","DOI":"10.1016\/j.scitotenv.2020.140325","article-title":"\u2018Impact of \u201cblocking\u201d structure in the troposphere on the wintertime persistent heavy air pollution in northern China\u2019","volume":"741","year":"2020","journal-title":"Science of The Total Environment"},{"key":"key20250924143638_B3","volume-title":"Machine Learning: Fundamental Algorithms for Supervised and Unsupervised Learning With Real-World Applications (Advanced Data Analytics)","year":"2017"},{"issue":"1","key":"key20250924143638_B4","doi-asserted-by":"crossref","first-page":"152","DOI":"10.1038\/s41597-024-02956-3","article-title":"\u2018A general primer for data harmonization\u2019","volume":"11","year":"2024","journal-title":"Scientific data"},{"key":"key20250924143638_B5","doi-asserted-by":"crossref","first-page":"150721","DOI":"10.1016\/j.scitotenv.2021.150721","article-title":"\u2018Machine learning-based estimation of ground-level NO2 concentrations over China\u2019","volume":"807","year":"2022","journal-title":"Science of the Total Environment"},{"issue":"1","key":"key20250924143638_B6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"\u2018Maximum likelihood from incomplete data via the em algorithm\u2019","volume":"39","year":"1977","journal-title":"Journal of the Royal Statistical Society: Series B (Methodological)"},{"key":"key20250924143638_B7","first-page":"1","article-title":"\u2018Data harmonization as a keystone for data spaces: Challenges, techniques, and future trends\u2019","year":"2025"},{"key":"key20250924143638_B8","doi-asserted-by":"crossref","first-page":"106380","DOI":"10.1016\/j.resconrec.2022.106380","article-title":"\u2018Forecasting ground-level ozone concentration levels using machine learning\u2019","volume":"184","year":"2022","journal-title":"Resources, Conservation and Recycling"},{"issue":"5","key":"key20250924143638_B9","doi-asserted-by":"crossref","first-page":"874","DOI":"10.1016\/j.ecofro.2024.02.014","article-title":"\u2018Impact of environmental pollution from human activities on water, air quality and climate change\u2019","volume":"44","year":"2024","journal-title":"Ecological Frontiers"},{"key":"key20250924143638_B10","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1016\/j.ecolind.2018.12.038","article-title":"\u2018Dominant variables of global air pollution-climate interaction: Geographic insight\u2019","volume":"99","year":"2019","journal-title":"Ecological Indicators"},{"key":"key20250924143638_B11","doi-asserted-by":"crossref","first-page":"138579","DOI":"10.1016\/j.scitotenv.2020.138579","article-title":"\u2018Temperature inversions in the atmospheric boundary layer and lower troposphere over the Sichuan Basin, China: Climatology and impacts on air pollution\u2019","volume":"726","year":"2020","journal-title":"Science of the Total Environment"},{"key":"key20250924143638_B12","unstructured":"Global Alliance on Health and Pollution (2023) \u2018Global alliance on health and pollution\u2019. Available at: https:\/\/www.gahp.org\/"},{"key":"key20250924143638_B13","first-page":"369","article-title":"\u2018Sudden stratospheric warming impacts on the ionosphere\u2013thermosphere system: A review of recent progress\u2019","year":"2021","journal-title":"Ionosphere dynamics and applications"},{"issue":"4","key":"key20250924143638_B14","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1017\/S0305004100013517","article-title":"\u2018A connection between correlation and contingency\u2019","volume":"31","year":"1935","journal-title":"Mathematical Proceedings of the Cambridge Philosophical Society"},{"key":"key20250924143638_B15","doi-asserted-by":"crossref","first-page":"120652","DOI":"10.1016\/j.envpol.2022.120652","article-title":"\u2018Using cluster algorithms with a machine learning technique and pmf models to quantify local-specific origins of PM2.5 and associated metals in Taiwan\u2019","volume":"316","year":"2023","journal-title":"Environmental Pollution"},{"key":"key20250924143638_B16","first-page":"1","article-title":"\u2018Meteoroids as one of the sources for exosphere formation on airless bodies in the inner solar system\u2019","volume":"217","year":"2021","journal-title":"Space Science Reviews"},{"key":"key20250924143638_B17","volume-title":"Introduction to Clustering Large and High-Dimensional Data","year":"2007"},{"key":"key20250924143638_B18","first-page":"185","article-title":"\u2018Exploratory data analysis\u2019","year":"2016","journal-title":"Secondary Analysis of Electronic Health Records"},{"issue":"10","key":"key20250924143638_B19","doi-asserted-by":"crossref","first-page":"5783","DOI":"10.5194\/acp-23-5783-2023","article-title":"\u2018Progress in investigating long-term trends in the mesosphere, thermosphere, and ionosphere\u2019","volume":"23","year":"2023","journal-title":"Atmospheric Chemistry and Physics"},{"key":"key20250924143638_B20","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1016\/j.inffus.2022.10.008","article-title":"\u2018Deep learning for anomaly detection in multivariate time series: Approaches, applications, and challenges\u2019","volume":"91","year":"2023","journal-title":"Information Fusion"},{"issue":"1","key":"key20250924143638_B21","first-page":"1","article-title":"\u2018A survey on explainable anomaly detection\u2019","volume":"18","year":"2023","journal-title":"ACM Transactions on Knowledge Discovery from Data"},{"key":"key20250924143638_B22","doi-asserted-by":"crossref","first-page":"40608","DOI":"10.1109\/ACCESS.2023.3267813","article-title":"\u2018The fusion of data visualisation and data analytics in the process of mining digitalisation\u2019","volume":"11","year":"2023","journal-title":"IEEE Access"},{"key":"key20250924143638_B23","doi-asserted-by":"crossref","first-page":"117410","DOI":"10.1016\/j.atmosenv.2020.117410","article-title":"\u2018Observation of PM2.5 using a combination of satellite remote sensing and low-cost sensor network in Siberian urban areas with limited reference monitoring\u2019","volume":"227","year":"2020","journal-title":"Atmospheric Environment"},{"issue":"4","key":"key20250924143638_B24","doi-asserted-by":"crossref","first-page":"2124","DOI":"10.1021\/acs.est.1c06157","article-title":"\u2018Data-driven machine learning in environmental pollution: gains and problems\u2019","volume":"56","year":"2022","journal-title":"Environmental science & technology"},{"key":"key20250924143638_B25","doi-asserted-by":"crossref","unstructured":"Liu, Y., Tong, D., Cheng, J., Davis, S.J., Yu, S., Yarlagadda, B., Clarke, L.E., Brauer, M., Cohen, A.J., Kan, H. et al. (2022b) \u2018Role of climate goals and clean-air policies on reducing future air pollution deaths in China: a modelling study\u2019, The Lancet Planetary Health, 6(2), pp. e92\u2013e99. Available at: https:\/\/www.osti.gov\/servlets\/purl\/1855837","DOI":"10.1016\/S2542-5196(21)00326-0"},{"key":"key20250924143638_B26","unstructured":"Lloyd, S.P. (1957) \u2018Least squares quantization in PCM\u2019, Technical Report RR-5497, Bell Laboratories. Available at: https:\/\/www.stat.cmu.edu\/\u2013\u02ddrnugent\/PCMI2016\/papers\/LloydKMeans.pdf"},{"issue":"8","key":"key20250924143638_B27","doi-asserted-by":"crossref","first-page":"089401","DOI":"10.1088\/1674-1056\/ac76ab","article-title":"\u2018Collisionless magnetic reconnection in the magnetosphere\u2019","volume":"31","year":"2022","journal-title":"Chinese Physics B"},{"key":"key20250924143638_B28","first-page":"281","volume-title":"Proceedings of 5th Berkeley Symposium on Mathematical Statistics and Probability","year":"1967"},{"key":"key20250924143638_B29","doi-asserted-by":"crossref","first-page":"102868","DOI":"10.1016\/j.scs.2021.102868","article-title":"\u2018Comparative assessments and insights of data openness of 50 smart cities in air quality aspects\u2019","volume":"69","year":"2021","journal-title":"Sustainable Cities and Society"},{"key":"key20250924143638_B30","first-page":"10","article-title":"\u2018Improved satellite retrieval of tropospheric NO2 column density via updating of air mass factor (AMF): Case study of southern China\u2019","year":"2018","journal-title":"Remote Sensing"},{"issue":"12","key":"key20250924143638_B31","doi-asserted-by":"crossref","first-page":"6532","DOI":"10.3390\/ijerph18126532","article-title":"\u2018Spatial and socio-classification of traffic pollutant emissions and associated mortality rates in high-density Hong Kong via improved data analytic approaches\u2019","volume":"18","year":"2021","journal-title":"International Journal of Environmental Research and Public Health"},{"issue":"5","key":"key20250924143638_B32","doi-asserted-by":"crossref","first-page":"831","DOI":"10.1007\/s11633-022-1411-7","article-title":"\u2018A survey of synthetic data augmentation methods in machine vision\u2019","volume":"21","year":"2024","journal-title":"Machine Intelligence Research"},{"issue":"3","key":"key20250924143638_B33","article-title":"\u2018Statistical estimate of radon concentration from passive and active detectors in Doha\u2019","volume":"3","year":"2018","journal-title":"Data"},{"key":"key20250924143638_B34","article-title":"\u2018Amenability of the United Nations Sustainable Development Goals to big data modelling\u2019","year":"2018"},{"issue":"97","key":"key20250924143638_B35","article-title":"\u2018A robust machine learning approach to SDG data segmentation\u2019","volume":"7","year":"2020","journal-title":"Journal of Big Data"},{"key":"key20250924143638_B36","first-page":"WDS247","article-title":"\u2018A data-driven method for selecting optimal models based on graphical visualisation of differences in sequentially fitted ROC model parameters\u2019","volume":"12","year":"2013","journal-title":"Data Science Journal"},{"issue":"3","key":"key20250924143638_B37","doi-asserted-by":"crossref","first-page":"293","DOI":"10.12785\/jsap\/020312","article-title":"\u2018A data-based method for harmonising heterogeneous data modelling techniques across data mining applications\u2019","volume":"2","year":"2013","journal-title":"Journal of Statistics Applications & Probability"},{"issue":"7","key":"key20250924143638_B38","article-title":"\u2018Dealing with Randomness and Concept Drift in Large Datasets\u2019","volume":"6","year":"2021","journal-title":"Data"},{"issue":"4","key":"key20250924143638_B39","first-page":"230","article-title":"\u2018An iterative multiple sampling method for intrusion detection\u2019","volume":"27","year":"2018","journal-title":"Information Security Journal: A Global Perspective"},{"issue":"2","key":"key20250924143638_B40","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1093\/eurheartj\/ehu458","article-title":"\u2018Expert position paper on air pollution and cardiovascular disease\u2019","volume":"36","year":"2015","journal-title":"European Heart journal"},{"issue":"1","key":"key20250924143638_B41","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1038\/s41612-022-00275-1","article-title":"\u2018Coupled stratosphere-troposphere-atlantic multidecadal oscillation and its importance for near-future climate projection\u2019","volume":"5","year":"2022","journal-title":"NPJ Climate and Atmospheric Science"},{"issue":"1","key":"key20250924143638_B42","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1186\/s40537-023-00748-x","article-title":"\u2018A comparison of machine learning methods for ozone pollution prediction\u2019","volume":"10","year":"2023","journal-title":"Journal of Big Data"},{"key":"key20250924143638_B43","doi-asserted-by":"crossref","first-page":"100210","DOI":"10.1016\/j.bdr.2021.100210","article-title":"\u2018Using big data to improve safety performance: An application of process mining to enhance data visualisation\u2019","volume":"25","year":"2021","journal-title":"Big Data Research"},{"key":"key20250924143638_B44","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1016\/j.procs.2021.12.189","article-title":"\u2018Diagnostic analysis for outlier detection in big data analytics\u2019","volume":"197","year":"2022","journal-title":"Procedia Computer Science"},{"key":"key20250924143638_B45","article-title":"\u2018Development of a mathematical model and an efficient computational algorithm for predicting atmospheric pollution in industrial regions\u2019","year":"2024"},{"issue":"9","key":"key20250924143638_B46","doi-asserted-by":"crossref","first-page":"093004","DOI":"10.1088\/1748-9326\/ac1df8","article-title":"\u2018Combined impacts of climate and air pollution on human health and agricultural productivity\u2019","volume":"16","year":"2021","journal-title":"Environmental Research Letters"},{"issue":"4","key":"key20250924143638_B47","doi-asserted-by":"crossref","first-page":"648","DOI":"10.3390\/atmos14040648","article-title":"\u2018Use of association algorithms in air quality monitoring\u2019","volume":"14","year":"2023","journal-title":"Atmosphere"},{"issue":"1","key":"key20250924143638_B48","first-page":"43","article-title":"\u2018The changing role of China in global environmental governance\u2019","volume":"1","year":"2016","journal-title":"Rising Powers Quarterly"},{"key":"key20250924143638_B49","unstructured":"United Nations (2015) \u2018Sustainable Development Goals\u2019. Available at: https:\/\/www.un.org\/sustainabledevelopment\/sustainable-development-goals\/"},{"key":"key20250924143638_B50","unstructured":"United Nations Environment Programme (UNEP) (2023) \u2018Annual report: Keeping the promise\u2019. Available at: https:\/\/www.unep.org\/annualreport\/2023"},{"issue":"22","key":"key20250924143638_B51","doi-asserted-by":"crossref","first-page":"4622","DOI":"10.1016\/j.atmosenv.2007.03.039","article-title":"\u2018Operational air pollution modelling in the UK\u2014street canyon applications and challenges\u2019","volume":"41","year":"2007","journal-title":"Atmospheric Environment"},{"key":"key20250924143638_B52","article-title":"\u2018A comprehensive survey on data augmentation\u2019","year":"2024","journal-title":"arXiv preprint arXiv:240509591"},{"key":"key20250924143638_B53","unstructured":"World Health Organization (2021) Global Quality Guidelines: Particulate Matter (PM2.5 & PM10), Ozone, Nitrogen Dioxide, Sulfur Dioxide and Carbon Monoxide, World Health Organisation, p. 290. Available at: https:\/\/www.who.int\/publications\/i\/item\/9789240034228"},{"issue":"3","key":"key20250924143638_B54","article-title":"\u2018Association rule mining with a special rule coding and dynamic genetic algorithm for air quality impact factors in Beijing, China\u2019","volume":"19","year":"2024","journal-title":"PloS one"},{"issue":"1","key":"key20250924143638_B55","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1038\/s41612-023-00333-2","article-title":"\u2018Important role of stratosphere-troposphere coupling in the arctic mid-to-upper tropospheric warming in response to sea-ice loss\u2019","volume":"6","year":"2023","journal-title":"npj Climate and Atmospheric Science"},{"key":"key20250924143638_B56","doi-asserted-by":"crossref","first-page":"1417","DOI":"10.1016\/j.envpol.2018.08.029","article-title":"\u2018Evaluation of machine learning techniques with multiple remote sensing datasets in estimating monthly concentrations of ground-level PM2.5\u2019","volume":"242","year":"2018","journal-title":"Environmental pollution"},{"issue":"2","key":"key20250924143638_B57","doi-asserted-by":"crossref","first-page":"643","DOI":"10.3390\/su16020643","article-title":"\u2018Future projections of global plastic pollution: Scenario analyses and policy implications\u2019","volume":"16","year":"2024","journal-title":"Sustainability"},{"key":"key20250924143638_B58","doi-asserted-by":"crossref","first-page":"119347","DOI":"10.1016\/j.atmosenv.2022.119347","article-title":"\u2018Deep learning for air pollutant concentration prediction: A review\u2019","volume":"290","year":"2022","journal-title":"Atmospheric Environment"},{"key":"key20250924143638_B59","doi-asserted-by":"crossref","first-page":"112009","DOI":"10.1016\/j.envres.2021.112009","article-title":"\u2018Cluster analysis of PM2.5 pollution in China using the frequent itemset clustering approach\u2019","volume":"204","year":"2022","journal-title":"Environmental Research"},{"issue":"6","key":"key20250924143638_B60","article-title":"\u2018Overview of particulate air pollution and human health in China: Evidence, challenges, and opportunities\u2019","volume":"3","year":"2022","journal-title":"The Innovation"},{"issue":"1","key":"key20250924143638_B61","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1257\/jep.31.1.71","article-title":"\u2018A new era of pollution progress in urban China?\u2019","volume":"31","year":"2017","journal-title":"Journal of Economic Perspectives"},{"issue":"5","key":"key20250924143638_B62","doi-asserted-by":"crossref","first-page":"3469","DOI":"10.1109\/TII.2020.3022432","article-title":"\u2018Variational LSTM enhanced anomaly detection for industrial big data\u2019","volume":"17","year":"2021","journal-title":"IEEE Transactions on Industrial Informatics"},{"key":"key20250924143638_B63","first-page":"1","volume-title":"A Clean Air Sustainable Development Goal (SDG)","year":"2020"}],"container-title":["Data Science Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/storage.googleapis.com\/jnl-up-j-dsj-files\/journals\/1\/articles\/1867\/68aeece4b39c4.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T08:06:14Z","timestamp":1761552374000},"score":1,"resource":{"primary":{"URL":"http:\/\/datascience.codata.org\/articles\/10.5334\/dsj-2025-027\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":63,"alternative-id":["10.5334\/dsj-2025-027"],"URL":"https:\/\/doi.org\/10.5334\/dsj-2025-027","relation":{},"ISSN":["1683-1470"],"issn-type":[{"value":"1683-1470","type":"print"}],"subject":[],"published":{"date-parts":[[2025]]},"article-number":"27"}}