{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T20:54:48Z","timestamp":1773521688014,"version":"3.50.1"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with 
Applications"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.eswa.2026.131242","type":"journal-article","created":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T15:44:44Z","timestamp":1768751084000},"page":"131242","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["HOMAYON: A hybrid fusion transformer for microtonal music auto-tagging using melody-aware timbre representations"],"prefix":"10.1016","volume":"309","author":[{"given":"Mehdi","family":"Kiani","sequence":"first","affiliation":[]},{"given":"Reza","family":"Ramezani","sequence":"additional","affiliation":[]},{"given":"MohamadHadi","family":"Ayanbod","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"1","key":"10.1016\/j.eswa.2026.131242_b0005","doi-asserted-by":"crossref","first-page":"187","DOI":"10.37934\/araset.46.1.187200","article-title":"Arabic music genre identification","volume":"46","author":"Ahmed","year":"2024","journal-title":"Journal of Advanced Research in Applied Sciences and Engineering Technology"},{"key":"10.1016\/j.eswa.2026.131242_b0010","unstructured":"Al Tawil, A. Ar-MGC: Arabic Music Genre Classification Dataset (Kaggle."},{"key":"10.1016\/j.eswa.2026.131242_b0015","unstructured":"Aleksandra Ma, T., & Lerch, A. (2024). Music auto-tagging in the long tail: A few-shot approach. arXiv e-prints, arXiv: 2409.07730. https:\/\/doi.org\/10.48550\/arXiv.2409.07730."},{"key":"10.1016\/j.eswa.2026.131242_b0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.entcom.2025.100929","article-title":"Music genre classification using deep neural networks and data augmentation","volume":"53","author":"Ba","year":"2025","journal-title":"Entertainment Computing"},{"key":"10.1016\/j.eswa.2026.131242_b0025","series-title":"2024 4th International Conference on Sustainable Expert Systems (ICSES), blaler. 
Turkish Music Emotion Dataset (Kaggle)","article-title":"Optimizing music genre classification using CNN sequential models and deep learning techniques","author":"Beri","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0030","series-title":"2024 International Conference on Artificial Intelligence and Emerging Technology (Global AI Summit)","article-title":"Mood classification of indian melodies automatically through random forest method","author":"Chauhan","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0035","unstructured":"Dataset, M. S. (2011). Million song dataset. In."},{"key":"10.1016\/j.eswa.2026.131242_b0040","series-title":"2014 IEEE international conference on acoustics, speech and signal processing (ICASSP)","article-title":"End-to-end learning for music audio","author":"Dieleman","year":"2014"},{"key":"10.1016\/j.eswa.2026.131242_b0045","unstructured":"Ding, Y., & Lerch, A. (2023). Audio embeddings as teachers for music classification. arXiv preprint arXiv:2306.17424. https:\/\/doi.org\/10.48550\/arXiv.2306.17424."},{"key":"10.1016\/j.eswa.2026.131242_b0050","doi-asserted-by":"crossref","unstructured":"Doh, S., Lee, J., Jeong, D., & Nam, J. (2025). Musical word embedding for music tagging and retrieval. IEEE Transactions on Audio, Speech and Language Processing. 
https:\/\/doi.org\/10.1109\/TASLPRO.2025.3577408.","DOI":"10.1109\/TASLPRO.2025.3577408"},{"key":"10.1016\/j.eswa.2026.131242_b0055","series-title":"ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","article-title":"Joint music and language attention models for zero-shot music tagging","author":"Du","year":"2024"},{"issue":"25","key":"10.1016\/j.eswa.2026.131242_b0060","doi-asserted-by":"crossref","first-page":"21219","DOI":"10.1007\/s00521-025-11433-w","article-title":"Explainable deep learning techniques for wind speed forecasting in coastal areas: Integrating model configuration, regularization, early stopping, and SHAP analysis","volume":"37","author":"Durap","year":"2025","journal-title":"Neural Computing and Applications"},{"issue":"9","key":"10.1016\/j.eswa.2026.131242_b0065","doi-asserted-by":"crossref","DOI":"10.14569\/IJACSA.2024.0150918","article-title":"Texture feature and mel-spectrogram analysis for music sound classification","volume":"15","author":"ElAlami","year":"2024","journal-title":"International Journal of Advanced Computer Science & Applications"},{"key":"10.1016\/j.eswa.2026.131242_b0070","unstructured":"Elshaarawy, M., Saeed, A., Sheta, M., Said, A., Bakr, A., Bahaa, O., & Gomaa, W. (2024). Arabic Music Classification and Generation using Deep Learning. arXiv preprint arXiv:2410.19719. https:\/\/doi.org\/10.48550\/arXiv.2410.19719."},{"key":"10.1016\/j.eswa.2026.131242_b0075","doi-asserted-by":"crossref","DOI":"10.1016\/j.entcom.2022.100518","article-title":"PMG-Net: Persian music genre classification using deep neural networks","volume":"44","author":"Farajzadeh","year":"2023","journal-title":"Entertainment Computing"},{"key":"10.1016\/j.eswa.2026.131242_b0080","unstructured":"Ferraro, A., Bogdanov, D., Jeon, J. H., Yoon, J., & Serra, X. (2019). Music auto-tagging using cnns and mel-spectrograms with reduced frequency and time resolution. arXiv preprint arXiv:1911.04824. 
https:\/\/doi.org\/10.48550\/arXiv.1911.04824."},{"key":"10.1016\/j.eswa.2026.131242_b0085","unstructured":"Garoufis, C., Zlatintsi, A., & Maragos, P. (2023). Multi-source contrastive learning from musical audio. arXiv preprint arXiv:2302.07077. https:\/\/doi.org\/10.48550\/arXiv.2302.07077."},{"key":"10.1016\/j.eswa.2026.131242_b0090","series-title":"ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","article-title":"Music tagging with classifier group chains","author":"Hasumi","year":"2025"},{"key":"10.1016\/j.eswa.2026.131242_b0095","series-title":"Proceedings of the Cognitive Models and Artificial Intelligence Conference","article-title":"Turkish music genre classification using convolutional neural network","author":"Hazim","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0100","series-title":"ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","article-title":"Music auto-tagging with robust music representation learned via domain adversarial training","author":"Joung","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0105","unstructured":"Kekekci, K. Turkish Music Genre Dataset (Kaggle)."},{"key":"10.1016\/j.eswa.2026.131242_b0110","unstructured":"Kiani, M. Chakavak: A Multi-aspect Dataset for Automatic Tagging of Microtonal Music (Hugging Face Datasets."},{"key":"10.1016\/j.eswa.2026.131242_b0115","series-title":"2018 IEEE international conference on acoustics, speech and signal processing (ICASSP)","article-title":"Crepe: A convolutional representation for pitch estimation","author":"Kim","year":"2018"},{"key":"10.1016\/j.eswa.2026.131242_b0120","article-title":"Evaluation of algorithms using games: The case of music tagging","author":"Law","year":"2009","journal-title":"ISMIR"},{"key":"10.1016\/j.eswa.2026.131242_b0125","unstructured":"Lee, J., Lee, K., Park, J., Park, J., & Nam, J. (2018). Deep content-user embedding model for music recommendation. 
arXiv preprint arXiv:1807.06786. https:\/\/doi.org\/10.48550\/arXiv.1807.06786."},{"issue":"15","key":"10.1016\/j.eswa.2026.131242_b0130","doi-asserted-by":"crossref","first-page":"9002","DOI":"10.3390\/app13159002","article-title":"ATOSE: Audio tagging with one-sided joint embedding","volume":"13","author":"Lee","year":"2023","journal-title":"Applied Sciences"},{"issue":"8","key":"10.1016\/j.eswa.2026.131242_b0135","doi-asserted-by":"crossref","first-page":"1208","DOI":"10.1109\/LSP.2017.2713830","article-title":"Multi-level and multi-scale feature aggregation using pretrained convolutional neural networks for music auto-tagging","volume":"24","author":"Lee","year":"2017","journal-title":"IEEE signal processing letters"},{"key":"10.1016\/j.eswa.2026.131242_b0140","unstructured":"Lee, J., Park, J., Kim, K. L., & Nam, J. (2017). Sample-level deep convolutional neural networks for music auto-tagging using raw waveforms. arXiv preprint arXiv:1703.01789. https:\/\/doi.org\/10.48550\/arXiv.1703.01789."},{"key":"10.1016\/j.eswa.2026.131242_b0145","doi-asserted-by":"crossref","first-page":"1605","DOI":"10.1109\/TMM.2020.3001521","article-title":"Tag propagation and cost-sensitive learning for music auto-tagging","volume":"23","author":"Lin","year":"2020","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.eswa.2026.131242_b0150","series-title":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","article-title":"Perceptual musical features for interpretable audio tagging","author":"Lyberatos","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0155","doi-asserted-by":"crossref","DOI":"10.1109\/ACCESS.2025.3555741","article-title":"Challenges and perspectives in interpretable music auto-tagging using perceptual features","author":"Lyberatos","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.eswa.2026.131242_b0160","series-title":"ICASSP 2025-2025 IEEE International Conference on Acoustics, 
Speech and Signal Processing (ICASSP)","article-title":"Evaluating contrastive methodologies for music representation learning using playlist data","author":"Meehan","year":"2025"},{"key":"10.1016\/j.eswa.2026.131242_b0165","series-title":"2024 4th International Conference on Innovative Practices in Technology and Management (ICIPTM)","article-title":"Musical genres classification utilizing the pre-trained ResNet50 CNN model and deep learning techniques","author":"Mittal","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0170","doi-asserted-by":"crossref","first-page":"17031","DOI":"10.1109\/ACCESS.2023.3244620","article-title":"Music deep learning: Deep learning methods for music signal processing\u2014a review of the state-of-the-art","volume":"11","author":"Moysis","year":"2023","journal-title":"IEEE Access"},{"key":"10.1016\/j.eswa.2026.131242_b0175","series-title":"Proceedings of the 27th international conference on machine learning (ICML-10)","article-title":"Rectified linear units improve restricted boltzmann machines","author":"Nair","year":"2010"},{"key":"10.1016\/j.eswa.2026.131242_b0180","unstructured":"Nam, J., Herrera, J., & Lee, K. (2015). A deep bag-of-features model for music auto-tagging. arXiv preprint arXiv:1508.04999. https:\/\/doi.org\/10.48550\/arXiv.1508.04999."},{"key":"10.1016\/j.eswa.2026.131242_b0185","unstructured":"Papaioannou, C., Benetos, E., & Potamianos, A. (2023). From West to East: Who can understand the music of the others better? arXiv preprint arXiv:2307.09795. https:\/\/doi.org\/10.48550\/arXiv.2307.09795."},{"key":"10.1016\/j.eswa.2026.131242_b0190","doi-asserted-by":"crossref","unstructured":"Patakis, A., Lyberatos, V., Kantarelis, S., Dervakos, E., & Stamou, G. (2025). Semantic-Aware Interpretable Multimodal Music Auto-Tagging. arXiv preprint arXiv:2505.17233. 
https:\/\/doi.org\/10.48550\/arXiv.2505.17233.","DOI":"10.21437\/Interspeech.2025-2574"},{"key":"10.1016\/j.eswa.2026.131242_b0195","unstructured":"Pons, J., Nieto, O., Prockup, M., Schmidt, E., Ehmann, A., & Serra, X. (2017). End-to-end learning for music audio tagging at scale. arXiv preprint arXiv:1711.02520. https:\/\/doi.org\/10.48550\/arXiv.1711.02520."},{"key":"10.1016\/j.eswa.2026.131242_b0200","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124473","article-title":"A music recommender system based on compact convolutional transformers","volume":"255","author":"Pourmoazemi","year":"2024","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131242_b0205","doi-asserted-by":"crossref","unstructured":"Sarkar, S., Solanki, S. S., & Chakraborty, S. (2024). Automatic classification of Indian Music using EMD. Available at SSRN 4544832. https:\/\/dx.doi.org\/10.2139\/ssrn.4544832.","DOI":"10.21203\/rs.3.rs-2870303\/v1"},{"key":"10.1016\/j.eswa.2026.131242_b0210","series-title":"2024 IEEE International Conference on Information Technology, Electronics and Intelligent Communication Systems (ICITEICS)","article-title":"Classification of musical genres utilizing the CNN sequential model and deep learning techniques","author":"Singla","year":"2024"},{"key":"10.1016\/j.eswa.2026.131242_b0215","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2020.106702","article-title":"Music auto-tagging using scattering transform and convolutional neural network with self-attention","volume":"96","author":"Song","year":"2020","journal-title":"Applied Soft Computing"},{"key":"10.1016\/j.eswa.2026.131242_b0220","doi-asserted-by":"crossref","first-page":"104","DOI":"10.1016\/j.neucom.2018.02.076","article-title":"Music auto-tagging using deep recurrent neural networks","volume":"292","author":"Song","year":"2018","journal-title":"Neurocomputing"},{"key":"10.1016\/j.eswa.2026.131242_b0225","unstructured":"Spijkervet, J., & Burgoyne, J. A. (2021). 
Contrastive learning of musical representations. arXiv preprint arXiv:2103.09410. https:\/\/doi.org\/10.48550\/arXiv.2103.09410."},{"issue":"1","key":"10.1016\/j.eswa.2026.131242_b0230","first-page":"1929","article-title":"Dropout: A simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.eswa.2026.131242_b0235","doi-asserted-by":"crossref","unstructured":"Sturm, B. L. (2012). An analysis of the GTZAN music genre dataset. Proceedings of the second international ACM workshop on Music information retrieval with user-centered and multimodal strategies.","DOI":"10.1145\/2390848.2390851"},{"key":"10.1016\/j.eswa.2026.131242_b0240","unstructured":"Won, M., Choi, K., & Serra, X. (2021). Semi-supervised music tagging transformer. arXiv preprint arXiv:2111.13457. https:\/\/doi.org\/10.48550\/arXiv.2111.13457."},{"key":"10.1016\/j.eswa.2026.131242_b0245","unstructured":"Won, M., Chun, S., Nieto Caballero, O., & Serra, X. (2019). 
Automatic music tagging with harmonic cnn."},{"key":"10.1016\/j.eswa.2026.131242_b0250","series-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","article-title":"Data-driven harmonic filters for audio representation learning","author":"Won","year":"2020"},{"key":"10.1016\/j.eswa.2026.131242_b0255","series-title":"The Twelfth International Conference on Learning Representations","article-title":"MERT: Acoustic music understanding model with large-scale self-supervised training","author":"Yizhi","year":"2023"},{"issue":"8","key":"10.1016\/j.eswa.2026.131242_b0260","doi-asserted-by":"crossref","first-page":"11459","DOI":"10.1007\/s11042-020-10330-9","article-title":"A sample-level DCNN for music auto-tagging","volume":"80","author":"Yu","year":"2021","journal-title":"Multimedia Tools and Applications"},{"key":"10.1016\/j.eswa.2026.131242_b0265","doi-asserted-by":"crossref","unstructured":"Zhu, H., Zhou, Y., Chen, H., Yu, J., Ma, Z., Gu, R., Luo, Y., Tan, W., & Chen, X. (2025). Muq: Self-supervised music representation learning with mel residual vector quantization. arXiv preprint arXiv:2501.01108. 
https:\/\/doi.org\/10.48550\/arXiv.2501.01108.","DOI":"10.1109\/TASLPRO.2025.3602320"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426001569?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426001569?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T19:31:26Z","timestamp":1773516686000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426001569"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":53,"alternative-id":["S0957417426001569"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131242","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"HOMAYON: A hybrid fusion transformer for microtonal music auto-tagging using melody-aware timbre representations","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131242","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"131242"}}