{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:07:42Z","timestamp":1775837262215,"version":"3.50.1"},"reference-count":120,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100004858","name":"Scientific Research Foundation of Sichuan University of Science and Engineering","doi-asserted-by":"publisher","award":["2019RC12"],"award-info":[{"award-number":["2019RC12"]}],"id":[{"id":"10.13039\/501100004858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019065","name":"Sichuan Science and Technology Program","doi-asserted-by":"publisher","award":["2024YFHZ0026"],"award-info":[{"award-number":["2024YFHZ0026"]}],"id":[{"id":"10.13039\/501100019065","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3484470","type":"journal-article","created":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T17:28:52Z","timestamp":1729618132000},"page":"157716-157745","source":"Crossref","is-referenced-by-count":12,"title":["Music Emotion Recognition Based on Deep Learning: A Review"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-9623-8479","authenticated-orcid":false,"given":"Xingguo","family":"Jiang","sequence":"first","affiliation":[{"name":"School of Automation and Information Engineering, Sichuan University of Science and Engineering, Zigong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8845-3093","authenticated-orcid":false,"given":"Yuchao","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Automation and Information Engineering, Sichuan University of Science and Engineering, Zigong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8707-5720","authenticated-orcid":false,"given":"Guojun","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Automation and Information Engineering, Sichuan University of Science and Engineering, Zigong, China"}]},{"given":"Ling","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Automation and Information Engineering, Sichuan University of Science and Engineering, Zigong, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.plrev.2009.11.001"},{"issue":"2","key":"ref2","first-page":"5","article-title":"Prehistoric music archaeology of the Yellow River basin and the origin of Chinese musical civilization","volume":"5","author":"Fang","year":"2024","journal-title":"J. Musical Res."},{"key":"ref3","first-page":"937","article-title":"Music emotion recognition: A state of the art review","volume-title":"Proc. ISMIR","volume":"86","author":"Kim"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1525\/mp.2012.30.3.307"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2168752.2168754"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-017-0559-4"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2020.3032373"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-021-0569-4"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3389\/fninf.2022.997282"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2506364.2506365"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1063\/1.5039095"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3390\/app10030902"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ITOEC49072.2020.9141729"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/PIC53636.2021.9687003"},{"issue":"10","key":"ref15","first-page":"94","article-title":"Music emotion recognition fusion on CNN-BiLSTM and self-attention model","volume":"59","author":"Zhong","year":"2023","journal-title":"Comput. Eng. Appl."},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178058"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-29908-8_10"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0173392"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2021.760060"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s44196-024-00489-6"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110200"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2918739"},{"key":"ref23","first-page":"591","article-title":"The million song dataset","volume-title":"Proc. 12th Int. Soc. Music Inf. Retr. Conf. (ISMIR)","author":"Bertin-Mahieux"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-15376-z"},{"key":"ref25","article-title":"Music mood detection based on audio and lyrics with deep neural net","author":"Delbouys","year":"2018","journal-title":"arXiv:1809.07276"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859812"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.913750"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2014.6890290"},{"key":"ref29","article-title":"CNN based music emotion classification","author":"Liu","year":"2017","journal-title":"arXiv:1704.05665"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3206025.3206037"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-022-00230-z"},{"key":"ref32","first-page":"310","article-title":"The multiple voices of musical emotions: Source separation for improving music emotion recognition models and their interpretability","volume-title":"Proc. ISMIR","author":"De Berardinis"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.15"},{"key":"ref34","first-page":"570","article-title":"Multi-modal music emotion recognition: A new dataset, methodology and comparative analysis","volume-title":"Proc. 10th Int. Symp. Comput. Music Multidisciplinary Res.","author":"Panda"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2018.2885744"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.3745\/JIPS.04.0032"},{"key":"ref37","first-page":"11","article-title":"Music emotion recognition via end-to-end multimodal neural networks","volume-title":"Proc. RecSys","author":"Jeon"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CONIT51480.2021.9498345"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.jestch.2020.10.009"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.3390\/info15040224"},{"key":"ref41","article-title":"EMOPIA: A multi-modal pop piano dataset for emotion recognition and emotion-based music generation","author":"Hung","year":"2021","journal-title":"arXiv:2108.01374"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s10844-021-00658-5"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s10844-022-00746-0"},{"key":"ref44","first-page":"383","article-title":"Musical texture and expressivity features for music emotion recognition","volume-title":"Proc. 19th Int. Soc. Music Inf. Retr. Conf. (ISMIR)","author":"Panda"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/DeSE58274.2023.10100058"},{"key":"ref46","article-title":"Comparison and analysis of deep audio embeddings for music emotion recognition","author":"Koh","year":"2021","journal-title":"arXiv:2104.06517"},{"key":"ref47","first-page":"153","article-title":"Bi-modal music emotion recognition: Novel lyrical features and dataset","volume-title":"Proc. 9th Int. Workshop Music Mach. Learn. Eur. Conf. Mach. Learn. Princ. Pract. Knowl. Discovery Databases (ECML\/PKDD)","author":"Malheiro"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.785"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA220004"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-020-09781-0"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.10.002"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.3390\/s23010382"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.2307\/1415746"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1177\/0305735620927474"},{"key":"ref55","first-page":"164","article-title":"The argument and evidence about universals in facial expressions","volume-title":"Handbook of Social Psychophysiology","volume":"143","author":"Ekman","year":"1989"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1037\/0022-3514.59.5.899"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/860435.860508"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.860344"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.2991\/ijcis.d.191216.001"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/FSKD.2016.7603438"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/5181899"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.2307\/427021"},{"key":"ref63","volume-title":"The Social Psychology of Music","author":"Farnsworth","year":"1958"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.2466\/PMS.96.4.1117-1122"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1037\/h0077714"},{"issue":"3","key":"ref66","first-page":"367","article-title":"A novel long short-term memory network model for multimodal music emotion analysis in affective computing","volume":"26","author":"Chen","year":"2022","journal-title":"J. Appl. Sci. Eng."},{"key":"ref67","article-title":"MediaEval 2015: Music emotion recognition based on feed-forward neural network","volume-title":"Proc. MediaEval","author":"Patra"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2018.2820691"},{"key":"ref69","first-page":"583","article-title":"Dimensional music emotion recognition: Combining standard and melodic audio features","volume-title":"Proc. 10th Int. Symp. Comput. Music Multidisciplinary Res.","author":"Panda"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2014.2333095"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICOT.2013.6521190"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59491-6_14"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.2991\/jcis.2006.325"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1049\/iet-spr.2016.0021"},{"key":"ref75","article-title":"Stacked convolutional and recurrent neural networks for music emotion recognition","author":"Malik","year":"2017","journal-title":"arXiv:1706.02292"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/SIST54437.2022.9945814"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-30577-2_27"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/IS3C.2014.323"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780195068276.001.0001"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1111\/1467-9280.00157"},{"key":"ref81","volume-title":"An Approach to Environmental Psychology","author":"Mehrabian","year":"1974"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1007\/BF02686918"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/s12144-014-9219-4"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CBMI.2008.4564922"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-023-04967-w"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12040978"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1155\/2020\/4606027"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/9256586"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/6749622"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/2802573"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-14345-w"},{"key":"ref92","article-title":"Multi-modality in music: Predicting emotion in music from high-level audio features and lyrics","author":"Krols","year":"2023","journal-title":"arXiv:2302.13321"},{"key":"ref93","first-page":"1","article-title":"Automatic musical pattern feature extraction using convolutional neural network","volume":"10","author":"Li","year":"2010","journal-title":"Genre"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13577-6"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/5732687"},{"key":"ref96","first-page":"1","article-title":"Exploring deep learning methodologies for music emotion recognition","volume-title":"Proc. Sound Music Comput. Conf. (SMC)","author":"Louro"},{"key":"ref97","article-title":"Affective norms for English words (ANEW): Instruction manual and affective ratings","author":"Bradley","year":"1999"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ACIIAsia.2018.8470378"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.11591\/eei.v12i1.4231"},{"key":"ref100","article-title":"Linguistic inquiry and word count: LIWC 2001","author":"Pennebaker","year":"2001"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578784"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1201\/b21811-46"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2006.262723"},{"key":"ref104","first-page":"651","article-title":"SMERS: Music emotion recognition using support vector regression","volume-title":"Proc. ISMIR","author":"Han"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911513"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/PIC.2015.7489849"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICCI-CC.2017.8109740"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471639"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2015.7415321"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.11591\/ijece.v8i3.pp1720-1730"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.33564\/IJEAST.2022.v07i06.026"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1155\/2021\/3561829"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-020-08836-3"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/ITNEC48623.2020.9084846"},{"key":"ref117","first-page":"1","article-title":"A speaker system based on CLDNN music emotion recognition algorithm","volume-title":"Proc. ICETIS; 7th Int. Conf. Electron. Technol. Inf. Sci.","author":"Yang"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2022.08.014"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/2715765"},{"issue":"4","key":"ref120","first-page":"64","article-title":"Survey of music emotion recognition","volume":"58","author":"Kang","year":"2022","journal-title":"Comput. Eng. Appl."}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10729262.pdf?arnumber=10729262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T01:52:07Z","timestamp":1732672327000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10729262\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":120,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3484470","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}