{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T03:47:38Z","timestamp":1776138458187,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"13","license":[{"start":{"date-parts":[[2022,2,12]],"date-time":"2022-02-12T00:00:00Z","timestamp":1644624000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,2,12]],"date-time":"2022-02-12T00:00:00Z","timestamp":1644624000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Jiangsu Provincial Key Constructive Laboratory for Big Data of Psychology and Cognitive Science","award":["No.72592062003G"],"award-info":[{"award-number":["No.72592062003G"]}]},{"name":"Natural Science Foundation of the Colleges and Universities in Anhui Province of China","award":["No. KJ2020A0035"],"award-info":[{"award-number":["No. KJ2020A0035"]}]},{"DOI":"10.13039\/501100007156","name":"Innovation and Technology Commission - Hong Kong","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100007156","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s00521-022-06896-0","type":"journal-article","created":{"date-parts":[[2022,2,12]],"date-time":"2022-02-12T06:02:25Z","timestamp":1644645745000},"page":"10337-10353","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Combined angular margin and cosine margin softmax loss for music classification based on spectrograms"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2450-6052","authenticated-orcid":false,"given":"Jingxian","family":"Li","sequence":"first","affiliation":[]},{"given":"Lixin","family":"Han","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Baohua","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Xiaofeng","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Yan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,12]]},"reference":[{"key":"6896_CR1","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.asoc.2016.12.024","volume":"52","author":"YM Costa","year":"2017","unstructured":"Costa YM, Oliveira LS, Silla CN Jr (2017) An evaluation of convolutional neural networks for music classification using spectrograms. Appl Soft Comput 52:28\u201338","journal-title":"Appl Soft Comput"},{"key":"6896_CR2","doi-asserted-by":"crossref","unstructured":"Simonetta F, Ntalampiras S, Avanzini F (2019) Multimodal music information processing and retrieval: survey and future challenges. In: 2019 international workshop on multilayer music representation and processing (MMRP), pp 10\u201318","DOI":"10.1109\/MMRP.2019.00012"},{"key":"6896_CR3","doi-asserted-by":"crossref","unstructured":"Zhuang Y, Chen Y, Zheng J (2020) Music genre classification with transformer classifier. In: Proceedings of the 2020 4th international conference on digital signal processing, Chengdu, China, June 19\u201321, 2020, pp 155\u2013159","DOI":"10.1145\/3408127.3408137"},{"key":"6896_CR4","doi-asserted-by":"crossref","unstructured":"Chaudhary D, Singh NP, Singh S (2020) Development of music emotion classification system using convolution neural network. Int J Speech Technol 1\u201310","DOI":"10.1007\/s10772-020-09781-0"},{"issue":"4","key":"6896_CR5","doi-asserted-by":"publisher","first-page":"941","DOI":"10.1007\/s00521-018-3704-x","volume":"32","author":"M Doerfler","year":"2020","unstructured":"Doerfler M, Grill T, Bammer R, Flexer A (2020) Basic filters for convolutional neural networks: training or design. Neural Comput Appl 32(4):941\u2013954","journal-title":"Neural Comput Appl"},{"key":"6896_CR6","unstructured":"Choi K, Fazekas G, Sandler M (2016) Automatic tagging using deep convolutional neural networks. arXiv preprint arXiv:1606.00298"},{"key":"6896_CR7","doi-asserted-by":"crossref","unstructured":"Choi K, Fazekas G, Sandler M, Cho K (2017) Convolutional recurrent neural networks for music classification. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP), New Orleans, LA, USA, March 5\u20139, 2017, pp 2392\u20132396","DOI":"10.1109\/ICASSP.2017.7952585"},{"key":"6896_CR8","unstructured":"Liu W, Wen Y, Yu Z, Yang M (2016) Large-margin softmax loss for convolutional neural networks. In: International conference on machine learning, New York City, NY, USA, June 19\u201324, 2016, pp 507\u2013516"},{"key":"6896_CR9","doi-asserted-by":"crossref","unstructured":"Liu W, Wen Y, Yu Z, Li M, Raj B, Song L (2017) SphereFace: deep hypersphere embedding for face recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Honolulu, HI, USA, July 21\u201326, 2017, pp 6738\u20136746","DOI":"10.1109\/CVPR.2017.713"},{"issue":"7","key":"6896_CR10","doi-asserted-by":"publisher","first-page":"926","DOI":"10.1109\/LSP.2018.2822810","volume":"25","author":"F Wang","year":"2018","unstructured":"Wang F, Cheng J, Liu W, Liu H (2018) Additive margin softmax for face verification. IEEE Signal Process Lett 25(7):926\u2013930","journal-title":"IEEE Signal Process Lett"},{"key":"6896_CR11","doi-asserted-by":"crossref","unstructured":"Wang H, Wang Y, Zhou Z, Ji X, Gong D, Zhou J, Li Z, Liu W (2018) Cosface: large margin cosine loss for deep face recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Salt Lake City, UT, USA, June 18\u201322, 2018, pp 5265\u20135274","DOI":"10.1109\/CVPR.2018.00552"},{"key":"6896_CR12","doi-asserted-by":"crossref","unstructured":"Wen Y, Zhang K, Li Z, Qiao Y (2016) A discriminative feature learning approach for deep face recognition. In: European conference on computer vision, Amsterdam, The Netherlands, October 11\u201314, 2016, pp 499\u2013515","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"6896_CR13","doi-asserted-by":"crossref","unstructured":"Deng J, Guo J, Xue N, Zafeiriou S (2019) Arcface: additive angular margin loss for deep face recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Long Beach, CA, USA, June 16\u201320, 2019, pp 4690\u20134699","DOI":"10.1109\/CVPR.2019.00482"},{"key":"6896_CR14","doi-asserted-by":"crossref","unstructured":"Hadsell R, Chopra S, LeCun Y (2006) Dimensionality reduction by learning an invariant mapping. In: 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06), New York, NY, USA, June 17\u201322, 2006, Vol. 2, pp 1735\u20131742","DOI":"10.1109\/CVPR.2006.100"},{"key":"6896_CR15","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: a unified embedding for face recognition and clustering. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Boston, MA, USA, June 7\u201312, 2015, pp 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"6896_CR16","doi-asserted-by":"crossref","unstructured":"Wang J, Song Y, Leung T, Rosenberg C, Wang J, Philbin J, Chen B, Wu Y (2014) Learning fine-grained image similarity with deep ranking. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Columbus, OH, USA, June 23\u201328, 2014, pp 1386\u20131393","DOI":"10.1109\/CVPR.2014.180"},{"key":"6896_CR17","doi-asserted-by":"crossref","unstructured":"Liu H, Zhu X, Lei Z, Li SZ (2019) Adaptiveface: Adaptive margin and sampling for face recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Long Beach, CA, USA, June 16\u201320, 2019, pp 11947\u201311956","DOI":"10.1109\/CVPR.2019.01222"},{"key":"6896_CR18","doi-asserted-by":"crossref","unstructured":"Ferraro A, Bogdanov D, Jay XS, Jeon H, Yoon J (2021) How Low Can You Go? Reducing Frequency and Time Resolution in Current CNN Architectures for Music Auto-tagging. In: 2020 28th European signal processing conference (EUSIPCO), Amsterdam, Netherlands, January 18\u201321, 2021, pp 131\u2013135","DOI":"10.23919\/Eusipco47968.2020.9287769"},{"key":"6896_CR19","doi-asserted-by":"crossref","unstructured":"Liang B, Gu M (2020) Music genre classification using transfer learning. In: 2020 IEEE conference on multimedia information processing and retrieval (MIPR), Shenzhen, China, August 6\u20138, 2020, pp 392\u2013393","DOI":"10.1109\/MIPR49039.2020.00085"},{"key":"6896_CR20","doi-asserted-by":"crossref","unstructured":"Parkhi OM, Vedaldi A, Zisserman A (2015) Deep face recognition. In Proceedings of the British machine vision conference, Swansea, UK, September 7\u201310, 2015, pp 1\u201312","DOI":"10.5244\/C.29.41"},{"key":"6896_CR21","first-page":"1988","volume":"27","author":"Y Sun","year":"2014","unstructured":"Sun Y, Chen Y, Wang X, Tang X (2014) Deep learning face representation by joint identification-verification. Adv Neural Inf Process Syst 27:1988\u20131996","journal-title":"Adv Neural Inf Process Syst"},{"key":"6896_CR22","unstructured":"Taenzer M, Abe\u00dfer J, Mimilakis SI, Wei\u00df C, M\u00fcller M, Lukashevich H, Fraunhofer IDMT (2019) Investigating CNN-based instrument family recognition for western classical music recordings. In: Proceedings of the 20th international society for music information retrieval conference, Delft, The Netherlands, November 4\u20138, 2019, pp 612\u2013619"},{"key":"6896_CR23","doi-asserted-by":"crossref","unstructured":"Taigman Y, Yang M, Ranzato MA, Wolf L (2014) Deepface: closing the gap to human-level performance in face verification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Columbus, OH, USA, June 23\u201328, 2014, pp 1701\u20131708","DOI":"10.1109\/CVPR.2014.220"},{"key":"6896_CR24","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1109\/TASLP.2020.2993152","volume":"28","author":"M Bhattacharjee","year":"2020","unstructured":"Bhattacharjee M, Prasanna SM, Guha P (2020) Speech\/music classification using features from spectral peaks. IEEE\/ACM Trans Audio Speech Language Process 28:1549\u20131559","journal-title":"IEEE\/ACM Trans Audio Speech Language Process"},{"issue":"4","key":"6896_CR25","doi-asserted-by":"publisher","first-page":"1067","DOI":"10.1007\/s00521-019-04076-1","volume":"32","author":"J Kim","year":"2020","unstructured":"Kim J, Urbano J, Liem C, Hanjalic A (2020) One deep music representation to rule them all? a comparative analysis of different representation learning strategies. Neural Comput Appl 32(4):1067\u20131093","journal-title":"Neural Comput Appl"},{"key":"6896_CR26","doi-asserted-by":"crossref","unstructured":"Hansen C, Hansen C, Maystre L, Mehrotra R, Brost B, Tomasi F, Lalmas M (2020) Contextual and sequential user embeddings for large-scale music recommendation. In: Fourteenth ACM conference on recommender systems, virtual event, Brazil, September 22\u201326, 2020, pp 53\u201362","DOI":"10.1145\/3383313.3412248"},{"key":"6896_CR27","unstructured":"Rahardwika DS, Rachmawanto EH, Sari CA, Irawan C, Kusumaningrum DP, Trusthi SL (2020) Comparison of SVM, KNN, and NB Classifier for Genre Music Classification based on Metadata. In: 2020 international seminar on application for technology of information and communication (iSemantic), pp 12\u201316"},{"key":"6896_CR28","doi-asserted-by":"publisher","first-page":"107020","DOI":"10.1016\/j.apacoust.2019.107020","volume":"158","author":"G Sharma","year":"2020","unstructured":"Sharma G, Umapathy K, Krishnan S (2020) Trends in audio signal feature extraction methods. Appl Acoust 158:107020","journal-title":"Appl Acoust"},{"issue":"3","key":"6896_CR29","doi-asserted-by":"publisher","first-page":"3705","DOI":"10.1007\/s11042-017-5539-3","volume":"78","author":"Y Zeng","year":"2019","unstructured":"Zeng Y, Mao H, Peng D, Yi Z (2019) Spectrogram based multi-task audio classification. Multimed Tools Appl 78(3):3705\u20133722","journal-title":"Multimed Tools Appl"},{"key":"6896_CR30","doi-asserted-by":"crossref","unstructured":"Choi K, Fazekas G, Sandler M (2016) Explaining deep convolutional neural networks on music classification. arXiv preprint arXiv:1607.02444","DOI":"10.1109\/ICASSP.2017.7952585"},{"key":"6896_CR31","unstructured":"Kong Q, Feng X, Li Y (2014) Music genre classification using convolutional neural network. In: Proceedings of international society for music information retrieval conference, Taipei, Taiwan, China, October 27\u201331, 2014"},{"key":"6896_CR32","unstructured":"Lidy T, Schindler A (2016) Parallel convolutional neural networks for music genre and mood classification. In: Proceedings of the 17th international society for music information retrieval conference, New York City, United States, August 7\u201311, 2016"},{"key":"6896_CR33","unstructured":"Liu X, Chen Q, Wu X, Liu Y, Liu Y (2017) CNN based music emotion classification. arXiv preprint arXiv:1704.05665"},{"key":"6896_CR34","doi-asserted-by":"crossref","unstructured":"Zhang W, Lei W, Xu X, Xing X (2016) Improved music genre classification with convolutional neural networks. In: 17th annual conference of the international speech communication association, San Francisco, CA, USA, September 8\u201312, 2016, pp 3304\u20133308","DOI":"10.21437\/Interspeech.2016-1236"},{"key":"6896_CR35","doi-asserted-by":"crossref","unstructured":"Pons J, Serra X (2019) Randomly weighted CNNs for (music) audio classification. In: IEEE international conference on acoustics, speech and signal processing, Brighton, United Kingdom, May 12\u201317, 2019, pp 336\u2013340","DOI":"10.1109\/ICASSP.2019.8682912"},{"issue":"3","key":"6896_CR36","doi-asserted-by":"publisher","first-page":"102185","DOI":"10.1016\/j.ipm.2019.102185","volume":"57","author":"C Li","year":"2020","unstructured":"Li C, Bao Z, Li L, Zhao Z (2020) Exploring temporal representations by leveraging attention-based bidirectional LSTM-RNNs for multi-modal emotion recognition. Inf Process Manag 57(3):102185","journal-title":"Inf Process Manag"},{"key":"6896_CR37","doi-asserted-by":"crossref","unstructured":"Russo M, Kraljevi\u0107 L, Stella M, Sikora M (2020) Cochleogram-based approach for detecting perceived emotions in music. Inf Process Manag 57(5):102270","DOI":"10.1016\/j.ipm.2020.102270"},{"issue":"1","key":"6896_CR38","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1093\/nsr\/nwy108","volume":"6","author":"ZH Zhou","year":"2019","unstructured":"Zhou ZH, Feng J (2019) Deep forest. National Sci Rev 6(1):74\u201386","journal-title":"National Sci Rev"},{"key":"6896_CR39","doi-asserted-by":"crossref","unstructured":"Chopra S, Hadsell R, LeCun Y (2005) Learning a similarity metric discriminatively, with application to face verification. In: 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR'05), San Diego, CA, USA, June 20\u201326, 2005, Vol. 1, pp 539\u2013546","DOI":"10.1109\/CVPR.2005.202"},{"key":"6896_CR40","doi-asserted-by":"crossref","unstructured":"Hoffer E, Ailon N (2015) Deep metric learning using triplet network. In: International workshop on similarity-based pattern recognition, Copenhagen, Denmark, October 12\u201314, 2015, pp 84\u201392","DOI":"10.1007\/978-3-319-24261-3_7"},{"key":"6896_CR41","doi-asserted-by":"crossref","unstructured":"Kemelmacher-Shlizerman I, Seitz SM, Miller D, Brossard E (2016) The megaface benchmark: 1 million faces for recognition at scale. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Las Vegas, NV, USA, June 27\u201330, 2016, pp 4873\u20134882","DOI":"10.1109\/CVPR.2016.527"},{"key":"6896_CR42","doi-asserted-by":"crossref","unstructured":"Wolf L, Hassner T, Maoz I (2011) Face recognition in unconstrained videos with matched background similarity. In: the 24th IEEE conference on computer vision and pattern recognition, Colorado Springs, CO, USA, 20\u201325 June 2011, pp 529\u2013534","DOI":"10.1109\/CVPR.2011.5995566"},{"key":"6896_CR43","unstructured":"Ranjan R, Castillo CD, Chellappa R (2017) L2-constrained softmax loss for discriminative face verification. arXiv preprint arXiv:1703.09507"},{"key":"6896_CR44","doi-asserted-by":"crossref","unstructured":"Wang F, Xiang X, Cheng J, Yuille AL (2017) Normface: L2 hypersphere embedding for face verification. In: Proceedings of the 25th ACM international conference on Multimedia, Mountain View, CA, USA, October 23\u201327, 2017, pp 1041\u20131049","DOI":"10.1145\/3123266.3123359"},{"issue":"5","key":"6896_CR45","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TSA.2002.800560","volume":"10","author":"G Tzanetakis","year":"2002","unstructured":"Tzanetakis G, Cook P (2002) Musical genre classification of audio signals. IEEE Trans Speech Audio Process 10(5):293\u2013302","journal-title":"IEEE Trans Speech Audio Process"},{"key":"6896_CR46","unstructured":"Defferrard M, Benzi K, Vandergheynst P, Bresson X (2017) FMA: a dataset for music analysis. In: 18th international society for music information retrieval conference, Suzhou, China, October 23\u201327, 2017, pp 316\u2013323"},{"key":"6896_CR47","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Las Vegas, NV, USA, June 27\u201330, 2016, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"6896_CR48","unstructured":"Gulli A, Pal S (2017) Deep learning with Keras. Packt Publishing Ltd"},{"key":"6896_CR49","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-06896-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-06896-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-06896-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,25]],"date-time":"2022-06-25T09:11:41Z","timestamp":1656148301000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-06896-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,12]]},"references-count":49,"journal-issue":{"issue":"13","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["6896"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-06896-0","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,2,12]]},"assertion":[{"value":"14 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}