{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T19:06:07Z","timestamp":1775847967313,"version":"3.50.1"},"reference-count":59,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2021,2,4]],"date-time":"2021-02-04T00:00:00Z","timestamp":1612396800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,4]],"date-time":"2021-02-04T00:00:00Z","timestamp":1612396800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,4]]},"DOI":"10.1007\/s11042-020-10381-y","type":"journal-article","created":{"date-parts":[[2021,2,4]],"date-time":"2021-02-04T23:03:29Z","timestamp":1612479809000},"page":"15511-15539","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["A method of music autotagging based on audio and lyrics"],"prefix":"10.1007","volume":"80","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5790-7506","authenticated-orcid":false,"given":"Hei-Chia","family":"Wang","sequence":"first","affiliation":[]},{"given":"Sheng-Wei","family":"Syu","sequence":"additional","affiliation":[]},{"given":"Papis","family":"Wongchaisuwat","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,4]]},"reference":[{"key":"10381_CR1","doi-asserted-by":"crossref","unstructured":"Al\u00edas, F., Socor\u00f3, J. C., & Sevillano, X. (2016). A review of physical and perceptual feature extraction techniques for speech, music and environmental sounds. Applied Sciences 6(5):143","DOI":"10.3390\/app6050143"},{"key":"10381_CR2","unstructured":"Bahdanau, D., Cho, K., & Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473."},{"key":"10381_CR3","unstructured":"Bertin-Mahieux, T., Eck, D., & Mandel, M. I. (2011). Automatic Tagging of Audio: The State-of-the-Art. Machine audition: Principles, algorithms and systems, IGI Global."},{"key":"10381_CR4","doi-asserted-by":"crossref","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., & Bengio, Y. (2014). Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078.","DOI":"10.3115\/v1\/D14-1179"},{"key":"10381_CR5","unstructured":"Choi K (2018) Deep neural networks for music tagging. Queen Mary University of London"},{"key":"10381_CR6","unstructured":"Choi, K., Fazekas, G., & Sandler, M. (2016). Automatic tagging using deep convolutional neural networks. arXiv preprint arXiv:1606.00298."},{"key":"10381_CR7","volume-title":"Paper presented at the 2017 IEEE International Conference on Acoustics","author":"K Choi","year":"2017","unstructured":"Choi K, Fazekas G, Sandler M, Cho K (2017) Convolutional recurrent neural networks for music classification. In: Paper presented at the 2017 IEEE International Conference on Acoustics. Signal Processing (ICASSP), Speech and"},{"key":"10381_CR8","volume-title":"Automatic music tagging with time series models","author":"E Coviello","year":"2014","unstructured":"Coviello, E. (2014). Automatic music tagging with time series models. UC San Diego."},{"key":"10381_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-3959-1","volume-title":"Signal analysis of Hindustani classical music: Springer.Datta, A. K., Solanki, S. S., Sengupta, R., Chakraborty, S., Mahto, K., & Patranabis, A. (2017)","author":"AK Datta","year":"2017","unstructured":"Datta AK, Solanki SS, Sengupta R, Chakraborty S, Mahto K, Patranabis A (2017) Signal analysis of Hindustani classical music: Springer.Datta, A. K., Solanki, S. S., Sengupta, R., Chakraborty, S., Mahto, K., & Patranabis, A. (2017). Springer Singapore, Signal Analysis of Hindustani Classical Music"},{"key":"10381_CR10","unstructured":"De Leon, F., & Martinez, K. (2012). Enhancing timbre model using MFCC and its time derivatives for music similarity estimation. Paper presented at the 2012 Proceedings of the 20th European Signal Processing Conference (EUSIPCO)."},{"key":"10381_CR11","unstructured":"Delbouys, R., Hennequin, R., Piccoli, F., Royo-Letelier, J., & Moussallam, M. (2018). Music mood detection based on audio and lyrics with deep neural Net. arXiv preprint arXiv:1809.07276."},{"key":"10381_CR12","volume-title":"End-to-end learning for music audio. Paper presented at the IEEE International Conference on Acoustics","author":"S Dieleman","year":"2014","unstructured":"Dieleman S, Schrauwen B (2014) End-to-end learning for music audio. Paper presented at the IEEE International Conference on Acoustics. Speech and Signal Processing, Florence, Italy"},{"issue":"4","key":"10381_CR13","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1007\/s10462-012-9362-y","volume":"42","author":"SF Duan","year":"2014","unstructured":"Duan SF, Zhang JL, Roe P, Towsey M (2014) A survey of tagging techniques for music, speech and environmental sound. Artif Intell Rev 42(4):637\u2013661 Retrieved from <Go to ISI>:\/\/WOS:000345089400005","journal-title":"Artif Intell Rev"},{"key":"10381_CR14","volume-title":"Lyrics-based analysis and classification of music. Paper presented at the International Conference on Computational Linguistics","author":"M Fell","year":"2014","unstructured":"Fell M, Sporleder C (2014) Lyrics-based analysis and classification of music. Paper presented at the International Conference on Computational Linguistics. Dublin, Ireland"},{"key":"10381_CR15","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/978-3-319-30569-1_23","volume-title":"Complex networks VII: Proceedings of the 7th Workshop on Complex Networks CompleNet 2016","author":"D Gossi","year":"2016","unstructured":"Gossi D, Gunes MH (2016) Lyric-based music recommendation. In: Cherifi H, Gon\u00e7alves B, Menezes R, Sinatra R (eds) Complex networks VII: Proceedings of the 7th Workshop on Complex Networks CompleNet 2016. Springer International Publishing, Cham, pp 301\u2013310"},{"key":"10381_CR16","unstructured":"Gouyon, F., Sturm, B., Oliveira, J., Hespanhol, N. & Langlois, T. (2014) On evaluation validity in music autotagging, arXiv preprint arXiv:1410.0001."},{"issue":"8","key":"10381_CR17","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Computation 9(8):1735\u20131780","journal-title":"Neural Computation"},{"issue":"C","key":"10381_CR18","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/j.artint.2014.11.004","volume":"219","author":"B Horsburgh","year":"2015","unstructured":"Horsburgh B, Craw S, Massie S (2015) Learning pseudo-tags to augment sparse tagging in hybrid music recommender systems. Artificial Intelligence Review 219(C):25\u201339","journal-title":"Artificial Intelligence Review"},{"issue":"2","key":"10381_CR19","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1002\/asi.23649","volume":"68","author":"X Hu","year":"2017","unstructured":"Hu X, Choi K, Downie JS (2017) A framework for evaluating multimodal music mood classification. Journal of the Association for Information Science and Technology 68(2):273\u2013285 Retrieved from https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1002\/asi.23649","journal-title":"Journal of the Association for Information Science and Technology"},{"key":"10381_CR20","doi-asserted-by":"crossref","unstructured":"Huang, Y., Wang, W., Wang, L., & Tan, T. (2013). Multi-task deep neural network for multi-label learning. Paper presented at the 2013 IEEE International Conference on Image Processing.","DOI":"10.1109\/ICIP.2013.6738596"},{"issue":"11","key":"10381_CR21","doi-asserted-by":"publisher","first-page":"1923","DOI":"10.1109\/TMM.2015.2476658","volume":"17","author":"Y Huang","year":"2015","unstructured":"Huang Y, Wang W, Wang L (2015) Unconstrained multimodal multi-label learning. Ieee Trans Multimed 17(11):1923\u20131935","journal-title":"Ieee Trans Multimed"},{"key":"10381_CR22","unstructured":"Jeong, I.-Y., & Lim, H. (2018). Audio tagging system using densely connected convolutional networks. Paper presented at the Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018)."},{"key":"10381_CR23","volume-title":"Paper presented at the 7th ACM conference on Recommender systems","author":"M Kaminskas","year":"2013","unstructured":"Kaminskas M, Ricci F, Schedl M (2013) Location-aware music recommendation using auto-tagging and hybrid matching. In: Paper presented at the 7th ACM conference on Recommender systems. China, Hong Kong"},{"key":"10381_CR24","doi-asserted-by":"crossref","unstructured":"Kim, Y. (2014). Convolutional neural networks for sentence classification. arXiv preprint arXiv:1408.5882.","DOI":"10.3115\/v1\/D14-1181"},{"key":"10381_CR25","doi-asserted-by":"crossref","unstructured":"Kim, T., Lee, J., & Nam, J. (2018). Sample-level cnn architectures for music auto-tagging using raw waveforms. Paper presented at the 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP).","DOI":"10.1109\/ICASSP.2018.8462046"},{"issue":"1","key":"10381_CR26","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1145\/2542205.2542206","volume":"10","author":"P Knees","year":"2013","unstructured":"Knees P, Schedl M (2013) A survey of music similarity and recommendation from music context data. Acm Transactions on Multimedia Computing Communications and Applications 10(1):21 Retrieved from <Go to ISI>:\/\/WOS:000329025400002","journal-title":"Acm Transactions on Multimedia Computing Communications and Applications"},{"key":"10381_CR27","unstructured":"Labrosa. (2011). Last.Fm dataset. Retrieved from: http:\/\/labrosa.ee.columbia.edu\/millionsong\/lastfm"},{"key":"10381_CR28","volume-title":"Recurrent convolutional neural networks for text classification. Paper presented at the Association for the Advancement of Artificial Intelligence","author":"S Lai","year":"2015","unstructured":"Lai S, Xu L, Liu K, Zhao J (2015) Recurrent convolutional neural networks for text classification. Paper presented at the Association for the Advancement of Artificial Intelligence. Austin Texas, USA"},{"issue":"8","key":"10381_CR29","doi-asserted-by":"publisher","first-page":"1208","DOI":"10.1109\/LSP.2017.2713830","volume":"24","author":"J Lee","year":"2017","unstructured":"Lee J, Nam J (2017) Multi-level and multi-scale feature aggregation using pretrained convolutional neural networks for music auto-tagging. IEEE signal processing letters 24(8):1208\u20131212","journal-title":"IEEE signal processing letters"},{"key":"10381_CR30","unstructured":"Lee, J., Park, J., Kim, K. L., & Nam, J. (2017). Sample-level deep convolutional neural networks for music auto-tagging using raw waveforms. arXiv preprint arXiv:.01789."},{"issue":"1","key":"10381_CR31","doi-asserted-by":"publisher","first-page":"150","DOI":"10.3390\/app8010150","volume":"8","author":"J Lee","year":"2018","unstructured":"Lee J, Park J, Kim KL, Nam J (2018) SampleCNN: end-to-end deep convolutional neural networks using very small filters for music classification. Applied Sciences 8(1):150","journal-title":"Applied Sciences"},{"key":"10381_CR32","unstructured":"Liu, K., Li, Y., Xu, N., & Natarajan, P. (2018). Learn to combine modalities in multimodal deep learning. arXiv preprint arXiv:.11730."},{"key":"10381_CR33","doi-asserted-by":"crossref","unstructured":"Malheiro R, Panda R, Gomes P, Paiva RP (2018) Emotionally-relevant features for classification and regression of music lyrics. IEEE Trans Affective Comput (2):240\u2013254","DOI":"10.1109\/TAFFC.2016.2598569"},{"key":"10381_CR34","volume-title":"Three current issues in music autotagging, paper presented at the Proceedings of the 12th International Society for Music Information Retrieval Conference, ISMIR 2011","author":"G Marques","year":"2011","unstructured":"Marques G, Domingues M, Langlois T, Gouyon F (2011) Three current issues in music autotagging, paper presented at the Proceedings of the 12th International Society for Music Information Retrieval Conference, ISMIR 2011. Miami, Florida"},{"key":"10381_CR35","unstructured":"Nam, J., Herrera, J., & Lee, K. (2015). A deep bag-of-features model for music auto-tagging. arXiv preprint arXiv:.04999."},{"key":"10381_CR36","doi-asserted-by":"crossref","unstructured":"Nayyar, R. K., Nair, S., Patil, O., Pawar, R., & Lolage, A. (2017). Content-based auto-tagging of audios using deep learning. Paper presented at the 2017 International Conference on Big Data, IoT and Data Science (BID).","DOI":"10.1109\/BID.2017.8336569"},{"key":"10381_CR37","volume-title":"Lyrics mining for music meta-data estimation. Paper presented at the International Conference on Artificial Intelligence Applications and Innovations","author":"H O\u011ful","year":"2016","unstructured":"O\u011ful H, K\u0131rmac\u0131 B (2016) Lyrics mining for music meta-data estimation. Paper presented at the International Conference on Artificial Intelligence Applications and Innovations. Thessaloniki, Greece"},{"key":"10381_CR38","volume-title":"Paper presented at the 2012 IEEE International Conference on Acoustics","author":"Y Panagakis","year":"2012","unstructured":"Panagakis Y, Kotropoulos C (2012) Automatic music tagging by low-rank representation. In: Paper presented at the 2012 IEEE International Conference on Acoustics. Signal Processing (ICASSP), Speech and"},{"key":"10381_CR39","unstructured":"PwC. (2017). Perspectives from the Global Entertainment and Media Outlook 2017\u20132021. Retrieved from https:\/\/www.pwc.com\/gx\/en\/entertainment-media\/pdf\/outlook-2017-curtain-up.pdf"},{"key":"10381_CR40","unstructured":"Ruder, S. (2017). An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:.05098."},{"issue":"1","key":"10381_CR41","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1007\/s11042-018-5632-2","volume":"78","author":"X Shao","year":"2019","unstructured":"Shao X, Cheng Z, Kankanhalli MS (2019) Music auto-tagging based on the unified latent semantic modeling. Multimedia Tools Applications 78(1):161\u2013176","journal-title":"Multimedia Tools Applications"},{"key":"10381_CR42","doi-asserted-by":"publisher","first-page":"107020","DOI":"10.1016\/j.apacoust.2019.107020","volume":"158","author":"G Sharma","year":"2020","unstructured":"Sharma G, Umapathy K, Krishnan S (2020) Trends in audio signal feature extraction methods. Applied Acoustics 158:107020","journal-title":"Applied Acoustics"},{"key":"10381_CR43","doi-asserted-by":"crossref","unstructured":"Shen J, Meng W, Yan S, Pang H, Hua X (2010) Effective music tagging through advanced statistical modeling. Paper presented at the Proceedings of the 33rd international ACM SIGIR conference on Research and development in information retrieval.","DOI":"10.1145\/1835449.1835555"},{"issue":"6","key":"10381_CR44","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1007\/s00530-019-00613-z","volume":"25","author":"J Shen","year":"2019","unstructured":"Shen J, Tao M, Qu Q, Tao D, Rui Y (2019) Toward efficient indexing structure for scalable content-based music retrieval. Multimedia Systems 25(6):639\u2013653","journal-title":"Multimedia Systems"},{"key":"10381_CR45","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.neucom.2018.02.076","volume":"292","author":"G Song","year":"2018","unstructured":"Song G, Wang Z, Han F, Ding S, Iqbal MA (2018) Music auto-tagging using deep recurrent neural networks. Neurocomputing 292:104\u2013110 Retrieved from <Go to ISI>:\/\/WOS:000429321400009","journal-title":"Neurocomputing"},{"issue":"2","key":"10381_CR46","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1080\/09298215.2014.894533","volume":"43","author":"B Sturm","year":"2014","unstructured":"Sturm B (2014) The state of the art ten years after a state of the art: future research in music information retrieval. Journal of New Music Research 43(2):147\u2013172","journal-title":"Journal of New Music Research"},{"issue":"1","key":"10381_CR47","first-page":"13","volume":"2","author":"B Sung","year":"2008","unstructured":"Sung B, Chung M, Ko I (2008) A feature based music content recognition method using simplified MFCC. International Journal Principles Applications of Information Science and Technology 2(1):13\u201323","journal-title":"International Journal Principles Applications of Information Science and Technology"},{"key":"10381_CR48","first-page":"922","volume":"5","author":"R Thiruvengatanadhan","year":"2018","unstructured":"Thiruvengatanadhan R (2018) Music classification using MFCC and SVM. International Research Journal of Engineering and Technology 5:922\u2013924","journal-title":"International Research Journal of Engineering and Technology"},{"key":"10381_CR49","unstructured":"Tsaptsinos A (2017) Lyrics-based music genre classification using a hierarchical attention network. arXiv preprint arXiv:1707.04678."},{"issue":"2","key":"10381_CR50","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1109\/TASL.2007.913750","volume":"16","author":"D Turnbull","year":"2008","unstructured":"Turnbull D, Barrington L, Torres D, Lanckriet G (2008a) Semantic annotation and retrieval of music and sound effects. IEEE Transactions on Audio, Speech, Language Processing 16(2):467\u2013476","journal-title":"IEEE Transactions on Audio, Speech, Language Processing"},{"key":"10381_CR51","unstructured":"Turnbull D, Barrington L, Lanckriet G (2008b) Five approaches to collecting tags for music. Paper presented at the Proceedings of the 9th International Conference on Music Information Retrieval, ISMIR, Philadelphia."},{"key":"10381_CR52","doi-asserted-by":"crossref","unstructured":"Wang Q, Su F, Wang Y (2019) A hierarchical attentive deep neural network model for semantic music annotation integrating multiple music representations. Paper presented at the Proceedings of the 2019 on International Conference on Multimedia Retrieval.","DOI":"10.1145\/3323873.3325031"},{"key":"10381_CR53","unstructured":"Wei S, Xu K, Wang D, Liao F, Wang H, Kong Q (2018) Sample mixed-based data augmentation for domestic audio tagging. arXiv preprint arXiv:.03883."},{"key":"10381_CR54","unstructured":"Won M, Chun S, Serra X (2019a) Toward interpretable music tagging with self-attention. arXiv preprint arXiv:.04972."},{"key":"10381_CR55","unstructured":"Won M, Chun S, Nieto O, Serra X (2019b) Automatic music tagging with Harmonic CNN. Paper presented at the 20th International society for music information retrieval Deft, Netherlands."},{"key":"10381_CR56","volume-title":"Music tagging with regularized logistic regression","author":"B Xie","year":"2011","unstructured":"Xie B, Bian W, Tao D, Chordia P (2011) Music tagging with regularized logistic regression. Paper presented at the ISMIR."},{"key":"10381_CR57","doi-asserted-by":"crossref","unstructured":"Xu Y, Kong Q, Wang W, Plumbley MD (2018). Large-scale weakly supervised audio classification using gated convolutional neural network. Paper presented at the 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP).","DOI":"10.1109\/ICASSP.2018.8461975"},{"key":"10381_CR58","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/j.patcog.2018.03.018","volume":"80","author":"N Zhuang","year":"2018","unstructured":"Zhuang N, Yan Y, Chen S, Wang H, Shen C (2018) Multi-label learning based deep transfer neural network for facial attribute classification. Pattern Recognition 80:225\u2013240","journal-title":"Pattern Recognition"},{"key":"10381_CR59","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/j.neucom.2015.10.134","volume":"204","author":"Y Zuo","year":"2016","unstructured":"Zuo Y, Zeng J, Gong M, Jiao L (2016) Tag-aware recommender systems based on deep neural networks. Neurocomputing 204:51\u201360","journal-title":"Neurocomputing"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10381-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-10381-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10381-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T06:37:21Z","timestamp":1619505441000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-10381-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,4]]},"references-count":59,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2021,4]]}},"alternative-id":["10381"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-10381-y","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,2,4]]},"assertion":[{"value":"11 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 September 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 December 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 February 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}