{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:31:11Z","timestamp":1772724671750,"version":"3.50.1"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,1,8]],"date-time":"2022-01-08T00:00:00Z","timestamp":1641600000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,8]],"date-time":"2022-01-08T00:00:00Z","timestamp":1641600000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Key R and D Program of China","award":["2016YFB1000101"],"award-info":[{"award-number":["2016YFB1000101"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11042-021-11610-8","type":"journal-article","created":{"date-parts":[[2022,1,8]],"date-time":"2022-01-08T19:02:38Z","timestamp":1641668558000},"page":"5089-5112","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Multi-representation knowledge distillation for audio classification"],"prefix":"10.1007","volume":"81","author":[{"given":"Liang","family":"Gao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5997-5169","authenticated-orcid":false,"given":"Kele","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huaimin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxing","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,1,8]]},"reference":[{"key":"11610_CR1","unstructured":"Anil R, Pereyra G, Passos A, Ormandi R, Dahl GE, Hinton GE (2018) Large scale distributed neural network training through online distillation"},{"key":"11610_CR2","unstructured":"Batra T, Parikh D (2017) Cooperative learning with visual attributes. Computer vision and pattern recognition"},{"key":"11610_CR3","doi-asserted-by":"crossref","unstructured":"Bucilua C, Caruana R, Niculescu-Mizil, A (2006) Model compression. In: Proceedings of the 12th ACM SIGKDD International conference on knowledge discovery and data mining. ACM, pp 535\u2013541","DOI":"10.1145\/1150402.1150464"},{"issue":"3","key":"11610_CR4","doi-asserted-by":"publisher","first-page":"6069","DOI":"10.1016\/j.eswa.2008.06.126","volume":"36","author":"P Dhanalakshmi","year":"2009","unstructured":"Dhanalakshmi P, Palanivel S, Ramalingam V (2009) Classification of audio signals using svm and rbfnn. Expert Sys Appl 36(3):6069\u20136075","journal-title":"Expert Sys Appl"},{"issue":"2","key":"11610_CR5","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1023\/A:1007607513941","volume":"40","author":"TG Dietterich","year":"2000","unstructured":"Dietterich TG (2000) An experimental comparison of three methods for constructing ensembles of decision trees: Bagging, boosting, and randomization. Machine Learning 40(2):139\u2013157","journal-title":"Machine Learning"},{"key":"11610_CR6","unstructured":"Fonseca E, Plakal M, Font F, Ellis DPW, Favory X, Pons J, Serra X (2018) General-purpose tagging of freesound audio with audioset labels: Task description, dataset, and baseline. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018), pp 69\u201373"},{"key":"11610_CR7","unstructured":"Fonseca E, Plakal M, Font F, Ellis DPW, Favory X, Pons J, Serra X (2018) General-purpose tagging of freesound audio with audioset labels: Task description, dataset, and baseline. Proceedings of the detection and classification of acoustic scenes and events workshop, pp 69\u201373"},{"key":"11610_CR8","unstructured":"Fonseca E, Puig JP, Favory X, Corbera FF, Bogdanov D, Ferraro A, Oramas S, Porter A, Serra X (2017) Freesound datasets: a platform for the creation of open audio datasets. In: Hu X, Cunningham SJ, Turnbull D, Duan Z (eds.). Proceedings of the 18th ISMIR Conference Oct 23-27; Suzhou, China.[Canada]: International Society for Music Information Retrieval; 2017. p. 486-93. International Society for Music Information Retrieval, 2017"},{"key":"11610_CR9","unstructured":"Fraile R, Blanco-Martin E, Gutierrez-Arriola JM, Saenz-Lechon N, Osma-Ruiz VJ (2018) Classification of acoustic scenes based on modulation spectra and position-pitch maps. Technical report, DCASE2018 Challenge"},{"issue":"4","key":"11610_CR10","doi-asserted-by":"publisher","first-page":"2346","DOI":"10.1016\/j.jfranklin.2018.12.007","volume":"356","author":"RC Guido","year":"2019","unstructured":"Guido RC (2019) Enhancing teager energy operator based on a novel and appealing concept: Signal mass. Journal of the Franklin Institute 356(4):2346\u20132352","journal-title":"Journal of the Franklin Institute"},{"issue":"1","key":"11610_CR11","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1109\/MSP.2018.2874549","volume":"36","author":"RC Guido","year":"2019","unstructured":"Guido RC (2019) Paraconsistent feature engineering [lecture notes]. IEEE Signal Processing Magazine 36(1):154\u2013158","journal-title":"IEEE Signal Processing Magazine"},{"key":"11610_CR12","unstructured":"Hao W, Zhao L, Zhang Q, Zhao HY, Wang JH (2018) DCASE 2018 task 1a: Acoustic scene classification by bi-LSTM-CNN-net multichannel fusion. Technical report, DCASE2018 Challenge"},{"key":"11610_CR13","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"11610_CR14","first-page":"9","volume":"1050","author":"G Hinton","year":"2015","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. Stat 1050:9","journal-title":"Stat"},{"issue":"4","key":"11610_CR15","doi-asserted-by":"publisher","first-page":"1038","DOI":"10.1109\/TCSVT.2018.2823360","volume":"29","author":"Y Huang","year":"2019","unstructured":"Huang Y, Cao X, Wang Q, Zhang B, Zhen X, Li X (2019) Long-short-term features for dynamic scene classification. IEEE Trans Circuits Syst Video Technol 29(4):1038\u20131047","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"11610_CR16","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger, KQ (2017) Densely connected convolutional networks. In: IEEE Conf Comput Vision Pattern Recogn, vol 1, p 3","DOI":"10.1109\/CVPR.2017.243"},{"issue":"12","key":"11610_CR17","doi-asserted-by":"publisher","first-page":"2637","DOI":"10.1109\/TMM.2017.2703939","volume":"19","author":"L Jing","year":"2017","unstructured":"Jing L, Liu B, Choi J, Janin A, Bernd J, Mahoney MW, Friedland G (2017) Dcar: A discriminative and compact audio representation for audio processing. IEEE Trans Multimed 19(12):2637\u20132650","journal-title":"IEEE Trans Multimed"},{"key":"11610_CR18","unstructured":"Jung J, Heo H, Shim H, Yu H (2018) DNN based multi-level features ensemble for acoustic scene classification. Technical report, DCASE2018 Challenge"},{"key":"11610_CR19","unstructured":"Jun W, Shengchen L, (2018) Self-attention mechanism based system for dcase2018 challenge task1 and task4. Technical report, DCASE2018 Challenge"},{"key":"11610_CR20","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp 1097\u20131105"},{"key":"11610_CR21","doi-asserted-by":"crossref","unstructured":"Kulkarni A (2009) Audio signal processing. US Patent 7,490,044","DOI":"10.1121\/1.3182994"},{"issue":"2","key":"11610_CR22","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1023\/A:1022859003006","volume":"51","author":"LI Kuncheva","year":"2003","unstructured":"Kuncheva LI, Whitaker CJ (2003) Measures of diversity in classifier ensembles and their relationship with the ensemble accuracy. Machine Learning 51(2):181\u2013207","journal-title":"Machine Learning"},{"key":"11610_CR23","unstructured":"Lan X, Zhu X, Gong S (2018) Knowledge distillation by on-the-fly native ensemble. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems (NIPS). Curran Associates Inc, pp 7528\u20137538"},{"key":"11610_CR24","unstructured":"Lee J, Park J, Kim KL, Nam J (2017) Sample-level deep convolutional neural networks for music auto-tagging using raw waveforms. arXiv: Sound"},{"issue":"5","key":"11610_CR25","doi-asserted-by":"publisher","first-page":"1169","DOI":"10.1109\/TMM.2018.2875512","volume":"21","author":"C Ma","year":"2019","unstructured":"Ma C, Guo Y, Yang J, An W (2019) Learning multi-view representation with lstm for 3-d shape recognition and retrieval. IEEE Trans Multimed 21(5):1169\u20131182","journal-title":"IEEE Trans Multimed"},{"key":"11610_CR26","doi-asserted-by":"crossref","unstructured":"Ma L, Smith DJ, Milner BP (2003) Context awareness using environmental noise classification. In: European conference on speech communication and technology, pp 2237\u20132240","DOI":"10.1007\/978-3-540-45227-0_36"},{"key":"11610_CR27","unstructured":"Mesaros THA, Virtanen T (2018) A multi-device dataset for urban acoustic scene classification. Proceedings of the detection and classification of acoustic scenes and events workshop, pp 9\u201313"},{"key":"11610_CR28","unstructured":"Mesaros A, Heittola T, Virtanen T (2018) A multi-device dataset for urban acoustic scene classification. In: Proceedings of the detection and classification of acoustic scenes and events 2018 workshop (DCASE2018), pp 9\u201313"},{"key":"11610_CR29","doi-asserted-by":"crossref","unstructured":"Nguyen T, Pernkopf F (2018) Acoustic scene classification using a convolutional neural network ensemble and nearest neighbor filters. Technical report, DCASE2018 Challenge","DOI":"10.1109\/ICMLA.2019.00151"},{"key":"11610_CR30","doi-asserted-by":"crossref","unstructured":"Piczak, KJ (2015) Environmental sound classification with convolutional neural networks. In: 2015 IEEE 25th International Workshop on Machine Learning for Signal Processing (MLSP). IEEE, pp 1\u20136","DOI":"10.1109\/MLSP.2015.7324337"},{"issue":"4945","key":"11610_CR31","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1126\/science.247.4945.978","volume":"247","author":"T Poggio","year":"1990","unstructured":"Poggio T, Girosi F (1990) Regularization algorithms for learning that are equivalent to multilayer networks. Science 247(4945):978\u2013982","journal-title":"Science"},{"issue":"3","key":"11610_CR32","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1109\/TMM.2016.2618218","volume":"19","author":"J Ren","year":"2017","unstructured":"Ren J, Jiang X, Yuan J, Magnenat-Thalmann N (2017) Sound-event classification using robust texture features for robot hearing. IEEE Trans Multimed 19(3):447\u2013458","journal-title":"IEEE Trans Multimed"},{"issue":"1\u20132","key":"11610_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10462-009-9124-7","volume":"33","author":"L Rokach","year":"2010","unstructured":"Rokach L (2010) Ensemble-based classifiers. Artif Intell Rev 33(1\u20132):1\u201339","journal-title":"Artif Intell Rev"},{"key":"11610_CR34","unstructured":"Sercu T, Goel V (2016) Dense prediction on sequences with time-dilated convolutions for speech recognition. arXiv:1611.09288"},{"key":"11610_CR35","unstructured":"Shan S, Ren Y (2018) Automatic audio tagging with 1d and 2d convolutional neural networks. Technical report, DCASE2018 Challenge"},{"key":"11610_CR36","unstructured":"Simonyan K, Zisserman, A (2015) Very deep convolutional networks for large-scale image recognition. In: International conference on learning representations"},{"key":"11610_CR37","doi-asserted-by":"crossref","unstructured":"Sun S, Chen W, Bian J, Liu X, Liu T-Y (2017) Ensemble-compression: A new method for parallel training of deep neural networks. In: Joint European conference on machine learning and knowledge discovery in databases. Springer, pp 187\u2013202","DOI":"10.1007\/978-3-319-71249-9_12"},{"issue":"2","key":"11610_CR38","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1007\/s00521-018-3655-2","volume":"32","author":"FJ Veredas","year":"2020","unstructured":"Veredas FJ, Urda D, Francisco LS, Aledo JC (2020) Combining feature engineering and feature selection to improve the prediction of methionine oxidation sites in proteins. Neural Comput Appl 32(2):323\u2013334","journal-title":"Neural Comput Appl"},{"key":"11610_CR39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63450-0","volume-title":"Computational analysis of sound scenes and events","author":"T Virtanen","year":"2018","unstructured":"Virtanen T, Plumbley MD, Ellis D (2018) Computational analysis of sound scenes and events. Springer, Heidelberg"},{"issue":"2","key":"11610_CR40","doi-asserted-by":"publisher","first-page":"911","DOI":"10.1109\/TGRS.2018.2862899","volume":"57","author":"Qi Wang","year":"2019","unstructured":"Wang Qi, He Xiang, Li Xuelong (2019) Locality and structure regularized low rank representation for hyperspectral image classification. IEEE Trans Geosci Remote Sensing 57(2):911\u2013923","journal-title":"IEEE Trans Geosci Remote Sensing"},{"key":"11610_CR41","unstructured":"Wei Q, Liu Y, Ruan X (2018) A report on audio tagging with deeper cnn, 1d-convnet and 2d-convnet. Technical report, DCASE2018 Challenge"},{"key":"11610_CR42","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1121\/1.5111059","volume":"145","author":"K Xu","year":"2019","unstructured":"Xu K, Zhu B, Kong Q, Mi H, Ding B, Wang D, Wang H (2019) General audio tagging with ensembling convolutional neural networks and statistical features. J Acoust Soc Am 145:521\u2013527","journal-title":"J Acoust Soc Am"},{"key":"11610_CR43","doi-asserted-by":"crossref","unstructured":"Xu Y, Kong Q, Wang W, Plumbley MD (2018) Large-scale weakly supervised audio classification using gated convolutional neural network. In: International conference on acoustics, speech and signal processing. IEEE, pp 121\u2013125","DOI":"10.1109\/ICASSP.2018.8461975"},{"key":"11610_CR44","unstructured":"Xu Z, Smit P, Kurimo M (2018) The aalto system based on fine-tuned audioset features for dcase2018 task2 \u2014- general purpose audio tagging. Technical report, DCASE2018 Challenge"},{"key":"11610_CR45","unstructured":"Yang JH, Kim NK, Kim HK (2018) Se-resnet with gan-based data augmentation applied to acoustic scene classification. Technical report, DCASE2018 Challenge"},{"key":"11610_CR46","doi-asserted-by":"crossref","unstructured":"Yin Y, Shah RR, Zimmermann, R (2018) Learning and fusing multimodal deep features for acoustic scene categorization. In: ACM multimedia conference on multimedia conference. ACM, pp 1892\u20131900","DOI":"10.1145\/3240508.3240631"},{"issue":"6","key":"11610_CR47","doi-asserted-by":"publisher","first-page":"1576","DOI":"10.1109\/TMM.2017.2766843","volume":"20","author":"S Zhang","year":"2018","unstructured":"Zhang S, Zhang S, Huang T, Gao W (2018) Speech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching. IEEE Trans Multimed 20(6):1576\u20131590","journal-title":"IEEE Trans Multimed"},{"issue":"4","key":"11610_CR48","doi-asserted-by":"publisher","first-page":"903","DOI":"10.1109\/TMM.2017.2759500","volume":"20","author":"C Zhang","year":"2018","unstructured":"Zhang C, Cheng J, Tian Q (2018) Multiview label sharing for visual representations and classifications. IEEE Trans Multimed 20(4):903\u2013913","journal-title":"IEEE Trans Multimed"},{"key":"11610_CR49","unstructured":"Zhang H, Cisse M, Dauphin YN, Lopez-Paz D (2018) mixup: Beyond empirical risk minimization. In: International conference on learning representations"},{"key":"11610_CR50","doi-asserted-by":"crossref","unstructured":"Zhang Y, Xiang T, Hospedales TM, Lu H (2018) Deep mutual learning. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 4320\u20134328","DOI":"10.1109\/CVPR.2018.00454"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11610-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11610-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11610-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T06:29:54Z","timestamp":1645511394000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11610-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,8]]},"references-count":50,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["11610"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11610-8","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,8]]},"assertion":[{"value":"14 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 September 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declaration"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}