{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:27:55Z","timestamp":1740122875262,"version":"3.37.3"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"20","license":[{"start":{"date-parts":[[2019,1,9]],"date-time":"2019-01-09T00:00:00Z","timestamp":1546992000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Key R&D Program of China","award":["2018YFB1003203"],"award-info":[{"award-number":["2018YFB1003203"]}]},{"DOI":"10.13039\/501100004731","name":"Natural Science Foundation of Zhejiang Province","doi-asserted-by":"publisher","award":["LY18F010008"],"award-info":[{"award-number":["LY18F010008"]}],"id":[{"id":"10.13039\/501100004731","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61672528","61773392"],"award-info":[{"award-number":["61672528","61773392"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100009193","name":"Marsden Fund","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100009193","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1007\/s11042-018-7142-7","type":"journal-article","created":{"date-parts":[[2019,1,9]],"date-time":"2019-01-09T02:03:40Z","timestamp":1546999420000},"page":"29509-29527","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Relational recurrent neural networks for polyphonic sound event detection"],"prefix":"10.1007","volume":"78","author":[{"given":"Junbo","family":"Ma","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruili","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wanting","family":"Ji","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"En","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianping","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,1,9]]},"reference":[{"key":"7142_CR1","unstructured":"Abadi M, Barham P, Chen J, Chen Z, Davis A, Dean J, Devin M et al (2016) \"Tensorflow: a system for large-scale machine learning.\" In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 16. 265-283"},{"key":"7142_CR2","unstructured":"Sharath A, Virtanen T (2017) \"A report on sound event detection with different binaural features.\" arXiv preprint arXiv:1710.02997"},{"key":"7142_CR3","unstructured":"Adavanne S, G Parascandolo, P Pertil\u00e4, T Heittola, T Virtanen (2016) \u201cSound event detection in multichannel audio using spatial and harmonic features,\u201d IEEE Detection and Classification of Acoustic Scenes and Events workshop"},{"key":"7142_CR4","unstructured":"Adavanne S, G Parascandolo, P Pertil\u00e4, T Heittola, T Virtanen (2017a) \"Sound event detection in multichannel audio using spatial and harmonic features.\" arXiv preprint arXiv:1706.02293"},{"key":"7142_CR5","doi-asserted-by":"crossref","unstructured":"Adavanne S, P Pertil\u00e4, T Virtanen (2017b) \"Sound event detection using spatial features and convolutional recurrent neural network.\" In Acoustics, Speech and Signal Processing (ICASSP), 2017 IEEE International Conference on, pp. 771-775. IEEE","DOI":"10.1109\/ICASSP.2017.7952260"},{"key":"7142_CR6","doi-asserted-by":"crossref","unstructured":"Cak\u0131r E, T Virtanen (2018) \"End-to-End polyphonic sound event detection using convolutional recurrent neural networks with learned time-frequency representation input.\". In Neural Networks (IJCNN), 2018 International Joint Conference on, pp. 1-7. IEEE","DOI":"10.1109\/IJCNN.2018.8489470"},{"key":"7142_CR7","doi-asserted-by":"crossref","unstructured":"Cakir E, T Heittola, H Huttunen, T Virtanen (2015) \"Polyphonic sound event detection using multi label deep neural networks.\" In Neural Networks (IJCNN), 2015 International Joint Conference on, pp. 1-7. IEEE","DOI":"10.1109\/IJCNN.2015.7280624"},{"key":"7142_CR8","unstructured":"Chen Y, Y Zhang, Z Duan (2017) \"DCASE2017: sound event detection using convolutional neural networks.\" DCASE2017 Challenge, Tech. Rep"},{"key":"7142_CR9","doi-asserted-by":"crossref","unstructured":"Dang A, TH Vu, J-C Wang (2017a) \"A survey of deep learning for polyphonic sound event detection.\" In Orange Technologies (ICOT), 2017 International Conference on, pp. 75-78. IEEE","DOI":"10.1109\/ICOT.2017.8336092"},{"key":"7142_CR10","unstructured":"Dang A, TH Vu, J-C Wang (2017b) \"Deep learning for DCASE2017 challenge.\" Detection and Classification of Acoustic Scenes and Events (DCASE 2017) Proceedings 2017"},{"issue":"1","key":"7142_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1687-4722-2013-1","volume":"2013","author":"T Heittola","year":"2013","unstructured":"Heittola T, Mesaros A, Eronen A, Virtanen T (2013) \"Context-dependent sound event detection\" EURASIP J Audio, Speech, Music Proc 2013(1):1","journal-title":"EURASIP J Audio, Speech, Music Proc"},{"key":"7142_CR12","unstructured":"Ioffe S, C Szegedy (2015) \"Batch normalization: Accelerating deep network training by reducing internal covariate shift.\" arXiv preprint arXiv:1502.03167"},{"key":"7142_CR13","unstructured":"Jeong Il-Y, S Lee, Y Han, and K Lee (2017) \"Audio event detection using multiple-input convolutional neural network.\" Detection and Classification of Acoustic Scenes and Events (DCASE)"},{"key":"7142_CR14","doi-asserted-by":"crossref","unstructured":"Ji W, R Wang, J Ma (2018) \"Dictionary-based active learning method for sound event classification.\" Multimedia tools and applications","DOI":"10.1007\/s11042-018-6380-z"},{"key":"7142_CR15","unstructured":"Kingma DP, J Ba (2014) \"Adam: A method for stochastic optimization.\" arXiv preprint arXiv:1412.6980"},{"key":"7142_CR16","unstructured":"Kroos C, M Plumbley (2017) \"Neuroevolution for sound event detection in real life audio: A pilot study.\" Detection and Classification of Acoustic Scenes and Events (DCASE 2017) Proceedings 2017"},{"key":"7142_CR17","unstructured":"Lai Y-H, C-H Wang, S-Y Hou, B-Y Chen, Y Tsao, Y-W Liu (2016) \"DCASE report for task 3: Sound event detection in real life audio.\" IEEE AASP Challenge: Detection and Classification of Acoustic Scenes and Events"},{"issue":"2","key":"7142_CR18","doi-asserted-by":"publisher","first-page":"790","DOI":"10.1109\/TII.2017.2739340","volume":"14","author":"P Li","year":"2018","unstructured":"Li P, Chen Z, Yang LT, Zhang Q, Jamal Deen M (2018) \"Deep convolutional computation model for feature learning on big data in Internet of Things.\" IEEE Trans Ind Inform 14(2):790\u2013798","journal-title":"IEEE Trans Ind Inform"},{"key":"7142_CR19","unstructured":"Srivastava, N, Hinton, G, Krizhevsky, A, Sutskever, I & Salakhutdinov, R (2014) \"Dropout: a simple way to prevent neural networks from overfitting.\" J Machine Learning Res 15, pp. 1929\u20131958"},{"key":"7142_CR20","doi-asserted-by":"crossref","unstructured":"Mahdavinejad, M Saeid, M Rezvan, M Barekatain, P Adibi, P Barnaghi, and AP Sheth (2017) \"Machine learning for Internet of Things data analysis: A survey.\" Digital Communications and Networks","DOI":"10.1016\/j.dcan.2017.10.002"},{"key":"7142_CR21","unstructured":"Mesaros A, T Heittola, A Eronen, T Virtanen (2010) \"Acoustic event detection in real life recordings.\" In Signal Processing Conference, 2010 18th European, pp. 1267-1271. IEEE"},{"key":"7142_CR22","doi-asserted-by":"crossref","unstructured":"Mesaros A, T Heittola, O Dikmen, T Virtanen (2015) \"Sound event detection in real life recordings using coupled matrix factorization of spectral representations and class activity annotations.\" In Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on, pp. 151-155. IEEE","DOI":"10.1109\/ICASSP.2015.7177950"},{"key":"7142_CR23","doi-asserted-by":"crossref","unstructured":"Mesaros A, T Heittola, T Virtanen (2016a) \"TUT database for acoustic scene classification and sound event detection.\" In Signal Processing Conference (EUSIPCO), 2016 24th European, pp. 1128-1132. IEEE","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"issue":"6","key":"7142_CR24","doi-asserted-by":"publisher","first-page":"162","DOI":"10.3390\/app6060162","volume":"6","author":"A Mesaros","year":"2016","unstructured":"Mesaros A, Heittola T, Virtanen T (2016b) \"Metrics for polyphonic sound event detection.\" Appl Sci 6(6):162","journal-title":"Appl Sci"},{"key":"7142_CR25","doi-asserted-by":"crossref","unstructured":"Mohammadi M, Al-Fuqaha A, Sorour S, Guizani M (2018) \"Deep learning for IoT big data and streaming analytics: A survey.\" IEEE Commun Surv Tutor","DOI":"10.1109\/COMST.2018.2844341"},{"key":"7142_CR26","doi-asserted-by":"crossref","unstructured":"Morrison D, R Wang, LC De Silva (2005a) \"Spoken affect classification using neural networks.\" In Granular Computing, 2005 IEEE International Conference on, vol. 2, pp. 583-586. IEEE","DOI":"10.1109\/GRC.2005.1547359"},{"key":"7142_CR27","unstructured":"Morrison D, R Wang, LC De Silva, WL Xu (2005b) \"Real-time spoken affect classification and its application in call-centres.\" In Information Technology and Applications, 2005. ICITA 2005. Third International Conference on, vol. 1, pp. 483-487. IEEE"},{"key":"7142_CR28","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1016\/j.neucom.2017.07.021","volume":"272","author":"I Ozer","year":"2018","unstructured":"Ozer I, Ozer Z, Findik O (2018) \"Noise robust sound event classification with convolutional neural network.\" Neurocomputing 272:505\u2013512","journal-title":"Neurocomputing"},{"key":"7142_CR29","doi-asserted-by":"crossref","unstructured":"Parascandolo G, H Huttunen, T Virtanen (2016) \"Recurrent neural networks for polyphonic sound event detection in real life recordings.\" In Acoustics, Speech and Signal Processing (ICASSP), 2016 IEEE International Conference on, pp. 6440-6444. IEEE","DOI":"10.1109\/ICASSP.2016.7472917"},{"issue":"6","key":"7142_CR30","doi-asserted-by":"publisher","first-page":"1291","DOI":"10.1109\/TASLP.2017.2690575","volume":"25","author":"G Parascandolo","year":"2017","unstructured":"Parascandolo G, Heittola T, Huttunen H, Virtanen T (2017) \"Convolutional recurrent neural networks for polyphonic sound event detection.\" IEEE\/ACM Trans Audio, Speech, Lang Proc 25(6):1291\u20131303","journal-title":"IEEE\/ACM Trans Audio, Speech, Lang Proc"},{"key":"7142_CR31","unstructured":"Phan H, M Krawczyk-Becker, T Gerkmann, A Mertins (2017) \"DNN and CNN with weighted and multi-task loss functions for audio event detection.\" arXiv preprint arXiv:1708.03211"},{"issue":"1","key":"7142_CR32","doi-asserted-by":"publisher","first-page":"048317","DOI":"10.1155\/2007\/48317","volume":"2007","author":"GE Poliner","year":"2006","unstructured":"Poliner GE, Ellis DPW (2006) \"A discriminative model for polyphonic piano transcription.\" EURASIP J Adv Sign Proc 2007(1):048317","journal-title":"EURASIP J Adv Sign Proc"},{"key":"7142_CR33","unstructured":"Santoro A, R Faulkner, D Raposo, J Rae, M Chrzanowski, T Weber, D Wierstra, O Vinyals, R Pascanu, T Lillicrap (2018) \"Relational recurrent neural networks.\" arXiv preprint arXiv:1806.01822"},{"key":"7142_CR34","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber J (2015) \"Deep learning in neural networks: An overview.\" Neural Netw 61:85\u2013117","journal-title":"Neural Netw"},{"key":"7142_CR35","unstructured":"Sharath A, A Politis, T Virtanen (2018) \"Multichannel sound event detection using 3D convolutional neural networks for learning inter-channel features.\" arXiv preprint arXiv:1801.09522"},{"key":"7142_CR36","doi-asserted-by":"publisher","first-page":"1454","DOI":"10.1016\/j.jclepro.2016.10.006","volume":"140","author":"Stojkoska","year":"2017","unstructured":"Stojkoska, Risteska BL, Trivodaliev KV (2017) \"A review of Internet of Things for smart home: Challenges and solutions.\" J Clean Prod 140:1454\u20131464","journal-title":"J Clean Prod"},{"key":"7142_CR37","unstructured":"Vaswani A, N Shazeer, N Parmar, J Uszkoreit, L Jones, AN Gomez, \u0141 Kaiser, I Polosukhin (2017) \"Attention is all you need.\" In Advances in Neural Information Processing Systems, pp. 6000-6010"},{"key":"7142_CR38","unstructured":"Vu TH, Wang J-C (2016) \"Acoustic scene and event recognition using recurrent neural networks.\" Detection and Classification of Acoustic Scenes and Events 2016"},{"key":"7142_CR39","doi-asserted-by":"crossref","unstructured":"Wang R, Ji W, Liu M, Wang X, Weng J, Deng S, Gao S, Yuan C (2018) \"Review on mining data from multiple data sources.\" Pattern Recognition Letters","DOI":"10.1016\/j.patrec.2018.01.013"},{"issue":"17","key":"7142_CR40","doi-asserted-by":"publisher","first-page":"17735","DOI":"10.1007\/s11042-015-2967-9","volume":"76","author":"J Yang","year":"2017","unstructured":"Yang J, He S, Lin Y, Lv Z (2017) \"Multimedia cloud transmission and storage system based on Internet of Things.\" Multimed Tools Appl 76(17):17735\u201317750","journal-title":"Multimed Tools Appl"},{"key":"7142_CR41","doi-asserted-by":"crossref","unstructured":"Zhang H, McLoughlin IV, Song Y (2016) \"Robust Sound Event Detection in Continuous Audio Environments.\" In Interspeech, pp. 2977-2981","DOI":"10.21437\/Interspeech.2016-392"},{"key":"7142_CR42","unstructured":"Zhou J (2017) \"Sound event detection in multichannel audio LSTM network.\" DCASE2017 Challenge, Tech. Rep"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-018-7142-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-018-7142-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-018-7142-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,1,8]],"date-time":"2020-01-08T19:10:37Z","timestamp":1578510637000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-018-7142-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,1,9]]},"references-count":42,"journal-issue":{"issue":"20","published-print":{"date-parts":[[2019,10]]}},"alternative-id":["7142"],"URL":"https:\/\/doi.org\/10.1007\/s11042-018-7142-7","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2019,1,9]]},"assertion":[{"value":"1 July 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 December 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}