{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T21:05:15Z","timestamp":1780607115672,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,22]],"date-time":"2018-10-22T00:00:00Z","timestamp":1540166400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100003392","name":"Natural Science Foundation of Fujian Province","doi-asserted-by":"publisher","award":["2015J01420"],"award-info":[{"award-number":["2015J01420"]}],"id":[{"id":"10.13039\/501100003392","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,22]]},"DOI":"10.1145\/3207677.3277993","type":"proceedings-article","created":{"date-parts":[[2018,10,18]],"date-time":"2018-10-18T10:19:29Z","timestamp":1539857969000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Convolutional Neural Networks with Multi-task Loss for Polyphonic Sound Event Detection"],"prefix":"10.1145","author":[{"given":"Huang","family":"Liu","sequence":"first","affiliation":[{"name":"College of Mathematics and Computer Science, Fuzhou University, Fuzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiu","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Mathematics and Computer Science, Fuzhou University, Fuzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fa-Qian","family":"Guan","sequence":"additional","affiliation":[{"name":"College of Mathematics and Computer Science, Fuzhou University, Fuzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jin-Sen","family":"Hu","sequence":"additional","affiliation":[{"name":"College of Mathematics and Computer Science, Fuzhou University, Fuzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2018,10,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Valenzise G. Gerosa L. Tagliasacchi M. Antonacci F. and Sarti A. 2007. Scream and gunshot detection and localization for audio-surveillance systems. Advanced Video and Signal Based Surveillance. IEEE Piscataway 21--26. 10.1109\/AVSS.2007.4425280","DOI":"10.1109\/AVSS.2007.4425280"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","unstructured":"Cai R. Lu L. Hanjalic A. Zhang H. J. and Cai L. H. 2006. A flexible framework for key audio effects detection and auditory context inference. IEEE Transactions on audio speech and language processing. IEEE Piscataway 1026--1039. 10.1109\/TSA.2005.857575","DOI":"10.1109\/TSA.2005.857575"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/2984093.2984217"},{"key":"e_1_3_2_1_4_1","volume-title":"European Signal Processing Conference. IEEE, Piscataway, 1267--1271","author":"Mesaros A","year":"2010","unstructured":"Mesaros A, Heittola T, Eronen A, and Virtanen. 2010. Acoustic event detection in real life recordings. European Signal Processing Conference. IEEE, Piscataway, 1267--1271."},{"key":"e_1_3_2_1_5_1","volume-title":"European Signal Processing Conference. IEEE, Piscataway, 1--5.","author":"Giannoulis D","unstructured":"Giannoulis D, Dan S, Benetos E, Rossignol, M., Lagrange, M., and Plumbley, M. D. 2015. A database and challenge for acoustic scene classification and event detection. European Signal Processing Conference. IEEE, Piscataway, 1--5."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Gemmeke J F Vuegen L Karsmakers P Vanrumste B. and Hamme H. V. 2014. An exemplar-based NMF approach to audio event detection. Applications of Signal Processing to Audio and Acoustics. IEEE Piscataway 1--4.","DOI":"10.1109\/WASPAA.2013.6701847"},{"key":"e_1_3_2_1_7_1","unstructured":"Elizalde B. Kumar A. Shah A. Badlani R. Vincent E. and Raj B. 2016. Experiments on the DCASE Challenge 2016: Acoustic Scene Classification and Sound Event Detection in Real Life Recording. IEEE Piscataway. DOI: http:\/\/cn.arxiv.org\/abs\/1607.06706."},{"key":"e_1_3_2_1_8_1","unstructured":"Phan H. Hertel L. Maass M. Koch P. and Mertins A. 2016. CaR-FOREST: Joint Classification-Regression Decision Forests for Overlapping Audio Event Detection. IEEE Piscataway."},{"key":"e_1_3_2_1_9_1","unstructured":"Kong Q. Sobieraj I. Wang W. and Plumbley M. D. 2016. Deep neural network baseline for DCASE challenge 2016. IEEE Piscataway."},{"key":"e_1_3_2_1_10_1","volume-title":"DCASE 2016 sound event detection system based on convolutional neural network. IEEE, Piscataway.","author":"Gorin A.","unstructured":"Gorin, A., Makhazhanov, N., and Shmyrev, N. 2016. DCASE 2016 sound event detection system based on convolutional neural network. IEEE, Piscataway."},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Advances in Computing, Communications and Informatics. IEEE, Piscataway, 2495--2500","author":"Agarwal A","unstructured":"Agarwal A, Quadri S M, Murthy S, and Sitaram, D. 2016. Minimally supervised sound event detection using a neural network. International Conference on Advances in Computing, Communications and Informatics. IEEE, Piscataway, 2495--2500."},{"key":"e_1_3_2_1_12_1","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, Piscataway, 771--775","author":"Adavanne S","unstructured":"Adavanne S, Pertil\u00e4 P, and Virtanen T. 2017. Sound event detection using spatial features and convolutional recurrent neural network. IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, Piscataway, 771--775."},{"key":"e_1_3_2_1_13_1","unstructured":"Adavanne S Parascandolo G Pertil\u00e4 P et al. 2017. Sound Event Detection in Multichannel Audio Using Spatial and Harmonic Features. IEEE Piscataway. DOI: http:\/\/cn.arxiv.org\/abs\/1706.02293."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","unstructured":"Phan H. Maa\u00df M. Mazur R. and Mertins A. 2015. Random Regression Forests for Acoustic Event Detection and Classification. IEEE\/ACM Transactions on Audio Speech &amp; Language Processing. IEEE Piscataway 20--31. 10.1109\/TASLP.2014.2367814","DOI":"10.1109\/TASLP.2014.2367814"},{"key":"e_1_3_2_1_15_1","unstructured":"Phan H. Krawczykbecker M. Gerkmann T. and Mertins A. 2017. DNN and CNN with Weighted and Multi-task Loss Functions for Audio Event Detection. IEEE Piscataway. DOI: http:\/\/cn.arxiv.org\/abs\/1708.03211."},{"key":"e_1_3_2_1_16_1","volume-title":"European Signal Processing Conference. IEEE, Piscataway, 1128--1132","author":"Mesaros A","unstructured":"Mesaros A, Heittola T, and Virtanen T. 2016. TUT database for acoustic scene classification and sound event detection. European Signal Processing Conference. IEEE, Piscataway, 1128--1132."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Mesaros A Heittola T and Virtanen T. 2016. Metrics for Polyphonic Sound Event Detection. Applied Sciences. IEEE Piscataway 162.","DOI":"10.3390\/app6060162"},{"key":"e_1_3_2_1_18_1","unstructured":"Adavanne S and Virtanen T. 2017. A report on sound event detection with different binaural features. IEEE Piscataway. DOI: http:\/\/cn.arxiv.org\/abs\/ 1710.02997."},{"key":"e_1_3_2_1_19_1","volume-title":"European Signal Processing Conference. IEEE, Piscataway, 506--510","author":"Gencoglu T.","unstructured":"O. Gencoglu, T. Virtanen, and H. Huttunen. 2014. Recognition of acoustic events using deep neural networks. European Signal Processing Conference. IEEE, Piscataway, 506--510."},{"key":"e_1_3_2_1_20_1","volume-title":"International Joint Conference on Neural Networks. IEEE, Piscataway, 1--7.","author":"Cakir T.","unstructured":"E. Cakir, T. Heittola, H. Huttunen, and T. Virtanen. 2015. Polyphonic sound event detection using multi label deep neural networks. International Joint Conference on Neural Networks. IEEE, Piscataway, 1--7."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","unstructured":"A. Krizhevsky I. Sutskever and G. E. Hinton. 2012. Imagenet classification with deep convolutional neural networks. Neural Information Processing Systems. Springer London 1097--1105.","DOI":"10.5555\/2999134.2999257"},{"key":"e_1_3_2_1_22_1","volume":"201","author":"He X.","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun. 2016. Deep residual learning for image recognition. IEEE Conference on Computer Vision and Pattern Recognition. IEEE, Piscataway, 770--778.","journal-title":"J. Sun."},{"key":"e_1_3_2_1_23_1","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, Piscataway, 559--563","author":"Zhang I.","unstructured":"H. Zhang, I. McLoughlin, and Y. Song. 2015. Robust sound event recognition using convolutional neural networks. IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, Piscataway, 559--563."},{"key":"e_1_3_2_1_24_1","volume-title":"17th Annual Conference of the International Speech Communication Association. IEEE, Piscataway, 3653--3657","author":"Phan L.","year":"2016","unstructured":"H. Phan, L. Hertel, M. Maass, and A.Mertins. 2016. Robust audio event recognition with 1-max pooling convolutional neural networks. Interspeech 2016, 17th Annual Conference of the International Speech Communication Association. IEEE, Piscataway, 3653--3657."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2015.7324337"},{"key":"e_1_3_2_1_26_1","volume-title":"Range Loss for Deep Face Recognition with Long-Tailed Training Data. IEEE International Conference on Computer Vision. IEEE, Piscataway, 5419--5428","author":"Zhang X.","unstructured":"Zhang, X., Fang, Z., Wen, Y., Li, Z., and Qiao, Y. 2017. Range Loss for Deep Face Recognition with Long-Tailed Training Data. IEEE International Conference on Computer Vision. IEEE, Piscataway, 5419--5428."}],"event":{"name":"CSAE '18: The 2nd International Conference on Computer Science and Application Engineering","location":"Hohhot China","acronym":"CSAE '18"},"container-title":["Proceedings of the 2nd International Conference on Computer Science and Application Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3207677.3277993","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3207677.3277993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T20:39:45Z","timestamp":1780605585000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3207677.3277993"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,22]]},"references-count":26,"alternative-id":["10.1145\/3207677.3277993","10.1145\/3207677"],"URL":"https:\/\/doi.org\/10.1145\/3207677.3277993","relation":{},"subject":[],"published":{"date-parts":[[2018,10,22]]},"assertion":[{"value":"2018-10-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}