{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T19:42:35Z","timestamp":1771702955259,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1836220,62176106"],"award-info":[{"award-number":["U1836220,62176106"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Jiangsu key research and development plan","award":["BE2020036"],"award-info":[{"award-number":["BE2020036"]}]},{"name":"Post-graduate Research & Practice Innovation Program of Jiangsu Province","award":["KYCX22_3668"],"award-info":[{"award-number":["KYCX22_3668"]}]},{"name":"MTRAC Grant for Advanced Computing Technologies"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3503161.3548097","type":"proceedings-article","created":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T15:42:46Z","timestamp":1665416566000},"page":"1779-1787","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Adaptive Hierarchical Pooling for Weakly-supervised Sound Event Detection"],"prefix":"10.1145","author":[{"given":"Lijian","family":"Gao","sequence":"first","affiliation":[{"name":"Jiangsu University, Zhenjiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ling","family":"Zhou","sequence":"additional","affiliation":[{"name":"Jiangsu University, Zhenjiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qirong","family":"Mao","sequence":"additional","affiliation":[{"name":"Jiangsu University, Zhenjiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Dong","sequence":"additional","affiliation":[{"name":"Wayne State University, Detroit, MI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,10]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Audio Surveillance: a Systematic Review. arXiv: Sound","author":"Crocco Marco","year":"2014","unstructured":"Marco Crocco , Marco Cristani , Andrea Trucco , and Vittorio Murino . 2014. Audio Surveillance: a Systematic Review. arXiv: Sound ( 2014 ). Marco Crocco, Marco Cristani, Andrea Trucco, and Vittorio Murino. 2014. Audio Surveillance: a Systematic Review. arXiv: Sound (2014)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3054313"},{"key":"e_1_3_2_2_3_1","volume-title":"Wuthrich","author":"Ferrario Andrea","year":"2020","unstructured":"Andrea Ferrario , Alexander Noll , and Mario V . Wuthrich . 2020 . Insights from Inside Neural Networks. CompSciRN: Industry Practical Application (Topic) ( 2020). Andrea Ferrario, Alexander Noll, and Mario V. Wuthrich. 2020. Insights from Inside Neural Networks. CompSciRN: Industry Practical Application (Topic) (2020)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/941"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351086"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of International Conference on Artificial Intelligence and Statistics (AISTATS).","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio . 2010 . Understanding the difficulty of training deep feedforward neural networks . In Proceedings of International Conference on Artificial Intelligence and Statistics (AISTATS). Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In Proceedings of International Conference on Artificial Intelligence and Statistics (AISTATS)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053023"},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 621--625","author":"Imoto Keisuke","unstructured":"Keisuke Imoto , Noriyuki Tonami , Yuma Koizumi , Masahiro Yasuda , Ryosuke Yamanishi , and Y. Yamashita . 2020. Sound Event Detection by Multitask Learning of Sound Events and Scenes with Soft Scene Labels . In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 621--625 . Keisuke Imoto, Noriyuki Tonami, Yuma Koizumi, Masahiro Yasuda, Ryosuke Yamanishi, and Y. Yamashita. 2020. Sound Event Detection by Multitask Learning of Sound Events and Scenes with Soft Scene Labels. In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 621--625."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053150"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3014737"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2930913"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/384"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054433"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, 626--630","author":"Lin Liwei","year":"2020","unstructured":"Liwei Lin , X. Wang , Hong Liu , and Yueliang Qian . 2020 a. Guided Learning for Weakly\u00adLabeled Semi\u00adSupervised Sound Event Detection , In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, 626--630 . Liwei Lin, X. Wang, Hong Liu, and Yueliang Qian. 2020a. Guided Learning for Weakly\u00adLabeled Semi\u00adSupervised Sound Event Detection, In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, 626--630."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2989575"},{"key":"e_1_3_2_2_18_1","volume-title":"Proceedings of International Conference on Learning Representations (ICLR). 1--13","author":"Liu Hanxiao","year":"2019","unstructured":"Hanxiao Liu , Karen Simonyan , and Yiming Yang . 2019 . DARTS: Differentiable Architecture Search . In Proceedings of International Conference on Learning Representations (ICLR). 1--13 . Hanxiao Liu, Karen Simonyan, and Yiming Yang. 2019. DARTS: Differentiable Architecture Search. In Proceedings of International Conference on Learning Representations (ICLR). 1--13."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9533332"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPA.2018.8659533"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML). 1--9.","author":"Maclaurin Dougal","unstructured":"Dougal Maclaurin , David Kristjanson Duvenaud , and Ryan P. Adams . 2015. Gradient-based Hyperparameter Optimization through Reversible Learning . In Proceedings of the International Conference on Machine Learning (ICML). 1--9. Dougal Maclaurin, David Kristjanson Duvenaud, and Ryan P. Adams. 2015. Gradient-based Hyperparameter Optimization through Reversible Learning. In Proceedings of the International Conference on Machine Learning (ICML). 1--9."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2858559"},{"key":"e_1_3_2_2_23_1","volume-title":"DCASE 2017 Challenge setup: Tasks, datasets and baseline system. Technical Report. DCASE.","author":"Mesaros Annamaria","year":"2017","unstructured":"Annamaria Mesaros , Toni Heittola , Aleksandr Diment , Benjamin Elizalde , Ankit Shah , Emmanuel Vincent , Bhiksha Raj , and Tuomas Virtanen . 2017 . DCASE 2017 Challenge setup: Tasks, datasets and baseline system. Technical Report. DCASE. Annamaria Mesaros, Toni Heittola, Aleksandr Diment, Benjamin Elizalde, Ankit Shah, Emmanuel Vincent, Bhiksha Raj, and Tuomas Virtanen. 2017. DCASE 2017 Challenge setup: Tasks, datasets and baseline system. Technical Report. DCASE."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.3390\/app6060162"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2021.3090678"},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 66--70","author":"Miyazaki Koichi","unstructured":"Koichi Miyazaki , Tatsuya Komatsu , Tomoki Hayashi , Shinji Watanabe , Tomoki Toda , and K. Takeda . 2020. Weakly-Supervised Sound Event Detection with Self-Attention . In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 66--70 . Koichi Miyazaki, Tatsuya Komatsu, Tomoki Hayashi, Shinji Watanabe, Tomoki Toda, and K. Takeda. 2020. Weakly-Supervised Sound Event Detection with Self-Attention. In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 66--70."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-019-7547-y"},{"key":"e_1_3_2_2_28_1","volume-title":"DCASE 2017 submission : Multiple Instance Learning for Sound Event Detection. Technical Report. DCASE.","author":"Salamon Justin","year":"2017","unstructured":"Justin Salamon , Brian McFee , Peter Qi Li , and Juan Pablo Bello . 2017 . DCASE 2017 submission : Multiple Instance Learning for Sound Event Detection. Technical Report. DCASE. Justin Salamon, Brian McFee, Peter Qi Li, and Juan Pablo Bello. 2017. DCASE 2017 submission : Multiple Instance Learning for Sound Event Detection. Technical Report. DCASE."},{"key":"e_1_3_2_2_29_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML).","author":"Sankararaman Karthik Abinav","year":"2020","unstructured":"Karthik Abinav Sankararaman , Soham De , Zheng Xu , W. Ronny Huang , and Tom Goldstein . 2020 . The Impact of Neural Network Overparameterization on Gradient Confusion and Stochastic Gradient Descent . In Proceedings of the International Conference on Machine Learning (ICML). Karthik Abinav Sankararaman, Soham De, Zheng Xu, W. Ronny Huang, and Tom Goldstein. 2020. The Impact of Neural Network Overparameterization on Gradient Confusion and Stochastic Gradient Descent. In Proceedings of the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_2_30_1","volume-title":"Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018)","author":"Serizel Romain","year":"2018","unstructured":"Romain Serizel , Nicolas Turpault , Hamid Eghbal-Zadeh , and Ankit Parag Shah . 2018 . Large-scale weakly labeled semi-supervised sound event detection in domestic environments . In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018) . 19--23. https:\/\/hal.inria.fr\/hal-01850270 Romain Serizel, Nicolas Turpault, Hamid Eghbal-Zadeh, and Ankit Parag Shah. 2018. Large-scale weakly labeled semi-supervised sound event detection in domestic environments. In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018). 19--23. https:\/\/hal.inria.fr\/hal-01850270"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054478"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-684"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414789"},{"key":"e_1_3_2_2_35_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. (2017) 5998--6008.  Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. (2017) 5998--6008."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682847"},{"key":"e_1_3_2_2_37_1","volume-title":"Proceedings of Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). 269--274","author":"Wang Yih-Wen","year":"2021","unstructured":"Yih-Wen Wang , Chia-Ping Chen , Chung-Li Lu , and Bo-Cheng Chan . 2021 . Semi-Supervised Sound Event Detection Using Self-Attention and Multiple Techniques of Consistency Training . In Proceedings of Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). 269--274 . Yih-Wen Wang, Chia-Ping Chen, Chung-Li Lu, and Bo-Cheng Chan. 2021. Semi-Supervised Sound Event Detection Using Self-Attention and Multiple Techniques of Consistency Training. In Proceedings of Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). 269--274."},{"key":"e_1_3_2_2_38_1","volume-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 121--125","author":"Xu Yong","unstructured":"Yong Xu , Qiuqiang Kong , Wenwu Wang , and Mark D. Plumbley . 2018. Large-Scale Weakly Supervised Audio Classification Using Gated Convolutional Neural Network . In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 121--125 . Yong Xu, Qiuqiang Kong, Wenwu Wang, and Mark D. Plumbley. 2018. Large-Scale Weakly Supervised Audio Classification Using Gated Convolutional Neural Network. In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 121--125."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053073"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414931"}],"event":{"name":"MM '22: The 30th ACM International Conference on Multimedia","location":"Lisboa Portugal","acronym":"MM '22","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 30th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3548097","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503161.3548097","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:18Z","timestamp":1750186818000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3548097"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":38,"alternative-id":["10.1145\/3503161.3548097","10.1145\/3503161"],"URL":"https:\/\/doi.org\/10.1145\/3503161.3548097","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2022-10-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}