{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T08:58:53Z","timestamp":1765357133266},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icassp43922.2022.9746431","type":"proceedings-article","created":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T19:50:34Z","timestamp":1651089034000},"page":"336-340","source":"Crossref","is-referenced-by-count":4,"title":["Pseudo Strong Labels for Large Scale Weakly Supervised Audio Tagging"],"prefix":"10.1109","author":[{"given":"Heinrich","family":"Dinkel","sequence":"first","affiliation":[{"name":"Xiaomi Corporation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Yan","sequence":"additional","affiliation":[{"name":"Xiaomi Corporation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongqing","family":"Wang","sequence":"additional","affiliation":[{"name":"Xiaomi Corporation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junbo","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xiaomi Corporation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yujun","family":"Wang","sequence":"additional","affiliation":[{"name":"Xiaomi Corporation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00237"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"ref12","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"3rd International Conference on Learning Representations ICLR 2015 San Diego CA USA May 7-9 2015 Conference Track Proceedings"},{"key":"ref13","first-page":"8026","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","author":"paszke","year":"2019","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"article-title":"mixup: Beyond empirical risk minimization","year":"2017","author":"zhang","key":"ref16"},{"key":"ref17","article-title":"The smallrice submission to the dcase2021 task 4 challenge: A lightweight approach for semi-supervised sound event detection with unsupervised data augmentation","author":"dinkel","year":"2021","journal-title":"Tech Rep DCASE2016 Challenge"},{"key":"ref18","article-title":"Distilling the knowledge in a neural network","author":"hinton","year":"2015","journal-title":"Deep Learning and Representation Learning Workshop NIPS"},{"key":"ref19","first-page":"486","article-title":"Freesound datasets: a platform for the creation of open audio datasets","author":"fonseca","year":"2017","journal-title":"Proceedings of the 18th ISMIR Conference 2017 oct 23-27 Suzhou China [Canada] International Society for Music Information Retrieval 2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-698"},{"article-title":"Psla: Improving audio event classification with pretraining, sampling, labeling, and aggregation","year":"2021","author":"gong","key":"ref3"},{"article-title":"Perceiver: General perception with iterative attention","year":"2021","author":"jaegle","key":"ref6"},{"article-title":"Vatt: Transformers for multimodal self-supervised learning from raw video, audio and text","year":"2021","author":"akbari","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1197"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2731"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"ref9","first-page":"5447","article-title":"A sequential self teaching approach for improving generalization in sound event recognition","author":"kumar","year":"2020","journal-title":"International Conference on Machine Learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414579"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.33682\/w13e-5v06"},{"article-title":"Fsd50k: an open dataset of human-labeled sound events","year":"2020","author":"fonseca","key":"ref21"}],"event":{"name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2022,5,23]]},"location":"Singapore, Singapore","end":{"date-parts":[[2022,5,27]]}},"container-title":["ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9745891\/9746004\/09746431.pdf?arnumber=9746431","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,15]],"date-time":"2022-08-15T20:11:08Z","timestamp":1660594268000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9746431\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/icassp43922.2022.9746431","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}