{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T21:16:18Z","timestamp":1729631778598,"version":"3.28.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1109\/icassp49357.2023.10097075","type":"proceedings-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T17:28:30Z","timestamp":1683307710000},"page":"1-5","source":"Crossref","is-referenced-by-count":5,"title":["On Batching Variable Size Inputs for Training End-to-End Speech Enhancement Systems"],"prefix":"10.1109","author":[{"given":"Philippe","family":"Gonzalez","sequence":"first","affiliation":[{"name":"Technical University of Denmark,Department of Health Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tommy","family":"Sonne Alstr\u00f8m","sequence":"additional","affiliation":[{"name":"Technical University of Denmark,Department of Applied Mathematics and Computer Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tobias","family":"May","sequence":"additional","affiliation":[{"name":"Technical University of Denmark,Department of Health Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00109"},{"year":"0","key":"ref35","article-title":"Simulated room impulse responses"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.01.037"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107867"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167"},{"journal-title":"ITU-T Recommendation P 862 International Telecommunication Union","article-title":"Perceptual evaluation of speech quality (PESQ): An objective method for end-to-end speech quality assessment of narrow-band telephone networks and speech codecs","year":"2001","key":"ref37"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-3208"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.17743\/jaes.2020.0026"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1121\/1.4799597"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3109\/00206090109073110"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"article-title":"Audio Spatialisation for Headphones - Impulse Response Dataset","year":"2021","author":"pearce","key":"ref33"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-2100"},{"article-title":"Ambisonics recordings of typical environments (ARTE) database","year":"2019","author":"buchholz","key":"ref32"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1121\/1.4948445"},{"article-title":"Exploring the best loss function for DNN-based low-latency speech enhancement with temporal convolutional networks","year":"2020","author":"koyama","key":"ref17"},{"article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","year":"2015","author":"abadi","key":"ref16"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2585878"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1121\/1.4789895"},{"key":"ref18","first-page":"626","article-title":"SDR&#x2013;half-baked or well done?","author":"le roux","year":"2019","journal-title":"Proc ICASSP"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2051354"},{"article-title":"TAU urban acoustic scenes 2019, development dataset","year":"2019","author":"heittola","key":"ref23"},{"key":"ref26","article-title":"The design for the Wall Street Journal-based CSR corpus","author":"paul","year":"1992","journal-title":"Proceedings of the Workshop on Speech and Natural Language"},{"key":"ref25","first-page":"27403","article-title":"DARPA TIMIT acoustic-phonetic continous speech corpus CD-ROM. NIST speech disc 1 - 1.1","author":"garofolo","year":"1993","journal-title":"NASA STI\/Recon Technical Report"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1121\/1.1506692"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref21","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc ICLR"},{"article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","year":"2019","author":"yamagishi","key":"ref28"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"107951","DOI":"10.1016\/j.dib.2022.107951","article-title":"Dataset of British English speech recordings for psychoacoustics and speech processing research: The clarity speech corpus","volume":"41","author":"graetzer","year":"2022","journal-title":"Data in Brief"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1284"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/DSMP.2016.7583516"},{"article-title":"A comprehensive study of batch construction strategies for recurrent neural networks in MXNet","year":"2017","author":"doetsch","key":"ref9"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747373"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"2816","DOI":"10.21437\/Interspeech.2021-1482","article-title":"DCCRN+: Channel-wise subband DCCRN with SNR estimation for speech enhancement","author":"lv","year":"2021","journal-title":"Proc INTERSPEECH"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1007\/978-3-642-35289-8_26","article-title":"Practical recommendations for gradient-based training of deep architectures","author":"bengio","year":"2012","journal-title":"Neural Networks Tricks of the Trade"},{"key":"ref5","article-title":"On large-batch training for deep learning: Generalization gap and sharp minima","author":"keskar","year":"2017","journal-title":"Proc ICLR"}],"event":{"name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2023,6,4]]},"location":"Rhodes Island, Greece","end":{"date-parts":[[2023,6,10]]}},"container-title":["ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10094559\/10094560\/10097075.pdf?arnumber=10097075","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T22:09:52Z","timestamp":1729375792000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10097075\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icassp49357.2023.10097075","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]}}}