{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T18:32:54Z","timestamp":1778005974932,"version":"3.51.4"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030602758","type":"print"},{"value":"9783030602765","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_8","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"79-86","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":65,"title":["Data Augmentation and Loss Normalization for Deep Noise Suppression"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9060-223X","authenticated-orcid":false,"given":"Sebastian","family":"Braun","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2263-2047","authenticated-orcid":false,"given":"Ivan","family":"Tashev","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"Cho, K., Merri\u00ebnboer, B.V., Bahdanau, D., Bengio, Y.: On the properties of neural machine translation: encoder-decoder approaches. In: Proceedings of the Eighth Workshop on Syntax, Semantics and Structure in Statistical Translation (SSST-8) (2014)","DOI":"10.3115\/v1\/W14-4012"},{"issue":"6","key":"8_CR2","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Ephraim, Y., Malah, D.: Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator. IEEE Trans. Acoust. Speech Signal Process. 32(6), 1109\u20131121 (1984)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"4","key":"8_CR3","doi-asserted-by":"publisher","first-page":"112:1","DOI":"10.1145\/3197517.3201357","volume":"37","author":"A Ephrat","year":"2018","unstructured":"Ephrat, A., et al.: Looking to listen at the cocktail party: a speaker-independent audio-visual model for speech separation. ACM Trans. Graph. 37(4), 112:1\u2013112:11 (2018)","journal-title":"ACM Trans. Graph."},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Gerkmann, T., Hendriks, R.C.: Noise power estimation based on the probability of speech presence. In: Proceedings of the IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), pp. 145\u2013148, October 2011","DOI":"10.1109\/ASPAA.2011.6082266"},{"key":"8_CR5","unstructured":"Hu, K., Divenyi, P., Ellis, D., Jin, Z., Shinn-Cunningham, B.G., Wang, D.: Preliminary intelligibility tests of a monaural speech segregation system. In: Proceedings of the Workshop on Statistical and Perceptual Audition, Brisbane, September 2008"},{"key":"8_CR6","unstructured":"ITU-T: Recommendation P.862: Perceptual evaluation of speech quality (PESQ), an objective method for end-to-end speech quality assessment of narrowband telephone networks and speech codecs, February 2001"},{"key":"8_CR7","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"8_CR8","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1109\/89.928915","volume":"9","author":"R Martin","year":"2001","unstructured":"Martin, R.: Noise power spectral density estimation based on optimal smoothing and minimum statistics. IEEE Trans. Speech Audio Process. 9, 504\u2013512 (2001)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Reddy, C.K.A., et al.: The INTERSPEECH 2020 deep noise suppression challenge: datasets, subjective speech quality and testing framework. In: Proceedings of the INTERSPEECH 2020 (2020, to appear)","DOI":"10.21437\/Interspeech.2020-3038"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Roux, J.L., Wisdom, S., Erdogan, H., Hershey, J.R.: SDR - half-baked or well done? In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 626\u2013630, May 2019","DOI":"10.1109\/ICASSP.2019.8683855"},{"issue":"7","key":"8_CR11","doi-asserted-by":"publisher","first-page":"2125","DOI":"10.1109\/TASL.2011.2114881","volume":"19","author":"CH Taal","year":"2011","unstructured":"Taal, C.H., Hendriks, R.C., Heusdens, R., Jensen, J.: An algorithm for intelligibility prediction of time-frequency weighted noisy speech. IEEE Trans. Audio Speech Lang. Process. 19(7), 2125\u20132136 (2011)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Tan, K., Wang, D.: A convolutional recurrent neural network for real-time speech enhancement. In: Proceedings of the Interspeech, pp. 3229\u20133233 (2018)","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"Tu, Y.H., Tashev, I., Zarar, S., Lee, C.: A hybrid approach to combining conventional and deep learning techniques for single-channel speech enhancement and recognition. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2531\u20132535, April 2018","DOI":"10.1109\/ICASSP.2018.8461944"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Valin, J.: A hybrid DSP\/deep learning approach to real-time full-band speech enhancement. In: 20th International Workshop on Multimedia Signal Processing (MMSP), pp. 1\u20135, August 2018","DOI":"10.1109\/MMSP.2018.8547084"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Vincent, E., Barker, J., Watanabe, S., Nesta, F.: The second \u2018CHIME\u2019 speech separation and recognition challenge: datasets, tasks and baselines. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), June 2012","DOI":"10.1109\/ICASSP.2013.6637622"},{"issue":"10","key":"8_CR16","doi-asserted-by":"publisher","first-page":"1702","DOI":"10.1109\/TASLP.2018.2842159","volume":"26","author":"D Wang","year":"2018","unstructured":"Wang, D., Chen, J.: Supervised speech separation based on deep learning: an overview. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(10), 1702\u20131726 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Wichern, G., Lukin, A.: Low-latency approximation of bidirectional recurrent networks for speech denoising. In: Proceedings of the IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), pp. 66\u201370, October 2017","DOI":"10.1109\/WASPAA.2017.8169996"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Wilson, K., et al.: Exploring tradeoffs in models for low-latency speech enhancement. In: Proceedings of the International Workshop on Acoustic Signal Enhancement (IWAENC), pp. 366\u2013370, September 2018","DOI":"10.1109\/IWAENC.2018.8521347"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Wisdom, S., et al.: Differentiable consistency constraints for improved deep speech enhancement. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 900\u2013904, May 2019","DOI":"10.1109\/ICASSP.2019.8682783"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Xia, R., Braun, S., Reddy, C., Dubey, H., Cutler, R., Tahev, I.: Weighted speech distortion losses for neural-network-based real-time speech enhancement. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2020)","DOI":"10.1109\/ICASSP40776.2020.9054254"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,6]],"date-time":"2021-04-06T02:06:53Z","timestamp":1617674813000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}