{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T19:15:46Z","timestamp":1742930146901,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819706686"},{"type":"electronic","value":"9789819706693"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-0669-3_3","type":"book-chapter","created":{"date-parts":[[2024,2,28]],"date-time":"2024-02-28T21:20:16Z","timestamp":1709155216000},"page":"27-38","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Gaussian Distribution Labeling Method for\u00a0Speech Quality Assessment"],"prefix":"10.1007","author":[{"given":"Minh Tu","family":"Le","sequence":"first","affiliation":[]},{"given":"Bao Thang","family":"Ta","sequence":"additional","affiliation":[]},{"given":"Nhat Minh","family":"Le","sequence":"additional","affiliation":[]},{"given":"Phi Le","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Van Hai","family":"Do","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,29]]},"reference":[{"key":"3_CR1","doi-asserted-by":"publisher","unstructured":"Burnham, D., et al.: Building an audio-visual corpus of Australian English: large corpus collection with an economical portable and replicable black box. In: Proceedings of Interspeech 2011, pp. 841\u2013844 (2011). https:\/\/doi.org\/10.21437\/Interspeech.2011-309","DOI":"10.21437\/Interspeech.2011-309"},{"key":"3_CR2","unstructured":"Demirsahin, I., Kjartansson, O., Gutkin, A., Rivera, C.: Open-source multi-speaker corpora of the English accents in the British isles. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 6532\u20136541 (2020)"},{"key":"3_CR3","doi-asserted-by":"publisher","unstructured":"Desplanques, B., Thienpondt, J., Demuynck, K.: ECAPA-TDNN: emphasized channel attention, propagation and aggregation in TDNN based speaker verification. In: Proceedings of Interspeech 2020, pp. 3830\u20133834 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2650","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"3_CR4","doi-asserted-by":"publisher","unstructured":"Gong, W., Wang, J., Liu, Y., Yang, H.: A no-reference speech quality assessment method based on neural network with densely connected convolutional architecture. In: Proceedings of INTERSPEECH 2023, pp. 536\u2013540 (2023). https:\/\/doi.org\/10.21437\/Interspeech.2023-811","DOI":"10.21437\/Interspeech.2023-811"},{"key":"3_CR5","doi-asserted-by":"publisher","unstructured":"Gulati, A., et al.: Conformer: convolution-augmented transformer for speech recognition. In: Proceedings of Interspeech 2020, pp. 5036\u20135040 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-3015","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"3_CR6","unstructured":"ITU-T Recommendation P.830: Subjective performance assessment of telephone-band and wideband digital codecs (1996)"},{"key":"3_CR7","unstructured":"ITU-T Recommendation P.862: Perceptual evaluation of speech quality (PESQ): An objective method for end-to-end speech quality assessment of narrow-band telephone networks and speech codecs (2001)"},{"key":"3_CR8","unstructured":"ITU-T Recommendation P.863: Perceptual objective listening quality assessment (2011)"},{"key":"3_CR9","doi-asserted-by":"publisher","unstructured":"Jayesh, M.K., Sharma, M., Vonteddu, P., Shaik, M.A.B., Ganapathy, S.: Transformer networks for non-intrusive speech quality prediction. In: Proceedings of Interspeech 2022, pp. 4078\u20134082 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-10020","DOI":"10.21437\/Interspeech.2022-10020"},{"key":"3_CR10","unstructured":"Kabal, P.: Tsp speech database. McGill Univ., Database Version 1, 09\u201302 (2002)"},{"key":"3_CR11","doi-asserted-by":"publisher","unstructured":"Liang, X., Cumlin, F., Sch\u00fcldt, C., Chatterjee, S.: DeePMOS: deep posterior mean-opinion-score of speech. In: Proceedings of INTERSPEECH 2023, pp. 526\u2013530 (2023). https:\/\/doi.org\/10.21437\/Interspeech.2023-1436","DOI":"10.21437\/Interspeech.2023-1436"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Liu, M., Wang, J., Xu, L., Zhang, J., Li, S., Xiang, F.: BIT-MI deep learning-based model to non-intrusive speech quality assessment challenge in online conferencing applications. In: Proceedings of Interspeech 2022, pp. 3288\u20133292 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-10010","DOI":"10.21437\/Interspeech.2022-10010"},{"key":"3_CR14","series-title":"Studies in Computational Intelligence","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1007\/978-3-642-19551-8_23","volume-title":"Multimedia Analysis, Processing and Communications","author":"PC Loizou","year":"2011","unstructured":"Loizou, P.C.: Speech quality assessment. In: Lin, W., Tao, D., Kacprzyk, J., Li, Z., Izquierdo, E., Wang, H. (eds.) Multimedia Analysis, Processing and Communications. Studies in Computational Intelligence, vol. 346, pp. 623\u2013654. Springer, Berlin (2011). https:\/\/doi.org\/10.1007\/978-3-642-19551-8_23"},{"key":"3_CR15","doi-asserted-by":"publisher","unstructured":"Manocha, P., et al.: SAQAM: spatial audio quality assessment metric. In: Proceedings of Interspeech 2022, pp. 649\u2013653 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-406","DOI":"10.21437\/Interspeech.2022-406"},{"key":"3_CR16","unstructured":"Manocha, P., Xu, B., Kumar, A.: NORESQA: a framework for speech quality assessment using non-matching references. In: Thirty-Fifth Conference on Neural Information Processing Systems (2021). https:\/\/proceedings.neurips.cc\/paper\/2021\/file\/bc6d753857fe3dd4275dff707dedf329-Paper.pdf"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Mittag, G., Naderi, B., Chehadi, A., M\u00f6ller, S.: NISQA: a deep CNN-self-attention model for multidimensional speech quality prediction with crowdsourced datasets. In: INTERSPEECH, pp. 2127\u20132131 (2021)","DOI":"10.21437\/Interspeech.2021-299"},{"issue":"2","key":"3_CR18","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1080\/00450618.2011.630412","volume":"44","author":"GS Morrison","year":"2012","unstructured":"Morrison, G.S., Rose, P., Zhang, C.: Protocol for the collection of databases of recordings for forensic-voice-comparison research and practice. Aust. J. Forensic Sci. 44(2), 155\u2013167 (2012)","journal-title":"Aust. J. Forensic Sci."},{"key":"3_CR19","unstructured":"Morrison, G., et al.: Forensic database of voice recordings of 500+ Australian English speakers (2015)"},{"key":"3_CR20","doi-asserted-by":"publisher","unstructured":"Mumtaz, D., Jena, A., Jakhetiya, V., Nathwani, K., Guntuku, S.C.: Transformer-based quality assessment model for generalized user-generated multimedia audio content. In: Proceedings of Interspeech 2022, pp. 674\u2013678 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-10386","DOI":"10.21437\/Interspeech.2022-10386"},{"key":"3_CR21","unstructured":"Rec, I.: P. 501, test signals for use in telephony and other speech-based application. Int. Telecommun. Union (2020)"},{"key":"3_CR22","doi-asserted-by":"publisher","unstructured":"Reddy, C.K., et al.: The INTERSPEECH 2020 deep noise suppression challenge: datasets, subjective testing framework, and challenge results. In: Proceedings of Interspeech 2020, pp. 2492\u20132496 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-3038","DOI":"10.21437\/Interspeech.2020-3038"},{"key":"3_CR23","doi-asserted-by":"publisher","unstructured":"Shu, X., et al.: Non-intrusive speech quality assessment with a multi-task learning based Subband adaptive attention temporal convolutional neural network. In: Proceedings of Interspeech 2022, pp. 3298\u20133302 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-10315","DOI":"10.21437\/Interspeech.2022-10315"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Sun, L., Du, J., Dai, L.R., Lee, C.H.: Multiple-target deep learning for LSTM-RNN based speech enhancement. In: 2017 Hands-free Speech Communications and Microphone Arrays (HSCMA), pp. 136\u2013140. IEEE (2017)","DOI":"10.1109\/HSCMA.2017.7895577"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Ta, B.T., et al.: Improving Vietnamese accent recognition using ASR transfer learning. In: 2022 25th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA), pp. 1\u20136. IEEE (2022)","DOI":"10.1109\/O-COCOSDA202257103.2022.9997947"},{"key":"3_CR26","doi-asserted-by":"publisher","unstructured":"Ta, B.T., Le, M.T., Le, N.M., Do, V.H.: Probing speech quality information in ASR systems. In: Proceedings of INTERSPEECH 2023, pp. 541\u2013545 (2023). https:\/\/doi.org\/10.21437\/Interspeech.2023-2507","DOI":"10.21437\/Interspeech.2023-2507"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Ta, B.T., et al.: Improving speech emotion recognition via fine-tuning ASR with speaker information. In: 2022 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), pp. 1\u20136. IEEE (2022)","DOI":"10.23919\/APSIPAASC55919.2022.9980214"},{"key":"3_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35019-1","volume-title":"Dimension-Based Quality Modeling of Transmitted Speech","author":"M W\u00e4ltermann","year":"2013","unstructured":"W\u00e4ltermann, M.: Dimension-Based Quality Modeling of Transmitted Speech. Springer, Cham (2013)"},{"key":"3_CR29","doi-asserted-by":"publisher","unstructured":"Yu, M., Zhang, C., Xu, Y., Zhang, S.X., Yu, D.: MetricNet: towards improved modeling for non-intrusive speech quality assessment. In: Proceedings of Interspeech 2021, pp. 2142\u20132146 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-659","DOI":"10.21437\/Interspeech.2021-659"}],"container-title":["Lecture Notes in Computer Science","Computational Data and Social Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-0669-3_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,28]],"date-time":"2024-02-28T21:22:27Z","timestamp":1709155347000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-0669-3_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819706686","9789819706693"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-0669-3_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"29 February 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CSoNet","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Data and Social Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 December 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"csonet2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/csonet-conf.github.io\/csonet23\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easy Chair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"64","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.7","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.0","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The four extended abstracts are also included in this proceedings.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}