{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T21:28:47Z","timestamp":1743110927131,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":44,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819981373"},{"type":"electronic","value":"9789819981380"}],"license":[{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8138-0_30","type":"book-chapter","created":{"date-parts":[[2023,11,25]],"date-time":"2023-11-25T10:02:23Z","timestamp":1700906543000},"page":"376-388","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Attention-Based Deep Convolutional Network for\u00a0Speech Recognition Under Multi-scene Noise Environment"],"prefix":"10.1007","author":[{"given":"Chuanwu","family":"Yang","sequence":"first","affiliation":[]},{"given":"Shuo","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Zhishu","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Qinmu","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Jiamiao","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Peipei","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Yuetian","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xinge","family":"You","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,26]]},"reference":[{"issue":"11","key":"30_CR1","first-page":"1788","volume":"23","author":"D Baby","year":"2015","unstructured":"Baby, D., Virtanen, T., Gemmeke, J.F., et al.: Coupled dictionaries for exemplar-based speech enhancement and automatic speech recognition. IEEE-ACM Trans. Audio Speech 23(11), 1788\u20131799 (2015)","journal-title":"IEEE-ACM Trans. Audio Speech"},{"issue":"2","key":"30_CR2","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"Boll, S.: Suppression of acoustic noise in speech using spectral subtraction. IEEE-ACM Trans. Audio Speech 27(2), 113\u2013120 (1979)","journal-title":"IEEE-ACM Trans. Audio Speech"},{"key":"30_CR3","unstructured":"Cao, J., Xu, J., Shao, S.: Research on multi-noise-robust auto speech recognition. Comput. Appl. 1790\u20131794 (2018)"},{"issue":"4","key":"30_CR4","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1109\/89.397090","volume":"3","author":"Y Ephraim","year":"1995","unstructured":"Ephraim, Y., Van Trees, H.L.: A signal subspace approach for speech enhancement. IEEE Trans. Speech Audio Process. 3(4), 251\u2013266 (1995)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"6","key":"30_CR5","first-page":"1037","volume":"22","author":"JT Geiger","year":"2014","unstructured":"Geiger, J.T., Weninger, F., Gemmeke, J.F., W\u00f6llmer, M., Schuller, B., Rigoll, G.: Memory-enhanced neural networks and NMF for robust ASR. IEEE-ACM T Audio Speech 22(6), 1037\u20131046 (2014)","journal-title":"IEEE-ACM T Audio Speech"},{"issue":"3","key":"30_CR6","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1109\/TSA.2005.857802","volume":"14","author":"V Grancharov","year":"2006","unstructured":"Grancharov, V., Samuelsson, J., Kleijn, B.: On causal algorithms for speech enhancement. IEEE-ACM Trans. Audio Speech 14(3), 764\u2013773 (2006)","journal-title":"IEEE-ACM Trans. Audio Speech"},{"key":"30_CR7","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUS). arXiv preprint arXiv:1606.08415 (2016)"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"1","key":"30_CR9","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1109\/TSA.2003.819949","volume":"12","author":"Y Hu","year":"2004","unstructured":"Hu, Y., Loizou, P.C.: Speech enhancement based on wavelet thresholding the multitaper spectrum. IEEE Trans. Speech Audio Process. 12(1), 59\u201367 (2004)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Islam, M.: GFCC-based robust gender detection. In: ICISET, pp. 1\u20134. IEEE (2016)","DOI":"10.1109\/ICISET.2016.7856507"},{"issue":"3","key":"30_CR11","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/89.568732","volume":"5","author":"BH Juang","year":"1997","unstructured":"Juang, B.H., Hou, W., Lee, C.H.: Minimum classification error rate methods for speech recognition. IEEE Trans. Speech Audio Process. 5(3), 257\u2013265 (1997)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Kamath, S., Loizou, P., et al.: A multi-band spectral subtraction method for enhancing speech corrupted by colored noise. In: ICASSP, vol. 4, pp. 44164\u201344164. Citeseer (2002)","DOI":"10.1109\/ICASSP.2002.5745591"},{"issue":"4","key":"30_CR13","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1109\/LSP.2003.808544","volume":"10","author":"H Lev-Ari","year":"2003","unstructured":"Lev-Ari, H., Ephraim, Y.: Extension of the signal subspace speech enhancement approach to colored noise. IEEE Signal Process. Lett. 10(4), 104\u2013106 (2003)","journal-title":"IEEE Signal Process. Lett."},{"issue":"2","key":"30_CR14","first-page":"192","volume":"3","author":"X Li","year":"2016","unstructured":"Li, X., Wang, Z.: A hmm-based mandarin Chinese singing voice synthesis system. JAS 3(2), 192\u2013202 (2016)","journal-title":"JAS"},{"key":"30_CR15","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/j.neucom.2021.01.052","volume":"438","author":"Y Li","year":"2021","unstructured":"Li, Y., Zhang, W.T., Lou, S.T.: Generative adversarial networks for single channel separation of convolutive mixed speech signals. Neurocomputing 438, 63\u201371 (2021)","journal-title":"Neurocomputing"},{"key":"30_CR16","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1016\/j.neucom.2020.11.025","volume":"428","author":"Z Li","year":"2021","unstructured":"Li, Z., Ming, Y., Yang, L., Xue, J.: Mutual-learning sequence-level knowledge distillation for automatic speech recognition. Neurocomputing 428, 259\u2013267 (2021)","journal-title":"Neurocomputing"},{"issue":"3","key":"30_CR17","first-page":"778","volume":"6","author":"L Liu","year":"2019","unstructured":"Liu, L., Li, W., Wu, X., Zhou, B.X.: Infant cry language analysis and recognition: an experimental approach. JAS 6(3), 778\u2013788 (2019)","journal-title":"JAS"},{"key":"30_CR18","doi-asserted-by":"crossref","unstructured":"Meriem, F., Farid, H., Messaoud, B., Abderrahmene, A.: Robust speaker verification using a new front end based on multitaper and gammatone filters. In: SITIS, pp. 99\u2013103. IEEE (2014)","DOI":"10.1109\/SITIS.2014.111"},{"issue":"2","key":"30_CR19","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1109\/89.824700","volume":"8","author":"U Mittal","year":"2000","unstructured":"Mittal, U., Phamdo, N.: Signal\/noise KLT based approach for enhancing speech degraded by colored noise. IEEE Trans. Speech Audio Process. 8(2), 159\u2013167 (2000)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"1","key":"30_CR20","first-page":"77","volume":"27","author":"T Moriya","year":"2018","unstructured":"Moriya, T., Tanaka, T., Shinozaki, T., Watanabe, S., Duh, K.: Evolution-strategy-based automation of system development for high-performance speech recognition. IEEE-ACM Trans. Audio Speech 27(1), 77\u201388 (2018)","journal-title":"IEEE-ACM Trans. Audio Speech"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Nilufar, S., Ray, N., Molla, M.I., Hirose, K.: Spectrogram based features selection using multiple kernel learning for speech\/music discrimination, pp. 501\u2013504 (2012)","DOI":"10.1109\/ICASSP.2012.6287926"},{"key":"30_CR22","doi-asserted-by":"crossref","unstructured":"Povey, D., Woodland, P.C.: Minimum phone error and i-smoothing for improved discriminative training. In: ICASSP, vol. 1, pp. I-105. IEEE (2002)","DOI":"10.1109\/ICASSP.2002.1005687"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Qin, X., Bu, H., Li, M.: Hi-MIA: a far-field text-dependent speaker verification database and the baselines, pp. 7609\u20137613 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054423"},{"issue":"2","key":"30_CR24","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner, L.R.: A tutorial on hidden Markov models and selected applications in speech recognition. Proc. IEEE 77(2), 257\u2013286 (1989)","journal-title":"Proc. IEEE"},{"key":"30_CR25","doi-asserted-by":"crossref","unstructured":"Schwarz, P., Matejka, P., Cernocky, J.: Hierarchical structures of neural networks for phoneme recognition. In: ICASSP, vol. 1, pp. I-I. IEEE (2006)","DOI":"10.1109\/ICASSP.2006.1660023"},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Shi, X., Yang, H., Zhou, P.: Robust speaker recognition based on improved GFCC. In: IEEE INFOCOM, pp. 1927\u20131931. IEEE (2016)","DOI":"10.1109\/CompComm.2016.7925037"},{"issue":"1","key":"30_CR27","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1016\/j.csl.2012.05.001","volume":"27","author":"SM Siniscalchi","year":"2013","unstructured":"Siniscalchi, S.M., Reed, J., Svendsen, T., Lee, C.H.: Universal attribute characterization of spoken languages for automatic spoken language recognition. Comput. Speech Lang. 27(1), 209\u2013227 (2013)","journal-title":"Comput. Speech Lang."},{"key":"30_CR28","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.: Inception-v4, inception-resnet and the impact of residual connections on learning. arXiv preprint arXiv:1602.07261 (2016)","DOI":"10.1609\/aaai.v31i1.11231"},{"issue":"3","key":"30_CR29","first-page":"493","volume":"25","author":"Z Tang","year":"2016","unstructured":"Tang, Z., Li, L., Wang, D., Vipperla, R.: Collaborative joint training with multitask recurrent model for speech and speaker recognition. IEEE-ACM Trans Audio Speech 25(3), 493\u2013504 (2016)","journal-title":"IEEE-ACM Trans Audio Speech"},{"issue":"12","key":"30_CR30","first-page":"2080","volume":"27","author":"YH Tu","year":"2019","unstructured":"Tu, Y.H., Du, J., Lee, C.H.: Speech enhancement based on teacher-student deep learning using improved speech presence probability for noise-robust speech recognition. IEEE-ACM Trans Audio Speech 27(12), 2080\u20132091 (2019)","journal-title":"IEEE-ACM Trans Audio Speech"},{"issue":"8","key":"30_CR31","doi-asserted-by":"publisher","first-page":"2418","DOI":"10.1109\/TASL.2007.906194","volume":"15","author":"S Umesh","year":"2007","unstructured":"Umesh, S., Sinha, R.: A study of filter bank smoothing in MFCC features for recognition of children\u2019s speech. IEEE-ACM Trans Audio Speech 15(8), 2418\u20132430 (2007)","journal-title":"IEEE-ACM Trans Audio Speech"},{"key":"30_CR32","doi-asserted-by":"crossref","unstructured":"Varga, A., Steeneken, H.J.: Assessment for automatic speech recognition: Ii. noisex-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun. 12(3), 247\u2013251 (1993)","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"30_CR33","doi-asserted-by":"crossref","unstructured":"Variani, E., Sainath, T.N., Shafran, I., Bacchiani, M.: Complex linear projection (CLP): a discriminative approach to joint feature extraction and acoustic modeling (2016)","DOI":"10.21437\/Interspeech.2016-1459"},{"key":"30_CR34","unstructured":"Wang, D., Zhang, X.: THCHS-30: a free Chinese speech corpus. arXiv preprint arXiv:1512.01882 (2015)"},{"issue":"4","key":"30_CR35","first-page":"588","volume":"4","author":"K Wang","year":"2017","unstructured":"Wang, K., Gou, C., Duan, Y., Lin, Y., Zheng, X., Wang, F.Y.: Generative adversarial networks: introduction and outlook. JAS 4(4), 588\u2013598 (2017)","journal-title":"JAS"},{"issue":"7","key":"30_CR36","first-page":"1185","volume":"26","author":"Q Wang","year":"2018","unstructured":"Wang, Q., Du, J., Dai, L.R., Lee, C.H.: A multiobjective learning and ensembling approach to high-performance speech enhancement with compact neural network architectures. IEEE-ACM Trans. Audio Speech 26(7), 1185\u20131197 (2018)","journal-title":"IEEE-ACM Trans. Audio Speech"},{"issue":"4","key":"30_CR37","first-page":"796","volume":"24","author":"ZQ Wang","year":"2016","unstructured":"Wang, Z.Q., Wang, D.: A joint training framework for robust automatic speech recognition. IEEE-ACM Trans. Audio Speech 24(4), 796\u2013806 (2016)","journal-title":"IEEE-ACM Trans. Audio Speech"},{"key":"30_CR38","unstructured":"Warden, P.: Speech commands: a dataset for limited-vocabulary speech recognition. arXiv preprint arXiv:1804.03209 (2018)"},{"key":"30_CR39","doi-asserted-by":"crossref","unstructured":"Wen, Y., Zhang, K., Li, Z., Qiao, Y.: A discriminative feature learning approach for deep face recognition, pp. 499\u2013515 (2016)","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"30_CR40","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., So Kweon, I.: CBAM: convolutional block attention module, pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"30_CR41","unstructured":"Xiang, B., Jing, X., Yang, H.: Vehicular speech recognition based on noise classification and compensation. Comput. Eng. (3), 37 (2017)"},{"key":"30_CR42","unstructured":"Ye, S., et al.: Discriminative suprasphere embedding for fine-grained visual categorization. IEEE Trans. Neural Netw. Learn. Syst. (2022)"},{"key":"30_CR43","doi-asserted-by":"crossref","unstructured":"Ye, S., Wang, Y., Peng, Q., You, X., Chen, C.P.: The image data and backbone in weakly supervised fine-grained visual categorization: A revisit and further thinking. IEEE Trans. Circ. Syst. Video Technol. (2023)","DOI":"10.1109\/TCSVT.2023.3284405"},{"key":"30_CR44","doi-asserted-by":"crossref","unstructured":"Yu, G., Slotine, J.J.: Audio classification from time-frequency texture, pp. 1677\u20131680 (2009)","DOI":"10.1109\/ICASSP.2009.4959924"}],"container-title":["Communications in Computer and Information Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8138-0_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T09:36:29Z","timestamp":1730626589000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8138-0_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,26]]},"ISBN":["9789819981373","9789819981380"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8138-0_30","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023,11,26]]},"assertion":[{"value":"26 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Changsha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1274","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"650","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.14","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.46","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}