{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T23:24:53Z","timestamp":1743031493145,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030522452"},{"type":"electronic","value":"9783030522469"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-52246-9_15","type":"book-chapter","created":{"date-parts":[[2020,7,3]],"date-time":"2020-07-03T11:03:49Z","timestamp":1593774229000},"page":"214-231","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["MESRS: Models Ensemble Speech Recognition System"],"prefix":"10.1007","author":[{"given":"Ben","family":"Zagagy","sequence":"first","affiliation":[]},{"given":"Maya","family":"Herman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,7,4]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Sainath, T., Kingsbury, B., Ramabhadran, B., Novak, P., Mohamed, A.: Making deep belief networks effective for large vocabulary continuous speech recognition. In: Proceedings of the ASRU (2011)","DOI":"10.1109\/ASRU.2011.6163900"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Deng, L., Abdel-Hamid, O., Yu, D.: A deep convolutional neural network using heterogeneous pooling for trading acoustic invariance with phonetic confusion. In: Proceedings of the ICASSP (2013)","DOI":"10.1109\/ICASSP.2013.6638952"},{"issue":"2","key":"15_CR3","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1109\/TASL.2012.2227738","volume":"21","author":"D Yu","year":"2013","unstructured":"Yu, D., Deng, L., Seide, F.: The deep tensor neural network with applications to large vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 21(2), 388\u2013396 (2013)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"15_CR4","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"A Mohamed","year":"2012","unstructured":"Mohamed, A., Dahl, G., Hinton, G.: Acoustic modeling using deep belief networks. IEEE Trans. Audio Speech Language Process. 20, 14\u201322 (2012)","journal-title":"IEEE Trans. Audio Speech Language Process."},{"issue":"11","key":"15_CR5","doi-asserted-by":"publisher","first-page":"2267","DOI":"10.1109\/TASL.2013.2284378","volume":"21","author":"T Sainath","year":"2013","unstructured":"Sainath, T., Kingsbury, B., Soltau, H., Ramabhadran, B.: Optimization techniques to improve training speed of deep neural networks for large speech tasks. IEEE Trans. Audio Speech Language Process. 21(11), 2267\u20132276 (2013)","journal-title":"IEEE Trans. Audio Speech Language Process."},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Sainath, T., Kingsbury, B., Mohamed, A., Dahl, G., Saon, G., Soltau, H., Beran, T., Aravkin, A., Ramabhadran, B.: Improvements to deep convolutional neural networks for LVCSR. In: Proceedings of the ASRU (2013)","DOI":"10.1109\/ASRU.2013.6707749"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Deng, L., Li, J., Huang, Yao, K., Yu, D., Seide, F., Seltzer, M., Zweig, G., He, X., Williams, J., Gong, Y., Acero, A.: Recent advances in deep learning for speech research at Microsoft. In: Proceedings of the ICASSP (2013)","DOI":"10.1109\/ICASSP.2013.6639345"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Deng, L., Hinton, G., Kingsbury, B.: New types of deep neural network learning for speech recognition and related applications: an overview. In: Proceedings of the ICASSP (2013)","DOI":"10.1109\/ICASSP.2013.6639344"},{"issue":"3","key":"15_CR9","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1109\/MSP.2009.932166","volume":"26","author":"J Baker","year":"2009","unstructured":"Baker, J., Deng, L., Glass, J., Khudanpur, S., Lee, C.-H., Morgan, N., O\u2019Shaughnessy, D.: Research developments and directions in speech recognition and understanding. IEEE Sig. Proc. Mag. 26(3), 75\u201380 (2009)","journal-title":"IEEE Sig. Proc. Mag."},{"key":"15_CR10","unstructured":"Hinton, G., Deng, L., Yu, D., Dahl, G., Mohamed, A., Jaitly, N., Senior, A., Vanhoucke, V., Nguyen, P., Sainath, T., Kingsbury, B.: Deep neural networks for acoustic modeling in speech recognition. IEEE Signal Process. Mag. 29(6), 82\u201397 (2012)"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Seide, F., Li, G., Yu, D.: Conversational speech transcription using context-dependent deep neural networks. In: Proceedings of the Interspeech (2011)","DOI":"10.21437\/Interspeech.2011-169"},{"key":"15_CR12","unstructured":"Yu, D., Deng, L., Dahl, G.E.: Roles of pre-training and fine-tuning in context-dependent DBN-HMMs for real-world speech recognition. In: NIPS Workshop on Deep Learning and Unsupervised Feature Learning (2010)"},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Jaitly, N., Nguyen, P., Vanhoucke, V.: Application of pre-trained deep neural networks to large vocabulary speech recognition. In: Proceedings of the Interspeech (2012)","DOI":"10.21437\/Interspeech.2012-10"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Kingsbury, B., Sainath, T., Soltau, H.: Scalable minimum Bayes risk training of deep neural network acoustic models using distributed Hessian-free optimization. In: Proceedings of the Interspeech (2012)","DOI":"10.21437\/Interspeech.2012-3"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Sainath, T., Mohamed, A., Kingsbury, B., Ramabhadran, B.: Convolutional neural networks for LVCSR. In: Proceedings of the ICASSP (2013)","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"15_CR16","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"Dahl, G., Yu, D., Deng, L., Acero, A.: Context-dependent, pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans. Audio Speech Language Process. 20, 30\u201342 (2012)","journal-title":"IEEE Trans. Audio Speech Language Process."},{"key":"15_CR17","unstructured":"Training & Testing sets Used in Kaggle\u2019s Speech Recognition Challenge. https:\/\/www.kaggle.com\/c\/tensorflow-speech-recognition-challenge\/data"},{"key":"15_CR18","unstructured":"Google\u2019s Crowd sourcing Open Speech Recording. http:\/\/aiyprojects.withgoogle.com\/open_speech_recording"},{"key":"15_CR19","unstructured":"Librosa \u2013 python package for music and audio analysis. https:\/\/librosa.github.io\/librosa"},{"key":"15_CR20","unstructured":"Pytorch \u2013 An open source deep learning platform by Facebook. https:\/\/pytorch.org"},{"key":"15_CR21","unstructured":"Squeeze-and-excitation Networks (SeNet). https:\/\/arxiv.org\/abs\/1709.01507"},{"key":"15_CR22","unstructured":"Deep Residual Learning for Image Recognition (ResNet). https:\/\/arxiv.org\/abs\/1512.03385"},{"key":"15_CR23","unstructured":"Very Deep Convolutional Networks For Large Scale Image Recognition (VGG). https:\/\/arxiv.org\/pdf\/1409.1556.pdf"},{"key":"15_CR24","unstructured":"Densely Connected Convolutional Networks (DenseNet). https:\/\/arxiv.org\/pdf\/1608.06993.pdf"},{"key":"15_CR25","unstructured":"Pytorch neural networks module inheritance documentation. https:\/\/pytorch.org\/docs\/stable\/nn.html"},{"key":"15_CR26","unstructured":"Tensor Flow Speech Recognition Challenge. https:\/\/www.kaggle.com\/c\/tensorflow-speech-recognition-challenge"},{"key":"15_CR27","unstructured":"Tensorflow Simple Audio Recognition Library. https:\/\/github.com\/tensorflow\/docs\/blob\/master\/site\/en\/r1\/tutorials\/sequences\/audio_recognition.md"}],"container-title":["Advances in Intelligent Systems and Computing","Intelligent Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-52246-9_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,31]],"date-time":"2022-10-31T23:50:43Z","timestamp":1667260243000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-52246-9_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030522452","9783030522469"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-52246-9_15","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"4 July 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Science and Information Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"London","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 July 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 July 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sai2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/saiconference.com\/Computing","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}