{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T16:03:23Z","timestamp":1758125003696,"version":"3.37.3"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,2,14]],"date-time":"2022-02-14T00:00:00Z","timestamp":1644796800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,2,14]],"date-time":"2022-02-14T00:00:00Z","timestamp":1644796800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s11042-022-12304-5","type":"journal-article","created":{"date-parts":[[2022,2,14]],"date-time":"2022-02-14T19:02:57Z","timestamp":1644865377000},"page":"9969-9988","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Feature-based hybrid strategies for gradient descent optimization in end-to-end speech recognition"],"prefix":"10.1007","volume":"81","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7202-2899","authenticated-orcid":false,"given":"Yesim","family":"Dokuz","sequence":"first","affiliation":[]},{"given":"Zekeriya","family":"T\u00fcfekci","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,14]]},"reference":[{"key":"12304_CR1","unstructured":"Chang HS, Learned-Miller E, McCallum A (2017) Active bias: training more accurate neural networks by emphasizing high variance samples. In Advances in Neural Information Processing Systems (NIPS 2017), Long Beach, pp 1002\u20131012"},{"issue":"10","key":"12304_CR2","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/LSP.2018.2860246","volume":"25","author":"M Chen","year":"2018","unstructured":"Chen M, He X, Yang J, Zhang H (2018) 3-D convolutional recurrent neural networks with attention model for speech emotion recognition. IEEE Signal Process Lett 25(10):1440\u20131444","journal-title":"IEEE Signal Process Lett"},{"key":"12304_CR3","doi-asserted-by":"crossref","unstructured":"Dai X, Yan X, Zhou K, Wang Y, Yang H, Cheng J (2020) Convolutional embedding for edit distance. In proceedings of the 43rd international ACM SIGIR conference on Research and Development in information retrieval (pp. 599-608)","DOI":"10.1145\/3397271.3401045"},{"issue":"3\u20134","key":"12304_CR4","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1561\/2000000039","volume":"7","author":"L Deng","year":"2014","unstructured":"Deng L, Yu D (2014) Deep learning: methods and applications. Found. Trends Signal Process 7(3\u20134):197\u2013387","journal-title":"Found. Trends Signal Process"},{"key":"12304_CR5","unstructured":"Doetsch P, Golik P, Ney H (2017) A comprehensive study of batch construction strategies for recurrent neural networks in mxnet. arXiv preprint, arXiv:1705.02414, 1\u20134"},{"key":"12304_CR6","doi-asserted-by":"publisher","first-page":"107573","DOI":"10.1016\/j.apacoust.2020.107573","volume":"171","author":"Y Dokuz","year":"2021","unstructured":"Dokuz Y, Tufekci Z (2021) Mini-batch sample selection strategies for deep learning based speech recognition. Appl Acoust 171:107573","journal-title":"Appl Acoust"},{"key":"12304_CR7","doi-asserted-by":"publisher","first-page":"114416","DOI":"10.1016\/j.eswa.2020.114416","volume":"168","author":"A Garain","year":"2021","unstructured":"Garain A, Singh PK, Sarkar R (2021) FuzzyGCP: a deep learning architecture for automatic spoken language identification from speech signals. Expert Syst Appl 168:114416","journal-title":"Expert Syst Appl"},{"key":"12304_CR8","unstructured":"Goodfellow I, Bengio Y, Courville A (2016) Deep Learning. MIT Press"},{"key":"12304_CR9","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/978-3-642-24797-2_7","volume-title":"Supervised Sequence Labelling with Recurrent Neural Networks","author":"A Graves","year":"2012","unstructured":"Graves A (2012) Connectionist temporal classification. In: Supervised Sequence Labelling with Recurrent Neural Networks. Springer, Berlin, Heidelberg, pp 61\u201393"},{"key":"12304_CR10","unstructured":"Graves A, Jaitly N (2014) Towards end-to-end speech recognition with recurrent neural networks, proceedings of the 31st international conference on international conference on machine learning, pp. II\u20131764\u2013II\u20131772"},{"key":"12304_CR11","doi-asserted-by":"crossref","unstructured":"Graves A, Fern\u00e1ndez S, Gomez F, Schmidhuber J (2006) Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In proceedings of the 23rd international conference on machine learning (pp. 369-376)","DOI":"10.1145\/1143844.1143891"},{"key":"12304_CR12","doi-asserted-by":"crossref","unstructured":"Graves A, Jaitly N, Mohamed AR (2013) Hybrid speech recognition with deep bidirectional LSTM. In 2013 IEEE workshop on automatic speech recognition and understanding (pp. 273-278). IEEE","DOI":"10.1109\/ASRU.2013.6707742"},{"issue":"1","key":"12304_CR13","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/s10772-019-09665-y","volume":"23","author":"S Hourri","year":"2020","unstructured":"Hourri S, Kharroubi J (2020) A deep learning approach for speaker recognition. Int J Speech Technol 23(1):123\u2013131","journal-title":"Int J Speech Technol"},{"key":"12304_CR14","doi-asserted-by":"publisher","first-page":"107941","DOI":"10.1016\/j.apacoust.2021.107941","volume":"177","author":"W Hussain","year":"2021","unstructured":"Hussain W, Sadiq MT, Siuly S, Rehman AU (2021) Epileptic seizure detection using 1 D-convolutional long short-term memory neural networks. Appl Acoust 177:107941","journal-title":"Appl Acoust"},{"key":"12304_CR15","doi-asserted-by":"crossref","unstructured":"Joseph KJ, Singh K, Balasubramanian VN (2019) Submodular batch selection for training deep neural networks. arXiv preprint, arXiv:1906.08771, 1\u20139","DOI":"10.24963\/ijcai.2019\/372"},{"issue":"8","key":"12304_CR16","first-page":"707","volume":"10","author":"VI Levenshtein","year":"1966","unstructured":"Levenshtein VI (1966) Binary codes capable of correcting deletions, insertions, and reversals. In Soviet Physics Doklady 10(8):707\u2013710","journal-title":"In Soviet Physics Doklady"},{"key":"12304_CR17","doi-asserted-by":"crossref","unstructured":"Li M, Zhang T, Chen Y, Smola AJ (2014) Efficient mini-batch training for stochastic optimization. In proceedings of the 20th ACM SIGKDD international conference on knowledge discovery and data mining (pp. 661\u2013670)","DOI":"10.1145\/2623330.2623612"},{"key":"12304_CR18","doi-asserted-by":"crossref","unstructured":"Liang Y, He F, Zeng X (2020) 3D mesh simplification with feature preservation based on whale optimization algorithm and differential evolution. Integr Comput-Aided Eng 27(4):417\u2013435","DOI":"10.3233\/ICA-200641"},{"key":"12304_CR19","doi-asserted-by":"crossref","unstructured":"Lim W, Jang D, Lee T (2016) Speech emotion recognition using convolutional and recurrent neural networks. In 2016 Asia-Pacific signal and information processing association annual summit and conference (APSIPA) (pp. 1\u20134). IEEE","DOI":"10.1109\/APSIPA.2016.7820699"},{"key":"12304_CR20","unstructured":"Loshchilov I, Hutter F (2015) Online batch selection for faster training of neural networks, arXiv preprint, arXiv:1511.06343, 1\u201320"},{"key":"12304_CR21","doi-asserted-by":"crossref","unstructured":"Maas A, Xie Z, Jurafsky D, Ng A (2015) Lexicon-free conversational speech recognition with neural networks, proceedings of the 2015 conference of the north American chapter of the Association for Computational Linguistics: human language technologies, pp. 345\u2013354","DOI":"10.3115\/v1\/N15-1038"},{"key":"12304_CR22","doi-asserted-by":"publisher","first-page":"3638","DOI":"10.1049\/ipr2.12271","volume":"15","author":"M Mei","year":"2021","unstructured":"Mei M, He F (2021) Multi-label learning based target detecting from multi-frame data. IET Image Process 15:3638\u20133644","journal-title":"IET Image Process"},{"key":"12304_CR23","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1016\/j.specom.2019.06.002","volume":"111","author":"A Nicolson","year":"2019","unstructured":"Nicolson A, Paliwal KK (2019) Deep learning for minimum mean-square error approaches to speech enhancement. Speech Comm 111:44\u201355","journal-title":"Speech Comm"},{"key":"12304_CR24","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1016\/j.image.2018.04.015","volume":"67","author":"JS Park","year":"2018","unstructured":"Park JS, Kim HG, Kim DG, Yu IJ, Lee HK (2018) Paired mini-batch training: a new deep network training for image forensics and steganalysis. Signal Process Image Commun 67:132\u2013139","journal-title":"Signal Process Image Commun"},{"key":"12304_CR25","doi-asserted-by":"crossref","unstructured":"Peng X, Li L, Wang FY (2019) Accelerating minibatch stochastic gradient descent using typicality sampling. IEEE Trans Neural Networks Learn Syst 31:4649\u20134659","DOI":"10.1109\/TNNLS.2019.2957003"},{"issue":"2","key":"12304_CR26","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1007\/s00371-020-01796-7","volume":"37","author":"Q Quan","year":"2021","unstructured":"Quan Q, He F, Li H (2021) A multi-phase blending method with incremental intensity for training detection networks. Vis Comput 37(2):245\u2013259","journal-title":"Vis Comput"},{"key":"12304_CR27","unstructured":"Ruder S (2016) An overview of gradient descent optimization algorithms, arXiv preprint, arXiv:1609.04747, 1\u201314"},{"key":"12304_CR28","doi-asserted-by":"crossref","unstructured":"Sainath TN, Vinyals O, Senior A, Sak H (2015) Convolutional, long short-term memory, fully connected deep neural networks. In 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 4580-4584). IEEE","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"12304_CR29","doi-asserted-by":"publisher","first-page":"107854","DOI":"10.1016\/j.apacoust.2020.107854","volume":"177","author":"S Souli","year":"2021","unstructured":"Souli S, Amami R, Yahia SB (2021) A robust pathological voices recognition system based on DCNN and scattering transform. Appl Acoust 177:107854","journal-title":"Appl Acoust"},{"key":"12304_CR30","doi-asserted-by":"crossref","unstructured":"Trigeorgis G, Ringeval F, Brueckner R, Marchi E, Nicolaou MA, Schuller B, Zafeiriou S (2016) Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network. In 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5200-5204). IEEE","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"12304_CR31","unstructured":"Veaux C, Yamagishi J, MacDonald K (2019) Cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit. University of Edinburgh. The Centre for Speech Technology Research (CSTR)"},{"issue":"5","key":"12304_CR32","doi-asserted-by":"publisher","first-page":"644","DOI":"10.3390\/sym11050644","volume":"11","author":"D Wang","year":"2019","unstructured":"Wang D, Wang X, Lv S (2019) End-to-end mandarin speech recognition combining CNN and BLSTM. Symmetry 11(5):644","journal-title":"Symmetry"},{"key":"12304_CR33","doi-asserted-by":"publisher","first-page":"107647","DOI":"10.1016\/j.apacoust.2020.107647","volume":"172","author":"Z Wang","year":"2021","unstructured":"Wang Z, Zhang T, Shao Y, Ding B (2021) LSTM-convolutional-BLSTM encoder-decoder network for minimum mean-square error approach to speech enhancement. Appl Acoust 172:107647","journal-title":"Appl Acoust"},{"key":"12304_CR34","doi-asserted-by":"crossref","unstructured":"Watanabe S, Hori T, Kim S, Hershey JR, Hayashi T (2017) Hybrid ctc\/attention architecture for end-to-end speech recognition. IEEE J Sel Top Signal Process 11(8):1240\u20131253","DOI":"10.1109\/JSTSP.2017.2763455"},{"key":"12304_CR35","unstructured":"Yu D, Deng L (2016) Automatic speech recognition a deep learning approach. Springer, p 347"},{"key":"12304_CR36","doi-asserted-by":"crossref","unstructured":"Zheng L, Duffner S, Idrissi K, Garcia C, Baskurt A (2016) Siamese multi-layer perceptrons for dimensionality reduction and face identification. Multimed Tools Appl 75(9):5055\u20135073","DOI":"10.1007\/s11042-015-2847-3"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12304-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-12304-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12304-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,23]],"date-time":"2022-03-23T17:34:08Z","timestamp":1648056848000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-12304-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,14]]},"references-count":36,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["12304"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-12304-5","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2022,2,14]]},"assertion":[{"value":"20 May 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Ethical approval is not necessary for this study.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}