{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T12:40:53Z","timestamp":1688820053858},"reference-count":83,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2022,8,27]],"date-time":"2022-08-27T00:00:00Z","timestamp":1661558400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,27]],"date-time":"2022-08-27T00:00:00Z","timestamp":1661558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s11063-022-11006-1","type":"journal-article","created":{"date-parts":[[2022,8,27]],"date-time":"2022-08-27T11:02:59Z","timestamp":1661598179000},"page":"3205-3224","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Time-Frequency Localization Using Deep Convolutional Maxout Neural Network in Persian Speech Recognition"],"prefix":"10.1007","volume":"55","author":[{"given":"Arash","family":"Dehghani","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seyyed Ali","family":"Seyyedsalehi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,8,27]]},"reference":[{"key":"11006_CR1","doi-asserted-by":"crossref","unstructured":"Allen JB (1995) How do humans process and recognize speech? \u201d in Modern methods of speech processing. Springer, pp 251\u2013275","DOI":"10.1007\/978-1-4615-2281-2_11"},{"issue":"10","key":"11006_CR2","doi-asserted-by":"publisher","first-page":"4114","DOI":"10.1523\/jneurosci.22-10-04114.2002","volume":"22","author":"MA Escab\u00ed","year":"2002","unstructured":"Escab\u00ed MA, Schreiner CE (2002) Nonlinear Spectrotemporal Sound Analysis by Neurons in the Auditory Midbrain. J Neurosci 22(10):4114\u20134131. doi: https:\/\/doi.org\/10.1523\/jneurosci.22-10-04114.2002","journal-title":"J Neurosci"},{"issue":"3","key":"11006_CR3","doi-asserted-by":"publisher","first-page":"1220","DOI":"10.1152\/jn.2001.85.3.1220","volume":"85","author":"DA Depireux","year":"2001","unstructured":"Depireux DA, Simon JZ, Klein DJ, Shamma SA (2001) Spectro-temporal response field characterization with dynamic ripples in ferret primary auditory cortex. J Neurophysiol 85(3):1220\u20131234. doi: https:\/\/doi.org\/10.1152\/jn.2001.85.3.1220","journal-title":"J Neurophysiol"},{"issue":"2","key":"11006_CR4","doi-asserted-by":"publisher","first-page":"887","DOI":"10.1121\/1.1945807","volume":"118","author":"T Chi","year":"2005","unstructured":"Chi T, Ru P, Shamma SA (2005) Multiresolution spectrotemporal analysis of complex sounds. J Acoust Soc Am 118(2):887\u2013906. doi: https:\/\/doi.org\/10.1121\/1.1945807","journal-title":"J Acoust Soc Am"},{"issue":"6","key":"11006_CR5","doi-asserted-by":"publisher","first-page":"2315","DOI":"10.1523\/JNEUROSCI.20-06-02315.2000","volume":"20","author":"FE Theunissen","year":"2000","unstructured":"Theunissen FE, Sen K, Doupe AJ (2000) Spectral-temporal receptive fields of nonlinear auditory neurons obtained using natural sounds. J Neurosci 20(6):2315\u20132331","journal-title":"J Neurosci"},{"issue":"11","key":"11006_CR6","doi-asserted-by":"publisher","first-page":"1216","DOI":"10.1038\/nn1141","volume":"6","author":"J Fritz","year":"2003","unstructured":"Fritz J, Shamma S, Elhilali M, Klein D (2003) Rapid task-related plasticity of spectrotemporal receptive fields in primary auditory cortex. Nat Neurosci 6(11):1216\u20131223. doi: https:\/\/doi.org\/10.1038\/nn1141","journal-title":"Nat Neurosci"},{"issue":"3","key":"11006_CR7","doi-asserted-by":"publisher","first-page":"926","DOI":"10.1046\/j.1460-9568.1998.00102.x","volume":"10","author":"BM Calhoun","year":"1998","unstructured":"Calhoun BM, Schreiner CE (1998) Spectral envelope coding in cat primary auditory cortex: linear and non-linear effects of stimulus characteristics. Eur J Neurosci 10(3):926\u2013940. doi: https:\/\/doi.org\/10.1046\/j.1460-9568.1998.00102.x","journal-title":"Eur J Neurosci"},{"issue":"5234","key":"11006_CR8","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1126\/science.270.5234.303","volume":"270","author":"RV Shannon","year":"1995","unstructured":"Shannon RV, Zeng F-G, Kamath V, Wygonski J, Ekelid M (1995) Speech recognition with primarily temporal cues. Sci (80-) 270(5234):303\u2013304","journal-title":"Sci (80-)"},{"issue":"6","key":"11006_CR9","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1250\/ast.34.388","volume":"34","author":"AJ Oxenham","year":"2013","unstructured":"Oxenham AJ (2013) Revisiting place and temporal theories of pitch. Acoust Sci Technol 34(6):388\u2013396. doi: https:\/\/doi.org\/10.1250\/ast.34.388","journal-title":"Acoust Sci Technol"},{"key":"11006_CR10","doi-asserted-by":"publisher","unstructured":"Swanson BA, Marimuthu VMR, Mannell RH (2019) \u201cPlace and Temporal Cues in Cochlear Implant Pitch and Melody Perception,\u201d Front. Neurosci., vol.\u00a013, no. November, pp.\u00a01\u201318, doi: https:\/\/doi.org\/10.3389\/fnins.2019.01266","DOI":"10.3389\/fnins.2019.01266"},{"key":"11006_CR11","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/S0378-5955(02)00644-5","volume":"174","author":"F Zeng","year":"2002","unstructured":"Zeng F (2002) Temporal pitch in electric hearing. Hear Res 174:101\u2013106","journal-title":"Hear Res"},{"issue":"1","key":"11006_CR12","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1152\/jn.00851.2002","volume":"90","author":"A Qiu","year":"2003","unstructured":"Qiu A, Schreiner CE, Escab\u00ed MA (2003) Gabor analysis of auditory midbrain receptive fields: spectro-temporal and binaural composition. J Neurophysiol 90(1):456\u2013476. doi: https:\/\/doi.org\/10.1152\/jn.00851.2002","journal-title":"J Neurophysiol"},{"issue":"8","key":"11006_CR13","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1016\/0031-8914(69)90287-0","volume":"5","author":"S Shamma","year":"2001","unstructured":"Shamma S, De Groot SR, van Weert CG, Hermens WT, van Leeuwen WA, Shamma S (2001) On the role of space and time in auditory processing. Trends Cogn Sci 5(8):340\u2013348. doi: https:\/\/doi.org\/10.1016\/0031-8914(69)90287-0","journal-title":"Trends Cogn Sci"},{"issue":"10","key":"11006_CR14","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1016\/0166-2236(95)94496-R","volume":"18","author":"GC DeAngelis","year":"1995","unstructured":"DeAngelis GC, Ohzawa I, Freeman RD (1995) Receptive-field dynamics in the central visual pathways. Trends Neurosci 18(10):451\u2013458","journal-title":"Trends Neurosci"},{"issue":"4","key":"11006_CR15","doi-asserted-by":"publisher","first-page":"2047","DOI":"10.1121\/1.4916618","volume":"137","author":"MR Sch\u00e4dler","year":"2015","unstructured":"Sch\u00e4dler MR, Kollmeier B (2015) Separable spectro-temporal Gabor filter bank features: Reducing the complexity of robust features for automatic speech recognition. J Acoust Soc Am 137(4):2047\u20132059. doi: https:\/\/doi.org\/10.1121\/1.4916618","journal-title":"J Acoust Soc Am"},{"key":"11006_CR16","unstructured":"Robertson S, Penn G, Wang Y (2019) \u201cExploring spectro-temporal features in end-to-end convolutional neural networks,\u201d arXiv Prepr. arXiv1901.00072, pp.\u00a01\u20139,"},{"key":"11006_CR17","unstructured":"Kleinschmidt M (2002) \u201cRobust speech recognition based on spectro-temporal processing. \u201d Universit\u00e4t Oldenburg"},{"key":"11006_CR18","doi-asserted-by":"publisher","unstructured":"Bouvrie J, Ezzat T, Poggio T (2008) \u201cLocalized spectro-temporal cepstral analysis of speech,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing, 2008, no. May 2014, pp.\u00a04733\u20134736, doi: https:\/\/doi.org\/10.1109\/ICASSP.2008.4518714","DOI":"10.1109\/ICASSP.2008.4518714"},{"key":"11006_CR19","doi-asserted-by":"publisher","unstructured":"And\u00e9n J, Lostanlen V, Mallat SS, Anden J, Lostanlen V, Mallat SS (2015) \u201cJoint time-frequency scattering for audio classification,\u201d in IEEE 25th International Workshop on Machine Learning for Signal Processing (MLSP), 2015, vol. 2015-Novem, pp.\u00a01\u20136, doi: https:\/\/doi.org\/10.1109\/MLSP.2015.7324385","DOI":"10.1109\/MLSP.2015.7324385"},{"issue":"1","key":"11006_CR20","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1162\/neco.1989.1.1.39","volume":"1","author":"A Waibel","year":"1989","unstructured":"Waibel A (1989) Modular Construction of Time-Delay Neural Networks for Speech Recognition. Neural Comput 1(1):39\u201346. doi: https:\/\/doi.org\/10.1162\/neco.1989.1.1.39","journal-title":"Neural Comput"},{"key":"11006_CR21","doi-asserted-by":"crossref","unstructured":"Lecun Y, Bottou LL, Bengio Y, Haffner PPP (1998) \u201cGradient-based learning applied to document recognition,\u201d Proc. IEEE, vol.\u00a086, no. 11, pp.\u00a02278\u20132324, [Online]. Available: http:\/\/ieeexplore.ieee.org\/document\/726791\/#full-text-section","DOI":"10.1109\/5.726791"},{"issue":"8","key":"11006_CR22","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"M Tlanusta Garret","year":"1997","unstructured":"Tlanusta Garret M et al (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"11006_CR23","doi-asserted-by":"publisher","unstructured":"Abdel-Hamid O, Mohamed AR, Jiang H, Deng L, Penn G, Yu D (2014) \u201cConvolutional neural networks for speech recognition,\u201d IEEE\/ACM Trans. audio, speech, Lang. Process., vol.\u00a022, no. 10, pp.\u00a01533\u20131545, doi: https:\/\/doi.org\/10.1109\/TASLP.2014.2339736","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"11006_CR24","unstructured":"LeCun Y, Bengio Y, Yann L, Yoshua B (1995) \u201cConvolutional networks for images, speech, and time series,\u201d Handb. brain theory neural networks, vol.\u00a03361, no. 10, p.\u00a01995"},{"key":"11006_CR25","first-page":"1096","volume":"22","author":"H Lee","year":"2009","unstructured":"Lee H et al (2009) Unsupervised feature learning for audio classification using convolutional deep belief networks. Adv Neural Inf Process Syst 22:1096\u20131104","journal-title":"Adv Neural Inf Process Syst"},{"key":"11006_CR26","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid O, Deng L, Yu D (2013) \u201cExploring convolutional neural network structures and optimization techniques for speech recognition.,\u201d in Interspeech, vol.\u00a011, no. August, pp.\u00a073\u201375","DOI":"10.21437\/Interspeech.2013-744"},{"issue":"6","key":"11006_CR27","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1109\/MSP.2012.2209906","volume":"29","author":"RM Stern","year":"2012","unstructured":"Stern RM et al (2012) Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal Process Mag 29(6):16\u201317. doi: https:\/\/doi.org\/10.1109\/MSP.2012.2209906","journal-title":"IEEE Signal Process Mag"},{"key":"11006_CR28","doi-asserted-by":"crossref","unstructured":"T\u00f3th L (2014) \u201cConvolutional deep maxout networks for phone recognition,\u201d in Fifteenth Annual Conference of the International Speech Communication Association, no. September, pp.\u00a01078\u20131082","DOI":"10.21437\/Interspeech.2014-278"},{"key":"11006_CR29","doi-asserted-by":"publisher","unstructured":"Abdel-Hamid O et al (2012) IEEE international conference on Acoustics, speech and signal processing (ICASSP), 2012, no. July 2015, pp.\u00a04277\u20134280, doi: https:\/\/doi.org\/10.1109\/ICASSP.2012.6288864","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"11006_CR30","doi-asserted-by":"publisher","unstructured":"Cai M, Shi Y, Kang J, Liu J, Su T (2014) \u201cConvolutional maxout neural networks for low-resource speech recognition,\u201d in The 9th International Symposium on Chinese Spoken Language Processing, pp.\u00a0133\u2013137, doi: https:\/\/doi.org\/10.1109\/ISCSLP.2014.6936676","DOI":"10.1109\/ISCSLP.2014.6936676"},{"key":"11006_CR31","doi-asserted-by":"crossref","unstructured":"Mitra V, Franco H (2015) \u201cTime-frequency convolutional networks for robust speech recognition,\u201d in 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp.\u00a0317\u2013323","DOI":"10.1109\/ASRU.2015.7404811"},{"key":"11006_CR32","unstructured":"Amodei D et al (2016) \u201cDeep speech 2: End-to-end speech recognition in english and mandarin,\u201d in International conference on machine learning, vol.\u00a01, pp.\u00a0173\u2013182"},{"key":"11006_CR33","unstructured":"Abrol V, Dubagunta SP, Magimai M (2019) \u201cUnderstanding raw waveform based CNN through low-rank spectro-temporal decoupling,\u201dIdiap,"},{"key":"11006_CR34","unstructured":"Zhu B et al (2018) International Joint Conference on Neural Networks (IJCNN), 2018, pp.\u00a01\u20138"},{"key":"11006_CR35","doi-asserted-by":"crossref","unstructured":"Zhao T, Zhao Y, Chen X (2015) \u201cTime-frequency kernel-based CNN for speech recognition,\u201d in Sixteenth Annual Conference of the International Speech Communication Association, vol. 2015-Janua, pp.\u00a01888\u20131892","DOI":"10.21437\/Interspeech.2015-417"},{"key":"11006_CR36","doi-asserted-by":"publisher","unstructured":"Li J, Mohamed A, Zweig G, Gong Y (2016) \u201cExploring multidimensional LSTMs for large vocabulary ASR,\u201d in 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), vol. 2016-May, pp.\u00a04940\u20134944, doi: https:\/\/doi.org\/10.1109\/ICASSP.2016.7472617","DOI":"10.1109\/ICASSP.2016.7472617"},{"key":"11006_CR37","doi-asserted-by":"publisher","unstructured":"Li J, Mohamed A, Zweig G, Gong Y (2015) \u201cLSTM time and frequency recurrence for automatic speech recognition,\u201d in 2015 IEEE workshop on automatic speech recognition and understanding (ASRU), pp.\u00a0187\u2013191, doi: https:\/\/doi.org\/10.1109\/ASRU.2015.7404793","DOI":"10.1109\/ASRU.2015.7404793"},{"key":"11006_CR38","unstructured":"van Segbroeck M et al (2007) \u201cMulti-view Frequency LSTM: An Efficient Frontend for Automatic Speech Recognition,\u201d arXiv Prepr. arXiv00131, 2020"},{"key":"11006_CR39","unstructured":"Maiti, Bidinger TN, Sainath O, Vinyals A, Senior, Sak H (2015) \u201cConvolutional, long short-term memory, fully connected deep neural networks,\u201d in IEEE international conference on acoustics, speech and signal processing (ICASSP), 2015, vol.\u00a053, no. 9, pp.\u00a04580\u20134584"},{"key":"11006_CR40","doi-asserted-by":"publisher","unstructured":"Kreyssig FL, Zhang C, Woodland PC (2018) \u201cImproved TDNNs using deep kernels and frequency dependent Grid-RNNs,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, vol. 2018-April, pp.\u00a04864\u20134868, doi: https:\/\/doi.org\/10.1109\/ICASSP.2018.8462523","DOI":"10.1109\/ICASSP.2018.8462523"},{"key":"11006_CR41","doi-asserted-by":"publisher","unstructured":"Yuan W (2020) \u201cA time\u2013frequency smoothing neural network for speech enhancement,\u201d Speech Commun., vol.\u00a0124, no. August, pp.\u00a075\u201384, doi: https:\/\/doi.org\/10.1016\/j.specom.2020.09.002","DOI":"10.1016\/j.specom.2020.09.002"},{"key":"11006_CR42","doi-asserted-by":"publisher","unstructured":"Miao X, McLoughlin I, Yan Y (2019) \u201cA New Time-Frequency Attention Mechanism for TDNN and CNN-LSTM-TDNN, with Application to Language Identification.,\u201d in Interspeech, vol. 2019-Septe, pp.\u00a04080\u20134084, doi: https:\/\/doi.org\/10.21437\/Interspeech.2019-1256","DOI":"10.21437\/Interspeech.2019-1256"},{"key":"11006_CR43","unstructured":"Bae SH, Choi I, Kim NS (2016) \u201cAcoustic scene classification using parallel combination of LSTM and CNN,\u201d Detect. Classif. Acoust. Scenes Events no. September, 2016"},{"key":"11006_CR44","unstructured":"Lidy T, Schindler A, Scenes A, Lidy T, Schindler A (2016) \u201cCQT-based convolutional neural networks for audio scene classification,\u201d in Proceedings of the detection and classification of acoustic scenes and events workshop (DCASE2016), 2016, vol.\u00a090, no. September, pp.\u00a01032\u20131048"},{"key":"11006_CR45","doi-asserted-by":"crossref","unstructured":"Lim TY, Yeh RA, Xu Y, Do MN, Hasegawa-johnson M (2018) \u201cTime-frequency networks for audio super-resolution,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, pp.\u00a0646\u2013650","DOI":"10.1109\/ICASSP.2018.8462049"},{"issue":"4","key":"11006_CR46","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1007\/s00521-019-04158-0","volume":"32","author":"J Deng","year":"2020","unstructured":"Deng J et al (2020) Exploiting time-frequency patterns with LSTM-RNNs for low-bitrate audio restoration. Neural Comput Appl 32(4):1095\u20131107. doi: https:\/\/doi.org\/10.1007\/s00521-019-04158-0","journal-title":"Neural Comput Appl"},{"key":"11006_CR47","doi-asserted-by":"publisher","unstructured":"Li R, Wu Z, Ning Y, Sun L, Meng H, Cai L (2017) \u201cSpectro-Temporal Modelling with Time-Frequency LSTM and Structured Output Layer for Voice Conversion.,\u201d in INTERSPEECH, vol. 2017-Augus, pp.\u00a03409\u20133413, doi: https:\/\/doi.org\/10.21437\/Interspeech.2017-1122","DOI":"10.21437\/Interspeech.2017-1122"},{"key":"11006_CR48","unstructured":"Glorot X, Bordes A, Bengio Y (2011) \u201cDeep sparse rectifier neural networks,\u201d in Proceedings of the fourteenth international conference on artificial intelligence and statistics, vol.\u00a015, pp.\u00a0315\u2013323"},{"key":"11006_CR49","unstructured":"Goodfellow IJ, Warde-Farley D, Mirza M, Courville A, Bengio Y (2013) \u201cMaxout networks,\u201d in International conference on machine learning, no. PART 3, pp.\u00a01319\u20131327"},{"key":"11006_CR50","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: A simple way to prevent neural networks from overfitting. J Mach Learn Res 15:1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"11006_CR51","unstructured":"Srebro N, Rennie JDM, Jaakkola TS (2005) \u201cMaximum-margin matrix factorization,\u201d in Advances in neural information processing systems, pp.\u00a01329\u20131336"},{"key":"11006_CR52","doi-asserted-by":"crossref","unstructured":"Kleinschmidt M, Section MP, Universit CVO (2003) \u201cLocalized spectro-temporal features for automatic speech recognition,\u201d in Eighth European conference on speech communication and technology, pp.\u00a01\u20134","DOI":"10.21437\/Eurospeech.2003-710"},{"key":"11006_CR53","doi-asserted-by":"crossref","unstructured":"Ezzat T, Bouvrie J, Poggio T (2007) \u201cSpectro-temporal analysis of speech using 2-D Gabor filters,\u201d in Eighth Annual Conference of the International Speech Communication Association, vol.\u00a04, pp.\u00a02308\u20132311","DOI":"10.21437\/Interspeech.2007-236"},{"key":"11006_CR54","doi-asserted-by":"crossref","unstructured":"Lei H, Meyer BT, Mirghafori N (2012) \u201cSpectro-temporal Gabor features for speaker recognition,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2012, pp.\u00a04241\u20134244","DOI":"10.1109\/ICASSP.2012.6288855"},{"issue":"5","key":"11006_CR55","doi-asserted-by":"publisher","first-page":"4134","DOI":"10.1121\/1.3699200","volume":"131","author":"MR Sch\u00e4dler","year":"2012","unstructured":"Sch\u00e4dler MR, Meyer BT, Kollmeier B (2012) Spectro-temporal modulation subspace-spanning filter bank features for robust automatic speech recognition. J Acoust Soc Am 131(5):4134\u20134151. doi: https:\/\/doi.org\/10.1121\/1.3699200","journal-title":"J Acoust Soc Am"},{"key":"11006_CR56","doi-asserted-by":"crossref","unstructured":"Chang S, Morgan N (2014) \u201cRobust CNN - based Speec h Recognition With Gabor Filter Kernels,\u201d","DOI":"10.21437\/Interspeech.2014-226"},{"issue":"1","key":"11006_CR57","doi-asserted-by":"publisher","first-page":"117","DOI":"10.14232\/actacyb.22.1.2015.8","volume":"22","author":"G Kov\u00e1cs","year":"2015","unstructured":"Kov\u00e1cs G, T\u00f3th L (2015) Joint optimization of spectro-temporal features and deep neural nets for robust automatic speech recognition. Acta Cybern 22(1):117\u2013134. doi: https:\/\/doi.org\/10.14232\/actacyb.22.1.2015.8","journal-title":"Acta Cybern"},{"key":"11006_CR58","doi-asserted-by":"publisher","first-page":"13090","DOI":"10.1523\/JNEUROSCI.1671-15.2015","volume":"35","author":"SJ Slee","year":"2015","unstructured":"Slee SJ, David SV (2015) Rapid task-related plasticity of spectrotemporal receptive fields in the auditory midbrain. J Neurosci 35:13090\u201313102. doi: https:\/\/doi.org\/10.1523\/JNEUROSCI.1671-15.2015","journal-title":"J Neurosci"},{"key":"11006_CR59","doi-asserted-by":"crossref","unstructured":"T\u00f3th L (2014) \u201cCombining time-and frequency-domain convolution in convolutional neural network-based phone recognition,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2014, pp.\u00a0190\u2013194","DOI":"10.1109\/ICASSP.2014.6853584"},{"key":"11006_CR60","doi-asserted-by":"publisher","unstructured":"Vesel\u00fd K, Karafi\u00e1t M, Gr\u00e9zl F (2011) \u201cConvolutive bottleneck network features for LVCSR,\u201d in 2011 IEEE Workshop on Automatic Speech Recognition & Understanding, pp.\u00a042\u201347, doi: https:\/\/doi.org\/10.1109\/ASRU.2011.6163903","DOI":"10.1109\/ASRU.2011.6163903"},{"key":"11006_CR61","doi-asserted-by":"publisher","unstructured":"Kim J, Truong KP, Englebienne G, Evers V (2017) \u201cLearning spectro-temporal features with 3D CNNs for speech emotion recognition,\u201d in Seventh International Conference on Affective Computing and Intelligent Interaction (ACII), 2017, vol. 2018-Janua, pp.\u00a0383\u2013388, doi: https:\/\/doi.org\/10.1109\/ACII.2017.8273628","DOI":"10.1109\/ACII.2017.8273628"},{"issue":"1","key":"11006_CR62","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M, Yu K (2013) 3D Convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35(1):221\u2013231. doi: https:\/\/doi.org\/10.1109\/TPAMI.2012.59","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11006_CR63","doi-asserted-by":"publisher","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) \u201cLearning spatiotemporal features with 3d convolutional networks,\u201d in Proceedings of the IEEE international conference on computer vision, vol.\u00a02015 Inter, pp.\u00a04489\u20134497, doi: https:\/\/doi.org\/10.1109\/ICCV.2015.510","DOI":"10.1109\/ICCV.2015.510"},{"issue":"1","key":"11006_CR64","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1113\/jphysiol.1962.sp006837","volume":"160","author":"DH Hubel","year":"1962","unstructured":"Hubel DH, Wiesel TN (1962) Receptive fields, binocular interaction and functional architecture in the cat\u2019s visual cortex. J Physiol 160(1):106\u2013154","journal-title":"J Physiol"},{"key":"11006_CR65","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1016\/j.neucom.2015.05.057","volume":"168","author":"SAS Seyyede Zohreh","year":"2015","unstructured":"Seyyede Zohreh SAS, Seyyedsalehi (2015) A fast and efficient pre-training method based on layer-by-layer maximum discrimination for deep neural networks. Neurocomputing 168:669\u2013680. doi: https:\/\/doi.org\/10.1016\/j.neucom.2015.05.057","journal-title":"Neurocomputing"},{"key":"11006_CR66","first-page":"10","volume":"2","author":"SZ Seyyedsalehi","year":"2015","unstructured":"Seyyedsalehi SZ, Seyyedsalehi SA (2015) Bidirectional Layer-By-Layer Pre-Training Method for Deep Neural Networks Training (In Persian). Comput Intell Electr Eng 2:10","journal-title":"Comput Intell Electr Eng"},{"key":"11006_CR67","doi-asserted-by":"publisher","unstructured":"Hinton GE, Salakhutdinov RR (2006) \u201cReducing the dimensionality of data with neural networks,\u201d Science (80-.)., vol.\u00a0313, no. 5786, pp.\u00a0504\u2013507, doi: https:\/\/doi.org\/10.1126\/science.1127647","DOI":"10.1126\/science.1127647"},{"key":"11006_CR68","doi-asserted-by":"publisher","unstructured":"Dayan P, Abbott L (2002) Theoretical Neuroscience: Computational and Mathematical Modeling of Neural Systems (Computational Neuroscience). J Cogn Neurosci 480. doi: https:\/\/doi.org\/10.1016\/j.neuron.2008.10.019","DOI":"10.1016\/j.neuron.2008.10.019"},{"key":"11006_CR69","unstructured":"Zeiler MD et al (2013) IEEE International Conference on Acoustics, Speech and Signal Processing, 2013, pp.\u00a03517\u20133521"},{"key":"11006_CR70","doi-asserted-by":"crossref","unstructured":"Dahl G, Sainath T, Hinton G (2013) \u201cImproving Deep Neural Netowrks for LVCSR Using Recitified Linear Units and Dropout, Department of Computer Science, University of Toronto,\u201d Acoust. Speech Signal Process. (ICASSP), IEEE Int. Conf., pp.\u00a08609\u20138613, 2013","DOI":"10.1109\/ICASSP.2013.6639346"},{"key":"11006_CR71","unstructured":"Maas AL, Hannun AY, Ng AY (2013) \u201cRectifier nonlinearities improve neural network acoustic models,\u201d in Proc. icml, vol.\u00a030, no. 1, p.\u00a03"},{"key":"11006_CR72","doi-asserted-by":"crossref","unstructured":"T\u00f3th L (2013) \u201cPhone recognition with deep sparse rectifier neural networks,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing, 2013, pp.\u00a06985\u20136989","DOI":"10.1109\/ICASSP.2013.6639016"},{"key":"11006_CR73","doi-asserted-by":"crossref","unstructured":"T\u00f3th L (2013) \u201cConvolutional deep rectifier neural nets for phone recognition,\u201d in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH, no. August, pp.\u00a01722\u20131726","DOI":"10.21437\/Interspeech.2013-429"},{"key":"11006_CR74","doi-asserted-by":"publisher","unstructured":"Miao Y, Metze F, Rawat S (2013) \u201cDeep maxout networks for low-resource speech recognition,\u201d in IEEE Workshop on Automatic Speech Recognition and Understanding, 2013, pp.\u00a0398\u2013403, doi: https:\/\/doi.org\/10.1109\/ASRU.2013.6707763","DOI":"10.1109\/ASRU.2013.6707763"},{"key":"11006_CR75","doi-asserted-by":"crossref","unstructured":"Cai M, Shi Y, Liu J (2013) \u201cDeep maxout neural networks for speech recognition,\u201d in 2013 IEEE Workshop on Automatic Speech Recognition and Understanding, pp.\u00a0291\u2013296","DOI":"10.1109\/ASRU.2013.6707745"},{"key":"11006_CR76","doi-asserted-by":"crossref","unstructured":"Miao Y, Metze F (2014) \u201cImproving language-universal feature extraction with deep maxout and convolutional neural networks,\u201d in Fifteenth Annual Conference of the International Speech Communication Association, pp.\u00a0800\u2013804","DOI":"10.21437\/Interspeech.2014-205"},{"key":"11006_CR77","doi-asserted-by":"crossref","unstructured":"Swietojanski P, Li J, Huang J-T (2014) \u201cInvestigation of maxout networks for speech recognition,\u201d in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2014, pp.\u00a07649\u20137653","DOI":"10.1109\/ICASSP.2014.6855088"},{"key":"11006_CR78","doi-asserted-by":"publisher","unstructured":"Zhang Y, Pezeshki M, Brakel P, Zhang S, Bengio CLY, Courville A (2017) \u201cTowards end-to-end speech recognition with deep convolutional neural networks,\u201d arXiv Prepr. arXiv1701.02720, vol. 08-12-Sept, pp.\u00a0410\u2013414, doi: https:\/\/doi.org\/10.21437\/Interspeech.2016-1446","DOI":"10.21437\/Interspeech.2016-1446"},{"issue":"2","key":"11006_CR79","doi-asserted-by":"publisher","first-page":"123","DOI":"10.3390\/risks8030083","volume":"24","author":"L Breiman","year":"1996","unstructured":"Breiman L (1996) Bagging predictors. Mach Learn 24(2):123\u2013140. doi: https:\/\/doi.org\/10.3390\/risks8030083","journal-title":"Mach Learn"},{"key":"11006_CR80","doi-asserted-by":"crossref","unstructured":"Dehghani A, Seyyedsalehi SA (2018) \u201cPerformance Evaluation of Deep Convolutional Maxout Neural Network in Speech Recognition,\u201d in 25th National and 3rd International Iranian Conference on Biomedical Engineering (ICBME), 2018, pp.\u00a01\u20136","DOI":"10.1109\/ICBME.2018.8703593"},{"key":"11006_CR81","unstructured":"Bijankhan M, Sheikhzadegan J, Roohani MR (1994) \u201cFARSDAT-The speech database of Farsi spoken language,\u201d"},{"key":"11006_CR82","unstructured":"Mahdi Rahiminejad SAS \u201cA Comparative Study of Representation Parameters Extraction and Normalization Methods for Speaker Independent Recognition of Speech (In Persian),\u201dAmirkabir, vol. 55, p.20, 1382"},{"key":"11006_CR83","first-page":"1","volume":"5","author":"RB Palm","year":"2012","unstructured":"Palm RB (2012) Prediction as a candidate for learning deep hierarchical models of data. Tech Univ Denmark 5:1\u201387","journal-title":"Tech Univ Denmark"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11006-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-11006-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11006-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T12:13:55Z","timestamp":1688818435000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-11006-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,27]]},"references-count":83,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["11006"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-11006-1","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,27]]},"assertion":[{"value":"20 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 May 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 August 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}