{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T03:32:05Z","timestamp":1771558325780,"version":"3.50.1"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/asru46091.2019.9004037","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T07:01:33Z","timestamp":1582268493000},"page":"853-859","source":"Crossref","is-referenced-by-count":8,"title":["Spatio-Temporal Context Modelling for Speech Emotion Classification"],"prefix":"10.1109","author":[{"given":"Md Asif","family":"Jalal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roger K","family":"Moore","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Hain","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-200","article-title":"Efficient emotion recognition from speech using deep learning on spectrograms","author":"satt","year":"2017","journal-title":"InterSpeech"},{"key":"ref32","author":"degottex","year":"2014","journal-title":"Covarep A collaborative voice analysis repository for speech technologies"},{"key":"ref31","article-title":"Adam: A method for stochastic optimization","volume":"abs 1412 6980","author":"kingma","year":"2014","journal-title":"CoRR"},{"key":"ref30","article-title":"Au-tomatic differentiation in pytorch","author":"paszke","year":"2017","journal-title":"NIPS-W"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1242"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2911996.2912051"},{"key":"ref11","article-title":"Gen-eralization in Deep Learning","author":"kawaguchi","year":"2017","journal-title":"ArXiv e-prints"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654984"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2016.7820699"},{"key":"ref14","first-page":"1006","article-title":"Deep temporal models using identity skip-connections for speech emotion recognition","author":"jaebok","year":"2017","journal-title":"Proc 25th ACM Int Conf Multimedia"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683163"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3068"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref19","first-page":"251","article-title":"Multi-modal sequence fusion via recursive attention for emotion recognition","author":"rory","year":"2018","journal-title":"Proceedings of the Conference on Computational Natural Language Learning"},{"key":"ref28","author":"zhang","year":"2018","journal-title":"Self-attention generative adversarial networks"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-25789-1_17","article-title":"Long-Term statistical feature extraction from speech signal and its application in emotion recognition","volume":"9449","author":"loweimi","year":"2015","journal-title":"Lecture Notes in Computer Science"},{"key":"ref27","article-title":"Non-local neural networks","volume":"abs 1711 7971","author":"xiaolong","year":"2017","journal-title":"CoRR"},{"key":"ref3","author":"cao","year":"0","journal-title":"Computer Speech and Language"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1242"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2009-103"},{"key":"ref8","article-title":"Abstract learning via demodulation in a deep neural network","volume":"abs 1502 4042","author":"simpson","year":"2015","journal-title":"ArXiv"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707732"},{"key":"ref2","first-page":"401","article-title":"Hidden Markov model-based speech emotion recognition","volume":"2","author":"schuller","year":"2003","journal-title":"2003 IEEE International Conference on Acoustics Speech and Signal Processing 2003 Proceedings (ICASSP'03)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2005.1415275"},{"key":"ref22","article-title":"Network in network","volume":"abs 1312 4400","author":"lin","year":"2013","journal-title":"CoRR"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(05)80023-1"},{"key":"ref23","article-title":"Towards principled design of deep convolutional networks: Introducing simpnet","volume":"abs 1802 6205","author":"hasanpour","year":"2018","journal-title":"CoRR"},{"key":"ref26","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume":"abs 1502 3167","author":"ioffe","year":"2015","journal-title":"CoRR"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-46805-6_19"}],"event":{"name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"SG, Singapore","start":{"date-parts":[[2019,12,14]]},"end":{"date-parts":[[2019,12,18]]}},"container-title":["2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8985378\/9003727\/09004037.pdf?arnumber=9004037","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T14:51:20Z","timestamp":1658155880000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9004037\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/asru46091.2019.9004037","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}