{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:25:06Z","timestamp":1730298306187,"version":"3.28.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/spcom50965.2020.9179519","type":"proceedings-article","created":{"date-parts":[[2020,8,28]],"date-time":"2020-08-28T20:20:46Z","timestamp":1598646046000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Jointly learning to align and transcribe using attention-based alignment and uncertainty-to-weigh losses"],"prefix":"10.1109","author":[{"given":"Shreekantha","family":"Nadig","sequence":"first","affiliation":[]},{"given":"Sumit","family":"Chakraborty","sequence":"additional","affiliation":[]},{"given":"Anuj","family":"Shah","sequence":"additional","affiliation":[]},{"given":"Chaitanay","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ramasubramanian","sequence":"additional","affiliation":[]},{"given":"Sachit","family":"Rao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"7482","article-title":"Multi-task learning using uncertainty to weigh losses for scene geometry and semantics","author":"kendall","year":"2018","journal-title":"Proc IEEE Conf Computer Vision and Pattern Recognition"},{"key":"ref11","volume":"338","author":"villani","year":"2008","journal-title":"Optimal Transport Old and New"},{"key":"ref12","first-page":"2292","article-title":"Sinkhorn Distances: Lightspeed Computation of Optimal Transport","volume":"26","author":"cuturi","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref13","first-page":"2681","article-title":"Interpolating between Optimal Transport and MMD using Sinkhorn Divergences","author":"feydy","year":"2019","journal-title":"In The 22nd International Conference on Artificial Intelligence and Statistics"},{"year":"2020","author":"geomloss","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref16","article-title":"The Kaldi Speech Recognition Toolkit","author":"povey","year":"2011","journal-title":"Proc IEEE Workshop Automatic Speech Recognition and Understanding (ASRU)"},{"key":"ref17","article-title":"ADADELTA: an adaptive learning rate method","volume":"abs 1212 5701","author":"zeiler","year":"2012","journal-title":"CoRR"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref19","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"sak","year":"2014","journal-title":"In Fifteenth Annual Conference of the International Speech Communication Association"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1616"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2763455"},{"key":"ref6","article-title":"The consciousness prior","author":"bengio","year":"2017","journal-title":"arXiv preprint arXiv 1709 08115"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1910"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1453"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2914149"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1780"},{"key":"ref1","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"577","article-title":"Attention-Based Models for Speech Recognition","volume":"28","author":"chorowski","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref20","first-page":"1171","article-title":"Scheduled sampling for sequence prediction with recurrent neural networks","author":"bengio","year":"2015","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICFHR.2014.55"}],"event":{"name":"2020 International Conference on Signal Processing and Communications (SPCOM)","start":{"date-parts":[[2020,7,19]]},"location":"Bangalore, India","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Conference on Signal Processing and Communications (SPCOM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9176988\/9179490\/09179519.pdf?arnumber=9179519","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,30]],"date-time":"2022-06-30T15:18:57Z","timestamp":1656602337000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9179519\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/spcom50965.2020.9179519","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}