{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:16:45Z","timestamp":1780636605305,"version":"3.54.1"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000288","name":"Royal Society","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000288","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1109\/cvpr46437.2021.01658","type":"proceedings-article","created":{"date-parts":[[2021,11,2]],"date-time":"2021-11-02T21:56:02Z","timestamp":1635890162000},"page":"16852-16861","source":"Crossref","is-referenced-by-count":27,"title":["Read and Attend: Temporal Localisation in Sign Language Videos"],"prefix":"10.1109","author":[{"given":"Gul","family":"Varol","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Liliane","family":"Momeni","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Samuel","family":"Albanie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Triantafyllos","family":"Afouras","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andrew","family":"Zisserman","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Temporal action local ization in untrimmed videos via multi-stage CNNs","author":"shou","year":"2016","journal-title":"CVPR"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1777"},{"key":"ref33","article-title":"Domain-adaptive discriminative one-shot learning of gestures","author":"pfister","year":"2014","journal-title":"ECCV"},{"key":"ref32","article-title":"Sign language recognition using sequential pattern trees","author":"ong","year":"2012","journal-title":"CVPR"},{"key":"ref31","article-title":"Watch, read and lookup: learning to spot signs from multiple supervisors","author":"momeni","year":"2020","journal-title":"ACCV"},{"key":"ref30","article-title":"Seeing wake words: Audio-visual keyword spotting","author":"momeni","year":"2020","journal-title":"BMVC"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00458"},{"key":"ref36","first-page":"136","article-title":"Building the british sign language corpus","volume":"7","author":"schembri","year":"2013","journal-title":"Journal of Language Documentation and Conservation"},{"key":"ref35","article-title":"British Sign Language Corpus Project: A corpus of digital video data and annotations of British Sign Language 2008 - 2017","author":"schembri","year":"2017"},{"key":"ref34","article-title":"Accessibility of voice-activated agents for people who are deaf or hard of hearing","author":"rodolitz","year":"2019","journal-title":"The Journal on Technology and Persons with Disabilities"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00624"},{"key":"ref27","article-title":"Word-level deep sign language recognition from video: A new large-scale dataset and methods comparison","author":"li","year":"2019","journal-title":"WACV"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210003"},{"key":"ref2","article-title":"BSL-1K: Scaling up co-articulated sign language recognition using mouthing cues","author":"albanie","year":"2020","journal-title":"ECCV"},{"key":"ref1","article-title":"SeeHear: Signer diarisation and a new dataset","author":"albanie","year":"2021","journal-title":"ICASSP"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1453"},{"key":"ref22","article-title":"Jointly discovering visual objects and spoken words from raw sensory input","author":"harwath","year":"2018","journal-title":"ECCV"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref24","article-title":"MS-ASL: A large-scale data set and benchmark for understanding American Sign Language","author":"joze","year":"2019","journal-title":"BMVC"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11903","article-title":"Video-based sign language recognition without temporal segmentation","author":"huang","year":"2018","journal-title":"AAAI"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1016\/j.cviu.2015.09.013","article-title":"Continuous sign language recognition: Towards large vocabulary statistical recognition systems handling multiple signers","volume":"141","author":"koller","year":"2015","journal-title":"Computer Vision and Image Understanding"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.3390\/app9132683","article-title":"Neural sign language translation based on human keypoint estimation","author":"ko","year":"2019","journal-title":"Appl Sci"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00876"},{"key":"ref10","article-title":"Multi-channel transformers for multi-articulatory sign language translation","author":"camgoz","year":"2020","journal-title":"ECCVW"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"ref40","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-85","article-title":"Combining residual networks with lstms for lipreading","author":"stafylakis","year":"2017","journal-title":"InterSpeech"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1015"},{"key":"ref14","article-title":"Fully convolutional networks for continuous sign language recognition","author":"cheng","year":"2020","journal-title":"ECCV"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683474"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00808"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350535"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/AMFG.2003.1240841"},{"key":"ref4","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"ICLRE"},{"key":"ref3","article-title":"Objects that sound","author":"arandjelovic","year":"2017","journal-title":"ECCV"},{"key":"ref6","first-page":"155","article-title":"Effects of language modality on word segmentation: An experimental study of phonological factors in a sign language","volume":"8","author":"brentari","year":"2009","journal-title":"Papers in Laboratory Phonology"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3308561.3353774"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.332"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00142"},{"key":"ref7","article-title":"Automatic segmentation of sign language into subtitle-units","author":"bull","year":"2020","journal-title":"ECCVW Sign Language Recognition Translation and Production (SLRTP)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00812"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1006\/cviu.2000.0895"},{"key":"ref45","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019062"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.2.270"},{"key":"ref42","article-title":"Visual recognition of American Sign Language using hidden Markov models","author":"starner","year":"1995","journal-title":"Technical Report"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_32"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/0031-3203(88)90048-9"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139167048"}],"event":{"name":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"Nashville, TN, USA","start":{"date-parts":[[2021,6,20]]},"end":{"date-parts":[[2021,6,25]]}},"container-title":["2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9577055\/9577056\/09578762.pdf?arnumber=9578762","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T05:04:34Z","timestamp":1699765474000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9578762\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/cvpr46437.2021.01658","relation":{},"subject":[],"published":{"date-parts":[[2021,6]]}}}