{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T15:33:15Z","timestamp":1776785595025,"version":"3.51.2"},"reference-count":61,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/iccv48922.2021.01135","type":"proceedings-article","created":{"date-parts":[[2022,2,28]],"date-time":"2022-02-28T17:08:02Z","timestamp":1646068082000},"page":"11532-11541","source":"Crossref","is-referenced-by-count":22,"title":["Aligning Subtitles in Sign Language Videos"],"prefix":"10.1109","author":[{"given":"Hannah","family":"Bull","sequence":"first","affiliation":[{"name":"LISN, Univ Paris-Saclay, CNRS,France"}]},{"given":"Triantafyllos","family":"Afouras","sequence":"additional","affiliation":[{"name":"University of Oxford,Visual Geometry Group,UK"}]},{"given":"Gul","family":"Varol","sequence":"additional","affiliation":[{"name":"University of Oxford,Visual Geometry Group,UK"}]},{"given":"Samuel","family":"Albanie","sequence":"additional","affiliation":[{"name":"University of Oxford,Visual Geometry Group,UK"}]},{"given":"Liliane","family":"Momeni","sequence":"additional","affiliation":[{"name":"University of Oxford,Visual Geometry Group,UK"}]},{"given":"Andrew","family":"Zisserman","sequence":"additional","affiliation":[{"name":"University of Oxford,Visual Geometry Group,UK"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Watch, read and lookup: Learning to spot signs from multiple supervisors","author":"momeni","year":"2020","journal-title":"Proc ACCV"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210003"},{"key":"ref33","article-title":"Deep sign: Hybrid CNN-HMM for continuous sign language recognition","author":"koller","year":"2016","journal-title":"BMVC"},{"key":"ref32","article-title":"Quantitative survey of the state of the art in sign language recognition","author":"koller","year":"2020"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11903","article-title":"Video-based sign language recognition without temporal segmentation","author":"huang","year":"2018","journal-title":"AAAI"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/AFGR.1998.671007"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00624"},{"key":"ref35","article-title":"TSPNet: Hierarchical feature learning via temporal semantic pyramid for sign language translation","author":"li","year":"2020","journal-title":"NeurIPS"},{"key":"ref34","article-title":"Resign: Re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs","author":"koller","year":"2017","journal-title":"CVPR"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01030"},{"key":"ref28","article-title":"Excl: Extractive clip localization using natural language descriptions","author":"ghosh","year":"2019","journal-title":"NAACL-HLT"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2013.49"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018393"},{"key":"ref2","article-title":"Signer diarisation in the wild","author":"albanie","year":"2021"},{"key":"ref1","article-title":"BBC-Oxford British Sign Language Dataset","year":"2021"},{"key":"ref20","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683523"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.5244\/C.20.92"},{"key":"ref24","article-title":"Seeing sentence boundaries: the production and perception of visual markers signalling boundaries in signed languages","author":"fenlon","year":"2010","journal-title":"PhD thesis"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.51"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853855"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.563"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139167048"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578727"},{"key":"ref59","article-title":"Detecting coarticulation in sign language using conditional random fields","author":"yang","year":"2006","journal-title":"ICPR"},{"key":"ref58","article-title":"Sign language spotting with a threshold model based on conditional random fields","author":"yang","year":"2008","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref57","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.12328","article-title":"Spatial temporal graph convolutional networks for skeleton-based action recognition","author":"yan","year":"2018","journal-title":"AAAI"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019062"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00042"},{"key":"ref54","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01658"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298792"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.332"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00812"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-66096-3_17"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"ref13","article-title":"End-to-end object detection with transformers","author":"carion","year":"2020","journal-title":"ECCV"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1015"},{"key":"ref16","article-title":"Fully convolutional networks for continuous sign language recognition","author":"cheng","year":"2020","journal-title":"ECCV"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/AFGR.2008.4813363"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206647"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01101"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.507"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3308561.3353774"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683257"},{"key":"ref8","article-title":"Employing signed TV broadcasts for automated learning of British sign language","author":"buehler","year":"2010","journal-title":"LREC Workshop on the Representation and Processing of Sign Languages"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206523"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3132525.3132559"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-66096-3_14"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2009.5457527"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.5244\/C.23.121"},{"key":"ref48","first-page":"136","article-title":"Building the British Sign Language Corpus","volume":"7","author":"schembri","year":"2013","journal-title":"Journal of Language Documentation and Conservation"},{"key":"ref47","article-title":"British Sign Language Corpus Project: A corpus of digital video data and annotations of British Sign Language 2008-2017 (Third Edition)","author":"schembri","year":"2017"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.248"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1981.tb00272.x"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413817"},{"key":"ref43","article-title":"Large-scale learning of sign language by watching TV (using co-occurrences)","author":"pfister","year":"2013","journal-title":"Proc BMVC"}],"event":{"name":"2021 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Montreal, QC, Canada","start":{"date-parts":[[2021,10,10]]},"end":{"date-parts":[[2021,10,17]]}},"container-title":["2021 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9709627\/9709628\/09710309.pdf?arnumber=9710309","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,18]],"date-time":"2023-11-18T01:26:04Z","timestamp":1700270764000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9710309\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":61,"URL":"https:\/\/doi.org\/10.1109\/iccv48922.2021.01135","relation":{},"subject":[],"published":{"date-parts":[[2021,10]]}}}