{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T11:44:29Z","timestamp":1762429469382,"version":"3.40.3"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030695439"},{"type":"electronic","value":"9783030695446"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-69544-6_18","type":"book-chapter","created":{"date-parts":[[2021,2,25]],"date-time":"2021-02-25T11:05:00Z","timestamp":1614251100000},"page":"291-308","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Watch, Read and Lookup: Learning to Spot Signs from Multiple Supervisors"],"prefix":"10.1007","author":[{"given":"Liliane","family":"Momeni","sequence":"first","affiliation":[]},{"given":"G\u00fcl","family":"Varol","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Albanie","sequence":"additional","affiliation":[]},{"given":"Triantafyllos","family":"Afouras","sequence":"additional","affiliation":[]},{"given":"Andrew","family":"Zisserman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,26]]},"reference":[{"key":"18_CR1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139167048","volume-title":"The Linguistics of British Sign Language: An Introduction","author":"R Sutton-Spence","year":"1999","unstructured":"Sutton-Spence, R., Woll, B.: The Linguistics of British Sign Language: An Introduction. Cambridge University Press, London (1999)"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Coucke, A., Chlieh, M., Gisselbrecht, T., Leroy, D., Poumeyrol, M., Lavril, T.: Efficient keyword spotting using dilated convolutions and gating. In: ICASSP (2019)","DOI":"10.1109\/ICASSP.2019.8683474"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"V\u00e9niat, T., Schwander, O., Denoyer, L.: Stochastic adaptive neural architecture search for keyword spotting. In: ICASSP (2019)","DOI":"10.1109\/ICASSP.2019.8683305"},{"key":"18_CR4","unstructured":"Momeni, L., Afouras, T., Stafylakis, T., Albanie, S., Zisserman, A.: Seeing wake words: audio-visual keyword spotting. In: BMVC (2020)"},{"key":"18_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"536","DOI":"10.1007\/978-3-030-01225-0_32","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Stafylakis","year":"2018","unstructured":"Stafylakis, T., Tzimiropoulos, G.: Zero-shot keyword spotting for visual speech recognition in-the-wild. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 536\u2013552. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_32"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Senior, A., Vinyals, O., Zisserman, A.: Lip reading sentences in the wild. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.367"},{"key":"18_CR7","unstructured":"Afouras, T., Chung, J.S., Zisserman, A.: LRS3-TED: a large-scale dataset for visual speech recognition. arXiv preprint arXiv:1809.00496 (2018)"},{"key":"18_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/978-3-030-58621-8_3","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Albanie","year":"2020","unstructured":"Albanie, S., et al.: BSL-1K: scaling up co-articulated sign language recognition using mouthing cues. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12356, pp. 35\u201353. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58621-8_3"},{"key":"18_CR9","unstructured":"Schembri, A., Fenlon, J., Rentelis, R., Cormier, K.: British Sign Language Corpus Project: A corpus of digital video data and annotations of British Sign Language 2008\u20132017 (Third Edition) (2017)"},{"key":"18_CR10","unstructured":"Gutmann, M., Hyv\u00e4rinen, A.: Noise-contrastive estimation: a new estimation principle for unnormalized statistical models. In: Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, pp. 297\u2013304 (2010)"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Kadir, T., Bowden, R., Ong, E.J., Zisserman, A.: Minimal training, large lexicon, unconstrained sign language recognition. In: Proceedings of the BMVC (2004)","DOI":"10.5244\/C.18.96"},{"key":"18_CR12","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1016\/0031-3203(88)90048-9","volume":"21","author":"S Tamura","year":"1988","unstructured":"Tamura, S., Kawasaki, S.: Recognition of sign language motion images. Pattern Recogn. 21, 343\u2013353 (1988)","journal-title":"Pattern Recogn."},{"key":"18_CR13","unstructured":"Starner, T.: Visual recognition of American sign language using hidden Markov models. Master\u2019s thesis, Massachusetts Institute of Technology (1995)"},{"key":"18_CR14","unstructured":"Fillbrandt, H., Akyol, S., Kraiss, K.: Extraction of 3D hand shape and posture from image sequences for sign language recognition. In: IEEE International SOI Conference (2003)"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Buehler, P., Everingham, M., Zisserman, A.: Learning sign language by watching TV (using weakly aligned subtitles). In: Proceedings of the CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206523"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Cooper, H., Pugeault, N., Bowden, R.: Reading the signs: a video based sign dictionary. In: ICCVW (2011)","DOI":"10.1109\/ICCVW.2011.6130349"},{"key":"18_CR17","unstructured":"Ong, E., Cooper, H., Pugeault, N., Bowden, R.: Sign language recognition using sequential pattern trees. In: CVPR (2012)"},{"key":"18_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1007\/978-3-319-10599-4_52","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T Pfister","year":"2014","unstructured":"Pfister, T., Charles, J., Zisserman, A.: Domain-adaptive discriminative one-shot learning of gestures. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 814\u2013829. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_52"},{"key":"18_CR19","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1007\/s10209-007-0104-x","volume":"6","author":"U Agris","year":"2008","unstructured":"Agris, U., Zieren, J., Canzler, U., Bauer, B., Kraiss, K.F.: Recent developments in visual sign language recognition. Univ. Access Inf. Soc. 6, 323\u2013362 (2008)","journal-title":"Univ. Access Inf. Soc."},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Forster, J., Oberd\u00f6rfer, C., Koller, O., Ney, H.: Modality combination techniques for continuous sign language recognition. In: Pattern Recognition and Image Analysis (2013)","DOI":"10.1007\/978-3-642-38628-2_10"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Camgoz, N.C., Hadfield, S., Koller, O., Bowden, R.: SubUNets: end-to-end hand shape and continuous sign language recognition. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.332"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Huang, J., Zhou, W., Zhang, Q., Li, H., Li, W.: Video-based sign language recognition without temporal segmentation. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11903"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Ye, Y., Tian, Y., Huenerfauth, M., Liu, J.: Recognizing American sign language gestures from within continuous videos. In: CVPRW (2018)","DOI":"10.1109\/CVPRW.2018.00280"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, W., Zhou, Y., Li, H.: Spatial-temporal multi-cue network for continuous sign language recognition. CoRR abs\/2002.03187 (2020)","DOI":"10.1109\/TMM.2021.3059098"},{"key":"18_CR25","unstructured":"Camgoz, N.C., Koller, O., Hadfield, S., Bowden, R.: Sign language transformers: joint end-to-end sign language recognition and translation. In: CVPR (2020)"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, action recognition? A new model and the Kinetics dataset. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"18_CR27","unstructured":"Joze, H.R.V., Koller, O.: MS-ASL: a large-scale data set and benchmark for understanding American sign language. In: BMVC (2019)"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Li, D., Opazo, C.R., Yu, X., Li, H.: Word-level deep sign language recognition from video: a new large-scale dataset and methods comparison. In: WACV (2019)","DOI":"10.1109\/WACV45572.2020.9093512"},{"key":"18_CR29","unstructured":"Viitaniemi, V., Jantunen, T., Savolainen, L., Karppa, M., Laaksonen, J.: S-pot - a benchmark in spotting signs within continuous signing. In: LREC (2014)"},{"key":"18_CR30","unstructured":"Eng-Jon Ong, Koller, O., Pugeault, N., Bowden, R.: Sign spotting using hierarchical sequential patterns with temporal intervals. In: CVPR (2014)"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Farhadi, A., Forsyth, D.A., White, R.: Transfer learning in sign language. In: CVPR (2007)","DOI":"10.1109\/CVPR.2007.383346"},{"key":"18_CR32","unstructured":"Bilge, Y.C., Ikizler, N., Cinbis, R.: Zero-shot sign language recognition: can textual data uncover sign languages? In: BMVC (2019)"},{"key":"18_CR33","unstructured":"Motiian, S., Jones, Q., Iranmanesh, S.M., Doretto, G.: Few-shot adversarial domain adaptation. In: NeurIPS (2017)"},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, J., Chen, Z., Huang, J., Lin, L., Zhang, D.: Few-shot structured domain adaptation for virtual-to-real scene parsing. In: ICCVW (2019)","DOI":"10.1109\/ICCVW.2019.00008"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Chang, W.G., You, T., Seo, S., Kwak, S., Han, B.: Domain-specific batch normalization for unsupervised domain adaptation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00753"},{"key":"18_CR36","doi-asserted-by":"crossref","unstructured":"Li, D., Yu, X., Xu, C., Petersson, L., Li, H.: Transferring cross-domain knowledge for video sign language recognition. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00624"},{"key":"18_CR37","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1016\/j.cviu.2015.09.013","volume":"141","author":"O Koller","year":"2015","unstructured":"Koller, O., Forster, J., Ney, H.: Continuous sign language recognition: towards large vocabulary statistical recognition systems handling multiple signers. Comput. Vis. Image Underst. 141, 108\u2013125 (2015)","journal-title":"Comput. Vis. Image Underst."},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"von Agris, U., Knorr, M., Kraiss, K.: The significance of facial features for automatic sign language recognition. In: 2008 8th IEEE International Conference on Automatic Face Gesture Recognition (2008)","DOI":"10.1109\/AFGR.2008.4813472"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Athitsos, V., et al.: The American sign language lexicon video dataset. In: CVPRW (2008)","DOI":"10.1109\/CVPRW.2008.4563181"},{"key":"18_CR40","unstructured":"Wilbur, R.B., Kak, A.C.: Purdue RVL-SLLL American sign language database. School of Electrical and Computer Engineering Technical report, TR-06-12, Purdue University, W. Lafayette, IN 47906 (2006)"},{"key":"18_CR41","unstructured":"Chai, X., Wang, H., Chen, X.: The devisign large vocabulary of Chinese sign language database and baseline evaluations. Technical report VIPL-TR-14-SLR-001. Key Lab of Intelligent Information Processing of Chinese Academy of Sciences (CAS), Institute of Computing Technology, CAS (2014)"},{"key":"18_CR42","first-page":"136","volume":"7","author":"A Schembri","year":"2013","unstructured":"Schembri, A., Fenlon, J., Rentelis, R., Reynolds, S., Cormier, K.: Building the British sign language corpus. Lang. Document. Conserv. 7, 136\u2013154 (2013)","journal-title":"Lang. Document. Conserv."},{"key":"18_CR43","doi-asserted-by":"crossref","unstructured":"Cooper, H., Bowden, R.: Learning signs from subtitles: a weakly supervised approach to sign language recognition. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206647"},{"key":"18_CR44","unstructured":"Chung, J.S., Zisserman, A.: Signs in time: encoding human motion as a temporal image. In: Workshop on Brave New Ideas for Motion Representations, ECCV (2016)"},{"key":"18_CR45","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/S0004-3702(96)00034-3","volume":"89","author":"TG Dietterich","year":"1997","unstructured":"Dietterich, T.G., Lathrop, R.H., Lozano-P\u00e9rez, T.: Solving the multiple instance problem with axis-parallel rectangles. Artif. Intell. 89, 31\u201371 (1997)","journal-title":"Artif. Intell."},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Pfister, T., Charles, J., Zisserman, A.: Large-scale learning of sign language by watching TV (using co-occurrences). In: BMVC (2013)","DOI":"10.5244\/C.27.20"},{"key":"18_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/978-3-030-01264-9_4","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Feng","year":"2018","unstructured":"Feng, Y., Ma, L., Liu, W., Zhang, T., Luo, J.: Video re-localization. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 55\u201370. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_4"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Yang, H., He, X., Porikli, F.: One-shot action localization by learning sequence matching network. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00157"},{"key":"18_CR49","doi-asserted-by":"crossref","unstructured":"Cao, K., Ji, J., Cao, Z., Chang, C.Y., Niebles, J.C.: Few-shot video classification via temporal alignment. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01063"},{"key":"18_CR50","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S.X., Lin, D.: Unsupervised feature learning via non-parametric instance discrimination. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00393"},{"key":"18_CR52","doi-asserted-by":"crossref","unstructured":"Miech, A., Alayrac, J.B., Smaira, L., Laptev, I., Sivic, J., Zisserman, A.: End-to-end learning of visual representations from uncurated instructional videos. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"18_CR53","unstructured":"https:\/\/www.signbsl.com\/. (British sign language dictionary)"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Cao, Z., Hidalgo, G., Simon, T., Wei, S.E., Sheikh, Y.: OpenPose: realtime multi-person 2D pose estimation using Part Affinity Fields. In: arXiv preprint arXiv:1812.08008 (2018)","DOI":"10.1109\/CVPR.2017.143"},{"key":"18_CR55","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"18_CR56","doi-asserted-by":"crossref","unstructured":"SignumMcKee, D., Kennedy, G.: Lexical comparison of signs from American, Australian, British and New Zealand sign languages. An anthology to honor Ursula Bellugi and Edward Klima, The signs of language revisited (2000)","DOI":"10.1353\/lan.2001.0231"},{"key":"18_CR57","first-page":"123","volume":"2","author":"R Aldersson","year":"2007","unstructured":"Aldersson, R., McEntee-Atalianis, L.: A lexical comparison of Icelandic sign language and Danish sign language. Birkbeck Stud. Appl. Ling. 2, 123\u2013158 (2007)","journal-title":"Birkbeck Stud. Appl. Ling."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2020"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-69544-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,18]],"date-time":"2022-12-18T23:49:32Z","timestamp":1671407372000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-69544-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030695439","9783030695446"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-69544-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"26 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2020.kyoto\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"768","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"254","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}