{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:25:13Z","timestamp":1743121513496,"version":"3.40.3"},"publisher-location":"Cham","reference-count":54,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030585730"},{"type":"electronic","value":"9783030585747"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58574-7_36","type":"book-chapter","created":{"date-parts":[[2020,11,12]],"date-time":"2020-11-12T16:19:07Z","timestamp":1605197947000},"page":"598-614","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Tracking Emerges by Looking Around Static Scenes, with Neural 3D Mapping"],"prefix":"10.1007","author":[{"given":"Adam W.","family":"Harley","sequence":"first","affiliation":[]},{"given":"Shrinidhi Kowshika","family":"Lakshmikanth","sequence":"additional","affiliation":[]},{"given":"Paul","family":"Schydlo","sequence":"additional","affiliation":[]},{"given":"Katerina","family":"Fragkiadaki","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,13]]},"reference":[{"key":"36_CR1","doi-asserted-by":"crossref","unstructured":"Agrawal, P., Carreira, J., Malik, J.: Learning to see by moving. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.13"},{"key":"36_CR2","unstructured":"Bernardin, K., Elbs, A., Stiefelhagen, R.: Multiple object tracking performance metrics and evaluation in a smart room environment. In: Sixth IEEE International Workshop on Visual Surveillance, in Conjunction with ECCV, vol. 90, p. 91. Citeseer (2006)"},{"key":"36_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1007\/978-3-319-48881-3_56","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"L Bertinetto","year":"2016","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.S.: Fully-convolutional siamese networks for object tracking. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9914, pp. 850\u2013865. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48881-3_56"},{"issue":"24","key":"36_CR4","doi-asserted-by":"publisher","first-page":"8997","DOI":"10.1523\/JNEUROSCI.1529-14.2015","volume":"35","author":"A Brodski","year":"2015","unstructured":"Brodski, A., Paasch, G.F., Helbling, S., Wibral, M.: The faces of predictive coding. J. Neurosci. 35(24), 8997\u20139006 (2015)","journal-title":"J. Neurosci."},{"key":"36_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/978-3-642-15555-0_21","volume-title":"Computer Vision \u2013 ECCV 2010","author":"T Brox","year":"2010","unstructured":"Brox, T., Malik, J.: Object segmentation by long term analysis of point trajectories. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6315, pp. 282\u2013295. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15555-0_21"},{"key":"36_CR6","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. arXiv preprint arXiv:2002.05709 (2020)"},{"key":"36_CR7","doi-asserted-by":"crossref","unstructured":"Cheriyadat, A., Radke, R.J.: Non-negative matrix factorization of partial track data for motion segmentation. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459311"},{"key":"36_CR8","doi-asserted-by":"crossref","unstructured":"Costeira, J., Kanade, T.: A multi-body factorization method for motion analysis. In: ICCV (1995)","DOI":"10.21236\/ADA295489"},{"key":"36_CR9","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., Koltun, V.: CARLA: an open urban driving simulator. In: CORL, pp. 1\u201316 (2017)"},{"key":"36_CR10","doi-asserted-by":"publisher","unstructured":"Eslami, S.M.A., et al.: Neural scene representation and rendering. Science 360(6394), 1204\u20131210 (2018). https:\/\/doi.org\/10.1126\/science.aar6170","DOI":"10.1126\/science.aar6170"},{"key":"36_CR11","unstructured":"Florence, P.R., Manuelli, L., Tedrake, R.: Dense object nets: learning dense visual object descriptors by and for robotic manipulation. In: CoRL (2018)"},{"key":"36_CR12","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Shi, J.: Exploiting motion and topology for segmenting and tracking under entanglement. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995366"},{"issue":"7","key":"36_CR13","doi-asserted-by":"publisher","first-page":"999","DOI":"10.3758\/BF03194829","volume":"65","author":"SL Franconeri","year":"2003","unstructured":"Franconeri, S.L., Simons, D.J.: Moving and looming stimuli capture attention. Perception & psychophysics 65(7), 999\u20131010 (2003). https:\/\/doi.org\/10.3758\/BF03194829","journal-title":"Perception & psychophysics"},{"issue":"9","key":"36_CR14","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1016\/j.neunet.2003.06.005","volume":"16","author":"K Friston","year":"2003","unstructured":"Friston, K.: Learning and inference in the brain. Neural Netw. 16(9), 1325\u20131352 (2003)","journal-title":"Neural Netw."},{"key":"36_CR15","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger, A., Lenz, P., Stiller, C., Urtasun, R.: Vision meets robotics: the kitti dataset. Int. J. Robot. Res. (IJRR) 32, 1231\u20131237 (2013)","journal-title":"Int. J. Robot. Res. (IJRR)"},{"key":"36_CR16","volume-title":"The Ecological Approach to Visual Perception","author":"JJ Gibson","year":"1979","unstructured":"Gibson, J.J.: The Ecological Approach to Visual Perception. Houghton Mifflin, Boston (1979)"},{"key":"36_CR17","unstructured":"Harley, A.W., Lakshmikanth, S.K., Li, F., Zhou, X., Tung, H.Y.F., Fragkiadaki, K.: Learning from unlabelled videos using contrastive predictive neural 3D mapping. In: ICLR (2020)"},{"key":"36_CR18","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"36_CR19","doi-asserted-by":"crossref","unstructured":"Jayaraman, D., Grauman, K.: Learning image representations tied to ego-motion. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.166"},{"key":"36_CR20","unstructured":"Kar, A., H\u00e4ne, C., Malik, J.: Learning a multi-view stereo machine. In: NIPS (2017)"},{"key":"36_CR21","doi-asserted-by":"crossref","unstructured":"Kato, H., Ushiku, Y., Harada, T.: Neural 3D mesh renderer. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00411"},{"key":"36_CR22","doi-asserted-by":"crossref","unstructured":"Lai, Z., Lu, E., Xie, W.: MAST: a memory-augmented self-supervised tracker. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00651"},{"key":"36_CR23","doi-asserted-by":"crossref","unstructured":"Lee, H.Y., Huang, J.B., Singh, M., Yang, M.H.: Unsupervised representation learning by sorting sequences. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 667\u2013676 (2017)","DOI":"10.1109\/ICCV.2017.79"},{"key":"36_CR24","doi-asserted-by":"publisher","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. ACM Trans. Graph. 34(6), 248:1\u2013248:16 (2015). https:\/\/doi.org\/10.1145\/2816795.2818013, http:\/\/doi.acm.org\/10.1145\/2816795.2818013","DOI":"10.1145\/2816795.2818013"},{"issue":"6","key":"36_CR25","doi-asserted-by":"publisher","first-page":"810","DOI":"10.1109\/TPAMI.2004.16","volume":"26","author":"L Matthews","year":"2004","unstructured":"Matthews, L., Ishikawa, T., Baker, S.: The template update problem. IEEE Trans. Pattern Anal. Mach. Intell. 26(6), 810\u2013815 (2004)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"5","key":"36_CR26","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1037\/0033-295X.88.5.375","volume":"88","author":"JL McClelland","year":"1981","unstructured":"McClelland, J.L., Rumelhart, D.E.: An interactive activation model of context effects in letter perception: I. an account of basic findings. Psychol. Rev. 88(5), 375 (1981)","journal-title":"Psychol. Rev."},{"key":"36_CR27","doi-asserted-by":"crossref","unstructured":"Menze, M., Geiger, A.: Object scene flow for autonomous vehicles. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298925"},{"key":"36_CR28","unstructured":"Misra, I., Zitnick, C.L., Hebert, M.: Unsupervised learning using sequential verification for action recognition. In: ECCV (2016)"},{"key":"36_CR29","doi-asserted-by":"crossref","unstructured":"Ochs, P., Brox, T.: Object segmentation in video: a hierarchical variational approach for turning point trajectories into dense regions. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126418"},{"key":"36_CR30","volume-title":"The Cognitive Neurosciences","author":"B Olshausen","year":"2013","unstructured":"Olshausen, B.: Perception as an inference problem. In: Gazzaniga, M.S. (ed.) The Cognitive Neurosciences. MIT Press, Cambridge (2013)"},{"key":"36_CR31","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv:1807.03748 (2018)"},{"key":"36_CR32","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/S0166-4115(08)60738-4","volume":"78","author":"AE Patla","year":"1991","unstructured":"Patla, A.E.: Visual control of human locomotion. Adv. Psychol. 78, 55\u201397 (1991). Elsevier","journal-title":"Adv. Psychol."},{"issue":"8","key":"36_CR33","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1167\/15.8.13","volume":"15","author":"Y Pinto","year":"2015","unstructured":"Pinto, Y., van Gaal, S., de Lange, F.P., Lamme, V.A., Seth, A.K.: Expectations accelerate entry of visual stimuli into awareness. J. Vis. 15(8), 13\u201313 (2015)","journal-title":"J. Vis."},{"key":"36_CR34","unstructured":"Pont-Tuset, J., Perazzi, F., Caelles, S., Arbel\u00e1ez, P., Sorkine-Hornung, A., Van Gool, L.: The 2017 davis challenge on video object segmentation. arXiv:1704.00675 (2017)"},{"key":"36_CR35","unstructured":"Rahimi, A., Recht, B.: Random features for large-scale kernel machines. In: Advances in Neural Information Processing Systems, pp. 1177\u20131184 (2008)"},{"issue":"1","key":"36_CR36","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1038\/4580","volume":"2","author":"RP Rao","year":"1999","unstructured":"Rao, R.P., Ballard, D.H.: Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects. Nat. Neurosci. 2(1), 79 (1999)","journal-title":"Nat. Neurosci."},{"key":"36_CR37","unstructured":"Roberts, L.: Machine perception of three-dimensional solids. Ph.D. thesis, MIT (1965)"},{"issue":"5306","key":"36_CR38","doi-asserted-by":"publisher","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W Schultz","year":"1997","unstructured":"Schultz, W., Dayan, P., Montague, P.R.: A neural substrate of prediction and reward. Science 275(5306), 1593\u20131599 (1997)","journal-title":"Science"},{"key":"36_CR39","unstructured":"Sohn, K.: Improved deep metric learning with multi-class N-pair loss objective. In: NIPS, pp. 1857\u20131865 (2016)"},{"key":"36_CR40","doi-asserted-by":"crossref","unstructured":"Tatarchenko, M., Dosovitskiy, A., Brox, T.: Single-view to multi-view: reconstructing unseen views with a convolutional network. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46478-7_20"},{"key":"36_CR41","doi-asserted-by":"publisher","unstructured":"Tomasi, C., Kanade, T.: Shape and motion from image streams under orthography: a factorization method. Int. J. Comput. Vis. 9(2), 137\u2013154 (Nov 1992). https:\/\/doi.org\/10.1007\/BF00129684","DOI":"10.1007\/BF00129684"},{"key":"36_CR42","doi-asserted-by":"crossref","unstructured":"Tulsiani, S., Zhou, T., Efros, A.A., Malik, J.: Multi-view supervision for single-view reconstruction via differentiable ray consistency. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.30"},{"key":"36_CR43","doi-asserted-by":"crossref","unstructured":"Tung, H.Y.F., Cheng, R., Fragkiadaki, K.: Learning spatial common sense with geometry-aware recurrent networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00270"},{"key":"36_CR44","doi-asserted-by":"crossref","unstructured":"Tung, H.F., Harley, A.W., Seto, W., Fragkiadaki, K.: Adversarial inverse graphics networks: Learning 2d-to-3d lifting and image-to-image translation with unpaired supervision. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.467"},{"key":"36_CR45","unstructured":"Vijayanarasimhan, S., Ricco, S., Schmid, C., Sukthankar, R., Fragkiadaki, K.: SFM-net: learning of structure and motion from video. arXiv:1704.07804 (2017)"},{"key":"36_CR46","doi-asserted-by":"crossref","unstructured":"Vondrick, C., Shrivastava, A., Fathi, A., Guadarrama, S., Murphy, K.: Tracking emerges by colorizing videos. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 391\u2013408 (2018)","DOI":"10.1007\/978-3-030-01261-8_24"},{"key":"36_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"835","DOI":"10.1007\/978-3-319-46478-7_51","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Walker","year":"2016","unstructured":"Walker, J., Doersch, C., Gupta, A., Hebert, M.: An uncertain future: forecasting from static images using variational autoencoders. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 835\u2013851. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_51"},{"key":"36_CR48","doi-asserted-by":"crossref","unstructured":"Wang, X., Gupta, A.: Unsupervised learning of visual representations using videos. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.320"},{"key":"36_CR49","doi-asserted-by":"crossref","unstructured":"Wang, X., Jabri, A., Efros, A.A.: Learning correspondence from the cycle-consistency of time. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00267"},{"issue":"4","key":"36_CR50","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1162\/089976602317318938","volume":"14","author":"L Wiskott","year":"2002","unstructured":"Wiskott, L., Sejnowski, T.J.: Slow feature analysis: unsupervised learning of invariances. Neural Comput. 14(4), 715\u2013770 (2002)","journal-title":"Neural Comput."},{"key":"36_CR51","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1007\/978-3-319-46466-4_22","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Wu","year":"2016","unstructured":"Wu, J., et al.: Single image 3D interpreter network. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9910, pp. 365\u2013382. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46466-4_22"},{"key":"36_CR52","unstructured":"Wu, Z., et al.: 3D shapenets: a deep representation for volumetric shapes. In: CVPR, pp. 1912\u20131920. IEEE Computer Society (2015)"},{"key":"36_CR53","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/j.tics.2006.05.002","volume":"10","author":"A Yuille","year":"2006","unstructured":"Yuille, A., Kersten, D.: Vision as Bayesian inference: analysis by synthesis? Trends Cogn. Sci. 10, 301\u2013308 (2006)","journal-title":"Trends Cogn. Sci."},{"key":"36_CR54","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.700"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58574-7_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:11:42Z","timestamp":1731370302000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58574-7_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585730","9783030585747"],"references-count":54,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58574-7_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"13 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}