{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T11:05:09Z","timestamp":1759230309984,"version":"3.37.3"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,2,20]],"date-time":"2021-02-20T00:00:00Z","timestamp":1613779200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,20]],"date-time":"2021-02-20T00:00:00Z","timestamp":1613779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100010684","name":"H2020 Spreading Excellence and Widening Participation","doi-asserted-by":"publisher","award":["739551"],"award-info":[{"award-number":["739551"]}],"id":[{"id":"10.13039\/100010684","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2021,8]]},"DOI":"10.1007\/s11554-021-01077-z","type":"journal-article","created":{"date-parts":[[2021,2,20]],"date-time":"2021-02-20T16:51:26Z","timestamp":1613839886000},"page":"1421-1433","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["$$\\text{C}^{3}\\text{Net}$$: end-to-end deep learning for efficient real-time visual active camera control"],"prefix":"10.1007","volume":"18","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7926-7642","authenticated-orcid":false,"given":"Christos","family":"Kyrkou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,20]]},"reference":[{"key":"1077_CR1","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., 
Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man\u00e9, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi\u00e9gas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: Tensorflow: a system for large-scale machine learning. In: Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation, OSDI\u201916, pp. 265\u2013283. USENIX Association, Berkeley, CA, USA (2016). http:\/\/dl.acm.org\/citation.cfm?id=3026877.3026899"},{"key":"1077_CR2","volume-title":"Beyond the Static Camera: Issues and Trends in Active Vision","author":"M Al Haj","year":"2011","unstructured":"Al Haj, M., Fern\u00e1ndez, C., Xiong, Z., Huerta, I., Gonz\u00e0lez, J., Roca, X.: Beyond the Static Camera: Issues and Trends in Active Vision. Springer, London (2011)"},{"key":"1077_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11554-016-0569-z","volume":"16","author":"F Al Machot","year":"2019","unstructured":"Al Machot, F., Ali, M., Haj Mosa, A.: Real-time raindrop detection based on cellular neural networks for ADAS. J Real Time Image Proc. 16, 1 (2019)","journal-title":"J Real Time Image Proc."},{"key":"1077_CR4","doi-asserted-by":"publisher","unstructured":"Angella, F., Reithler, L., Gallesio, F.: Optimal deployment of cameras for video surveillance systems. In: 2007 IEEE Conference on Advanced Video and Signal Based Surveillance, pp. 388\u2013392 (2007). https:\/\/doi.org\/10.1109\/AVSS.2007.4425342","DOI":"10.1109\/AVSS.2007.4425342"},{"key":"1077_CR5","doi-asserted-by":"publisher","unstructured":"Bateux, Q., Marchand, E., Leitner, J., Chaumette, F., Corke, P.: Training deep neural networks for visual servoing. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 3307\u20133314 (2018). 
https:\/\/doi.org\/10.1109\/ICRA.2018.8461068","DOI":"10.1109\/ICRA.2018.8461068"},{"key":"1077_CR6","doi-asserted-by":"publisher","unstructured":"Bernardin, K., van\u00a0de Camp, F., Stiefelhagen, R.: Automatic person detection and tracking using fuzzy controlled active cameras. In: 2007 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20138 (2007). https:\/\/doi.org\/10.1109\/CVPR.2007.383502","DOI":"10.1109\/CVPR.2007.383502"},{"key":"1077_CR7","doi-asserted-by":"publisher","unstructured":"Bewley, A., Ge, Z., Ott, L., Ramos, F., Upcroft, B.: Simple on-line and realtime tracking. In: 2016 IEEE International Conference on Image Processing (ICIP), pp. 3464\u20133468 (2016). https:\/\/doi.org\/10.1109\/ICIP.2016.7533003","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"1077_CR8","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-85729-127-1","volume-title":"Distributed Video Sensor Networks","author":"B Bhanu","year":"2011","unstructured":"Bhanu, B., Ravishankar, C.V., Roy-Chowdhury, A.K., Aghajan, H., Terzopoulos, D.: Distributed Video Sensor Networks, 1st edn. Springer Publishing Company, Incorporated, Berlin (2011)","edition":"1"},{"key":"1077_CR9","doi-asserted-by":"publisher","unstructured":"Biswas, A., Guha, P., Mukerjee, A., Venkatesh, K.S.: Intrusion detection and tracking with pan-tilt cameras. In: 2006 IET International Conference on Visual Information Engineering, pp. 565\u2013571 (2006). https:\/\/doi.org\/10.1049\/cp:20060593","DOI":"10.1049\/cp:20060593"},{"key":"1077_CR10","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4614-7705-1","volume-title":"Distributed Embedded Smart Cameras: Architectures, Design and Applications","author":"C Bobda","year":"2014","unstructured":"Bobda, C., Velipasalar, S.: Distributed Embedded Smart Cameras: Architectures, Design and Applications, 1st edn. 
Springer, New York (2014)","edition":"1"},{"key":"1077_CR11","doi-asserted-by":"publisher","unstructured":"Bo Bo, N., Deboeverie, F., Veelaert, P., Philips, W.: Real-time multi-people tracking by greedy likelihood maximization. In: Proceedings of the 9th International Conference on Distributed Smart Cameras, ICDSC \u201915, pp. 32\u201337. ACM, New York (2015). https:\/\/doi.org\/10.1145\/2789116.2789125. http:\/\/doi.acm.org\/10.1145\/2789116.2789125","DOI":"10.1145\/2789116.2789125"},{"key":"1077_CR12","doi-asserted-by":"crossref","unstructured":"Bodla, N., Singh, B., Chellappa, R., Davis, L.S.: Soft-nms \u2013 improving object detection with one line of code. In: Proceedings of the IEEE international conference on computer vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.593"},{"key":"1077_CR13","unstructured":"Bojarski, M., Testa, D.D., Dworakowski, D., Firner, B., Flepp, B., Goyal, P., Jackel, L.D., Monfort, M., Muller, U., Zhang, J., Zhang, X., Zhao, J., Zieba, K.: End to end learning for self-driving cars. CoRR (2016). arXiv:1604.07316"},{"key":"1077_CR14","unstructured":"Bradski, G.: The OpenCV library. Dr. Dobb\u2019s J. Softw. Tools (2000)"},{"key":"1077_CR15","doi-asserted-by":"publisher","unstructured":"Campmany, V., Silva, S., Espinosa, A., Moure, J., V\u00e1zquez, D., L\u00f3pez, A.: GPU-based pedestrian detection for autonomous driving. Procedia Comput. Sci. 80, 2377\u20132381 (2016). https:\/\/doi.org\/10.1016\/j.procs.2016.05.455. International Conference on Computational Science 2016, ICCS 2016, 6\u20138 June 2016, San Diego, California, USA","DOI":"10.1016\/j.procs.2016.05.455"},{"key":"1077_CR16","doi-asserted-by":"crossref","unstructured":"Chahyati, D., Fanany, M.I., Arymurthy, A.M.: Tracking people by detection using cnn features. 
Procedia Computer Science, 124, 167\u2013172 (2017)","DOI":"10.1016\/j.procs.2017.12.143"},{"key":"1077_CR17","doi-asserted-by":"publisher","unstructured":"Chen, H., Zhao, X., Tan, M.: A novel pan-tilt camera control approach for visual tracking. In: Proceeding of the 11th World Congress on Intelligent Control and Automation, pp. 2860\u20132865 (2014). https:\/\/doi.org\/10.1109\/WCICA.2014.7053182","DOI":"10.1109\/WCICA.2014.7053182"},{"key":"1077_CR18","unstructured":"Chollet, F.: Keras (2015). https:\/\/github.com\/fchollet\/keras"},{"key":"1077_CR19","doi-asserted-by":"publisher","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR\u201905), vol.\u00a01, pp. 886\u2013893 (2005). https:\/\/doi.org\/10.1109\/CVPR.2005.177","DOI":"10.1109\/CVPR.2005.177"},{"key":"1077_CR20","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/978-3-642-04091-7_22","volume-title":"Computational Intelligence in Security for Information Systems","author":"PS Dhillon","year":"2009","unstructured":"Dhillon, P.S.: Robust real-time face tracking using an active camera. In: Herrero, \u00c1., Gastaldo, P., Zunino, R., Corchado, E. (eds.) Computational Intelligence in Security for Information Systems, pp. 179\u2013186. Springer, Berlin (2009)"},{"issue":"7","key":"1077_CR21","doi-asserted-by":"publisher","first-page":"3282","DOI":"10.1109\/TIP.2012.2188806","volume":"21","author":"C Ding","year":"2012","unstructured":"Ding, C., Song, B., Morye, A., Farrell, J.A., Roy-Chowdhury, A.K.: Collaborative sensing in a distributed PTZ camera network. IEEE Trans. Image Process. 21(7), 3282\u20133295 (2012). https:\/\/doi.org\/10.1109\/TIP.2012.2188806","journal-title":"IEEE Trans. Image Process."},{"key":"1077_CR22","doi-asserted-by":"publisher","unstructured":"Dinh, T., Yu, Q., Medioni, G.: Real time tracking using an active pan-tilt-zoom network camera. 
In: 2009 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 3786\u20133793 (2009). https:\/\/doi.org\/10.1109\/IROS.2009.5353915","DOI":"10.1109\/IROS.2009.5353915"},{"key":"1077_CR23","doi-asserted-by":"crossref","unstructured":"Fan, H., Ling, H.: Siamese cascaded region proposal networks for real-time visual tracking. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 7944\u20137953 (2019)","DOI":"10.1109\/CVPR.2019.00814"},{"key":"1077_CR24","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Detect to track and track to detect. In: Proceedings of the IEEE international conference on computer Vision (ICCV), pp. 3038\u20133046 (2017)","DOI":"10.1109\/ICCV.2017.330"},{"key":"1077_CR25","doi-asserted-by":"publisher","unstructured":"Ferryman, J., Shahrokni, A.: Pets2009: Dataset and challenge. In: 2009 Twelfth IEEE International Workshop on Performance Evaluation of Tracking and Surveillance, pp. 1\u20136 (2009). https:\/\/doi.org\/10.1109\/PETS-WINTER.2009.5399556","DOI":"10.1109\/PETS-WINTER.2009.5399556"},{"key":"1077_CR26","doi-asserted-by":"publisher","unstructured":"Haj, M.A., Bagdanov, A.D., Gonzalez, J., Roca, F.: Reactive object tracking with a single PTZ camera. In: 2010 20th International Conference on Pattern Recognition, pp. 1690\u20131693 (2010). https:\/\/doi.org\/10.1109\/ICPR.2010.418","DOI":"10.1109\/ICPR.2010.418"},{"key":"1077_CR27","doi-asserted-by":"crossref","unstructured":"Hosang, J., Benenson, R., Schiele, B.: Learning non-maximum suppression. In: The IEEE conference on computer vision and pattern recognition (CVPR), pp. 4507\u20134515 (2017)","DOI":"10.1109\/CVPR.2017.685"},{"key":"1077_CR28","doi-asserted-by":"crossref","unstructured":"Kiran, M., Tiwari, V., Nguyen-Meidine, L., Morin, L., Granger, E.: On the interaction between deep detectors and siamese trackers in video surveillance. 
In: 2019 16th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS). IEEE Computer Society, Los Alamitos, CA, USA, pp. 1\u20138 (2019)","DOI":"10.1109\/AVSS.2019.8909864"},{"key":"1077_CR29","first-page":"373","volume-title":"Collaborative Face Recognition Using a Network of Embedded Cameras","author":"V Kulathumani","year":"2011","unstructured":"Kulathumani, V., Parupati, S., Ross, A., Jillela, R.: Collaborative Face Recognition Using a Network of Embedded Cameras, pp. 373\u2013387. Springer, London (2011)"},{"key":"1077_CR30","unstructured":"Kyrkou, C., Christoforou, E.G., Timotheou, S., Theocharides, T., Panayiotou, C., Polycarpou, M.: Optimizing the detection performance of smart camera networks through a probabilistic image-based model. In: IEEE transactions on circuits and systems for video technology 28(5), 1197\u20131211 (2018)"},{"key":"1077_CR31","doi-asserted-by":"crossref","unstructured":"Ser-Nam L., Elgammal, A., Davis, L.S.: Image-based pan-tilt camera control in a multi-camera surveillance environment. In: 2003 International conference on multimedia and Expo. ICME\u201903. Proceedings (Cat. No.03TH8698), vol. 1, pp. I\u2013645 (2003)","DOI":"10.1109\/ICME.2003.1221000"},{"key":"1077_CR32","unstructured":"Luo, W., Sun, P., Zhong, F., Liu, W., Zhang, T., Wang, Y.: End-to-end active object tracking via reinforcement learning. In: Proceedings of the 35th international conference on machine learning, proceedings of machine learning research, vol. 80, pp. 3286\u20133295 (2018)"},{"key":"1077_CR33","doi-asserted-by":"crossref","unstructured":"Miao, X., Zhen, X., Liu, X., Deng, C., Athitsos, V., Huang, H.: Direct shape regression networks for end-to-end face alignment. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp. 
5040\u20135049 (2018)","DOI":"10.1109\/CVPR.2018.00529"},{"issue":"5","key":"1077_CR34","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1109\/MSP.2010.937333","volume":"27","author":"C Micheloni","year":"2010","unstructured":"Micheloni, C., Rinner, B., Foresti, G.L.: Video analysis in pan-tilt-zoom camera networks. IEEE Signal Process. Mag. 27(5), 78\u201390 (2010). https:\/\/doi.org\/10.1109\/MSP.2010.937333","journal-title":"IEEE Signal Process. Mag."},{"key":"1077_CR35","doi-asserted-by":"crossref","unstructured":"Neff, C., Mendieta, M., Mohan, S., Baharani, M., Rogers, S., Tabkhi, H.: Revamp2t: Real-time edge video analytics for multi camera privacy-aware pedestrian tracking. IEEE Int of Things J 7(4), 2591\u20132602 (2020)","DOI":"10.1109\/JIOT.2019.2954804"},{"issue":"11","key":"1077_CR36","first-page":"138","volume":"5","author":"HR Patil","year":"2015","unstructured":"Patil, H.R., Bhagat, K.S.: Detection and tracking of moving objects; a survey. Int. J. Eng. Res. Appl. 5(11), 138\u2013142 (2015)","journal-title":"Int. J. Eng. Res. Appl."},{"key":"1077_CR37","unstructured":"Pflugfelder, R.P.: Siamese learning visual tracking: a survey. CoRR (2017). arXiv:1707.00569"},{"key":"1077_CR38","doi-asserted-by":"publisher","unstructured":"Redmon, J., Farhadi, A.: Yolo9000: Better, faster, stronger. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6517\u20136525 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.690","DOI":"10.1109\/CVPR.2017.690"},{"key":"1077_CR39","doi-asserted-by":"publisher","unstructured":"Salih, Y., Malik, A.S.: Depth and geometry from a single 2d image using triangulation. In: 2012 IEEE International Conference on Multimedia and Expo Workshops, pp. 511\u2013515 (2012). 
https:\/\/doi.org\/10.1109\/ICMEW.2012.95","DOI":"10.1109\/ICMEW.2012.95"},{"key":"1077_CR40","doi-asserted-by":"publisher","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-cam: Visual explanations from deep networks via gradient-based localization. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 618\u2013626 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.74","DOI":"10.1109\/ICCV.2017.74"},{"issue":"11","key":"1077_CR41","doi-asserted-by":"publisher","first-page":"1379","DOI":"10.1007\/s00371-015-1206-8","volume":"32","author":"R Wang","year":"2016","unstructured":"Wang, R., Dong, H., Han, T.X., Mei, L.: Robust tracking via monocular active vision for an intelligent teaching system. Vis. Comput. 32(11), 1379\u20131394 (2016). https:\/\/doi.org\/10.1007\/s00371-015-1206-8","journal-title":"Vis. Comput."},{"issue":"11","key":"1077_CR42","doi-asserted-by":"publisher","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","volume":"30","author":"Z Zhao","year":"2019","unstructured":"Zhao, Z., Zheng, P., Xu, S., Wu, X.: Object detection with deep learning: a review. IEEE Trans. Neural Netw. Learn. Syst. 30(11), 3212\u20133232 (2019). https:\/\/doi.org\/10.1109\/TNNLS.2018.2876865","journal-title":"IEEE Trans. Neural Netw. Learn. 
Syst."}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-021-01077-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-021-01077-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-021-01077-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T15:56:41Z","timestamp":1697903801000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-021-01077-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,20]]},"references-count":42,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,8]]}},"alternative-id":["1077"],"URL":"https:\/\/doi.org\/10.1007\/s11554-021-01077-z","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"type":"print","value":"1861-8200"},{"type":"electronic","value":"1861-8219"}],"subject":[],"published":{"date-parts":[[2021,2,20]]},"assertion":[{"value":"31 March 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 February 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}