{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T14:36:15Z","timestamp":1773498975973,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T00:00:00Z","timestamp":1706659200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T00:00:00Z","timestamp":1706659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s00138-023-01504-0","type":"journal-article","created":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T11:02:21Z","timestamp":1706698941000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["STARNet: spatio-temporal aware recurrent network for efficient video object detection on embedded devices"],"prefix":"10.1007","volume":"35","author":[{"given":"Mohammad","family":"Hajizadeh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohammad","family":"Sabokrou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adel","family":"Rahmani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,1,31]]},"reference":[{"key":"1504_CR1","doi-asserted-by":"crossref","unstructured":"Bertasius, G., Torresani, L., Shi, J.: Object detection in video with spatiotemporal sampling networks. 
In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 331\u2013346 (2018)","DOI":"10.1007\/978-3-030-01258-8_21"},{"key":"1504_CR2","doi-asserted-by":"crossref","unstructured":"Bewley, A., Ge, Z., Ott, L., Ramos, F., Upcroft, B.: Simple online and realtime tracking. In: 2016 IEEE International Conference on Image Processing (ICIP), IEEE. pp. 3464\u20133468 (2016)","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"1504_CR3","doi-asserted-by":"crossref","unstructured":"Chen, L., Ai, H., Zhuang, Z., Shang, C.: Real-time multiple people tracking with deeply learned candidate selection and person re-identification. In: 2018 IEEE International Conference on Multimedia and Expo (ICME), IEEE, pp. 1\u20136 (2018)","DOI":"10.1109\/ICME.2018.8486597"},{"key":"1504_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Y., Cao, Y., Hu, H., Wang, L.: Memory enhanced global-local aggregation for video object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10337\u201310346 (2020)","DOI":"10.1109\/CVPR42600.2020.01035"},{"key":"1504_CR5","doi-asserted-by":"crossref","unstructured":"Cui, Y., Yan, L., Cao, Z., Liu, D.: Tf-blender: temporal feature blender for video object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8138\u20138147 (2021)","DOI":"10.1109\/ICCV48922.2021.00803"},{"key":"1504_CR6","doi-asserted-by":"crossref","unstructured":"Deng, J., Pan, Y., Yao, T., Zhou, W., Li, H., Mei, T.: Relation distillation networks for video object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7023\u20137032 (2019)","DOI":"10.1109\/ICCV.2019.00712"},{"key":"1504_CR7","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., Fischer, P., Ilg, E., Hausser, P., Hazirbas, C., Golkov, V., Van Der Smagt, P., Cremers, D., Brox, T.: Flownet: learning optical flow with convolutional networks. 
In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2758\u20132766 (2015)","DOI":"10.1109\/ICCV.2015.316"},{"key":"1504_CR8","doi-asserted-by":"crossref","unstructured":"Ehteshami Bejnordi, B., Habibian, A., Porikli, F., Ghodrati, A.: Salisa: saliency-based input sampling for efficient video object detection. In: European Conference on Computer Vision, pp. 300\u2013316. Springer (2022)","DOI":"10.1007\/978-3-031-20080-9_18"},{"key":"1504_CR9","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Detect to track and track to detect. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3038\u20133046 (2017)","DOI":"10.1109\/ICCV.2017.330"},{"key":"1504_CR10","doi-asserted-by":"publisher","first-page":"1253","DOI":"10.1109\/TIP.2017.2651367","volume":"26","author":"L Galteri","year":"2017","unstructured":"Galteri, L., Seidenari, L., Bertini, M., Del Bimbo, A.: Spatio-temporal closed-loop object detection. IEEE Trans. Image Process. 26, 1253\u20131263 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"1504_CR11","doi-asserted-by":"crossref","unstructured":"Habibian, A., Abati, D., Cohen, T.S., Bejnordi, B.E.: Skip-convolutions for efficient video processing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2695\u20132704 (2021)","DOI":"10.1109\/CVPR46437.2021.00272"},{"key":"1504_CR12","doi-asserted-by":"crossref","unstructured":"Habibian, A., Ben Yahia, H., Abati, D., Gavves, E., Porikli, F.: Delta distillation for efficient video processing. In: European Conference on Computer Vision, pp. 213\u2013229. 
Springer (2022)","DOI":"10.1007\/978-3-031-19833-5_13"},{"key":"1504_CR13","doi-asserted-by":"publisher","first-page":"119348","DOI":"10.1016\/j.eswa.2022.119348","volume":"215","author":"M Hajizadeh","year":"2023","unstructured":"Hajizadeh, M., Sabokrou, M., Rahmani, A.: MobileDenseNet: a new approach to object detection on mobile devices. Expert Syst. Appl. 215, 119348 (2023)","journal-title":"Expert Syst. Appl."},{"key":"1504_CR14","unstructured":"Han, W., Khorrami, P., Paine, T.L., Ramachandran, P., Babaeizadeh, M., Shi, H., Li, J., Yan, S., Huang, T.S.: Seq-nms for video object detection (2016). arXiv preprint arXiv:1602.08465"},{"key":"1504_CR15","doi-asserted-by":"publisher","first-page":"2896","DOI":"10.1109\/TCSVT.2017.2736553","volume":"28","author":"K Kang","year":"2017","unstructured":"Kang, K., Li, H., Yan, J., Zeng, X., Yang, B., Xiao, T., Zhang, C., Wang, Z., Wang, R., Wang, X., et al.: T-cnn: tubelets with convolutional neural networks for object detection from videos. IEEE Trans. Circuits Syst. Video Technol. 28, 2896\u20132907 (2017)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1504_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"1504_CR17","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Dollar, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"1504_CR18","doi-asserted-by":"crossref","unstructured":"Liu, M., Zhu, M.: Mobile video object detection with temporally-aware feature maps. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
5686\u20135695 (2018)","DOI":"10.1109\/CVPR.2018.00596"},{"key":"1504_CR19","unstructured":"Liu, M., Zhu, M., White, M., Li, Y., Kalenichenko, D.: Looking fast and slow: Memory-guided mobile video object detection (2019). arXiv preprint arXiv:1903.10172"},{"key":"1504_CR20","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., Berg, A.C.: Ssd: single shot multibox detector. In: European Conference on Computer Vision, pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1504_CR21","unstructured":"Mao, H., Zhu, S., Han, S., Dally, W.J.: Patchnet\u2013short-range template matching for efficient video processing (2021). arXiv preprint arXiv:2103.07371"},{"key":"1504_CR22","doi-asserted-by":"crossref","unstructured":"Qin, Z., Li, Z., Zhang, Z., Bao, Y., Yu, G., Peng, Y., Sun, J.: Thundernet: towards real-time generic object detection on mobile devices. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6718\u20136727 (2019)","DOI":"10.1109\/ICCV.2019.00682"},{"key":"1504_CR23","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1504_CR24","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"1504_CR25","doi-asserted-by":"crossref","unstructured":"Schulter, S., Vernaza, P., Choi, W., Chandraker, M.: Deep network flow for multi-object tracking. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6951\u20136960 (2017)","DOI":"10.1109\/CVPR.2017.292"},{"key":"1504_CR26","doi-asserted-by":"crossref","unstructured":"Tan, M., Pang, R., Le, Q.V.: Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10781\u201310790 (2020)","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"1504_CR27","doi-asserted-by":"crossref","unstructured":"Tang, Q., Li, J., Shi, Z., Hu, Y.: Lightdet: a lightweight and accurate object detection network. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, pp. 2243\u20132247 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054101"},{"key":"1504_CR28","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhou, Y., Yan, J., Deng, Z.: Fully motion-aware network for video object detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 542\u2013557 (2018)","DOI":"10.1007\/978-3-030-01261-8_33"},{"key":"1504_CR29","doi-asserted-by":"crossref","unstructured":"Wang, Z., Zheng, L., Liu, Y., Li, Y., Wang, S.: Towards real-time multiobject tracking. In: European Conference on Computer Vision, pp. 107\u2013122. Springer (2020)","DOI":"10.1007\/978-3-030-58621-8_7"},{"key":"1504_CR30","doi-asserted-by":"crossref","unstructured":"Wojke, N., Bewley, A., Paulus, D.: Simple online and realtime tracking with a deep association metric. In: 2017 IEEE International Conference on Image Processing (ICIP), IEEE, pp. 3645\u20133649 (2017)","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"1504_CR31","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., Kweon, I.S.: Cbam: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 
3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1504_CR32","doi-asserted-by":"crossref","unstructured":"Wu, H., Chen, Y., Wang, N., Zhang, Z.: Sequence level semantics aggregation for video object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9217\u20139225 (2019)","DOI":"10.1109\/ICCV.2019.00931"},{"key":"1504_CR33","doi-asserted-by":"crossref","unstructured":"Xiao, F., Lee, Y.J.: Video object detection with an aligned spatial-temporal memory. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 485\u2013501 (2018)","DOI":"10.1007\/978-3-030-01237-3_30"},{"key":"1504_CR34","doi-asserted-by":"crossref","unstructured":"Xu, R., Mu, F., Lee, J., Mukherjee, P., Chaterji, S., Bagchi, S., Li, Y.: Smartadapt: multi-branch object detection framework for videos on mobiles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2528\u20132538 (2022)","DOI":"10.1109\/CVPR52688.2022.00256"},{"key":"1504_CR35","doi-asserted-by":"crossref","unstructured":"Yao, C.H., Fang, C., Shen, X., Wan, Y., Yang, M.H.: Video object detection via object-level temporal aggregation. In: European Conference on Computer Vision, pp. 160\u2013177. Springer (2020)","DOI":"10.1007\/978-3-030-58568-6_10"},{"key":"1504_CR36","doi-asserted-by":"crossref","unstructured":"Zhu, X., Dai, J., Yuan, L., Wei, Y.: Towards high performance video object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7210\u20137218 (2018)","DOI":"10.1109\/CVPR.2018.00753"},{"key":"1504_CR37","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., Wei, Y.: Flow-guided feature aggregation for video object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 
408\u2013417 (2017)","DOI":"10.1109\/ICCV.2017.52"},{"key":"1504_CR38","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xiong, Y., Dai, J., Yuan, L., Wei, Y.: Deep feature flow for video recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2349\u20132358 (2017)","DOI":"10.1109\/CVPR.2017.441"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01504-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-023-01504-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01504-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T17:05:38Z","timestamp":1731171938000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-023-01504-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,31]]},"references-count":38,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["1504"],"URL":"https:\/\/doi.org\/10.1007\/s00138-023-01504-0","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,31]]},"assertion":[{"value":"29 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 December 
2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}],"article-number":"23"}}