{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:17:23Z","timestamp":1740107843932,"version":"3.37.3"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2022,1,9]],"date-time":"2022-01-09T00:00:00Z","timestamp":1641686400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,9]],"date-time":"2022-01-09T00:00:00Z","timestamp":1641686400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s00530-021-00874-7","type":"journal-article","created":{"date-parts":[[2022,1,9]],"date-time":"2022-01-09T00:03:07Z","timestamp":1641686587000},"page":"1845-1859","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["HandO: a hybrid 3D hand\u2013object reconstruction model for unknown objects"],"prefix":"10.1007","volume":"28","author":[{"given":"Hang","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chilam","family":"Cheang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6595-6893","authenticated-orcid":false,"given":"Yanwei","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangyang","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,1,9]]},"reference":[{"key":"874_CR1","doi-asserted-by":"crossref","unstructured":"Shan, D., Geng, J., Shu, M., Fouhey, D.: Understanding human hands in contact at internet scale. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 9869\u20139878 (2020)","DOI":"10.1109\/CVPR42600.2020.00989"},{"key":"874_CR2","doi-asserted-by":"crossref","unstructured":"Zhang, J., Pepose, S., Joo, H., Ramanan, D., Malik, J., Kanazawa, A.: Perceiving 3d human-object spatial arrangements from a single image in the wild. In: European conference on computer vision, pp. 34\u201351 (2020)","DOI":"10.1007\/978-3-030-58610-2_3"},{"key":"874_CR3","unstructured":"Diller, C., Funkhouser, T., Dai, A.: Forecasting characteristic 3D poses of human actions. ArXiv Preprint. arXiv:2011.15079 (2020)"},{"key":"874_CR4","doi-asserted-by":"crossref","unstructured":"Parger, M., Tang, C., Xu, Y., Twigg, C., Tao, L., Li, Y., Wang, R., Steinberger, M.: UNOC: understanding occlusion for embodied presence in virtual reality. ArXiv Preprint. arXiv:2012.03680 (2020)","DOI":"10.1109\/TVCG.2021.3085407"},{"key":"874_CR5","doi-asserted-by":"crossref","unstructured":"Hassan, M., Choutas, V., Tzionas, D., Black, M.: Resolving 3D human pose ambiguities with 3D scene constraints. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 2282\u20132292 (2019)","DOI":"10.1109\/ICCV.2019.00237"},{"key":"874_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3322961","volume":"38","author":"A Monszpart","year":"2019","unstructured":"Monszpart, A., Guerrero, P., Ceylan, D., Yumer, E., Mitra, N.: iMapper: interaction-guided scene mapping from monocular videos. ACM Trans. Graph. 38, 1\u201315 (2019)","journal-title":"ACM Trans. Graph."},{"key":"874_CR7","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Hassan, M., Neumann, H., Black, M., Tang, S.: Generating 3d people in scenes without people. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 6194\u20136204 (2020)","DOI":"10.1109\/CVPR42600.2020.00623"},{"key":"874_CR8","doi-asserted-by":"crossref","unstructured":"Hassan, M., Ghosh, P., Tesch, J., Tzionas, D., Black, M.: Populating 3D scenes by learning human\u2013scene interaction. ArXiv Preprint. arXiv:2012.11581 (2020)","DOI":"10.1109\/CVPR46437.2021.01447"},{"key":"874_CR9","doi-asserted-by":"crossref","unstructured":"Liu, M., Pan, Z., Xu, K., Ganguly, K., Manocha, D.: Generating grasp poses for a high-dof gripper using neural networks. ArXiv Preprint. arXiv:1903.00425 (2019)","DOI":"10.1109\/IROS40897.2019.8968115"},{"key":"874_CR10","doi-asserted-by":"crossref","unstructured":"Karunratanakul, K., Yang, J., Zhang, Y., Black, M., Muandet, K., Tang, S.: Grasping field: learning implicit representations for human grasps. ArXiv Preprint. arXiv:2008.04451 (2020)","DOI":"10.1109\/3DV50981.2020.00043"},{"key":"874_CR11","doi-asserted-by":"crossref","unstructured":"Fan, H., Su, H., Guibas, L.: A point set generation network for 3d object reconstruction from a single image. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 605\u2013613 (2017)","DOI":"10.1109\/CVPR.2017.264"},{"key":"874_CR12","doi-asserted-by":"crossref","unstructured":"Tatarchenko, M., Dosovitskiy, A., Brox, T.: Octree generating networks: efficient convolutional architectures for high-resolution 3d outputs. In: Proceedings of the IEEE international conference on computer vision, pp. 2088\u20132096 (2017)","DOI":"10.1109\/ICCV.2017.230"},{"key":"874_CR13","doi-asserted-by":"crossref","unstructured":"Groueix, T., Fisher, M., Kim, V., Russell, B., Aubry, M.: A papier-m\u00e2ch\u00e9 approach to learning 3d surface generation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 216\u2013224 (2018)","DOI":"10.1109\/CVPR.2018.00030"},{"key":"874_CR14","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhang, Y., Li, Z., Fu, Y., Liu, W., Jiang, Y.: Pixel2mesh: generating 3d mesh models from single rgb images. In: Proceedings of the European conference on computer vision (ECCV), pp. 52\u201367 (2018)","DOI":"10.1007\/978-3-030-01252-6_4"},{"key":"874_CR15","doi-asserted-by":"crossref","unstructured":"Mescheder, L., Oechsle, M., Niemeyer, M., Nowozin, S., Geiger, A.: Occupancy networks: learning 3d reconstruction in function space. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4460\u20134470 (2019)","DOI":"10.1109\/CVPR.2019.00459"},{"key":"874_CR16","doi-asserted-by":"publisher","first-page":"1578","DOI":"10.1109\/TPAMI.2019.2954885","volume":"43","author":"X Han","year":"2019","unstructured":"Han, X., Laga, H., Bennamoun, M.: Image-based 3D object reconstruction: state-of-the-art and trends in the deep learning era. IEEE Trans. Pattern Anal. Mach. Intell. 43, 1578\u20131604 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"874_CR17","doi-asserted-by":"crossref","unstructured":"Choy, C., Xu, D., Gwak, J., Chen, K., Savarese, S.: 3d-r2n2: a unified approach for single and multi-view 3d object reconstruction. In: European conference on computer vision, pp. 628\u2013644 (2016)","DOI":"10.1007\/978-3-319-46484-8_38"},{"key":"874_CR18","doi-asserted-by":"crossref","unstructured":"Wen, C., Zhang, Y., Li, Z., Fu, Y.: Pixel2mesh++: Multi-view 3d mesh generation via deformation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 1042\u20131051 (2019)","DOI":"10.1109\/ICCV.2019.00113"},{"key":"874_CR19","doi-asserted-by":"crossref","unstructured":"Hasson, Y., Tekin, B., Bogo, F., Laptev, I., Pollefeys, M., Schmid, C.: Leveraging photometric consistency over time for sparsely supervised hand-object reconstruction. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 571\u2013580 (2020)","DOI":"10.1109\/CVPR42600.2020.00065"},{"key":"874_CR20","doi-asserted-by":"crossref","unstructured":"Corona, E., Pumarola, A., Alenya, G., Moreno-Noguer, F., Rogez, G.: Ganhand: predicting human grasp affordances in multi-object scenes. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5031\u20135041 (2020)","DOI":"10.1109\/CVPR42600.2020.00508"},{"key":"874_CR21","doi-asserted-by":"crossref","unstructured":"Cao, Z., Radosavovic, I., Kanazawa, A., Malik, J.: Reconstructing hand\u2013object interactions in the wild. ArXiv Preprint. arXiv:2012.09856 (2020)","DOI":"10.1109\/ICCV48922.2021.01219"},{"key":"874_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3130800.3130883","volume":"36","author":"J Romero","year":"2017","unstructured":"Romero, J., Tzionas, D., Black, M.: Embodied hands: modeling and capturing hands and bodies together. ACM Trans. Graph. 36, 1\u201317 (2017)","journal-title":"ACM Trans. Graph."},{"key":"874_CR23","doi-asserted-by":"crossref","unstructured":"Zimmermann, C., Brox, T.: Learning to estimate 3d hand pose from single RGB images. In: Proceedings of the IEEE international conference on computer vision, pp. 4903\u20134911 (2017)","DOI":"10.1109\/ICCV.2017.525"},{"key":"874_CR24","doi-asserted-by":"crossref","unstructured":"Tekin, B., Bogo, F., Pollefeys, M.: H+ o: Unified egocentric recognition of 3d hand-object poses and interactions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4511\u20134520 (2019)","DOI":"10.1109\/CVPR.2019.00464"},{"key":"874_CR25","doi-asserted-by":"crossref","unstructured":"Hasson, Y., Varol, G., Tzionas, D., Kalevatykh, I., Black, M., Laptev, I., Schmid, C.: Learning joint reconstruction of hands and manipulated objects. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 11807\u201311816 (2019)","DOI":"10.1109\/CVPR.2019.01208"},{"key":"874_CR26","doi-asserted-by":"crossref","unstructured":"Yang, L., Zhan, X., Li, K., Xu, W., Li, J., Lu, C.: CPF: Learning a contact potential field to model the hand\u2013object interaction. ArXiv Preprint. arXiv:2012.00924 (2020)","DOI":"10.1109\/ICCV48922.2021.01091"},{"key":"874_CR27","doi-asserted-by":"crossref","unstructured":"Taheri, O., Ghorbani, N., Black, M., Tzionas, D.: GRAB: a dataset of whole-body human grasping of objects. In: European conference on computer vision, pp. 581\u2013600 (2020)","DOI":"10.1007\/978-3-030-58548-8_34"},{"key":"874_CR28","doi-asserted-by":"crossref","unstructured":"Song, C., Song, J., Huang, Q.: Hybridpose: 6d object pose estimation under hybrid representations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 431\u2013440 (2020)","DOI":"10.1109\/CVPR42600.2020.00051"},{"key":"874_CR29","doi-asserted-by":"crossref","unstructured":"Yang, Z., Yan, S., Huang, Q.: Extreme relative pose network under hybrid representations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2455\u20132464 (2020)","DOI":"10.1109\/CVPR42600.2020.00253"},{"key":"874_CR30","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2019","unstructured":"Cao, Z., Hidalgo, G., Simon, T., Wei, S., Sheikh, Y.: OpenPose: realtime multi-person 2D pose estimation using part affinity fields. IEEE Trans. Pattern Anal. Mach. Intell. 43, 172\u2013186 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"874_CR31","doi-asserted-by":"crossref","unstructured":"Rong, Y., Shiratori, T., Joo, H.: FrankMocap: fast monocular 3D hand and body motion capture by regression and integration. ArXiv Preprint. arXiv:2008.08324 (2020)","DOI":"10.1109\/ICCVW54120.2021.00201"},{"key":"874_CR32","doi-asserted-by":"crossref","unstructured":"Hampali, S., Rad, M., Oberweger, M., Lepetit, V.: Honnotate: a method for 3d annotation of hand and object poses. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3196\u20133206 (2020)","DOI":"10.1109\/CVPR42600.2020.00326"},{"key":"874_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, T., Huang, B., Wang, Y.: Object-occluded human shape and pose estimation from a single color image. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 7376\u20137385 (2020)","DOI":"10.1109\/CVPR42600.2020.00740"},{"key":"874_CR34","doi-asserted-by":"crossref","unstructured":"Kulon, D., Guler, R.A., Kokkinos, I., et al.: Weakly-supervised mesh-convolutional hand reconstruction in the wild. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4990\u20135000 (2020)","DOI":"10.1109\/CVPR42600.2020.00504"},{"key":"874_CR35","doi-asserted-by":"crossref","unstructured":"Jiang, H., Liu, S., Wang, J., et al.: Hand-object contact consistency reasoning for human grasps generation. arXiv preprint. arXiv:2104.03304 (2021)","DOI":"10.1109\/ICCV48922.2021.01092"},{"key":"874_CR36","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. ArXiv Preprint. arXiv:1406.2283 (2014)"},{"key":"874_CR37","unstructured":"Qi, C., Su, H., Mo, K., Guibas, L.: Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 652\u2013660 (2017)"},{"key":"874_CR38","unstructured":"Qi, C., Yi, L., Su, H., Guibas, L.: Pointnet++: deep hierarchical feature learning on point sets in a metric space. ArXiv Preprint. arXiv:1706.02413 (2017)"},{"key":"874_CR39","doi-asserted-by":"crossref","unstructured":"Park, J., Florence, P., Straub, J., Newcombe, R., Lovegrove, S.: Deepsdf: Learning continuous signed distance functions for shape representation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 165\u2013174 (2019)","DOI":"10.1109\/CVPR.2019.00025"},{"key":"874_CR40","unstructured":"Xu, Q., Wang, W., Ceylan, D., Mech, R., Neumann, U.: Disn: deep implicit surface network for high-quality single-view 3d reconstruction. ArXiv Preprint. arXiv:1905.10711 (2019)"},{"key":"874_CR41","doi-asserted-by":"publisher","first-page":"2585","DOI":"10.1587\/transinf.E95.D.2585","volume":"95","author":"Z Shi","year":"2012","unstructured":"Shi, Z., Yu, L., El-Latif, A., Ahmed, A., Niu, X.: Skeleton modulated topological perception map for rapid viewpoint selection. IEICE Trans. Inf. Syst. 95, 2585\u20132588 (2012)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"874_CR42","first-page":"2887","volume":"4","author":"Z-F Shi","year":"2012","unstructured":"Shi, Z.-F., Yu, L.-Y., El-Latif, A., Ahmed, A., Le, D., Niu, X.-M.: A kinematics significance based skeleton map for rapid viewpoint selection. Res. J. Appl. Sci. Eng. Technol. 4, 2887\u20132892 (2012)","journal-title":"Res. J. Appl. Sci. Eng. Technol."},{"key":"874_CR43","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1016\/j.future.2018.06.020","volume":"86","author":"R Gad","year":"2018","unstructured":"Gad, R., Talha, M., El-Latif, A., Ahmed, A., Zorkany, M., El-Sayed, A., El-Fishawy, N., Ghulam, M.: Iris recognition using multi-algorithmic approaches for cognitive internet of things (CIoT) framewok. Future Gener. Comput. Syst. 86, 178\u2013191 (2018)","journal-title":"Future Gener. Comput. Syst."},{"key":"874_CR44","doi-asserted-by":"crossref","unstructured":"Kumar, A., Singh, N., Kumar, P., Vijayvergia, A., Kumar, K.: A novel superpixel based color spatial feature for salient object detection. In: 2017 Conference on information and communication technology (CICT), IEEE, pp. 1\u20135 (2017)","DOI":"10.1109\/INFOCOMTECH.2017.8340630"},{"key":"874_CR45","doi-asserted-by":"crossref","unstructured":"Kumain, S.C., Singh, M., Singh, N., Kumar, K.: An efficient Gaussian noise reduction technique for noisy images using optimized filter approach. In: IEEE in 2018 first international conference on secure cyber computing and communication (ICSCCC), IEEE, pp. 243\u2013248 (2018)","DOI":"10.1109\/ICSCCC.2018.8703305"},{"key":"874_CR46","doi-asserted-by":"crossref","unstructured":"Atrish, A., Singh, N., Kumar, K., Kumar, V.: An automated hierarchical framework for player recognition in sports image. In: Proceedings of the international conference on video and image processing, pp. 103\u2013108 (2017)","DOI":"10.1145\/3177404.3177432"},{"key":"874_CR47","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1007\/978-981-13-0923-6_39","volume-title":"Machine intelligence and signal analysis","author":"K Kumar","year":"2019","unstructured":"Kumar, K., Shrimankar, D.D., Singh, N.: Key-lectures: keyframes extraction in video lectures. In: Machine intelligence and signal analysis, pp. 453\u2013459. Springer, Singapore (2019)"},{"key":"874_CR48","doi-asserted-by":"publisher","DOI":"10.1080\/03772063.2020.1780164","author":"S Sharma","year":"2020","unstructured":"Sharma, S., Kumar, K., Singh, N.: Deep eigen space based ASL recognition system. IETE J Res (2020). https:\/\/doi.org\/10.1080\/03772063.2020.1780164","journal-title":"IETE J Res"},{"issue":"7","key":"874_CR49","doi-asserted-by":"publisher","first-page":"11079","DOI":"10.1007\/s11042-020-10157-4","volume":"80","author":"K Kumar","year":"2021","unstructured":"Kumar, K.: Text query based summarized event searching interface system using deep learning over cloud. Multimedia Tools Appl. 80(7), 11079\u201311094 (2021)","journal-title":"Multimedia Tools Appl."},{"key":"874_CR50","doi-asserted-by":"crossref","unstructured":"Sharma, S., Kumar, P., Kumar, K.: A-PNR: automatic plate number recognition. In: Proceedings of the 7th international conference on computer and communication technology, pp. 106\u2013110 (2017)","DOI":"10.1145\/3154979.3154999"},{"key":"874_CR51","doi-asserted-by":"crossref","unstructured":"Loper, M., Black, M.: OpenDR: an approximate differentiable renderer. In: European conference on computer vision, pp. 154\u2013169 (2014)","DOI":"10.1007\/978-3-319-10584-0_11"},{"key":"874_CR52","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1145\/37402.37422","volume":"21","author":"W Lorensen","year":"1987","unstructured":"Lorensen, W., Cline, H.: Marching cubes: a high resolution 3D surface construction algorithm. Comput. Graph. 21, 163\u2013169 (1987)","journal-title":"Comput. Graph."},{"key":"874_CR53","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Choutas, V., Ghorbani, N., Bolkart, T., Osman, A., Tzionas, D., Black, M.: Expressive body capture: 3d hands, face, and body from a single image. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10975\u201310985 (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"874_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2816795.2818013","volume":"34","author":"M Loper","year":"2015","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.: SMPL: a skinned multi-person linear model. ACM Trans. Graph. 34, 1\u201316 (2015)","journal-title":"ACM Trans. Graph."},{"key":"874_CR55","doi-asserted-by":"crossref","unstructured":"Garcia-Hernando, G., Yuan, S., Baek, S., Kim, T.: First-person hand action benchmark with rgb-d videos and 3d hand pose annotations. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 409\u2013419 (2018)","DOI":"10.1109\/CVPR.2018.00050"},{"key":"874_CR56","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N., Pons-Moll, G., Black, M.: AMASS: archive of motion capture as surface shapes. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"874_CR57","doi-asserted-by":"crossref","unstructured":"Saito, S., Huang, Z., Natsume, R., Morishima, S., Kanazawa, A., Li, H.: Pifu: pixel-aligned implicit function for high-resolution clothed human digitization. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 2304\u20132314 (2019)","DOI":"10.1109\/ICCV.2019.00239"},{"key":"874_CR58","doi-asserted-by":"crossref","unstructured":"Bhatnagar, B., Tiwari, G., Theobalt, C., Pons-Moll, G.: Multi-garment net: learning to dress 3d people from images. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 5420\u20135430 (2019)","DOI":"10.1109\/ICCV.2019.00552"},{"key":"874_CR59","doi-asserted-by":"crossref","unstructured":"Brahmbhatt, S., Ham, C., Kemp, C., Hays, J.: Contactdb: analyzing and predicting grasp contact via thermal imaging. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8709\u20138719 (2019)","DOI":"10.1109\/CVPR.2019.00891"},{"key":"874_CR60","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Sanchez-Riera, J., Choi, G., Sanfeliu, A., Moreno-Noguer, F.: 3dpeople: modeling the geometry of dressed humans. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 2242\u20132251 (2019)","DOI":"10.1109\/ICCV.2019.00233"},{"key":"874_CR61","unstructured":"Jakob, W.: Mitsuba renderer (2010). http:\/\/www.mitsuba-renderer.org. Accessed 1 Dec 2020"},{"key":"874_CR62","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"874_CR63","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"874_CR64","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"874_CR65","unstructured":"Kingma, D., Ba, J.: Adam: A method for stochastic optimization. ArXiv Preprint. arXiv:1412.6980 (2014)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-021-00874-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-021-00874-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-021-00874-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,25]],"date-time":"2022-09-25T12:23:22Z","timestamp":1664108602000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-021-00874-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,9]]},"references-count":65,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["874"],"URL":"https:\/\/doi.org\/10.1007\/s00530-021-00874-7","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2022,1,9]]},"assertion":[{"value":"15 August 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}