{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T03:59:42Z","timestamp":1743047982077,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":41,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819609598"},{"type":"electronic","value":"9789819609604"}],"license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0960-4_4","type":"book-chapter","created":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T07:36:31Z","timestamp":1733556991000},"page":"53-70","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multiview Detection with\u00a0Cardboard Human Modeling"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4326-0091","authenticated-orcid":false,"given":"Jiahao","family":"Ma","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5681-6960","authenticated-orcid":false,"given":"Zicheng","family":"Duan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1464-9500","authenticated-orcid":false,"given":"Liang","family":"Zheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3167-3422","authenticated-orcid":false,"given":"Chuong","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Appel, A.: Some techniques for shading machine renderings of solids. In: Proceedings of the April 30\u2013May 2, 1968, spring joint computer conference. pp. 37\u201345 (1968)","DOI":"10.1145\/1468075.1468082"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Baqu\u00e9, P., Fleuret, F., Fua, P.: Deep occlusion reasoning for multi-camera multi-target detection. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 271\u2013279 (2017)","DOI":"10.1109\/ICCV.2017.38"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2d pose estimation using part affinity fields. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 7291\u20137299 (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Chavdarova, T., Baqu\u00e9, P., Bouquet, S., Maksai, A., Jose, C., Bagautdinov, T., Lettry, L., Fua, P., Van\u00a0Gool, L., Fleuret, F.: Wildtrack: A multi-camera hd dataset for dense unscripted pedestrian detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 5030\u20135039 (2018)","DOI":"10.1109\/CVPR.2018.00528"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Chu, X., Zheng, A., Zhang, X., Sun, J.: Detection in crowded scenes: One proposal, multiple predictions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2020)","DOI":"10.1109\/CVPR42600.2020.01223"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Ververas, E., Kotsia, I., Zafeiriou, S.: Retinaface: Single-shot multi-level face localisation in the wild. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 5203\u20135212 (2020)","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Fleuret, F., Berclaz, J., Lengagne, R., Fua, P.: Multicamera people tracking with a probabilistic occupancy map. IEEE Trans. Pattern Anal. Mach. Intell. 30(2), 267\u2013282 (2007)","DOI":"10.1109\/TPAMI.2007.1174"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 2002\u20132011 (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"G\u00fcler, R.A., Neverova, N., Kokkinos, I.: Densepose: Dense human pose estimation in the wild. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 7297\u20137306 (2018)","DOI":"10.1109\/CVPR.2018.00762"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Hou, Y., Zheng, L.: Multiview detection with shadow transformer (and view-coherent data augmentation). In: Proceedings of the 29th ACM International Conference on Multimedia. pp. 1673\u20131682 (2021)","DOI":"10.1145\/3474085.3475310"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Hou, Y., Zheng, L., Gould, S.: Multiview detection with feature perspective transformation. In: European Conference on Computer Vision. pp. 1\u201318. Springer (2020)","DOI":"10.1007\/978-3-030-58571-6_1"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, B., Hong, Y., Bao, H., Zhang, J.: Selfrecon: Self reconstruction your digital avatar from monocular video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 5605\u20135615 (2022)","DOI":"10.1109\/CVPR52688.2022.00552"},{"key":"4_CR13","doi-asserted-by":"publisher","unstructured":"Khan, S.M., Shah, M.: A Multiview Approach to Tracking People in Crowded Scenes Using a Planar Homography Constraint. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3954, pp. 133\u2013146. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744085_11","DOI":"10.1007\/11744085_11"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Lang, A.H., Vora, S., Caesar, H., Zhou, L., Yang, J., Beijbom, O.: Pointpillars: Fast encoders for object detection from point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 12697\u201312705 (2019)","DOI":"10.1109\/CVPR.2019.01298"},{"key":"4_CR15","unstructured":"Li, W., Wang, Z., Yin, B., Peng, Q., Du, Y., Xiao, T., Yu, G., Lu, H., Wei, Y., Sun, J.: Rethinking on multi-stage networks for human pose estimation. arXiv preprint arXiv:1901.00148 (2019)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Lima, J.P., Roberto, R., Figueiredo, L., Simoes, F., Teichrieb, V.: Generalizable multi-camera 3d pedestrian detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops. pp. 1232\u20131240 (June 2021)","DOI":"10.1109\/CVPRW53098.2021.00135"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision. pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision. pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., Berg, A.C.: Ssd: Single shot multibox detector. In: European conference on computer vision. pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"4_CR21","unstructured":"Ma, J., Tong, J., Wang, S., Zhao, W., Zheng, L., Nguyen, C.: Voxelized 3d feature aggregation for multiview detection. arXiv preprint arXiv:2112.03471 (2021)"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: European conference on computer vision. pp. 483\u2013499. Springer (2016)","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Possegger, H., Sternig, S., Mauthner, T., Roth, P.M., Bischof, H.: Robust real-time tracking of multiple objects by volumetric mass densities. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 2395\u20132402 (2013)","DOI":"10.1109\/CVPR.2013.310"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Qi, C.R., Liu, W., Wu, C., Su, H., Guibas, L.J.: Frustum pointnets for 3d object detection from rgb-d data. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 918\u2013927 (2018)","DOI":"10.1109\/CVPR.2018.00102"},{"key":"4_CR25","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 652\u2013660 (2017)"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Qiu, R., Xu, M., Yan, Y., Smith, J.S., Yang, X.: 3d random occlusion and multi-layer projection for deep multi-camera pedestrian localization. arXiv preprint arXiv:2207.10895 (2022)","DOI":"10.1007\/978-3-031-20080-9_40"},{"key":"4_CR27","unstructured":"Redmon, J., Farhadi, A.: Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"4_CR28","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Roig, G., Boix, X., Shitrit, H.B., Fua, P.: Conditional random fields for multi-camera object detection. In: 2011 International Conference on Computer Vision. pp. 563\u2013570. IEEE (2011)","DOI":"10.1109\/ICCV.2011.6126289"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Saito, S., Huang, Z., Natsume, R., Morishima, S., Kanazawa, A., Li, H.: Pifu: Pixel-aligned implicit function for high-resolution clothed human digitization. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp. 2304\u20132314 (2019)","DOI":"10.1109\/ICCV.2019.00239"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Sekii, T.: Robust, real-time 3d tracking of multiple objects with similar appearances. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 4275\u20134283 (2016)","DOI":"10.1109\/CVPR.2016.463"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Song, L., Wu, J., Yang, M., Zhang, Q., Li, Y., Yuan, J.: Stacked homography transformations for multi-view pedestrian detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 6049\u20136057 (2021)","DOI":"10.1109\/ICCV48922.2021.00599"},{"key":"4_CR33","doi-asserted-by":"publisher","unstructured":"Tu, H., Wang, C., Zeng, W.: VoxelPose: Towards Multi-camera 3D Human Pose Estimation in Wild Environment. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 197\u2013212. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_12","DOI":"10.1007\/978-3-030-58452-8_12"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.L., Garg, D., Hariharan, B., Campbell, M., Weinberger, K.Q.: Pseudo-lidar from visual depth estimation: Bridging the gap in 3d object detection for autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 8445\u20138453 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Weng, X., Kitani, K.: Monocular 3d object detection with pseudo-lidar point cloud. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops. pp.\u00a00\u20130 (2019)","DOI":"10.1109\/ICCVW.2019.00114"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Xu, B., Chen, Z.: Multi-level fusion based 3d object detection from monocular images. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 2345\u20132353 (2018)","DOI":"10.1109\/CVPR.2018.00249"},{"key":"4_CR37","doi-asserted-by":"crossref","unstructured":"Xu, Y., Liu, X., Liu, Y., Zhu, S.C.: Multi-view people tracking via hierarchical trajectory composition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 4256\u20134265 (2016)","DOI":"10.1109\/CVPR.2016.461"},{"key":"4_CR38","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107703","volume":"112","author":"Y Yan","year":"2021","unstructured":"Yan, Y., Xu, M., Smith, J.S., Shen, M., Xi, J.: Multicamera pedestrian detection using logic minimization. Pattern Recogn. 112, 107703 (2021)","journal-title":"Pattern Recogn."},{"key":"4_CR39","doi-asserted-by":"crossref","unstructured":"Ye, H., Zhu, W., Wang, C., Wu, R., Wang, Y.: Faster voxelpose: Real-time 3d human pose estimation by orthographic projection. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part VI. pp. 142\u2013159. Springer (2022)","DOI":"10.1007\/978-3-031-20068-7_9"},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, Y., Cavallaro, A., Xiang, T.: Omni-scale feature learning for person re-identification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 3702\u20133712 (2019)","DOI":"10.1109\/ICCV.2019.00380"},{"key":"4_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: Voxelnet: End-to-end learning for point cloud based 3d object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 4490\u20134499 (2018)","DOI":"10.1109\/CVPR.2018.00472"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0960-4_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T08:31:01Z","timestamp":1733560261000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0960-4_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"ISBN":["9789819609598","9789819609604"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0960-4_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"8 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}