{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T09:00:47Z","timestamp":1773738047968,"version":"3.50.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2022,12,10]],"date-time":"2022-12-10T00:00:00Z","timestamp":1670630400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,10]],"date-time":"2022-12-10T00:00:00Z","timestamp":1670630400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s11263-022-01695-5","type":"journal-article","created":{"date-parts":[[2022,12,10]],"date-time":"2022-12-10T04:29:35Z","timestamp":1670646575000},"page":"659-679","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Automatic Modelling for Interactive Action Assessment"],"prefix":"10.1007","volume":"131","author":[{"given":"Jibin","family":"Gao","sequence":"first","affiliation":[]},{"given":"Jia-Hui","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Shao-Jie","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8327-0003","authenticated-orcid":false,"given":"Wei-Shi","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,12,10]]},"reference":[{"key":"1695_CR1","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Gronat, P., Torii, A., Pajdla, T., & Sivic, J. (2016). Netvlad: Cnn architecture for weakly supervised place recognition. In CVPR (pp. 5297\u20135307).","DOI":"10.1109\/CVPR.2016.572"},{"key":"1695_CR2","doi-asserted-by":"crossref","unstructured":"Azar, S. M., Atigh, M. G., Nickabadi, A., & Alahi, A. (2019). Convolutional relational machine for group activity recognition. In CVPR (pp. 7892\u20137901).","DOI":"10.1109\/CVPR.2019.00808"},{"key":"1695_CR3","doi-asserted-by":"crossref","unstructured":"Bertasius, G., Soo\u00a0Park, H., Yu, S. X., & Shi, J. (2017). Am I a baller? Basketball performance assessment from first-person videos. In ICCV (pp. 2177\u20132185).","DOI":"10.1109\/ICCV.2017.239"},{"key":"1695_CR4","unstructured":"Cai, H., Zhu, L., & Han, S. (2018). Proxylessnas: Direct neural architecture search on target task and hardware. In ICLR."},{"key":"1695_CR5","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A. (2017). Quo vadis, action recognition? A new model and the kinetics dataset. In CVPR (pp. 6299\u20136308).","DOI":"10.1109\/CVPR.2017.502"},{"key":"1695_CR6","doi-asserted-by":"crossref","unstructured":"Chang, X., Zheng, W.-S., & Zhang, J. (2015). Learning person-person interaction in collective activity recognition. TIP 24(6), 1905\u20131918.","DOI":"10.1109\/TIP.2015.2409564"},{"key":"1695_CR7","doi-asserted-by":"crossref","unstructured":"Chen, J., Wang, Y., Qin, J., Liu, L., & Shao, L. (July 2017). Fast person re-identification via cross-camera semantic binary transformation. In CVPR.","DOI":"10.1109\/CVPR.2017.566"},{"issue":"3","key":"1695_CR8","first-page":"245","volume":"125","author":"DM Corey","year":"1998","unstructured":"Corey, D. M., Dunlap, W. P., & Burke, M. J. (1998). Averaging correlations: Expected values and bias in combined Pearson RS and Fisher\u2019s Z transformations. JGP, 125(3), 245\u2013261.","journal-title":"JGP"},{"key":"1695_CR9","doi-asserted-by":"crossref","unstructured":"Dong, X., & Yang, Y. (2019). Searching for a robust neural architecture in four GPU hours. In CVPR (pp. 1761\u20131770).","DOI":"10.1109\/CVPR.2019.00186"},{"key":"1695_CR10","unstructured":"Doughty, H., Damen, D., & Mayol-Cuevas, W. (2018). Who\u015b better, who\u015b best: Skill determination in video using deep ranking. In CVPR."},{"key":"1695_CR11","doi-asserted-by":"crossref","unstructured":"Doughty, H., Mayol-Cuevas, W., & Damen, D. (2019). The pros and cons: Rank-aware temporal attention for skill determination in long videos. In CVPR (pp. 7862\u20137871).","DOI":"10.1109\/CVPR.2019.00805"},{"key":"1695_CR12","doi-asserted-by":"crossref","unstructured":"Fang, H.-S., Xie, S., Tai, Y.-W., & Lu, C. (2017). Rmpe: Regional multi-person pose estimation. In ICCV (pp. 2334\u20132343).","DOI":"10.1109\/ICCV.2017.256"},{"issue":"4","key":"1695_CR13","first-page":"507","volume":"10","author":"RA Fisher","year":"1915","unstructured":"Fisher, R. A. (1915). Frequency distribution of the values of the correlation coefficient in samples from an indefinitely large population. Biometrika, 10(4), 507\u2013521.","journal-title":"Biometrika"},{"key":"1695_CR14","doi-asserted-by":"crossref","unstructured":"Gao, J., Zheng, W.-S., Pan, J.-H., Gao, C., Wang, Y., Zeng, W., & Lai, J. (2020). An asymmetric modeling for action assessment. In ECCV (pp. 222\u2013238), Springer.","DOI":"10.1007\/978-3-030-58577-8_14"},{"key":"1695_CR15","unstructured":"Gao, Y., Vedula, S. S., Reiley, C. E., Ahmidi, N., Varadarajan, B., Lin, H. C., Tao, L., Zappella, L., B\u00e9jar, B., Yuh, D. D. et\u00a0al. (2014). Jhu-isi gesture and skill assessment working set (jigsaws): A surgical activity dataset for human motion modeling. In W2CAI (Vol. 3, p. 3)."},{"key":"1695_CR16","doi-asserted-by":"crossref","unstructured":"Guo, Z., Zhang, X., Mu, H., Heng, W., Liu, Z., Wei, Y., & Sun, J. (2019). Single path one-shot neural architecture search with uniform sampling. In ECCV (pp. 544\u2013560).","DOI":"10.1007\/978-3-030-58517-4_32"},{"key":"1695_CR17","doi-asserted-by":"crossref","unstructured":"Hu, S., Xie, S., Zheng, H., Liu, C., Shi, J., Liu, X., & Lin, D. (2020). Dsnas: Direct neural architecture search without parameter retraining. In CVPR (pp. 12084\u201312092).","DOI":"10.1109\/CVPR42600.2020.01210"},{"key":"1695_CR18","doi-asserted-by":"crossref","unstructured":"Ilg, W., Mezger, J., & Giese, M. (2003). Estimation of skill levels in sports based on hierarchical Spatio-temporal correspondences. In JPRS (pp. 523\u2013531), Springer.","DOI":"10.1007\/978-3-540-45243-0_67"},{"key":"1695_CR19","unstructured":"International Swimming Federation (FINA). Fina diving rules, 2017. URL https:\/\/resources.fina.org\/fina\/document\/2021\/01\/12\/916f78f6-2a42-46d6-bea8-e49130211edf\/2017-2021_diving_16032018.pdf."},{"key":"1695_CR20","doi-asserted-by":"crossref","unstructured":"Joachims, T. (2006). Training linear SVMs in linear time. In SIGKDD (pp. 217\u2013226).","DOI":"10.1145\/1150402.1150429"},{"key":"1695_CR21","doi-asserted-by":"crossref","unstructured":"Liu, D., Li, Q., Jiang, T., Wang, Y., Miao, R., Shan, F., & Li, Z. (June 2021). Towards unified surgical skill assessment. In CVPR (pp. 9522\u20139531).","DOI":"10.1109\/CVPR46437.2021.00940"},{"key":"1695_CR22","unstructured":"Liu, H., Simonyan, K., & Yang, Y. (2018). Darts: Differentiable architecture search. In ICLR."},{"key":"1695_CR23","doi-asserted-by":"crossref","unstructured":"Lu, L., Lu, Y., Yu, R., Di, H., Zhang, L., & Wang, S. (2019). Gaim: Graph attention interaction model for collective activity recognition. TMM 22(2), 524\u2013539.","DOI":"10.1109\/TMM.2019.2930344"},{"key":"1695_CR24","doi-asserted-by":"crossref","unstructured":"Malpani, A., Vedula, S. S., Chen, C. C. G., & Hager, G. D. (2014). Pairwise comparison-based objective score for automated skill assessment of segments in a surgical task. In IPCAI (pp. 138\u2013147), Springer.","DOI":"10.1007\/978-3-319-07521-1_15"},{"key":"1695_CR25","doi-asserted-by":"crossref","unstructured":"Martin, J., Regehr, G., Reznick, R., Macrae, H., Murnaghan, J., Hutchison, C., & Brown, M. (1997). Objective structured assessment of technical skill (OSATS) for surgical residents. BJS, 84(2), 273\u2013278.","DOI":"10.1046\/j.1365-2168.1997.02502.x"},{"key":"1695_CR26","doi-asserted-by":"crossref","unstructured":"Pan, J.-H., Gao, J., & Zheng, W.-S. (October 2019). Action assessment by joint relation graphs. In ICCV.","DOI":"10.1109\/ICCV.2019.00643"},{"key":"1695_CR27","doi-asserted-by":"crossref","unstructured":"Parmar, P., & Morris, B. T. (June 2019). What and how well you performed? A multitask learning approach to action quality assessment. In CVPR.","DOI":"10.1109\/CVPR.2019.00039"},{"key":"1695_CR28","doi-asserted-by":"crossref","unstructured":"Parmar, P., & Tran\u00a0Morris, B. (2017). Learning to score Olympic events. In CVPRW (pp. 20\u201328).","DOI":"10.1109\/CVPRW.2017.16"},{"key":"1695_CR29","doi-asserted-by":"publisher","unstructured":"Parmar, P., Tran\u00a0Morris, B. (Jan 2019). Action quality assessment across multiple actions. In WACV (pp. 1468\u20131476). https:\/\/doi.org\/10.1109\/WACV.2019.00161.","DOI":"10.1109\/WACV.2019.00161"},{"key":"1695_CR30","doi-asserted-by":"publisher","unstructured":"Pearson, K. (1913). On the probable error of a correlation coefficient as found from a fourfold table. Biometrika. https:\/\/doi.org\/10.1093\/biomet\/9.1-2.22","DOI":"10.1093\/biomet\/9.1-2.22"},{"key":"1695_CR31","doi-asserted-by":"crossref","unstructured":"P\u00e9rez, J. S., Meinhardt-Llopis, E., & Facciolo, G. (2013). Tv-l1 optical flow estimation. In IPOL (pp. 137\u2013150).","DOI":"10.5201\/ipol.2013.26"},{"key":"1695_CR32","unstructured":"Pham, H., Guan, M. Y., Zoph, B., Le, Q. V., & Dean, J. (2018). Efficient neural architecture search via parameters sharing. In ICML (pp. 4092\u20134101)."},{"key":"1695_CR33","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., Vondrick, C., & Torralba, A. (2014). Assessing the quality of actions. In ECCV (pp. 556\u2013571), Springer.","DOI":"10.1007\/978-3-319-10599-4_36"},{"issue":"1","key":"1695_CR34","first-page":"61","volume":"20","author":"F Scarselli","year":"2009","unstructured":"Scarselli, F., Gori, M., Tsoi, A. C., Hagenbuchner, M., & Monfardini, G. (2009). The graph neural network model. TNN, 20(1), 61\u201380.","journal-title":"TNN"},{"key":"1695_CR35","unstructured":"Sharma, Y., Bettadapura, V., Pl\u00f6tz, T., Hammerla, N., Mellor, S., McNaney, R., Olivier, P., Deshmukh, S., McCaskie, A., & Essa, I. (2014). Video based assessment of OSATS using sequential motion textures, Georgia Institute of Technology."},{"key":"1695_CR36","doi-asserted-by":"crossref","unstructured":"Shu, T., Todorovic, S., Zhu, S.-C. (2017). Cern: Confidence-energy recurrent network for group activity recognition. In CVPR (pp. 5523\u20135531).","DOI":"10.1109\/CVPR.2017.453"},{"key":"1695_CR37","doi-asserted-by":"crossref","unstructured":"Tang, Y., Ni, Z., Zhou, J., Zhang, D., Lu, J., Wu, Y., & Zhou, J. (2020) Uncertainty-aware score distribution learning for action quality assessment. In CVPR (pp. 9839\u20139848).","DOI":"10.1109\/CVPR42600.2020.00986"},{"key":"1695_CR38","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, L.u., & Polosukhin, I. (2017). Attention is all you need. In NeurIPS (pp. 5998\u20136008). Curran Associates, Inc.,. URL http:\/\/papers.nips.cc\/paper\/7181-attention-is-all-you-need.pdf."},{"key":"1695_CR39","doi-asserted-by":"crossref","unstructured":"Wang, M., Ni, B., & Yang, X. (2017). Recurrent modeling of interaction context for collective activity recognition. In CVPR (pp. 3048\u20133056).","DOI":"10.1109\/CVPR.2017.783"},{"key":"1695_CR40","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, L., Wang, L., Guo, J., & Wu, G. (2019). Learning actor relation graphs for group activity recognition. In CVPR (pp. 9964\u20139974).","DOI":"10.1109\/CVPR.2019.01020"},{"key":"1695_CR41","unstructured":"Xie, S., Zheng, H., Liu, C., & Lin, L. (2018). Snas: Stochastic neural architecture search. In ICLR."},{"key":"1695_CR42","unstructured":"Xu, C., Fu, Y., Zhang, B., Chen, Z., Jiang, Y.-G., & Xue, X. (2018). Learning to score the figure skating sports videos. arXiv preprint arXiv:1802.02774."},{"key":"1695_CR43","doi-asserted-by":"crossref","unstructured":"Yan, R., Tang, J., Shu, X., Li, Z., & Tian, Q. (2018a). Participation-contributed temporal dynamic model for group activity recognition. In ACM MM (pp. 1292\u20131300).","DOI":"10.1145\/3240508.3240572"},{"key":"1695_CR44","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., & Lin, D. (2018b). Spatial temporal graph convolutional networks for skeleton-based action recognition. In AAAI.","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"1695_CR45","doi-asserted-by":"crossref","unstructured":"Yao, T., Mei, T., & Rui, Y. (2016). Highlight detection with pairwise deep ranking for first-person video summarization. In CVPR (pp. 982\u2013990).","DOI":"10.1109\/CVPR.2016.112"},{"key":"1695_CR46","doi-asserted-by":"crossref","unstructured":"Zeng, L.-A., Hong, F.-T., Zheng, W.-S., Yu, Q.-Z., Zeng, W., Wang, Y.-W., & Lai, J.-H. (2020). Hybrid dynamic-static context-aware attention network for action assessment in long videos. In ACM MM (pp. 2526\u20132534).","DOI":"10.1145\/3394171.3413560"},{"key":"1695_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, P., Tang, Y., Hu, J.-F., & Zheng, W.-S. (2019). Fast collective activity recognition under weak supervision. TIP 29, 29\u201343.","DOI":"10.1109\/TIP.2019.2918725"},{"key":"1695_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, Q. & Li, B. (2011). Video-based motion expertise analysis in simulation-based surgical training using hierarchical dirichlet process hidden Markov model. In MMAR (pp. 19\u201324), ACM.","DOI":"10.1145\/2072545.2072550"},{"issue":"6","key":"1695_CR49","doi-asserted-by":"publisher","first-page":"1206","DOI":"10.1109\/TPAMI.2014.2361121","volume":"37","author":"Q Zhang","year":"2015","unstructured":"Zhang, Q., & Li, B. (2015). Relative hidden Markov models for video-based evaluation of motion skills in surgical training. TPAMI, 37(6), 1206\u20131218.","journal-title":"TPAMI"},{"key":"1695_CR50","unstructured":"Zhang, Y., Wang, C., Wang, X., Zeng, W., & Liu, W. (2020). Fairmot: On the fairness of detection and re-identification in multiple object tracking. arXiv preprint arXiv:2004.01888."},{"key":"1695_CR51","doi-asserted-by":"crossref","unstructured":"Zhu, K. & Wu, J. (2021). Residual attention: A simple but effective method for multi-label recognition. In ICCV (pp. 184\u2013193).","DOI":"10.1109\/ICCV48922.2021.00025"},{"key":"1695_CR52","first-page":"731","volume":"13","author":"A Zia","year":"2018","unstructured":"Zia, A., & Essa, I. (2018). Automated surgical skill assessment in RMIS training. IJCARS, 13, 731\u2013739.","journal-title":"IJCARS"},{"issue":"9","key":"1695_CR53","first-page":"1623","volume":"11","author":"A Zia","year":"2016","unstructured":"Zia, A., Sharma, Y., Bettadapura, V., Sarin, E. L., Ploetz, T., Clements, M. A., & Essa, I. (2016). Automated video-based assessment of surgical skills for training and evaluation in medical schools. IJCARS, 11(9), 1623\u20131636.","journal-title":"IJCARS"},{"key":"1695_CR54","doi-asserted-by":"crossref","unstructured":"Zia, A., Sharma, Y., Bettadapura, V., Sarin, E. L., & Essa, I. (2018). Video and accelerometer-based motion analysis for automated surgical skills assessment. IJCARS, 13(3), 443\u2013455.","DOI":"10.1007\/s11548-018-1704-z"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01695-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-022-01695-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01695-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,17]],"date-time":"2023-02-17T07:00:01Z","timestamp":1676617201000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-022-01695-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,10]]},"references-count":54,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["1695"],"URL":"https:\/\/doi.org\/10.1007\/s11263-022-01695-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,10]]},"assertion":[{"value":"30 August 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 September 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 December 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}