{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T00:58:51Z","timestamp":1780880331105,"version":"3.54.1"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Displays"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.displa.2026.103500","type":"journal-article","created":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T06:31:37Z","timestamp":1777271497000},"page":"103500","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Efficient analysis on behavior video: A fine-grained self-stimulatory dataset and a balanced patch-partition embedding framework"],"prefix":"10.1016","volume":"94","author":[{"given":"Yunxiu","family":"Zhao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jinyang","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shigang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Feiyong","family":"Jia","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Meimei","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Honghua","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jian","family":"Wei","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yan","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaozhou","family":"Hu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yang","family":"Gao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiyuan","family":"Zha","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.displa.2026.103500_b1","article-title":"Interpersonal synchronization and eye-tracking in children with autism spectrum disorder: A systematic review","volume":"87","author":"hua Zhang","year":"2025","journal-title":"Displays"},{"key":"10.1016\/j.displa.2026.103500_b2","doi-asserted-by":"crossref","first-page":"224","DOI":"10.1007\/s10803-016-2938-7","article-title":"Brief Report: Prevalence of Co-occurring Epilepsy and Autism Spectrum Disorder: The U.S. National Survey of Children\u2019s Health 2011\u20132012","volume":"47","author":"Thomas","year":"2017","journal-title":"J. Autism Dev. Disord."},{"key":"10.1016\/j.displa.2026.103500_b3","series-title":"ADI-R. Autism Diagnostic Interview Revised. Manual","author":"Rutter","year":"2003"},{"key":"10.1016\/j.displa.2026.103500_b4","series-title":"Autism Diagnostic Observation Schedule","author":"Catherine","year":"2012"},{"issue":"1","key":"10.1016\/j.displa.2026.103500_b5","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1038\/s41398-020-01015-w","article-title":"Computer vision in autism spectrum disorder research: a systematic review of published studies from 2009 to 2019","volume":"10","author":"De Belen","year":"2020","journal-title":"Transl. Psychiatry"},{"key":"10.1016\/j.displa.2026.103500_b6","series-title":"2013 IEEE International Conference on Computer Vision Workshops","first-page":"755","article-title":"Self-Stimulatory Behaviours in the Wild for Autism Diagnosis","author":"Rajagopalan","year":"2013"},{"issue":"12","key":"10.1016\/j.displa.2026.103500_b7","doi-asserted-by":"crossref","first-page":"1354","DOI":"10.1111\/jcpp.12269","article-title":"Heterogeneity and plasticity in the development of language: a 17-year follow-up of children referred early for possible autism","volume":"55","author":"Pickles","year":"2014","journal-title":"J. Child Psychol. Psychiatry"},{"key":"10.1016\/j.displa.2026.103500_b8","series-title":"2019 IEEE International Conference on Multimedia and Expo","first-page":"272","article-title":"Video-Based Early ASD Detection via Temporal Pyramid Networks","author":"Tian","year":"2019"},{"issue":"3","key":"10.1016\/j.displa.2026.103500_b9","doi-asserted-by":"crossref","first-page":"775","DOI":"10.1017\/S0954579408000370","article-title":"Early behavioral intervention, brain plasticity, and the prevention of autism spectrum disorder","volume":"20","author":"Dawson","year":"2008","journal-title":"Dev. Psychopathol."},{"key":"10.1016\/j.displa.2026.103500_b10","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2025.103049","article-title":"Autism screening for children based on appearance features across multiple paradigms","volume":"88","author":"Lu","year":"2025","journal-title":"Displays"},{"issue":"8","key":"10.1016\/j.displa.2026.103500_b11","doi-asserted-by":"crossref","first-page":"5427","DOI":"10.1109\/TCSVT.2022.3148392","article-title":"Influence-aware attention networks for anomaly detection in surveillance videos","volume":"32","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.displa.2026.103500_b12","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2025.103298","article-title":"3D human pose estimation-based action recognition method for complex industrial scenarios","volume":"92","author":"Zhang","year":"2026","journal-title":"Displays"},{"key":"10.1016\/j.displa.2026.103500_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2025.103020","article-title":"vAQA-SS: Vision-based action quality assessment for style-based skiing","volume":"88","author":"Wen","year":"2025","journal-title":"Displays"},{"key":"10.1016\/j.displa.2026.103500_b14","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2022.102360","article-title":"A multimodal discrimination method for the response to name behavior of autistic children based on human pose tracking and head pose estimation","volume":"76","author":"Song","year":"2023","journal-title":"Displays"},{"key":"10.1016\/j.displa.2026.103500_b15","doi-asserted-by":"crossref","unstructured":"P. Washington, A. Kline, O.C. Mutlu, E. Leblanc, C. Hou, N. Stockham, K. Paskov, B. Chrisman, D. Wall, Activity recognition with moving cameras and few training examples: applications for detection of autism-related headbanging, in: Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems, 2021, pp. 1\u20137.","DOI":"10.1145\/3411763.3451701"},{"issue":"4","key":"10.1016\/j.displa.2026.103500_b16","doi-asserted-by":"crossref","first-page":"1427","DOI":"10.1007\/s13246-023-01309-5","article-title":"Automated anomalous child repetitive head movement identification through transformer networks","volume":"46","author":"Wedasingha","year":"2023","journal-title":"Phys. Eng. Sci. Med."},{"key":"10.1016\/j.displa.2026.103500_b17","series-title":"ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"Unsupervised video anomaly detection for stereotypical behaviours in autism","author":"Gao","year":"2023"},{"key":"10.1016\/j.displa.2026.103500_b18","doi-asserted-by":"crossref","unstructured":"C. Feichtenhofer, H. Fan, J. Malik, K. He, SlowFast Networks for Video Recognition, in: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV, 2019, pp. 6201\u20136210.","DOI":"10.1109\/ICCV.2019.00630"},{"key":"10.1016\/j.displa.2026.103500_b19","doi-asserted-by":"crossref","unstructured":"J. Lin, C. Gan, S. Han, Tsm: Temporal shift module for efficient video understanding, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 7083\u20137093.","DOI":"10.1109\/ICCV.2019.00718"},{"key":"10.1016\/j.displa.2026.103500_b20","doi-asserted-by":"crossref","unstructured":"Z. Liu, L. Wang, W. Wu, C. Qian, T. Lu, Tam: Temporal adaptive module for video recognition, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 13708\u201313718.","DOI":"10.1109\/ICCV48922.2021.01345"},{"key":"10.1016\/j.displa.2026.103500_b21","series-title":"2022 IEEE International Conference on Image Processing","first-page":"3356","article-title":"Detecting a child\u2019s stimming behaviours for autism spectrum disorder diagnosis using rgbpose-slowfast network","author":"S","year":"2022"},{"issue":"6","key":"10.1016\/j.displa.2026.103500_b22","doi-asserted-by":"crossref","DOI":"10.1016\/j.heliyon.2023.e16763","article-title":"Vision-based activity recognition in children with autism-related behaviors","volume":"9","author":"Wei","year":"2023","journal-title":"Heliyon"},{"key":"10.1016\/j.displa.2026.103500_b23","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/j.neucom.2021.03.004","article-title":"Vision-assisted recognition of stereotype behaviors for early diagnosis of autism spectrum disorders","volume":"446","author":"Negin","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.displa.2026.103500_b24","series-title":"2023 IEEE 36th International Symposium on Computer-Based Medical Systems","first-page":"225","article-title":"Stimming behavior dataset-unifying stereotype behavior dataset in the wild","author":"Ribeiro","year":"2023"},{"key":"10.1016\/j.displa.2026.103500_b25","doi-asserted-by":"crossref","unstructured":"A. Ali, F.F. Negin, F.F. Bremond, S. Th\u00fcmmler, Video-based behavior understanding of children for objective diagnosis of autism, in: VISAPP 2022-17th International Conference on Computer Vision Theory and Applications, 2022.","DOI":"10.5220\/0010839200003124"},{"issue":"7","key":"10.1016\/j.displa.2026.103500_b26","first-page":"1270","article-title":"Stereotypical motor movement recognition using microsoft kinect with artificial neural network","volume":"10","author":"Jazouli","year":"2016","journal-title":"Int. J. Comput. Inf. Eng."},{"issue":"3","key":"10.1016\/j.displa.2026.103500_b27","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1504\/IJBET.2019.097621","article-title":"Automatic detection of stereotyped movements in autistic children using the kinect sensor","volume":"29","author":"Jazouli","year":"2019","journal-title":"Int. J. Biomed. Eng. Technol."},{"key":"10.1016\/j.displa.2026.103500_b28","series-title":"2017 12th IEEE International Conference on Automatic Face & Gesture Recognition","first-page":"762","article-title":"Automatic detection of ADHD and ASD from expressive behaviour in rgbd data","author":"Jaiswal","year":"2017"},{"key":"10.1016\/j.displa.2026.103500_b29","series-title":"2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance","first-page":"1","article-title":"3D-AD: 3D-autism dataset for repetitive behaviours with kinect sensor","author":"Rihawi","year":"2017"},{"key":"10.1016\/j.displa.2026.103500_b30","doi-asserted-by":"crossref","unstructured":"D. Tran, L. Bourdev, R. Fergus, L. Torresani, M. Paluri, Learning spatiotemporal features with 3d convolutional networks, in: Proceedings of the IEEE International Conference on Computer Vision, 2015, pp. 4489\u20134497.","DOI":"10.1109\/ICCV.2015.510"},{"key":"10.1016\/j.displa.2026.103500_b31","series-title":"European Conference on Computer Vision","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"Wang","year":"2016"},{"key":"10.1016\/j.displa.2026.103500_b32","doi-asserted-by":"crossref","unstructured":"J. Carreira, A. Zisserman, Quo vadis, action recognition? a new model and the kinetics dataset, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 6299\u20136308.","DOI":"10.1109\/CVPR.2017.502"},{"key":"10.1016\/j.displa.2026.103500_b33","first-page":"11966","article-title":"Temporal interlacing network","volume":"vol. 34","author":"Shao","year":"2020"},{"key":"10.1016\/j.displa.2026.103500_b34","first-page":"4","article-title":"Is space-time attention all you need for video understanding?","volume":"vol. 2","author":"Bertasius","year":"2021"},{"key":"10.1016\/j.displa.2026.103500_b35","doi-asserted-by":"crossref","unstructured":"Z. Liu, J. Ning, Y. Cao, Y. Wei, Z. Zhang, S. Lin, H. Hu, Video swin transformer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 3202\u20133211.","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"10.1016\/j.displa.2026.103500_b36","doi-asserted-by":"crossref","unstructured":"D.C. Senadeera, X. Yang, D. Kollias, G. Slabaugh, Cue-net: violence detection video analytics with spatial cropping enhanced uniformerv2 and modified efficient additive attention, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 4888\u20134897.","DOI":"10.1109\/CVPRW63382.2024.00493"},{"key":"10.1016\/j.displa.2026.103500_b37","doi-asserted-by":"crossref","first-page":"3805","DOI":"10.1109\/TIP.2020.2966082","article-title":"A multimodal saliency model for videos with high audio-visual correspondence","volume":"29","author":"Min","year":"2020","journal-title":"IEEE Trans. Image Process."},{"issue":"7","key":"10.1016\/j.displa.2026.103500_b38","doi-asserted-by":"crossref","first-page":"6575","DOI":"10.1109\/TCSVT.2025.3540104","article-title":"Full-reference and no-reference quality assessment for video frame interpolation","volume":"35","author":"Han","year":"2025","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.displa.2026.103500_b39","doi-asserted-by":"crossref","first-page":"6054","DOI":"10.1109\/TIP.2020.2988148","article-title":"Study of subjective and objective quality assessment of audio-visual signals","volume":"29","author":"Min","year":"2020","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.displa.2026.103500_b40","doi-asserted-by":"crossref","first-page":"7607","DOI":"10.1109\/TMM.2022.3224319","article-title":"Blind Image Quality Assessment via Cross-View Consistency","volume":"25","author":"Zhu","year":"2023","journal-title":"IEEE Trans. Multimed."},{"issue":"1","key":"10.1016\/j.displa.2026.103500_b41","doi-asserted-by":"crossref","first-page":"551","DOI":"10.1109\/TCSVT.2025.3572380","article-title":"Future Fixation Sequence Prediction for Audio-Visual 360\u00b0Videos","volume":"36","author":"Zhu","year":"2026","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.displa.2026.103500_b42","first-page":"222","article-title":"Head and Eye Movement Prediction in Omnidirectional Videos via Multimodal Fusion and Knowledge Distillation","volume":"2","author":"Zhu","year":"2025","journal-title":"IEEE Open J. Immersive Displays"},{"issue":"7","key":"10.1016\/j.displa.2026.103500_b43","doi-asserted-by":"crossref","first-page":"4188","DOI":"10.1109\/TCSVT.2021.3126590","article-title":"Viewing Behavior Supported Visual Saliency Predictor for 360 Degree Videos","volume":"32","author":"Zhu","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.displa.2026.103500_b44","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2025.103199","article-title":"Scanpath prediction in panoramic videos through multimodal fusion","volume":"91","author":"Zhu","year":"2026","journal-title":"Displays"},{"issue":"2s","key":"10.1016\/j.displa.2026.103500_b45","first-page":"1","article-title":"Toward Visual Behavior and Attention Understanding for Augmented 360 Degree Videos","volume":"19","author":"Zhu","year":"2023","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"issue":"8","key":"10.1016\/j.displa.2026.103500_b46","doi-asserted-by":"crossref","first-page":"7778","DOI":"10.1109\/TCSVT.2025.3544659","article-title":"Exploring rich subjective quality information for image quality assessment in the wild","volume":"35","author":"Min","year":"2025","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"8","key":"10.1016\/j.displa.2026.103500_b47","doi-asserted-by":"crossref","first-page":"2879","DOI":"10.1109\/TITS.2018.2868771","article-title":"Objective quality evaluation of dehazed images","volume":"20","author":"Min","year":"2019","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.displa.2026.103500_b48","doi-asserted-by":"crossref","first-page":"3790","DOI":"10.1109\/TIP.2020.2966081","article-title":"A metric for light field reconstruction, compression, and display quality evaluation","volume":"29","author":"Min","year":"2020","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"10.1016\/j.displa.2026.103500_b49","doi-asserted-by":"crossref","first-page":"10865","DOI":"10.1038\/s41598-023-38099-5","article-title":"Unsupervised blind image quality assessment via joint spatial and transform features","volume":"13","author":"Yang","year":"2023","journal-title":"Sci. Rep."},{"issue":"7","key":"10.1016\/j.displa.2026.103500_b50","doi-asserted-by":"crossref","first-page":"2046","DOI":"10.1109\/TASL.2011.2109381","article-title":"Subjective and objective quality assessment of audio source separation","volume":"19","author":"Emiya","year":"2011","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"issue":"4","key":"10.1016\/j.displa.2026.103500_b51","doi-asserted-by":"crossref","DOI":"10.1088\/1361-6560\/ad1f88","article-title":"Balanced transformer: efficient classification of glioblastoma and primary central nervous system lymphoma","volume":"69","author":"Wang","year":"2024","journal-title":"Phys. Med. Biol."},{"key":"10.1016\/j.displa.2026.103500_b52","doi-asserted-by":"crossref","unstructured":"J. Deng, W. Dong, R. Socher, L.-J. Li, K. Li, L. Fei-Fei, ImageNet: A large-scale hierarchical image database, in: 2009 IEEE Conference on Computer Vision and Pattern Recognition, 2009, pp. 248\u2013255.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"10.1016\/j.displa.2026.103500_b53","doi-asserted-by":"crossref","unstructured":"R.R. Selvaraju, M. Cogswell, A. Das, R. Vedantam, D. Parikh, D. Batra, Grad-CAM: Visual explanations from deep networks via gradient-based localization, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 618\u2013626.","DOI":"10.1109\/ICCV.2017.74"}],"container-title":["Displays"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0141938226001630?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0141938226001630?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T00:14:10Z","timestamp":1780877650000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0141938226001630"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":53,"alternative-id":["S0141938226001630"],"URL":"https:\/\/doi.org\/10.1016\/j.displa.2026.103500","relation":{"is-supplemented-by":[{"id-type":"uri","id":"https:\/\/github.com\/iris-zhaoyx\/stimasd","asserted-by":"subject"}]},"ISSN":["0141-9382"],"issn-type":[{"value":"0141-9382","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Efficient analysis on behavior video: A fine-grained self-stimulatory dataset and a balanced patch-partition embedding framework","name":"articletitle","label":"Article Title"},{"value":"Displays","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.displa.2026.103500","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"103500"}}