{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T07:22:25Z","timestamp":1776064945754,"version":"3.50.1"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T00:00:00Z","timestamp":1712707200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T00:00:00Z","timestamp":1712707200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s00138-024-01535-1","type":"journal-article","created":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T11:01:46Z","timestamp":1712746906000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["AP-TransNet: a polarized transformer based aerial human action recognition framework"],"prefix":"10.1007","volume":"35","author":[{"given":"Chhavi","family":"Dhiman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anunay","family":"Varshney","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ved","family":"Vyapak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,4,10]]},"reference":[{"key":"1535_CR1","doi-asserted-by":"crossref","unstructured":"Reshma, R., Ramesh, T., Sathishkumar, P.: Security situational aware intelligent road traffic monitoring using UAVs. In: International Conference on VLSI Systems, Architectures, Technology and Applications (VLSI-SATA), Bengaluru, India, (2016)","DOI":"10.1109\/VLSI-SATA.2016.7593027"},{"key":"1535_CR2","unstructured":"Kaff, A.A., Moreno, F.M., Jos\u00e9, L.J.S., Garc\u00eda, F., Mart\u00edn, D., Escalera, A.D.l., Nieva, A., Garc\u00e9a, J.L.M.: VBII-UAV: Vision-Based Infrastructure Inspection-UAV. In: Recent Advances in Information Systems and Technologies. (WorldCIST 2017) Advances in Intelligent Systems and Computing, Porto Santo Island, Madeira, Portugal, (2017)"},{"issue":"1","key":"1535_CR3","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1109\/MPRV.2017.11","volume":"16","author":"M Erdelj","year":"2012","unstructured":"Erdelj, M., Natalizio, E., Chowdhu, K.R., Akyildiz, I.F.: Help from the Sky: leveraging UAVs for Disaster Management. IEEE Pervasive Comput. 16(1), 24\u201332 (2012)","journal-title":"IEEE Pervasive Comput."},{"issue":"1","key":"1535_CR4","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/TSMCC.2012.2220133","volume":"43","author":"JM Peschel","year":"2013","unstructured":"Peschel, J.M., Murphy, R.R.: On the human-machine interaction of unmanned aerial system mission specialists. IEEE Trans. Human-Machine Syst. 43(1), 53\u201362 (2013)","journal-title":"IEEE Trans. Human-Machine Syst."},{"key":"1535_CR5","doi-asserted-by":"crossref","unstructured":"San, K.T., Mun, S.J., Choe, Y.H., Chang, Y.S.: UAV Delivery Monitoring System. In: MATEC Web of Conferences, (2018)","DOI":"10.1051\/matecconf\/201815104011"},{"issue":"1","key":"1535_CR6","doi-asserted-by":"publisher","first-page":"033542","DOI":"10.1117\/1.3216822","volume":"3","author":"A Rango","year":"2009","unstructured":"Rango, A., Laliberte, A., Herrick, J.E., Winters, C., Havstad, K., Steele, C., Browning, D.: Unmanned aerial vehicle-based remote sensing for rangeland assessment, monitoring, and management. J. Appl. Remote. Sens. 3(1), 033542 (2009)","journal-title":"J. Appl. Remote. Sens."},{"issue":"5","key":"1535_CR7","doi-asserted-by":"publisher","first-page":"3887","DOI":"10.1007\/s10462-020-09943-1","volume":"54","author":"Y Akbari","year":"2021","unstructured":"Akbari, Y., Almaadeed, N., Maadeed, S.A., Elharrouss, O.: Applications, databases and open computer vision research from drone videos and images: a survey. Artif. Int. Rev. 54(5), 3887\u20133938 (2021)","journal-title":"Artif. Int. Rev."},{"issue":"2","key":"1535_CR8","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1016\/j.clsr.2012.01.005","volume":"28","author":"RL Finn","year":"2012","unstructured":"Finn, R.L., Wright, D.: Unmanned aircraft systems: Surveillance, ethics and privacy in civil applications. Comput. Law Secur. Rev. 28(2), 184\u2013194 (2012)","journal-title":"Comput. Law Secur. Rev."},{"issue":"2","key":"1535_CR9","first-page":"213","volume":"11","author":"HC Kim","year":"2016","unstructured":"Kim, H.C., Lim, C.S., Lee, C.S., Choi, J.H.: Introduction of real-time video surveillance system using UAV. J. Commun. 11(2), 213\u2013220 (2016)","journal-title":"J. Commun."},{"key":"1535_CR10","doi-asserted-by":"crossref","unstructured":"Bozcan, I., Kayacan, E.: UAV-AdNet: Unsupervised Anomaly Detection using Deep Neural Networks for Aerial Surveillance. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), Las Vegas, USA, (2020)","DOI":"10.1109\/IROS45743.2020.9341790"},{"key":"1535_CR11","doi-asserted-by":"crossref","unstructured":"Dilshad, N., Hwang, J., Song, J., Sung, N.: Applications and Challenges in Video Surveillance via Drone: A Brief Survey. In: International Conference on Information and Communication Technology Convergence (ICTC), Jeju Islan, Korea, (2020)","DOI":"10.1109\/ICTC49870.2020.9289536"},{"key":"1535_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren S., Sun, J.: Deep Residual Learning for Image Recognition. In: IEEE conference on Computer Vision and Pattern Recognition (CVPR), Las Vegas, Nevada, (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"1","key":"1535_CR13","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/MSP.2017.2764116","volume":"35","author":"R Ranjan","year":"2018","unstructured":"Ranjan, R., Sankaranarayanan, S., Bansal, A., Bodla, N., Chen, J.C., Patel, V.M., Castillo, C.D., Chellappa, R.: Deep learning for understanding faces: machines may be just as good, or better, than humans. IEEE Signal Process. Mag. 35(1), 66\u201383 (2018)","journal-title":"IEEE Signal Process. Mag."},{"issue":"4","key":"1535_CR14","doi-asserted-by":"publisher","first-page":"939","DOI":"10.1109\/TMM.2017.2759504","volume":"20","author":"Z Qiu","year":"2018","unstructured":"Qiu, Z., Yao, T., Mei, T.: Learning deep spatio-temporal dependence for semantic video segmentation. IEEE Trans. Multimed. 20(4), 939\u2013949 (2018)","journal-title":"IEEE Trans. Multimed."},{"key":"1535_CR15","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3150917","author":"P Jin","year":"2022","unstructured":"Jin, P., Mou, L., Hua, Y., Xia, G.S., Zhu, X.X.: FuTH-Net: fusing temporal relations and holistic features for aerial video classification. IEEE Trans. Geosci. Remote Sensing (2022). https:\/\/doi.org\/10.1109\/TGRS.2022.3150917","journal-title":"IEEE Trans. Geosci. Remote Sensing"},{"key":"1535_CR16","doi-asserted-by":"crossref","unstructured":"Hou, R., Chen, C., Shah, M.: Tube Convolutional Neural Network (T-CNN) for Action Detection in Videos. In: IEEE International Conference on Computer Vision (ICCV), Venice, Italy, (2017)","DOI":"10.1109\/ICCV.2017.620"},{"key":"1535_CR17","doi-asserted-by":"publisher","DOI":"10.1049\/ipr2.12725","author":"Z Yang","year":"2022","unstructured":"Yang, Z., An, G., Zhang, R., Zheng, Z., Ruan, Q.: SRI3D: Two-stream inflated 3D ConvNet based on sparse regularization for action recognition. IET Image Process. (2022). https:\/\/doi.org\/10.1049\/ipr2.12725","journal-title":"IET Image Process."},{"key":"1535_CR18","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1016\/j.future.2021.06.045","volume":"125","author":"K Muhammad","year":"2021","unstructured":"Muhammad, K., Ullah, M.A., Imran, A.S., Sajjad, M., Kiran, M.S., Sannino, G., Albuquerque, V.H.C.D.: Human action recognition using attention based LSTM network with dilated CNN features. Future Gener. Comput. Syst. 125, 820\u2013830 (2021)","journal-title":"Future Gener. Comput. Syst."},{"issue":"3","key":"1535_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3441628","volume":"17","author":"C Dhiman","year":"2020","unstructured":"Dhiman, C., Vishwakarma, D.K., Aggarwal, P.: Part-wise Spatio-temporal attention driven CNN based 3D human action recognition. ACM Trans. Multimed. Comput. Commun. Appl. 17(3), 1\u201324 (2020)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1535_CR20","doi-asserted-by":"publisher","first-page":"3835","DOI":"10.1109\/TIP.2020.2965299","volume":"29","author":"C Dhiman","year":"2020","unstructured":"Dhiman, C., Vishwakarma, D.K.: View-invariant deep architecture for human action recognition using two-stream motion and shape temporal dynamics. IEEE Trans. Image Process. (TIP) 29, 3835\u20133844 (2020)","journal-title":"IEEE Trans. Image Process. (TIP)"},{"issue":"6","key":"1535_CR21","doi-asserted-by":"publisher","first-page":"1510","DOI":"10.1109\/TPAMI.2017.2712608","volume":"40","author":"G Varol","year":"2018","unstructured":"Varol, G., Laptev, I., Schmid, C.: Long-term temporal convolutions for action recognition. IEEE Trans. Pattern Recog. Machine Intell. 40(6), 1510\u20131517 (2018)","journal-title":"IEEE Trans. Pattern Recog. Machine Intell."},{"key":"1535_CR22","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, USA, (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"1535_CR23","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A Closer Look at Spatiotemporal Convolutions for Action Recognition. In: IEEE International Conference on Pattern Recognition (CVPR) , Salt Lake City, Utah, (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"1535_CR24","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning Spatiotemporal Features with 3D Convolutional Networks. In: IEEE International Conference on Computer Vision (ICCV), Santiago, Chile, (2015)","DOI":"10.1109\/ICCV.2015.510"},{"issue":"4","key":"1535_CR25","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1109\/TPAMI.2016.2599174","volume":"39","author":"J Donahue","year":"2017","unstructured":"Donahue, J., Hendricks, L.A., Rohrbach, M., Venugopalan, S., Guadarrama, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 677\u2013691 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1535_CR26","unstructured":"Ng, J.Y.-H., Hausknecht, M., Vijayanarasimhan, S., Vinyals, O., Monga, R., Toderici, G.: Beyond Short Snippets: Deep Networks for Video Classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR),, Boston, MA, USA, (2015)"},{"key":"1535_CR27","doi-asserted-by":"publisher","first-page":"122583","DOI":"10.1109\/ACCESS.2019.2938249","volume":"7","author":"R Geraldes","year":"2019","unstructured":"Geraldes, R., Gon\u00e7alves, A., Lai, T., Villerabel, M., Deng, W., Salta, A., Nakayama, K.: UAV-based situational awareness system using deep learning. IEEE Access 7, 122583\u2013122594 (2019)","journal-title":"IEEE Access"},{"key":"1535_CR28","doi-asserted-by":"publisher","first-page":"2259","DOI":"10.1007\/s10462-020-09904-8","volume":"54","author":"P Pareek","year":"2021","unstructured":"Pareek, P., Thakkar, A.: A survey on video-based human action recognition: recent updates, datasets, challenges, and applications. Artif. Intell. Rev. 54, 2259\u20132232 (2021)","journal-title":"Artif. Intell. Rev."},{"key":"1535_CR29","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.engappai.2018.08.014","volume":"77","author":"C Dhiman","year":"2019","unstructured":"Dhiman, C., Vishwakarma, D.K.: A review of state-of-the-art techniques for abnormal human activity recognition. Eng. Appl. Artif. Intell. 77, 21\u201345 (2019)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1535_CR30","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3D: Expanding Architectures for Efficient Video Recognition. In: Conference on Computer Vision and Pattern Recognition (CVPR), Virtual, (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"1535_CR31","unstructured":"Behl, H.S., Sapienza, M., Sin, G., Saha, S., Cuzzolin, F., Torr, P.H.S.: Incremental Tube Construction for Human Action Detection. In: British Machine Vision Conference (BMVC) , Northumbria University Newcastle, (2018)"},{"key":"1535_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103186","author":"W Sultani","year":"2021","unstructured":"Sultani, W., Shah, M.: Human action recognition in drone videos using a few aerial training examples. Comp. Vision Pattern Recogn. (2021). https:\/\/doi.org\/10.1016\/j.cviu.2021.103186","journal-title":"Comp. Vision Pattern Recogn."},{"key":"1535_CR33","doi-asserted-by":"crossref","unstructured":"Zhou, X., Liu, S., Pavlakos, G., Kumar, V., Daniilidis, K.: Human Motion Capture Using a Drone. In: IEEE International Conference on Robotics and Automation (ICRA), Brisbane, Australia, (2018)","DOI":"10.1109\/ICRA.2018.8462830"},{"key":"1535_CR34","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Kristina, K.: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In: Computation and Language, (2019)"},{"key":"1535_CR35","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative. In: Pre-print, (2018)"},{"key":"1535_CR36","unstructured":"Liu, M.O.N.G.J.D.M.J.D.C.O.L.M.L.L.Z.V.S.Y.: RoBERTa: A Robustly Optimized BERT Pretraining Approach. In: preprint arXiv:1907.11692 , (2019)"},{"key":"1535_CR37","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv:2010.11929, (2020)"},{"key":"1535_CR38","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9g, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, PMLR, (2021)"},{"key":"1535_CR39","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagor, S.: End-to-End Object Detection with Transformers. In: European conference on computer vision, (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1535_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xu, Z., Wang, X., Shen, C., Cheng, B., Shen, H., Xia, H.: End-to-end video instance segmentation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (2021)","DOI":"10.1109\/CVPR46437.2021.00863"},{"key":"1535_CR41","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Luci, M.: ViViT: A Video Vision Transformer. In: International conference on Computer Vision (ICCV), (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"1535_CR42","doi-asserted-by":"crossref","unstructured":"Chen, J., Ho, C.M.: MM-ViT: Multi-Modal Video Transformer for Compressed Video Action Recognition. In: WACV, (2022)","DOI":"10.1109\/WACV51458.2022.00086"},{"key":"1535_CR43","doi-asserted-by":"publisher","first-page":"2217","DOI":"10.3390\/s21062217","volume":"6","author":"B Zhao","year":"2021","unstructured":"Zhao, B., Wang, Y., Su, K., Ren, H., Sun, H.: reading pictures instead of looking\": RGB-D image-based action recognition via capsule network and kalman filter. Sensors (Basel) 6, 2217 (2021)","journal-title":"Sensors (Basel)"},{"key":"1535_CR44","doi-asserted-by":"crossref","unstructured":"He, J., Gao, S.: TBSN: Sparse-Transformer Based Siamese Network for Few-Shot Action Recognition. In: 2nd Information Communication Technologies Conference (ICTC), Nanjing, China, (2021)","DOI":"10.1109\/ICTC51749.2021.9441568"},{"key":"1535_CR45","doi-asserted-by":"crossref","unstructured":"Akkaya, I.B. Kathiresan, S.S., Arani, E., Zonooz, B.: Enhancing Performance of Vision Transformers on Small Datasets through Local Inductive Bias Incorporation. In: arXiv:2305.08551 [cs.CV], (2023)","DOI":"10.1016\/j.patcog.2024.110510"},{"key":"1535_CR46","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., Rabinovich, A.: Going deeper with convolutions. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Boston, MA, USA, (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1535_CR47","doi-asserted-by":"crossref","unstructured":"Liu, H., Liu, F., Fan X., Huang, D.: Polarized Self-Attention: Towards High-quality Pixel-wise Regression. In: arXiv:2107.00782v2 [cs.CV], (2021)","DOI":"10.1016\/j.neucom.2022.07.054"},{"key":"1535_CR48","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., Kweon, I.S.: CBAM: Convolutional block attention module. In: ECCV, Munich, (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1535_CR49","doi-asserted-by":"crossref","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., Lu, H.: Dual attention network for scene segmentation. In: CVPR, Long Beach, CA, (2019)","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1535_CR50","unstructured":"Hu, J., Shen, L., Albanie, S., Sun, G., Vedaldi, A.: Gather-excite: Exploiting feature context in convolutional neural networks. In: NIPS, Montreal, Canada, (2018)"},{"key":"1535_CR51","doi-asserted-by":"crossref","unstructured":"Cao, Y., Xu, J., Lin, S., Wei, F., Hu, H.: Gcnet: Non-local networks meet squeeze-excitation networks and beyond. In: ICCV, Seoul, Korea, (2019)","DOI":"10.1109\/ICCVW.2019.00246"},{"key":"1535_CR52","doi-asserted-by":"publisher","DOI":"10.3390\/drones3040082","author":"AG Perera","year":"2019","unstructured":"Perera, A.G., Law, Y.W., Chah, J.: Drone-action: an outdoor recorded drone video dataset for action recognition. Drones (2019). https:\/\/doi.org\/10.3390\/drones3040082","journal-title":"Drones"},{"key":"1535_CR53","unstructured":"Nagendran, A., Harper, D., Shah, M.: Visual sensors and an inertial navigation system mounted on a helium balloon can collect high-definition video that is synchronized with metadata. In: SPIE : The international SOciety of optics and photonics, (2010)"},{"key":"1535_CR54","first-page":"1","volume":"99","author":"AG Perera","year":"2020","unstructured":"Perera, A.G., Law, Y.W., Ogunwa, T., Chahl, J.: A multiviewpoint outdoor dataset for human action recognition. IEEE Trans. Human-Machine Syst. 99, 1\u20139 (2020)","journal-title":"IEEE Trans. Human-Machine Syst."},{"key":"1535_CR55","unstructured":"Kingma, D., Ba, J.: Adam: A Method for Stochastic Optimization. In: arXiv, (2014)"},{"key":"1535_CR56","doi-asserted-by":"publisher","DOI":"10.4467\/20838476SI.16.004.6185","author":"K Janocha","year":"2017","unstructured":"Janocha, K., Czarnecki, W.M.: On loss functions for deep neural networks in classification. Theor. Foundat. Machine Learn. (TFML ) (2017). https:\/\/doi.org\/10.4467\/20838476SI.16.004.6185","journal-title":"Theor. Foundat. Machine Learn. (TFML )"},{"key":"1535_CR57","unstructured":"Zhang, Z., Sabuncu, M.R.: Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels. In: Conference on Neural Information Processing Systems (NeurIPS), Montr\u00e9al, Canada, (2018)"},{"key":"1535_CR58","unstructured":"Rodriguez, E.G. Ganem, G.L., Pleiss, G., Cunningham, J.P.: Uses and Abuses of the Cross-Entropy Loss: Case Studies in Modern Deep Learning. In: Proceedings on \"I Can't Believe It's Not Better!\" at NeurIPS Workshops, PMLR, (2020)"},{"key":"1535_CR59","unstructured":"Han, P., Abolfazl, R.: Fully Autonomous UAV-Based Action Recognition System Using Aerial Imagery. In: Advances in Visual Computing; Lecture Notes in Computer Science; Springer, Cham, Switzerland, (2020)"},{"issue":"3","key":"1535_CR60","doi-asserted-by":"publisher","first-page":"148","DOI":"10.3390\/drones7030148","volume":"7","author":"NA Othman","year":"2023","unstructured":"Othman, N.A., Aydin, I.: Development of a novel lightweight CNN model for classification of human actions in UAV-captured videos. Drones 7(3), 148 (2023)","journal-title":"Drones"},{"key":"1535_CR61","doi-asserted-by":"publisher","first-page":"103186","DOI":"10.1016\/j.cviu.2021.103186","volume":"206","author":"W Sultania","year":"2021","unstructured":"Sultania, W., Shah, M.: Human action recognition in drone videos using a few aerial training examples. Comp. Vision Image Understan. 206, 103186 (2021)","journal-title":"Comp. Vision Image Understan."},{"key":"1535_CR62","doi-asserted-by":"crossref","unstructured":"Jhuang, H., Gall, J., Zuffi, S., Schmid, C., Black, M.J.: Towards Understanding Action Recognition. In: IEEE International Conference on Computer Vision, Sydney, Australia, (2013)","DOI":"10.1109\/ICCV.2013.396"},{"key":"1535_CR63","doi-asserted-by":"crossref","unstructured":"Cheron, G., Laptev, I., Schmid, C.: PCNN: Pose-Based CNN Features for Action Recognition. In: IEEE International Conference on Computer Vision (ICCV), Santiago, Chile, (2015)","DOI":"10.1109\/ICCV.2015.368"},{"key":"1535_CR64","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3d: Expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern, (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"1535_CR65","doi-asserted-by":"crossref","unstructured":"Kothandaraman, D., Guan, T., Wang, X., Hu, S., Lin, M., Manocha, D.: FAR: Fourier aerial video recognition. In: arXiv:2203.10694, (2022)","DOI":"10.1007\/978-3-031-19836-6_37"},{"key":"1535_CR66","doi-asserted-by":"crossref","unstructured":"Wang, X., Xian, R., Guan, T., Melo, C.M.D. Nogar, S.M. Bera, A., Manocha, D.: AZTR: Aerial Video Action Recognition with Auto Zoom and Temporal. In: arXiv:2303.01589v1, (2023)","DOI":"10.1109\/ICRA48891.2023.10160564"},{"key":"1535_CR67","first-page":"1","volume":"99","author":"AG Perera","year":"2020","unstructured":"Perera, A.G., Law, Y.W., Ogunwa, T., Chahl, J.: A multi-viewpoint outdoor dataset for human action recognition. IEEE Trans. Human Machine Syst. 99, 1\u20139 (2020)","journal-title":"IEEE Trans. Human Machine Syst."},{"key":"1535_CR68","doi-asserted-by":"publisher","first-page":"107140","DOI":"10.1016\/j.patcog.2019.107140","volume":"100","author":"M Hazar","year":"2020","unstructured":"Hazar, M., Fatma, B., Mohamed, H.: Human activity recognition from UAV-captured video sequences. Pattern Recogn. 100, 107140 (2020)","journal-title":"Pattern Recogn."},{"key":"1535_CR69","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3182315","author":"N Aldahoul","year":"2022","unstructured":"Aldahoul, N., Karim, H.A., Sabri, A.Q.M., Tan, M.J.T., Momo, M.A., Fermin, J.L.: A comparison between various human detectors and cnn-based feature extractors for human activity recognition via aerial. IEEE Access (2022). https:\/\/doi.org\/10.1109\/ACCESS.2022.3182315","journal-title":"IEEE Access"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01535-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01535-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01535-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T04:22:04Z","timestamp":1715660524000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01535-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,10]]},"references-count":69,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["1535"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01535-1","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,10]]},"assertion":[{"value":"1 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 March 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 April 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare they have no financial interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"52"}}