{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T14:40:23Z","timestamp":1777300823930,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["62576346"],"award-info":[{"award-number":["62576346"]}]},{"name":"MOE Project of Key Research Institute of Humanities and Social Sciences","award":["22JJD110001"],"award-info":[{"award-number":["22JJD110001"]}]},{"name":"the research funds of Renmin University of China","award":["24XNKJ13"],"award-info":[{"award-number":["24XNKJ13"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3774904.3792197","type":"proceedings-article","created":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T13:28:36Z","timestamp":1777296516000},"page":"7013-7023","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Byte-token Enhanced Language Models for Temporal Point Processes Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6669-9190","authenticated-orcid":false,"given":"Quyu","family":"Kong","sequence":"first","affiliation":[{"name":"Independent Researcher, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0094-7143","authenticated-orcid":false,"given":"Yixuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Southeast University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8389-9182","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"additional","affiliation":[{"name":"Independent Researcher, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3046-5143","authenticated-orcid":false,"given":"Panrong","family":"Tong","sequence":"additional","affiliation":[{"name":"Independent Researcher, Hangzhou, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7913-4238","authenticated-orcid":false,"given":"Enqi","family":"Liu","sequence":"additional","affiliation":[{"name":"Independent Researcher, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0842-306X","authenticated-orcid":false,"given":"Feng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Center for Applied Statistics and School of Statistics, Renmin University of China, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2026,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"International conference on learning representations.","author":"Bae Wonho","year":"2023","unstructured":"Wonho Bae, Mohamed Osama Ahmed, Frederick Tung, and Gabriel L Oliveira. 2023. Meta temporal point processes. In International conference on learning representations."},{"key":"e_1_3_2_1_3_1","volume-title":"International Conference on Machine Learning. PMLR, 1692-1717","author":"Bao Fan","year":"2023","unstructured":"Fan Bao, Shen Nie, Kaiwen Xue, Chongxuan Li, Shi Pu, Yaole Wang, Gang Yue, Yue Cao, Hang Su, and Jun Zhu. 2023. One transformer fits all distributions in multi-modal diffusion at scale. In International Conference on Machine Learning. PMLR, 1692-1717."},{"key":"e_1_3_2_1_4_1","volume-title":"Clex: Continuous length extrapolation for large language models. arXiv preprint arXiv:2310.16450","author":"Chen Guanzheng","year":"2023","unstructured":"Guanzheng Chen, Xin Li, Zaiqiao Meng, Shangsong Liang, and Lidong Bing. 2023a. Clex: Continuous length extrapolation for large language models. arXiv preprint arXiv:2310.16450 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Extending context window of large language models via positional interpolation. arXiv preprint arXiv:2306.15595","author":"Chen Shouyuan","year":"2023","unstructured":"Shouyuan Chen, Sherman Wong, Liangjian Chen, and Yuandong Tian. 2023b. Extending context window of large language models via positional interpolation. arXiv preprint arXiv:2306.15595 (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2001-3_13"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Tri Dao Daniel Y. Fu Stefano Ermon Atri Rudra and Christopher R\u00e9. 2022. FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness. In Advances in Neural Information Processing Systems (NeurIPS).","DOI":"10.52202\/068431-1189"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939875"},{"key":"e_1_3_2_1_9_1","volume-title":"Shuang Li, Hongyuan Zha, and Le Song.","author":"Farajtabar Mehrdad","year":"2015","unstructured":"Mehrdad Farajtabar, Yichen Wang, Manuel Gomez Rodriguez, Shuang Li, Hongyuan Zha, and Le Song. 2015. COEVOLVE: A Joint Point Process Model for Information Diffusion and Network Co-evolution. In NeurIPS."},{"key":"e_1_3_2_1_10_1","unstructured":"Chaoyou Fu Haojia Lin Xiong Wang Yi-Fan Zhang Yunhang Shen Xiaoyu Liu Yangze Li Zuwei Long Heting Gao Ke Li et al. 2025. VITA-1.5: Towards GPT-4o Level Real-Time Vision and Speech Interaction. arXiv preprint arXiv:2501.01957 (2025)."},{"key":"e_1_3_2_1_11_1","unstructured":"Aaron Hurst Adam Lerer Adam P Goucher Adam Perelman Aditya Ramesh Aidan Clark AJ Ostrow Akila Welihinda Alan Hayes Alec Radford et al. 2024. Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v8i1.14550"},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Learning Representations.","author":"Jin Ming","year":"2024","unstructured":"Ming Jin, Shiyu Wang, Lintao Ma, Zhixuan Chu, James Y Zhang, Xiaoming Shi, Pin-Yu Chen, Yuxuan Liang, Yuan-Fang Li, Shirui Pan, and Qingsong Wen. 2024. Time-LLM: Time series forecasting by reprogramming large language models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_14_1","volume-title":"SNAP Datasets: Stanford large network dataset collection. Retrieved","author":"Jure Leskovec","year":"2021","unstructured":"Leskovec Jure. 2014. SNAP Datasets: Stanford large network dataset collection. Retrieved December 2021 from http:\/\/snap. stanford. edu\/data (2014)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583481"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Quyu Kong Marian-Andrei Rizoiu and Lexing Xie. 2020. Describing and predicting online items with reshare cascades via dual mixture self-exciting processes. In CIKM.","DOI":"10.1145\/3340531.3411861"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_18_1","volume-title":"Sparse Transformer Hawkes Process for Long Event Sequences. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases.","author":"Li Zhuoqun","year":"2023","unstructured":"Zhuoqun Li and Mingxuan Sun. 2023. Sparse Transformer Hawkes Process for Long Event Sequences. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases."},{"key":"e_1_3_2_1_19_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81."},{"key":"e_1_3_2_1_20_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_21_1","unstructured":"Nelson F. Liu Kevin Lin John Hewitt Ashwin Paranjape Michele Bevilacqua Fabio Petroni and Percy Liang. 2023. Lost in the Middle: How Language Models Use Long Contexts. arXiv:2307.03172."},{"key":"e_1_3_2_1_22_1","volume-title":"TPP-LLM: Modeling Temporal Point Processes by Efficiently Fine-Tuning Large Language Models. arXiv preprint","author":"Liu Zefang","year":"2024","unstructured":"Zefang Liu and Yinzhu Quan. 2024. TPP-LLM: Modeling Temporal Point Processes by Efficiently Fine-Tuning Large Language Models. arXiv preprint (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"Andrew D White, and Philippe Schwaller.","author":"Bran Andres M.","year":"2024","unstructured":"Andres M. Bran, Sam Cox, Oliver Schilter, Carlo Baldassari, Andrew D White, and Philippe Schwaller. 2024. Augmenting large language models with chemistry tools. Nature Machine Intelligence (2024), 1-11."},{"key":"e_1_3_2_1_24_1","unstructured":"Hongyuan Mei and Jason Eisner. 2017. The Neural Hawkes Process: A Neurally Self-Modulating Multivariate Point Process. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_25_1","volume-title":"International conference on learning representations.","author":"Mei Hongyuan","year":"2021","unstructured":"Hongyuan Mei, Chenghao Yang, and Jason Eisner. 2021. Transformer embeddings of irregularly spaced events and their participants. In International conference on learning representations."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671720"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983812"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v12i1.15030"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1018"},{"key":"e_1_3_2_1_30_1","unstructured":"OpenAI. 2022. Introducing Chatgpt. https:\/\/openai.com\/index\/chatgpt"},{"key":"e_1_3_2_1_31_1","unstructured":"Artidoro Pagnoni Ram Pasunuru Pedro Rodriguez John Nguyen Benjamin Muller Margaret Li Chunting Zhou Lili Yu Jason Weston Luke Zettlemoyer et al. 2024. Byte Latent Transformer: Patches Scale Better Than Tokens. arXiv preprint arXiv:2412.09871 (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"Lecture notes: Temporal point processes and the conditional intensity function. arXiv preprint arXiv:1806.00221","author":"Rasmussen Jakob Gulddahl","year":"2018","unstructured":"Jakob Gulddahl Rasmussen. 2018. Lecture notes: Temporal point processes and the conditional intensity function. arXiv preprint arXiv:1806.00221 (2018)."},{"key":"e_1_3_2_1_33_1","volume-title":"Intensity-Free Learning of Temporal Point Processes. In International Conference on Learning Representations.","author":"Shchur Oleksandr","year":"2020","unstructured":"Oleksandr Shchur, Marin Bilos, and Stephan G\u00fcnnemann. 2020. Intensity-Free Learning of Temporal Point Processes. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_34_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Shi Xiaoming","year":"2024","unstructured":"Xiaoming Shi, Siqiao Xue, Kangrui Wang, Fan Zhou, James Zhang, Jun Zhou, Chenhao Tan, and Hongyuan Mei. 2024. Language models can improve event prediction by few-shot abductive reasoning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_35_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Tang Zineng","year":"2024","unstructured":"Zineng Tang, Ziyi Yang, Chenguang Zhu, Michael Zeng, and Mohit Bansal. 2024. Any-to-any generation via composable diffusion. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al.","author":"Team Gemini","year":"2024","unstructured":"Gemini Team, Petko Georgiev, Ving Ian Lei, Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al., 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)."},{"key":"e_1_3_2_1_37_1","unstructured":"Gemma Team Aishwarya Kamath Johan Ferret Shreya Pathak Nino Vieillard Ramona Merhej Sarah Perrin Tatiana Matejovicova Alexandre Ram\u00e9 Morgane Rivi\u00e8re et al. 2025. Gemma 3 technical report. arXiv preprint arXiv:2503.19786 (2025)."},{"key":"e_1_3_2_1_38_1","volume-title":"Attention is all you need. NeurIPS","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. NeurIPS (2017)."},{"key":"e_1_3_2_1_39_1","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et al. 2024. Qwen2-vl: Enhancing vision-language model's perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_40_1","unstructured":"Chris Whong. 2014. FOILing NYC's taxi trip data."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Shuai Xiao Junchi Yan Xiaokang Yang Hongyuan Zha and Stephen M Chu. 2017. Modeling the intensity function of point process via recurrent neural networks. In AAAI.","DOI":"10.1609\/aaai.v31i1.10724"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Qi Xin Quyu Kong Hongyi Ji Yue Shen Yuqi Liu Yan Sun Zhilin Zhang Zhaorong Li Xunlong Xia Bing Deng et al. 2024. BioInformatics Agent (BIA): Unleashing the Power of Large Language Models to Reshape Bioinformatics Workflow. bioRxiv (2024) 2024-05.","DOI":"10.1101\/2024.05.22.595240"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00461"},{"key":"e_1_3_2_1_44_1","volume-title":"Easytpp: Towards open benchmarking the temporal point processes. arXiv preprint arXiv:2307.08097","author":"Xue Siqiao","year":"2023","unstructured":"Siqiao Xue, Xiaoming Shi, Zhixuan Chu, Yan Wang, Fan Zhou, Hongyan Hao, Caigao Jiang, Chen Pan, Yi Xu, James Y Zhang, et al., 2023a. Easytpp: Towards open benchmarking the temporal point processes. arXiv preprint arXiv:2307.08097 (2023)."},{"key":"e_1_3_2_1_45_1","first-page":"34641","article-title":"Hypro: A hybridly normalized probabilistic model for long-horizon prediction of event sequences","volume":"35","author":"Xue Siqiao","year":"2022","unstructured":"Siqiao Xue, Xiaoming Shi, James Zhang, and Hongyuan Mei. 2022b. Hypro: A hybridly normalized probabilistic model for long-horizon prediction of event sequences. Advances in Neural Information Processing Systems, Vol. 35 (2022), 34641-34650.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_46_1","unstructured":"Siqiao Xue Yan Wang Zhixuan Chu Xiaoming Shi Caigao Jiang Hongyan Hao Gangwei Jiang Xiaoyun Feng James Zhang and Jun Zhou. 2023b. Prompt-augmented temporal point process for streaming event sequence. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_47_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang et al. 2024a. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"e_1_3_2_1_48_1","unstructured":"An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei et al. 2024b. Qwen2. 5 Technical Report. arXiv preprint arXiv:2412.15115 (2024)."},{"key":"e_1_3_2_1_49_1","unstructured":"Yuan Yao Tianyu Yu Ao Zhang Chongyi Wang Junbo Cui Hongji Zhu Tianchi Cai Haoyu Li Weilin Zhao Zhihui He et al. 2024. MiniCPM-V: A GPT-4V Level MLLM on Your Phone. arXiv preprint arXiv:2408.01800 (2024)."},{"key":"e_1_3_2_1_50_1","unstructured":"Qiang Zhang Aldo Lipani Omer Kirnap and Emine Yilmaz. 2020. Self-attentive Hawkes process. In ICML."},{"key":"e_1_3_2_1_51_1","unstructured":"Ke Zhou Hongyuan Zha and Le Song. 2013. Learning triggering kernels for multi-dimensional hawkes processes. In ICML."},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Artificial Intelligence and Statistics.","author":"Zhu Shixiang","year":"2021","unstructured":"Shixiang Zhu, Minghe Zhang, Ruyi Ding, and Yao Xie. 2021. Deep fourier kernel for self-attentive point processes. In International Conference on Artificial Intelligence and Statistics."},{"key":"e_1_3_2_1_53_1","unstructured":"Simiao Zuo Haoming Jiang Zichong Li Tuo Zhao and Hongyuan Zha. 2020. Transformer hawkes process. In ICML."}],"event":{"name":"WWW '26: The ACM Web Conference 2026","location":"Dubai United Arab Emirates","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774904.3792197","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T13:46:02Z","timestamp":1777297562000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774904.3792197"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":53,"alternative-id":["10.1145\/3774904.3792197","10.1145\/3774904"],"URL":"https:\/\/doi.org\/10.1145\/3774904.3792197","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}