{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T10:04:03Z","timestamp":1778407443384,"version":"3.51.4"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031434174","type":"print"},{"value":"9783031434181","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43418-1_32","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T09:02:26Z","timestamp":1694854946000},"page":"533-549","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Comparing Apples and\u00a0Oranges? On\u00a0the\u00a0Evaluation of\u00a0Methods for\u00a0Temporal Knowledge Graph Forecasting"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1914-6723","authenticated-orcid":false,"given":"Julia","family":"Gastinger","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8132-5920","authenticated-orcid":false,"given":"Timo","family":"Sztyler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2522-1209","authenticated-orcid":false,"given":"Lokesh","family":"Sharma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anett","family":"Schuelke","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0209-3859","authenticated-orcid":false,"given":"Heiner","family":"Stuckenschmidt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"key":"32_CR1","unstructured":"Bordes, A., Usunier, N., Garc\u00eda-Dur\u00e1n, A., Weston, J., Yakhnenko, O.: Translating embeddings for modeling multi-relational data. In: Burges, C.J.C., Bottou, L., Ghahramani, Z., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a Meeting Held 5\u20138 December 2013, Lake Tahoe, Nevada, United States, pp. 2787\u20132795 (2013)"},{"key":"32_CR2","unstructured":"Bordes, A., Weston, J., Collobert, R., Bengio, Y.: Learning structured embeddings of knowledge bases. In: Burgard, W., Roth, D. (eds.) Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2011, San Francisco, California, USA, August 7\u201311, 2011. AAAI Press (2011)"},{"key":"32_CR3","unstructured":"Boschee, E., Lautenschlager, J., O\u2019Brien, S., Shellman, S., Starz, J., Ward, M.: ICEWS Coded Event Data (2015)"},{"key":"32_CR4","unstructured":"Brownlee, J.: Deep learning for time series forecasting: predict the future with MLPs, CNNs and LSTMs in Python. Machine Learning Mastery (2018)"},{"key":"32_CR5","unstructured":"Errica, F., Podda, M., Bacciu, D., Micheli, A.: A fair comparison of graph neural networks for graph classification. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26\u201330, 2020 (2020)"},{"key":"32_CR6","doi-asserted-by":"crossref","unstructured":"Garc\u00eda-Dur\u00e1n, A., Duman\u010di\u0107, S., Niepert, M.: Learning sequence encoders for temporal knowledge graph completion. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Brussels, Belgium, October-November pp. 4816\u20134821. Association for Computational Linguistics (2018)","DOI":"10.18653\/v1\/D18-1516"},{"key":"32_CR7","unstructured":"Han, Z., Chen, P., Ma, Y., Tresp, V.: Explainable subgraph reasoning for forecasting on temporal knowledge graphs. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, 3\u20137 May 2021 (2021)"},{"key":"32_CR8","doi-asserted-by":"crossref","unstructured":"Han, Z., Ding, Z., Ma, Y., Gu, Y., Tresp, V.: Learning neural ordinary equations for forecasting future links on temporal knowledge graphs. In: Moens, M., Huang, X., Specia, L., Yih, S.W. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event\/Punta Cana, Dominican Republic, 7\u201311 November 2021, pp. 8352\u20138364. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.658"},{"key":"32_CR9","unstructured":"Han, Z., Ma, Y., Wang, Y., G\u00fcnnemann, S., Tresp, V.: Graph Hawkes neural network for forecasting on temporal knowledge graphs. In: Das, D., Hajishirzi, H., McCallum, A., Singh, S. (eds.) Conference on Automated Knowledge Base Construction, AKBC 2020, Virtual, 22\u201324 June 2020 (2020)"},{"key":"32_CR10","doi-asserted-by":"crossref","unstructured":"Han, Z., Zhang, G., Ma, Y., Tresp, V.: Time-dependent entity embedding is not all you need: a re-evaluation of temporal knowledge graph completion models under a unified framework. In: Moens, M., Huang, X., Specia, L., Yih, S.W. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event\/Punta Cana, Dominican Republic, 7\u201311 November 2021, pp. 8104\u20138118. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.639"},{"key":"32_CR11","doi-asserted-by":"crossref","unstructured":"Jin, W., Qu, M., Jin, X., Ren, X.: Recurrent event network: autoregressive structure inference over temporal knowledge graphs. arXiv preprint arXiv:1904.05530 (2019). preprint version","DOI":"10.18653\/v1\/2020.emnlp-main.541"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Jin, W., Qu, M., Jin, X., Ren, X.: Recurrent event network: autoregressive structure inference over temporal knowledge graphs. In: Webber, B., Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP 2020, Online, 16\u201320 November 2020, pp. 6669\u20136683. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.541"},{"key":"32_CR13","series-title":"Models and Applications","doi-asserted-by":"crossref","DOI":"10.1002\/0471722065","volume-title":"Continuous Multivariate Distributions","author":"S Kotz","year":"2000","unstructured":"Kotz, S., Balakrishnan, N., Johnson, N.L.: Continuous Multivariate Distributions. Models and Applications, vol. 1. Wiley, New York (2000)"},{"key":"32_CR14","doi-asserted-by":"crossref","unstructured":"Leblay, J., Chekol, M.W.: Deriving validity time in knowledge graph. In: Champin, P., Gandon, F., Lalmas, M., Ipeirotis, P.G. (eds.) Companion of the The Web Conference 2018 on The Web Conference 2018, WWW 2018, Lyon, France, 23\u201327 April 2018, pp. 1771\u20131776. ACM (2018)","DOI":"10.1145\/3184558.3191639"},{"key":"32_CR15","unstructured":"Leetaru, K., Schrodt, P.A.: Gdelt: global data on events, location, and tone, 1979\u20132012. In: ISA Annual Convention, pp. 1\u201349. Citeseer (2013)"},{"key":"32_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Complex evolutional pattern learning for temporal knowledge graph reasoning. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), Dublin, Ireland, May 2022, pp. 290\u2013296. Association for Computational Linguistics (2022)","DOI":"10.18653\/v1\/2022.acl-short.32"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Search from history and reason for future: two-stage reasoning on temporal knowledge graphs. In: Zong, C., Xia, F., Li, W., Navigli, R. (eds.) Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, ACL\/IJCNLP 2021, (Volume 1: Long Papers), Virtual Event, 1\u20136 August 2021, pp. 4732\u20134743. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.acl-long.365"},{"key":"32_CR18","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Temporal knowledge graph reasoning based on evolutional representation learning. In: Diaz, F., Shah, C., Suel, T., Castells, P., Jones, R., Sakai, T. (eds.) SIGIR 2021: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 11\u201315 July 2021, pp. 408\u2013417. ACM (2021)","DOI":"10.1145\/3404835.3462963"},{"key":"32_CR19","unstructured":"Liao, T., Taori, R., Raji, I.D., Schmidt, L.: Are we learning yet? A meta review of evaluation failures across machine learning. In: Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2) (2021)"},{"key":"32_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Y., Ma, Y., Hildebrandt, M., Joblin, M., Tresp, V.: Tlogic: temporal logical rules for explainable link forecasting on temporal knowledge graphs. In: Thirty-Sixth AAAI Conference on Artificial Intelligence, AAAI 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, IAAI 2022, The Twelveth Symposium on Educational Advances in Artificial Intelligence, EAAI 2022 Virtual Event, 22 February\u20131 March 2022, pp. 4120\u20134127. AAAI Press (2022)","DOI":"10.1609\/aaai.v36i4.20330"},{"key":"32_CR21","unstructured":"Mahdisoltani, F., Biega, J.A., Suchanek, F.M.: Yago3: a knowledge base from multilingual Wikipedia\u2019s. In: CIDR (2015)"},{"issue":"3","key":"32_CR22","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1109\/TNN.2008.2010350","volume":"20","author":"A Micheli","year":"2009","unstructured":"Micheli, A.: Neural network for graphs: a contextual constructive approach. IEEE Trans. Neural Networks 20(3), 498\u2013511 (2009)","journal-title":"IEEE Trans. Neural Networks"},{"issue":"2","key":"32_CR23","doi-asserted-by":"publisher","first-page":"14:1","DOI":"10.1145\/3424672","volume":"15","author":"A Rossi","year":"2021","unstructured":"Rossi, A., Barbosa, D., Firmani, D., Matinata, A., Merialdo, P.: Knowledge graph embedding for link prediction: a comparative analysis. ACM Trans. Knowl. Discov. Data 15(2), 14:1-14:49 (2021)","journal-title":"ACM Trans. Knowl. Discov. Data"},{"issue":"1","key":"32_CR24","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2009","unstructured":"Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE Trans. Neural Networks 20(1), 61\u201380 (2009)","journal-title":"IEEE Trans. Neural Networks"},{"key":"32_CR25","unstructured":"Shchur, O., Mumme, M., Bojchevski, A., G\u00fcnnemann, S.: Pitfalls of graph neural network evaluation. In: Relational Representation Learning Workshop (R2L 2018), NeurIPS, Montr\u00e9al, Canada (2018)"},{"key":"32_CR26","doi-asserted-by":"crossref","unstructured":"Sun, H., Zhong, J., Ma, Y., Han, Z., He, K.: Timetraveler: reinforcement learning for temporal knowledge graph forecasting. In: Moens, M., Huang, X., Specia, L., Yih, S.W. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event\/Punta Cana, Dominican Republic, 7\u201311 November 2021, pp. 8306\u20138319. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.655"},{"key":"32_CR27","doi-asserted-by":"crossref","unstructured":"Sun, Z., Vashishth, S., Sanyal, S., Talukdar, P.P., Yang, Y.: A re-evaluation of knowledge graph completion methods. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J.R. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, 5\u201310 July 2020, pp. 5516\u20135522. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.489"},{"key":"32_CR28","unstructured":"Trivedi, R., Dai, H., Wang, Y., Song, L.: Know-evolve: deep temporal reasoning for dynamic knowledge graphs. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning, ICML 2017. Proceedings of Machine Learning Research, Sydney, NSW, Australia, 6\u201311 August 2017, vol. 70, pp. 3462\u20133471. PMLR (2017)"},{"key":"32_CR29","doi-asserted-by":"crossref","unstructured":"Widjaja, H., et al.: KGxBoard: explainable and interactive leaderboard for evaluation of knowledge graph completion models. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Abu Dhabi, UAE, December 2022, pp. 338\u2013350. Association for Computational Linguistics (2022)","DOI":"10.18653\/v1\/2022.emnlp-demos.34"},{"key":"32_CR30","doi-asserted-by":"crossref","unstructured":"Zhu, C., Chen, M., Fan, C., Cheng, G., Zhang, Y.: Learning from history: modeling temporal knowledge graphs with sequential copy-generation networks. In: Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, 2\u20139 February 2021, pp. 4732\u20134740. AAAI Press (2021)","DOI":"10.1609\/aaai.v35i5.16604"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43418-1_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,26]],"date-time":"2024-06-26T13:13:52Z","timestamp":1719407632000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43418-1_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434174","9783031434181"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43418-1_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"While TKG Forecasting has the potential to enable predictions for complex and dynamic systems, we argue that inconsistencies in experimental procedures and evaluation settings can lead to distorted comparisons among models, and ultimately, misinterpretation of results. Therefore, with our work, we want to highlight the importance of transparency and reproducibility in scientific research, as well as the importance of rigorous and reliable scientific practice. In this context we have identified inconsistencies in evaluation settings and provided a unified evaluation protocol. We ensure transparency by providing a URL to a GitHub repository containing our evaluation code. Within this repository, we use forked submodules to explicitly link to the original assets. Additionally, we report the training details, such as hyperparameters, in the supplementary material of our work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"While we have not focused on increasing the interpretability of individual models, we acknowledge the importance of explainability and interpretability in the field. Therefore, we note that among the compared models, xERTE [] and TLogic [] address some aspects of explainability and interpretability.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"We did not evaluate the predictions of existing models on bias and fairness as it was out of scope for this work. However, we recognize that it is essential to increase fairness in the comparison of TKG Forecasting models. Therefore, we highlight inconsistencies and provide a unified evaluation protocol to improve comparability and fairness for existing models.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"In terms of data collection and use, we used publicly available research datasets for our evaluation. We did not use the data for profiling individuals, and it does not contain offensive content. However, it is important to note that even publicly available data can be subject to privacy regulations, and we have taken measures to ensure that our data usage complies with applicable laws and regulations.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"As this study focuses purely on evaluation of existing models, it does not induce direct risk. However, we recognize that TKG Forecasting models can have real-world consequences, especially when applied in domains such as finance and healthcare. Therefore, as the results in Sect.\u00a0 show, we want to stress again that predictions can be unreliable and incomplete, and that these limitations have to be acknowledged when using them for decision making.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}