{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T17:00:08Z","timestamp":1776445208498,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715289","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:09:41Z","timestamp":1748016581000},"page":"761-764","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["EDGAR-CRAWLER: From Raw Web Documents to Structured Financial NLP Datasets"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7473-9428","authenticated-orcid":false,"given":"Lefteris","family":"Loukas","sequence":"first","affiliation":[{"name":"Department of Informatics, Athens University of Economics and Business, Athens, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0311-2176","authenticated-orcid":false,"given":"Fabian","family":"Billert","sequence":"additional","affiliation":[{"name":"Heinrich Heine University of D\u00fcsseldorf, D\u00fcsseldorf, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7657-5156","authenticated-orcid":false,"given":"Manos","family":"Fergadiotis","sequence":"additional","affiliation":[{"name":"Department of Informatics, Athens University of Economics and Business, Athens, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0055-5598","authenticated-orcid":false,"given":"Prodromos","family":"Malakasiotis","sequence":"additional","affiliation":[{"name":"Department of Informatics, Athens University of Economics and Business, Athens, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2969-0509","authenticated-orcid":false,"given":"Ion","family":"Androutsopoulos","sequence":"additional","affiliation":[{"name":"Department of Informatics, Athens University of Economics and Business, Athens, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"ICAIF '24: Proceedings of the 5th ACM International Conference on AI in Finance","unstructured":"2024. ICAIF '24: Proceedings of the 5th ACM International Conference on AI in Finance (Brooklyn, NY, USA). Association for Computing Machinery."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11408-024-00455--4"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1111\/1475-679X.00079"},{"key":"e_1_3_2_2_4_1","unstructured":"Tianyu Cao Natraj Raman Danial Dervovic and Chenhao Tan. 2024. Characterizing Multimodal Long-form Summarization: A Case Study on Financial Reports. arXiv:2404.06162 [cs.CL] https:\/\/arxiv.org\/abs\/2404.06162"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.2307\/2491399"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.2308\/acch-51240"},{"key":"e_1_3_2_2_7_1","unstructured":"Chung-Chi Chen Hen-Hsen Huang Hiroya Takamura and Hsin-Hsi Chen (Eds.). 2022. Proceedings of the Fourth Workshop on Financial Technology and Natural Language Processing (FinNLP). Association for Computational Linguistics Abu Dhabi United Arab Emirates (Hybrid). https:\/\/aclanthology.org\/2022.finnlp-1.0"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.17016\/FEDS.2024.020"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1080\/15427560.2019.155"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1027351630866"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.3386\/w33171"},{"key":"e_1_3_2_2_12_1","volume-title":"KPI-BERT: A Joint Named Entity Recognition and Relation Extraction Model for Financial Reports. 2022 26th International Conference on Pattern Recognition (ICPR)","author":"Hillebrand Lars Patrick","year":"2022","unstructured":"Lars Patrick Hillebrand, Tobias Deu\u00dfer, Tim Dilmaghani Khameneh, Bernd Kliem, R\u00fcdiger Loitz, Christian Bauckhage, and Rafet Sifa. 2022. KPI-BERT: A Joint Named Entity Recognition and Relation Extraction Model for Financial Reports. 2022 26th International Conference on Pattern Recognition (ICPR) (2022), 606--612. https:\/\/api.semanticscholar.org\/CorpusID:251280287"},{"key":"e_1_3_2_2_13_1","volume-title":"Pyrgiotakis","author":"Katsafados Apostolos G.","year":"2020","unstructured":"Apostolos G. Katsafados, Ion Androutsopoulos, Ilias Chalkidis, Manos Fergadiotis, George N. Leledakis, and Emmanouil G. Pyrgiotakis. 2020. Textual Information and IPO Underpricing: A Machine Learning Approach. MPRA Paper 103813. University Library of Munich, Germany."},{"key":"e_1_3_2_2_14_1","volume-title":"Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics","author":"Kogan Shimon","unstructured":"Shimon Kogan, Dimitry Levin, Bryan R. Routledge, Jacob S. Sagi, and Noah A. Smith. 2009. Predicting Risk from Financial Reports with Regression. In Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics. Boulder, Colorado, 272--280. https:\/\/aclanthology.org\/N09--1031"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)","author":"Lee Heeyoung","year":"2014","unstructured":"Heeyoung Lee, Mihai Surdeanu, Bill MacCartney, and Dan Jurafsky. 2014. On the Importance of Text Analysis for Stock Price Prediction. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14). Reykjavik, Iceland, 1170--1175."},{"key":"e_1_3_2_2_16_1","volume-title":"The effect of quarterly report readability on information efficiency of stock prices. Contemporary Accounting Research 29, 4","author":"Lee Yen-Jung","year":"2012","unstructured":"Yen-Jung Lee. 2012. The effect of quarterly report readability on information efficiency of stock prices. Contemporary Accounting Research 29, 4 (2012)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.econlp-1.2"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.303"},{"key":"e_1_3_2_2_19_1","volume-title":"1st International Conference on Learning Representations, ICLR","author":"Mikolov Tom\u00e1s","year":"2013","unstructured":"Tom\u00e1s Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. In 1st International Conference on Learning Representations, ICLR 2013, Scottsdale, Arizona, USA, May 2--4, 2013, Workshop Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.219"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490354.3494453"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715289","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715289","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T02:01:55Z","timestamp":1759888915000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715289"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":21,"alternative-id":["10.1145\/3701716.3715289","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715289","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}