% Conference paper published in a Lecture Notes in Computer Science volume.
% NOTE(review): the exported doi field held a bare ISBN-13 (978-3-030-30760-8),
%   which is not a DOI. It now carries Springer's 10.1007 prefix, which should
%   resolve to the proceedings volume; the chapter-level DOI normally ends in
%   an extra _<n> suffix -- TODO confirm and replace.
% NOTE(review): address was a country ("United States"); it is now the
%   publisher's city, presumably Cham for Springer LNCS -- verify.
% NOTE(review): pages and editor are absent from the export; add when known.
@inproceedings{effe155180aa405cbb115441939bf602,
  author    = {Batista-Navarro, Riza Theresa and Hawkins, Oliver},
  title     = {Topic Modelling vs Distant Supervision: A Comparative Evaluation based on the Classification of Parliamentary Enquiries},
  booktitle = {Digital Libraries for Open Knowledge},
  series    = {Lecture Notes in Computer Science},
  volume    = {11799},
  publisher = {Springer Nature},
  address   = {Cham},
  year      = {2019},
  isbn      = {978-3-030-30759-2},
  doi       = {10.1007/978-3-030-30760-8},
  language  = {English},
  abstract  = {We investigate two types of approaches to text classification, in the way of enriching categorising Parliamentary enquiries recorded by the UK House of Commons Library. One is an unsupervised approach, i.e., topic modelling, and the other is a supervised approach based on weakly labelled data, i.e., distant supervision. Models were trained on two types of feature sets: one based only on bag of words, and the other combining bag of words with structured metadata attached to enquiries. Our results show that topic modelling obtains superior performance on this task, and that the incorporation of structured metadata as learning features contributes insignificantly to improved model performance.},
}