@article{Frieder2022ACM,
  author    = {Frieder, Ophir and Mele, Ida and Muntean, Cristina Ioana and Nardini, Franco Maria and Perego, Raffaele and Tonellotto, Nicola},
  title     = {Caching Historical Embeddings in Conversational Search},
  journal   = {{ACM} Transactions on the Web},
  year      = {2022},
  month     = dec,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  issn      = {1559-1131},
  doi       = {10.1145/3578519},
  abstract  = {Rapid response, namely low latency, is fundamental in search applications; it is particularly so in interactive search sessions, such as those encountered in conversational settings. An observation with a potential to reduce latency asserts that conversational queries exhibit a temporal locality in the lists of documents retrieved. Motivated by this observation, we propose and evaluate a client-side document embedding cache, improving the responsiveness of conversational search systems. By leveraging state-of-the-art dense retrieval models to abstract document and query semantics, we cache the embeddings of documents retrieved for a topic introduced in the conversation, as they are likely relevant to successive queries. Our document embedding cache implements an efficient metric index, answering nearest-neighbor similarity queries by estimating the approximate result sets returned. We demonstrate the efficiency achieved using our cache via reproducible experiments based on TREC CAsT datasets, achieving a hit rate of up to 75\% without degrading answer quality. Our achieved high cache hit rates significantly improve the responsiveness of conversational systems while likewise reducing the number of queries managed on the search back-end.},
  note      = {Just Accepted},
  keywords  = {conversational search, dense retrieval, caching, similarity search},
}