๐ข Update: Check out our follow-up work TARA: Simple and Efficient Time Aware Retrieval Adaptation of MLLMs for Video Understanding that uses Multimodal LLMs (MLLMs) to encode videos for time-aware video-text retrieval.
๐ข Update: Check out our follow-up work TARA: Simple and Efficient Time Aware Retrieval Adaptation of MLLMs for Video Understanding that uses Multimodal LLMs (MLLMs) to encode videos for time-aware video-text retrieval.
@article{bagad2025chirality,
title={Chirality in Action: Time-Aware Video Representation Learning by Latent Straightening},
author={Bagad, Piyush and Zisserman, Andrew},
journal={arXiv preprint arXiv:2509.08502},
year={2025}
}
@InProceedings{Bagad25,
author = "Piyush Bagad and Andrew Zisserman",
title = "Chirality in Action: Time-Aware Video Representation Learning by Latent Straightening",
booktitle = "NeurIPS",
year = "2025",
}