% Single-author 2025 survey; the arXiv identifier is carried in the journal and url fields.
% NOTE(review): @preprint is not a standard BibTeX entry type -- presumably a
% convention of the tool that consumes this file; confirm before normalizing.
@preprint{wu2025sailing,
  author  = {Wu, Xiaobao},
  title   = {Sailing AI by the Stars: A Survey of Learning from Rewards in Post-Training and Test-Time Scaling of Large Language Models},
  journal = {arXiv preprint arXiv:2505.02686},
  year    = {2025},
  url     = {https://arxiv.org/abs/2505.02686},
}
% AntiLeakBench, ACL 2025.
@inproceedings{wu2025antileak,
  author    = {Wu, Xiaobao and Pan, Liangming and Xie, Yuxi and Zhou, Ruiwen and Zhao, Shuai and Ma, Yubo and Du, Mingzhe and Mao, Rui and Luu, Anh Tuan and Wang, William Yang},
  title     = {AntiLeakBench: Preventing Data Contamination by Automatically Constructing Benchmarks with Updated Real-World Knowledge},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2412.13670.pdf},
}
% RuleArena, ACL 2025.
% The arXiv identifier lives in eprint/archiveprefix: an @inproceedings entry
% takes its venue from booktitle, so a journal field does not belong here.
@inproceedings{zhou2025rulearena,
  author        = {Zhou, Ruiwen and Hua, Wenyue and Pan, Liangming and Cheng, Sitao and Wu, Xiaobao and Yu, En and Wang, William Yang},
  title         = {RuleArena: A Benchmark for Rule-Guided Reasoning with LLMs in Real-World Scenarios},
  booktitle     = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year          = {2025},
  eprint        = {2412.08972},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/pdf/2412.08972.pdf},
}
ACL Findings
Full-Step-DPO: Self-Supervised Preference Optimization with Step-wise Rewards for Mathematical Reasoning
Huimin Xu, Xin Mao, Feng-Lin Li, Xiaobao Wu✉, Wang Chen, Wei Zhang, and Anh Tuan Luu✉
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2025
% Full-Step-DPO, Findings of ACL 2025.
% NOTE(review): the "✉" suffix on two surnames is kept verbatim; presumably a
% corresponding-author marker read by a downstream renderer -- confirm.
@inproceedings{xu2025fullstepdpo,
  author    = {Xu, Huimin and Mao, Xin and Li, Feng-Lin and Wu✉, Xiaobao and Chen, Wang and Zhang, Wei and Luu✉, Anh Tuan},
  title     = {Full-Step-DPO: Self-Supervised Preference Optimization with Step-wise Rewards for Mathematical Reasoning},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2502.14356.pdf},
}
ACL Findings
Towards Storage-Efficient Visual Document Retrieval: An Empirical Study on Reducing Patch-Level Embeddings
Yubo Ma, Jinsong Li, Yuhang Zang, Xiaobao Wu, Xiaoyi Dong, Pan Zhang, Yuhang Cao, Haodong Duan, Jiaqi Wang, Yixin Cao, and Aixin Sun
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2025
% Storage-efficient visual document retrieval, Findings of ACL 2025 (no url field recorded).
@inproceedings{ma2025storage,
  author    = {Ma, Yubo and Li, Jinsong and Zang, Yuhang and Wu, Xiaobao and Dong, Xiaoyi and Zhang, Pan and Cao, Yuhang and Duan, Haodong and Wang, Jiaqi and Cao, Yixin and Sun, Aixin},
  title     = {Towards Storage-Efficient Visual Document Retrieval: An Empirical Study on Reducing Patch-Level Embeddings},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025},
}
ACL Findings
Unlearning Backdoor Attacks for LLMs with Weak-to-Strong Knowledge Distillation
Shuai Zhao, Xiaobao Wu, Cong-Duy Nguyen, Meihuizi Jia, Yichao Feng, and Luu Anh Tuan
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2025
% Backdoor unlearning via weak-to-strong distillation, Findings of ACL 2025.
% The last author is written "Luu, Anh Tuan" so the surname is "Luu", matching
% the other entries in this file that list the same co-author.
@inproceedings{zhao2025unlearning,
  author    = {Zhao, Shuai and Wu, Xiaobao and Nguyen, Cong-Duy and Jia, Meihuizi and Feng, Yichao and Luu, Anh Tuan},
  title     = {Unlearning Backdoor Attacks for LLMs with Weak-to-Strong Knowledge Distillation},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2410.14425},
}
ACL Findings
SCOPE: Compress Mathematical Reasoning Steps for Efficient Automated Process Annotation
Huimin Xu, Xin Mao, Fenglin Li, Xiaobao Wu✉, Wang Chen, Wei Zhang, and Anh Tuan Luu✉
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2025
% SCOPE, Findings of ACL 2025.
% NOTE(review): the "✉" suffix on two surnames is kept verbatim; presumably a
% corresponding-author marker read by a downstream renderer -- confirm.
@inproceedings{xu2025scope,
  author    = {Xu, Huimin and Mao, Xin and Li, Fenglin and Wu✉, Xiaobao and Chen, Wang and Zhang, Wei and Luu✉, Anh Tuan},
  title     = {SCOPE: Compress Mathematical Reasoning Steps for Efficient Automated Process Annotation},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2505.14419},
}
ACL Demo
CodeArena: A Collective Evaluation Platform for LLM Code Generation
Mingzhe Du, Anh Tuan Luu, Bin Ji, Xiaobao Wu, Dong Huang, Terry Yue Zhuo, Qian Liu, and See-Kiong Ng
In Annual Meeting of the Association for Computational Linguistics: System Demonstration Track (ACL Demo), 2025
% CodeArena, ACL 2025 System Demonstration Track.
@inproceedings{du2025codearena,
  author    = {Du, Mingzhe and Luu, Anh Tuan and Ji, Bin and Wu, Xiaobao and Huang, Dong and Zhuo, Terry Yue and Liu, Qian and Ng, See-Kiong},
  title     = {CodeArena: A Collective Evaluation Platform for LLM Code Generation},
  booktitle = {Annual Meeting of the Association for Computational Linguistics: System Demonstration Track (ACL Demo)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2503.01295},
}
% KBQA-o1, ICML 2025. The author list is truncated with the "and others" token.
% "Luu, Anh Tuan" keeps the surname "Luu", matching the other entries in this
% file that list the same co-author.
@inproceedings{luo2025kbqa-o1,
  author    = {Luo, Haoran and Guo, Yikai and Lin, Qika and Wu, Xiaobao and Mu, Xinyu and Liu, Wenhao and Song, Meina and Zhu, Yifan and Luu, Anh Tuan and others},
  title     = {KBQA-o1: Agentic Knowledge Base Question Answering with Monte Carlo Tree Search},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2501.18922},
}
% Multimodal entity linking, NAACL 2025.
% NOTE(review): the "✉" suffix on two surnames is kept verbatim; presumably a
% corresponding-author marker read by a downstream renderer -- confirm.
@inproceedings{nguyen2025enhancing,
  author    = {Nguyen, Cong-Duy and Wu✉, Xiaobao and Nguyen, Thong Thanh and Le, Khoi and Nguyen, Viet Anh and Luu✉, Anh Tuan},
  title     = {Enhancing Multimodal Entity Linking with Jaccard Distance-based Conditional Contrastive Learning and Contextual Visual Augmentation},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2501.14166},
}
% Backdoor attacks/defenses survey, TMLR 2025 (journal article; OpenReview link).
% NOTE(review): the "✉" suffix on two surnames is kept verbatim; presumably a
% corresponding-author marker read by a downstream renderer -- confirm.
@article{zhao2025survey,
  author  = {Zhao, Shuai and Jia, Meihuizi and Guo, Zhongliang and Gan, Leilei and Xu, Xiaoyu and Wu✉, Xiaobao and Fu, Jie and Feng, Yichao and Pan, Fengjun and Luu✉, Anh Tuan},
  title   = {A Survey of Recent Backdoor Attacks and Defenses in Large Language Models},
  journal = {Transactions on Machine Learning Research (TMLR)},
  year    = {2025},
  issn    = {2835-8856},
  url     = {https://openreview.net/forum?id=wZLWuFHxt5},
}
% Temporal panoptic scene graph generation, AAAI 2025.
@inproceedings{nguyen2025motion,
  author    = {Nguyen, Thong Thanh and Wu, Xiaobao and Bin, Yi and Nguyen, Cong-Duy T and Ng, See-Kiong and Luu, Anh Tuan},
  title     = {Motion-aware Contrastive Learning for Temporal Panoptic Scene Graph Generation},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2412.07160},
}
% Video temporal grounding, AAAI 2025.
@inproceedings{nguyen2025multi,
  author    = {Nguyen, Thong Thanh and Bin, Yi and Wu, Xiaobao and Hu, Zhiyuan and Nguyen, Cong-Duy T and Ng, See-Kiong and Luu, Anh Tuan},
  title     = {Multi-Scale Contrastive Learning for Video Temporal Grounding},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2412.07157},
}
SAC
Curriculum Demonstration Selection for In-Context Learning
Duc Anh Vu, Nguyen Tran Cong Duy, Xiaobao Wu, Hoang Minh Nhat, Du Mingzhe, Nguyen Thanh Thong, and Anh Tuan Luu
In ACM/SIGAPP Symposium on Applied Computing (SAC), 2025
% Curriculum demonstration selection for in-context learning, SAC 2025.
% "Du, Mingzhe" uses the same surname/given-name split as the other entries in
% this file that list this co-author.
% NOTE(review): "Duy, Nguyen Tran Cong", "Nhat, Hoang Minh" and
% "Thong, Nguyen Thanh" look like family-name-first names split at the wrong
% word; left unchanged -- confirm the intended surnames before editing.
@inproceedings{vu2025curriculum,
  author    = {Vu, Duc Anh and Duy, Nguyen Tran Cong and Wu, Xiaobao and Nhat, Hoang Minh and Du, Mingzhe and Thong, Nguyen Thanh and Luu, Anh Tuan},
  title     = {Curriculum Demonstration Selection for In-Context Learning},
  booktitle = {ACM/SIGAPP Symposium on Applied Computing (SAC)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2411.18126},
}
% Zero-to-strong generalization, COLING 2025.
@inproceedings{liu2025zero,
  author    = {Liu, Chaoqun and Chao, Qin and Zhang, Wenxuan and Wu, Xiaobao and Li, Boyang and Luu, Anh Tuan and Bing, Lidong},
  title     = {Zero-to-Strong Generalization: Eliciting Strong Capabilities of Large Language Models Iteratively without Gold Labels},
  booktitle = {International Conference on Computational Linguistics (COLING)},
  year      = {2025},
  url       = {https://arxiv.org/pdf/2409.12425},
}
% FASTopic, NeurIPS 2024.
@inproceedings{wu2024fastopic,
  author    = {Wu, Xiaobao and Nguyen, Thong and Zhang, Delvin Ce and Wang, William Yang and Luu, Anh Tuan},
  title     = {FASTopic: Pretrained Transformer is a Fast, Adaptive, Stable, and Transferable Topic Model},
  booktitle = {Neural Information Processing Systems (NeurIPS)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2405.17978},
}
% AKEW, EMNLP 2024.
@inproceedings{wu2024akew,
  author    = {Wu, Xiaobao and Pan, Liangming and Wang, William Yang and Luu, Anh Tuan},
  title     = {AKEW: Assessing Knowledge Editing in the Wild},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2402.18909},
}
% Zero-shot fallacy classification with LLMs, EMNLP 2024.
% NOTE(review): the "#" suffix on the first two surnames is kept verbatim;
% presumably an equal-contribution marker read by a downstream renderer --
% confirm. An unescaped "#" would need escaping if this entry is ever typeset
% through LaTeX.
@inproceedings{pan2024fallacy,
  author    = {Pan#, Fengjun and Wu#, Xiaobao and Li, Zongrui and Luu, Anh Tuan},
  title     = {Are LLMs Good Zero-shot Fallacy Classifiers?},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2410.15050},
}
% Long-form video question answering, EMNLP 2024.
@inproceedings{nguyen2024encoding,
  author    = {Nguyen, Thong Thanh and Hu, Zhiyuan and Wu, Xiaobao and Nguyen, Cong-Duy and Ng, See-Kiong and Luu, Anh Tuan},
  title     = {Encoding and Controlling Global Semantics for Long-form Video Question Answering},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2405.19723},
}
AIR
A Survey on Neural Topic Models: Methods, Applications, and Challenges
% Neural topic model survey, Artificial Intelligence Review 2024 (Springer).
% The DOI is stored bare in the doi field; the resolver link stays in url for
% consumers that only read url.
@article{wu2024survey,
  author    = {Wu, Xiaobao and Nguyen, Thong and Luu, Anh Tuan},
  title     = {A Survey on Neural Topic Models: Methods, Applications, and Challenges},
  journal   = {Artificial Intelligence Review (AIR)},
  year      = {2024},
  publisher = {Springer},
  doi       = {10.1007/s10462-023-10661-7},
  url       = {https://doi.org/10.1007/s10462-023-10661-7},
}
ACL Demo
Towards the TopMost: A Topic Modeling System Toolkit
% TopMost toolkit, ACL 2024 System Demonstration Track.
@inproceedings{wu2024topmost,
  author    = {Wu, Xiaobao and Pan, Fengjun and Luu, Anh Tuan},
  title     = {Towards the TopMost: A Topic Modeling System Toolkit},
  booktitle = {Annual Meeting of the Association for Computational Linguistics: System Demonstration Track (ACL Demo)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2309.06908.pdf},
}
% Hierarchical topic modeling, AAAI 2024.
@inproceedings{wu2024traco,
  author    = {Wu, Xiaobao and Pan, Fengjun and Nguyen, Thong and Feng, Yichao and Liu, Chaoqun and Nguyen, Cong-Duy and Luu, Anh Tuan},
  title     = {On the Affinity, Rationality, and Diversity of Hierarchical Topic Modeling},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2401.14113.pdf},
}
ACL Findings
Modeling Dynamic Topics in Chain-Free Fashion by Evolution-Tracking Contrastive Learning and Unassociated Word Exclusion
Xiaobao Wu, Xinshuai Dong, Liangming Pan, Thong Nguyen, and Anh Tuan Luu
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2024
% Dynamic topic modeling, Findings of ACL 2024.
@inproceedings{wu2024dynamic,
  author    = {Wu, Xiaobao and Dong, Xinshuai and Pan, Liangming and Nguyen, Thong and Luu, Anh Tuan},
  title     = {Modeling Dynamic Topics in Chain-Free Fashion by Evolution-Tracking Contrastive Learning and Unassociated Word Exclusion},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2405.17957},
}
ECCV
Meta-optimized Angular Margin Contrastive Framework for Video-Language Representation Learning
Thong Nguyen, Yi Bin, Xiaobao Wu, Xinshuai Dong, Zhiyuan Hu, Khoi Le, Cong-Duy Nguyen, See-Kiong Ng, and Luu Anh Tuan
In European Conference on Computer Vision (ECCV), 2024
% Video-language representation learning, ECCV 2024.
% "Luu, Anh Tuan" keeps the surname "Luu", matching the other entries in this
% file that list the same co-author.
@inproceedings{nguyen2024meta,
  author    = {Nguyen, Thong and Bin, Yi and Wu, Xiaobao and Dong, Xinshuai and Hu, Zhiyuan and Le, Khoi and Nguyen, Cong-Duy and Ng, See-Kiong and Luu, Anh Tuan},
  title     = {Meta-optimized Angular Margin Contrastive Framework for Video-Language Representation Learning},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2024},
  url       = {https://arxiv.org/abs/2407.03788},
}
% READ-PVLA, AAAI 2024 (volume, number and pages recorded).
@inproceedings{nguyen2024read,
  author    = {Nguyen, Thong and Wu, Xiaobao and Dong, Xinshuai and Le, Khoi M and Hu, Zhiyuan and Nguyen, Cong-Duy and Ng, See-Kiong and Luu, Anh Tuan},
  title     = {READ-PVLA: Recurrent Adapter with Partial Video-Language Alignment for Parameter-Efficient Transfer Learning in Low-Resource Video-Language Modeling},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  volume    = {38},
  number    = {17},
  pages     = {18824--18832},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2312.06950},
}
% Topic modeling as multi-objective contrastive optimization, ICLR 2024 (OpenReview link).
@inproceedings{nguyen2024topic,
  author    = {Nguyen, Thong Thanh and Wu, Xiaobao and Dong, Xinshuai and Nguyen, Cong-Duy T and Ng, See-Kiong and Luu, Anh Tuan},
  title     = {Topic Modeling as Multi-Objective Contrastive Optimization},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2024},
  url       = {https://openreview.net/forum?id=HdAoLSBYXj},
}
% KDMCSE, NAACL 2024.
@inproceedings{nguyen2024kdmcse,
  author    = {Nguyen, Cong-Duy and Nguyen, Thong and Wu, Xiaobao and Luu, Anh Tuan},
  title     = {KDMCSE: Knowledge Distillation Multimodal Sentence Embeddings with Adaptive Angular margin Contrastive Learning},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2024},
  url       = {https://arxiv.org/pdf/2403.17486},
}
% Embedding clustering regularization for neural topic modeling, ICML 2023.
% PMLR is recorded in the organization field, as in the original entry.
@inproceedings{wu2023effective,
  author       = {Wu, Xiaobao and Dong, Xinshuai and Nguyen, Thong and Luu, Anh Tuan},
  title        = {Effective Neural Topic Modeling with Embedding Clustering Regularization},
  booktitle    = {International Conference on Machine Learning (ICML)},
  year         = {2023},
  organization = {PMLR},
  url          = {https://arxiv.org/pdf/2306.04217},
}
% InfoCTM, AAAI 2023; the custom oral field is set to true.
% The title acronym is written "InfoCTM" (all-caps CTM), and "Pan, Liangming"
% uses the same spelling as the other entries in this file for this co-author.
@inproceedings{wu2023infoctm,
  author    = {Wu, Xiaobao and Dong, Xinshuai and Nguyen, Thong and Liu, Chaoqun and Pan, Liangming and Luu, Anh Tuan},
  title     = {InfoCTM: A Mutual Information Maximization Perspective of Cross-lingual Topic Modeling},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  volume    = {37},
  pages     = {13763--13771},
  year      = {2023},
  url       = {https://arxiv.org/pdf/2304.03544},
  oral      = {true},
}
% Program-guided fact-checking, ACL 2023 (ACL Anthology link).
@inproceedings{pan2023fact,
  author    = {Pan, Liangming and Wu, Xiaobao and Lu, Xinyuan and Luu, Anh Tuan and Wang, William Yang and Kan, Min-Yen and Nakov, Preslav},
  title     = {Fact-Checking Complex Claims with Program-Guided Reasoning},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  pages     = {6981--7004},
  year      = {2023},
  publisher = {Association for Computational Linguistics},
  address   = {Toronto, Canada},
  url       = {https://aclanthology.org/2023.acl-long.386},
}
EMNLP Findings
DemaFormer: Damped Exponential Moving Average Transformer with Energy-Based Modeling for Temporal Language Grounding
Thong Nguyen, Xiaobao Wu, Xinshuai Dong, Cong-Duy Nguyen, See Kiong Ng, and Anh Luu
In Findings of Conference on Empirical Methods in Natural Language Processing (EMNLP), 2023
% DemaFormer, Findings of EMNLP 2023.
% NOTE(review): "Ng, See Kiong" and "Luu, Anh" are spelled differently from the
% "Ng, See-Kiong" / "Luu, Anh Tuan" forms used elsewhere in this file; kept as
% recorded -- confirm whether they should be normalized.
@inproceedings{nguyen2023demaformer,
  author    = {Nguyen, Thong and Wu, Xiaobao and Dong, Xinshuai and Nguyen, Cong-Duy and Ng, See Kiong and Luu, Anh},
  title     = {DemaFormer: Damped Exponential Moving Average Transformer with Energy-Based Modeling for Temporal Language Grounding},
  booktitle = {Findings of Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {3635--3649},
  year      = {2023},
  url       = {https://aclanthology.org/2023.findings-emnlp.235.pdf},
}
ACL Findings
Gradient-Boosted Decision Tree for Listwise Context Model in Multimodal Review Helpfulness Prediction
Thong Thanh Nguyen, Xiaobao Wu, Xinshuai Dong, Cong-Duy T Nguyen, Zhen Hai, Lidong Bing, and Anh Tuan Luu
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2023
% Multimodal review helpfulness prediction, Findings of ACL 2023.
@inproceedings{nguyen2023gradient,
  author    = {Nguyen, Thong Thanh and Wu, Xiaobao and Dong, Xinshuai and Nguyen, Cong-Duy T and Hai, Zhen and Bing, Lidong and Luu, Anh Tuan},
  title     = {Gradient-Boosted Decision Tree for Listwise Context Model in Multimodal Review Helpfulness Prediction},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2023},
  url       = {https://aclanthology.org/2023.findings-acl.106.pdf},
}
ACL Findings
Zero-Shot Text Classification via Self-Supervised Tuning
Chaoqun Liu, Wenxuan Zhang, Guizhen Chen, Xiaobao Wu, Anh Tuan Luu, Chip Hong Chang, and Lidong Bing
In Findings of Annual Meeting of the Association for Computational Linguistics (ACL), 2023
% Zero-shot text classification via self-supervised tuning, Findings of ACL 2023
% (pages recorded; no url field).
@inproceedings{liu2023zero,
  author    = {Liu, Chaoqun and Zhang, Wenxuan and Chen, Guizhen and Wu, Xiaobao and Luu, Anh Tuan and Chang, Chip Hong and Bing, Lidong},
  title     = {Zero-Shot Text Classification via Self-Supervised Tuning},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  pages     = {1743--1761},
  year      = {2023},
}
% Short text topic modeling with contrastive learning, EMNLP 2022 (ACL Anthology link).
@inproceedings{wu2022mitigating,
  author    = {Wu, Xiaobao and Luu, Anh Tuan and Dong, Xinshuai},
  title     = {Mitigating Data Sparsity for Short Text Topic Modeling by Topic-Semantic Contrastive Learning},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {2748--2760},
  year      = {2022},
  publisher = {Association for Computational Linguistics},
  address   = {Abu Dhabi, United Arab Emirates},
  url       = {https://aclanthology.org/2022.emnlp-main.176},
}
% Review helpfulness prediction with a multimodal transformer, EMNLP 2022.
% NOTE(review): "Luu, Anh-Tuan" is hyphenated here but written "Luu, Anh Tuan"
% elsewhere in this file; kept as recorded -- confirm whether to normalize.
@inproceedings{nguyen2022adaptive,
  author    = {Nguyen, Thong and Wu, Xiaobao and Luu, Anh-Tuan and Nguyen, Cong-Duy and Hai, Zhen and Bing, Lidong},
  title     = {Adaptive Contrastive Learning on Multimodal Transformer for Review Helpfulness Predictions},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2022},
  url       = {https://aclanthology.org/2022.emnlp-main.686.pdf},
}
2021
ACL Findings
Discovering Topics in Long-tailed Corpora with Causal Intervention
% Topic discovery in long-tailed corpora, Findings of ACL 2021 (doi and url both recorded).
@inproceedings{wu2021discovering,
  author    = {Wu, Xiaobao and Li, Chunping and Miao, Yishu},
  title     = {Discovering Topics in Long-tailed Corpora with Causal Intervention},
  booktitle = {Findings of Annual Meeting of the Association for Computational Linguistics (ACL)},
  pages     = {175--185},
  year      = {2021},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  doi       = {10.18653/v1/2021.findings-acl.15},
  url       = {https://aclanthology.org/2021.findings-acl.15},
}
% Short text topic modeling with quantization and negative sampling, EMNLP 2020.
@inproceedings{wu2020short,
  author    = {Wu, Xiaobao and Li, Chunping and Zhu, Yan and Miao, Yishu},
  title     = {Short Text Topic Modeling with Topic Distribution Quantization and Negative Sampling Decoder},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {1772--1782},
  year      = {2020},
  address   = {Online},
  url       = {https://aclanthology.org/2020.emnlp-main.138.pdf},
}
NLPCC Oral Presentation
Learning Multilingual Topics with Neural Variational Inference
% Multilingual topic learning with neural variational inference, NLPCC 2020;
% the custom oral field is set to true.
% The doi field holds the bare DOI that appears in the Springer url, which is
% kept for consumers that only read url.
@inproceedings{wu2020learning,
  author    = {Wu, Xiaobao and Li, Chunping and Zhu, Yan and Miao, Yishu},
  title     = {Learning Multilingual Topics with Neural Variational Inference},
  booktitle = {International Conference on Natural Language Processing and Chinese Computing (NLPCC)},
  year      = {2020},
  doi       = {10.1007/978-3-030-60450-9_66},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-60450-9_66},
  oral      = {true},
}
2019
IJCNN
Short Text Topic Modeling with Flexible Word Patterns
% Short text topic modeling with flexible word patterns, IJCNN 2019 (IEEE Xplore link).
% booktitle is spelled out as a braced string, in the same "Full Name (ACRONYM)"
% form as the other entries, so it does not depend on an @string macro being
% defined elsewhere.
@inproceedings{wu2019short,
  author    = {Wu, Xiaobao and Li, Chunping},
  title     = {Short Text Topic Modeling with Flexible Word Patterns},
  booktitle = {International Joint Conference on Neural Networks (IJCNN)},
  year      = {2019},
  url       = {https://ieeexplore.ieee.org/abstract/document/8852366/},
}