% Conference-paper entry: shared-task system description in the JUST-NLP 2025 workshop proceedings.
% Compound names and acronyms in title/booktitle are brace-protected as whole tokens so sentence-casing styles keep their capitalization.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path -- presumably temporary; confirm the permanent URL before relying on it.
@inproceedings{akarajaradwong-chaksangchaichot-2025-cold,
  title     = {Cold Starts and Hard Cases: A Two-Stage {SFT}-{RLVR} Approach for Legal Machine Translation ({Just-NLP} {L-MT} shared task)},
  author    = {Akarajaradwong, Pawitsapak and
               Chaksangchaichot, Chompakorn},
  editor    = {Modi, Ashutosh and
               Ghosh, Saptarshi and
               Ekbal, Asif and
               Goyal, Pawan and
               Jain, Sarika and
               Joshi, Abhinav and
               Mishra, Shivani and
               Datta, Debtanu and
               Paul, Shounak and
               Singh, Kshetrimayum Boynao and
               Kumar, Sandeep},
  booktitle = {Proceedings of the 1st Workshop on {NLP} for Empowering Justice ({JUST-NLP} 2025)},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.justnlp-main.9/},
  pages     = {101--106},
  isbn      = {979-8-89176-312-8},
  abstract  = {This paper details our system for the JUST-NLP 2025 Shared Task on English-to-Hindi Legal Machine Translation. We propose a novel two-stage, data-centric approach. First, we annotate the training data by translation difficulty and create easy and hard subsets. We perform SFT on the easier subset to establish a robust ``cold start''. Then, we apply RLVR exclusively on the harder subset, using machine translation metrics as reward signals. This strategy allowed our system to significantly outperform strong baselines, demonstrating the capability of our systems for machine translation tasks. Source code and model weights are available at https://github.com/ppaolong/FourCorners-JustNLP-MT-Shared-Task},
}
Markdown (Informal)
[Cold Starts and Hard Cases: A Two-Stage SFT-RLVR Approach for Legal Machine Translation (Just-NLP L-MT shared task)](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.justnlp-main.9/) (Akarajaradwong & Chaksangchaichot, JUSTNLP 2025)
ACL