AI/ML News & Innovations Hub

% Conference paper: survey of Process Reward Models, in the ACL 2026 proceedings (Volume 1: Long Papers).
@inproceedings{zheng-etal-2026-comprehensive,
    title = "A Comprehensive Survey of Process Reward Models: Data Generation, Model Construction, and Usage",
    author = "Zheng, Congmin  and
      Zhu, Jiachen  and
      Ou, Zhuoying  and
      Chen, Yuxiang  and
      Zhang, Kangning  and
      Shan, Rong  and
      Zheng, Zeyu  and
      Yang, Mengyue  and
      Lin, Jianghao  and
      Yu, Yong  and
      Zhang, Weinan",
    editor = "Liakata, Maria  and
      Moreira, Viviane P.  and
      Zhang, Jiajun  and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-long.163/",
    pages = "3591--3607",
    isbn = "979-8-89176-390-6",
    abstract = "Large Language Models (LLMs) have advanced reasoning ability, yet conventional alignment remains dominated by outcome reward models (ORMs) that judge only final answers. Process Reward Models (PRMs) address this gap by evaluating and guiding reasoning at the step or trajectory level. This survey provides a systematic overview of PRMs through the full loop: how to generate process data, build PRMs, and use PRMs for test-time scaling and reinforcement learning. We summarize applications across math, code, text, multimodal reasoning, robotics, and agents, and review emerging benchmarks. Our goal is to clarify design spaces, reveal open challenges, and guide future research toward fine-grained, robust reasoning alignment."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2026-comprehensive">
    <titleInfo>
        <title>A Comprehensive Survey of Process Reward Models: Data Generation, Model Construction, and Usage</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Congmin</namePart>
        <namePart type="family">Zheng</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jiachen</namePart>
        <namePart type="family">Zhu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Zhuoying</namePart>
        <namePart type="family">Ou</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yuxiang</namePart>
        <namePart type="family">Chen</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Kangning</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Rong</namePart>
        <namePart type="family">Shan</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Zeyu</namePart>
        <namePart type="family">Zheng</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Mengyue</namePart>
        <namePart type="family">Yang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jianghao</namePart>
        <namePart type="family">Lin</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yong</namePart>
        <namePart type="family">Yu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Weinan</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Maria</namePart>
            <namePart type="family">Liakata</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Viviane</namePart>
            <namePart type="given">P</namePart>
            <namePart type="family">Moreira</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jiajun</namePart>
            <namePart type="family">Zhang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">David</namePart>
            <namePart type="family">Jurgens</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">San Diego, California, United States</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
        <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>Large Language Models (LLMs) have advanced reasoning ability, yet conventional alignment remains dominated by outcome reward models (ORMs) that judge only final answers. Process Reward Models (PRMs) address this gap by evaluating and guiding reasoning at the step or trajectory level. This survey provides a systematic overview of PRMs through the full loop: how to generate process data, build PRMs, and use PRMs for test-time scaling and reinforcement learning. We summarize applications across math, code, text, multimodal reasoning, robotics, and agents, and review emerging benchmarks. Our goal is to clarify design spaces, reveal open challenges, and guide future research toward fine-grained, robust reasoning alignment.</abstract>
    <identifier type="citekey">zheng-etal-2026-comprehensive</identifier>
    <location>
        <url>https://aclanthology.org/2026.acl-long.163/</url>
    </location>
    <part>
        <date>2026-07</date>
        <extent unit="page">
            <start>3591</start>
            <end>3607</end>
        </extent>
    </part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T A Comprehensive Survey of Process Reward Models: Data Generation, Model Construction, and Usage
%A Zheng, Congmin
%A Zhu, Jiachen
%A Ou, Zhuoying
%A Chen, Yuxiang
%A Zhang, Kangning
%A Shan, Rong
%A Zheng, Zeyu
%A Yang, Mengyue
%A Lin, Jianghao
%A Yu, Yong
%A Zhang, Weinan
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F zheng-etal-2026-comprehensive
%X Large Language Models (LLMs) have advanced reasoning ability, yet conventional alignment remains dominated by outcome reward models (ORMs) that judge only final answers. Process Reward Models (PRMs) address this gap by evaluating and guiding reasoning at the step or trajectory level. This survey provides a systematic overview of PRMs through the full loop: how to generate process data, build PRMs, and use PRMs for test-time scaling and reinforcement learning. We summarize applications across math, code, text, multimodal reasoning, robotics, and agents, and review emerging benchmarks. Our goal is to clarify design spaces, reveal open challenges, and guide future research toward fine-grained, robust reasoning alignment.
%U https://aclanthology.org/2026.acl-long.163/
%P 3591-3607

Markdown (Informal)

[A Comprehensive Survey of Process Reward Models: Data Generation, Model Construction, and Usage](https://aclanthology.org/2026.acl-long.163/) (Zheng et al., ACL 2026)

ACL

Congmin Zheng, Jiachen Zhu, Zhuoying Ou, Yuxiang Chen, Kangning Zhang, Rong Shan, Zeyu Zheng, Mengyue Yang, Jianghao Lin, Yong Yu, and Weinan Zhang. 2026. A Comprehensive Survey of Process Reward Models: Data Generation, Model Construction, and Usage. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 3591–3607, San Diego, California, United States. Association for Computational Linguistics.