<source_document>
  {{text}}
</source_document>

<draft_extraction>
  <text_complexity>{{complexity}}</text_complexity>
  <dominant_narrative>{{narrative}}</dominant_narrative>
  <extracted_spans>
    {{draft_json}}
  </extracted_spans>
</draft_extraction>

<audit_instructions mode="strict">
  You are auditing the draft extraction above against the source document for quality, faithfulness, and discriminative power.
  Evaluate the draft on the following dimensions:

  <dimension name="verbatim_accuracy">
    Every span MUST appear verbatim (character-for-character) in the source document.
    Flag hallucinated, paraphrased, or truncated spans.
  </dimension>

  <dimension name="granularity">
    Each span should capture a complete, self-contained unit of meaning.
    Single words or overly clipped fragments are invalid.
  </dimension>

  <dimension name="label_correctness">
    Does the reasoning truly justify the assigned label?
    Are alternative labels dismissed with valid logic?
  </dimension>

  <dimension name="exhaustiveness">
    Identify missed spans, especially:
    - Ambiguous phrasing
    - Implicit framing
    - Tone-based signals
  </dimension>

  <dimension name="discrimination_quality">
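    Is the reasoning genuinely discriminative, explaining why this label applies rather than the alternatives,
    or does it merely restate the label without contrasting it against other candidate labels?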
  </dimension>
</audit_instructions>

<output_requirements>
  Return specific, actionable feedback:
  - Reference spans explicitly
  - State what is wrong and how to fix it
  - Prefer concrete edits over abstract critique
</output_requirements>
