# sam-docs/sam/docs/contracts/scripts/insert_revision_table.py

#!/usr/bin/env python3
"""
Insert a revision-history table into DOCX files.

Reads revisions.json and inserts a revision-history table right after the
title of each DOCX document.
- Insertion point: immediately after the document title (the first Heading
  or bold text)
- Style: Pretendard 9pt, light-blue header, gray borders
- The original file is backed up before the table is inserted
"""

import json
import shutil
import sys
from pathlib import Path

from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls, qn
from docx.shared import Pt, RGBColor

# All paths hang off BASE_DIR, the parent of the directory holding this script.
BASE_DIR = Path(__file__).resolve().parent.parent
DOCX_DIR = BASE_DIR / "docx"
BACKUP_DIR = BASE_DIR / "docx" / "backup"
REVISIONS_FILE = BASE_DIR / "revisions.json"

# Style settings
FONT_NAME = "Pretendard"
FONT_NAME_FALLBACK = "맑은 고딕"  # only ever written to the w:cs font slot
FONT_SIZE = Pt(9)
HEADER_BG_COLOR = "D6E4F0"  # light blue
BORDER_COLOR = "999999"  # gray border
HEADER_FONT_COLOR = RGBColor(0x2B, 0x47, 0x6B)  # dark blue text


def set_cell_border(cell, **kwargs):
    """Give a table cell a thin single-line border on all four sides.

    Any ``<w:tcBorders>`` already present on the cell is replaced, so calling
    this more than once on the same cell is safe.

    Args:
        cell: python-docx table cell; its ``<w:tcPr>`` is modified in place.
        **kwargs: Accepted for backward compatibility and ignored. The border
            is always ``single``, ``w:sz="4"``, coloured ``BORDER_COLOR``.
    """
    tcPr = cell._tc.get_or_add_tcPr()

    tcBorders = parse_xml(
        f'<w:tcBorders {nsdecls("w")}>'
        f'  <w:top w:val="single" w:sz="4" w:color="{BORDER_COLOR}"/>'
        f'  <w:left w:val="single" w:sz="4" w:color="{BORDER_COLOR}"/>'
        f'  <w:bottom w:val="single" w:sz="4" w:color="{BORDER_COLOR}"/>'
        f'  <w:right w:val="single" w:sz="4" w:color="{BORDER_COLOR}"/>'
        f"</w:tcBorders>"
    )

    # Replace rather than stack: a second call must not leave two
    # <w:tcBorders> children behind.
    for existing in tcPr.findall(qn("w:tcBorders")):
        tcPr.remove(existing)

    # <w:tcPr> children form an ordered sequence in the WordprocessingML
    # schema, and tcBorders has to come before the elements listed here
    # (notably <w:shd>, which the header cells receive first).
    later_tags = {
        qn(name)
        for name in (
            "w:shd",
            "w:noWrap",
            "w:tcMar",
            "w:textDirection",
            "w:tcFitText",
            "w:vAlign",
            "w:hideMark",
            "w:headers",
            "w:cellIns",
            "w:cellDel",
            "w:cellMerge",
            "w:tcPrChange",
        )
    }
    for child in tcPr:
        if child.tag in later_tags:
            child.addprevious(tcBorders)
            break
    else:
        # Nothing that must follow the borders exists yet.
        tcPr.append(tcBorders)


def set_cell_shading(cell, color):
    """Set the background fill of a table cell.

    Any ``<w:shd>`` already present on the cell is replaced, so repeated
    calls do not accumulate shading elements.

    Args:
        cell: python-docx table cell; its ``<w:tcPr>`` is modified in place.
        color: Fill colour written verbatim into ``w:fill`` (the callers in
            this file pass a six-digit hex string such as ``"D6E4F0"``).
    """
    tcPr = cell._tc.get_or_add_tcPr()
    shading = parse_xml(
        f'<w:shd {nsdecls("w")} w:fill="{color}" w:val="clear"/>'
    )

    # Replace rather than stack duplicate <w:shd> children.
    for existing in tcPr.findall(qn("w:shd")):
        tcPr.remove(existing)

    # <w:tcPr> children form an ordered sequence in the WordprocessingML
    # schema; <w:shd> has to come before the elements listed here.
    later_tags = {
        qn(name)
        for name in (
            "w:noWrap",
            "w:tcMar",
            "w:textDirection",
            "w:tcFitText",
            "w:vAlign",
            "w:hideMark",
            "w:headers",
            "w:cellIns",
            "w:cellDel",
            "w:cellMerge",
            "w:tcPrChange",
        )
    }
    for child in tcPr:
        if child.tag in later_tags:
            child.addprevious(shading)
            break
    else:
        # Nothing that must follow the shading exists yet.
        tcPr.append(shading)


def set_run_font(run, bold=False):
    """Apply the table font (size, weight and typeface) to a run.

    Args:
        run: python-docx run to format in place.
        bold: Whether the run is rendered bold.
    """
    font = run.font
    font.size = FONT_SIZE
    font.bold = bold
    font.name = FONT_NAME

    run_props = run._element.get_or_add_rPr()

    # Discard whatever <w:rFonts> is there (including the one python-docx
    # just wrote) so the explicit element below is the only one.
    for stale in run_props.findall(qn("w:rFonts")):
        run_props.remove(stale)

    # FONT_NAME fills the ascii / hAnsi / eastAsia slots; the fallback face
    # goes into the cs slot only.
    fonts_xml = (
        f'<w:rFonts {nsdecls("w")} '
        f'w:ascii="{FONT_NAME}" '
        f'w:hAnsi="{FONT_NAME}" '
        f'w:eastAsia="{FONT_NAME}" '
        f'w:cs="{FONT_NAME_FALLBACK}"/>'
    )
    run_props.insert(0, parse_xml(fonts_xml))


def create_revision_table(doc, revisions):
    """Build the revision-history table and return it.

    The table is created with ``doc.add_table``, so it IS appended to the
    document; a caller that wants it elsewhere has to move the returned
    table's ``_tbl`` element (``insert_table_after_paragraph`` does that).

    Args:
        doc: python-docx ``Document`` that receives the table.
        revisions: Iterable of mappings, each with the keys ``"version"``,
            ``"date"``, ``"author"`` and ``"description"``.

    Returns:
        The python-docx table: one header row followed by one row per
        revision, newest date first.

    Raises:
        KeyError: If a revision lacks one of the four required keys.
    """
    headers = ["버전", "날짜", "작성자", "변경 내용"]
    fields = ("version", "date", "author", "description")

    def fill_cell(cell, text, bold=False):
        """Replace the cell content with one centered, styled run."""
        cell.text = ""
        paragraph = cell.paragraphs[0]
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = paragraph.add_run(text)
        set_run_font(run, bold=bold)
        return run

    # Newest first. The sort compares the date text, so it is chronological
    # only for fixed-width formats such as YYYY-MM-DD (presumably what
    # revisions.json uses; confirm against the data).
    sorted_revisions = sorted(
        revisions, key=lambda r: str(r["date"]), reverse=True
    )

    table = doc.add_table(rows=1 + len(sorted_revisions), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    # Header row: bold coloured text on a shaded background.
    for cell, header_text in zip(table.rows[0].cells, headers):
        run = fill_cell(cell, header_text, bold=True)
        run.font.color.rgb = HEADER_FONT_COLOR
        set_cell_shading(cell, HEADER_BG_COLOR)
        set_cell_border(cell)

    # Data rows.
    for row, rev in zip(table.rows[1:], sorted_revisions):
        for cell, field in zip(row.cells, fields):
            value = rev[field]
            # JSON may deliver numbers or null (e.g. a version of 1.0);
            # add_run only handles text.
            fill_cell(cell, "" if value is None else str(value))
            set_cell_border(cell)

    return table


def find_title_paragraph_index(doc, doc_type="pattern"):
    """Return the index of the paragraph treated as the document title.

    Paragraphs whose text is empty or whitespace-only are skipped.

    Args:
        doc: Object exposing a ``paragraphs`` sequence (a python-docx
            ``Document``).
        doc_type: ``"styled"`` selects the first paragraph whose style is
            named ``"Heading 1"``; any other value selects the first
            paragraph that contains at least one bold run.

    Returns:
        The matching index into ``doc.paragraphs``, or ``0`` when nothing
        matches.
    """
    use_heading_style = doc_type == "styled"

    for position, paragraph in enumerate(doc.paragraphs):
        if not paragraph.text.strip():
            continue

        if use_heading_style:
            style = paragraph.style
            is_title = bool(style) and style.name == "Heading 1"
        else:
            is_title = any(run.bold for run in paragraph.runs)

        if is_title:
            return position

    # No candidate found: fall back to the first paragraph.
    return 0


def find_subtitle_index(doc, title_idx):
    """Return the index after which the revision table should be placed.

    If the paragraph directly after the title starts with one of the known
    English subtitle prefixes, that paragraph's index is returned so the
    table lands below the subtitle; otherwise ``title_idx`` is returned.

    Args:
        doc: Object exposing a ``paragraphs`` sequence.
        title_idx: Index of the title paragraph in ``doc.paragraphs``.
    """
    paragraphs = doc.paragraphs
    candidate = title_idx + 1

    # The title is the last paragraph: nothing can follow it.
    if candidate >= len(paragraphs):
        return title_idx

    subtitle_prefixes = ("Customer Service", "Sales Partner", "Non-Disclosure")
    if paragraphs[candidate].text.strip().startswith(subtitle_prefixes):
        return candidate

    return title_idx


def insert_table_after_paragraph(doc, para_idx, table):
    """Move ``table`` so that it follows the body paragraph at ``para_idx``.

    Four elements end up directly after the target paragraph, in this order:
    an empty spacer paragraph, a centered "[ 개정이력 ]" label paragraph, the
    table itself, and a second empty spacer paragraph.

    Args:
        doc: python-docx ``Document`` whose body is modified in place.
        para_idx: Index into the body's direct ``<w:p>`` children. Values
            past the end are clamped to the last paragraph.
        table: python-docx table (e.g. from ``create_revision_table``); its
            ``<w:tbl>`` element is detached from wherever it currently sits.
    """
    body = doc.element.body

    # Both spacers are the same empty paragraph with spacing above and below.
    spacer_xml = (
        f'<w:p {nsdecls("w")}>'
        f'  <w:pPr><w:spacing w:before="120" w:after="120"/></w:pPr>'
        f"</w:p>"
    )
    spacer_before = parse_xml(spacer_xml)
    spacer_after = parse_xml(spacer_xml)

    # Label paragraph. remove_existing_revision_table finds an earlier
    # insertion through this label text, so keep the two in step.
    label_para = parse_xml(
        f'<w:p {nsdecls("w")}>'
        f"  <w:pPr>"
        f'    <w:jc w:val="center"/>'
        f'    <w:spacing w:before="200" w:after="80"/>'
        f"  </w:pPr>"
        f"  <w:r>"
        f"    <w:rPr>"
        f'      <w:rFonts w:ascii="{FONT_NAME}" w:hAnsi="{FONT_NAME}" '
        f'        w:eastAsia="{FONT_NAME}" w:cs="{FONT_NAME_FALLBACK}"/>'
        f'      <w:sz w:val="18"/>'
        f'      <w:szCs w:val="18"/>'
        f"      <w:b/>"
        f'      <w:color w:val="666666"/>'
        f"    </w:rPr>"
        f"    <w:t>[ 개정이력 ]</w:t>"
        f"  </w:r>"
        f"</w:p>"
    )

    # Look up the target before touching the tree.
    para_elements = body.findall(qn("w:p"))

    # Detach the table from its current parent (doc.add_table leaves it at
    # the end of the body) without assuming that parent is the body.
    table_element = table._tbl
    parent = table_element.getparent()
    if parent is not None:
        parent.remove(table_element)

    new_elements = [spacer_before, label_para, table_element, spacer_after]

    if not para_elements:
        # A body without paragraphs has nothing to anchor on; put the block
        # at the very start of the body instead of failing on an empty list.
        for offset, element in enumerate(new_elements):
            body.insert(offset, element)
        return

    if para_idx >= len(para_elements):
        para_idx = len(para_elements) - 1

    # Chain the insertions so each element follows the previous one.
    anchor = para_elements[para_idx]
    for element in new_elements:
        anchor.addnext(element)
        anchor = element


def remove_existing_revision_table(doc):
    """Remove a previously inserted revision-history block, if there is one.

    The block is recognised by a body paragraph whose whole text is the label
    "[ 개정이력 ]". Only an exact match counts, so ordinary body text that
    merely mentions "개정이력" is left alone. For the first such label this
    removes:

    * the paragraph right before it, when that paragraph has no text;
    * the table right after it, when the next element is a table;
    * the paragraph right after that table, when it has no text;
    * the label paragraph itself.

    Args:
        doc: python-docx ``Document`` whose body is modified in place.

    Returns:
        ``True`` if a label paragraph was found and removed, else ``False``.
    """
    body = doc.element.body
    label_text = "[ 개정이력 ]"
    p_tag, tbl_tag, t_tag = qn("w:p"), qn("w:tbl"), qn("w:t")

    def text_of(element):
        """Concatenate the text of every <w:t> below ``element``."""
        return "".join(t.text or "" for t in element.iter(t_tag))

    def is_blank_paragraph(element):
        """True for a <w:p> element that carries no visible text."""
        return (
            element is not None
            and element.tag == p_tag
            and not text_of(element).strip()
        )

    for para in body.findall(p_tag):
        if text_of(para).strip() != label_text:
            continue

        # Spacer in front of the label.
        before = para.getprevious()
        if is_blank_paragraph(before):
            body.remove(before)

        # Table after the label, plus the spacer that follows it. The spacer
        # is looked up before the table is detached from the tree.
        following = para.getnext()
        if following is not None and following.tag == tbl_tag:
            after = following.getnext()
            body.remove(following)
            if is_blank_paragraph(after):
                body.remove(after)

        body.remove(para)
        return True

    return False


def process_document(docx_name, doc_info):
    """Insert (or refresh) the revision-history table in one DOCX file.

    Steps: copy the file into the backup directory, drop any revision block
    left by an earlier run, locate the title (and optional English subtitle),
    build the table and move it below that paragraph, then save in place.

    Args:
        docx_name: File name relative to ``DOCX_DIR``.
        doc_info: Mapping for this document; ``doc_info["revisions"]`` is
            passed to ``create_revision_table``.

    Returns:
        ``True`` when the file was processed and saved, ``False`` when it
        does not exist.
    """
    docx_path = DOCX_DIR / docx_name
    if not docx_path.exists():
        print(f"  [SKIP] {docx_name} - 파일 없음")
        return False

    # Backup. Creating the parent of the backup path (not just BACKUP_DIR)
    # keeps the copy working when docx_name contains a sub-directory.
    # NOTE(review): the backup is overwritten on every run, so after a
    # second run it holds the output of the first run, not the original.
    backup_path = BACKUP_DIR / docx_name
    backup_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(docx_path, backup_path)
    print(f"  [BACKUP] {docx_name} → backup/")

    doc = Document(str(docx_path))

    # Remove the block from an earlier run so it is not duplicated.
    if remove_existing_revision_table(doc):
        print("  [INFO] 기존 개정이력 테이블 제거됨")

    # Documents using Heading styles are searched by style, the rest by
    # bold text. A style without a name counts as "not a heading".
    has_heading_styles = any(
        p.style and (p.style.name or "").startswith("Heading")
        for p in doc.paragraphs
    )
    doc_type = "styled" if has_heading_styles else "pattern"

    # Title position, then the optional English subtitle below it.
    title_idx = find_title_paragraph_index(doc, doc_type)
    insert_after_idx = find_subtitle_index(doc, title_idx)

    # The table is first appended to the document, then moved into place.
    table = create_revision_table(doc, doc_info["revisions"])
    insert_table_after_paragraph(doc, insert_after_idx, table)

    doc.save(str(docx_path))
    print(f"  [OK] {docx_name} - 개정이력 테이블 삽입 완료")
    return True


def main():
    """Process every document listed in ``REVISIONS_FILE``.

    Returns:
        ``0`` when every listed document was processed, ``1`` when the
        revisions file is missing or at least one document was skipped.
    """
    print("DOCX 개정이력 테이블 삽입 시작")
    print(f"  DOCX 디렉토리: {DOCX_DIR}")
    print(f"  개정 데이터: {REVISIONS_FILE}")
    print()

    # Nothing can be done without the revision data.
    if not REVISIONS_FILE.exists():
        print(f"[ERROR] {REVISIONS_FILE} 파일을 찾을 수 없습니다.")
        return 1

    payload = json.loads(REVISIONS_FILE.read_text(encoding="utf-8"))
    documents = payload.get("documents", {})
    total = len(documents)

    processed = 0
    # Only the per-document entries are needed; the mapping keys are unused.
    for entry in documents.values():
        file_name = entry["docx_file"]
        print(f"처리 중: {entry['title']}")
        if process_document(file_name, entry):
            processed += 1
        print()

    print(f"완료: {processed}/{total} 파일 처리됨")
    if processed == total:
        return 0
    return 1


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())