atsuoishimoto · atsuoishimoto · Jul 6, 2025 · Jul 6, 2025 · Jul 6, 2025 · Jul 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -1 +1,4 @@
 cpython
+locales
+.*
+__pycache__
diff --git a/CLAUDE.md b/CLAUDE.md
diff --git a/Makefile b/Makefile
@@ -139,3 +139,8 @@ clean:
 	find -name '*.mo' -delete
 	@echo "Cleaning build directory"
 	$(MAKE) -C venv/cpython/Doc/ clean
+
+# publish: upload the built HTML documentation to S3.
+# Runs `aws s3 sync` from venv/cpython/Doc/build/html into the bucket
+# s3://test-pythondoc-ja-cloude1. The target declares no prerequisites, so it
+# uploads whatever is currently in that directory without rebuilding first.
+# NOTE(review): presumably requires the AWS CLI and credentials to be
+# configured in the calling environment - confirm.
+.PHONY: publish
+publish:
+	@echo "Publishing documentation to S3..."
+	cd venv/cpython/Doc/build/html && aws s3 sync . s3://test-pythondoc-ja-cloude1
diff --git a/check_translation_progress.py b/check_translation_progress.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""
+翻訳進捗チェックツール
+分割された翻訳ファイルの進捗を確認し、作業再開をサポートします。
+"""
+
+import os
+import sys
+import re
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional
+
+
+def get_split_info(po_file: str) -> Tuple[str, Path, Path]:
+    """Derive the per-file working directories used for split translation.
+
+    The paths are only computed; nothing is created on disk.
+
+    Args:
+        po_file: Path to the original ``.po`` file.
+
+    Returns:
+        ``(base_name, splitted_dir, done_dir)`` where ``base_name`` is the file
+        name without its suffix, ``splitted_dir`` is ``.splitted/<base_name>``
+        and ``done_dir`` is ``.splitted_done/<base_name>``. Both directories
+        are relative ``Path`` objects.
+    """
+    base_name = Path(po_file).stem
+
+    # Each source file gets its own sub-directory so that parts belonging to
+    # different PO files never mix.
+    splitted_dir = Path(".splitted") / base_name
+    done_dir = Path(".splitted_done") / base_name
+
+    return base_name, splitted_dir, done_dir
+
+
+def check_split_files(splitted_dir: Path) -> List[Path]:
+    """Return the sorted ``*_part_*.po`` files located in *splitted_dir*.
+
+    An empty list is returned when the directory does not exist.
+    """
+    if splitted_dir.exists():
+        return sorted(splitted_dir.glob("*_part_*.po"))
+    return []
+
+
+def check_done_files(done_dir: Path) -> List[Path]:
+    """Return the sorted ``*_part_*.po`` files located in *done_dir*.
+
+    An empty list is returned when the directory does not exist.
+    """
+    if done_dir.exists():
+        return sorted(done_dir.glob("*_part_*.po"))
+    return []
+
+
+def get_untranslated_count(po_file: Path) -> int:
+    """Count the untranslated, non-code entries in a PO file.
+
+    The file is scanned line by line. Every ``msgid`` line starts a new entry
+    and the quoted continuation lines that follow a ``msgid``/``msgstr``
+    keyword are joined to it, so multi-line strings are judged as a whole.
+    An entry is counted when its msgid is non-empty, its msgstr is empty and
+    the msgid contains none of the code-sample markers.
+
+    Args:
+        po_file: Path of the PO file to inspect.
+
+    Returns:
+        The number of untranslated entries, or ``-1`` when the file cannot be
+        read or is not valid UTF-8.
+    """
+    try:
+        with open(po_file, 'r', encoding='utf-8') as f:
+            content = f.read()
+    except (OSError, UnicodeDecodeError):
+        # Callers treat any value other than 0 as "not completed".
+        return -1
+
+    code_markers = ('def ', 'class ', 'assert ', 'print(', 'return ', '>>>', 'import ', 'raise ')
+    quoted = re.compile(r'"(.*)"')
+
+    # Each entry is a pair of string-fragment lists: (msgid parts, msgstr parts).
+    entries = []
+    target = None
+    for raw_line in content.splitlines():
+        line = raw_line.strip()
+        if line.startswith('msgid '):
+            entries.append(([], []))
+            target = entries[-1][0]
+        elif line.startswith('msgstr') and entries:
+            target = entries[-1][1]
+        elif not line.startswith('"'):
+            # Comments, blank lines and other keywords end the current string.
+            target = None
+        if target is not None:
+            found = quoted.search(line)
+            target.append(found.group(1) if found else '')
+
+    untranslated = 0
+    for msgid_parts, msgstr_parts in entries:
+        msgid = ''.join(msgid_parts)
+        # Skip the header (empty msgid), entries without a msgstr keyword and
+        # entries that already carry a translation.
+        if not msgid.strip() or not msgstr_parts or ''.join(msgstr_parts):
+            continue
+        # Code samples are intentionally left untranslated.
+        if any(marker in msgid for marker in code_markers):
+            continue
+        untranslated += 1
+
+    return untranslated
+
+
+def analyze_progress(po_file: str) -> Dict:
+    """Collect the translation progress for the split parts of *po_file*.
+
+    Args:
+        po_file: Path to the original PO file.
+
+    Returns:
+        A dict with the keys ``base_name``, ``original_file``,
+        ``original_exists``, ``splitted_dir``, ``done_dir``, ``split_files``,
+        ``done_files``, ``split_status``, ``done_status`` and
+        ``has_work_in_progress``. The two ``*_status`` dicts map a part file
+        name to ``{'path', 'untranslated', 'completed'}``; ``completed`` is
+        true only when the untranslated count is exactly 0.
+    """
+    base_name, splitted_dir, done_dir = get_split_info(po_file)
+    original_exists = os.path.exists(po_file)
+
+    split_files = check_split_files(splitted_dir)
+    done_files = check_done_files(done_dir)
+
+    def summarize(part_files):
+        # Build the per-file status mapping, keyed by file name.
+        summary = {}
+        for part in part_files:
+            remaining = get_untranslated_count(part)
+            summary[part.name] = {
+                'path': part,
+                'untranslated': remaining,
+                'completed': remaining == 0,
+            }
+        return summary
+
+    split_status = summarize(split_files)
+    done_status = summarize(done_files)
+
+    return {
+        'base_name': base_name,
+        'original_file': po_file,
+        'original_exists': original_exists,
+        'splitted_dir': splitted_dir,
+        'done_dir': done_dir,
+        'split_files': split_files,
+        'done_files': done_files,
+        'split_status': split_status,
+        'done_status': done_status,
+        'has_work_in_progress': bool(split_files or done_files),
+    }
+
+
+def print_progress_report(progress: Dict):
+    """Print a human-readable progress report to standard output.
+
+    Args:
+        progress: Mapping that provides the keys ``base_name``,
+            ``original_file``, ``original_exists``, ``splitted_dir``,
+            ``done_dir``, ``has_work_in_progress``, ``split_status`` and
+            ``done_status``.
+    """
+    existence = '存在' if progress['original_exists'] else '不存在'
+    print(f"=== 翻訳進捗レポート: {progress['base_name']} ===")
+    print(f"元ファイル: {progress['original_file']} ({existence})")
+    print(f"分割作業ディレクトリ: {progress['splitted_dir']}")
+    print(f"完了ディレクトリ: {progress['done_dir']}")
+
+    if not progress['has_work_in_progress']:
+        print("✅ 作業中のファイルはありません")
+        return
+
+    # (heading, per-file statuses, mark shown for files that are not completed)
+    sections = (
+        ("\n--- 作業中ファイル ---", progress['split_status'], "🔄"),
+        ("\n--- 完了ファイル ---", progress['done_status'], "⚠️"),
+    )
+    for heading, statuses, pending_mark in sections:
+        print(heading)
+        for filename, status in statuses.items():
+            mark = "✅" if status['completed'] else pending_mark
+            print(f"{mark} {filename}: 未翻訳{status['untranslated']}件")
+
+    # Suggest the next step: the first unfinished part, otherwise joining.
+    first_pending = next(
+        (name for name, status in progress['split_status'].items() if not status['completed']),
+        None,
+    )
+    if first_pending is not None:
+        print(f"\n📋 次の作業: {first_pending} の翻訳を継続")
+    else:
+        print("\n📋 次の作業: 完了ファイルの結合")
+
+
+def get_next_action(progress: Dict) -> str:
+    """Return the identifier of the action that should be performed next.
+
+    Returns:
+        ``"start_fresh"`` when no work is in progress,
+        ``"continue_translation"`` when at least one split file is not
+        completed, ``"join_files"`` when every split file is completed and
+        done files exist, and ``"unknown"`` otherwise.
+    """
+    if not progress['has_work_in_progress']:
+        return "start_fresh"
+
+    # Any unfinished split file means translation has to continue first.
+    if any(not status['completed'] for status in progress['split_status'].values()):
+        return "continue_translation"
+
+    # Everything in progress is finished; join if there is something to join.
+    return "join_files" if progress['done_files'] else "unknown"
+
+
+def main():
+    """Command-line entry point: report progress for the given PO file.
+
+    Expects exactly one command-line argument, the path of the PO file. Exits
+    with status 1 when the argument count is wrong or the file does not exist.
+    """
+    arguments = sys.argv[1:]
+    if len(sys.argv) != 2:
+        print("使用法: python check_translation_progress.py <po_file>")
+        sys.exit(1)
+
+    po_file = arguments[0]
+    if not os.path.exists(po_file):
+        print(f"エラー: ファイル '{po_file}' が見つかりません")
+        sys.exit(1)
+
+    progress = analyze_progress(po_file)
+    print_progress_report(progress)
+    print(f"\n🎯 推奨アクション: {get_next_action(progress)}")
+
+
+# Run the CLI only when the module is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/check_untranslated.py b/check_untranslated.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+import re
+import sys
+
+# One double-quoted PO string; backslash escapes such as \" stay inside it.
+_PO_QUOTED = r'"((?:[^"\\]|\\.)*)"'
+
+# Substrings that mark a msgid as code, a URL or otherwise not translatable.
+_NO_TRANSLATE_MARKERS = (
+    'def ', 'class ', 'assert ', 'print(', 'return ', 'import ', '>>>',
+    'raise ', 'if __name__', 'try:', 'except:', 'http://', 'https://',
+)
+
+# Single terms that are deliberately excluded from the "needs translation" list.
+_SKIPPED_TERMS = ('Parameters', 'Examples:', 'Constant')
+
+
+def _po_field(entry, keyword):
+    """Return the complete text of *keyword* (``msgid``/``msgstr``) in *entry*.
+
+    The string on the keyword line and all directly following quoted
+    continuation lines are concatenated. Returns ``None`` when the keyword
+    does not occur at the start of a line.
+    """
+    match = re.search(
+        r'^' + keyword + r'[ \t]+((?:"(?:[^"\\]|\\.)*"[ \t]*\n?)+)',
+        entry,
+        re.MULTILINE,
+    )
+    if match is None:
+        return None
+    return ''.join(re.findall(_PO_QUOTED, match.group(1)))
+
+
+def _is_code_or_no_translate(msgid_content):
+    """Return True when *msgid_content* is code or needs no translation."""
+    stripped = msgid_content.strip()
+    # Empty, whitespace-only or consisting solely of escaped newlines.
+    if re.fullmatch(r'(?:\s|\\n)*', msgid_content):
+        return True
+    if any(marker in msgid_content for marker in _NO_TRANSLATE_MARKERS):
+        return True
+    # File paths and command examples.
+    if '/' in msgid_content and ('.py' in msgid_content or '.txt' in msgid_content):
+        return True
+    # A bare version number, PEP reference or GitHub issue reference.
+    if re.fullmatch(r'\d+\.\d+|:pep:`\d+`|:gh:`\d+`', stripped):
+        return True
+    return stripped in _SKIPPED_TERMS
+
+
+def check_untranslated(filename):
+    """Print a translation status report for the PO file *filename*.
+
+    Entries are located by their ``#:`` reference comment. The msgid and
+    msgstr of each entry are read including continuation lines, so multi-line
+    strings are classified by their whole content. Untranslated entries are
+    split into "code / no translation needed" and "needs translation"; the
+    first 20 of the latter are listed.
+
+    Args:
+        filename: Path of the PO file. When the file does not exist an error
+            message is printed and the function returns.
+
+    Returns:
+        None. All results are written to standard output.
+    """
+    try:
+        with open(filename, 'r', encoding='utf-8') as f:
+            content = f.read()
+    except FileNotFoundError:
+        print(f'❌ ファイル {filename} が見つかりません')
+        return
+
+    # One match per entry: from a "#:" comment up to the next one (or the end).
+    entries = re.findall(r'#:.*?\nmsgid.*?\nmsgstr.*?(?=\n#:|$)', content, re.DOTALL)
+
+    untranslated_count = 0
+    code_samples = 0
+    translatable_untranslated = []
+
+    for entry_num, entry in enumerate(entries, 1):
+        msgid_content = _po_field(entry, 'msgid')
+        msgstr_content = _po_field(entry, 'msgstr')
+        if msgid_content is None or msgstr_content is None:
+            continue
+        if msgstr_content.strip():
+            continue  # already translated
+
+        untranslated_count += 1
+        if _is_code_or_no_translate(msgid_content):
+            code_samples += 1
+            continue
+
+        preview = msgid_content
+        if len(preview) > 100:
+            preview = preview[:100] + '...'
+        translatable_untranslated.append({'entry_num': entry_num, 'msgid': preview})
+
+    pending = len(translatable_untranslated)
+
+    print(f'📊 翻訳状況レポート: {filename}')
+    print('=' * 80)
+    print(f'総未翻訳エントリ数: {untranslated_count}')
+    print(f'  - コードサンプル/翻訳不要: {code_samples}')
+    print(f'  - 翻訳が必要: {pending}')
+
+    total_entries = len(entries)
+    translated_entries = total_entries - untranslated_count
+    translation_rate = (translated_entries / total_entries * 100) if total_entries > 0 else 0
+
+    print(f'翻訳完了率: {translation_rate:.1f}% ({translated_entries}/{total_entries})')
+
+    if pending == 0:
+        print('✅ すべての翻訳対象エントリが翻訳済みです！')
+    else:
+        print(f'⚠️  {pending}個の翻訳対象エントリが未翻訳です')
+        print()
+        print('未翻訳エントリ一覧（最初の20個）:')
+        print('-' * 80)
+        for item in translatable_untranslated[:20]:
+            print(f'{item["entry_num"]:4d}: {item["msgid"]}')
+
+        if pending > 20:
+            print(f'... 他 {pending - 20}個')
+
+    print('=' * 80)
+
+
+if __name__ == "__main__":
+    # Default to the typing documentation when no file is given.
+    filename = sys.argv[1] if len(sys.argv) > 1 else 'library/typing.po'
+    check_untranslated(filename)
diff --git a/final_check_untranslated.py b/final_check_untranslated.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+import re
+import sys
+
+# Substrings that mark a msgid as a code sample, which is left untranslated.
+_CODE_MARKERS = (
+    'def ', 'class ', '>>>', 'import ', 'return ', 'print(',
+    'assert ', 'raise ', 'try:', 'except:', 'if __name__',
+)
+
+
+def _po_field(entry, keyword):
+    """Return the complete text of *keyword* (``msgid``/``msgstr``) in *entry*.
+
+    The string on the keyword line and all directly following quoted
+    continuation lines are concatenated. Returns ``None`` when the keyword
+    does not occur at the start of a line.
+    """
+    match = re.search(
+        r'^' + keyword + r'[ \t]+((?:"(?:[^"\\]|\\.)*"[ \t]*\n?)+)',
+        entry,
+        re.MULTILINE,
+    )
+    if match is None:
+        return None
+    return ''.join(re.findall(r'"((?:[^"\\]|\\.)*)"', match.group(1)))
+
+
+def final_check_untranslated(filename, location_filter=None):
+    """Print every entry of a PO file that still lacks a translation.
+
+    The file is split at each ``#:`` reference comment. An entry is reported
+    when its msgid is non-empty, its msgstr (including continuation lines) is
+    empty and the msgid does not look like a code sample.
+
+    Args:
+        filename: Path of the PO file to check.
+        location_filter: Optional substring that an entry must contain to be
+            examined, e.g. ``'#: ../../library/typing.rst:'``. ``None`` (the
+            default) examines every entry, so the check works for any file.
+
+    Returns:
+        None. All results are written to standard output.
+
+    Raises:
+        OSError: If the file cannot be opened.
+    """
+    with open(filename, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    untranslated_entries = []
+
+    # Split before every "#:" comment so each chunk starts with its location.
+    for entry in re.split(r'\n(?=#:)', content):
+        if not entry.startswith('#:'):
+            continue  # file header or other text before the first entry
+        if location_filter is not None and location_filter not in entry:
+            continue
+
+        msgid_text = _po_field(entry, 'msgid')
+        msgstr_text = _po_field(entry, 'msgstr')
+        if msgid_text is None or msgstr_text is None:
+            continue
+        # Translated entries and entries without source text are not reported.
+        if msgstr_text or not msgid_text.strip():
+            continue
+        if any(marker in msgid_text for marker in _CODE_MARKERS):
+            continue
+
+        location_match = re.search(r'#: (.*)', entry)
+        preview = msgid_text
+        if len(preview) > 100:
+            preview = preview[:100] + '...'
+        untranslated_entries.append({
+            'msgid': preview,
+            'location': location_match.group(1) if location_match else 'unknown',
+        })
+
+    print(f'🔍 最終確認 - 未翻訳エントリ: {filename}')
+    print('=' * 80)
+    print(f'見つかった未翻訳エントリ: {len(untranslated_entries)}')
+
+    if untranslated_entries:
+        print('\n未翻訳エントリ:')
+        for i, item in enumerate(untranslated_entries, 1):
+            print(f'{i:3d}: {item["location"]}')
+            print(f'     {item["msgid"]}')
+            print()
+    else:
+        print('✅ 未翻訳エントリは見つかりませんでした！')
+
+    print('=' * 80)
+
+
+if __name__ == "__main__":
+    # Default to the typing documentation when no file is given.
+    filename = sys.argv[1] if len(sys.argv) > 1 else 'library/typing.po'
+    final_check_untranslated(filename)