Files
Zivro/Report/append_sources_to_report.py
2026-03-19 14:54:09 +03:00

223 lines
5.8 KiB
Python

#!/usr/bin/env python3
"""
Append source files to a markdown report and save as a new file.

Example:
    python3 Report/append_sources_to_report.py \
        --input Report/zivro-open-project-report.md \
        --output Report/zivro-open-project-report-with-code.md \
        --base .
"""
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Iterable
# File suffixes (lower-case, with the leading dot) attached when the user
# does not pass --extensions.
DEFAULT_EXTENSIONS = {
    # Zig sources and build manifests
    ".zig", ".zon",
    # structured configuration
    ".json", ".toml", ".yaml", ".yml",
    # documentation and diagrams
    ".md", ".txt", ".puml",
    # helper scripts
    ".py",
}

# Directory names skipped during the recursive scan when the user does not
# pass --exclude-dir.
DEFAULT_EXCLUDE_DIRS = {
    ".cursor",
    ".git",
    ".zig-cache",
    "mcps",
    "zig-cache",
    "zig-out",
}

# File names that are never attached, whatever their suffix.
DEFAULT_EXCLUDE_FILES = {".DS_Store"}
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the report-appendix generator.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read the process command line, which is what the
            original zero-argument call did; passing a list allows the
            parser to be driven programmatically.

    Returns:
        Namespace with the attributes ``input``, ``output``, ``base``,
        ``include``, ``extensions``, ``exclude_dir`` and ``max_bytes``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Adds source code listings to the end of a markdown report and writes "
            "the result to a new markdown file."
        )
    )
    parser.add_argument("--input", required=True, help="Path to source markdown report")
    parser.add_argument("--output", required=True, help="Path to output markdown report")
    parser.add_argument(
        "--base",
        default=".",
        help="Project root to scan for source files (default: current directory)",
    )
    parser.add_argument(
        "--include",
        nargs="*",
        default=["src", "build.zig", "build.zig.zon"],
        help=(
            "Files/directories (relative to --base) to include in appendix scan. "
            "Default: src build.zig build.zig.zon"
        ),
    )
    parser.add_argument(
        "--extensions",
        nargs="*",
        # Sorted so the default is a stable list rather than a set.
        default=sorted(DEFAULT_EXTENSIONS),
        help=(
            "Allowed file extensions (e.g. .zig .md). "
            "If empty, all file extensions are allowed."
        ),
    )
    parser.add_argument(
        "--exclude-dir",
        nargs="*",
        default=sorted(DEFAULT_EXCLUDE_DIRS),
        help="Directory names to exclude recursively",
    )
    parser.add_argument(
        "--max-bytes",
        type=int,
        default=1_000_000,
        help="Skip files larger than this size in bytes (default: 1_000_000)",
    )
    return parser.parse_args(argv)
def is_text_file(path: Path) -> bool:
    """Report whether ``path`` can be read and contains no NUL byte.

    Returns ``False`` when reading raises ``OSError`` or when the content
    holds at least one ``0x00`` byte; ``True`` otherwise (an empty file
    counts as text).
    """
    try:
        payload = path.read_bytes()
    except OSError:
        return False
    return b"\x00" not in payload
def iter_files(
    base: Path,
    include_paths: Iterable[str],
    extensions: set[str],
    exclude_dirs: set[str],
    max_bytes: int,
) -> list[Path]:
    """Collect the files to attach, sorted by their path relative to ``base``.

    Args:
        base: Project root that ``include_paths`` are relative to.
        include_paths: Files or directories to scan. Missing entries are
            skipped; directories are walked recursively.
        extensions: Allowed lower-case suffixes including the dot. An empty
            set allows every suffix.
        exclude_dirs: Directory names whose contents are skipped. Only
            directories *below* an included root are tested, so the location
            of the project on disk and an explicitly included directory are
            never filtered out by this set.
        max_bytes: Files larger than this many bytes are skipped.

    Returns:
        De-duplicated absolute paths ordered by POSIX-style relative path.

    Raises:
        ValueError: If a collected file lies outside ``base`` (for example
            an include entry that climbs out with ``..``), because the sort
            key is the path relative to ``base``.
    """
    # Included items are resolved below; resolve the root the same way so
    # relative_to() in the sort key compares like with like.
    base = base.resolve()
    found: set[Path] = set()

    def add_file(path: Path) -> None:
        """Add ``path`` to the result if it passes every filter."""
        if not path.is_file():
            return
        if path.name in DEFAULT_EXCLUDE_FILES:
            return
        if extensions and path.suffix.lower() not in extensions:
            return
        try:
            size = path.stat().st_size
        except OSError:
            return
        if size > max_bytes:
            return
        if not is_text_file(path):
            return
        found.add(path)

    for rel in include_paths:
        item = (base / rel).resolve()
        if not item.exists():
            continue
        if item.is_file():
            add_file(item)
            continue
        for path in item.rglob("*"):
            # Look only at directory components under the included root.
            # Testing the absolute path would drop everything whenever an
            # ancestor of the project happens to carry an excluded name, and
            # testing the last component would match plain files by name.
            parent_dirs = path.relative_to(item).parts[:-1]
            if any(part in exclude_dirs for part in parent_dirs):
                continue
            add_file(path)

    return sorted(found, key=lambda p: p.relative_to(base).as_posix())
def language_for(path: Path) -> str:
    """Return the code-fence language tag for ``path``.

    The lookup is by lower-cased file suffix; suffixes without an entry
    (including no suffix at all) yield an empty string.
    """
    tag_by_suffix = {
        ".zig": "zig",
        ".py": "python",
        ".yaml": "yaml",
        ".yml": "yaml",
        ".json": "json",
        ".toml": "toml",
        ".md": "markdown",
    }
    return tag_by_suffix.get(path.suffix.lower(), "")
def _fence_for(code: str) -> str:
    """Return a backtick fence that does not occur anywhere in ``code``."""
    fence = "```"
    # Grow the fence until the content cannot contain it, so a listing that
    # itself holds ``` (typical for markdown files) cannot end the block early.
    while fence in code:
        fence += "`"
    return fence


def main() -> int:
    """Build the report-with-appendix file described by the CLI arguments.

    Reads the input report, collects the source files selected by the
    options, appends one fenced listing per file under an appendix heading
    and writes the result to the output path (parent directories are
    created as needed).

    Returns:
        ``0`` on success.

    Raises:
        FileNotFoundError: If the input report does not exist.
        ValueError: If ``--input`` and ``--output`` resolve to the same file.
    """
    args = parse_args()
    input_path = Path(args.input).resolve()
    output_path = Path(args.output).resolve()
    base_path = Path(args.base).resolve()

    if not input_path.exists():
        raise FileNotFoundError(f"Input report not found: {input_path}")
    if input_path == output_path:
        raise ValueError("--input and --output must be different files")

    report_text = input_path.read_text(encoding="utf-8")
    # Accept extensions with or without the leading dot, case-insensitively.
    extensions = {e.lower() if e.startswith(".") else f".{e.lower()}" for e in args.extensions}
    exclude_dirs = set(args.exclude_dir)

    collected = iter_files(
        base=base_path,
        include_paths=args.include,
        extensions=extensions,
        exclude_dirs=exclude_dirs,
        max_bytes=args.max_bytes,
    )
    # An output file left over from a previous run must not be embedded in
    # the file that is about to overwrite it.
    files = [path for path in collected if path != output_path]

    appendix_lines: list[str] = [
        "",
        "---",
        "",
        "## Приложение A. Исходные тексты",
        "",
        f"Сформировано автоматически скриптом `Report/append_sources_to_report.py` "
        f"(файлов: {len(files)}).",
        "",
    ]
    for idx, path in enumerate(files, start=1):
        rel = path.relative_to(base_path).as_posix()
        lang = language_for(path)
        code = path.read_text(encoding="utf-8", errors="replace")
        fence = _fence_for(code)
        appendix_lines.append(f"### A.{idx}. `{rel}`")
        appendix_lines.append("")
        appendix_lines.append(f"{fence}{lang}")
        appendix_lines.append(code.rstrip("\n"))
        appendix_lines.append(fence)
        appendix_lines.append("")

    output_text = report_text.rstrip() + "\n" + "\n".join(appendix_lines)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(output_text, encoding="utf-8")

    print(f"Created: {output_path}")
    print(f"Input report preserved: {input_path}")
    print(f"Attached files: {len(files)}")
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)