#!/usr/bin/env python3 """ Single-file Lab3 exporter. What it does: 1. Runs benchmark series by recursion depth. 2. Runs benchmark series by min_size threshold. 3. Runs one logged execution with --log. 4. Draws old benchmark graphs. 5. Draws old timeline/depth histogram graphs. 6. Adds DFS process-tree graph: each process has its children directly below it; only after a whole subtree is drawn, the next sibling is drawn. Generated output: out/benchmark_depth.csv out/benchmark_min_size.csv out/logs/timeline.log out/pics/time_by_depth.png out/pics/speedup_by_depth.png out/pics/process_count_by_depth.png out/pics/time_by_min_size.png out/pics/timeline.png out/pics/depth_hist.png out/pics/process_tree_dfs.png Usage: python3 exporter.py python3 exporter.py --bin ./lab3 --out out python3 exporter.py --fast python3 exporter.py --skip-run """ from __future__ import annotations import argparse import csv import re import shutil import subprocess import sys from collections import defaultdict from pathlib import Path from typing import Dict, List import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt STAT_RE = re.compile( r"STAT:.*size=(\d+).*depth=(\d+).*min_size=(\d+).*processes=(\d+).*valid=(\d+).*time=([\d.]+)" ) EVENT_RE = re.compile( r"(START|END) PID=(\d+) PPID=(\d+) depth=(\d+) size=(\d+) time=([\d.]+)" ) def seed_for(size: int, depth: int, min_size: int, salt: int) -> int: # Для каждой точки используется свой seed, поэтому вход всегда случайный, # но результаты можно воспроизвести. return 2026 + salt * 1_000_003 + size * 17 + depth * 1009 + min_size * 31 def run_once( bin_path: str, size: int, depth: int, min_size: int, seed: int, log_path: Path | None = None, ) -> Dict[str, int | float | str]: cmd = [ bin_path, "--size", str(size), "--depth", str(depth), "--min-size", str(min_size), "--seed", str(seed), ] if log_path is not None: cmd.append("--log") p = subprocess.run( cmd, stdout=subprocess.PIPE if log_path is not None else subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, ) if log_path is not None: log_path.parent.mkdir(parents=True, exist_ok=True) log_path.write_text(p.stdout, encoding="utf-8") if p.returncode != 0: raise RuntimeError( "Command failed:\n" + " ".join(cmd) + "\n\nSTDOUT:\n" + (p.stdout or "") + "\nSTDERR:\n" + (p.stderr or "") ) m = STAT_RE.search(p.stderr) if not m: raise RuntimeError(f"STAT not found:\n{p.stderr}") if m.group(5) != "1": raise RuntimeError(f"sort validation failed:\n{p.stderr}") return { "size": int(m.group(1)), "depth": int(m.group(2)), "min_size": int(m.group(3)), "processes": int(m.group(4)), "valid": int(m.group(5)), "time": float(m.group(6)), "seed": seed, "logfile": str(log_path) if log_path is not None else "", } def save_csv(path: Path, rows: List[dict], header: List[str]) -> None: path.parent.mkdir(parents=True, exist_ok=True) with path.open("w", encoding="utf-8", newline="") as f: w = csv.DictWriter(f, fieldnames=header) w.writeheader() for row in rows: w.writerow({key: row.get(key, "") for key in header}) def read_csv(path: Path) -> List[dict]: result = [] with path.open("r", encoding="utf-8", newline="") as f: for row in csv.DictReader(f): converted = dict(row) for key in ["size", "depth", "min_size", "processes", "valid", "seed"]: if key in converted and converted[key] != "": converted[key] = int(converted[key]) if "time" in converted and converted["time"] != "": converted["time"] = float(converted["time"]) result.append(converted) return result def save_plot(path: Path) -> None: path.parent.mkdir(parents=True, exist_ok=True) plt.tight_layout() plt.savefig(path, dpi=140) plt.close() def clean_output(out_dir: Path) -> None: if out_dir.exists(): shutil.rmtree(out_dir) (out_dir / "pics").mkdir(parents=True, exist_ok=True) (out_dir / "logs").mkdir(parents=True, exist_ok=True) def plot_depth_scaling( bin_path: str, out_dir: Path, fast: bool = False, skip_run: bool = False, ) -> List[dict]: pics = out_dir / "pics" csv_path = out_dir / "benchmark_depth.csv" if skip_run: rows = read_csv(csv_path) else: if fast: depths = list(range(10)) sizes = [20_000, 50_000] min_size = 2048 else: # ВАЖНО: на графиках по глубине ровно 30 точек по оси X: 0..29. # # depth идет до 29, но реально число процессов не взорвется бесконечно, # потому что дальнейшее деление останавливает min_size. depths = list(range(30)) # Несколько размеров дают несколько линий на одном графике. sizes = [50_000, 100_000, 200_000] min_size = 4096 rows = [] for size in sizes: for d in depths: seed = seed_for(size, d, min_size, salt=1) r = run_once(bin_path, size, d, min_size, seed) row = {**r, "seed": seed} rows.append(row) print( f"depth_scaling: " f"size={size} depth={d} min_size={min_size} " f"seed={seed} processes={r['processes']} time={r['time']:.6f}", flush=True, ) save_csv( csv_path, rows, [ "size", "depth", "min_size", "seed", "processes", "valid", "time", "logfile", ], ) plt.figure(figsize=(12, 6)) for size in sorted(set(r["size"] for r in rows)): cur = [r for r in rows if r["size"] == size] cur.sort(key=lambda r: r["depth"]) plt.plot( [r["depth"] for r in cur], [r["time"] for r in cur], marker="o", label=f"N={size}", ) plt.xlabel("Глубина порождения процессов") plt.ylabel("Время, сек") plt.title("Зависимость времени сортировки от глубины fork-рекурсии") plt.grid(True) plt.legend() save_plot(pics / "time_by_depth.png") plt.figure(figsize=(12, 6)) for size in sorted(set(r["size"] for r in rows)): cur = [r for r in rows if r["size"] == size] cur.sort(key=lambda r: r["depth"]) base_time = cur[0]["time"] speedup = [base_time / r["time"] if r["time"] > 0 else 0 for r in cur] plt.plot( [r["depth"] for r in cur], speedup, marker="s", label=f"N={size}", ) plt.xlabel("Глубина порождения процессов") plt.ylabel("Ускорение относительно depth=0") plt.title("Ускорение при использовании процессов") plt.grid(True) plt.legend() save_plot(pics / "speedup_by_depth.png") plt.figure(figsize=(12, 6)) for size in sorted(set(r["size"] for r in rows)): cur = [r for r in rows if r["size"] == size] cur.sort(key=lambda r: r["depth"]) plt.plot( [r["depth"] for r in cur], [r["processes"] for r in cur], marker="^", label=f"N={size}", ) plt.xlabel("Глубина порождения процессов") plt.ylabel("Количество процессов") plt.title("Размер дерева процессов") plt.grid(True) plt.legend() save_plot(pics / "process_count_by_depth.png") return rows def plot_threshold_effect( bin_path: str, out_dir: Path, fast: bool = False, skip_run: bool = False, ) -> List[dict]: pics = out_dir / "pics" csv_path = out_dir / "benchmark_min_size.csv" if skip_run: rows = read_csv(csv_path) else: size = 200_000 depth = 5 if fast: # Быстрый режим: меньше точек. min_sizes = [round(2 ** (7 + i * (8 / 14))) for i in range(15)] else: # ВАЖНО: на графике по min_size ровно 30 точек по оси X. # # 30 значений порога от 128 до 131072, примерно равномерно по log2-шкале. min_sizes = [round(2 ** (7 + i * (10 / 29))) for i in range(30)] rows = [] for m in min_sizes: seed = seed_for(size, depth, m, salt=2) r = run_once(bin_path, size, depth, m, seed) row = {**r, "seed": seed} rows.append(row) print( f"min_size_effect: " f"size={size} depth={depth} min_size={m} " f"seed={seed} processes={r['processes']} time={r['time']:.6f}", flush=True, ) save_csv( csv_path, rows, [ "size", "depth", "min_size", "seed", "processes", "valid", "time", "logfile", ], ) rows.sort(key=lambda r: r["min_size"]) plt.figure(figsize=(12, 6)) plt.plot( [r["min_size"] for r in rows], [r["time"] for r in rows], marker="o", ) plt.xscale("log", base=2) plt.xlabel("Минимальный размер части для fork") plt.ylabel("Время, сек") plt.title("Влияние порога min_size на производительность") plt.grid(True) save_plot(pics / "time_by_min_size.png") return rows def parse_events(log_path: Path) -> List[dict]: events = defaultdict(dict) if not log_path.exists(): return [] with log_path.open("r", encoding="utf-8", errors="ignore") as f: for line in f: m = EVENT_RE.search(line) if not m: continue typ, pid, ppid, depth, size, t = m.groups() key = (int(pid), int(depth), int(size)) events[key][typ] = float(t) events[key]["pid"] = int(pid) events[key]["ppid"] = int(ppid) events[key]["depth"] = int(depth) events[key]["size"] = int(size) rows = [] for v in events.values(): if "START" in v and "END" in v: rows.append(v) rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"])) return rows def plot_timeline(log_path: Path, out_dir: Path) -> None: pics = out_dir / "pics" rows = parse_events(log_path) # Exclude root process: depth=0. rows = [r for r in rows if r["depth"] != 0] if not rows: print("В логе нет полных START/END событий без корневого процесса depth=0") return base = log_path.stem t0 = min(r["START"] for r in rows) # Временная диаграмма: видно параллельность и время жизни дочерних процессов. plt.figure(figsize=(12, max(5, len(rows) * 0.35))) for y, r in enumerate(rows): start = r["START"] - t0 end = r["END"] - t0 plt.plot([start, end], [y, y], linewidth=5) plt.text( end, y, f" pid={r['pid']} d={r['depth']} n={r['size']}", va="center", fontsize=8, ) plt.xlabel("Время от старта первого дочернего процесса, сек") plt.ylabel("Дочерние процессы / задачи сортировки") plt.title(f"Временная диаграмма дочерних процессов: {base}") plt.grid(True) save_plot(pics / "timeline.png") # Гистограмма глубин без depth=0. by_depth = defaultdict(int) for r in rows: by_depth[r["depth"]] += 1 plt.figure(figsize=(8, 5)) xs = sorted(by_depth) plt.bar(xs, [by_depth[x] for x in xs]) plt.xlabel("Глубина рекурсии") plt.ylabel("Количество дочерних процессов") plt.title(f"Распределение дочерних процессов по глубине: {base}") plt.grid(True, axis="y") save_plot(pics / "depth_hist.png") def plot_process_tree_dfs( log_path: Path, out_dir: Path, hide_root: bool = False ) -> None: """ Draw process tree in DFS order. Root process depth=0 is always excluded. If process 2 has children 3 and 4, and process 3 has children 5 and 6, the order is: 2 3 5 6 4 Only after the whole subtree of process 2 is drawn, the next sibling is drawn. """ pics = out_dir / "pics" rows = parse_events(log_path) if not rows: return # Always exclude root process: depth=0. rows = [r for r in rows if r["depth"] != 0] if not rows: return by_pid = {r["pid"]: r for r in rows} children = defaultdict(list) for r in rows: parent_pid = r["ppid"] if parent_pid in by_pid: children[parent_pid].append(r["pid"]) for parent_pid in children: children[parent_pid].sort(key=lambda pid: by_pid[pid]["START"]) # These are direct children of hidden root or processes whose parent is not present. roots = [r["pid"] for r in rows if r["ppid"] not in by_pid] roots.sort(key=lambda pid: by_pid[pid]["START"]) ordered = [] def dfs(pid: int, level: int) -> None: ordered.append((pid, level)) for child_pid in children.get(pid, []): dfs(child_pid, level + 1) for root_pid in roots: dfs(root_pid, 0) if not ordered: return y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)} t0 = min(by_pid[pid]["START"] for pid, _ in ordered) plt.figure(figsize=(14, max(5, len(ordered) * 0.45))) for y, (pid, level) in enumerate(ordered): r = by_pid[pid] start = r["START"] - t0 end = r["END"] - t0 plt.plot([start, end], [y, y], linewidth=5) indent = " " * level label = f"{indent}pid={pid} d={r['depth']} n={r['size']}" plt.text( end, y, " " + label, va="center", fontsize=8, ) parent_pid = r["ppid"] if parent_pid in y_by_pid: parent_y = y_by_pid[parent_pid] parent_start = by_pid[parent_pid]["START"] - t0 plt.plot( [start, start], [parent_y, y], linewidth=1, linestyle="--", ) plt.plot( [parent_start, start], [parent_y, parent_y], linewidth=1, linestyle="--", ) plt.gca().invert_yaxis() plt.xlabel("Время от старта первого дочернего процесса, сек") plt.ylabel("Дерево дочерних процессов в DFS-порядке") plt.title("Дерево процессов без корня: потомки расположены сразу под родителем") plt.grid(True) save_plot(pics / "process_tree_dfs.png") def run_timeline_case( bin_path: str, out_dir: Path, size: int, depth: int, min_size: int, ) -> Path: log_path = out_dir / "logs" / "timeline.log" seed = seed_for(size, depth, min_size, salt=5) r = run_once( bin_path=bin_path, size=size, depth=depth, min_size=min_size, seed=seed, log_path=log_path, ) print( f"timeline: " f"size={size} depth={depth} min_size={min_size} " f"seed={seed} processes={r['processes']} time={r['time']:.6f} " f"log={log_path}", flush=True, ) return log_path def generate_report(out_dir: Path) -> None: report = out_dir / "REPORT.md" text = """# Lab3 benchmark report Скрипт `exporter.py` запускает серию тестов и строит графики. ## CSV - `benchmark_depth.csv` — серия по глубине рекурсии. - `benchmark_min_size.csv` — серия по порогу локальной сортировки. ## Графики - `pics/time_by_depth.png` — зависимость времени сортировки от глубины. - `pics/speedup_by_depth.png` — ускорение относительно `depth=0`. - `pics/process_count_by_depth.png` — количество процессов. - `pics/time_by_min_size.png` — влияние `min_size`. - `pics/timeline.png` — временная диаграмма процессов. - `pics/depth_hist.png` — распределение процессов по глубине. - `pics/process_tree_dfs.png` — дерево процессов в DFS-порядке: потомки идут сразу под родителем. ## Запуск ```bash python3 exporter.py --bin ./lab3 --out out Быстрый режим: python3 exporter.py --bin ./lab3 --out out --fast """ report.write_text(text, encoding="utf-8") def main() -> int: parser = argparse.ArgumentParser(description="Single-file Lab3 benchmark exporter") parser.add_argument("--bin", default="./lab3", help="Путь к бинарнику lab3") parser.add_argument("--out", default="out", help="Каталог вывода") parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки") parser.add_argument( "--skip-run", action="store_true", help="Не запускать benchmark, а построить графики из существующих CSV", ) parser.add_argument( "--hide-root", action="store_true", help="Скрыть корневой процесс на DFS-графике дерева", ) parser.add_argument("--timeline-size", type=int, default=8192) parser.add_argument("--timeline-depth", type=int, default=3) parser.add_argument("--timeline-min-size", type=int, default=64) args = parser.parse_args() bin_path = args.bin out_dir = Path(args.out) if not args.skip_run: clean_output(out_dir) else: (out_dir / "pics").mkdir(parents=True, exist_ok=True) (out_dir / "logs").mkdir(parents=True, exist_ok=True) plot_depth_scaling( bin_path=bin_path, out_dir=out_dir, fast=args.fast, skip_run=args.skip_run, ) plot_threshold_effect( bin_path=bin_path, out_dir=out_dir, fast=args.fast, skip_run=args.skip_run, ) log_path = out_dir / "logs" / "timeline.log" if not args.skip_run: log_path = run_timeline_case( bin_path=bin_path, out_dir=out_dir, size=args.timeline_size, depth=args.timeline_depth, min_size=args.timeline_min_size, ) if log_path.exists(): plot_timeline(log_path, out_dir) plot_process_tree_dfs(log_path, out_dir, hide_root=args.hide_root) else: print(f"Timeline log not found: {log_path}", file=sys.stderr) generate_report(out_dir) print(f"Графики сохранены в {out_dir / 'pics'}.") print(f"CSV сохранены в {out_dir}.") print(f"Лог сохранен в {out_dir / 'logs' / 'timeline.log'}.") print(f"Отчет сохранен в {out_dir / 'REPORT.md'}.") return 0 if __name__ == "__main__": raise SystemExit(main())