#!/usr/bin/env python3 """ Lab4 exporter: runs socket-sort benchmarks and creates report graphs. Root parent process is excluded from process-tree/timeline visualizations: - process graph shows child processes only: processes - 1 - timeline ignores depth=0 rows - DFS tree ignores depth=0 root and places each process's children directly below it Generated output: out_lab4/csv/lab4_all_results.csv out_lab4/pics/01_time_by_depth.png out_lab4/pics/02_speedup_by_depth.png out_lab4/pics/03_child_processes_by_depth.png out_lab4/pics/04_time_by_min_size.png out_lab4/pics/05_timeline_child_processes.png out_lab4/pics/06_process_tree_dfs.png out_lab4/logs/timeline.log out_lab4/REPORT.md Usage: python3 exporter.py --bin ./lab4 --out out_lab4 python3 exporter.py --bin ./lab4 --out out_lab4 --fast python3 exporter.py --bin ./lab4 --out out_lab4 --skip-run """ from __future__ import annotations import argparse import csv import re import shutil import subprocess import sys from collections import defaultdict from dataclasses import asdict, dataclass from pathlib import Path from typing import Iterable, List import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt STAT_RE = re.compile( r"STAT:\s*.*?size=(\d+)\s+.*?depth=(\d+)\s+.*?min_size=(\d+)\s+" r".*?processes=(\d+)\s+.*?counter=(\d+)\s+.*?valid=(\d+)\s+" r".*?time=([\d.]+)" ) EVENT_RE = re.compile( r"(START|END)\s+PID=(\d+)\s+PPID=(\d+)\s+depth=(\d+)\s+size=(\d+)" r"(?:\s+port=(\d+))?\s+time=([\d.]+)" ) @dataclass class RunResult: scenario: str size: int depth: int min_size: int seed: int port_base: int processes: int counter: int valid: int time: float logfile: str = "" @property def child_processes(self) -> int: return max(0, self.processes - 1) @property def speed(self) -> float: return self.size / self.time if self.time > 0 else 0.0 def seed_for(size: int, depth: int, min_size: int, salt: int) -> int: return 2026 + salt * 1_000_003 + size * 17 + depth * 1009 + min_size * 31 def run_lab4( bin_path: str, size: int, depth: int, min_size: int, seed: int, port_base: int, scenario: str, log_path: Path | None = None, ) -> RunResult: cmd = [ bin_path, "--size", str(size), "--depth", str(depth), "--min-size", str(min_size), "--seed", str(seed), "--port-base", str(port_base), ] if log_path is not None: cmd.append("--log") p = subprocess.run(cmd, text=True, capture_output=True) if log_path is not None: log_path.parent.mkdir(parents=True, exist_ok=True) log_path.write_text(p.stdout, encoding="utf-8") if p.returncode != 0: raise RuntimeError( "Команда завершилась с ошибкой:\n" + " ".join(cmd) + "\n\nSTDOUT:\n" + p.stdout + "\nSTDERR:\n" + p.stderr ) m = STAT_RE.search(p.stderr) if not m: raise RuntimeError("Не найден STAT в stderr:\n" + p.stderr) result = RunResult( scenario=scenario, size=int(m.group(1)), depth=int(m.group(2)), min_size=int(m.group(3)), seed=seed, port_base=port_base, processes=int(m.group(4)), counter=int(m.group(5)), valid=int(m.group(6)), time=float(m.group(7)), logfile=str(log_path) if log_path is not None else "", ) if result.valid != 1: raise RuntimeError("Сортировка не прошла проверку:\n" + p.stderr) return result def group_by(rows: Iterable[RunResult], key_fn): result = defaultdict(list) for row in rows: result[key_fn(row)].append(row) return result def write_csv(path: Path, rows: List[RunResult]) -> None: path.parent.mkdir(parents=True, exist_ok=True) with path.open("w", encoding="utf-8", newline="") as f: fieldnames = ( list(asdict(rows[0]).keys()) if rows else list(RunResult.__dataclass_fields__.keys()) ) writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(asdict(row)) def read_csv(path: Path) -> List[RunResult]: rows: List[RunResult] = [] with path.open("r", encoding="utf-8", newline="") as f: for raw in csv.DictReader(f): rows.append( RunResult( scenario=raw["scenario"], size=int(raw["size"]), depth=int(raw["depth"]), min_size=int(raw["min_size"]), seed=int(raw["seed"]), port_base=int(raw["port_base"]), processes=int(raw["processes"]), counter=int(raw["counter"]), valid=int(raw["valid"]), time=float(raw["time"]), logfile=raw.get("logfile", ""), ) ) return rows def save_plot(path: Path) -> None: path.parent.mkdir(parents=True, exist_ok=True) plt.tight_layout() plt.savefig(path, dpi=140) plt.close() def clean_graphs(pics: Path) -> None: pics.mkdir(parents=True, exist_ok=True) for old in pics.glob("*.png"): old.unlink() def plot_depth_graphs(rows: List[RunResult], pics: Path) -> None: cur = [r for r in rows if r.scenario == "depth_scaling"] if not cur: return by_size = group_by(cur, lambda r: r.size) # 1. Main performance graph. plt.figure(figsize=(12, 6)) for size, rs in sorted(by_size.items()): rs = sorted(rs, key=lambda r: r.depth) plt.plot( [r.depth for r in rs], [r.time for r in rs], marker="o", label=f"N={size}", ) plt.xlabel("Глубина рекурсии") plt.ylabel("Время, сек") plt.title("1. Время сортировки от глубины рекурсии") plt.grid(True) plt.legend() save_plot(pics / "01_time_by_depth.png") # 2. Speedup against sequential depth=0. plt.figure(figsize=(12, 6)) for size, rs in sorted(by_size.items()): rs = sorted(rs, key=lambda r: r.depth) base = next((r.time for r in rs if r.depth == 0), rs[0].time) speedup = [base / r.time if r.time > 0 else 0 for r in rs] plt.plot( [r.depth for r in rs], speedup, marker="s", label=f"N={size}", ) plt.xlabel("Глубина рекурсии") plt.ylabel("Ускорение относительно depth=0") plt.title("2. Ускорение от использования процессов и сокетов") plt.grid(True) plt.legend() save_plot(pics / "02_speedup_by_depth.png") # 3. Child process tree size, root parent excluded. plt.figure(figsize=(12, 6)) for size, rs in sorted(by_size.items()): rs = sorted(rs, key=lambda r: r.depth) plt.plot( [r.depth for r in rs], [r.child_processes for r in rs], marker="^", label=f"N={size}", ) plt.xlabel("Глубина рекурсии") plt.ylabel("Количество дочерних процессов") plt.title("3. Размер дерева дочерних процессов без корневого родителя") plt.grid(True) plt.legend() save_plot(pics / "03_child_processes_by_depth.png") def plot_min_size_graph(rows: List[RunResult], pics: Path) -> None: cur = sorted( [r for r in rows if r.scenario == "min_size_effect"], key=lambda r: r.min_size, ) if not cur: return # 4. Threshold effect. plt.figure(figsize=(12, 6)) plt.plot( [r.min_size for r in cur], [r.time for r in cur], marker="o", ) plt.xscale("log", base=2) plt.xlabel("Минимальный размер части для fork/socket") plt.ylabel("Время, сек") plt.title("4. Влияние min_size на время сортировки") plt.grid(True) save_plot(pics / "04_time_by_min_size.png") def parse_events(log_path: Path) -> List[dict]: events = defaultdict(dict) if not log_path.exists(): return [] for line in log_path.read_text(encoding="utf-8", errors="ignore").splitlines(): m = EVENT_RE.search(line) if not m: continue typ, pid, ppid, depth, size, port, t = m.groups() depth_i = int(depth) # Remove root parent process from timeline/tree. if depth_i == 0: continue key = (int(pid), depth_i, int(size)) events[key][typ] = float(t) events[key]["pid"] = int(pid) events[key]["ppid"] = int(ppid) events[key]["depth"] = depth_i events[key]["size"] = int(size) events[key]["port"] = int(port) if port else -1 rows = [v for v in events.values() if "START" in v and "END" in v] rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"])) return rows def plot_timeline(log_path: Path, pics: Path) -> None: rows = parse_events(log_path) if not rows: return t0 = min(r["START"] for r in rows) # 5. Timeline for child process lifetime and real parallelism. plt.figure(figsize=(13, max(5, len(rows) * 0.38))) for y, r in enumerate(rows): start = r["START"] - t0 end = r["END"] - t0 plt.plot([start, end], [y, y], linewidth=5) label = f"pid={r['pid']} d={r['depth']} n={r['size']}" if r.get("port", -1) >= 0: label += f" p={r['port']}" plt.text(end, y, " " + label, va="center", fontsize=8) plt.xlabel("Время от старта первого дочернего процесса, сек") plt.ylabel("Дочерние процессы / задачи сортировки") plt.title("5. Временная диаграмма дочерних процессов") plt.grid(True) save_plot(pics / "05_timeline_child_processes.png") def plot_process_tree_dfs(log_path: Path, pics: Path) -> None: """ Draws a process tree from timeline.log. Root depth=0 parent is excluded. Children are placed directly below their parent in DFS order: depth=1 process A depth=2 child of A depth=3 child of that child depth=2 next child of A depth=1 process B ... """ rows = parse_events(log_path) if not rows: return by_pid = {r["pid"]: r for r in rows} children = defaultdict(list) for r in rows: ppid = r["ppid"] if ppid in by_pid: children[ppid].append(r["pid"]) for pid in children: children[pid].sort(key=lambda child_pid: by_pid[child_pid]["START"]) # These are children of the hidden root or processes whose parent is absent # after filtering depth=0. roots = [r["pid"] for r in rows if r["ppid"] not in by_pid] roots.sort(key=lambda pid: by_pid[pid]["START"]) ordered = [] def dfs(pid: int, level: int) -> None: ordered.append((pid, level)) for child_pid in children.get(pid, []): dfs(child_pid, level + 1) for root_pid in roots: dfs(root_pid, 0) if not ordered: return y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)} t0 = min(by_pid[pid]["START"] for pid, _ in ordered) plt.figure(figsize=(14, max(5, len(ordered) * 0.45))) for y, (pid, level) in enumerate(ordered): r = by_pid[pid] start = r["START"] - t0 end = r["END"] - t0 # Horizontal lifetime line. plt.plot([start, end], [y, y], linewidth=5) # Indentation makes nesting visible in labels. indent = " " * level label = f"{indent}pid={pid} d={r['depth']} n={r['size']}" if r.get("port", -1) >= 0: label += f" p={r['port']}" plt.text(end, y, " " + label, va="center", fontsize=8) # Parent-child connector. parent_pid = r["ppid"] if parent_pid in y_by_pid: parent_y = y_by_pid[parent_pid] parent_start = by_pid[parent_pid]["START"] - t0 plt.plot( [start, start], [parent_y, y], linewidth=1, linestyle="--", ) plt.plot( [parent_start, start], [parent_y, parent_y], linewidth=1, linestyle="--", ) plt.gca().invert_yaxis() plt.xlabel("Время от старта первого дочернего процесса, сек") plt.ylabel("Дерево процессов в DFS-порядке") plt.title("6. Дерево процессов: потомки расположены сразу под родителем") plt.grid(True) save_plot(pics / "06_process_tree_dfs.png") def make_experiments(args) -> List[RunResult]: rows: List[RunResult] = [] logs = Path(args.out) / "logs" logs.mkdir(parents=True, exist_ok=True) if args.fast: depth_sizes = [20_000, 50_000] depths = list(range(0, 10)) min_size_depth = 2048 min_size_points = [round(2 ** (7 + i * (8 / 14))) for i in range(15)] else: # Two 30-point series are enough for a clean report without producing extra graphs. depth_sizes = [50_000, 100_000, 200_000] depths = list(range(0, 30)) min_size_depth = 4096 min_size_points = [round(2 ** (7 + i * (10 / 29))) for i in range(30)] # 1) Depth scaling: feeds graphs 01, 02, 03. for size in depth_sizes: for depth in depths: seed = seed_for(size, depth, min_size_depth, 1) row = run_lab4( args.bin, size, depth, min_size_depth, seed, args.port_base, "depth_scaling", ) rows.append(row) print( f"depth_scaling: " f"N={size} " f"depth={depth} " f"child_proc={row.child_processes} " f"time={row.time:.6f}", flush=True, ) # 2) min_size effect: feeds graph 04. for min_size in min_size_points: seed = seed_for( args.min_size_test_size, args.min_size_test_depth, min_size, 2, ) row = run_lab4( args.bin, args.min_size_test_size, args.min_size_test_depth, min_size, seed, args.port_base, "min_size_effect", ) rows.append(row) print( f"min_size_effect: " f"min_size={min_size} " f"child_proc={row.child_processes} " f"time={row.time:.6f}", flush=True, ) # 3) One log run: feeds graphs 05 and 06. log_path = logs / "timeline.log" seed = seed_for( args.timeline_size, args.timeline_depth, args.timeline_min_size, 5, ) row = run_lab4( args.bin, args.timeline_size, args.timeline_depth, args.timeline_min_size, seed, args.port_base, "timeline", log_path=log_path, ) rows.append(row) print( f"timeline: {log_path} child_proc={row.child_processes} time={row.time:.6f}", flush=True, ) return rows def generate_report(out_dir: Path, rows: List[RunResult]) -> None: by_scenario = group_by(rows, lambda r: r.scenario) lines = [ "# Lab4 sockets: основные графики\n\n", "Сгенерированы основные графики для отчета.\n\n", "Корневой родительский процесс исключен из графика дерева процессов " "и из временных диаграмм.\n\n", "## Графики\n\n", "1. `pics/01_time_by_depth.png` — время от глубины рекурсии.\n", "2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`.\n", "3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня.\n", "4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки.\n", "5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов.\n", "6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем.\n\n", "## Краткая статистика\n\n", "| Сценарий | Запусков | Лучшее время | Лучший запуск |\n", "|---|---:|---:|---|\n", ] for scenario, rs in sorted(by_scenario.items()): best = min(rs, key=lambda r: r.time) lines.append( f"| `{scenario}` | {len(rs)} | {best.time:.6f} | " f"N={best.size}, " f"depth={best.depth}, " f"min_size={best.min_size}, " f"child_proc={best.child_processes} |\n" ) (out_dir / "REPORT.md").write_text("".join(lines), encoding="utf-8") def build_plots(out_dir: Path, rows: List[RunResult]) -> None: pics = out_dir / "pics" clean_graphs(pics) plot_depth_graphs(rows, pics) plot_min_size_graph(rows, pics) timeline_row = next( (r for r in rows if r.scenario == "timeline" and r.logfile), None, ) if timeline_row is not None: timeline_log = Path(timeline_row.logfile) plot_timeline(timeline_log, pics) plot_process_tree_dfs(timeline_log, pics) generate_report(out_dir, rows) def main() -> int: parser = argparse.ArgumentParser(description="Exporter for Lab4 socket sort graphs") parser.add_argument("--bin", default="./lab4", help="Путь к бинарнику lab4") parser.add_argument("--out", default="out_lab4", help="Каталог вывода") parser.add_argument( "--port-base", type=int, default=20000, help="Базовый порт для lab4" ) parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки") parser.add_argument( "--skip-run", action="store_true", help="Строить графики из существующего CSV" ) parser.add_argument("--min-size-test-size", type=int, default=200_000) parser.add_argument("--min-size-test-depth", type=int, default=5) parser.add_argument("--timeline-size", type=int, default=8192) parser.add_argument("--timeline-depth", type=int, default=3) parser.add_argument("--timeline-min-size", type=int, default=64) args = parser.parse_args() out_dir = Path(args.out) csv_path = out_dir / "csv" / "lab4_all_results.csv" if args.skip_run: if not csv_path.exists(): print(f"CSV не найден: {csv_path}", file=sys.stderr) return 2 rows = read_csv(csv_path) else: # Keep old output from previous exporter versions from confusing the report. if out_dir.exists(): shutil.rmtree(out_dir) rows = make_experiments(args) write_csv(csv_path, rows) build_plots(out_dir, rows) print("\nГотово: построены основные графики, включая DFS-дерево процессов.") print("Корневой родительский процесс исключен из дерева/таймлайна.") print(f"CSV: {csv_path}") print(f"Графики: {out_dir / 'pics'}") print(f"Отчет: {out_dir / 'REPORT.md'}") return 0 if __name__ == "__main__": raise SystemExit(main())