#!/usr/bin/env python3
"""
|
|
Lab4 exporter: runs socket-sort benchmarks and creates report graphs.

The root parent process is excluded from process-tree/timeline visualizations:
- the process graph shows child processes only: processes - 1
- the timeline ignores depth=0 rows
- the DFS tree ignores the depth=0 root and places each process's children
  directly below it

Generated output:
    out_lab4/csv/lab4_all_results.csv
    out_lab4/pics/01_time_by_depth.png
    out_lab4/pics/02_speedup_by_depth.png
    out_lab4/pics/03_child_processes_by_depth.png
    out_lab4/pics/04_time_by_min_size.png
    out_lab4/pics/05_timeline_child_processes.png
    out_lab4/pics/06_process_tree_dfs.png
    out_lab4/logs/timeline.log
    out_lab4/REPORT.md

Usage:
    python3 exporter.py --bin ./lab4 --out out_lab4
    python3 exporter.py --bin ./lab4 --out out_lab4 --fast
    python3 exporter.py --bin ./lab4 --out out_lab4 --skip-run
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import csv
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
from collections import defaultdict
|
|
from dataclasses import asdict, dataclass
|
|
from pathlib import Path
|
|
from typing import Iterable, List
|
|
|
|
import matplotlib
|
|
|
|
matplotlib.use("Agg")
|
|
import matplotlib.pyplot as plt
|
|
|
|
# Matches the "STAT:" summary line. Capture groups, in order:
# size, depth, min_size, processes, counter, valid, time.
# Each field may be preceded by arbitrary text (lazy ".*?").
STAT_RE = re.compile(
    r"STAT:\s*"
    r".*?size=(\d+)\s+"
    r".*?depth=(\d+)\s+"
    r".*?min_size=(\d+)\s+"
    r".*?processes=(\d+)\s+"
    r".*?counter=(\d+)\s+"
    r".*?valid=(\d+)\s+"
    r".*?time=([\d.]+)"
)
|
|
|
|
# Matches one START/END event line. Capture groups, in order:
# event type, PID, PPID, depth, size, optional port, time.
# The port group is None when the line carries no "port=" field.
EVENT_RE = re.compile(
    r"(START|END)"
    r"\s+PID=(\d+)"
    r"\s+PPID=(\d+)"
    r"\s+depth=(\d+)"
    r"\s+size=(\d+)"
    r"(?:\s+port=(\d+))?"
    r"\s+time=([\d.]+)"
)
|
|
|
|
|
|
@dataclass
class RunResult:
    """One benchmark run: the launch parameters plus the values parsed from STAT."""

    # Name of the experiment series this run belongs to.
    scenario: str
    size: int
    depth: int
    min_size: int
    seed: int
    port_base: int
    # Total process count as reported by the run (root parent included).
    processes: int
    counter: int
    # 1 when the run reported a successfully validated sort.
    valid: int
    time: float
    # Path of the saved event log, or "" when the run was not logged.
    logfile: str = ""

    @property
    def child_processes(self) -> int:
        """Process count without the root parent, never negative."""
        without_root = self.processes - 1
        return without_root if without_root > 0 else 0

    @property
    def speed(self) -> float:
        """Elements per second; 0.0 when the measured time is not positive."""
        if self.time > 0:
            return self.size / self.time
        return 0.0
|
|
|
|
|
|
def seed_for(size: int, depth: int, min_size: int, salt: int) -> int:
    """Derive a deterministic seed from the run parameters and a per-series salt."""
    weighted_terms = (
        salt * 1_000_003,
        size * 17,
        depth * 1009,
        min_size * 31,
    )
    return 2026 + sum(weighted_terms)
|
|
|
|
|
|
def run_lab4(
    bin_path: str,
    size: int,
    depth: int,
    min_size: int,
    seed: int,
    port_base: int,
    scenario: str,
    log_path: Path | None = None,
) -> RunResult:
    """Run the lab4 binary once and parse its STAT line into a RunResult.

    When ``log_path`` is given, ``--log`` is added to the command line and the
    captured stdout is saved to that file (parent directories are created).

    Raises:
        RuntimeError: the process exited with a non-zero code, stderr has no
            STAT line, or the reported ``valid`` value is not 1.
    """
    want_log = log_path is not None

    cmd = [bin_path]
    for flag, value in (
        ("--size", size),
        ("--depth", depth),
        ("--min-size", min_size),
        ("--seed", seed),
        ("--port-base", port_base),
    ):
        cmd += [flag, str(value)]
    if want_log:
        cmd.append("--log")

    proc = subprocess.run(cmd, text=True, capture_output=True)

    # The log is saved before the exit code is checked, so the output of a
    # failed run is still kept on disk.
    if want_log:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(proc.stdout, encoding="utf-8")

    if proc.returncode != 0:
        raise RuntimeError(
            "".join(
                (
                    "Команда завершилась с ошибкой:\n",
                    " ".join(cmd),
                    "\n\nSTDOUT:\n",
                    proc.stdout,
                    "\nSTDERR:\n",
                    proc.stderr,
                )
            )
        )

    stat = STAT_RE.search(proc.stderr)
    if stat is None:
        raise RuntimeError("Не найден STAT в stderr:\n" + proc.stderr)

    size_s, depth_s, min_size_s, processes_s, counter_s, valid_s, time_s = stat.groups()

    # size/depth/min_size come from what the binary reported; seed and
    # port_base are the values that were passed in.
    result = RunResult(
        scenario=scenario,
        size=int(size_s),
        depth=int(depth_s),
        min_size=int(min_size_s),
        seed=seed,
        port_base=port_base,
        processes=int(processes_s),
        counter=int(counter_s),
        valid=int(valid_s),
        time=float(time_s),
        logfile=str(log_path) if want_log else "",
    )

    if result.valid != 1:
        raise RuntimeError("Сортировка не прошла проверку:\n" + proc.stderr)

    return result
|
|
|
|
|
|
def group_by(rows: Iterable[RunResult], key_fn):
    """Bucket ``rows`` by ``key_fn(row)``.

    Returns a ``defaultdict(list)``; rows keep their input order inside each
    bucket and buckets appear in first-seen order.
    """
    buckets = defaultdict(list)
    for item in rows:
        bucket = buckets[key_fn(item)]
        bucket.append(item)
    return buckets
|
|
|
|
|
|
def write_csv(path: Path, rows: List[RunResult]) -> None:
    """Write ``rows`` to ``path`` as CSV with a header row.

    Parent directories are created. With no rows only the header is written,
    using the RunResult field names.
    """
    if rows:
        columns = list(asdict(rows[0]).keys())
    else:
        columns = list(RunResult.__dataclass_fields__.keys())

    path.parent.mkdir(parents=True, exist_ok=True)

    with path.open("w", encoding="utf-8", newline="") as out:
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
        writer.writerows(asdict(record) for record in rows)
|
|
|
|
|
|
def read_csv(path: Path) -> List[RunResult]:
    """Load RunResult rows from a CSV file with a header row.

    A missing ``logfile`` column is read as an empty string.
    """
    int_columns = (
        "size",
        "depth",
        "min_size",
        "seed",
        "port_base",
        "processes",
        "counter",
        "valid",
    )

    with path.open("r", encoding="utf-8", newline="") as src:
        return [
            RunResult(
                scenario=record["scenario"],
                **{name: int(record[name]) for name in int_columns},
                time=float(record["time"]),
                logfile=record.get("logfile", ""),
            )
            for record in csv.DictReader(src)
        ]
|
|
|
|
|
|
def save_plot(path: Path) -> None:
    """Save the current matplotlib figure to ``path`` at 140 dpi and close it."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    figure = plt.gcf()
    figure.tight_layout()
    figure.savefig(path, dpi=140)
    plt.close(figure)
|
|
|
|
|
|
def clean_graphs(pics: Path) -> None:
    """Make sure ``pics`` exists and delete every ``*.png`` directly inside it."""
    pics.mkdir(parents=True, exist_ok=True)

    stale_images = pics.glob("*.png")
    for image in stale_images:
        image.unlink()
|
|
|
|
|
|
def plot_depth_graphs(rows: List[RunResult], pics: Path) -> None:
    """Draw graphs 01-03 from the "depth_scaling" rows, one line per array size.

    Does nothing when there are no "depth_scaling" rows.
    """
    selected = [r for r in rows if r.scenario == "depth_scaling"]
    if not selected:
        return

    # One (size, runs ordered by depth) pair per array size, smallest size first.
    series_by_size = [
        (size, sorted(runs, key=lambda r: r.depth))
        for size, runs in sorted(group_by(selected, lambda r: r.size).items())
    ]

    def speedup_of(series):
        # Baseline is the depth=0 run; without one, the shallowest run is used.
        baseline = next((r.time for r in series if r.depth == 0), series[0].time)
        return [baseline / r.time if r.time > 0 else 0 for r in series]

    def draw(values_of, marker, ylabel, title, filename):
        # Shared layout of all three figures: depth on X, one line per size.
        plt.figure(figsize=(12, 6))
        for size, series in series_by_size:
            plt.plot(
                [r.depth for r in series],
                values_of(series),
                marker=marker,
                label=f"N={size}",
            )
        plt.xlabel("Глубина рекурсии")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.grid(True)
        plt.legend()
        save_plot(pics / filename)

    # 1. Main performance graph.
    draw(
        lambda series: [r.time for r in series],
        "o",
        "Время, сек",
        "1. Время сортировки от глубины рекурсии",
        "01_time_by_depth.png",
    )

    # 2. Speedup against sequential depth=0.
    draw(
        speedup_of,
        "s",
        "Ускорение относительно depth=0",
        "2. Ускорение от использования процессов и сокетов",
        "02_speedup_by_depth.png",
    )

    # 3. Child process tree size, root parent excluded.
    draw(
        lambda series: [r.child_processes for r in series],
        "^",
        "Количество дочерних процессов",
        "3. Размер дерева дочерних процессов без корневого родителя",
        "03_child_processes_by_depth.png",
    )
|
|
|
|
|
|
def plot_min_size_graph(rows: List[RunResult], pics: Path) -> None:
    """Draw graph 04: run time against min_size for the "min_size_effect" rows.

    Does nothing when there are no such rows.
    """
    points = [r for r in rows if r.scenario == "min_size_effect"]
    if not points:
        return
    points.sort(key=lambda r: r.min_size)

    thresholds = [r.min_size for r in points]
    timings = [r.time for r in points]

    # 4. Threshold effect.
    plt.figure(figsize=(12, 6))
    plt.plot(thresholds, timings, marker="o")

    # min_size values are spread exponentially, hence the base-2 log axis.
    plt.xscale("log", base=2)
    plt.xlabel("Минимальный размер части для fork/socket")
    plt.ylabel("Время, сек")
    plt.title("4. Влияние min_size на время сортировки")
    plt.grid(True)
    save_plot(pics / "04_time_by_min_size.png")
|
|
|
|
|
|
def parse_events(log_path: Path) -> List[dict]:
    """Parse START/END event lines of a log file into one record per task.

    Events are paired by (pid, depth, size). depth=0 events are skipped and
    records lacking either START or END are dropped. Each record has the keys
    START, END, pid, ppid, depth, size and port (-1 when no port was logged).
    The result is ordered by (START, depth, pid); a missing file gives [].
    """
    if not log_path.exists():
        return []

    records = defaultdict(dict)
    text = log_path.read_text(encoding="utf-8", errors="ignore")

    for line in text.splitlines():
        match = EVENT_RE.search(line)
        if match is None:
            continue

        kind, pid_s, ppid_s, depth_s, size_s, port_s, time_s = match.groups()

        # Remove root parent process from timeline/tree.
        depth = int(depth_s)
        if depth == 0:
            continue

        pid = int(pid_s)
        size = int(size_s)

        record = records[(pid, depth, size)]
        record[kind] = float(time_s)
        record.update(
            pid=pid,
            ppid=int(ppid_s),
            depth=depth,
            size=size,
            port=int(port_s) if port_s else -1,
        )

    complete = [rec for rec in records.values() if "START" in rec and "END" in rec]
    complete.sort(key=lambda rec: (rec["START"], rec["depth"], rec["pid"]))
    return complete
|
|
|
|
|
|
def plot_timeline(log_path: Path, pics: Path) -> None:
    """Draw graph 05: one horizontal lifetime bar per parsed task.

    Times are shifted so that the earliest START is 0. Does nothing when the
    log yields no complete records.
    """
    tasks = parse_events(log_path)
    if not tasks:
        return

    origin = min(task["START"] for task in tasks)

    # 5. Timeline for child process lifetime and real parallelism.
    # Figure height grows with the number of bars.
    plt.figure(figsize=(13, max(5, len(tasks) * 0.38)))

    for row_y, task in enumerate(tasks):
        begin = task["START"] - origin
        finish = task["END"] - origin

        plt.plot([begin, finish], [row_y, row_y], linewidth=5)

        caption_parts = [f"pid={task['pid']} d={task['depth']} n={task['size']}"]
        if task.get("port", -1) >= 0:
            caption_parts.append(f" p={task['port']}")

        plt.text(finish, row_y, " " + "".join(caption_parts), va="center", fontsize=8)

    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дочерние процессы / задачи сортировки")
    plt.title("5. Временная диаграмма дочерних процессов")
    plt.grid(True)
    save_plot(pics / "05_timeline_child_processes.png")
|
|
|
|
|
|
def plot_process_tree_dfs(log_path: Path, pics: Path) -> None:
    """Draw graph 06: process lifetimes laid out as a tree in DFS order.

    The depth=0 root is absent from the parsed records, so the top-level rows
    are the processes whose parent is not among the parsed records. Children
    are placed directly below their parent, ordered by START time:

        depth=1 process A
            depth=2 child of A
                depth=3 child of that child
            depth=2 next child of A
        depth=1 process B
        ...

    Does nothing when the log yields no complete records.
    """
    records = parse_events(log_path)
    if not records:
        return

    by_pid = {rec["pid"]: rec for rec in records}

    def started_at(pid):
        return by_pid[pid]["START"]

    # Split records into children of a known parent and top-level processes
    # (children of the hidden root or of a parent absent after filtering).
    children = defaultdict(list)
    roots = []
    for rec in records:
        if rec["ppid"] in by_pid:
            children[rec["ppid"]].append(rec["pid"])
        else:
            roots.append(rec["pid"])

    for siblings in children.values():
        siblings.sort(key=started_at)
    roots.sort(key=started_at)

    def walk(pid, level):
        # Pre-order traversal: the process itself, then each subtree in turn.
        yield pid, level
        for child_pid in children.get(pid, []):
            yield from walk(child_pid, level + 1)

    ordered = [node for root_pid in roots for node in walk(root_pid, 0)]
    if not ordered:
        return

    y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)}
    t0 = min(started_at(pid) for pid, _ in ordered)

    # Figure height grows with the number of rows.
    plt.figure(figsize=(14, max(5, len(ordered) * 0.45)))

    for y, (pid, level) in enumerate(ordered):
        rec = by_pid[pid]
        begin = rec["START"] - t0
        finish = rec["END"] - t0

        # Horizontal lifetime line.
        plt.plot([begin, finish], [y, y], linewidth=5)

        # Indentation makes nesting visible in labels.
        caption = f"{' ' * level}pid={pid} d={rec['depth']} n={rec['size']}"
        if rec.get("port", -1) >= 0:
            caption += f" p={rec['port']}"
        plt.text(finish, y, " " + caption, va="center", fontsize=8)

        # Parent-child connector: a vertical dashed line at the child's start
        # time, then a horizontal dashed line along the parent's row.
        parent_pid = rec["ppid"]
        if parent_pid not in y_by_pid:
            continue

        parent_y = y_by_pid[parent_pid]
        parent_begin = by_pid[parent_pid]["START"] - t0

        plt.plot([begin, begin], [parent_y, y], linewidth=1, linestyle="--")
        plt.plot(
            [parent_begin, begin],
            [parent_y, parent_y],
            linewidth=1,
            linestyle="--",
        )

    # Row 0 goes to the top so the tree reads downwards.
    plt.gca().invert_yaxis()
    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дерево процессов в DFS-порядке")
    plt.title("6. Дерево процессов: потомки расположены сразу под родителем")
    plt.grid(True)
    save_plot(pics / "06_process_tree_dfs.png")
|
|
|
|
|
|
def make_experiments(args) -> List[RunResult]:
    """Run every benchmark series and return the results in execution order.

    Series: "depth_scaling" (each size at each depth), "min_size_effect"
    (one run per min_size point) and a single logged "timeline" run whose
    output is saved to ``<args.out>/logs/timeline.log``. One progress line is
    printed per run.
    """
    logs_dir = Path(args.out) / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    if args.fast:
        depth_sizes, depth_count, min_size_depth = [20_000, 50_000], 10, 2048
        exponent_span, point_count = 8, 15
    else:
        # Three sizes x 30 depths, plus 30 min_size points.
        depth_sizes, depth_count, min_size_depth = [50_000, 100_000, 200_000], 30, 4096
        exponent_span, point_count = 10, 30

    # min_size points are spaced evenly in the exponent, starting at 2**7.
    exponent_step = exponent_span / (point_count - 1)
    min_size_points = [round(2 ** (7 + i * exponent_step)) for i in range(point_count)]

    results: List[RunResult] = []

    # 1) Depth scaling: feeds graphs 01, 02, 03.
    for size in depth_sizes:
        for depth in range(depth_count):
            run = run_lab4(
                args.bin,
                size,
                depth,
                min_size_depth,
                seed_for(size, depth, min_size_depth, 1),
                args.port_base,
                "depth_scaling",
            )
            results.append(run)
            print(
                f"depth_scaling: N={size} depth={depth} "
                f"child_proc={run.child_processes} time={run.time:.6f}",
                flush=True,
            )

    # 2) min_size effect: feeds graph 04.
    fixed_size = args.min_size_test_size
    fixed_depth = args.min_size_test_depth
    for min_size in min_size_points:
        run = run_lab4(
            args.bin,
            fixed_size,
            fixed_depth,
            min_size,
            seed_for(fixed_size, fixed_depth, min_size, 2),
            args.port_base,
            "min_size_effect",
        )
        results.append(run)
        print(
            f"min_size_effect: min_size={min_size} "
            f"child_proc={run.child_processes} time={run.time:.6f}",
            flush=True,
        )

    # 3) One log run: feeds graphs 05 and 06.
    log_path = logs_dir / "timeline.log"
    run = run_lab4(
        args.bin,
        args.timeline_size,
        args.timeline_depth,
        args.timeline_min_size,
        seed_for(args.timeline_size, args.timeline_depth, args.timeline_min_size, 5),
        args.port_base,
        "timeline",
        log_path=log_path,
    )
    results.append(run)
    print(
        f"timeline: {log_path} child_proc={run.child_processes} time={run.time:.6f}",
        flush=True,
    )

    return results
|
|
|
|
|
|
def generate_report(out_dir: Path, rows: List[RunResult]) -> None:
    """Write ``<out_dir>/REPORT.md``: the graph list and a per-scenario summary.

    The summary table has one line per scenario (sorted by name) showing the
    number of runs and the parameters of the fastest run.
    """
    header = [
        "# Lab4 sockets: основные графики\n\n",
        "Сгенерированы основные графики для отчета.\n\n",
        "Корневой родительский процесс исключен из графика дерева процессов "
        "и из временных диаграмм.\n\n",
        "## Графики\n\n",
        "1. `pics/01_time_by_depth.png` — время от глубины рекурсии.\n",
        "2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`.\n",
        "3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня.\n",
        "4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки.\n",
        "5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов.\n",
        "6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем.\n\n",
        "## Краткая статистика\n\n",
        "| Сценарий | Запусков | Лучшее время | Лучший запуск |\n",
        "|---|---:|---:|---|\n",
    ]

    def table_line(scenario, runs):
        fastest = min(runs, key=lambda r: r.time)
        return (
            f"| `{scenario}` | {len(runs)} | {fastest.time:.6f} | "
            f"N={fastest.size}, "
            f"depth={fastest.depth}, "
            f"min_size={fastest.min_size}, "
            f"child_proc={fastest.child_processes} |\n"
        )

    grouped = group_by(rows, lambda r: r.scenario)
    table = [table_line(name, runs) for name, runs in sorted(grouped.items())]

    report_path = out_dir / "REPORT.md"
    report_path.write_text("".join(header + table), encoding="utf-8")
|
|
|
|
|
|
def build_plots(out_dir: Path, rows: List[RunResult]) -> None:
    """Rebuild every graph under ``<out_dir>/pics`` and write the report.

    Old PNG files are removed first. Graphs 05/06 are drawn from the log of
    the first "timeline" row that has a logfile recorded.
    """
    pics = out_dir / "pics"

    clean_graphs(pics)
    plot_depth_graphs(rows, pics)
    plot_min_size_graph(rows, pics)

    timeline_row = next(
        (r for r in rows if r.scenario == "timeline" and r.logfile),
        None,
    )

    if timeline_row is not None:
        timeline_log = Path(timeline_row.logfile)

        # The recorded path is relative to the working directory of the run
        # that produced the CSV. When graphs are rebuilt from another
        # directory (or after the output was moved) it no longer resolves,
        # so fall back to the standard location inside out_dir instead of
        # silently skipping graphs 05/06.
        if not timeline_log.exists():
            fallback_log = out_dir / "logs" / "timeline.log"
            if fallback_log.exists():
                timeline_log = fallback_log

        plot_timeline(timeline_log, pics)
        plot_process_tree_dfs(timeline_log, pics)

    generate_report(out_dir, rows)
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: run the benchmarks (or reuse the CSV) and plot.

    Returns:
        0 on success; 2 when ``--skip-run`` is given but the CSV is missing,
        when the lab4 binary cannot be found/executed, or when the output
        directory cannot be safely deleted.
    """
    parser = argparse.ArgumentParser(description="Exporter for Lab4 socket sort graphs")

    parser.add_argument("--bin", default="./lab4", help="Путь к бинарнику lab4")
    parser.add_argument("--out", default="out_lab4", help="Каталог вывода")
    parser.add_argument(
        "--port-base", type=int, default=20000, help="Базовый порт для lab4"
    )
    parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки")
    parser.add_argument(
        "--skip-run", action="store_true", help="Строить графики из существующего CSV"
    )

    parser.add_argument("--min-size-test-size", type=int, default=200_000)
    parser.add_argument("--min-size-test-depth", type=int, default=5)

    parser.add_argument("--timeline-size", type=int, default=8192)
    parser.add_argument("--timeline-depth", type=int, default=3)
    parser.add_argument("--timeline-min-size", type=int, default=64)

    args = parser.parse_args()

    out_dir = Path(args.out)
    csv_path = out_dir / "csv" / "lab4_all_results.csv"

    if args.skip_run:
        if not csv_path.exists():
            print(f"CSV не найден: {csv_path}", file=sys.stderr)
            return 2

        rows = read_csv(csv_path)
    else:
        # Check the binary BEFORE deleting anything: a typo in --bin must not
        # destroy the results of a previous run.
        bin_found = shutil.which(args.bin)
        if bin_found is None:
            print(
                f"Бинарник не найден или не исполняемый: {args.bin}",
                file=sys.stderr,
            )
            return 2

        # Keep old output from previous exporter versions from confusing the report.
        if out_dir.exists():
            # Refuse to wipe a directory that holds the current working
            # directory or the binary itself (e.g. "--out .").
            out_real = out_dir.resolve()
            protected = (Path.cwd().resolve(), Path(bin_found).resolve())
            if any(out_real == p or out_real in p.parents for p in protected):
                print(
                    f"Каталог вывода {out_dir} содержит текущий каталог или "
                    "бинарник; удаление отменено",
                    file=sys.stderr,
                )
                return 2

            shutil.rmtree(out_dir)

        rows = make_experiments(args)
        write_csv(csv_path, rows)

    build_plots(out_dir, rows)

    print("\nГотово: построены основные графики, включая DFS-дерево процессов.")
    print("Корневой родительский процесс исключен из дерева/таймлайна.")
    print(f"CSV: {csv_path}")
    print(f"Графики: {out_dir / 'pics'}")
    print(f"Отчет: {out_dir / 'REPORT.md'}")

    return 0
|
|
|
|
|
|
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|