add 4, edit 3

This commit is contained in:
2026-05-12 19:13:20 +07:00
parent 17ae59ea48
commit 2ad3f7eaa4
16 changed files with 1944 additions and 94 deletions
+673
View File
@@ -0,0 +1,673 @@
#!/usr/bin/env python3
"""
Lab4 exporter: runs socket-sort benchmarks and creates report graphs.

The root parent process is excluded from the process-tree and timeline
visualizations:
  - the process graph shows child processes only: processes - 1
  - the timeline ignores depth=0 rows
  - the DFS tree ignores the depth=0 root and places each process's
    children directly below it

Generated output:
  out_lab4/csv/lab4_all_results.csv
  out_lab4/pics/01_time_by_depth.png
  out_lab4/pics/02_speedup_by_depth.png
  out_lab4/pics/03_child_processes_by_depth.png
  out_lab4/pics/04_time_by_min_size.png
  out_lab4/pics/05_timeline_child_processes.png
  out_lab4/pics/06_process_tree_dfs.png
  out_lab4/logs/timeline.log
  out_lab4/REPORT.md

Usage:
  python3 exporter.py --bin ./lab4 --out out_lab4
  python3 exporter.py --bin ./lab4 --out out_lab4 --fast
  python3 exporter.py --bin ./lab4 --out out_lab4 --skip-run
"""
from __future__ import annotations
import argparse
import csv
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Iterable, List
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Matches the summary line that run_lab4() looks for in the binary's stderr.
# The fields must appear in this order; capture groups are:
#   1 size, 2 depth, 3 min_size, 4 processes, 5 counter, 6 valid, 7 time
# Groups 1-6 are digit runs; group 7 is a run of digits and dots.
STAT_RE = re.compile(
r"STAT:\s*.*?size=(\d+)\s+.*?depth=(\d+)\s+.*?min_size=(\d+)\s+"
r".*?processes=(\d+)\s+.*?counter=(\d+)\s+.*?valid=(\d+)\s+"
r".*?time=([\d.]+)"
)
# Matches one START/END event line of the timeline log read by parse_events().
# Capture groups are:
#   1 event type (START or END), 2 PID, 3 PPID, 4 depth, 5 size,
#   6 port (optional; None when the line has no "port=" field), 7 time
EVENT_RE = re.compile(
r"(START|END)\s+PID=(\d+)\s+PPID=(\d+)\s+depth=(\d+)\s+size=(\d+)"
r"(?:\s+port=(\d+))?\s+time=([\d.]+)"
)
@dataclass
class RunResult:
    """Parsed outcome of a single run of the lab4 binary.

    The field order is significant: it defines the CSV column order used
    when results are written with ``asdict``.
    """

    scenario: str  # experiment label, e.g. "depth_scaling" or "timeline"
    size: int  # size= value from the STAT line
    depth: int  # depth= value from the STAT line
    min_size: int  # min_size= value from the STAT line
    seed: int  # value passed to the binary via --seed
    port_base: int  # value passed to the binary via --port-base
    processes: int  # processes= value from the STAT line (root included)
    counter: int  # counter= value from the STAT line
    valid: int  # valid= value from the STAT line
    time: float  # time= value from the STAT line
    logfile: str = ""  # path of the saved log, or "" when none was written

    @property
    def child_processes(self) -> int:
        """Return the process count without the root parent, never below 0."""
        without_root = self.processes - 1
        return without_root if without_root > 0 else 0

    @property
    def speed(self) -> float:
        """Return ``size / time``, or 0.0 when the time is not positive."""
        if self.time > 0:
            return self.size / self.time
        return 0.0
def seed_for(size: int, depth: int, min_size: int, salt: int) -> int:
    """Derive a deterministic seed from the run parameters.

    The result is a fixed linear combination of the four arguments plus a
    constant offset, so identical parameters always yield the same seed.
    """
    weighted_terms = (
        salt * 1_000_003,
        size * 17,
        depth * 1009,
        min_size * 31,
    )
    return 2026 + sum(weighted_terms)
def run_lab4(
    bin_path: str,
    size: int,
    depth: int,
    min_size: int,
    seed: int,
    port_base: int,
    scenario: str,
    log_path: Path | None = None,
) -> RunResult:
    """Run the lab4 binary once and parse its STAT line into a RunResult.

    Args:
        bin_path: Executable to launch.
        size, depth, min_size, seed, port_base: Forwarded as ``--size``,
            ``--depth``, ``--min-size``, ``--seed`` and ``--port-base``.
        scenario: Label stored in the returned result.
        log_path: When given, ``--log`` is added to the command line and the
            captured stdout is saved to this file.

    Returns:
        The result built from the STAT line found in the process's stderr.

    Raises:
        RuntimeError: If the process exits with a non-zero code, if stderr
            contains no STAT line, or if the reported ``valid`` is not 1.
    """
    keep_log = log_path is not None

    # Assemble "<bin> --flag value ..." from a table of options.
    options = (
        ("--size", size),
        ("--depth", depth),
        ("--min-size", min_size),
        ("--seed", seed),
        ("--port-base", port_base),
    )
    command = [bin_path]
    for flag, value in options:
        command += [flag, str(value)]
    if keep_log:
        command.append("--log")

    proc = subprocess.run(command, text=True, capture_output=True)

    # The log is written before the exit code is checked, so a failed run
    # still leaves its stdout on disk.
    if keep_log:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(proc.stdout, encoding="utf-8")

    if proc.returncode != 0:
        command_line = " ".join(command)
        raise RuntimeError(
            f"Команда завершилась с ошибкой:\n{command_line}"
            f"\n\nSTDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}"
        )

    stat = STAT_RE.search(proc.stderr)
    if stat is None:
        raise RuntimeError("Не найден STAT в stderr:\n" + proc.stderr)

    got_size, got_depth, got_min_size, processes, counter, valid, elapsed = stat.groups()
    outcome = RunResult(
        scenario=scenario,
        size=int(got_size),
        depth=int(got_depth),
        min_size=int(got_min_size),
        seed=seed,
        port_base=port_base,
        processes=int(processes),
        counter=int(counter),
        valid=int(valid),
        time=float(elapsed),
        logfile=str(log_path) if keep_log else "",
    )
    if outcome.valid != 1:
        raise RuntimeError("Сортировка не прошла проверку:\n" + proc.stderr)
    return outcome
def group_by(rows: Iterable[RunResult], key_fn):
    """Bucket ``rows`` by ``key_fn(row)``.

    Returns a ``defaultdict(list)`` mapping each key to its rows in input
    order; keys appear in order of first occurrence.
    """
    buckets = defaultdict(list)
    for item in rows:
        bucket = buckets[key_fn(item)]
        bucket.append(item)
    return buckets
def write_csv(path: Path, rows: List[RunResult]) -> None:
    """Write ``rows`` to ``path`` as a UTF-8 CSV file with a header line.

    The parent directory is created when missing. Column names come from the
    first row; for an empty list they come from the RunResult field names, so
    the file still contains a header.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as handle:
        if rows:
            columns = list(asdict(rows[0]))
        else:
            columns = list(RunResult.__dataclass_fields__)
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(asdict(record) for record in rows)
def read_csv(path: Path) -> List[RunResult]:
    """Load RunResult rows from a CSV file such as the one write_csv creates.

    Numeric columns are converted with ``int``/``float``; a missing
    ``logfile`` column yields an empty string.
    """
    with path.open("r", encoding="utf-8", newline="") as handle:
        return [
            RunResult(
                scenario=record["scenario"],
                size=int(record["size"]),
                depth=int(record["depth"]),
                min_size=int(record["min_size"]),
                seed=int(record["seed"]),
                port_base=int(record["port_base"]),
                processes=int(record["processes"]),
                counter=int(record["counter"]),
                valid=int(record["valid"]),
                time=float(record["time"]),
                logfile=record.get("logfile", ""),
            )
            for record in csv.DictReader(handle)
        ]
def save_plot(path: Path) -> None:
    """Save the current pyplot figure to ``path`` at 140 dpi and close it.

    The target directory is created when missing, and ``tight_layout`` is
    applied before saving.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    plt.tight_layout()
    plt.savefig(path, dpi=140)
    plt.close()
def clean_graphs(pics: Path) -> None:
    """Ensure ``pics`` exists and delete every ``*.png`` directly inside it.

    Other files and any subdirectories are left untouched.
    """
    pics.mkdir(parents=True, exist_ok=True)
    for stale_image in pics.glob("*.png"):
        stale_image.unlink()
def plot_depth_graphs(rows: List[RunResult], pics: Path) -> None:
    """Draw graphs 01-03 from the "depth_scaling" runs.

    Each graph has one line per array size, plotted against recursion depth:
    run time, speedup relative to the depth=0 run, and the number of child
    processes. Nothing is drawn when there are no "depth_scaling" rows.
    """
    scaling_runs = [run for run in rows if run.scenario == "depth_scaling"]
    if not scaling_runs:
        return

    # One depth-ordered series per size, in ascending size order.
    grouped = group_by(scaling_runs, lambda run: run.size)
    series = [
        (size, sorted(runs, key=lambda run: run.depth))
        for size, runs in sorted(grouped.items())
    ]

    def draw(values_of, marker: str, ylabel: str, title: str, filename: str) -> None:
        """Plot ``values_of(runs)`` against depth for every size and save it."""
        plt.figure(figsize=(12, 6))
        for size, runs in series:
            plt.plot(
                [run.depth for run in runs],
                values_of(runs),
                marker=marker,
                label=f"N={size}",
            )
        plt.xlabel("Глубина рекурсии")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.grid(True)
        plt.legend()
        save_plot(pics / filename)

    def times(runs):
        return [run.time for run in runs]

    def speedups(runs):
        # The baseline is the depth=0 run; when the series has none, its
        # shallowest run is used instead.
        baseline = next((run.time for run in runs if run.depth == 0), runs[0].time)
        return [baseline / run.time if run.time > 0 else 0 for run in runs]

    def child_counts(runs):
        return [run.child_processes for run in runs]

    # 1. Main performance graph.
    draw(
        times,
        "o",
        "Время, сек",
        "1. Время сортировки от глубины рекурсии",
        "01_time_by_depth.png",
    )
    # 2. Speedup against the sequential depth=0 run.
    draw(
        speedups,
        "s",
        "Ускорение относительно depth=0",
        "2. Ускорение от использования процессов и сокетов",
        "02_speedup_by_depth.png",
    )
    # 3. Size of the child process tree, root parent excluded.
    draw(
        child_counts,
        "^",
        "Количество дочерних процессов",
        "3. Размер дерева дочерних процессов без корневого родителя",
        "03_child_processes_by_depth.png",
    )
def plot_min_size_graph(rows: List[RunResult], pics: Path) -> None:
    """Draw graph 04: run time against ``min_size`` on a base-2 log x axis.

    Only "min_size_effect" rows are used; nothing is drawn when there are
    none.
    """
    threshold_runs = [run for run in rows if run.scenario == "min_size_effect"]
    threshold_runs.sort(key=lambda run: run.min_size)
    if not threshold_runs:
        return

    thresholds = [run.min_size for run in threshold_runs]
    durations = [run.time for run in threshold_runs]

    # 4. Threshold effect.
    plt.figure(figsize=(12, 6))
    plt.plot(thresholds, durations, marker="o")
    plt.xscale("log", base=2)
    plt.xlabel("Минимальный размер части для fork/socket")
    plt.ylabel("Время, сек")
    plt.title("4. Влияние min_size на время сортировки")
    plt.grid(True)
    save_plot(pics / "04_time_by_min_size.png")
def parse_events(log_path: Path) -> List[dict]:
    """Parse START/END events of child processes from a timeline log.

    Lines that do not match ``EVENT_RE`` are skipped, and so are events with
    ``depth=0`` (the root parent). START and END lines are paired by the key
    ``(pid, depth, size)``.

    Args:
        log_path: Log file to read; undecodable bytes are ignored.

    Returns:
        One dict per task that has both a START and an END event, with keys
        ``"START"``, ``"END"`` (timestamps as floats), ``"pid"``, ``"ppid"``,
        ``"depth"``, ``"size"`` and ``"port"`` (-1 when no line of the task
        carried a port). The list is sorted by start time, then depth, then
        pid. A missing file yields an empty list.
    """
    if not log_path.exists():
        return []

    tasks: dict = {}
    log_text = log_path.read_text(encoding="utf-8", errors="ignore")
    for line in log_text.splitlines():
        match = EVENT_RE.search(line)
        if match is None:
            continue
        kind, pid, ppid, depth, size, port, stamp = match.groups()
        depth_i = int(depth)
        # Remove the root parent process from the timeline/tree.
        if depth_i == 0:
            continue

        pid_i = int(pid)
        size_i = int(size)
        task = tasks.setdefault((pid_i, depth_i, size_i), {"port": -1})
        task[kind] = float(stamp)
        task["pid"] = pid_i
        task["ppid"] = int(ppid)
        task["depth"] = depth_i
        task["size"] = size_i
        # The port group is optional in EVENT_RE. Only record a port that is
        # actually present, so a line without "port=" cannot erase the port
        # already parsed from the task's other line.
        if port is not None:
            task["port"] = int(port)

    complete = [task for task in tasks.values() if "START" in task and "END" in task]
    complete.sort(key=lambda task: (task["START"], task["depth"], task["pid"]))
    return complete
def plot_timeline(log_path: Path, pics: Path) -> None:
    """Draw graph 05: one horizontal lifetime bar per parsed child task.

    Times are shifted so the earliest START is at zero. Nothing is drawn when
    the log yields no complete START/END pairs.
    """
    tasks = parse_events(log_path)
    if not tasks:
        return

    origin = min(task["START"] for task in tasks)

    # 5. Timeline for child process lifetime and real parallelism.
    plt.figure(figsize=(13, max(5, len(tasks) * 0.38)))
    for lane, task in enumerate(tasks):
        began = task["START"] - origin
        finished = task["END"] - origin
        plt.plot([began, finished], [lane, lane], linewidth=5)

        caption = f"pid={task['pid']} d={task['depth']} n={task['size']}"
        if task.get("port", -1) >= 0:
            caption += f" p={task['port']}"
        plt.text(finished, lane, " " + caption, va="center", fontsize=8)

    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дочерние процессы / задачи сортировки")
    plt.title("5. Временная диаграмма дочерних процессов")
    plt.grid(True)
    save_plot(pics / "05_timeline_child_processes.png")
def plot_process_tree_dfs(log_path: Path, pics: Path) -> None:
    """Draw graph 06: the process tree reconstructed from the timeline log.

    The depth=0 root parent is excluded (parse_events drops it). Rows are
    laid out in DFS order, so children sit directly below their parent:

        depth=1 process A
          depth=2 child of A
            depth=3 child of that child
          depth=2 next child of A
        depth=1 process B
        ...

    Each row shows the process lifetime as a horizontal bar, and dashed lines
    connect a process to its parent. Nothing is drawn when the log yields no
    complete START/END pairs.
    """
    events = parse_events(log_path)
    if not events:
        return

    info = {event["pid"]: event for event in events}

    def started_at(pid: int) -> float:
        return info[pid]["START"]

    # Split processes into those whose parent is known and the top-level
    # ones: children of the hidden root, or processes whose parent is absent
    # after the depth=0 filter.
    kids = defaultdict(list)
    top_level = []
    for event in events:
        if event["ppid"] in info:
            kids[event["ppid"]].append(event["pid"])
        else:
            top_level.append(event["pid"])
    for siblings in kids.values():
        siblings.sort(key=started_at)
    top_level.sort(key=started_at)

    layout = []

    def visit(pid: int, level: int) -> None:
        layout.append((pid, level))
        for kid in kids.get(pid, []):
            visit(kid, level + 1)

    for pid in top_level:
        visit(pid, 0)
    if not layout:
        return

    row_of = {pid: row for row, (pid, _) in enumerate(layout)}
    origin = min(started_at(pid) for pid, _ in layout)

    plt.figure(figsize=(14, max(5, len(layout) * 0.45)))
    for row, (pid, level) in enumerate(layout):
        event = info[pid]
        began = event["START"] - origin
        finished = event["END"] - origin

        # Horizontal lifetime line.
        plt.plot([began, finished], [row, row], linewidth=5)

        # Indentation makes nesting visible in labels.
        caption = f"{' ' * level}pid={pid} d={event['depth']} n={event['size']}"
        if event.get("port", -1) >= 0:
            caption += f" p={event['port']}"
        plt.text(finished, row, " " + caption, va="center", fontsize=8)

        # Parent-child connector: a vertical drop at the child's start time,
        # then a horizontal run along the parent's row.
        parent = event["ppid"]
        if parent in row_of:
            parent_row = row_of[parent]
            parent_began = started_at(parent) - origin
            plt.plot(
                [began, began],
                [parent_row, row],
                linewidth=1,
                linestyle="--",
            )
            plt.plot(
                [parent_began, began],
                [parent_row, parent_row],
                linewidth=1,
                linestyle="--",
            )

    plt.gca().invert_yaxis()
    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дерево процессов в DFS-порядке")
    plt.title("6. Дерево процессов: потомки расположены сразу под родителем")
    plt.grid(True)
    save_plot(pics / "06_process_tree_dfs.png")
def make_experiments(args) -> List[RunResult]:
    """Run every benchmark scenario and return the collected results.

    Three scenarios are executed in order, each run printing a progress line:
      1. "depth_scaling": every configured size at every configured depth.
      2. "min_size_effect": a sweep over min_size values.
      3. "timeline": a single run whose log is saved to
         ``<out>/logs/timeline.log``.

    Args:
        args: Parsed command-line namespace; reads ``out``, ``fast``, ``bin``,
            ``port_base`` and the ``min_size_test_*`` / ``timeline_*`` values.
    """
    logs_dir = Path(args.out) / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    if args.fast:
        depth_sizes = [20_000, 50_000]
        depths = list(range(0, 10))
        scaling_min_size = 2048
        min_size_points = [round(2 ** (7 + i * (8 / 14))) for i in range(15)]
    else:
        # Three sizes at 30 depths each, plus a 30-point min_size sweep.
        depth_sizes = [50_000, 100_000, 200_000]
        depths = list(range(0, 30))
        scaling_min_size = 4096
        min_size_points = [round(2 ** (7 + i * (10 / 29))) for i in range(30)]

    results: List[RunResult] = []

    # 1) Depth scaling: feeds graphs 01, 02, 03.
    for size in depth_sizes:
        for depth in depths:
            run = run_lab4(
                args.bin,
                size,
                depth,
                scaling_min_size,
                seed_for(size, depth, scaling_min_size, 1),
                args.port_base,
                "depth_scaling",
            )
            results.append(run)
            print(
                f"depth_scaling: N={size} depth={depth} "
                f"child_proc={run.child_processes} time={run.time:.6f}",
                flush=True,
            )

    # 2) min_size effect: feeds graph 04.
    sweep_size = args.min_size_test_size
    sweep_depth = args.min_size_test_depth
    for min_size in min_size_points:
        run = run_lab4(
            args.bin,
            sweep_size,
            sweep_depth,
            min_size,
            seed_for(sweep_size, sweep_depth, min_size, 2),
            args.port_base,
            "min_size_effect",
        )
        results.append(run)
        print(
            f"min_size_effect: min_size={min_size} "
            f"child_proc={run.child_processes} time={run.time:.6f}",
            flush=True,
        )

    # 3) One logged run: feeds graphs 05 and 06.
    log_path = logs_dir / "timeline.log"
    run = run_lab4(
        args.bin,
        args.timeline_size,
        args.timeline_depth,
        args.timeline_min_size,
        seed_for(args.timeline_size, args.timeline_depth, args.timeline_min_size, 5),
        args.port_base,
        "timeline",
        log_path=log_path,
    )
    results.append(run)
    print(
        f"timeline: {log_path} child_proc={run.child_processes} time={run.time:.6f}",
        flush=True,
    )
    return results
def generate_report(out_dir: Path, rows: List[RunResult]) -> None:
    """Write ``REPORT.md`` into ``out_dir``.

    The report lists the generated graphs and ends with a table holding, for
    each scenario (sorted by name), the number of runs and the fastest run.
    """
    header = (
        "# Lab4 sockets: основные графики\n\n",
        "Сгенерированы основные графики для отчета.\n\n",
        "Корневой родительский процесс исключен из графика дерева процессов "
        "и из временных диаграмм.\n\n",
        "## Графики\n\n",
        "1. `pics/01_time_by_depth.png` — время от глубины рекурсии.\n",
        "2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`.\n",
        "3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня.\n",
        "4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки.\n",
        "5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов.\n",
        "6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем.\n\n",
        "## Краткая статистика\n\n",
        "| Сценарий | Запусков | Лучшее время | Лучший запуск |\n",
        "|---|---:|---:|---|\n",
    )

    table = []
    per_scenario = group_by(rows, lambda run: run.scenario)
    for scenario, runs in sorted(per_scenario.items()):
        fastest = min(runs, key=lambda run: run.time)
        table.append(
            f"| `{scenario}` | {len(runs)} | {fastest.time:.6f} | "
            f"N={fastest.size}, depth={fastest.depth}, "
            f"min_size={fastest.min_size}, "
            f"child_proc={fastest.child_processes} |\n"
        )

    report = "".join(header) + "".join(table)
    (out_dir / "REPORT.md").write_text(report, encoding="utf-8")
def build_plots(out_dir: Path, rows: List[RunResult]) -> None:
    """Regenerate every graph and the report under ``out_dir``.

    Old PNG files in ``out_dir/pics`` are removed first. The timeline and
    process-tree graphs are drawn only when a "timeline" row with a recorded
    log file exists; the first such row is used.
    """
    pics = out_dir / "pics"
    clean_graphs(pics)

    plot_depth_graphs(rows, pics)
    plot_min_size_graph(rows, pics)

    for run in rows:
        if run.scenario == "timeline" and run.logfile:
            timeline_log = Path(run.logfile)
            plot_timeline(timeline_log, pics)
            plot_process_tree_dfs(timeline_log, pics)
            break

    generate_report(out_dir, rows)
def main() -> int:
    """Command-line entry point.

    Without ``--skip-run`` the output directory is wiped, all experiments are
    run and their results saved to CSV. With ``--skip-run`` the results are
    loaded from the existing CSV instead. Graphs and the report are then
    rebuilt either way.

    Returns:
        0 on success, 2 when ``--skip-run`` is given but the CSV is missing.
    """
    parser = argparse.ArgumentParser(description="Exporter for Lab4 socket sort graphs")
    parser.add_argument("--bin", default="./lab4", help="Путь к бинарнику lab4")
    parser.add_argument("--out", default="out_lab4", help="Каталог вывода")
    parser.add_argument(
        "--port-base", type=int, default=20000, help="Базовый порт для lab4"
    )
    parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки")
    parser.add_argument(
        "--skip-run", action="store_true", help="Строить графики из существующего CSV"
    )
    parser.add_argument("--min-size-test-size", type=int, default=200_000)
    parser.add_argument("--min-size-test-depth", type=int, default=5)
    parser.add_argument("--timeline-size", type=int, default=8192)
    parser.add_argument("--timeline-depth", type=int, default=3)
    parser.add_argument("--timeline-min-size", type=int, default=64)
    args = parser.parse_args()

    out_dir = Path(args.out)
    csv_path = out_dir / "csv" / "lab4_all_results.csv"

    if not args.skip_run:
        # Remove output of previous exporter versions so it cannot confuse
        # the report.
        # NOTE(review): this recursively deletes whatever --out points at;
        # confirm callers never pass a directory holding other data.
        if out_dir.exists():
            shutil.rmtree(out_dir)
        rows = make_experiments(args)
        write_csv(csv_path, rows)
    elif csv_path.exists():
        rows = read_csv(csv_path)
    else:
        print(f"CSV не найден: {csv_path}", file=sys.stderr)
        return 2

    build_plots(out_dir, rows)

    print("\nГотово: построены основные графики, включая DFS-дерево процессов.")
    print("Корневой родительский процесс исключен из дерева/таймлайна.")
    print(f"CSV: {csv_path}")
    print(f"Графики: {out_dir / 'pics'}")
    print(f"Отчет: {out_dir / 'REPORT.md'}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())