diff --git a/3/Makefile b/3/Makefile index b704bd0..94b9eaa 100644 --- a/3/Makefile +++ b/3/Makefile @@ -3,7 +3,7 @@ CXXFLAGS := -O2 -std=c++17 -Wall -Wextra -pedantic TARGET := lab3 OUT_DIR := out -.PHONY: all clean run test bench timelines pack +.PHONY: all run test export clean pack all: $(TARGET) @@ -15,25 +15,12 @@ run: $(TARGET) test: $(TARGET) python3 test_lab3.py - ./$(TARGET) --size 0 --depth 3 --min-size 1 >/dev/null - ./$(TARGET) --size 1 --depth 3 --min-size 1 >/dev/null - ./$(TARGET) --size 10000 --depth 0 --min-size 1 >/dev/null - ./$(TARGET) --size 10000 --depth 2 --min-size 128 >/dev/null - ./$(TARGET) --size 10000 --depth 3 --min-size 256 --seed 2026 >/dev/null - ./$(TARGET) --size 12345 --depth 4 --min-size 257 --seed 777 >/dev/null -bench: $(TARGET) - python3 benchmark.py - -timelines: $(TARGET) - mkdir -p $(OUT_DIR)/logs $(OUT_DIR)/pics - ./$(TARGET) --size 2048 --depth 2 --min-size 64 --log > $(OUT_DIR)/logs/depth2.log 2>$(OUT_DIR)/logs/depth2.stat - ./$(TARGET) --size 4096 --depth 3 --min-size 64 --log > $(OUT_DIR)/logs/depth3.log 2>$(OUT_DIR)/logs/depth3.stat - python3 exporter.py $(OUT_DIR)/logs/depth2.log $(OUT_DIR)/pics - python3 exporter.py $(OUT_DIR)/logs/depth3.log $(OUT_DIR)/pics +export: $(TARGET) + python3 exporter.py --bin ./$(TARGET) --out $(OUT_DIR) pack: clean - zip -r lab3_process_pipes.zip main.cpp Makefile benchmark.py exporter.py test_lab3.py README.md + zip -r lab3_process_pipes.zip main.cpp Makefile exporter.py test_lab3.py README.md clean: rm -f $(TARGET) lab3_process_pipes.zip diff --git a/3/README.md b/3/README.md index 35c7cb9..048f233 100644 --- a/3/README.md +++ b/3/README.md @@ -22,10 +22,22 @@ - `uint64_t processes` — число процессов в поддереве. 8. Родитель выполняет слияние двух отсортированных частей. -Для одного потомка используются два канала: родитель → потомок и потомок → родитель. Так как потомков два, на рекурсивном узле создается четыре канала. +Для одного потомка используются два канала: -## Сборка и запуск +- родитель → потомок; +- потомок → родитель. + +Так как потомков два, на рекурсивном узле создается четыре канала. + +## Файлы + +- `main.cpp` — программа лабораторной работы. +- `Makefile` — сборка, запуск, тесты, экспорт графиков. +- `exporter.py` — единый экспортёр графиков и CSV. +- `test_lab3.py` — тесты корректности. +- `README.md` — описание. + +## Сборка ```bash make -./lab3 --size 100000 --depth 2 --min-size 4096 diff --git a/3/exporter.py b/3/exporter.py index 2a704da..d2771b7 100644 --- a/3/exporter.py +++ b/3/exporter.py @@ -1,86 +1,707 @@ -import os +#!/usr/bin/env python3 +""" +Single-file Lab3 exporter. + +What it does: + 1. Runs benchmark series by recursion depth. + 2. Runs benchmark series by min_size threshold. + 3. Runs one logged execution with --log. + 4. Draws old benchmark graphs. + 5. Draws old timeline/depth histogram graphs. + 6. Adds DFS process-tree graph: + each process has its children directly below it; + only after a whole subtree is drawn, the next sibling is drawn. + +Generated output: + out/benchmark_depth.csv + out/benchmark_min_size.csv + out/logs/timeline.log + + out/pics/time_by_depth.png + out/pics/speedup_by_depth.png + out/pics/process_count_by_depth.png + out/pics/time_by_min_size.png + out/pics/timeline.png + out/pics/depth_hist.png + out/pics/process_tree_dfs.png + +Usage: + python3 exporter.py + python3 exporter.py --bin ./lab3 --out out + python3 exporter.py --fast + python3 exporter.py --skip-run +""" + +from __future__ import annotations + +import argparse +import csv import re +import shutil +import subprocess import sys from collections import defaultdict +from pathlib import Path +from typing import Dict, List import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt -if len(sys.argv) < 3: - print("Использование: python3 exporter.py ") - sys.exit(1) +STAT_RE = re.compile( + r"STAT:.*size=(\d+).*depth=(\d+).*min_size=(\d+).*processes=(\d+).*valid=(\d+).*time=([\d.]+)" +) -logfile = sys.argv[1] -out_dir = sys.argv[2] -os.makedirs(out_dir, exist_ok=True) -base = os.path.splitext(os.path.basename(logfile))[0] - -pattern = re.compile( +EVENT_RE = re.compile( r"(START|END) PID=(\d+) PPID=(\d+) depth=(\d+) size=(\d+) time=([\d.]+)" ) -events = defaultdict(dict) -with open(logfile, encoding="utf-8") as f: - for line in f: - m = pattern.search(line) - if not m: - continue - typ, pid, ppid, depth, size, t = m.groups() - key = (int(pid), int(depth), int(size)) - events[key][typ] = float(t) - events[key]["pid"] = int(pid) - events[key]["ppid"] = int(ppid) - events[key]["depth"] = int(depth) - events[key]["size"] = int(size) -rows = [] -for v in events.values(): - if "START" in v and "END" in v: - rows.append(v) +def seed_for(size: int, depth: int, min_size: int, salt: int) -> int: + # Для каждой точки используется свой seed, поэтому вход всегда случайный, + # но результаты можно воспроизвести. + return 2026 + salt * 1_000_003 + size * 17 + depth * 1009 + min_size * 31 -if not rows: - print("В логе нет полных START/END событий") - sys.exit(1) -rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"])) -t0 = min(r["START"] for r in rows) +def run_once( + bin_path: str, + size: int, + depth: int, + min_size: int, + seed: int, + log_path: Path | None = None, +) -> Dict[str, int | float | str]: + cmd = [ + bin_path, + "--size", + str(size), + "--depth", + str(depth), + "--min-size", + str(min_size), + "--seed", + str(seed), + ] -# 1. Временная диаграмма: видно параллельность и время жизни каждого процесса. -plt.figure(figsize=(12, max(5, len(rows) * 0.35))) -for y, r in enumerate(rows): - start = r["START"] - t0 - end = r["END"] - t0 - plt.plot([start, end], [y, y], linewidth=5) - plt.text( - end, - y, - f" pid={r['pid']} d={r['depth']} n={r['size']}", - va="center", - fontsize=8, + if log_path is not None: + cmd.append("--log") + + p = subprocess.run( + cmd, + stdout=subprocess.PIPE if log_path is not None else subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, ) -plt.xlabel("Время от начала, сек") -plt.ylabel("Процессы/задачи сортировки") -plt.title(f"Временная диаграмма процессов: {base}") -plt.grid(True) -plt.tight_layout() -plt.savefig(os.path.join(out_dir, f"{base}_timeline.png")) -plt.close() + if log_path is not None: + log_path.parent.mkdir(parents=True, exist_ok=True) + log_path.write_text(p.stdout, encoding="utf-8") -# 2. Гистограмма глубин: проверка, что дерево дошло до нужной глубины. -by_depth = defaultdict(int) -for r in rows: - by_depth[r["depth"]] += 1 + if p.returncode != 0: + raise RuntimeError( + "Command failed:\n" + + " ".join(cmd) + + "\n\nSTDOUT:\n" + + (p.stdout or "") + + "\nSTDERR:\n" + + (p.stderr or "") + ) -plt.figure(figsize=(8, 5)) -xs = sorted(by_depth) -plt.bar(xs, [by_depth[x] for x in xs]) -plt.xlabel("Глубина рекурсии") -plt.ylabel("Количество процессов") -plt.title(f"Распределение процессов по глубине: {base}") -plt.grid(True, axis="y") -plt.tight_layout() -plt.savefig(os.path.join(out_dir, f"{base}_depth_hist.png")) -plt.close() + m = STAT_RE.search(p.stderr) + if not m: + raise RuntimeError(f"STAT not found:\n{p.stderr}") + + if m.group(5) != "1": + raise RuntimeError(f"sort validation failed:\n{p.stderr}") + + return { + "size": int(m.group(1)), + "depth": int(m.group(2)), + "min_size": int(m.group(3)), + "processes": int(m.group(4)), + "valid": int(m.group(5)), + "time": float(m.group(6)), + "seed": seed, + "logfile": str(log_path) if log_path is not None else "", + } + + +def save_csv(path: Path, rows: List[dict], header: List[str]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + + with path.open("w", encoding="utf-8", newline="") as f: + w = csv.DictWriter(f, fieldnames=header) + w.writeheader() + + for row in rows: + w.writerow({key: row.get(key, "") for key in header}) + + +def read_csv(path: Path) -> List[dict]: + result = [] + + with path.open("r", encoding="utf-8", newline="") as f: + for row in csv.DictReader(f): + converted = dict(row) + + for key in ["size", "depth", "min_size", "processes", "valid", "seed"]: + if key in converted and converted[key] != "": + converted[key] = int(converted[key]) + + if "time" in converted and converted["time"] != "": + converted["time"] = float(converted["time"]) + + result.append(converted) + + return result + + +def save_plot(path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + plt.tight_layout() + plt.savefig(path, dpi=140) + plt.close() + + +def clean_output(out_dir: Path) -> None: + if out_dir.exists(): + shutil.rmtree(out_dir) + + (out_dir / "pics").mkdir(parents=True, exist_ok=True) + (out_dir / "logs").mkdir(parents=True, exist_ok=True) + + +def plot_depth_scaling( + bin_path: str, + out_dir: Path, + fast: bool = False, + skip_run: bool = False, +) -> List[dict]: + pics = out_dir / "pics" + csv_path = out_dir / "benchmark_depth.csv" + + if skip_run: + rows = read_csv(csv_path) + else: + if fast: + depths = list(range(10)) + sizes = [20_000, 50_000] + min_size = 2048 + else: + # ВАЖНО: на графиках по глубине ровно 30 точек по оси X: 0..29. + # + # depth идет до 29, но реально число процессов не взорвется бесконечно, + # потому что дальнейшее деление останавливает min_size. + depths = list(range(30)) + + # Несколько размеров дают несколько линий на одном графике. + sizes = [50_000, 100_000, 200_000] + min_size = 4096 + + rows = [] + + for size in sizes: + for d in depths: + seed = seed_for(size, d, min_size, salt=1) + r = run_once(bin_path, size, d, min_size, seed) + row = {**r, "seed": seed} + rows.append(row) + + print( + f"depth_scaling: " + f"size={size} depth={d} min_size={min_size} " + f"seed={seed} processes={r['processes']} time={r['time']:.6f}", + flush=True, + ) + + save_csv( + csv_path, + rows, + [ + "size", + "depth", + "min_size", + "seed", + "processes", + "valid", + "time", + "logfile", + ], + ) + + plt.figure(figsize=(12, 6)) + + for size in sorted(set(r["size"] for r in rows)): + cur = [r for r in rows if r["size"] == size] + cur.sort(key=lambda r: r["depth"]) + + plt.plot( + [r["depth"] for r in cur], + [r["time"] for r in cur], + marker="o", + label=f"N={size}", + ) + + plt.xlabel("Глубина порождения процессов") + plt.ylabel("Время, сек") + plt.title("Зависимость времени сортировки от глубины fork-рекурсии") + plt.grid(True) + plt.legend() + save_plot(pics / "time_by_depth.png") + + plt.figure(figsize=(12, 6)) + + for size in sorted(set(r["size"] for r in rows)): + cur = [r for r in rows if r["size"] == size] + cur.sort(key=lambda r: r["depth"]) + + base_time = cur[0]["time"] + speedup = [base_time / r["time"] if r["time"] > 0 else 0 for r in cur] + + plt.plot( + [r["depth"] for r in cur], + speedup, + marker="s", + label=f"N={size}", + ) + + plt.xlabel("Глубина порождения процессов") + plt.ylabel("Ускорение относительно depth=0") + plt.title("Ускорение при использовании процессов") + plt.grid(True) + plt.legend() + save_plot(pics / "speedup_by_depth.png") + + plt.figure(figsize=(12, 6)) + + for size in sorted(set(r["size"] for r in rows)): + cur = [r for r in rows if r["size"] == size] + cur.sort(key=lambda r: r["depth"]) + + plt.plot( + [r["depth"] for r in cur], + [r["processes"] for r in cur], + marker="^", + label=f"N={size}", + ) + + plt.xlabel("Глубина порождения процессов") + plt.ylabel("Количество процессов") + plt.title("Размер дерева процессов") + plt.grid(True) + plt.legend() + save_plot(pics / "process_count_by_depth.png") + + return rows + + +def plot_threshold_effect( + bin_path: str, + out_dir: Path, + fast: bool = False, + skip_run: bool = False, +) -> List[dict]: + pics = out_dir / "pics" + csv_path = out_dir / "benchmark_min_size.csv" + + if skip_run: + rows = read_csv(csv_path) + else: + size = 200_000 + depth = 5 + + if fast: + # Быстрый режим: меньше точек. + min_sizes = [round(2 ** (7 + i * (8 / 14))) for i in range(15)] + else: + # ВАЖНО: на графике по min_size ровно 30 точек по оси X. + # + # 30 значений порога от 128 до 131072, примерно равномерно по log2-шкале. + min_sizes = [round(2 ** (7 + i * (10 / 29))) for i in range(30)] + + rows = [] + + for m in min_sizes: + seed = seed_for(size, depth, m, salt=2) + r = run_once(bin_path, size, depth, m, seed) + row = {**r, "seed": seed} + rows.append(row) + + print( + f"min_size_effect: " + f"size={size} depth={depth} min_size={m} " + f"seed={seed} processes={r['processes']} time={r['time']:.6f}", + flush=True, + ) + + save_csv( + csv_path, + rows, + [ + "size", + "depth", + "min_size", + "seed", + "processes", + "valid", + "time", + "logfile", + ], + ) + + rows.sort(key=lambda r: r["min_size"]) + + plt.figure(figsize=(12, 6)) + plt.plot( + [r["min_size"] for r in rows], + [r["time"] for r in rows], + marker="o", + ) + + plt.xscale("log", base=2) + plt.xlabel("Минимальный размер части для fork") + plt.ylabel("Время, сек") + plt.title("Влияние порога min_size на производительность") + plt.grid(True) + save_plot(pics / "time_by_min_size.png") + + return rows + + +def parse_events(log_path: Path) -> List[dict]: + events = defaultdict(dict) + + if not log_path.exists(): + return [] + + with log_path.open("r", encoding="utf-8", errors="ignore") as f: + for line in f: + m = EVENT_RE.search(line) + if not m: + continue + + typ, pid, ppid, depth, size, t = m.groups() + key = (int(pid), int(depth), int(size)) + + events[key][typ] = float(t) + events[key]["pid"] = int(pid) + events[key]["ppid"] = int(ppid) + events[key]["depth"] = int(depth) + events[key]["size"] = int(size) + + rows = [] + + for v in events.values(): + if "START" in v and "END" in v: + rows.append(v) + + rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"])) + return rows + + +def plot_timeline(log_path: Path, out_dir: Path) -> None: + pics = out_dir / "pics" + rows = parse_events(log_path) + + # Exclude root process: depth=0. + rows = [r for r in rows if r["depth"] != 0] + + if not rows: + print("В логе нет полных START/END событий без корневого процесса depth=0") + return + + base = log_path.stem + t0 = min(r["START"] for r in rows) + + # Временная диаграмма: видно параллельность и время жизни дочерних процессов. + plt.figure(figsize=(12, max(5, len(rows) * 0.35))) + + for y, r in enumerate(rows): + start = r["START"] - t0 + end = r["END"] - t0 + + plt.plot([start, end], [y, y], linewidth=5) + + plt.text( + end, + y, + f" pid={r['pid']} d={r['depth']} n={r['size']}", + va="center", + fontsize=8, + ) + + plt.xlabel("Время от старта первого дочернего процесса, сек") + plt.ylabel("Дочерние процессы / задачи сортировки") + plt.title(f"Временная диаграмма дочерних процессов: {base}") + plt.grid(True) + save_plot(pics / "timeline.png") + + # Гистограмма глубин без depth=0. + by_depth = defaultdict(int) + + for r in rows: + by_depth[r["depth"]] += 1 + + plt.figure(figsize=(8, 5)) + xs = sorted(by_depth) + + plt.bar(xs, [by_depth[x] for x in xs]) + plt.xlabel("Глубина рекурсии") + plt.ylabel("Количество дочерних процессов") + plt.title(f"Распределение дочерних процессов по глубине: {base}") + plt.grid(True, axis="y") + save_plot(pics / "depth_hist.png") + + +def plot_process_tree_dfs( + log_path: Path, out_dir: Path, hide_root: bool = False +) -> None: + """ + Draw process tree in DFS order. + + Root process depth=0 is always excluded. + + If process 2 has children 3 and 4, and process 3 has children 5 and 6, + the order is: + + 2 + 3 + 5 + 6 + 4 + + Only after the whole subtree of process 2 is drawn, the next sibling is drawn. + """ + pics = out_dir / "pics" + rows = parse_events(log_path) + + if not rows: + return + + # Always exclude root process: depth=0. + rows = [r for r in rows if r["depth"] != 0] + + if not rows: + return + + by_pid = {r["pid"]: r for r in rows} + children = defaultdict(list) + + for r in rows: + parent_pid = r["ppid"] + if parent_pid in by_pid: + children[parent_pid].append(r["pid"]) + + for parent_pid in children: + children[parent_pid].sort(key=lambda pid: by_pid[pid]["START"]) + + # These are direct children of hidden root or processes whose parent is not present. + roots = [r["pid"] for r in rows if r["ppid"] not in by_pid] + roots.sort(key=lambda pid: by_pid[pid]["START"]) + + ordered = [] + + def dfs(pid: int, level: int) -> None: + ordered.append((pid, level)) + for child_pid in children.get(pid, []): + dfs(child_pid, level + 1) + + for root_pid in roots: + dfs(root_pid, 0) + + if not ordered: + return + + y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)} + t0 = min(by_pid[pid]["START"] for pid, _ in ordered) + + plt.figure(figsize=(14, max(5, len(ordered) * 0.45))) + + for y, (pid, level) in enumerate(ordered): + r = by_pid[pid] + + start = r["START"] - t0 + end = r["END"] - t0 + + plt.plot([start, end], [y, y], linewidth=5) + + indent = " " * level + label = f"{indent}pid={pid} d={r['depth']} n={r['size']}" + + plt.text( + end, + y, + " " + label, + va="center", + fontsize=8, + ) + + parent_pid = r["ppid"] + + if parent_pid in y_by_pid: + parent_y = y_by_pid[parent_pid] + parent_start = by_pid[parent_pid]["START"] - t0 + + plt.plot( + [start, start], + [parent_y, y], + linewidth=1, + linestyle="--", + ) + + plt.plot( + [parent_start, start], + [parent_y, parent_y], + linewidth=1, + linestyle="--", + ) + + plt.gca().invert_yaxis() + plt.xlabel("Время от старта первого дочернего процесса, сек") + plt.ylabel("Дерево дочерних процессов в DFS-порядке") + plt.title("Дерево процессов без корня: потомки расположены сразу под родителем") + plt.grid(True) + save_plot(pics / "process_tree_dfs.png") + + +def run_timeline_case( + bin_path: str, + out_dir: Path, + size: int, + depth: int, + min_size: int, +) -> Path: + log_path = out_dir / "logs" / "timeline.log" + seed = seed_for(size, depth, min_size, salt=5) + + r = run_once( + bin_path=bin_path, + size=size, + depth=depth, + min_size=min_size, + seed=seed, + log_path=log_path, + ) + + print( + f"timeline: " + f"size={size} depth={depth} min_size={min_size} " + f"seed={seed} processes={r['processes']} time={r['time']:.6f} " + f"log={log_path}", + flush=True, + ) + + return log_path + + +def generate_report(out_dir: Path) -> None: + report = out_dir / "REPORT.md" + + text = """# Lab3 benchmark report + + Скрипт `exporter.py` запускает серию тестов и строит графики. + + ## CSV + + - `benchmark_depth.csv` — серия по глубине рекурсии. + - `benchmark_min_size.csv` — серия по порогу локальной сортировки. + + ## Графики + + - `pics/time_by_depth.png` — зависимость времени сортировки от глубины. + - `pics/speedup_by_depth.png` — ускорение относительно `depth=0`. + - `pics/process_count_by_depth.png` — количество процессов. + - `pics/time_by_min_size.png` — влияние `min_size`. + - `pics/timeline.png` — временная диаграмма процессов. + - `pics/depth_hist.png` — распределение процессов по глубине. + - `pics/process_tree_dfs.png` — дерево процессов в DFS-порядке: потомки идут сразу под родителем. + + ## Запуск + + ```bash + python3 exporter.py --bin ./lab3 --out out + Быстрый режим: + + python3 exporter.py --bin ./lab3 --out out --fast + + """ + + report.write_text(text, encoding="utf-8") + + +def main() -> int: + parser = argparse.ArgumentParser(description="Single-file Lab3 benchmark exporter") + + parser.add_argument("--bin", default="./lab3", help="Путь к бинарнику lab3") + parser.add_argument("--out", default="out", help="Каталог вывода") + parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки") + parser.add_argument( + "--skip-run", + action="store_true", + help="Не запускать benchmark, а построить графики из существующих CSV", + ) + parser.add_argument( + "--hide-root", + action="store_true", + help="Скрыть корневой процесс на DFS-графике дерева", + ) + + parser.add_argument("--timeline-size", type=int, default=8192) + parser.add_argument("--timeline-depth", type=int, default=3) + parser.add_argument("--timeline-min-size", type=int, default=64) + + args = parser.parse_args() + + bin_path = args.bin + out_dir = Path(args.out) + + if not args.skip_run: + clean_output(out_dir) + else: + (out_dir / "pics").mkdir(parents=True, exist_ok=True) + (out_dir / "logs").mkdir(parents=True, exist_ok=True) + + plot_depth_scaling( + bin_path=bin_path, + out_dir=out_dir, + fast=args.fast, + skip_run=args.skip_run, + ) + + plot_threshold_effect( + bin_path=bin_path, + out_dir=out_dir, + fast=args.fast, + skip_run=args.skip_run, + ) + + log_path = out_dir / "logs" / "timeline.log" + + if not args.skip_run: + log_path = run_timeline_case( + bin_path=bin_path, + out_dir=out_dir, + size=args.timeline_size, + depth=args.timeline_depth, + min_size=args.timeline_min_size, + ) + + if log_path.exists(): + plot_timeline(log_path, out_dir) + plot_process_tree_dfs(log_path, out_dir, hide_root=args.hide_root) + else: + print(f"Timeline log not found: {log_path}", file=sys.stderr) + + generate_report(out_dir) + + print(f"Графики сохранены в {out_dir / 'pics'}.") + print(f"CSV сохранены в {out_dir}.") + print(f"Лог сохранен в {out_dir / 'logs' / 'timeline.log'}.") + print(f"Отчет сохранен в {out_dir / 'REPORT.md'}.") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/3/main.cpp b/3/main.cpp index 9b1f466..1c4ccd1 100644 --- a/3/main.cpp +++ b/3/main.cpp @@ -173,9 +173,12 @@ static pid_t spawn_sort_child(const std::vector& part, try { close_checked(to_child[1]); close_checked(from_child[0]); + std::vector input = recv_vector(to_child[0]); close_checked(to_child[0]); + SortResult result = process_recursive_sort(std::move(input), child_depth, opt); + send_result(from_child[1], result); close_checked(from_child[1]); _exit(0); @@ -186,8 +189,10 @@ static pid_t spawn_sort_child(const std::vector& part, close_checked(to_child[0]); close_checked(from_child[1]); + send_vector(to_child[1], part); close_checked(to_child[1]); + result_read_fd = from_child[0]; return pid; } @@ -205,21 +210,28 @@ static SortResult process_recursive_sort(std::vector a, int depth, const Op std::vector left(a.begin(), a.begin() + static_cast(mid)); std::vector right(a.begin() + static_cast(mid), a.end()); - int left_fd = -1, right_fd = -1; + int left_fd = -1; + int right_fd = -1; + pid_t left_pid = spawn_sort_child(left, depth + 1, opt, left_fd); pid_t right_pid = spawn_sort_child(right, depth + 1, opt, right_fd); SortResult left_result = recv_result(left_fd); SortResult right_result = recv_result(right_fd); + close_checked(left_fd); close_checked(right_fd); - int status_left = 0, status_right = 0; + int status_left = 0; + int status_right = 0; + while (waitpid(left_pid, &status_left, 0) < 0 && errno == EINTR) {} while (waitpid(right_pid, &status_right, 0) < 0 && errno == EINTR) {} + if (!WIFEXITED(status_left) || WEXITSTATUS(status_left) != 0) { throw std::runtime_error("left child failed"); } + if (!WIFEXITED(status_right) || WEXITSTATUS(status_right) != 0) { throw std::runtime_error("right child failed"); } @@ -234,19 +246,28 @@ static SortResult process_recursive_sort(std::vector a, int depth, const Op static Options parse_args(int argc, char** argv) { Options opt; + for (int i = 1; i < argc; ++i) { std::string s = argv[i]; + auto need_value = [&](const std::string& name) -> std::string { if (i + 1 >= argc) throw std::runtime_error("missing value for " + name); return argv[++i]; }; - if (s == "--size" || s == "-n") opt.size = std::stoull(need_value(s)); - else if (s == "--depth" || s == "-d") opt.max_depth = std::stoi(need_value(s)); - else if (s == "--min-size" || s == "-m") opt.min_size = std::stoull(need_value(s)); - else if (s == "--seed") opt.seed = static_cast(std::stoul(need_value(s))); - else if (s == "--print") opt.print = true; - else if (s == "--log") opt.log = true; - else if (s == "--help" || s == "-h") { + + if (s == "--size" || s == "-n") { + opt.size = std::stoull(need_value(s)); + } else if (s == "--depth" || s == "-d") { + opt.max_depth = std::stoi(need_value(s)); + } else if (s == "--min-size" || s == "-m") { + opt.min_size = std::stoull(need_value(s)); + } else if (s == "--seed") { + opt.seed = static_cast(std::stoul(need_value(s))); + } else if (s == "--print") { + opt.print = true; + } else if (s == "--log") { + opt.log = true; + } else if (s == "--help" || s == "-h") { std::cout << "Usage: ./lab3 [--size N] [--depth D] [--min-size M] [--seed S] " << "[--print] [--log]\n"; std::exit(0); @@ -254,6 +275,7 @@ static Options parse_args(int argc, char** argv) { throw std::runtime_error("unknown argument: " + s); } } + if (opt.max_depth < 0) throw std::runtime_error("depth must be non-negative"); return opt; } @@ -264,6 +286,7 @@ static std::vector generate_data(const Options& opt) { std::vector a(opt.size); std::mt19937 rng(opt.seed); std::uniform_int_distribution dist(-100000000, 100000000); + for (auto& x : a) x = dist(rng); return a; } @@ -276,6 +299,7 @@ int main(int argc, char** argv) { const auto t1 = std::chrono::steady_clock::now(); SortResult result = process_recursive_sort(std::move(data), 0, opt); const auto t2 = std::chrono::steady_clock::now(); + const double elapsed = std::chrono::duration(t2 - t1).count(); const bool ok = std::is_sorted(result.data.begin(), result.data.end()); @@ -293,6 +317,7 @@ int main(int argc, char** argv) { << " processes=" << result.processes << " valid=" << (ok ? 1 : 0) << " time=" << elapsed << " sec\n"; + return ok ? 0 : 3; } catch (const std::exception& e) { std::cerr << "ERROR: " << e.what() << "\n"; diff --git a/3/test_lab3.py b/3/test_lab3.py index 585c97e..ace1cd8 100644 --- a/3/test_lab3.py +++ b/3/test_lab3.py @@ -121,6 +121,7 @@ if __name__ == "__main__": test_11_help_has_no_mode_argument, test_12_unknown_mode_is_rejected, ] + for t in tests: t() print(f"OK {t.__name__}") diff --git a/4/Makefile b/4/Makefile new file mode 100644 index 0000000..09770a4 --- /dev/null +++ b/4/Makefile @@ -0,0 +1,19 @@ +CXX := g++ +CXXFLAGS := -O2 -std=c++17 -Wall -Wextra -pedantic +TARGET := lab4 +OUT_DIR := out_lab4 + +.PHONY: run export clean + +$(TARGET): main.cpp + $(CXX) $(CXXFLAGS) main.cpp -o $(TARGET) + +run: $(TARGET) + ./$(TARGET) --size 10000 --depth 3 --min-size 256 + +export: $(TARGET) + python3 exporter.py --bin ./$(TARGET) --out $(OUT_DIR) + +clean: + rm -f $(TARGET) + rm -rf $(OUT_DIR) out __pycache__ diff --git a/4/README.md b/4/README.md new file mode 100644 index 0000000..bdaac57 --- /dev/null +++ b/4/README.md @@ -0,0 +1,53 @@ +# Лабораторная работа 4. Сетевые соединения. Сокеты + +Вариант из лабораторной работы 1: рекурсивная сортировка разделением. + +Программа строит дерево процессов. Каждый внутренний процесс открывает TCP `ServerSocket` на `127.0.0.1` с портом `port_base + pid % span`, порождает двух потомков через `fork()`, принимает от них локальные соединения и передает им левую и правую части массива. + +По умолчанию `port_base = 20000`. Это сделано из-за того, что чистый порт `pid` может оказаться меньше 1024 и потребовать root-права. Формула соответствует замечанию из задания про `ServerSocketBase + `. + +## Формат обмена + +Родитель отправляет потомку: + +1. `uint32_t depth` — глубина рекурсии потомка; +2. `uint64_t size` — размер массива; +3. `int32_t array[size]` — часть массива. + +Потомок возвращает: + +1. `uint64_t counter` — счетчик операций сравнения при слиянии; +2. `uint64_t size` — размер отсортированной части; +3. `int32_t array[size]` — отсортированная часть; +4. `uint64_t processes` — расширение для статистики количества процессов. + +Первые три поля ответа соответствуют заданию: счетчик / размерность / массив. + +## Сборка и запуск + +```bash +make +./lab4 --size 10000 --depth 3 --min-size 1 +``` + +Полезные параметры: + +```bash +./lab4 --size 1000 --depth 2 --min-size 16 --seed 42 --print +./lab4 --size 2048 --depth 2 --min-size 64 --log +./lab4 --size 10000 --depth 3 --port-base 30000 +``` + +## Проверка + +```bash +make test +``` + +В конце программа печатает статистику в `stderr`: + +```text +STAT: size=... depth=... min_size=... processes=... counter=... valid=1 time=... sec +``` + +`valid=1` означает, что итоговый массив действительно отсортирован. diff --git a/4/exporter.py b/4/exporter.py new file mode 100644 index 0000000..d24bde0 --- /dev/null +++ b/4/exporter.py @@ -0,0 +1,673 @@ +#!/usr/bin/env python3 +""" +Lab4 exporter: runs socket-sort benchmarks and creates report graphs. + +Root parent process is excluded from process-tree/timeline visualizations: + - process graph shows child processes only: processes - 1 + - timeline ignores depth=0 rows + - DFS tree ignores depth=0 root and places each process's children directly below it + +Generated output: + out_lab4/csv/lab4_all_results.csv + out_lab4/pics/01_time_by_depth.png + out_lab4/pics/02_speedup_by_depth.png + out_lab4/pics/03_child_processes_by_depth.png + out_lab4/pics/04_time_by_min_size.png + out_lab4/pics/05_timeline_child_processes.png + out_lab4/pics/06_process_tree_dfs.png + out_lab4/logs/timeline.log + out_lab4/REPORT.md + +Usage: + python3 exporter.py --bin ./lab4 --out out_lab4 + python3 exporter.py --bin ./lab4 --out out_lab4 --fast + python3 exporter.py --bin ./lab4 --out out_lab4 --skip-run +""" + +from __future__ import annotations + +import argparse +import csv +import re +import shutil +import subprocess +import sys +from collections import defaultdict +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Iterable, List + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +STAT_RE = re.compile( + r"STAT:\s*.*?size=(\d+)\s+.*?depth=(\d+)\s+.*?min_size=(\d+)\s+" + r".*?processes=(\d+)\s+.*?counter=(\d+)\s+.*?valid=(\d+)\s+" + r".*?time=([\d.]+)" +) + +EVENT_RE = re.compile( + r"(START|END)\s+PID=(\d+)\s+PPID=(\d+)\s+depth=(\d+)\s+size=(\d+)" + r"(?:\s+port=(\d+))?\s+time=([\d.]+)" +) + + +@dataclass +class RunResult: + scenario: str + size: int + depth: int + min_size: int + seed: int + port_base: int + processes: int + counter: int + valid: int + time: float + logfile: str = "" + + @property + def child_processes(self) -> int: + return max(0, self.processes - 1) + + @property + def speed(self) -> float: + return self.size / self.time if self.time > 0 else 0.0 + + +def seed_for(size: int, depth: int, min_size: int, salt: int) -> int: + return 2026 + salt * 1_000_003 + size * 17 + depth * 1009 + min_size * 31 + + +def run_lab4( + bin_path: str, + size: int, + depth: int, + min_size: int, + seed: int, + port_base: int, + scenario: str, + log_path: Path | None = None, +) -> RunResult: + cmd = [ + bin_path, + "--size", + str(size), + "--depth", + str(depth), + "--min-size", + str(min_size), + "--seed", + str(seed), + "--port-base", + str(port_base), + ] + + if log_path is not None: + cmd.append("--log") + + p = subprocess.run(cmd, text=True, capture_output=True) + + if log_path is not None: + log_path.parent.mkdir(parents=True, exist_ok=True) + log_path.write_text(p.stdout, encoding="utf-8") + + if p.returncode != 0: + raise RuntimeError( + "Команда завершилась с ошибкой:\n" + + " ".join(cmd) + + "\n\nSTDOUT:\n" + + p.stdout + + "\nSTDERR:\n" + + p.stderr + ) + + m = STAT_RE.search(p.stderr) + if not m: + raise RuntimeError("Не найден STAT в stderr:\n" + p.stderr) + + result = RunResult( + scenario=scenario, + size=int(m.group(1)), + depth=int(m.group(2)), + min_size=int(m.group(3)), + seed=seed, + port_base=port_base, + processes=int(m.group(4)), + counter=int(m.group(5)), + valid=int(m.group(6)), + time=float(m.group(7)), + logfile=str(log_path) if log_path is not None else "", + ) + + if result.valid != 1: + raise RuntimeError("Сортировка не прошла проверку:\n" + p.stderr) + + return result + + +def group_by(rows: Iterable[RunResult], key_fn): + result = defaultdict(list) + for row in rows: + result[key_fn(row)].append(row) + return result + + +def write_csv(path: Path, rows: List[RunResult]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + + with path.open("w", encoding="utf-8", newline="") as f: + fieldnames = ( + list(asdict(rows[0]).keys()) + if rows + else list(RunResult.__dataclass_fields__.keys()) + ) + + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + + for row in rows: + writer.writerow(asdict(row)) + + +def read_csv(path: Path) -> List[RunResult]: + rows: List[RunResult] = [] + + with path.open("r", encoding="utf-8", newline="") as f: + for raw in csv.DictReader(f): + rows.append( + RunResult( + scenario=raw["scenario"], + size=int(raw["size"]), + depth=int(raw["depth"]), + min_size=int(raw["min_size"]), + seed=int(raw["seed"]), + port_base=int(raw["port_base"]), + processes=int(raw["processes"]), + counter=int(raw["counter"]), + valid=int(raw["valid"]), + time=float(raw["time"]), + logfile=raw.get("logfile", ""), + ) + ) + + return rows + + +def save_plot(path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + plt.tight_layout() + plt.savefig(path, dpi=140) + plt.close() + + +def clean_graphs(pics: Path) -> None: + pics.mkdir(parents=True, exist_ok=True) + + for old in pics.glob("*.png"): + old.unlink() + + +def plot_depth_graphs(rows: List[RunResult], pics: Path) -> None: + cur = [r for r in rows if r.scenario == "depth_scaling"] + if not cur: + return + + by_size = group_by(cur, lambda r: r.size) + + # 1. Main performance graph. + plt.figure(figsize=(12, 6)) + + for size, rs in sorted(by_size.items()): + rs = sorted(rs, key=lambda r: r.depth) + plt.plot( + [r.depth for r in rs], + [r.time for r in rs], + marker="o", + label=f"N={size}", + ) + + plt.xlabel("Глубина рекурсии") + plt.ylabel("Время, сек") + plt.title("1. Время сортировки от глубины рекурсии") + plt.grid(True) + plt.legend() + save_plot(pics / "01_time_by_depth.png") + + # 2. Speedup against sequential depth=0. + plt.figure(figsize=(12, 6)) + + for size, rs in sorted(by_size.items()): + rs = sorted(rs, key=lambda r: r.depth) + base = next((r.time for r in rs if r.depth == 0), rs[0].time) + speedup = [base / r.time if r.time > 0 else 0 for r in rs] + + plt.plot( + [r.depth for r in rs], + speedup, + marker="s", + label=f"N={size}", + ) + + plt.xlabel("Глубина рекурсии") + plt.ylabel("Ускорение относительно depth=0") + plt.title("2. Ускорение от использования процессов и сокетов") + plt.grid(True) + plt.legend() + save_plot(pics / "02_speedup_by_depth.png") + + # 3. Child process tree size, root parent excluded. + plt.figure(figsize=(12, 6)) + + for size, rs in sorted(by_size.items()): + rs = sorted(rs, key=lambda r: r.depth) + plt.plot( + [r.depth for r in rs], + [r.child_processes for r in rs], + marker="^", + label=f"N={size}", + ) + + plt.xlabel("Глубина рекурсии") + plt.ylabel("Количество дочерних процессов") + plt.title("3. Размер дерева дочерних процессов без корневого родителя") + plt.grid(True) + plt.legend() + save_plot(pics / "03_child_processes_by_depth.png") + + +def plot_min_size_graph(rows: List[RunResult], pics: Path) -> None: + cur = sorted( + [r for r in rows if r.scenario == "min_size_effect"], + key=lambda r: r.min_size, + ) + + if not cur: + return + + # 4. Threshold effect. + plt.figure(figsize=(12, 6)) + plt.plot( + [r.min_size for r in cur], + [r.time for r in cur], + marker="o", + ) + + plt.xscale("log", base=2) + plt.xlabel("Минимальный размер части для fork/socket") + plt.ylabel("Время, сек") + plt.title("4. Влияние min_size на время сортировки") + plt.grid(True) + save_plot(pics / "04_time_by_min_size.png") + + +def parse_events(log_path: Path) -> List[dict]: + events = defaultdict(dict) + + if not log_path.exists(): + return [] + + for line in log_path.read_text(encoding="utf-8", errors="ignore").splitlines(): + m = EVENT_RE.search(line) + if not m: + continue + + typ, pid, ppid, depth, size, port, t = m.groups() + depth_i = int(depth) + + # Remove root parent process from timeline/tree. + if depth_i == 0: + continue + + key = (int(pid), depth_i, int(size)) + + events[key][typ] = float(t) + events[key]["pid"] = int(pid) + events[key]["ppid"] = int(ppid) + events[key]["depth"] = depth_i + events[key]["size"] = int(size) + events[key]["port"] = int(port) if port else -1 + + rows = [v for v in events.values() if "START" in v and "END" in v] + rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"])) + + return rows + + +def plot_timeline(log_path: Path, pics: Path) -> None: + rows = parse_events(log_path) + if not rows: + return + + t0 = min(r["START"] for r in rows) + + # 5. Timeline for child process lifetime and real parallelism. + plt.figure(figsize=(13, max(5, len(rows) * 0.38))) + + for y, r in enumerate(rows): + start = r["START"] - t0 + end = r["END"] - t0 + + plt.plot([start, end], [y, y], linewidth=5) + + label = f"pid={r['pid']} d={r['depth']} n={r['size']}" + if r.get("port", -1) >= 0: + label += f" p={r['port']}" + + plt.text(end, y, " " + label, va="center", fontsize=8) + + plt.xlabel("Время от старта первого дочернего процесса, сек") + plt.ylabel("Дочерние процессы / задачи сортировки") + plt.title("5. Временная диаграмма дочерних процессов") + plt.grid(True) + save_plot(pics / "05_timeline_child_processes.png") + + +def plot_process_tree_dfs(log_path: Path, pics: Path) -> None: + """ + Draws a process tree from timeline.log. + + Root depth=0 parent is excluded. + Children are placed directly below their parent in DFS order: + + depth=1 process A + depth=2 child of A + depth=3 child of that child + depth=2 next child of A + depth=1 process B + ... + """ + rows = parse_events(log_path) + if not rows: + return + + by_pid = {r["pid"]: r for r in rows} + children = defaultdict(list) + + for r in rows: + ppid = r["ppid"] + if ppid in by_pid: + children[ppid].append(r["pid"]) + + for pid in children: + children[pid].sort(key=lambda child_pid: by_pid[child_pid]["START"]) + + # These are children of the hidden root or processes whose parent is absent + # after filtering depth=0. + roots = [r["pid"] for r in rows if r["ppid"] not in by_pid] + roots.sort(key=lambda pid: by_pid[pid]["START"]) + + ordered = [] + + def dfs(pid: int, level: int) -> None: + ordered.append((pid, level)) + for child_pid in children.get(pid, []): + dfs(child_pid, level + 1) + + for root_pid in roots: + dfs(root_pid, 0) + + if not ordered: + return + + y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)} + t0 = min(by_pid[pid]["START"] for pid, _ in ordered) + + plt.figure(figsize=(14, max(5, len(ordered) * 0.45))) + + for y, (pid, level) in enumerate(ordered): + r = by_pid[pid] + + start = r["START"] - t0 + end = r["END"] - t0 + + # Horizontal lifetime line. + plt.plot([start, end], [y, y], linewidth=5) + + # Indentation makes nesting visible in labels. + indent = " " * level + label = f"{indent}pid={pid} d={r['depth']} n={r['size']}" + + if r.get("port", -1) >= 0: + label += f" p={r['port']}" + + plt.text(end, y, " " + label, va="center", fontsize=8) + + # Parent-child connector. + parent_pid = r["ppid"] + if parent_pid in y_by_pid: + parent_y = y_by_pid[parent_pid] + parent_start = by_pid[parent_pid]["START"] - t0 + + plt.plot( + [start, start], + [parent_y, y], + linewidth=1, + linestyle="--", + ) + plt.plot( + [parent_start, start], + [parent_y, parent_y], + linewidth=1, + linestyle="--", + ) + + plt.gca().invert_yaxis() + plt.xlabel("Время от старта первого дочернего процесса, сек") + plt.ylabel("Дерево процессов в DFS-порядке") + plt.title("6. Дерево процессов: потомки расположены сразу под родителем") + plt.grid(True) + save_plot(pics / "06_process_tree_dfs.png") + + +def make_experiments(args) -> List[RunResult]: + rows: List[RunResult] = [] + + logs = Path(args.out) / "logs" + logs.mkdir(parents=True, exist_ok=True) + + if args.fast: + depth_sizes = [20_000, 50_000] + depths = list(range(0, 10)) + min_size_depth = 2048 + min_size_points = [round(2 ** (7 + i * (8 / 14))) for i in range(15)] + else: + # Two 30-point series are enough for a clean report without producing extra graphs. + depth_sizes = [50_000, 100_000, 200_000] + depths = list(range(0, 30)) + min_size_depth = 4096 + min_size_points = [round(2 ** (7 + i * (10 / 29))) for i in range(30)] + + # 1) Depth scaling: feeds graphs 01, 02, 03. + for size in depth_sizes: + for depth in depths: + seed = seed_for(size, depth, min_size_depth, 1) + + row = run_lab4( + args.bin, + size, + depth, + min_size_depth, + seed, + args.port_base, + "depth_scaling", + ) + + rows.append(row) + + print( + f"depth_scaling: " + f"N={size} " + f"depth={depth} " + f"child_proc={row.child_processes} " + f"time={row.time:.6f}", + flush=True, + ) + + # 2) min_size effect: feeds graph 04. + for min_size in min_size_points: + seed = seed_for( + args.min_size_test_size, + args.min_size_test_depth, + min_size, + 2, + ) + + row = run_lab4( + args.bin, + args.min_size_test_size, + args.min_size_test_depth, + min_size, + seed, + args.port_base, + "min_size_effect", + ) + + rows.append(row) + + print( + f"min_size_effect: " + f"min_size={min_size} " + f"child_proc={row.child_processes} " + f"time={row.time:.6f}", + flush=True, + ) + + # 3) One log run: feeds graphs 05 and 06. + log_path = logs / "timeline.log" + seed = seed_for( + args.timeline_size, + args.timeline_depth, + args.timeline_min_size, + 5, + ) + + row = run_lab4( + args.bin, + args.timeline_size, + args.timeline_depth, + args.timeline_min_size, + seed, + args.port_base, + "timeline", + log_path=log_path, + ) + + rows.append(row) + + print( + f"timeline: {log_path} child_proc={row.child_processes} time={row.time:.6f}", + flush=True, + ) + + return rows + + +def generate_report(out_dir: Path, rows: List[RunResult]) -> None: + by_scenario = group_by(rows, lambda r: r.scenario) + + lines = [ + "# Lab4 sockets: основные графики\n\n", + "Сгенерированы основные графики для отчета.\n\n", + "Корневой родительский процесс исключен из графика дерева процессов " + "и из временных диаграмм.\n\n", + "## Графики\n\n", + "1. `pics/01_time_by_depth.png` — время от глубины рекурсии.\n", + "2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`.\n", + "3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня.\n", + "4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки.\n", + "5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов.\n", + "6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем.\n\n", + "## Краткая статистика\n\n", + "| Сценарий | Запусков | Лучшее время | Лучший запуск |\n", + "|---|---:|---:|---|\n", + ] + + for scenario, rs in sorted(by_scenario.items()): + best = min(rs, key=lambda r: r.time) + + lines.append( + f"| `{scenario}` | {len(rs)} | {best.time:.6f} | " + f"N={best.size}, " + f"depth={best.depth}, " + f"min_size={best.min_size}, " + f"child_proc={best.child_processes} |\n" + ) + + (out_dir / "REPORT.md").write_text("".join(lines), encoding="utf-8") + + +def build_plots(out_dir: Path, rows: List[RunResult]) -> None: + pics = out_dir / "pics" + + clean_graphs(pics) + plot_depth_graphs(rows, pics) + plot_min_size_graph(rows, pics) + + timeline_row = next( + (r for r in rows if r.scenario == "timeline" and r.logfile), + None, + ) + + if timeline_row is not None: + timeline_log = Path(timeline_row.logfile) + plot_timeline(timeline_log, pics) + plot_process_tree_dfs(timeline_log, pics) + + generate_report(out_dir, rows) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Exporter for Lab4 socket sort graphs") + + parser.add_argument("--bin", default="./lab4", help="Путь к бинарнику lab4") + parser.add_argument("--out", default="out_lab4", help="Каталог вывода") + parser.add_argument( + "--port-base", type=int, default=20000, help="Базовый порт для lab4" + ) + parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки") + parser.add_argument( + "--skip-run", action="store_true", help="Строить графики из существующего CSV" + ) + + parser.add_argument("--min-size-test-size", type=int, default=200_000) + parser.add_argument("--min-size-test-depth", type=int, default=5) + + parser.add_argument("--timeline-size", type=int, default=8192) + parser.add_argument("--timeline-depth", type=int, default=3) + parser.add_argument("--timeline-min-size", type=int, default=64) + + args = parser.parse_args() + + out_dir = Path(args.out) + csv_path = out_dir / "csv" / "lab4_all_results.csv" + + if args.skip_run: + if not csv_path.exists(): + print(f"CSV не найден: {csv_path}", file=sys.stderr) + return 2 + + rows = read_csv(csv_path) + else: + # Keep old output from previous exporter versions from confusing the report. + if out_dir.exists(): + shutil.rmtree(out_dir) + + rows = make_experiments(args) + write_csv(csv_path, rows) + + build_plots(out_dir, rows) + + print("\nГотово: построены основные графики, включая DFS-дерево процессов.") + print("Корневой родительский процесс исключен из дерева/таймлайна.") + print(f"CSV: {csv_path}") + print(f"Графики: {out_dir / 'pics'}") + print(f"Отчет: {out_dir / 'REPORT.md'}") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/4/main.cpp b/4/main.cpp new file mode 100644 index 0000000..c84b306 --- /dev/null +++ b/4/main.cpp @@ -0,0 +1,437 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Лабораторная работа 4. Сетевые соединения. Сокеты. +// Вариант из лабораторной 1: рекурсивная сортировка разделением. +// +// В отличие от lab1/lab3, данные между родителем и потомками передаются +// не через общую память и не через pipe(), а через TCP-сокеты localhost. +// Каждый процесс, которому нужно разделить задачу, открывает серверный сокет +// на 127.0.0.1:port_base + pid, порождает двух потомков и передает им части +// массива в формате: depth / size / array. Потомки возвращают: +// counter / size / sorted_array / process_count. + +using i32 = int32_t; +using u32 = uint32_t; +using u64 = uint64_t; + +struct Options { + size_t size = 10000; + int max_depth = 3; + size_t min_size = 1; + unsigned seed = 1337; + int port_base = 20000; + bool print = false; + bool log = false; +}; + +struct SortResult { + std::vector data; + u64 counter = 0; // счетчик операций слияния/сравнений + u64 processes = 1; // текущее поддерево процессов, включая текущий процесс +}; + +static double now_seconds() { + using clock = std::chrono::steady_clock; + static const auto start = clock::now(); + return std::chrono::duration(clock::now() - start).count(); +} + +static void log_event(const char* type, int depth, size_t n, int port = -1) { + std::ostringstream ss; + ss << type + << " PID=" << static_cast(getpid()) + << " PPID=" << static_cast(getppid()) + << " depth=" << depth + << " size=" << n; + if (port >= 0) ss << " port=" << port; + ss << " time=" << now_seconds() << '\n'; + const std::string s = ss.str(); + (void)!write(STDOUT_FILENO, s.data(), s.size()); +} + +[[noreturn]] static void die_child(const std::string& msg) { + std::cerr << "CHILD_ERROR pid=" << getpid() << " " << msg << "\n"; + _exit(2); +} + +static void throw_errno(const std::string& what) { + throw std::runtime_error(what + ": " + std::strerror(errno)); +} + +static void close_checked(int fd) { + if (fd >= 0) { + while (close(fd) < 0 && errno == EINTR) {} + } +} + +static void set_common_socket_options(int fd) { + int yes = 1; + (void)setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); +#ifdef SO_REUSEPORT + (void)setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &yes, sizeof(yes)); +#endif + (void)setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)); + + // Блокирующий режим чтения оставляем стандартным, но задаем таймаут, + // чтобы ошибка соединения не превращалась в бесконечное зависание. + timeval tv{}; + tv.tv_sec = 30; + tv.tv_usec = 0; + (void)setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + (void)setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); +} + +static int port_for_pid(pid_t pid, int port_base) { + if (port_base < 1024 || port_base > 65000) { + throw std::runtime_error("port-base must be in range 1024..65000"); + } + const int span = 65535 - port_base; + return port_base + static_cast(static_cast(pid) % span); +} + +static void write_all(int fd, const void* ptr, size_t bytes) { + const char* p = static_cast(ptr); + while (bytes > 0) { + ssize_t w = send(fd, p, bytes, MSG_NOSIGNAL); + if (w < 0) { + if (errno == EINTR) continue; + throw_errno("send"); + } + if (w == 0) throw std::runtime_error("send returned 0"); + p += w; + bytes -= static_cast(w); + } +} + +static void read_all(int fd, void* ptr, size_t bytes) { + char* p = static_cast(ptr); + while (bytes > 0) { + ssize_t r = recv(fd, p, bytes, MSG_WAITALL); + if (r < 0) { + if (errno == EINTR) continue; + throw_errno("recv"); + } + if (r == 0) throw std::runtime_error("unexpected EOF in socket"); + p += r; + bytes -= static_cast(r); + } +} + +static void send_u32(int fd, u32 v) { write_all(fd, &v, sizeof(v)); } +static void send_u64(int fd, u64 v) { write_all(fd, &v, sizeof(v)); } +static u32 recv_u32(int fd) { u32 v = 0; read_all(fd, &v, sizeof(v)); return v; } +static u64 recv_u64(int fd) { u64 v = 0; read_all(fd, &v, sizeof(v)); return v; } + +static void send_task(int fd, int depth, const std::vector& a) { + send_u32(fd, static_cast(depth)); + send_u64(fd, static_cast(a.size())); + if (!a.empty()) write_all(fd, a.data(), a.size() * sizeof(i32)); +} + +static std::pair> recv_task(int fd) { + int depth = static_cast(recv_u32(fd)); + u64 n = recv_u64(fd); + if (n > static_cast(SIZE_MAX / sizeof(i32))) { + throw std::runtime_error("too large task array"); + } + std::vector a(static_cast(n)); + if (!a.empty()) read_all(fd, a.data(), a.size() * sizeof(i32)); + return {depth, std::move(a)}; +} + +static void send_result(int fd, const SortResult& result) { + send_u64(fd, result.counter); + send_u64(fd, static_cast(result.data.size())); + if (!result.data.empty()) write_all(fd, result.data.data(), result.data.size() * sizeof(i32)); + // Расширение протокола для статистики. Первые три поля соответствуют заданию: + // counter / size / array. + send_u64(fd, result.processes); +} + +static SortResult recv_result(int fd) { + SortResult r; + r.counter = recv_u64(fd); + u64 n = recv_u64(fd); + if (n > static_cast(SIZE_MAX / sizeof(i32))) { + throw std::runtime_error("too large result array"); + } + r.data.resize(static_cast(n)); + if (!r.data.empty()) read_all(fd, r.data.data(), r.data.size() * sizeof(i32)); + r.processes = recv_u64(fd); + return r; +} + +static int create_server_socket(int preferred_port, int& actual_port) { + // Основной вариант соответствует методичке: порт вычисляется от pid. + // Во время длинных серий benchmark порт иногда может быть еще занят ядром + // или совпасть по modulo. Поэтому при EADDRINUSE берем ближайший свободный + // порт и передаем именно его потомкам. + std::string last_error; + for (int shift = 0; shift < 2000; ++shift) { + int port = preferred_port + shift; + if (port > 65535) port = 1024 + (port - 65536); + + int fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) throw_errno("socket"); + set_common_socket_options(fd); + + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = htons(static_cast(port)); + + if (bind(fd, reinterpret_cast(&addr), sizeof(addr)) == 0) { + if (listen(fd, 2) < 0) { + close_checked(fd); + throw_errno("listen"); + } + actual_port = port; + return fd; + } + + last_error = std::strerror(errno); + close_checked(fd); + if (errno != EADDRINUSE && errno != EACCES) { + throw std::runtime_error("bind 127.0.0.1:" + std::to_string(port) + ": " + last_error); + } + } + throw std::runtime_error("cannot bind server socket near port " + + std::to_string(preferred_port) + ": " + last_error); +} + +static int accept_client(int server_fd) { + for (;;) { + int fd = accept(server_fd, nullptr, nullptr); + if (fd < 0) { + if (errno == EINTR) continue; + throw_errno("accept"); + } + set_common_socket_options(fd); + return fd; + } +} + +static int connect_to_parent(int port) { + int fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) throw_errno("socket"); + set_common_socket_options(fd); + + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = htons(static_cast(port)); + + // После fork сервер обычно уже слушает, но короткий retry делает запуск стабильнее. + for (int attempt = 0; attempt < 200; ++attempt) { + if (connect(fd, reinterpret_cast(&addr), sizeof(addr)) == 0) return fd; + if (errno != ECONNREFUSED && errno != EINTR) break; + usleep(1000); + } + close_checked(fd); + throw_errno("connect 127.0.0.1:" + std::to_string(port)); + throw std::runtime_error("unreachable connect failure"); +} + +static std::vector merge_sorted(const std::vector& left, + const std::vector& right, + u64& counter) { + std::vector out; + out.reserve(left.size() + right.size()); + size_t i = 0, j = 0; + while (i < left.size() && j < right.size()) { + ++counter; + if (left[i] <= right[j]) out.push_back(left[i++]); + else out.push_back(right[j++]); + } + out.insert(out.end(), left.begin() + static_cast(i), left.end()); + out.insert(out.end(), right.begin() + static_cast(j), right.end()); + return out; +} + +static SortResult local_sort(std::vector a) { + SortResult r; + if (a.size() < 2) { + r.data = std::move(a); + return r; + } + const size_t mid = a.size() / 2; + std::vector left(a.begin(), a.begin() + static_cast(mid)); + std::vector right(a.begin() + static_cast(mid), a.end()); + SortResult l = local_sort(std::move(left)); + SortResult rr = local_sort(std::move(right)); + r.counter = l.counter + rr.counter; + r.data = merge_sorted(l.data, rr.data, r.counter); + return r; +} + +static SortResult socket_recursive_sort(std::vector a, int depth, const Options& opt); + +static pid_t spawn_child_and_send(int server_fd, + int parent_port, + const std::vector& part, + int child_depth, + const Options& opt, + int& child_socket) { + pid_t pid = fork(); + if (pid < 0) throw_errno("fork"); + + if (pid == 0) { + try { + close_checked(server_fd); + int fd = connect_to_parent(parent_port); + auto task = recv_task(fd); + SortResult result = socket_recursive_sort(std::move(task.second), task.first, opt); + send_result(fd, result); + close_checked(fd); + _exit(0); + } catch (const std::exception& e) { + die_child(e.what()); + } + } + + child_socket = accept_client(server_fd); + send_task(child_socket, child_depth, part); + return pid; +} + +static SortResult socket_recursive_sort(std::vector a, int depth, const Options& opt) { + int my_port = port_for_pid(getpid(), opt.port_base); + if (opt.log) log_event("START", depth, a.size(), my_port); + + if (a.size() < 2 || depth >= opt.max_depth || a.size() <= opt.min_size) { + SortResult r = local_sort(std::move(a)); + r.processes = 1; + if (opt.log) log_event("END", depth, r.data.size(), my_port); + return r; + } + + const size_t mid = a.size() / 2; + std::vector left(a.begin(), a.begin() + static_cast(mid)); + std::vector right(a.begin() + static_cast(mid), a.end()); + + int actual_port = my_port; + int server_fd = create_server_socket(my_port, actual_port); + + int left_sock = -1; + pid_t left_pid = spawn_child_and_send(server_fd, actual_port, left, depth + 1, opt, left_sock); + + int right_sock = -1; + pid_t right_pid = spawn_child_and_send(server_fd, actual_port, right, depth + 1, opt, right_sock); + + close_checked(server_fd); + + SortResult left_result = recv_result(left_sock); + SortResult right_result = recv_result(right_sock); + close_checked(left_sock); + close_checked(right_sock); + + int status_left = 0, status_right = 0; + while (waitpid(left_pid, &status_left, 0) < 0 && errno == EINTR) {} + while (waitpid(right_pid, &status_right, 0) < 0 && errno == EINTR) {} + if (!WIFEXITED(status_left) || WEXITSTATUS(status_left) != 0) { + throw std::runtime_error("left child failed"); + } + if (!WIFEXITED(status_right) || WEXITSTATUS(status_right) != 0) { + throw std::runtime_error("right child failed"); + } + + SortResult result; + result.counter = left_result.counter + right_result.counter; + result.data = merge_sorted(left_result.data, right_result.data, result.counter); + result.processes = 1 + left_result.processes + right_result.processes; + + if (opt.log) log_event("END", depth, result.data.size(), my_port); + return result; +} + +static Options parse_args(int argc, char** argv) { + Options opt; + for (int i = 1; i < argc; ++i) { + std::string s = argv[i]; + auto need_value = [&](const std::string& name) -> std::string { + if (i + 1 >= argc) throw std::runtime_error("missing value for " + name); + return argv[++i]; + }; + if (s == "--size" || s == "-n") opt.size = std::stoull(need_value(s)); + else if (s == "--depth" || s == "-d") opt.max_depth = std::stoi(need_value(s)); + else if (s == "--min-size" || s == "-m") opt.min_size = std::stoull(need_value(s)); + else if (s == "--seed") opt.seed = static_cast(std::stoul(need_value(s))); + else if (s == "--port-base") opt.port_base = std::stoi(need_value(s)); + else if (s == "--print") opt.print = true; + else if (s == "--log") opt.log = true; + else if (s == "--help" || s == "-h") { + std::cout << "Usage: ./lab4 [--size N] [--depth D] [--min-size M] [--seed S] " + << "[--port-base P] [--print] [--log]\n"; + std::exit(0); + } else { + throw std::runtime_error("unknown argument: " + s); + } + } + if (opt.max_depth < 0) throw std::runtime_error("depth must be non-negative"); + if (opt.port_base < 1024 || opt.port_base > 65000) { + throw std::runtime_error("port-base must be in range 1024..65000"); + } + return opt; +} + +static std::vector generate_data(const Options& opt) { + std::vector a(opt.size); + std::mt19937 rng(opt.seed); + std::uniform_int_distribution dist(-100000000, 100000000); + for (auto& x : a) x = dist(rng); + return a; +} + +int main(int argc, char** argv) { + try { + Options opt = parse_args(argc, argv); + std::vector data = generate_data(opt); + + const auto t1 = std::chrono::steady_clock::now(); + SortResult result = socket_recursive_sort(std::move(data), 0, opt); + const auto t2 = std::chrono::steady_clock::now(); + const double elapsed = std::chrono::duration(t2 - t1).count(); + const bool ok = std::is_sorted(result.data.begin(), result.data.end()); + + if (opt.print) { + for (size_t i = 0; i < result.data.size(); ++i) { + if (i) std::cout << ' '; + std::cout << result.data[i]; + } + std::cout << '\n'; + } else { + std::cout << "Sorted list/array first 20 elements: "; + const size_t limit = std::min(20, result.data.size()); + for (size_t i = 0; i < limit; ++i) std::cout << result.data[i] << ' '; + std::cout << '\n'; + } + + std::cerr << "STAT: size=" << opt.size + << " depth=" << opt.max_depth + << " min_size=" << opt.min_size + << " processes=" << result.processes + << " counter=" << result.counter + << " valid=" << (ok ? 1 : 0) + << " time=" << elapsed << " sec\n"; + return ok ? 0 : 3; + } catch (const std::exception& e) { + std::cerr << "ERROR: " << e.what() << "\n"; + return 1; + } +} diff --git a/4/out_lab4/REPORT.md b/4/out_lab4/REPORT.md new file mode 100644 index 0000000..a96bd48 --- /dev/null +++ b/4/out_lab4/REPORT.md @@ -0,0 +1,22 @@ +# Lab4 sockets: основные графики + +Сгенерированы основные графики для отчета. + +Корневой родительский процесс исключен из графика дерева процессов и из временных диаграмм. + +## Графики + +1. `pics/01_time_by_depth.png` — время от глубины рекурсии. +2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`. +3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня. +4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки. +5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов. +6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем. + +## Краткая статистика + +| Сценарий | Запусков | Лучшее время | Лучший запуск | +|---|---:|---:|---| +| `depth_scaling` | 90 | 0.016914 | N=50000, depth=2, min_size=4096, child_proc=6 | +| `min_size_effect` | 30 | 0.057425 | N=200000, depth=5, min_size=50384, child_proc=6 | +| `timeline` | 1 | 0.010911 | N=8192, depth=3, min_size=64, child_proc=14 | diff --git a/4/out_lab4/pics/01_time_by_depth.png b/4/out_lab4/pics/01_time_by_depth.png new file mode 100644 index 0000000..cc8fd62 Binary files /dev/null and b/4/out_lab4/pics/01_time_by_depth.png differ diff --git a/4/out_lab4/pics/02_speedup_by_depth.png b/4/out_lab4/pics/02_speedup_by_depth.png new file mode 100644 index 0000000..7c89c55 Binary files /dev/null and b/4/out_lab4/pics/02_speedup_by_depth.png differ diff --git a/4/out_lab4/pics/03_child_processes_by_depth.png b/4/out_lab4/pics/03_child_processes_by_depth.png new file mode 100644 index 0000000..9b4909a Binary files /dev/null and b/4/out_lab4/pics/03_child_processes_by_depth.png differ diff --git a/4/out_lab4/pics/04_time_by_min_size.png b/4/out_lab4/pics/04_time_by_min_size.png new file mode 100644 index 0000000..ca3f63e Binary files /dev/null and b/4/out_lab4/pics/04_time_by_min_size.png differ diff --git a/4/out_lab4/pics/05_timeline_child_processes.png b/4/out_lab4/pics/05_timeline_child_processes.png new file mode 100644 index 0000000..9b82e1d Binary files /dev/null and b/4/out_lab4/pics/05_timeline_child_processes.png differ diff --git a/4/out_lab4/pics/06_process_tree_dfs.png b/4/out_lab4/pics/06_process_tree_dfs.png new file mode 100644 index 0000000..3fa6bed Binary files /dev/null and b/4/out_lab4/pics/06_process_tree_dfs.png differ