add 4, edit 3

This commit is contained in:
2026-05-12 19:13:20 +07:00
parent 17ae59ea48
commit 2ad3f7eaa4
16 changed files with 1944 additions and 94 deletions
+4 -17
View File
@@ -3,7 +3,7 @@ CXXFLAGS := -O2 -std=c++17 -Wall -Wextra -pedantic
TARGET := lab3 TARGET := lab3
OUT_DIR := out OUT_DIR := out
.PHONY: all clean run test bench timelines pack .PHONY: all run test export clean pack
all: $(TARGET) all: $(TARGET)
@@ -15,25 +15,12 @@ run: $(TARGET)
test: $(TARGET) test: $(TARGET)
python3 test_lab3.py python3 test_lab3.py
./$(TARGET) --size 0 --depth 3 --min-size 1 >/dev/null
./$(TARGET) --size 1 --depth 3 --min-size 1 >/dev/null
./$(TARGET) --size 10000 --depth 0 --min-size 1 >/dev/null
./$(TARGET) --size 10000 --depth 2 --min-size 128 >/dev/null
./$(TARGET) --size 10000 --depth 3 --min-size 256 --seed 2026 >/dev/null
./$(TARGET) --size 12345 --depth 4 --min-size 257 --seed 777 >/dev/null
bench: $(TARGET) export: $(TARGET)
python3 benchmark.py python3 exporter.py --bin ./$(TARGET) --out $(OUT_DIR)
timelines: $(TARGET)
mkdir -p $(OUT_DIR)/logs $(OUT_DIR)/pics
./$(TARGET) --size 2048 --depth 2 --min-size 64 --log > $(OUT_DIR)/logs/depth2.log 2>$(OUT_DIR)/logs/depth2.stat
./$(TARGET) --size 4096 --depth 3 --min-size 64 --log > $(OUT_DIR)/logs/depth3.log 2>$(OUT_DIR)/logs/depth3.stat
python3 exporter.py $(OUT_DIR)/logs/depth2.log $(OUT_DIR)/pics
python3 exporter.py $(OUT_DIR)/logs/depth3.log $(OUT_DIR)/pics
pack: clean pack: clean
zip -r lab3_process_pipes.zip main.cpp Makefile benchmark.py exporter.py test_lab3.py README.md zip -r lab3_process_pipes.zip main.cpp Makefile exporter.py test_lab3.py README.md
clean: clean:
rm -f $(TARGET) lab3_process_pipes.zip rm -f $(TARGET) lab3_process_pipes.zip
+15 -3
View File
@@ -22,10 +22,22 @@
- `uint64_t processes` — число процессов в поддереве. - `uint64_t processes` — число процессов в поддереве.
8. Родитель выполняет слияние двух отсортированных частей. 8. Родитель выполняет слияние двух отсортированных частей.
Для одного потомка используются два канала: родитель → потомок и потомок → родитель. Так как потомков два, на рекурсивном узле создается четыре канала. Для одного потомка используются два канала:
## Сборка и запуск - родитель → потомок;
- потомок → родитель.
Так как потомков два, на рекурсивном узле создается четыре канала.
## Файлы
- `main.cpp` — программа лабораторной работы.
- `Makefile` — сборка, запуск, тесты, экспорт графиков.
- `exporter.py` — единый экспортёр графиков и CSV.
- `test_lab3.py` — тесты корректности.
- `README.md` — описание.
## Сборка
```bash ```bash
make make
./lab3 --size 100000 --depth 2 --min-size 4096
+650 -29
View File
@@ -1,34 +1,389 @@
import os #!/usr/bin/env python3
"""
Single-file Lab3 exporter.
What it does:
1. Runs benchmark series by recursion depth.
2. Runs benchmark series by min_size threshold.
3. Runs one logged execution with --log.
4. Draws old benchmark graphs.
5. Draws old timeline/depth histogram graphs.
6. Adds DFS process-tree graph:
each process has its children directly below it;
only after a whole subtree is drawn, the next sibling is drawn.
Generated output:
out/benchmark_depth.csv
out/benchmark_min_size.csv
out/logs/timeline.log
out/pics/time_by_depth.png
out/pics/speedup_by_depth.png
out/pics/process_count_by_depth.png
out/pics/time_by_min_size.png
out/pics/timeline.png
out/pics/depth_hist.png
out/pics/process_tree_dfs.png
Usage:
python3 exporter.py
python3 exporter.py --bin ./lab3 --out out
python3 exporter.py --fast
python3 exporter.py --skip-run
"""
from __future__ import annotations
import argparse
import csv
import re import re
import shutil
import subprocess
import sys import sys
from collections import defaultdict from collections import defaultdict
from pathlib import Path
from typing import Dict, List
import matplotlib import matplotlib
matplotlib.use("Agg") matplotlib.use("Agg")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
if len(sys.argv) < 3: STAT_RE = re.compile(
print("Использование: python3 exporter.py <logfile> <output_dir>") r"STAT:.*size=(\d+).*depth=(\d+).*min_size=(\d+).*processes=(\d+).*valid=(\d+).*time=([\d.]+)"
sys.exit(1) )
logfile = sys.argv[1] EVENT_RE = re.compile(
out_dir = sys.argv[2]
os.makedirs(out_dir, exist_ok=True)
base = os.path.splitext(os.path.basename(logfile))[0]
pattern = re.compile(
r"(START|END) PID=(\d+) PPID=(\d+) depth=(\d+) size=(\d+) time=([\d.]+)" r"(START|END) PID=(\d+) PPID=(\d+) depth=(\d+) size=(\d+) time=([\d.]+)"
) )
def seed_for(size: int, depth: int, min_size: int, salt: int) -> int:
    """Derive a reproducible RNG seed from the parameters of one benchmark point.

    The seed is a fixed linear combination of the arguments, so each point
    gets pseudo-random input that is the same on every run of the exporter.

    Args:
        size: Number of elements to sort.
        depth: Maximum fork-recursion depth.
        min_size: Threshold below which a part is no longer split.
        salt: Per-series constant that separates the benchmark series.

    Returns:
        The integer seed passed to the binary via ``--seed``.
    """
    weighted_terms = (
        salt * 1_000_003,
        size * 17,
        depth * 1009,
        min_size * 31,
    )
    return 2026 + sum(weighted_terms)
def run_once(
    bin_path: str,
    size: int,
    depth: int,
    min_size: int,
    seed: int,
    log_path: Path | None = None,
) -> Dict[str, int | float | str]:
    """Run the sorter binary once and return the parsed ``STAT`` line.

    Args:
        bin_path: Path to the executable.
        size: Value for ``--size``.
        depth: Value for ``--depth``.
        min_size: Value for ``--min-size``.
        seed: Value for ``--seed``.
        log_path: When given, ``--log`` is added and the captured stdout is
            written to this file (parent directories are created).

    Returns:
        A dict with the keys ``size``, ``depth``, ``min_size``, ``processes``,
        ``valid``, ``time``, ``seed`` and ``logfile``.

    Raises:
        RuntimeError: If the process exits with a non-zero code, if no STAT
            line is found in stderr, or if the STAT line reports ``valid``
            other than ``1``.
    """
    want_log = log_path is not None

    cmd = [bin_path]
    for flag, value in (
        ("--size", size),
        ("--depth", depth),
        ("--min-size", min_size),
        ("--seed", seed),
    ):
        cmd += [flag, str(value)]
    if want_log:
        cmd.append("--log")

    # stdout is only captured when it is going to be stored as the event log.
    p = subprocess.run(
        cmd,
        stdout=subprocess.PIPE if want_log else subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
    )

    # The log is written before the exit code is checked, so the output of a
    # failed run is still available on disk.
    if want_log:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(p.stdout, encoding="utf-8")

    if p.returncode != 0:
        message = "".join(
            [
                "Command failed:\n",
                " ".join(cmd),
                "\n\nSTDOUT:\n",
                p.stdout or "",
                "\nSTDERR:\n",
                p.stderr or "",
            ]
        )
        raise RuntimeError(message)

    m = STAT_RE.search(p.stderr)
    if not m:
        raise RuntimeError(f"STAT not found:\n{p.stderr}")

    size_s, depth_s, min_size_s, processes_s, valid_s, time_s = m.groups()
    if valid_s != "1":
        raise RuntimeError(f"sort validation failed:\n{p.stderr}")

    return {
        "size": int(size_s),
        "depth": int(depth_s),
        "min_size": int(min_size_s),
        "processes": int(processes_s),
        "valid": int(valid_s),
        "time": float(time_s),
        "seed": seed,
        "logfile": str(log_path) if want_log else "",
    }
def save_csv(path: Path, rows: List[dict], header: List[str]) -> None:
    """Write *rows* to a CSV file restricted to the columns in *header*.

    Parent directories are created as needed. Keys of a row that are not in
    *header* are dropped; columns missing from a row are written as empty
    strings.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        writer.writerows(
            {column: record.get(column, "") for column in header}
            for record in rows
        )
def read_csv(path: Path) -> List[dict]:
    """Load a benchmark CSV and restore numeric column types.

    The columns ``size``, ``depth``, ``min_size``, ``processes``, ``valid``
    and ``seed`` are converted to ``int`` and ``time`` to ``float`` whenever
    they are present and non-empty; every other value stays a string.
    """
    int_columns = ("size", "depth", "min_size", "processes", "valid", "seed")

    def convert(raw: dict) -> dict:
        record = dict(raw)
        for column in int_columns:
            if column in record and record[column] != "":
                record[column] = int(record[column])
        if "time" in record and record["time"] != "":
            record["time"] = float(record["time"])
        return record

    with path.open("r", encoding="utf-8", newline="") as handle:
        return [convert(raw) for raw in csv.DictReader(handle)]
def save_plot(path: Path) -> None:
    """Save the current matplotlib figure to *path* and close it.

    Parent directories are created as needed; the figure gets a tight layout
    and is written at 140 dpi.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    figure = plt.gcf()
    figure.tight_layout()
    figure.savefig(path, dpi=140)
    plt.close(figure)
def clean_output(out_dir: Path) -> None:
    """Recreate an empty output tree with ``pics`` and ``logs`` subdirectories.

    Any existing *out_dir* is removed recursively first.

    Args:
        out_dir: Output directory to wipe and recreate.

    Raises:
        ValueError: If *out_dir* is the current working directory or one of
            its ancestors (including the filesystem root). Deleting it would
            also delete the directory the exporter is running from, for
            example after ``--out .``.
    """
    target = out_dir.resolve()
    cwd = Path.cwd().resolve()
    if target == cwd or target in cwd.parents:
        raise ValueError(
            f"Refusing to delete output directory {out_dir}: "
            "it contains the current working directory"
        )

    if out_dir.exists():
        shutil.rmtree(out_dir)

    for sub in ("pics", "logs"):
        (out_dir / sub).mkdir(parents=True, exist_ok=True)
def plot_depth_scaling(
    bin_path: str,
    out_dir: Path,
    fast: bool = False,
    skip_run: bool = False,
) -> List[dict]:
    """Benchmark the sorter across recursion depths and draw three graphs.

    Unless *skip_run* is set, the binary is run for every (size, depth) pair
    and the results are stored in ``benchmark_depth.csv``; with *skip_run* the
    rows are read back from that CSV instead. The function then writes
    ``time_by_depth.png``, ``speedup_by_depth.png`` and
    ``process_count_by_depth.png`` into ``out_dir / "pics"``.

    Args:
        bin_path: Path to the sorter executable.
        out_dir: Root output directory.
        fast: Use the smaller grid (depths 0..9, two sizes).
        skip_run: Plot from the existing CSV without running the binary.

    Returns:
        The benchmark rows, one dict per run.
    """
    pics = out_dir / "pics"
    csv_path = out_dir / "benchmark_depth.csv"

    if skip_run:
        rows = read_csv(csv_path)
    else:
        if fast:
            depths, sizes, min_size = list(range(10)), [20_000, 50_000], 2048
        else:
            # IMPORTANT: the depth graphs have exactly 30 points on X: 0..29.
            # Depth goes up to 29, but per the original author's note the
            # process count does not explode because min_size stops further
            # splitting. Several sizes give several lines on one graph.
            depths = list(range(30))
            sizes = [50_000, 100_000, 200_000]
            min_size = 4096

        rows = []
        for size in sizes:
            for d in depths:
                seed = seed_for(size, d, min_size, salt=1)
                r = run_once(bin_path, size, d, min_size, seed)
                rows.append({**r, "seed": seed})
                print(
                    f"depth_scaling: "
                    f"size={size} depth={d} min_size={min_size} "
                    f"seed={seed} processes={r['processes']} time={r['time']:.6f}",
                    flush=True,
                )

        save_csv(
            csv_path,
            rows,
            [
                "size",
                "depth",
                "min_size",
                "seed",
                "processes",
                "valid",
                "time",
                "logfile",
            ],
        )

    def series_by_size():
        # One (size, rows ordered by depth) pair per distinct input size.
        for size in sorted({r["size"] for r in rows}):
            points = sorted(
                (r for r in rows if r["size"] == size),
                key=lambda r: r["depth"],
            )
            yield size, points

    def speedup(points):
        # Speedup is measured against the first (shallowest) point of the series.
        base_time = points[0]["time"]
        return [base_time / r["time"] if r["time"] > 0 else 0 for r in points]

    def draw(y_values, marker, ylabel, title, filename):
        plt.figure(figsize=(12, 6))
        for size, points in series_by_size():
            plt.plot(
                [r["depth"] for r in points],
                y_values(points),
                marker=marker,
                label=f"N={size}",
            )
        plt.xlabel("Глубина порождения процессов")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.grid(True)
        plt.legend()
        save_plot(pics / filename)

    draw(
        lambda points: [r["time"] for r in points],
        "o",
        "Время, сек",
        "Зависимость времени сортировки от глубины fork-рекурсии",
        "time_by_depth.png",
    )
    draw(
        speedup,
        "s",
        "Ускорение относительно depth=0",
        "Ускорение при использовании процессов",
        "speedup_by_depth.png",
    )
    draw(
        lambda points: [r["processes"] for r in points],
        "^",
        "Количество процессов",
        "Размер дерева процессов",
        "process_count_by_depth.png",
    )

    return rows
def plot_threshold_effect(
    bin_path: str,
    out_dir: Path,
    fast: bool = False,
    skip_run: bool = False,
) -> List[dict]:
    """Benchmark the sorter across ``min_size`` thresholds and plot the times.

    Unless *skip_run* is set, the binary is run with size 200000 and depth 5
    for a log2-spaced series of thresholds and the results are stored in
    ``benchmark_min_size.csv``; with *skip_run* the rows are read back from
    that CSV. The graph is written to ``pics/time_by_min_size.png``.

    Args:
        bin_path: Path to the sorter executable.
        out_dir: Root output directory.
        fast: Use 15 thresholds instead of 30.
        skip_run: Plot from the existing CSV without running the binary.

    Returns:
        The benchmark rows sorted by ``min_size``.
    """
    pics = out_dir / "pics"
    csv_path = out_dir / "benchmark_min_size.csv"

    if skip_run:
        rows = read_csv(csv_path)
    else:
        size = 200_000
        depth = 5

        # Fast mode: 15 points from 2**7 to 2**15.
        # Full mode: exactly 30 points on X, from 128 to 131072, roughly
        # evenly spaced on a log2 scale.
        count, step = (15, 8 / 14) if fast else (30, 10 / 29)
        min_sizes = [round(2 ** (7 + i * step)) for i in range(count)]

        rows = []
        for m in min_sizes:
            seed = seed_for(size, depth, m, salt=2)
            r = run_once(bin_path, size, depth, m, seed)
            rows.append({**r, "seed": seed})
            print(
                f"min_size_effect: "
                f"size={size} depth={depth} min_size={m} "
                f"seed={seed} processes={r['processes']} time={r['time']:.6f}",
                flush=True,
            )

        save_csv(
            csv_path,
            rows,
            [
                "size",
                "depth",
                "min_size",
                "seed",
                "processes",
                "valid",
                "time",
                "logfile",
            ],
        )

    rows.sort(key=lambda r: r["min_size"])
    thresholds = [r["min_size"] for r in rows]
    timings = [r["time"] for r in rows]

    plt.figure(figsize=(12, 6))
    plt.plot(thresholds, timings, marker="o")
    plt.xscale("log", base=2)
    plt.xlabel("Минимальный размер части для fork")
    plt.ylabel("Время, сек")
    plt.title("Влияние порога min_size на производительность")
    plt.grid(True)
    save_plot(pics / "time_by_min_size.png")

    return rows
def parse_events(log_path: Path) -> List[dict]:
events = defaultdict(dict) events = defaultdict(dict)
with open(logfile, encoding="utf-8") as f: if not log_path.exists():
return []
with log_path.open("r", encoding="utf-8", errors="ignore") as f:
for line in f: for line in f:
m = pattern.search(line) m = EVENT_RE.search(line)
if not m: if not m:
continue continue
typ, pid, ppid, depth, size, t = m.groups() typ, pid, ppid, depth, size, t = m.groups()
key = (int(pid), int(depth), int(size)) key = (int(pid), int(depth), int(size))
events[key][typ] = float(t) events[key][typ] = float(t)
events[key]["pid"] = int(pid) events[key]["pid"] = int(pid)
events[key]["ppid"] = int(ppid) events[key]["ppid"] = int(ppid)
@@ -36,23 +391,38 @@ with open(logfile, encoding="utf-8") as f:
events[key]["size"] = int(size) events[key]["size"] = int(size)
rows = [] rows = []
for v in events.values(): for v in events.values():
if "START" in v and "END" in v: if "START" in v and "END" in v:
rows.append(v) rows.append(v)
if not rows:
print("В логе нет полных START/END событий")
sys.exit(1)
rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"])) rows.sort(key=lambda r: (r["START"], r["depth"], r["pid"]))
return rows
def plot_timeline(log_path: Path, out_dir: Path) -> None:
pics = out_dir / "pics"
rows = parse_events(log_path)
# Exclude root process: depth=0.
rows = [r for r in rows if r["depth"] != 0]
if not rows:
print("В логе нет полных START/END событий без корневого процесса depth=0")
return
base = log_path.stem
t0 = min(r["START"] for r in rows) t0 = min(r["START"] for r in rows)
# 1. Временная диаграмма: видно параллельность и время жизни каждого процесса. # Временная диаграмма: видно параллельность и время жизни дочерних процессов.
plt.figure(figsize=(12, max(5, len(rows) * 0.35))) plt.figure(figsize=(12, max(5, len(rows) * 0.35)))
for y, r in enumerate(rows): for y, r in enumerate(rows):
start = r["START"] - t0 start = r["START"] - t0
end = r["END"] - t0 end = r["END"] - t0
plt.plot([start, end], [y, y], linewidth=5) plt.plot([start, end], [y, y], linewidth=5)
plt.text( plt.text(
end, end,
y, y,
@@ -61,26 +431,277 @@ for y, r in enumerate(rows):
fontsize=8, fontsize=8,
) )
plt.xlabel("Время от начала, сек") plt.xlabel("Время от старта первого дочернего процесса, сек")
plt.ylabel("Процессы/задачи сортировки") plt.ylabel("Дочерние процессы / задачи сортировки")
plt.title(f"Временная диаграмма процессов: {base}") plt.title(f"Временная диаграмма дочерних процессов: {base}")
plt.grid(True) plt.grid(True)
plt.tight_layout() save_plot(pics / "timeline.png")
plt.savefig(os.path.join(out_dir, f"{base}_timeline.png"))
plt.close()
# 2. Гистограмма глубин: проверка, что дерево дошло до нужной глубины. # Гистограмма глубин без depth=0.
by_depth = defaultdict(int) by_depth = defaultdict(int)
for r in rows: for r in rows:
by_depth[r["depth"]] += 1 by_depth[r["depth"]] += 1
plt.figure(figsize=(8, 5)) plt.figure(figsize=(8, 5))
xs = sorted(by_depth) xs = sorted(by_depth)
plt.bar(xs, [by_depth[x] for x in xs]) plt.bar(xs, [by_depth[x] for x in xs])
plt.xlabel("Глубина рекурсии") plt.xlabel("Глубина рекурсии")
plt.ylabel("Количество процессов") plt.ylabel("Количество дочерних процессов")
plt.title(f"Распределение процессов по глубине: {base}") plt.title(f"Распределение дочерних процессов по глубине: {base}")
plt.grid(True, axis="y") plt.grid(True, axis="y")
plt.tight_layout() save_plot(pics / "depth_hist.png")
plt.savefig(os.path.join(out_dir, f"{base}_depth_hist.png"))
plt.close()
def plot_process_tree_dfs(
    log_path: Path, out_dir: Path, hide_root: bool = False
) -> None:
    """Draw the process tree as a timeline in depth-first (pre-order) order.

    Rows with ``depth == 0`` (the root process) are always excluded. Each
    remaining process is drawn as a horizontal bar from its START to its END
    time, and its children are placed directly below it. The next sibling is
    drawn only after the whole subtree. For example, if process 2 has children
    3 and 4, and process 3 has children 5 and 6, the order is 2, 3, 5, 6, 4.

    The image is written to ``pics/process_tree_dfs.png``. Nothing is drawn
    when the log has no complete non-root events.

    Args:
        log_path: Event log produced by a ``--log`` run.
        out_dir: Root output directory.
        hide_root: Accepted for interface compatibility; this function does
            not read it.
    """
    pics = out_dir / "pics"

    # Always exclude the root process: depth=0.
    rows = [r for r in parse_events(log_path) if r["depth"] != 0]
    if not rows:
        return

    by_pid = {r["pid"]: r for r in rows}

    def started_at(pid: int) -> float:
        return by_pid[pid]["START"]

    children = defaultdict(list)
    for r in rows:
        if r["ppid"] in by_pid:
            children[r["ppid"]].append(r["pid"])
    for siblings in children.values():
        siblings.sort(key=started_at)

    # Direct children of the hidden root, or processes whose parent is absent.
    roots = sorted(
        (r["pid"] for r in rows if r["ppid"] not in by_pid),
        key=started_at,
    )

    # Pre-order walk with an explicit stack; entries are pushed in reverse so
    # that the earliest-started child is popped first.
    ordered = []
    stack = [(pid, 0) for pid in reversed(roots)]
    while stack:
        pid, level = stack.pop()
        ordered.append((pid, level))
        stack.extend(
            (child_pid, level + 1)
            for child_pid in reversed(children.get(pid, []))
        )

    if not ordered:
        return

    y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)}
    t0 = min(started_at(pid) for pid, _ in ordered)

    plt.figure(figsize=(14, max(5, len(ordered) * 0.45)))

    for y, (pid, level) in enumerate(ordered):
        r = by_pid[pid]
        start = r["START"] - t0
        end = r["END"] - t0

        plt.plot([start, end], [y, y], linewidth=5)

        indent = " " * level
        label = f"{indent}pid={pid} d={r['depth']} n={r['size']}"
        plt.text(
            end,
            y,
            " " + label,
            va="center",
            fontsize=8,
        )

        parent_pid = r["ppid"]
        if parent_pid not in y_by_pid:
            continue

        # Dashed elbow from the parent's row to the child's start.
        parent_y = y_by_pid[parent_pid]
        parent_start = by_pid[parent_pid]["START"] - t0
        plt.plot(
            [start, start],
            [parent_y, y],
            linewidth=1,
            linestyle="--",
        )
        plt.plot(
            [parent_start, start],
            [parent_y, parent_y],
            linewidth=1,
            linestyle="--",
        )

    plt.gca().invert_yaxis()
    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дерево дочерних процессов в DFS-порядке")
    plt.title("Дерево процессов без корня: потомки расположены сразу под родителем")
    plt.grid(True)
    save_plot(pics / "process_tree_dfs.png")
def run_timeline_case(
    bin_path: str,
    out_dir: Path,
    size: int,
    depth: int,
    min_size: int,
) -> Path:
    """Run one logged execution and return the path of its event log.

    The log is stored at ``out_dir / "logs" / "timeline.log"``. A one-line
    summary of the run is printed to stdout.
    """
    log_path = out_dir / "logs" / "timeline.log"
    seed = seed_for(size, depth, min_size, salt=5)

    stats = run_once(bin_path, size, depth, min_size, seed, log_path)

    summary = (
        f"timeline: "
        f"size={size} depth={depth} min_size={min_size} "
        f"seed={seed} processes={stats['processes']} time={stats['time']:.6f} "
        f"log={log_path}"
    )
    print(summary, flush=True)

    return log_path
def generate_report(out_dir: Path) -> None:
    """Write ``REPORT.md`` describing the generated CSV files and graphs.

    The report is static text. *out_dir* is created if it does not exist yet.
    Both command examples are placed in their own, properly closed code
    fences so the Markdown renders correctly.

    Args:
        out_dir: Root output directory that receives ``REPORT.md``.
    """
    report = out_dir / "REPORT.md"
    lines = [
        "# Lab3 benchmark report",
        "",
        "Скрипт `exporter.py` запускает серию тестов и строит графики.",
        "",
        "## CSV",
        "",
        "- `benchmark_depth.csv` — серия по глубине рекурсии.",
        "- `benchmark_min_size.csv` — серия по порогу локальной сортировки.",
        "",
        "## Графики",
        "",
        "- `pics/time_by_depth.png` — зависимость времени сортировки от глубины.",
        "- `pics/speedup_by_depth.png` — ускорение относительно `depth=0`.",
        "- `pics/process_count_by_depth.png` — количество процессов.",
        "- `pics/time_by_min_size.png` — влияние `min_size`.",
        "- `pics/timeline.png` — временная диаграмма процессов.",
        "- `pics/depth_hist.png` — распределение процессов по глубине.",
        "- `pics/process_tree_dfs.png` — дерево процессов в DFS-порядке: "
        "потомки идут сразу под родителем.",
        "",
        "## Запуск",
        "",
        "```bash",
        "python3 exporter.py --bin ./lab3 --out out",
        "```",
        "",
        "Быстрый режим:",
        "",
        "```bash",
        "python3 exporter.py --bin ./lab3 --out out --fast",
        "```",
        "",
    ]

    out_dir.mkdir(parents=True, exist_ok=True)
    report.write_text("\n".join(lines), encoding="utf-8")
def main() -> int:
    """Command-line entry point: run the benchmarks, draw graphs, write the report.

    Without ``--skip-run`` the output directory is wiped and recreated, both
    benchmark series and one logged run are executed, and all graphs are
    drawn. With ``--skip-run`` nothing is executed; the graphs are rebuilt
    from the existing CSV files and the timeline log if it is present.

    Returns:
        Process exit code ``0``.
    """
    parser = argparse.ArgumentParser(description="Single-file Lab3 benchmark exporter")
    parser.add_argument("--bin", default="./lab3", help="Путь к бинарнику lab3")
    parser.add_argument("--out", default="out", help="Каталог вывода")
    parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки")
    parser.add_argument(
        "--skip-run",
        action="store_true",
        help="Не запускать benchmark, а построить графики из существующих CSV",
    )
    parser.add_argument(
        "--hide-root",
        action="store_true",
        help="Скрыть корневой процесс на DFS-графике дерева",
    )
    for flag, default in (
        ("--timeline-size", 8192),
        ("--timeline-depth", 3),
        ("--timeline-min-size", 64),
    ):
        parser.add_argument(flag, type=int, default=default)

    args = parser.parse_args()
    out_dir = Path(args.out)

    if args.skip_run:
        # Keep the existing results; only make sure the target folders exist.
        for sub in ("pics", "logs"):
            (out_dir / sub).mkdir(parents=True, exist_ok=True)
    else:
        clean_output(out_dir)

    shared = dict(
        bin_path=args.bin,
        out_dir=out_dir,
        fast=args.fast,
        skip_run=args.skip_run,
    )
    plot_depth_scaling(**shared)
    plot_threshold_effect(**shared)

    log_path = out_dir / "logs" / "timeline.log"
    if not args.skip_run:
        log_path = run_timeline_case(
            bin_path=args.bin,
            out_dir=out_dir,
            size=args.timeline_size,
            depth=args.timeline_depth,
            min_size=args.timeline_min_size,
        )

    if not log_path.exists():
        print(f"Timeline log not found: {log_path}", file=sys.stderr)
    else:
        plot_timeline(log_path, out_dir)
        plot_process_tree_dfs(log_path, out_dir, hide_root=args.hide_root)

    generate_report(out_dir)

    for message in (
        f"Графики сохранены в {out_dir / 'pics'}.",
        f"CSV сохранены в {out_dir}.",
        f"Лог сохранен в {out_dir / 'logs' / 'timeline.log'}.",
        f"Отчет сохранен в {out_dir / 'REPORT.md'}.",
    ):
        print(message)

    return 0
if __name__ == "__main__":
raise SystemExit(main())
+34 -9
View File
@@ -173,9 +173,12 @@ static pid_t spawn_sort_child(const std::vector<i32>& part,
try { try {
close_checked(to_child[1]); close_checked(to_child[1]);
close_checked(from_child[0]); close_checked(from_child[0]);
std::vector<i32> input = recv_vector(to_child[0]); std::vector<i32> input = recv_vector(to_child[0]);
close_checked(to_child[0]); close_checked(to_child[0]);
SortResult result = process_recursive_sort(std::move(input), child_depth, opt); SortResult result = process_recursive_sort(std::move(input), child_depth, opt);
send_result(from_child[1], result); send_result(from_child[1], result);
close_checked(from_child[1]); close_checked(from_child[1]);
_exit(0); _exit(0);
@@ -186,8 +189,10 @@ static pid_t spawn_sort_child(const std::vector<i32>& part,
close_checked(to_child[0]); close_checked(to_child[0]);
close_checked(from_child[1]); close_checked(from_child[1]);
send_vector(to_child[1], part); send_vector(to_child[1], part);
close_checked(to_child[1]); close_checked(to_child[1]);
result_read_fd = from_child[0]; result_read_fd = from_child[0];
return pid; return pid;
} }
@@ -205,21 +210,28 @@ static SortResult process_recursive_sort(std::vector<i32> a, int depth, const Op
std::vector<i32> left(a.begin(), a.begin() + static_cast<long>(mid)); std::vector<i32> left(a.begin(), a.begin() + static_cast<long>(mid));
std::vector<i32> right(a.begin() + static_cast<long>(mid), a.end()); std::vector<i32> right(a.begin() + static_cast<long>(mid), a.end());
int left_fd = -1, right_fd = -1; int left_fd = -1;
int right_fd = -1;
pid_t left_pid = spawn_sort_child(left, depth + 1, opt, left_fd); pid_t left_pid = spawn_sort_child(left, depth + 1, opt, left_fd);
pid_t right_pid = spawn_sort_child(right, depth + 1, opt, right_fd); pid_t right_pid = spawn_sort_child(right, depth + 1, opt, right_fd);
SortResult left_result = recv_result(left_fd); SortResult left_result = recv_result(left_fd);
SortResult right_result = recv_result(right_fd); SortResult right_result = recv_result(right_fd);
close_checked(left_fd); close_checked(left_fd);
close_checked(right_fd); close_checked(right_fd);
int status_left = 0, status_right = 0; int status_left = 0;
int status_right = 0;
while (waitpid(left_pid, &status_left, 0) < 0 && errno == EINTR) {} while (waitpid(left_pid, &status_left, 0) < 0 && errno == EINTR) {}
while (waitpid(right_pid, &status_right, 0) < 0 && errno == EINTR) {} while (waitpid(right_pid, &status_right, 0) < 0 && errno == EINTR) {}
if (!WIFEXITED(status_left) || WEXITSTATUS(status_left) != 0) { if (!WIFEXITED(status_left) || WEXITSTATUS(status_left) != 0) {
throw std::runtime_error("left child failed"); throw std::runtime_error("left child failed");
} }
if (!WIFEXITED(status_right) || WEXITSTATUS(status_right) != 0) { if (!WIFEXITED(status_right) || WEXITSTATUS(status_right) != 0) {
throw std::runtime_error("right child failed"); throw std::runtime_error("right child failed");
} }
@@ -234,19 +246,28 @@ static SortResult process_recursive_sort(std::vector<i32> a, int depth, const Op
static Options parse_args(int argc, char** argv) { static Options parse_args(int argc, char** argv) {
Options opt; Options opt;
for (int i = 1; i < argc; ++i) { for (int i = 1; i < argc; ++i) {
std::string s = argv[i]; std::string s = argv[i];
auto need_value = [&](const std::string& name) -> std::string { auto need_value = [&](const std::string& name) -> std::string {
if (i + 1 >= argc) throw std::runtime_error("missing value for " + name); if (i + 1 >= argc) throw std::runtime_error("missing value for " + name);
return argv[++i]; return argv[++i];
}; };
if (s == "--size" || s == "-n") opt.size = std::stoull(need_value(s));
else if (s == "--depth" || s == "-d") opt.max_depth = std::stoi(need_value(s)); if (s == "--size" || s == "-n") {
else if (s == "--min-size" || s == "-m") opt.min_size = std::stoull(need_value(s)); opt.size = std::stoull(need_value(s));
else if (s == "--seed") opt.seed = static_cast<unsigned>(std::stoul(need_value(s))); } else if (s == "--depth" || s == "-d") {
else if (s == "--print") opt.print = true; opt.max_depth = std::stoi(need_value(s));
else if (s == "--log") opt.log = true; } else if (s == "--min-size" || s == "-m") {
else if (s == "--help" || s == "-h") { opt.min_size = std::stoull(need_value(s));
} else if (s == "--seed") {
opt.seed = static_cast<unsigned>(std::stoul(need_value(s)));
} else if (s == "--print") {
opt.print = true;
} else if (s == "--log") {
opt.log = true;
} else if (s == "--help" || s == "-h") {
std::cout << "Usage: ./lab3 [--size N] [--depth D] [--min-size M] [--seed S] " std::cout << "Usage: ./lab3 [--size N] [--depth D] [--min-size M] [--seed S] "
<< "[--print] [--log]\n"; << "[--print] [--log]\n";
std::exit(0); std::exit(0);
@@ -254,6 +275,7 @@ static Options parse_args(int argc, char** argv) {
throw std::runtime_error("unknown argument: " + s); throw std::runtime_error("unknown argument: " + s);
} }
} }
if (opt.max_depth < 0) throw std::runtime_error("depth must be non-negative"); if (opt.max_depth < 0) throw std::runtime_error("depth must be non-negative");
return opt; return opt;
} }
@@ -264,6 +286,7 @@ static std::vector<i32> generate_data(const Options& opt) {
std::vector<i32> a(opt.size); std::vector<i32> a(opt.size);
std::mt19937 rng(opt.seed); std::mt19937 rng(opt.seed);
std::uniform_int_distribution<i32> dist(-100000000, 100000000); std::uniform_int_distribution<i32> dist(-100000000, 100000000);
for (auto& x : a) x = dist(rng); for (auto& x : a) x = dist(rng);
return a; return a;
} }
@@ -276,6 +299,7 @@ int main(int argc, char** argv) {
const auto t1 = std::chrono::steady_clock::now(); const auto t1 = std::chrono::steady_clock::now();
SortResult result = process_recursive_sort(std::move(data), 0, opt); SortResult result = process_recursive_sort(std::move(data), 0, opt);
const auto t2 = std::chrono::steady_clock::now(); const auto t2 = std::chrono::steady_clock::now();
const double elapsed = std::chrono::duration<double>(t2 - t1).count(); const double elapsed = std::chrono::duration<double>(t2 - t1).count();
const bool ok = std::is_sorted(result.data.begin(), result.data.end()); const bool ok = std::is_sorted(result.data.begin(), result.data.end());
@@ -293,6 +317,7 @@ int main(int argc, char** argv) {
<< " processes=" << result.processes << " processes=" << result.processes
<< " valid=" << (ok ? 1 : 0) << " valid=" << (ok ? 1 : 0)
<< " time=" << elapsed << " sec\n"; << " time=" << elapsed << " sec\n";
return ok ? 0 : 3; return ok ? 0 : 3;
} catch (const std::exception& e) { } catch (const std::exception& e) {
std::cerr << "ERROR: " << e.what() << "\n"; std::cerr << "ERROR: " << e.what() << "\n";
+1
View File
@@ -121,6 +121,7 @@ if __name__ == "__main__":
test_11_help_has_no_mode_argument, test_11_help_has_no_mode_argument,
test_12_unknown_mode_is_rejected, test_12_unknown_mode_is_rejected,
] ]
for t in tests: for t in tests:
t() t()
print(f"OK {t.__name__}") print(f"OK {t.__name__}")
+19
View File
@@ -0,0 +1,19 @@
CXX := g++
CXXFLAGS := -O2 -std=c++17 -Wall -Wextra -pedantic
TARGET := lab4
OUT_DIR := out_lab4
.PHONY: run export clean
$(TARGET): main.cpp
$(CXX) $(CXXFLAGS) main.cpp -o $(TARGET)
run: $(TARGET)
./$(TARGET) --size 10000 --depth 3 --min-size 256
export: $(TARGET)
python3 exporter.py --bin ./$(TARGET) --out $(OUT_DIR)
clean:
rm -f $(TARGET)
rm -rf $(OUT_DIR) out __pycache__
+53
View File
@@ -0,0 +1,53 @@
# Лабораторная работа 4. Сетевые соединения. Сокеты
Вариант из лабораторной работы 1: рекурсивная сортировка разделением.
Программа строит дерево процессов. Каждый внутренний процесс открывает TCP `ServerSocket` на `127.0.0.1` с портом `port_base + pid % span`, порождает двух потомков через `fork()`, принимает от них локальные соединения и передает им левую и правую части массива.
По умолчанию `port_base = 20000`. Это сделано из-за того, что чистый порт `pid` может оказаться меньше 1024 и потребовать root-права. Формула соответствует замечанию из задания про `ServerSocketBase + <pid родителя>`.
## Формат обмена
Родитель отправляет потомку:
1. `uint32_t depth` — глубина рекурсии потомка;
2. `uint64_t size` — размер массива;
3. `int32_t array[size]` — часть массива.
Потомок возвращает:
1. `uint64_t counter` — счетчик операций сравнения при слиянии;
2. `uint64_t size` — размер отсортированной части;
3. `int32_t array[size]` — отсортированная часть;
4. `uint64_t processes` — расширение для статистики количества процессов.
Первые три поля ответа соответствуют заданию: счетчик / размерность / массив.
## Сборка и запуск
```bash
make
./lab4 --size 10000 --depth 3 --min-size 1
```
Полезные параметры:
```bash
./lab4 --size 1000 --depth 2 --min-size 16 --seed 42 --print
./lab4 --size 2048 --depth 2 --min-size 64 --log
./lab4 --size 10000 --depth 3 --port-base 30000
```
## Проверка
```bash
make test
```
В конце программа печатает статистику в `stderr`:
```text
STAT: size=... depth=... min_size=... processes=... counter=... valid=1 time=... sec
```
`valid=1` означает, что итоговый массив действительно отсортирован.
+673
View File
@@ -0,0 +1,673 @@
#!/usr/bin/env python3
"""
Lab4 exporter: runs socket-sort benchmarks and creates report graphs.
Root parent process is excluded from process-tree/timeline visualizations:
- process graph shows child processes only: processes - 1
- timeline ignores depth=0 rows
- DFS tree ignores depth=0 root and places each process's children directly below it
Generated output:
out_lab4/csv/lab4_all_results.csv
out_lab4/pics/01_time_by_depth.png
out_lab4/pics/02_speedup_by_depth.png
out_lab4/pics/03_child_processes_by_depth.png
out_lab4/pics/04_time_by_min_size.png
out_lab4/pics/05_timeline_child_processes.png
out_lab4/pics/06_process_tree_dfs.png
out_lab4/logs/timeline.log
out_lab4/REPORT.md
Usage:
python3 exporter.py --bin ./lab4 --out out_lab4
python3 exporter.py --bin ./lab4 --out out_lab4 --fast
python3 exporter.py --bin ./lab4 --out out_lab4 --skip-run
"""
from __future__ import annotations
import argparse
import csv
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Iterable, List
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Matches the summary line the binary prints to stderr, e.g.
#   STAT: size=... depth=... min_size=... processes=... counter=... valid=1 time=... sec
# Groups: size, depth, min_size, processes, counter, valid, time.
# The time group also accepts an exponent suffix (e.g. 9.5e-05); without it a
# small duration printed in scientific notation would be truncated to "9.5".
# NOTE(review): assumes the binary prints the time with default iostream float
# formatting — confirm against main.cpp.
STAT_RE = re.compile(
    r"STAT:\s*.*?size=(\d+)\s+.*?depth=(\d+)\s+.*?min_size=(\d+)\s+"
    r".*?processes=(\d+)\s+.*?counter=(\d+)\s+.*?valid=(\d+)\s+"
    r".*?time=([\d.]+(?:[eE][+-]?\d+)?)"
)

# Matches one START/END event line of the --log output.
# Groups: event type, pid, ppid, depth, size, optional port, time.
EVENT_RE = re.compile(
    r"(START|END)\s+PID=(\d+)\s+PPID=(\d+)\s+depth=(\d+)\s+size=(\d+)"
    r"(?:\s+port=(\d+))?\s+time=([\d.]+)"
)
@dataclass
class RunResult:
    """Parsed outcome of a single benchmark run.

    The field order is significant: it defines the column order of the CSV
    produced from ``asdict``.
    """

    scenario: str  # name of the benchmark series the run belongs to
    size: int  # value parsed from the STAT line
    depth: int  # value parsed from the STAT line
    min_size: int  # value parsed from the STAT line
    seed: int  # seed passed on the command line
    port_base: int  # port base passed on the command line
    processes: int  # value parsed from the STAT line
    counter: int  # value parsed from the STAT line
    valid: int  # value parsed from the STAT line (1 means the check passed)
    time: float  # value parsed from the STAT line
    logfile: str = ""  # path of the stored event log, or "" when not logged

    @property
    def child_processes(self) -> int:
        """Number of processes excluding the root one, never below zero."""
        spawned = self.processes - 1
        return spawned if spawned > 0 else 0

    @property
    def speed(self) -> float:
        """Elements per second, or ``0.0`` when the time is not positive."""
        if self.time > 0:
            return self.size / self.time
        return 0.0
def seed_for(size: int, depth: int, min_size: int, salt: int) -> int:
    """Derive a reproducible RNG seed from the parameters of one benchmark point.

    The seed is a fixed linear combination of the arguments; *salt* separates
    the benchmark series from each other.
    """
    weighted_terms = (
        salt * 1_000_003,
        size * 17,
        depth * 1009,
        min_size * 31,
    )
    return 2026 + sum(weighted_terms)
def run_lab4(
    bin_path: str,
    size: int,
    depth: int,
    min_size: int,
    seed: int,
    port_base: int,
    scenario: str,
    log_path: Path | None = None,
) -> RunResult:
    """Run the lab4 binary once and return its parsed ``STAT`` line.

    Args:
        bin_path: Path to the executable.
        size: Value for ``--size``.
        depth: Value for ``--depth``.
        min_size: Value for ``--min-size``.
        seed: Value for ``--seed``.
        port_base: Value for ``--port-base``.
        scenario: Label stored in the result to identify the series.
        log_path: When given, ``--log`` is added and the captured stdout is
            written to this file (parent directories are created).

    Returns:
        The populated ``RunResult``.

    Raises:
        RuntimeError: If the process exits with a non-zero code, if no STAT
            line is found in stderr, or if the run reports ``valid != 1``.
    """
    want_log = log_path is not None

    cmd = [bin_path]
    for flag, value in (
        ("--size", size),
        ("--depth", depth),
        ("--min-size", min_size),
        ("--seed", seed),
        ("--port-base", port_base),
    ):
        cmd += [flag, str(value)]
    if want_log:
        cmd.append("--log")

    p = subprocess.run(
        cmd,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # The log is stored before the exit code is checked, so the output of a
    # failed run is still available on disk.
    if want_log:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(p.stdout, encoding="utf-8")

    if p.returncode != 0:
        message = "".join(
            [
                "Команда завершилась с ошибкой:\n",
                " ".join(cmd),
                "\n\nSTDOUT:\n",
                p.stdout,
                "\nSTDERR:\n",
                p.stderr,
            ]
        )
        raise RuntimeError(message)

    m = STAT_RE.search(p.stderr)
    if not m:
        raise RuntimeError("Не найден STAT в stderr:\n" + p.stderr)

    size_s, depth_s, min_size_s, processes_s, counter_s, valid_s, time_s = m.groups()
    result = RunResult(
        scenario=scenario,
        size=int(size_s),
        depth=int(depth_s),
        min_size=int(min_size_s),
        seed=seed,
        port_base=port_base,
        processes=int(processes_s),
        counter=int(counter_s),
        valid=int(valid_s),
        time=float(time_s),
        logfile=str(log_path) if want_log else "",
    )

    if result.valid != 1:
        raise RuntimeError("Сортировка не прошла проверку:\n" + p.stderr)

    return result
def group_by(rows: Iterable[RunResult], key_fn):
    """Bucket ``rows`` by ``key_fn(row)``, keeping input order in each bucket.

    Returns a ``defaultdict(list)``, so unseen keys yield an empty list.
    """
    buckets = defaultdict(list)
    for item in rows:
        bucket = buckets[key_fn(item)]
        bucket.append(item)
    return buckets
def write_csv(path: Path, rows: List[RunResult]) -> None:
    """Write ``rows`` to ``path`` as CSV, creating parent directories.

    The header is the RunResult field list, so an empty ``rows`` still yields
    a header-only file.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    if rows:
        columns = list(asdict(rows[0]))
    else:
        columns = list(RunResult.__dataclass_fields__)
    with path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(asdict(row) for row in rows)
def read_csv(path: Path) -> List[RunResult]:
    """Load RunResult rows back from a CSV file written by ``write_csv``.

    A missing ``logfile`` column is read as an empty string.
    """
    int_columns = (
        "size",
        "depth",
        "min_size",
        "seed",
        "port_base",
        "processes",
        "counter",
        "valid",
    )
    with path.open("r", encoding="utf-8", newline="") as f:
        return [
            RunResult(
                scenario=raw["scenario"],
                **{column: int(raw[column]) for column in int_columns},
                time=float(raw["time"]),
                logfile=raw.get("logfile", ""),
            )
            for raw in csv.DictReader(f)
        ]
def save_plot(path: Path) -> None:
    """Save the current pyplot figure to ``path`` at 140 dpi and close it."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(path, dpi=140)
    plt.close()
def clean_graphs(pics: Path) -> None:
    """Ensure ``pics`` exists and delete the ``*.png`` files directly in it."""
    pics.mkdir(parents=True, exist_ok=True)
    stale_images = pics.glob("*.png")
    for image in stale_images:
        image.unlink()
def plot_depth_graphs(rows: List[RunResult], pics: Path) -> None:
    """Draw graphs 01-03 from the "depth_scaling" rows, one line per array size.

    Produces time vs. depth, speedup vs. depth and child-process count vs.
    depth. Does nothing when there are no "depth_scaling" rows.
    """
    cur = [r for r in rows if r.scenario == "depth_scaling"]
    if not cur:
        return
    by_size = group_by(cur, lambda r: r.size)
    # 1. Main performance graph.
    plt.figure(figsize=(12, 6))
    for size, rs in sorted(by_size.items()):
        rs = sorted(rs, key=lambda r: r.depth)
        plt.plot(
            [r.depth for r in rs],
            [r.time for r in rs],
            marker="o",
            label=f"N={size}",
        )
    plt.xlabel("Глубина рекурсии")
    plt.ylabel("Время, сек")
    plt.title("1. Время сортировки от глубины рекурсии")
    plt.grid(True)
    plt.legend()
    save_plot(pics / "01_time_by_depth.png")
    # 2. Speedup against sequential depth=0.
    plt.figure(figsize=(12, 6))
    for size, rs in sorted(by_size.items()):
        rs = sorted(rs, key=lambda r: r.depth)
        # Baseline is the depth=0 run; without one, the shallowest run is used.
        base = next((r.time for r in rs if r.depth == 0), rs[0].time)
        speedup = [base / r.time if r.time > 0 else 0 for r in rs]
        plt.plot(
            [r.depth for r in rs],
            speedup,
            marker="s",
            label=f"N={size}",
        )
    plt.xlabel("Глубина рекурсии")
    plt.ylabel("Ускорение относительно depth=0")
    plt.title("2. Ускорение от использования процессов и сокетов")
    plt.grid(True)
    plt.legend()
    save_plot(pics / "02_speedup_by_depth.png")
    # 3. Child process tree size, root parent excluded.
    plt.figure(figsize=(12, 6))
    for size, rs in sorted(by_size.items()):
        rs = sorted(rs, key=lambda r: r.depth)
        plt.plot(
            [r.depth for r in rs],
            [r.child_processes for r in rs],
            marker="^",
            label=f"N={size}",
        )
    plt.xlabel("Глубина рекурсии")
    plt.ylabel("Количество дочерних процессов")
    plt.title("3. Размер дерева дочерних процессов без корневого родителя")
    plt.grid(True)
    plt.legend()
    save_plot(pics / "03_child_processes_by_depth.png")
def plot_min_size_graph(rows: List[RunResult], pics: Path) -> None:
    """Draw graph 04: run time against ``min_size`` on a log2 x axis.

    Only "min_size_effect" rows are used; nothing is drawn when there are none.
    """
    points = [r for r in rows if r.scenario == "min_size_effect"]
    if not points:
        return
    points.sort(key=lambda r: r.min_size)
    thresholds = [r.min_size for r in points]
    timings = [r.time for r in points]

    # 4. Threshold effect.
    plt.figure(figsize=(12, 6))
    plt.plot(thresholds, timings, marker="o")
    plt.xscale("log", base=2)
    plt.xlabel("Минимальный размер части для fork/socket")
    plt.ylabel("Время, сек")
    plt.title("4. Влияние min_size на время сортировки")
    plt.grid(True)
    save_plot(pics / "04_time_by_min_size.png")
def parse_events(log_path: Path) -> List[dict]:
    """Pair START/END log lines into one record per (pid, depth, size).

    Lines that do not match EVENT_RE and events at depth 0 (the root process)
    are skipped. Only records with both START and END are returned, sorted by
    (START, depth, pid). A missing log file yields an empty list.
    """
    if not log_path.exists():
        return []

    by_task: dict = {}
    text = log_path.read_text(encoding="utf-8", errors="ignore")
    for line in text.splitlines():
        found = EVENT_RE.search(line)
        if found is None:
            continue
        kind, pid_s, ppid_s, depth_s, size_s, port_s, time_s = found.groups()
        level = int(depth_s)
        # Remove root parent process from timeline/tree.
        if level == 0:
            continue
        pid = int(pid_s)
        size = int(size_s)
        record = by_task.setdefault((pid, level, size), {})
        record[kind] = float(time_s)
        record["pid"] = pid
        record["ppid"] = int(ppid_s)
        record["depth"] = level
        record["size"] = size
        record["port"] = int(port_s) if port_s else -1

    complete = [rec for rec in by_task.values() if "START" in rec and "END" in rec]
    complete.sort(key=lambda rec: (rec["START"], rec["depth"], rec["pid"]))
    return complete
def plot_timeline(log_path: Path, pics: Path) -> None:
    """Draw graph 05: one horizontal lifetime bar per logged child process.

    Times are shifted so that the earliest START is zero. Nothing is drawn
    when the log yields no complete START/END pairs.
    """
    tasks = parse_events(log_path)
    if not tasks:
        return
    origin = min(task["START"] for task in tasks)

    # 5. Timeline for child process lifetime and real parallelism.
    figure_height = max(5, len(tasks) * 0.38)
    plt.figure(figsize=(13, figure_height))
    for lane, task in enumerate(tasks):
        began = task["START"] - origin
        ended = task["END"] - origin
        plt.plot([began, ended], [lane, lane], linewidth=5)
        parts = [f"pid={task['pid']}", f"d={task['depth']}", f"n={task['size']}"]
        if task.get("port", -1) >= 0:
            parts.append(f"p={task['port']}")
        plt.text(ended, lane, " " + " ".join(parts), va="center", fontsize=8)
    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дочерние процессы / задачи сортировки")
    plt.title("5. Временная диаграмма дочерних процессов")
    plt.grid(True)
    save_plot(pics / "05_timeline_child_processes.png")
def plot_process_tree_dfs(log_path: Path, pics: Path) -> None:
    """
    Draw graph 06: the process tree reconstructed from the event log.

    The root (depth=0) parent is excluded by ``parse_events``.
    Children are placed directly below their parent in DFS order:

        depth=1 process A
            depth=2 child of A
                depth=3 child of that child
            depth=2 next child of A
        depth=1 process B
        ...

    Each process is a horizontal lifetime bar; dashed connectors link a child
    to its parent's row. Nothing is drawn when the log has no complete events.
    """
    rows = parse_events(log_path)
    if not rows:
        return
    by_pid = {r["pid"]: r for r in rows}
    # Parent pid -> child pids, only for parents that are themselves in the log.
    children = defaultdict(list)
    for r in rows:
        ppid = r["ppid"]
        if ppid in by_pid:
            children[ppid].append(r["pid"])
    for pid in children:
        children[pid].sort(key=lambda child_pid: by_pid[child_pid]["START"])
    # These are children of the hidden root or processes whose parent is absent
    # after filtering depth=0.
    roots = [r["pid"] for r in rows if r["ppid"] not in by_pid]
    roots.sort(key=lambda pid: by_pid[pid]["START"])
    # (pid, nesting level) pairs in pre-order; the list index is the plot row.
    ordered = []
    def dfs(pid: int, level: int) -> None:
        ordered.append((pid, level))
        for child_pid in children.get(pid, []):
            dfs(child_pid, level + 1)
    for root_pid in roots:
        dfs(root_pid, 0)
    if not ordered:
        return
    y_by_pid = {pid: y for y, (pid, _) in enumerate(ordered)}
    t0 = min(by_pid[pid]["START"] for pid, _ in ordered)
    plt.figure(figsize=(14, max(5, len(ordered) * 0.45)))
    for y, (pid, level) in enumerate(ordered):
        r = by_pid[pid]
        start = r["START"] - t0
        end = r["END"] - t0
        # Horizontal lifetime line.
        plt.plot([start, end], [y, y], linewidth=5)
        # Indentation makes nesting visible in labels.
        indent = " " * level
        label = f"{indent}pid={pid} d={r['depth']} n={r['size']}"
        if r.get("port", -1) >= 0:
            label += f" p={r['port']}"
        plt.text(end, y, " " + label, va="center", fontsize=8)
        # Parent-child connector.
        parent_pid = r["ppid"]
        if parent_pid in y_by_pid:
            parent_y = y_by_pid[parent_pid]
            parent_start = by_pid[parent_pid]["START"] - t0
            # Vertical segment at the child's start time, from parent row to child row.
            plt.plot(
                [start, start],
                [parent_y, y],
                linewidth=1,
                linestyle="--",
            )
            # Horizontal segment on the parent's row, from parent start to child start.
            plt.plot(
                [parent_start, start],
                [parent_y, parent_y],
                linewidth=1,
                linestyle="--",
            )
    # Row 0 goes to the top so the tree reads downwards.
    plt.gca().invert_yaxis()
    plt.xlabel("Время от старта первого дочернего процесса, сек")
    plt.ylabel("Дерево процессов в DFS-порядке")
    plt.title("6. Дерево процессов: потомки расположены сразу под родителем")
    plt.grid(True)
    save_plot(pics / "06_process_tree_dfs.png")
def make_experiments(args) -> List[RunResult]:
    """Run all benchmark scenarios sequentially and return their results.

    Three scenarios are executed: "depth_scaling" (every size/depth pair),
    "min_size_effect" (one run per threshold) and a single logged "timeline"
    run whose stdout is saved to ``<out>/logs/timeline.log``. Progress is
    printed after every run. ``args`` is the parsed argparse namespace; any
    RuntimeError raised by ``run_lab4`` propagates.
    """
    rows: List[RunResult] = []
    logs = Path(args.out) / "logs"
    logs.mkdir(parents=True, exist_ok=True)
    if args.fast:
        depth_sizes = [20_000, 50_000]
        depths = list(range(0, 10))
        # Despite the name, this is the min_size used for the depth series.
        min_size_depth = 2048
        # 15 thresholds spaced geometrically from 2**7 to 2**15.
        min_size_points = [round(2 ** (7 + i * (8 / 14))) for i in range(15)]
    else:
        # Two 30-point series are enough for a clean report without producing extra graphs.
        depth_sizes = [50_000, 100_000, 200_000]
        depths = list(range(0, 30))
        min_size_depth = 4096
        # 30 thresholds spaced geometrically from 2**7 to 2**17.
        min_size_points = [round(2 ** (7 + i * (10 / 29))) for i in range(30)]
    # 1) Depth scaling: feeds graphs 01, 02, 03.
    for size in depth_sizes:
        for depth in depths:
            seed = seed_for(size, depth, min_size_depth, 1)
            row = run_lab4(
                args.bin,
                size,
                depth,
                min_size_depth,
                seed,
                args.port_base,
                "depth_scaling",
            )
            rows.append(row)
            print(
                f"depth_scaling: "
                f"N={size} "
                f"depth={depth} "
                f"child_proc={row.child_processes} "
                f"time={row.time:.6f}",
                flush=True,
            )
    # 2) min_size effect: feeds graph 04.
    for min_size in min_size_points:
        seed = seed_for(
            args.min_size_test_size,
            args.min_size_test_depth,
            min_size,
            2,
        )
        row = run_lab4(
            args.bin,
            args.min_size_test_size,
            args.min_size_test_depth,
            min_size,
            seed,
            args.port_base,
            "min_size_effect",
        )
        rows.append(row)
        print(
            f"min_size_effect: "
            f"min_size={min_size} "
            f"child_proc={row.child_processes} "
            f"time={row.time:.6f}",
            flush=True,
        )
    # 3) One log run: feeds graphs 05 and 06.
    log_path = logs / "timeline.log"
    seed = seed_for(
        args.timeline_size,
        args.timeline_depth,
        args.timeline_min_size,
        5,
    )
    row = run_lab4(
        args.bin,
        args.timeline_size,
        args.timeline_depth,
        args.timeline_min_size,
        seed,
        args.port_base,
        "timeline",
        log_path=log_path,
    )
    rows.append(row)
    print(
        f"timeline: {log_path} child_proc={row.child_processes} time={row.time:.6f}",
        flush=True,
    )
    return rows
def generate_report(out_dir: Path, rows: List[RunResult]) -> None:
    """Write ``REPORT.md``: the graph list plus the fastest run per scenario."""
    preamble = (
        "# Lab4 sockets: основные графики\n\n",
        "Сгенерированы основные графики для отчета.\n\n",
        "Корневой родительский процесс исключен из графика дерева процессов "
        "и из временных диаграмм.\n\n",
        "## Графики\n\n",
        "1. `pics/01_time_by_depth.png` — время от глубины рекурсии.\n",
        "2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`.\n",
        "3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня.\n",
        "4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки.\n",
        "5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов.\n",
        "6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем.\n\n",
        "## Краткая статистика\n\n",
        "| Сценарий | Запусков | Лучшее время | Лучший запуск |\n",
        "|---|---:|---:|---|\n",
    )

    grouped = group_by(rows, lambda r: r.scenario)
    table_rows = []
    for scenario in sorted(grouped):
        runs = grouped[scenario]
        fastest = min(runs, key=lambda r: r.time)
        details = (
            f"N={fastest.size}, "
            f"depth={fastest.depth}, "
            f"min_size={fastest.min_size}, "
            f"child_proc={fastest.child_processes}"
        )
        table_rows.append(
            f"| `{scenario}` | {len(runs)} | {fastest.time:.6f} | {details} |\n"
        )

    report_text = "".join(preamble) + "".join(table_rows)
    (out_dir / "REPORT.md").write_text(report_text, encoding="utf-8")
def build_plots(out_dir: Path, rows: List[RunResult]) -> None:
    """Regenerate every graph under ``out_dir/pics`` and then the report.

    The timeline and tree graphs use the log of the first "timeline" row that
    has a non-empty ``logfile``; they are skipped when there is no such row.
    """
    pics = out_dir / "pics"
    clean_graphs(pics)
    plot_depth_graphs(rows, pics)
    plot_min_size_graph(rows, pics)

    for row in rows:
        if row.scenario == "timeline" and row.logfile:
            event_log = Path(row.logfile)
            plot_timeline(event_log, pics)
            plot_process_tree_dfs(event_log, pics)
            break

    generate_report(out_dir, rows)
def main() -> int:
    """Parse the command line, run or reload the experiments, build the graphs.

    Returns:
        0 on success; 2 when ``--skip-run`` is given but the CSV is missing,
        or when the output directory is unsafe to wipe.
    """
    parser = argparse.ArgumentParser(description="Exporter for Lab4 socket sort graphs")
    parser.add_argument("--bin", default="./lab4", help="Путь к бинарнику lab4")
    parser.add_argument("--out", default="out_lab4", help="Каталог вывода")
    parser.add_argument(
        "--port-base", type=int, default=20000, help="Базовый порт для lab4"
    )
    parser.add_argument("--fast", action="store_true", help="Быстрый режим проверки")
    parser.add_argument(
        "--skip-run", action="store_true", help="Строить графики из существующего CSV"
    )
    parser.add_argument("--min-size-test-size", type=int, default=200_000)
    parser.add_argument("--min-size-test-depth", type=int, default=5)
    parser.add_argument("--timeline-size", type=int, default=8192)
    parser.add_argument("--timeline-depth", type=int, default=3)
    parser.add_argument("--timeline-min-size", type=int, default=64)
    args = parser.parse_args()

    out_dir = Path(args.out)
    csv_path = out_dir / "csv" / "lab4_all_results.csv"

    if args.skip_run:
        if not csv_path.exists():
            print(f"CSV не найден: {csv_path}", file=sys.stderr)
            return 2
        rows = read_csv(csv_path)
    else:
        # Keep old output from previous exporter versions from confusing the report.
        if out_dir.exists():
            # rmtree removes the whole directory, so refuse to wipe the working
            # directory or any of its ancestors (e.g. "--out ." or "--out /").
            resolved_out = out_dir.resolve()
            cwd = Path.cwd().resolve()
            if resolved_out == cwd or resolved_out in cwd.parents:
                print(
                    f"Небезопасный каталог вывода (был бы удален целиком): {out_dir}",
                    file=sys.stderr,
                )
                return 2
            shutil.rmtree(out_dir)
        rows = make_experiments(args)
        write_csv(csv_path, rows)

    build_plots(out_dir, rows)

    print("\nГотово: построены основные графики, включая DFS-дерево процессов.")
    print("Корневой родительский процесс исключен из дерева/таймлайна.")
    print(f"CSV: {csv_path}")
    print(f"Графики: {out_dir / 'pics'}")
    print(f"Отчет: {out_dir / 'REPORT.md'}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
+437
View File
@@ -0,0 +1,437 @@
#include <algorithm>
#include <arpa/inet.h>
#include <cerrno>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <vector>
// Lab 4. Network connections. Sockets.
// Variant from lab 1: recursive divide-and-merge sort.
//
// Unlike lab1/lab3, data between a parent and its children is passed over
// localhost TCP sockets rather than shared memory or pipe().
// Every process that has to split its task opens a server socket on
// 127.0.0.1 at a port derived from port_base and its pid, forks two children
// and sends each of them its part of the array as: depth / size / array.
// Children reply with: counter / size / sorted_array / process_count.

// Short fixed-width aliases used for array elements and wire-format fields.
using i32 = int32_t;
using u32 = uint32_t;
using u64 = uint64_t;
// Command-line settings of one run (filled in by parse_args).
struct Options {
    size_t size = 10000;     // number of elements to generate and sort (--size / -n)
    int max_depth = 3;       // recursion depth at which splitting stops (--depth / -d)
    size_t min_size = 1;     // arrays of at most this many elements are sorted locally (--min-size / -m)
    unsigned seed = 1337;    // seed of the random generator (--seed)
    int port_base = 20000;   // base of the port range, validated to 1024..65000 (--port-base)
    bool print = false;      // print the whole sorted array instead of the first 20 elements (--print)
    bool log = false;        // emit START/END events to stdout (--log)
};
// Result of sorting one (sub)array, as returned locally or received from a child.
struct SortResult {
    std::vector<i32> data;  // sorted elements
    u64 counter = 0;        // number of merge comparisons performed
    u64 processes = 1;      // size of the current process subtree, including this process
};
// Seconds elapsed on the steady clock since the first call of this function
// in this process (the epoch is captured lazily on that first call).
static double now_seconds() {
    static const std::chrono::steady_clock::time_point epoch =
        std::chrono::steady_clock::now();
    const std::chrono::duration<double> since_epoch =
        std::chrono::steady_clock::now() - epoch;
    return since_epoch.count();
}
// Writes one event line to stdout:
//   <type> PID=<pid> PPID=<ppid> depth=<depth> size=<n> [port=<port>] time=<seconds>
// The port field is omitted when port is negative. The line is built in memory
// and issued with a single write() so that lines from concurrently running
// processes are not interleaved through stdio buffers.
static void log_event(const char* type, int depth, size_t n, int port = -1) {
    std::ostringstream ss;
    // Fixed notation: default formatting prints small timestamps as e.g.
    // "8.7e-05", which a plain digits-and-dots parser would read as 8.7.
    ss << std::fixed;
    ss.precision(9);
    ss << type
       << " PID=" << static_cast<long>(getpid())
       << " PPID=" << static_cast<long>(getppid())
       << " depth=" << depth
       << " size=" << n;
    if (port >= 0) ss << " port=" << port;
    ss << " time=" << now_seconds() << '\n';
    const std::string s = ss.str();
    // Best-effort logging: a failed or short write is deliberately ignored.
    (void)!write(STDOUT_FILENO, s.data(), s.size());
}
// Reports a fatal error of a child process on stderr and terminates it
// immediately with exit status 2. _exit() is used, so no atexit handlers run
// and no stdio buffers are flushed.
[[noreturn]] static void die_child(const std::string& msg) {
    const std::string line =
        "CHILD_ERROR pid=" + std::to_string(getpid()) + " " + msg + "\n";
    std::cerr << line;
    _exit(2);
}
// Throws std::runtime_error("<what>: <strerror(errno)>") and never returns.
// errno is captured first, because building the message may allocate and
// allocation is allowed to modify errno.
[[noreturn]] static void throw_errno(const std::string& what) {
    const int saved_errno = errno;
    throw std::runtime_error(what + ": " + std::strerror(saved_errno));
}
// Closes fd if it is a valid (non-negative) descriptor; errors are ignored.
// close() is called exactly once: on Linux the descriptor is released even
// when close() fails with EINTR, so a retry would hit EBADF or, worse, close
// a descriptor that was reopened in the meantime.
static void close_checked(int fd) {
    if (fd < 0) return;
    (void)close(fd);
}
// Applies the options shared by listening, accepted and client sockets.
// Every setsockopt() is best-effort: failures are ignored.
static void set_common_socket_options(int fd) {
    int yes = 1;
    // Allow rebinding a port that still has connections in TIME_WAIT.
    (void)setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
    // SO_REUSEPORT is deliberately NOT set: it would let several processes of
    // the same user listen on one port, so a colliding bind() would succeed
    // instead of failing with EADDRINUSE and children could connect to the
    // wrong parent.
    (void)setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes));
    // Reads and writes stay blocking, but get a 30-second timeout so that a
    // broken connection does not turn into an endless hang.
    timeval tv{};
    tv.tv_sec = 30;
    tv.tv_usec = 0;
    (void)setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
    (void)setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
}
// Maps a pid to a port in [port_base, 65534]: port_base + pid mod (65535 - port_base).
// Throws std::runtime_error when port_base is outside 1024..65000.
static int port_for_pid(pid_t pid, int port_base) {
    const bool base_in_range = port_base >= 1024 && port_base <= 65000;
    if (!base_in_range) {
        throw std::runtime_error("port-base must be in range 1024..65000");
    }
    const unsigned long span = static_cast<unsigned long>(65535 - port_base);
    const unsigned long offset = static_cast<unsigned long>(pid) % span;
    return port_base + static_cast<int>(offset);
}
// Sends exactly `bytes` bytes from ptr over fd, looping over short sends.
// EINTR is retried; any other send() error, or a send() returning 0, throws
// std::runtime_error. MSG_NOSIGNAL is passed to send().
static void write_all(int fd, const void* ptr, size_t bytes) {
    const char* cursor = static_cast<const char*>(ptr);
    size_t remaining = bytes;
    while (remaining != 0) {
        const ssize_t sent = send(fd, cursor, remaining, MSG_NOSIGNAL);
        if (sent > 0) {
            cursor += sent;
            remaining -= static_cast<size_t>(sent);
            continue;
        }
        if (sent == 0) throw std::runtime_error("send returned 0");
        if (errno != EINTR) throw_errno("send");
    }
}
// Receives exactly `bytes` bytes from fd into ptr, looping over short reads.
// EINTR is retried; any other recv() error, or EOF before all bytes arrived,
// throws std::runtime_error.
static void read_all(int fd, void* ptr, size_t bytes) {
    char* cursor = static_cast<char*>(ptr);
    size_t remaining = bytes;
    while (remaining != 0) {
        const ssize_t got = recv(fd, cursor, remaining, MSG_WAITALL);
        if (got > 0) {
            cursor += got;
            remaining -= static_cast<size_t>(got);
            continue;
        }
        if (got == 0) throw std::runtime_error("unexpected EOF in socket");
        if (errno != EINTR) throw_errno("recv");
    }
}
static void send_u32(int fd, u32 v) { write_all(fd, &v, sizeof(v)); }
static void send_u64(int fd, u64 v) { write_all(fd, &v, sizeof(v)); }
static u32 recv_u32(int fd) { u32 v = 0; read_all(fd, &v, sizeof(v)); return v; }
static u64 recv_u64(int fd) { u64 v = 0; read_all(fd, &v, sizeof(v)); return v; }
static void send_task(int fd, int depth, const std::vector<i32>& a) {
send_u32(fd, static_cast<u32>(depth));
send_u64(fd, static_cast<u64>(a.size()));
if (!a.empty()) write_all(fd, a.data(), a.size() * sizeof(i32));
}
static std::pair<int, std::vector<i32>> recv_task(int fd) {
int depth = static_cast<int>(recv_u32(fd));
u64 n = recv_u64(fd);
if (n > static_cast<u64>(SIZE_MAX / sizeof(i32))) {
throw std::runtime_error("too large task array");
}
std::vector<i32> a(static_cast<size_t>(n));
if (!a.empty()) read_all(fd, a.data(), a.size() * sizeof(i32));
return {depth, std::move(a)};
}
static void send_result(int fd, const SortResult& result) {
send_u64(fd, result.counter);
send_u64(fd, static_cast<u64>(result.data.size()));
if (!result.data.empty()) write_all(fd, result.data.data(), result.data.size() * sizeof(i32));
// Расширение протокола для статистики. Первые три поля соответствуют заданию:
// counter / size / array.
send_u64(fd, result.processes);
}
// Receives a result written by send_result.
// Throws std::runtime_error when the announced element count could not be
// addressed in memory.
static SortResult recv_result(int fd) {
    SortResult received;
    received.counter = recv_u64(fd);
    const u64 count = recv_u64(fd);
    const u64 max_count = static_cast<u64>(SIZE_MAX / sizeof(i32));
    if (count > max_count) {
        throw std::runtime_error("too large result array");
    }
    std::vector<i32>& payload = received.data;
    payload.resize(static_cast<size_t>(count));
    if (!payload.empty()) {
        read_all(fd, payload.data(), payload.size() * sizeof(i32));
    }
    received.processes = recv_u64(fd);
    return received;
}
// Creates a listening TCP socket on 127.0.0.1 and returns its descriptor.
//
// preferred_port is tried first, as the assignment derives the port from the
// pid. During long benchmark series that port may still be held by the kernel
// or collide with another process, so on EADDRINUSE/EACCES up to 2000
// following ports are tried (wrapping from 65535 back to 1024). The port that
// was actually bound is stored in actual_port and must be the one handed to
// the children.
//
// Throws std::runtime_error on any other socket/bind/listen failure, or when
// none of the candidate ports could be used.
static int create_server_socket(int preferred_port, int& actual_port) {
    std::string last_error;
    for (int shift = 0; shift < 2000; ++shift) {
        int port = preferred_port + shift;
        if (port > 65535) port = 1024 + (port - 65536);

        int fd = socket(AF_INET, SOCK_STREAM, 0);
        if (fd < 0) throw_errno("socket");
        set_common_socket_options(fd);

        sockaddr_in addr{};
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        addr.sin_port = htons(static_cast<uint16_t>(port));

        const char* failed_call = "bind";
        if (bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) == 0) {
            if (listen(fd, 2) == 0) {
                actual_port = port;
                return fd;
            }
            // listen() can also report EADDRINUSE when another socket already
            // listens on this port; that case moves on to the next port too.
            failed_call = "listen";
        }

        // Capture errno before close(), which is allowed to overwrite it.
        const int err = errno;
        close_checked(fd);
        last_error = std::strerror(err);
        if (err != EADDRINUSE && err != EACCES) {
            throw std::runtime_error(std::string(failed_call) + " 127.0.0.1:" +
                                     std::to_string(port) + ": " + last_error);
        }
    }
    throw std::runtime_error("cannot bind server socket near port " +
                             std::to_string(preferred_port) + ": " + last_error);
}
// Accepts one connection on server_fd, retrying when accept() is interrupted
// by a signal, applies the common socket options and returns the descriptor.
// Throws std::runtime_error on any other accept() failure.
static int accept_client(int server_fd) {
    int client = -1;
    do {
        client = accept(server_fd, nullptr, nullptr);
    } while (client < 0 && errno == EINTR);
    if (client < 0) throw_errno("accept");
    set_common_socket_options(client);
    return client;
}
// Connects to the parent's server socket on 127.0.0.1:port and returns the
// connected descriptor.
//
// The server is normally already listening after fork(), but a short retry
// (up to 200 attempts, 1 ms apart) on ECONNREFUSED/EINTR makes startup more
// stable. A fresh socket is created for every attempt, because the state of a
// socket after a failed connect() is unspecified.
//
// Throws std::runtime_error("connect 127.0.0.1:<port>: <reason>") when the
// connection cannot be established.
static int connect_to_parent(int port) {
    sockaddr_in addr{};
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    addr.sin_port = htons(static_cast<uint16_t>(port));

    int last_errno = ECONNREFUSED;
    for (int attempt = 0; attempt < 200; ++attempt) {
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        if (fd < 0) throw_errno("socket");
        set_common_socket_options(fd);

        if (connect(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) == 0) return fd;

        // Remember the connect() error before close()/usleep() can overwrite it.
        last_errno = errno;
        close_checked(fd);
        if (last_errno != ECONNREFUSED && last_errno != EINTR) break;
        usleep(1000);
    }
    throw std::runtime_error("connect 127.0.0.1:" + std::to_string(port) + ": " +
                             std::strerror(last_errno));
}
static std::vector<i32> merge_sorted(const std::vector<i32>& left,
const std::vector<i32>& right,
u64& counter) {
std::vector<i32> out;
out.reserve(left.size() + right.size());
size_t i = 0, j = 0;
while (i < left.size() && j < right.size()) {
++counter;
if (left[i] <= right[j]) out.push_back(left[i++]);
else out.push_back(right[j++]);
}
out.insert(out.end(), left.begin() + static_cast<long>(i), left.end());
out.insert(out.end(), right.begin() + static_cast<long>(j), right.end());
return out;
}
// In-process recursive merge sort. Returns the sorted data together with the
// total number of merge comparisons; `processes` keeps its default of 1.
static SortResult local_sort(std::vector<i32> a) {
    SortResult sorted;
    if (a.size() >= 2) {
        const auto middle = a.begin() + static_cast<long>(a.size() / 2);
        SortResult lower = local_sort(std::vector<i32>(a.begin(), middle));
        SortResult upper = local_sort(std::vector<i32>(middle, a.end()));
        sorted.counter = lower.counter + upper.counter;
        sorted.data = merge_sorted(lower.data, upper.data, sorted.counter);
    } else {
        // Zero or one element: already sorted.
        sorted.data = std::move(a);
    }
    return sorted;
}
// Forward declaration: the child branch of spawn_child_and_send recurses into it.
static SortResult socket_recursive_sort(std::vector<i32> a, int depth, const Options& opt);

// Forks one child, accepts its connection and sends it `part` as a task.
//
// Child branch: closes the inherited listening socket, connects back to
// parent_port, receives its task, sorts it with socket_recursive_sort, sends
// the result and terminates with _exit(0). Any std::exception ends the child
// through die_child(). The child never returns from this function.
//
// Parent branch: stores the accepted connection in child_socket, sends the
// task over it and returns the child's pid. Throws std::runtime_error when
// fork(), accept() or sending fails.
static pid_t spawn_child_and_send(int server_fd,
                                  int parent_port,
                                  const std::vector<i32>& part,
                                  int child_depth,
                                  const Options& opt,
                                  int& child_socket) {
    pid_t pid = fork();
    if (pid < 0) throw_errno("fork");
    if (pid == 0) {
        try {
            // The child only talks to its parent; it does not need the listener.
            close_checked(server_fd);
            int fd = connect_to_parent(parent_port);
            auto task = recv_task(fd);
            SortResult result = socket_recursive_sort(std::move(task.second), task.first, opt);
            send_result(fd, result);
            close_checked(fd);
            _exit(0);
        } catch (const std::exception& e) {
            die_child(e.what());
        }
    }
    // Parent: callers spawn one child at a time, so the next connection is
    // expected to come from the child forked above.
    child_socket = accept_client(server_fd);
    send_task(child_socket, child_depth, part);
    return pid;
}
// Sorts `a`, delegating the two halves to child processes over TCP sockets
// while splitting is still allowed.
//
// The array is sorted locally (no children) when it has fewer than two
// elements, when `depth` has reached opt.max_depth, or when its size is at
// most opt.min_size. Otherwise two children are spawned one after another,
// each receives one half at depth + 1, and their sorted results are merged.
//
// The returned `processes` counts this process plus both child subtrees.
// With opt.log set, START and END events are written for this task.
// Throws std::runtime_error on socket errors or when a child does not exit
// with status 0.
static SortResult socket_recursive_sort(std::vector<i32> a, int depth, const Options& opt) {
    int my_port = port_for_pid(getpid(), opt.port_base);
    if (opt.log) log_event("START", depth, a.size(), my_port);
    if (a.size() < 2 || depth >= opt.max_depth || a.size() <= opt.min_size) {
        SortResult r = local_sort(std::move(a));
        r.processes = 1;
        if (opt.log) log_event("END", depth, r.data.size(), my_port);
        return r;
    }
    const size_t mid = a.size() / 2;
    std::vector<i32> left(a.begin(), a.begin() + static_cast<long>(mid));
    std::vector<i32> right(a.begin() + static_cast<long>(mid), a.end());
    // The socket may end up on a different port than my_port; children are
    // given the port that was actually bound.
    int actual_port = my_port;
    int server_fd = create_server_socket(my_port, actual_port);
    int left_sock = -1;
    pid_t left_pid = spawn_child_and_send(server_fd, actual_port, left, depth + 1, opt, left_sock);
    int right_sock = -1;
    pid_t right_pid = spawn_child_and_send(server_fd, actual_port, right, depth + 1, opt, right_sock);
    // Both children are connected; the listener is no longer needed.
    close_checked(server_fd);
    // Results are read in full, left first and then right.
    SortResult left_result = recv_result(left_sock);
    SortResult right_result = recv_result(right_sock);
    close_checked(left_sock);
    close_checked(right_sock);
    // Reap both children, retrying waitpid() when interrupted by a signal.
    int status_left = 0, status_right = 0;
    while (waitpid(left_pid, &status_left, 0) < 0 && errno == EINTR) {}
    while (waitpid(right_pid, &status_right, 0) < 0 && errno == EINTR) {}
    if (!WIFEXITED(status_left) || WEXITSTATUS(status_left) != 0) {
        throw std::runtime_error("left child failed");
    }
    if (!WIFEXITED(status_right) || WEXITSTATUS(status_right) != 0) {
        throw std::runtime_error("right child failed");
    }
    SortResult result;
    result.counter = left_result.counter + right_result.counter;
    result.data = merge_sorted(left_result.data, right_result.data, result.counter);
    result.processes = 1 + left_result.processes + right_result.processes;
    if (opt.log) log_event("END", depth, result.data.size(), my_port);
    return result;
}
// Parses the command line into Options.
// --help / -h prints the usage line and exits with status 0.
// Throws std::runtime_error for an unknown argument, a missing option value,
// a negative depth, or a port base outside 1024..65000; number conversion
// errors from std::sto* propagate as thrown by the standard library.
static Options parse_args(int argc, char** argv) {
    Options opt;
    int i = 1;

    // Returns the value following the option at argv[i] and advances past it.
    const auto value_of = [&](const std::string& name) -> std::string {
        if (i + 1 >= argc) throw std::runtime_error("missing value for " + name);
        return argv[++i];
    };

    for (; i < argc; ++i) {
        const std::string arg = argv[i];
        if (arg == "--help" || arg == "-h") {
            std::cout << "Usage: ./lab4 [--size N] [--depth D] [--min-size M] [--seed S] "
                      << "[--port-base P] [--print] [--log]\n";
            std::exit(0);
        }
        if (arg == "--size" || arg == "-n") {
            opt.size = std::stoull(value_of(arg));
        } else if (arg == "--depth" || arg == "-d") {
            opt.max_depth = std::stoi(value_of(arg));
        } else if (arg == "--min-size" || arg == "-m") {
            opt.min_size = std::stoull(value_of(arg));
        } else if (arg == "--seed") {
            opt.seed = static_cast<unsigned>(std::stoul(value_of(arg)));
        } else if (arg == "--port-base") {
            opt.port_base = std::stoi(value_of(arg));
        } else if (arg == "--print") {
            opt.print = true;
        } else if (arg == "--log") {
            opt.log = true;
        } else {
            throw std::runtime_error("unknown argument: " + arg);
        }
    }

    if (opt.max_depth < 0) throw std::runtime_error("depth must be non-negative");
    const bool port_base_ok = opt.port_base >= 1024 && opt.port_base <= 65000;
    if (!port_base_ok) {
        throw std::runtime_error("port-base must be in range 1024..65000");
    }
    return opt;
}
static std::vector<i32> generate_data(const Options& opt) {
std::vector<i32> a(opt.size);
std::mt19937 rng(opt.seed);
std::uniform_int_distribution<i32> dist(-100000000, 100000000);
for (auto& x : a) x = dist(rng);
return a;
}
// Entry point: generates the data, sorts it through the process/socket tree,
// prints the result to stdout and a one-line STAT summary to stderr.
// Exit status: 0 when the result is valid, 3 when validation fails, 1 on any
// error reported as an exception.
int main(int argc, char** argv) {
    try {
        Options opt = parse_args(argc, argv);
        std::vector<i32> data = generate_data(opt);

        const auto t1 = std::chrono::steady_clock::now();
        SortResult result = socket_recursive_sort(std::move(data), 0, opt);
        const auto t2 = std::chrono::steady_clock::now();
        const double elapsed = std::chrono::duration<double>(t2 - t1).count();

        // Valid means: no element was lost or duplicated on the way through
        // the sockets, and the sequence is in ascending order.
        const bool ok = result.data.size() == opt.size &&
                        std::is_sorted(result.data.begin(), result.data.end());

        if (opt.print) {
            for (size_t i = 0; i < result.data.size(); ++i) {
                if (i) std::cout << ' ';
                std::cout << result.data[i];
            }
            std::cout << '\n';
        } else {
            std::cout << "Sorted list/array first 20 elements: ";
            const size_t limit = std::min<size_t>(20, result.data.size());
            for (size_t i = 0; i < limit; ++i) std::cout << result.data[i] << ' ';
            std::cout << '\n';
        }

        // The summary is formatted in a separate stream with fixed notation:
        // default formatting would print a fast run as e.g. "9.4e-05", which
        // tools parsing "time=<digits and dots>" would misread as 9.4 seconds.
        std::ostringstream stat;
        stat << std::fixed;
        stat.precision(9);
        stat << "STAT: size=" << opt.size
             << " depth=" << opt.max_depth
             << " min_size=" << opt.min_size
             << " processes=" << result.processes
             << " counter=" << result.counter
             << " valid=" << (ok ? 1 : 0)
             << " time=" << elapsed << " sec\n";
        std::cerr << stat.str();
        return ok ? 0 : 3;
    } catch (const std::exception& e) {
        std::cerr << "ERROR: " << e.what() << "\n";
        return 1;
    }
}
+22
View File
@@ -0,0 +1,22 @@
# Lab4 sockets: основные графики
Сгенерированы основные графики для отчета.
Корневой родительский процесс исключен из графика дерева процессов и из временных диаграмм.
## Графики
1. `pics/01_time_by_depth.png` — время от глубины рекурсии.
2. `pics/02_speedup_by_depth.png` — ускорение относительно `depth=0`.
3. `pics/03_child_processes_by_depth.png` — количество дочерних процессов без корня.
4. `pics/04_time_by_min_size.png` — влияние порога локальной сортировки.
5. `pics/05_timeline_child_processes.png` — временная диаграмма дочерних процессов.
6. `pics/06_process_tree_dfs.png` — дерево процессов: потомки идут сразу под родителем.
## Краткая статистика
| Сценарий | Запусков | Лучшее время | Лучший запуск |
|---|---:|---:|---|
| `depth_scaling` | 90 | 0.016914 | N=50000, depth=2, min_size=4096, child_proc=6 |
| `min_size_effect` | 30 | 0.057425 | N=200000, depth=5, min_size=50384, child_proc=6 |
| `timeline` | 1 | 0.010911 | N=8192, depth=3, min_size=64, child_proc=14 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB