201 lines
6.2 KiB
Python
201 lines
6.2 KiB
Python
import csv
|
|
import os
|
|
import re
|
|
import subprocess
|
|
|
|
import matplotlib
|
|
|
|
matplotlib.use("Agg")
|
|
import matplotlib.pyplot as plt
|
|
|
|
BIN = "./lab3"
|
|
OUT = "out"
|
|
STAT_RE = re.compile(
|
|
r"STAT:.*size=(\d+).*depth=(\d+).*min_size=(\d+).*processes=(\d+).*valid=(\d+).*time=([\d.]+)"
|
|
)
|
|
|
|
|
|
def run_once(size, depth, min_size, seed):
|
|
cmd = [
|
|
BIN,
|
|
"--size",
|
|
str(size),
|
|
"--depth",
|
|
str(depth),
|
|
"--min-size",
|
|
str(min_size),
|
|
"--seed",
|
|
str(seed),
|
|
]
|
|
p = subprocess.run(
|
|
cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True
|
|
)
|
|
if p.returncode != 0:
|
|
raise RuntimeError(p.stderr)
|
|
m = STAT_RE.search(p.stderr)
|
|
if not m:
|
|
raise RuntimeError(f"STAT not found: {p.stderr}")
|
|
if m.group(5) != "1":
|
|
raise RuntimeError(f"sort validation failed: {p.stderr}")
|
|
return {
|
|
"size": int(m.group(1)),
|
|
"depth": int(m.group(2)),
|
|
"min_size": int(m.group(3)),
|
|
"processes": int(m.group(4)),
|
|
"time": float(m.group(6)),
|
|
}
|
|
|
|
|
|
def seed_for(size, depth, min_size, salt):
|
|
# Для каждой точки используется свой seed, поэтому вход всегда случайный,
|
|
# но результаты можно воспроизвести.
|
|
return 2026 + salt * 1_000_003 + size * 17 + depth * 1009 + min_size * 31
|
|
|
|
|
|
def save_csv(path, rows, header):
|
|
with open(path, "w", encoding="utf-8", newline="") as f:
|
|
w = csv.DictWriter(f, fieldnames=header)
|
|
w.writeheader()
|
|
w.writerows(rows)
|
|
|
|
|
|
def plot_depth_scaling():
|
|
os.makedirs(f"{OUT}/pics", exist_ok=True)
|
|
|
|
# ВАЖНО: на графиках по глубине ровно 30 точек по оси X: 0..29.
|
|
#
|
|
# depth идет до 29, но реально число процессов не взорвется бесконечно,
|
|
# потому что дальнейшее деление останавливает min_size.
|
|
depths = list(range(30))
|
|
|
|
# Несколько размеров дают несколько линий на одном графике.
|
|
sizes = [50_000, 100_000, 200_000]
|
|
min_size = 4096
|
|
|
|
rows = []
|
|
for size in sizes:
|
|
for d in depths:
|
|
seed = seed_for(size, d, min_size, salt=1)
|
|
r = run_once(size, d, min_size, seed)
|
|
row = {**r, "seed": seed}
|
|
rows.append(row)
|
|
print(
|
|
f"size={size} depth={d} min_size={min_size} "
|
|
f"seed={seed} processes={r['processes']} time={r['time']:.6f}",
|
|
flush=True,
|
|
)
|
|
|
|
plt.figure(figsize=(12, 6))
|
|
for size in sizes:
|
|
cur = [r for r in rows if r["size"] == size]
|
|
plt.plot(
|
|
[r["depth"] for r in cur],
|
|
[r["time"] for r in cur],
|
|
marker="o",
|
|
label=f"N={size}",
|
|
)
|
|
plt.xlabel("Глубина порождения процессов")
|
|
plt.ylabel("Время, сек")
|
|
plt.title("Зависимость времени сортировки от глубины fork-рекурсии")
|
|
plt.grid(True)
|
|
plt.legend()
|
|
plt.tight_layout()
|
|
plt.savefig(f"{OUT}/pics/time_by_depth.png")
|
|
plt.close()
|
|
|
|
plt.figure(figsize=(12, 6))
|
|
for size in sizes:
|
|
cur = [r for r in rows if r["size"] == size]
|
|
base_time = cur[0]["time"]
|
|
speedup = [base_time / r["time"] if r["time"] > 0 else 0 for r in cur]
|
|
plt.plot(
|
|
[r["depth"] for r in cur],
|
|
speedup,
|
|
marker="s",
|
|
label=f"N={size}",
|
|
)
|
|
plt.xlabel("Глубина порождения процессов")
|
|
plt.ylabel("Ускорение относительно depth=0")
|
|
plt.title("Ускорение при использовании процессов")
|
|
plt.grid(True)
|
|
plt.legend()
|
|
plt.tight_layout()
|
|
plt.savefig(f"{OUT}/pics/speedup_by_depth.png")
|
|
plt.close()
|
|
|
|
plt.figure(figsize=(12, 6))
|
|
for size in sizes:
|
|
cur = [r for r in rows if r["size"] == size]
|
|
plt.plot(
|
|
[r["depth"] for r in cur],
|
|
[r["processes"] for r in cur],
|
|
marker="^",
|
|
label=f"N={size}",
|
|
)
|
|
plt.xlabel("Глубина порождения процессов")
|
|
plt.ylabel("Количество процессов")
|
|
plt.title("Размер дерева процессов")
|
|
plt.grid(True)
|
|
plt.legend()
|
|
plt.tight_layout()
|
|
plt.savefig(f"{OUT}/pics/process_count_by_depth.png")
|
|
plt.close()
|
|
|
|
save_csv(
|
|
f"{OUT}/benchmark_depth.csv",
|
|
rows,
|
|
["size", "depth", "min_size", "seed", "processes", "time"],
|
|
)
|
|
return rows
|
|
|
|
|
|
def plot_threshold_effect():
|
|
os.makedirs(f"{OUT}/pics", exist_ok=True)
|
|
|
|
# ВАЖНО: на графике по min_size ровно 30 точек по оси X.
|
|
size = 200_000
|
|
depth = 5
|
|
|
|
# 30 значений порога от 128 до 131072, примерно равномерно по log2-шкале.
|
|
min_sizes = [round(2 ** (7 + i * (10 / 29))) for i in range(30)]
|
|
|
|
rows = []
|
|
for m in min_sizes:
|
|
seed = seed_for(size, depth, m, salt=2)
|
|
r = run_once(size, depth, m, seed)
|
|
row = {**r, "seed": seed}
|
|
rows.append(row)
|
|
print(
|
|
f"size={size} depth={depth} min_size={m} "
|
|
f"seed={seed} processes={r['processes']} time={r['time']:.6f}",
|
|
flush=True,
|
|
)
|
|
|
|
plt.figure(figsize=(12, 6))
|
|
plt.plot(
|
|
[r["min_size"] for r in rows],
|
|
[r["time"] for r in rows],
|
|
marker="o",
|
|
)
|
|
plt.xscale("log", base=2)
|
|
plt.xlabel("Минимальный размер части для fork")
|
|
plt.ylabel("Время, сек")
|
|
plt.title("Влияние порога min_size на производительность")
|
|
plt.grid(True)
|
|
plt.tight_layout()
|
|
plt.savefig(f"{OUT}/pics/time_by_min_size.png")
|
|
plt.close()
|
|
|
|
save_csv(
|
|
f"{OUT}/benchmark_min_size.csv",
|
|
rows,
|
|
["size", "depth", "min_size", "seed", "processes", "time"],
|
|
)
|
|
return rows
|
|
|
|
|
|
if __name__ == "__main__":
|
|
plot_depth_scaling()
|
|
plot_threshold_effect()
|
|
print(f"Графики сохранены в {OUT}/pics. На каждом графике ровно 30 точек по оси X.")
|