|
| 1 | +import re |
| 2 | +import random |
| 3 | +import argparse |
| 4 | +from pathlib import Path |
| 5 | +from datetime import datetime |
| 6 | +from collections import defaultdict |
| 7 | +import numpy as np |
| 8 | +import matplotlib.pyplot as plt |
| 9 | + |
# Cap on stored samples per day (tunable via --max-samples).
MAX_SAMPLES = 200000

# Log format looks like:
# [pid: ...] <ip> ... [Tue Feb 10 14:47:06 2026] GET /... => generated ... in 2740 msecs
# The leading greedy ".*" ensures we anchor on the *last* [...] date block before GET.
pattern = re.compile(
    r".*\[(?P<date>(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\d{4})\]\s+GET\s+(?P<path>/[^\s]*)\s+=> generated .* in (?P<time>\d+) msecs"
)

# Per-day reservoirs plus a running count of every value ever offered.
samples = defaultdict(list)
counts = defaultdict(int)


def reservoir_add(day, value):
    """Feed one value into the bounded per-day reservoir (Vitter's Algorithm R).

    Keeps at most MAX_SAMPLES values per day while giving every value seen an
    equal chance of being retained, without storing the full stream.
    """
    counts[day] += 1
    seen = counts[day]

    reservoir = samples[day]
    if len(reservoir) < MAX_SAMPLES:
        reservoir.append(value)
        return

    # Replace an existing slot with probability MAX_SAMPLES/seen, so the
    # reservoir stays a uniform sample of everything seen so far.
    slot = random.randint(0, seen - 1)
    if slot < MAX_SAMPLES:
        reservoir[slot] = value
| 35 | + |
| 36 | + |
| 37 | +def _default_log_file() -> Path | None: |
| 38 | + downloads = Path.home() / "Downloads" |
| 39 | + if not downloads.exists(): |
| 40 | + return None |
| 41 | + # Prefer the exact historical naming scheme, but fall back to any similar file. |
| 42 | + matches = list(downloads.glob("orumsV3_spoken.log-*")) |
| 43 | + if not matches: |
| 44 | + matches = list(downloads.glob("*orumsV3_spoken*")) |
| 45 | + matches = sorted(matches, key=lambda p: p.stat().st_mtime, reverse=True) |
| 46 | + return matches[0] if matches else None |
| 47 | + |
| 48 | + |
def _parse_args() -> argparse.Namespace:
    """Build and evaluate the command-line interface for this script."""
    parser = argparse.ArgumentParser(
        description="Compute homepage response-time percentiles from a log file."
    )
    parser.add_argument(
        "--log-file",
        default=None,
        type=str,
        help="Path to the log file. If omitted, uses the newest ~/Downloads/orumsV3_spoken.log-* file.",
    )
    parser.add_argument(
        "--max-samples",
        default=MAX_SAMPLES,
        type=int,
        help="Max reservoir samples per day (default: %(default)s).",
    )
    parser.add_argument(
        "--days",
        default="10,11,12,13,14,15",
        type=str,
        help="Comma-separated day-of-month numbers to include for Feb (default: %(default)s).",
    )
    parser.add_argument(
        "--homepage-match",
        default="allow-query",
        choices=["strict", "allow-query"],
        help="Homepage matching: strict='/' only; allow-query='/' and '/?…' (default: %(default)s).",
    )
    parser.add_argument(
        "--plot",
        default="compare",
        choices=["compare", "hist", "both", "none"],
        help="Plot type (default: %(default)s).",
    )
    return parser.parse_args()
| 82 | + |
| 83 | + |
# Evaluate the CLI once at import time; everything below reads these globals.
args = _parse_args()
MAX_SAMPLES = args.max_samples
days = {int(piece) for piece in (part.strip() for part in args.days.split(",")) if piece}
| 87 | + |
def _is_homepage(path: str) -> bool:
    """Return True when *path* counts as the homepage under the configured mode."""
    if path == "/":
        return True
    # In "allow-query" mode the homepage may also carry a query string.
    return args.homepage_match != "strict" and path.startswith("/?")
| 93 | + |
# Resolve the log path: an explicit --log-file wins, otherwise auto-detect.
log_file = Path(args.log_file).expanduser() if args.log_file else _default_log_file()
if not log_file or not log_file.exists():
    hint = Path.home() / "Downloads" / "orumsV3_spoken.log-*"
    raise SystemExit(
        "Log file not found.\n"
        f"Tried auto-detecting the newest file matching `{hint}`.\n"
        "Pass it explicitly, e.g.:\n"
        " python log.py --log-file ~/Downloads/orumsV3_spoken.log-20260215\n"
    )
| 104 | + |
| 105 | + |
# Single pass over the log: feed homepage hits on the selected February days
# into the per-day reservoirs.
with open(log_file, "r", errors="ignore") as handle:
    for raw_line in handle:
        match = pattern.search(raw_line)
        if match is None:
            continue

        # Homepage requests only.
        if not _is_homepage(match.group("path").strip()):
            continue

        when = datetime.strptime(match.group("date"), "%a %b %d %H:%M:%S %Y")

        # Restrict to the requested days of February.
        if when.month != 2 or when.day not in days:
            continue

        reservoir_add(when.strftime("%Y-%m-%d"), int(match.group("time")))
| 127 | + |
| 128 | + |
# ---- report: percentile comparison table --------------------------------
print("\n📊 Homepage response-time percentiles (Feb comparison)\n")
print(f"Log file: {log_file}")
print(f"Homepage match: {args.homepage_match}")
print("")

# The scan above accepts the chosen days in *any* February, but the report
# previously hardcoded 2026 and silently dropped other years. Include every
# day actually sampled, plus the 2026 defaults (kept so days with no data
# still get an explicit zero row). ISO date strings sort chronologically.
ordered_days = sorted({f"2026-02-{d:02d}" for d in days} | set(samples))
| 136 | + |
rows = []
results = {}  # day -> np.ndarray (retained for the optional plots)

for day in ordered_days:
    day_samples = samples.get(day, [])
    if not day_samples:
        # No data for this day: placeholder row with dashes rendered later.
        rows.append((day, 0, counts.get(day, 0), None, None, None))
        continue

    arr = np.array(day_samples)
    results[day] = arr

    # One vectorized call computes all three percentiles.
    p50, p80, p90 = (float(v) for v in np.percentile(arr, [50, 80, 90]))
    rows.append((day, len(arr), counts.get(day, len(arr)), p50, p80, p90))

header = ("Date", "samples_used", "total_seen", "p50_ms", "p80_ms", "p90_ms")
print(f"{header[0]:<12} {header[1]:>12} {header[2]:>10} {header[3]:>10} {header[4]:>10} {header[5]:>10}")
print("-" * 70)
for day, n, total, p50, p80, p90 in rows:
    if p50 is None:
        print(f"{day:<12} {n:>12} {total:>10} {'-':>10} {'-':>10} {'-':>10}")
    else:
        print(f"{day:<12} {n:>12} {total:>10} {p50:>10.1f} {p80:>10.1f} {p90:>10.1f}")
| 162 | + |
| 163 | + |
def _plot_histograms() -> None:
    """Open one histogram figure per day that has sampled data."""
    for day, values in results.items():
        plt.figure()
        plt.hist(values, bins=40)
        plt.title(f"Homepage Response Time Distribution ({day})")
        plt.xlabel("Response Time (ms)")
        plt.ylabel("Requests")
        plt.grid(True)
| 172 | + |
| 173 | + |
def _plot_comparison() -> None:
    """Draw P50/P80/P90 trend lines across the reported days on one figure."""
    # Keep only days that actually had samples.
    plotted = [(day, p50, p80, p90) for (day, n, _total, p50, p80, p90) in rows if n != 0]
    if not plotted:
        return

    labels = [entry[0] for entry in plotted]
    series = {
        "P50": [entry[1] for entry in plotted],
        "P80": [entry[2] for entry in plotted],
        "P90": [entry[3] for entry in plotted],
    }

    plt.figure()
    for name, values in series.items():
        plt.plot(labels, values, marker="o", label=name)
    plt.title("Homepage Response Time Percentiles (Feb 10–15)")
    plt.xlabel("Date")
    plt.ylabel("Response time (ms)")
    plt.grid(True)
    plt.legend()
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
| 201 | + |
| 202 | + |
# Render the requested plot(s); "none" skips matplotlib entirely.
mode = args.plot
if mode in ("hist", "both"):
    _plot_histograms()
if mode in ("compare", "both"):
    _plot_comparison()
if mode != "none":
    plt.show()
0 commit comments