{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Bab 05 — Probability, Statistics, Gradient Playground\n",
        "\n",
        "Notebook ini mengikuti versi script, tetapi dipecah lebih pelan agar nyaman dipakai di Jupyter, VS Code, Colab, atau Kaggle.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 1. Setup\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import math\n",
        "import random\n",
        "from statistics import mean, median\n",
        "SEED = 42\n",
        "random.seed(SEED)\n",
        "print(\"seed\", SEED)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2. Simulasi dadu\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "counts = {i: 0 for i in range(1, 7)}\n",
        "for _ in range(1000):\n",
        "    counts[random.randint(1, 6)] += 1\n",
        "for face, count in counts.items():\n",
        "    print(face, count, count/1000)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 3. Aturan probabilitas\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def complement(p): return 1-p\n",
        "def independent_and(p_a, p_b): return p_a*p_b\n",
        "def either_or(p_a, p_b, p_both=0): return p_a+p_b-p_both\n",
        "print(complement(0.18))\n",
        "print(independent_and(0.5,0.5))\n",
        "print(either_or(0.30,0.40,0.10))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 4. Conditional probability dan Bayes sederhana\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def conditional_probability(count_a_and_b, count_b):\n",
        "    if count_b == 0:\n",
        "        raise ValueError(\"count_b tidak boleh nol\")\n",
        "    return count_a_and_b / count_b\n",
        "\n",
        "def bayes(prior, likelihood, evidence):\n",
        "    if evidence == 0:\n",
        "        raise ValueError(\"evidence tidak boleh nol\")\n",
        "    return likelihood * prior / evidence\n",
        "print(\"P(beli | keranjang)\", conditional_probability(80, 200))\n",
        "print(\"P(fraud | alarm)\", round(bayes(0.01, 0.90, 0.05), 3))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 5. Statistik deskriptif\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def variance(values):\n",
        "    m = mean(values)\n",
        "    return mean([(x-m)**2 for x in values])\n",
        "def std(values):\n",
        "    return math.sqrt(variance(values))\n",
        "penjualan = [42, 18, 35, 30, 16, 38, 45, 29, 34, 31, 120]\n",
        "print(\"mean\", mean(penjualan))\n",
        "print(\"median\", median(penjualan))\n",
        "print(\"variance\", variance(penjualan))\n",
        "print(\"std\", std(penjualan))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 6. Percentile\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def percentile(values, pct):\n",
        "    xs = sorted(values)\n",
        "    pos = (len(xs)-1) * pct/100\n",
        "    lo, hi = math.floor(pos), math.ceil(pos)\n",
        "    if lo == hi:\n",
        "        return xs[int(pos)]\n",
        "    weight = pos-lo\n",
        "    return xs[lo]*(1-weight) + xs[hi]*weight\n",
        "print(\"p50\", percentile(penjualan, 50))\n",
        "print(\"p90\", percentile(penjualan, 90))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 7. Sampling error\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "population = list(range(1, 101))\n",
        "for sample_size in [5, 10, 30]:\n",
        "    sample = random.sample(population, sample_size)\n",
        "    print(sample_size, mean(sample), mean(population))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 8. Korelasi\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def require_same_length(xs, ys):\n",
        "    if len(xs) != len(ys):\n",
        "        raise ValueError(\"dua list harus sama panjang\")\n",
        "    if not xs:\n",
        "        raise ValueError(\"list tidak boleh kosong\")\n",
        "\n",
        "def covariance(xs, ys):\n",
        "    require_same_length(xs, ys)\n",
        "    mx, my = mean(xs), mean(ys)\n",
        "    return mean([(x-mx)*(y-my) for x,y in zip(xs,ys)])\n",
        "def correlation(xs, ys):\n",
        "    denom = std(xs)*std(ys)\n",
        "    if denom == 0:\n",
        "        raise ValueError(\"korelasi tidak terdefinisi jika salah satu variabel konstan\")\n",
        "    return covariance(xs,ys)/denom\n",
        "suhu = [28,29,30,31,32,33,34]\n",
        "es_teh = [20,22,27,31,35,39,41]\n",
        "print(correlation(suhu, es_teh))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 9. MAE dan MSE\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "actuals = [40, 20, 36, 32]\n",
        "preds = [32, 24, 35, 30]\n",
        "require_same_length(preds, actuals)\n",
        "mae = mean([abs(p-a) for p,a in zip(preds, actuals)])\n",
        "mse = mean([(p-a)**2 for p,a in zip(preds, actuals)])\n",
        "print(mae, mse)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 10. Turunan analitik dan finite difference\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def one_dim_loss(x): return (x-3)**2\n",
        "def one_dim_grad(x): return 2*(x-3)\n",
        "def finite_difference(f, x, h=1e-5): return (f(x+h)-f(x))/h\n",
        "for x in [-4,0,3,5]:\n",
        "    print(x, one_dim_loss(x), one_dim_grad(x), finite_difference(one_dim_loss, x))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 11. Gradient descent satu variabel\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def gradient_descent(start, lr, steps):\n",
        "    x = start\n",
        "    history = []\n",
        "    for step in range(steps):\n",
        "        history.append((step, x, one_dim_loss(x), one_dim_grad(x)))\n",
        "        x = x - lr * one_dim_grad(x)\n",
        "    history.append((steps, x, one_dim_loss(x), one_dim_grad(x)))\n",
        "    return history\n",
        "for lr in [0.01, 0.1, 1.1]:\n",
        "    final = gradient_descent(-4, lr, 12)[-1]\n",
        "    print(lr, final)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 12. Detail descent lr=0.1\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "for step, x, loss, grad in gradient_descent(-4, 0.1, 10):\n",
        "    print(step, round(x,4), round(loss,4), round(grad,4))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 13. Training model linear mini\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def linear_predict(x, w, b): return w*x+b\n",
        "def linear_training_step(xs, ys, w, b, lr):\n",
        "    require_same_length(xs, ys)\n",
        "    preds = [linear_predict(x,w,b) for x in xs]\n",
        "    errors = [p-y for p,y in zip(preds, ys)]\n",
        "    loss = mean([e**2 for e in errors])\n",
        "    grad_w = mean([2*e*x for e,x in zip(errors, xs)])\n",
        "    grad_b = mean([2*e for e in errors])\n",
        "    return w-lr*grad_w, b-lr*grad_b, loss\n",
        "xs = [0,1,2,3,4,5]\n",
        "ys = [1,3,5,7,9,11]\n",
        "w,b = 0.0,0.0\n",
        "for step in range(81):\n",
        "    preds = [linear_predict(x,w,b) for x in xs]\n",
        "    loss = mean([(p-y)**2 for p,y in zip(preds, ys)])\n",
        "    if step % 10 == 0:\n",
        "        print(step, round(w,3), round(b,3), round(loss,4))\n",
        "    if step < 80:\n",
        "        w,b,_ = linear_training_step(xs,ys,w,b,0.03)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 14. Challenge\n",
        "\n",
        "Ubah data, seed, learning rate, jumlah step, dan target linear. Catat perubahan dengan bahasa manusia.\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "pygments_lexer": "ipython3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}