-rw-r--r--   .gitignore                                |  179
-rw-r--r--   .vscode/settings.json                     |   50
-rw-r--r--   2025-06-26/2025-06-26-notes.pdf           |  bin 0 -> 281981 bytes
-rw-r--r--   2025-06-26/2025-06-26-notes.tex           |  261
-rw-r--r--   2025-06-26/model-complexity-diagram.png   |  bin 0 -> 102805 bytes
-rw-r--r--   README.md                                 |   74
-rw-r--r--   main_notes.pdf                            |  bin 0 -> 3401557 bytes
-rw-r--r--   template.pdf                              |  bin 0 -> 134201 bytes
-rw-r--r--   template.tex                              |  110
9 files changed, 673 insertions(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bfe6b5b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,179 @@
+# LaTeX auxiliary files
+*.aux
+*.lof
+*.log
+*.lot
+*.fls
+*.out
+*.toc
+*.fmt
+*.fot
+*.cb
+*.cb2
+.*.lb
+
+# LaTeX intermediate files
+*.dvi
+*.xdv
+*-converted-to.*
+
+# LaTeX biblatex
+*.bcf
+*.blg
+*.bbl
+*.run.xml
+
+# LaTeX build files
+*.fdb_latexmk
+*.synctex.gz
+*.synctex(busy)
+*.pdfsync
+
+# LaTeX index files
+*.idx
+*.ilg
+*.ind
+*.ist
+
+# LaTeX glossary files
+*.acn
+*.acr
+*.glg
+*.glo
+*.gls
+*.glsdefs
+*.lzo
+*.lzs
+
+# LaTeX beamer files
+*.nav
+*.pre
+*.snm
+*.vrb
+
+# LaTeX algorithm2e
+*.alg
+*.loa
+
+# LaTeX achemso
+acs-*.bib
+
+# LaTeX amsthm
+*.thm
+
+# LaTeX beamer
+*.nav
+*.pre
+*.snm
+*.vrb
+
+# LaTeX changes
+*.soc
+
+# LaTeX cprotect
+*.cpt
+
+# LaTeX endfloat
+*.ttt
+*.fff
+
+# LaTeX fancyhdr
+*.fancyhdr
+
+# LaTeX hyperref
+*.figlist
+*.makefile
+*.xref
+
+# LaTeX listings
+*.lol
+
+# LaTeX minted
+_minted*
+*.pyg
+
+# LaTeX morewrites
+*.mw
+
+# LaTeX nomencl
+*.nlg
+*.nlo
+*.nls
+
+# LaTeX pax
+*.pax
+
+# LaTeX pdfpages
+*.pdf_tex
+
+# LaTeX sagetex
+*.sagetex.sage
+*.sagetex.py
+*.sagetex.scmd
+
+# LaTeX scrwfile
+*.wrt
+
+# LaTeX sympy
+*.upa
+*.upb
+
+# LaTeX TikZ & PGF
+*.auxlock
+*.figlist
+*.makefile
+*.xref
+
+# LaTeX todonotes
+*.tdo
+
+# LaTeX xindy
+*.xdy
+
+# LaTeX xypic
+*.xyc
+
+# LaTeX latexmk
+*.fdb_latexmk
+
+# LaTeX pythontex
+*.pytxcode
+pythontex-files-*/
+
+# LaTeX thmtools
+*.loe
+
+# LaTeX Asymptote
+*.asy
+*.pre
+
+# LaTeX WinEdt
+*.bak
+*.sav
+
+# LaTeX LyX
+*.lyx~
+
+# LaTeX Kile
+*.kilepr
+
+# macOS specific
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Editor specific (keep .vscode/ for LaTeX Workshop config)
+.idea/
+*.swp
+*.swo
+*~
+
+# Backup files
+*~
+*.backup
+*.bak
+*.orig
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..7c5561d
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,50 @@
+{
+ "latex-workshop.latex.tools": [
+ {
+ "name": "latexmk",
+ "command": "/Library/TeX/texbin/latexmk",
+ "args": [
+ "-synctex=1",
+ "-interaction=nonstopmode",
+ "-file-line-error",
+ "-pdf",
+ "-outdir=%OUTDIR%",
+ "%DOC%"
+ ],
+ "env": {
+ "PATH": "/Library/TeX/texbin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
+ }
+ },
+ {
+ "name": "pdflatex",
+ "command": "/Library/TeX/texbin/pdflatex",
+ "args": [
+ "-synctex=1",
+ "-interaction=nonstopmode",
+ "-file-line-error",
+ "%DOC%"
+ ],
+ "env": {
+ "PATH": "/Library/TeX/texbin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
+ }
+ }
+ ],
+ "latex-workshop.latex.recipes": [
+ {
+ "name": "latexmk 🔃",
+ "tools": [
+ "latexmk"
+ ]
+ },
+ {
+ "name": "pdflatex ➞ pdflatex",
+ "tools": [
+ "pdflatex",
+ "pdflatex"
+ ]
+ }
+ ],
+ "latex-workshop.view.pdf.viewer": "tab",
+ "latex-workshop.latex.autoClean.run": "onBuilt",
+ "latex-workshop.latex.autoBuild.run": "onFileChange"
+}
\ No newline at end of file
diff --git a/2025-06-26/2025-06-26-notes.pdf b/2025-06-26/2025-06-26-notes.pdf
new file mode 100644
index 0000000..3d0ef8c
--- /dev/null
+++ b/2025-06-26/2025-06-26-notes.pdf
Binary files differ
diff --git a/2025-06-26/2025-06-26-notes.tex b/2025-06-26/2025-06-26-notes.tex
new file mode 100644
index 0000000..bbf4ed3
--- /dev/null
+++ b/2025-06-26/2025-06-26-notes.tex
@@ -0,0 +1,261 @@
+\documentclass{article}
+\usepackage[utf8]{inputenc}
+\usepackage[margin=1.5cm, top=2cm, bottom=2cm]{geometry}
+\usepackage{amsmath}
+\usepackage{amsfonts}
+\usepackage{amssymb}
+\usepackage{graphicx}
+\usepackage{setspace}
+\setstretch{0.9}
+
+\title{CS229 Notes - June 26, 2025}
+\author{Yuren Hao}
+\date{\today}
+
+\begin{document}
+
+\maketitle
+
+\section{Maximum Likelihood Estimation (MLE)}
+
+Given i.i.d. observations $x_1, x_2, \ldots, x_n$ from $f(x; \theta)$.
+
+\textbf{Likelihood:} $L(\theta) = \prod_{i=1}^{n} f(x_i; \theta)$
+
+\textbf{Log-likelihood:} $\ell(\theta) = \sum_{i=1}^{n} \log f(x_i; \theta)$
+
+\textbf{MLE:} $\hat{\theta}_{MLE} = \arg\max_{\theta} \ell(\theta)$
+
+Solve: $\frac{\partial \ell(\theta)}{\partial \theta} = 0$
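+
+\textbf{Worked example (illustrative):} applying this recipe to i.i.d. $x_i \sim \text{Bernoulli}(\theta)$,
+\[
+\ell(\theta) = \sum_{i=1}^{n} \left[ x_i \log\theta + (1 - x_i)\log(1 - \theta) \right], \qquad
+\frac{\partial \ell}{\partial \theta} = \frac{\sum_i x_i}{\theta} - \frac{n - \sum_i x_i}{1 - \theta} = 0
+\;\Rightarrow\; \hat{\theta}_{MLE} = \frac{1}{n}\sum_{i=1}^{n} x_i,
+\]
+i.e., the sample mean.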
+
+\section{Linear Regression - Normal Equation}
+
+Given training data $\{(x_i, y_i)\}_{i=1}^n$ with $x_i \in \mathbb{R}^d$, $y_i \in \mathbb{R}$.
+
+\textbf{RSS:} $RSS(\theta) = \sum_{i=1}^{n} (y_i - \theta^T x_i)^2 = \|y - X\theta\|_2^2$
+
+\textbf{Derivative:} $\frac{\partial RSS(\theta)}{\partial \theta} = -2X^T(y - X\theta)$
+
+\textbf{Normal Equation:} $X^TX\theta = X^Ty$
+
+\textbf{Closed-form solution:} $\hat{\theta} = (X^TX)^{-1}X^Ty$
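+
+\textbf{Note:} the closed form assumes $X^TX$ is invertible ($X$ has full column rank); otherwise the minimizer is not unique and the pseudoinverse $X^{+}y$ gives the minimum-norm solution.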
+
+\section{Gradient Descent for Linear Regression}
+
+\textbf{Cost function:} $J(\theta) = \frac{1}{2m}\sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})^2$
+
+\textbf{Gradient:} $\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})x_j^{(i)}$
+
+\textbf{Algorithm:}
+\begin{enumerate}
+ \item Initialize $\theta$ randomly
+ \item While not converged:
+ \begin{itemize}
+ \item $\theta_j := \theta_j - \alpha \frac{\partial J}{\partial \theta_j}$ for all $j$
+ \item Check convergence: $|J(\theta^{(t+1)}) - J(\theta^{(t)})| < \epsilon$
+ \end{itemize}
+\end{enumerate}
+
+\textbf{Vectorized update:} $\theta := \theta - \alpha \frac{1}{m} X^T(X\theta - y)$
+
+\subsection{Example: 3 Data Points}
+\textbf{Data:} $(1,1)$, $(2,2)$, $(3,4)$. Model: $h_\theta(x) = \theta_0 + \theta_1 x$
+
+\textbf{Initial:} $\theta_0 = 0$, $\theta_1 = 0$, $\alpha = 0.1$
+
+\textbf{Iteration 1:}
+\begin{itemize}
+ \item $J(\theta) = \frac{1}{6}[(0-1)^2 + (0-2)^2 + (0-4)^2] = 3.5$
+ \item $\frac{\partial J}{\partial \theta_0} = \frac{1}{3}[(-1) + (-2) + (-4)] = -2.33$
+ \item $\frac{\partial J}{\partial \theta_1} = \frac{1}{3}[(-1)(1) + (-2)(2) + (-4)(3)] = -5.67$
+ \item $\theta_0 := 0 - 0.1(-2.33) = 0.233$
+ \item $\theta_1 := 0 - 0.1(-5.67) = 0.567$
+\end{itemize}
+
+\textbf{Iteration 2:} $J(\theta) \approx 0.79$ (continues until convergence...)
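+
+\textbf{Check (normal equation):} for this data,
+\[
+X^TX = \begin{pmatrix} 3 & 6 \\ 6 & 14 \end{pmatrix}, \quad
+X^Ty = \begin{pmatrix} 7 \\ 17 \end{pmatrix}
+\;\Rightarrow\; \hat{\theta}_0 = -\tfrac{2}{3}, \quad \hat{\theta}_1 = \tfrac{3}{2}, \quad J(\hat{\theta}) = \tfrac{1}{36} \approx 0.03,
+\]
+which is the limit that gradient descent approaches.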
+
+\section{ML Framework \& Loss Functions}
+
+\subsection{General ML Pipeline}
+\textbf{Model} $\rightarrow$ \textbf{Algorithm} $\rightarrow$ \textbf{Estimated Parameters}
+
+\textbf{Predictions} $\rightarrow$ \textbf{Decisions} $\rightarrow$ \textbf{Outcomes}
+
+\textbf{Example:} Linear model $\rightarrow$ Gradient descent $\rightarrow$ $\hat{\theta}$
+
+House price prediction $\rightarrow$ Buy/sell decision $\rightarrow$ Profit/loss
+
+\subsection{Loss Functions}
+\textbf{Regression:}
+\begin{itemize}
+ \item \textbf{MSE:} $L(y, \hat{y}) = (y - \hat{y})^2$
+ \item \textbf{MAE:} $L(y, \hat{y}) = |y - \hat{y}|$
+ \item \textbf{Huber:} $L(y, \hat{y}) = \begin{cases} \frac{1}{2}(y-\hat{y})^2 & \text{if } |y-\hat{y}| \leq \delta \\ \delta|y-\hat{y}| - \frac{1}{2}\delta^2 & \text{otherwise} \end{cases}$
+\end{itemize}
+
+\textbf{Classification:}
+\begin{itemize}
+ \item \textbf{0-1 Loss:} $L(y, \hat{y}) = \mathbf{1}[y \neq \hat{y}]$
+ \item \textbf{Logistic:} $L(y, \hat{y}) = -y\log(\hat{y}) - (1-y)\log(1-\hat{y})$
+ \item \textbf{Hinge:} $L(y, \hat{y}) = \max(0, 1 - y\hat{y})$ (SVM)
+\end{itemize}
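+
+\textbf{Convention note:} the logistic loss above takes $y \in \{0, 1\}$ with $\hat{y} \in (0, 1)$ a predicted probability (e.g., $\hat{y} = \sigma(\theta^T x)$), while the hinge loss takes $y \in \{-1, +1\}$ with $\hat{y}$ a raw real-valued score.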
+
+\section{Training Loss vs Model Complexity}
+
+\subsection{Bias-Variance Tradeoff}
+\textbf{Low Complexity:} High bias, low variance $\rightarrow$ \textbf{Underfitting}
+
+\textbf{High Complexity:} Low bias, high variance $\rightarrow$ \textbf{Overfitting}
+
+\subsection{Typical Curves}
+\begin{itemize}
+ \item \textbf{Training Error:} Decreases as complexity increases
+ \item \textbf{Validation Error:} U-shaped curve
+ \item \textbf{Optimal Complexity:} Minimum validation error
+\end{itemize}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.8\textwidth]{model-complexity-diagram.png}
+\caption{Model Complexity vs Training/Validation Error}
+\end{figure}
+
+\textbf{Total Error} = Bias$^2$ + Variance + Irreducible Error
+
+\textbf{Regularization:} Controls complexity via penalty terms
+\begin{itemize}
+ \item \textbf{L1 (Lasso):} $\lambda \sum_j |\theta_j|$ (sparse solutions)
+ \item \textbf{L2 (Ridge):} $\lambda \sum_j \theta_j^2$ (smooth solutions)
+\end{itemize}
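+
+\textbf{Example (ridge):} adding the L2 penalty to the RSS keeps a closed-form solution (ignoring the usually unpenalized intercept):
+\[
+\hat{\theta}_{ridge} = \arg\min_{\theta} \left( \|y - X\theta\|_2^2 + \lambda \|\theta\|_2^2 \right) = (X^TX + \lambda I)^{-1}X^Ty
+\]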
+
+\section{Generalization Error}
+
+\subsection{Definition}
+\textbf{Generalization Error:} Expected error on unseen data drawn from the same distribution as the training data
+
+\textbf{Continuous Case:} For regression with squared loss
+\[
+\text{Gen Error} = \mathbb{E}_{(x,y) \sim D}[(h(x) - y)^2] = \int_{x,y} (h(x) - y)^2 p(x,y) \, dx \, dy
+\]
+
+Since $p(x,y) = p(y|x)p(x)$, this can be written as:
+\[
+= \int_x \left[ \int_y (h(x) - y)^2 p(y|x) \, dy \right] p(x) \, dx
+\]
+
+\textbf{Discrete Case:} For classification with 0-1 loss
+\[
+\text{Gen Error} = \mathbb{E}_{(x,y) \sim D}[\mathbf{1}[h(x) \neq y]] = \sum_{x,y} \mathbf{1}[h(x) \neq y] \cdot p(x,y)
+\]
+
+Expanding the sum:
+\[
+= \sum_x \sum_{y: y \neq h(x)} p(x,y) = \sum_x p(x) \sum_{y: y \neq h(x)} p(y|x)
+\]
+
+where $D$ is the data distribution and $h$ is the hypothesis.
+
+\textbf{Empirical Risk:} Approximation using training data
+
+\textbf{Continuous Case:}
+\[
+\hat{R}(h) = \frac{1}{m}\sum_{i=1}^{m} (h(x_i) - y_i)^2
+\]
+
+\textbf{Discrete Case:}
+\[
+\hat{R}(h) = \frac{1}{m}\sum_{i=1}^{m} \mathbf{1}[h(x_i) \neq y_i]
+\]
+
+\textbf{General Form:}
+\[
+\hat{R}(h) = \frac{1}{m}\sum_{i=1}^{m} L(h(x_i), y_i) \approx \mathbb{E}_{(x,y) \sim D}[L(h(x), y)]
+\]
+
+\textbf{Generalization Gap:} $R(h) - \hat{R}(h)$ where $R(h)$ is true risk
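+
+For a fixed $h$, the law of large numbers gives $\hat{R}(h) \to R(h)$ as $m \to \infty$; the gap matters mainly because $h$ is chosen by minimizing $\hat{R}$ on the same data.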
+
+\subsection{Sources of Error}
+The expected prediction error can be decomposed into three fundamental components:
+
+\textbf{Expected Prediction Error:}
+\[
+\mathbb{E}[\text{Error}(x)] = \text{Noise} + \text{Bias}^2 + \text{Variance}
+\]
+
+\textbf{Detailed Breakdown:}
+\begin{itemize}
+ \item \textbf{Noise:} $\sigma^2 = \mathbb{E}[(y - f(x))^2]$ - Irreducible error from data
+ \item \textbf{Bias:} $\text{Bias}[\hat{f}(x)] = \mathbb{E}[\hat{f}(x)] - f(x)$ - Model's systematic error
+ \item \textbf{Variance:} $\text{Var}[\hat{f}(x)] = \mathbb{E}[(\hat{f}(x) - \mathbb{E}[\hat{f}(x)])^2]$ - Model's sensitivity to training data
+\end{itemize}
+
+\textbf{MSE Decomposition:}
+\[
+\text{MSE}(x) = \mathbb{E}[(\hat{f}(x) - y)^2] = \text{Bias}^2[\hat{f}(x)] + \text{Var}[\hat{f}(x)] + \sigma^2
+\]
+
+Expanding the MSE:
+\[
+= (\mathbb{E}[\hat{f}(x)] - f(x))^2 + \mathbb{E}[(\hat{f}(x) - \mathbb{E}[\hat{f}(x)])^2] + \mathbb{E}[(y - f(x))^2]
+\]
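+
+The cross terms vanish because $\mathbb{E}[\hat{f}(x) - \mathbb{E}[\hat{f}(x)]] = 0$ and the noise $y - f(x)$ has mean zero and is independent of $\hat{f}(x)$, which depends only on the training set.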
+
+\textbf{Experimental Setup for Bias-Variance Analysis:}
+
+We create $N$ different training sets by sampling from the data distribution. Each training set produces estimated model parameters, giving us $N$ different models $\hat{f}_1, \hat{f}_2, \ldots, \hat{f}_N$.
+
+The average prediction of the $N$ estimated models, denoted $\bar{f}_w(x)$, is:
+\[
+\bar{f}_w(x) = \frac{1}{N}\sum_{i=1}^{N} \hat{f}_i(x)
+\]
+
+The average fit is akin to the expected prediction $\mathbb{E}[\hat{f}(x)]$ over all possible training sets.
+
+\textbf{Bias Definition:}
+\[
+\text{Bias}(x) = f_{\text{true}}(x) - \bar{f}_w(x)
+\]
+
+\textbf{Key Question:} Is our approach flexible enough to capture $f_{\text{true}}(x)$?
+
+Bias is high when the hypothesis class is unable to capture $f_{\text{true}}(x)$. This happens when the model class is too simple or restrictive to represent the true underlying function.
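+
+\textbf{Variance (same setup):} the spread of the individual fits around their average,
+\[
+\text{Var}(x) = \frac{1}{N}\sum_{i=1}^{N} \left( \hat{f}_i(x) - \bar{f}_w(x) \right)^2,
+\]
+is high when the fitted model changes a lot from one training set to another (typically flexible, low-bias models).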
+
+\subsection{Approximating Generalization Error}
+Since the true distribution $D$ is unknown, we approximate the generalization error using:
+\begin{itemize}
+    \item \textbf{Validation Set:} Hold-out data to estimate $R(h) \approx \hat{R}_{val}(h)$
+    \item \textbf{Cross-Validation:} Average over multiple train/validation splits to reduce variance
+\end{itemize}
+
+\textbf{Key insight:} The empirical risk on unseen validation data provides an unbiased estimate of the generalization error.
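+
+\textbf{Example ($k$-fold):} split the training data into $k$ folds; for $j = 1, \ldots, k$, train on the other $k-1$ folds and evaluate on fold $j$; report the average $\frac{1}{k}\sum_{j=1}^{k} \hat{R}_{val}^{(j)}$.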
+
+\subsection{Test Error Definition}
+\textbf{Test Error:} Performance on final held-out test set, used only once for final evaluation.
+
+\textbf{Continuous Case (Regression):}
+\[
+\text{Test Error} = \frac{1}{n_{test}}\sum_{i=1}^{n_{test}} (h(x_i^{test}) - y_i^{test})^2
+\]
+
+Expanding the sum:
+\[
+= \frac{1}{n_{test}}\left[ (h(x_1^{test}) - y_1^{test})^2 + (h(x_2^{test}) - y_2^{test})^2 + \ldots + (h(x_{n_{test}}^{test}) - y_{n_{test}}^{test})^2 \right]
+\]
+
+\textbf{Discrete Case (Classification):}
+\[
+\text{Test Error} = \frac{1}{n_{test}}\sum_{i=1}^{n_{test}} \mathbf{1}[h(x_i^{test}) \neq y_i^{test}]
+\]
+
+Expanding the indicator sum:
+\[
+= \frac{1}{n_{test}}\left[ \mathbf{1}[h(x_1^{test}) \neq y_1^{test}] + \mathbf{1}[h(x_2^{test}) \neq y_2^{test}] + \ldots + \mathbf{1}[h(x_{n_{test}}^{test}) \neq y_{n_{test}}^{test}] \right]
+\]
+
+\textbf{Key Point:} Test set should only be used once to avoid overfitting to test data.
+
+\subsection{Key Factors}
+\begin{itemize}
+ \item \textbf{Model Complexity:} More parameters $\rightarrow$ higher capacity to overfit
+ \item \textbf{Training Set Size:} More data $\rightarrow$ better generalization
+ \item \textbf{Regularization:} Penalty terms reduce overfitting
+ \item \textbf{Early Stopping:} Stop training before overfitting occurs
+\end{itemize}
+
+\end{document}
\ No newline at end of file
diff --git a/2025-06-26/model-complexity-diagram.png b/2025-06-26/model-complexity-diagram.png
new file mode 100644
index 0000000..45e0fe8
--- /dev/null
+++ b/2025-06-26/model-complexity-diagram.png
Binary files differ
diff --git a/README.md b/README.md
index 4e5b017..9a28042 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,73 @@
-# cs229-notes
\ No newline at end of file
+# CS229 Study Notes
+
+A collection of concise LaTeX notes for the Stanford CS229 Machine Learning course.
+
+## 📚 Contents
+
+### Current Notes (June 26, 2025)
+- **Maximum Likelihood Estimation (MLE)** - Theory and definitions
+- **Linear Regression** - Normal equation and gradient descent
+- **Gradient Descent Example** - Step-by-step calculation with 3 data points
+- **ML Framework** - Model → Algorithm → Parameters pipeline
+- **Loss Functions** - Regression and classification examples
+- **Model Complexity** - Bias-variance tradeoff with visualization
+- **Generalization Error** - Continuous and discrete definitions with mathematical derivations
+
+## 🗂️ Project Structure
+
+```
+cs229-notes/
+├── 2025-06-26/ # Daily notes folder
+│ ├── 2025-06-26-notes.tex # LaTeX source (7.4KB)
+│ ├── 2025-06-26-notes.pdf # Compiled PDF (4 pages, 270KB)
+│ └── model-complexity-diagram.png # Visualization diagram (100KB)
+├── .vscode/ # LaTeX Workshop configuration
+│ └── settings.json # Editor settings for LaTeX
+├── template.tex # LaTeX template for new notes
+├── template.pdf # Template example (2 pages)
+├── main_notes.pdf # Additional reference material
+└── README.md # This file
+```
+
+## 🛠️ Compilation
+
+### Manual Compilation (Recommended)
+```bash
+cd 2025-06-26
+pdflatex -interaction=nonstopmode 2025-06-26-notes.tex
+```
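+
+Alternatively, `latexmk` (see Requirements) re-runs pdflatex until cross-references stabilize. A minimal sketch, assuming `latexmk` is on your `PATH` (e.g. `/Library/TeX/texbin`, as configured in `.vscode/settings.json`):
+
+```bash
+cd 2025-06-26
+# build: -pdf selects pdflatex; reruns happen automatically
+latexmk -pdf -interaction=nonstopmode 2025-06-26-notes.tex
+# optional: remove auxiliary files but keep the PDF
+latexmk -c
+```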
+
+### Using LaTeX Workshop (VS Code/Cursor)
+- Configure LaTeX paths in `.vscode/settings.json`
+- Use `Cmd + Option + B` to build
+
+### Using the Template
+1. Copy `template.tex` to your new date folder
+2. Rename it (e.g., `2025-06-27-notes.tex`)
+3. Update title and date in the document
+4. Replace example content with your notes
+5. Compile using the methods above
+
+## 📖 Features
+
+- **Concise Format** - Key concepts without excessive explanation
+- **Mathematical Rigor** - Complete derivations with integrals and summations
+- **Visual Aids** - Diagrams for complex concepts
+- **Practical Examples** - Numerical calculations with real data
+- **Organized Structure** - Daily folders for easy navigation
+- **Optimized Layout** - Compact margins and spacing for efficient page usage
+- **Ready-to-Use Template** - Pre-configured template with common structures
+
+## 🔧 Requirements
+
+- LaTeX distribution (BasicTeX/MacTeX)
+- pdflatex and latexmk
+- graphicx package for images
+
+## 📝 Author
+
+Yuren H
+
+## 📅 Last Updated
+
+June 26, 2025
\ No newline at end of file
diff --git a/main_notes.pdf b/main_notes.pdf
new file mode 100644
index 0000000..6d1681e
--- /dev/null
+++ b/main_notes.pdf
Binary files differ
diff --git a/template.pdf b/template.pdf
new file mode 100644
index 0000000..7885fb1
--- /dev/null
+++ b/template.pdf
Binary files differ
diff --git a/template.tex b/template.tex
new file mode 100644
index 0000000..dc5742a
--- /dev/null
+++ b/template.tex
@@ -0,0 +1,110 @@
+\documentclass{article}
+\usepackage[utf8]{inputenc}
+\usepackage[margin=1.5cm, top=2cm, bottom=2cm]{geometry}
+\usepackage{amsmath}
+\usepackage{amsfonts}
+\usepackage{amssymb}
+\usepackage{graphicx}
+\usepackage{setspace}
+\setstretch{0.9}
+
+\title{CS229 Notes - [Date]}
+\author{Your Name}
+\date{\today}
+
+\begin{document}
+
+\maketitle
+
+\section{Section Title}
+
+Brief introduction or overview of the topic.
+
+\subsection{Definitions}
+\textbf{Key Term:} Definition here
+
+\textbf{Mathematical Definition:}
+\[
+\text{Formula} = \mathbb{E}_{(x,y) \sim D}[L(h(x), y)]
+\]
+
+\subsection{Important Concepts}
+\begin{itemize}
+ \item \textbf{Point 1:} Description with math $\theta$
+ \item \textbf{Point 2:} Another important concept
+ \item \textbf{Point 3:} Final key point
+\end{itemize}
+
+\section{Mathematical Derivations}
+
+\subsection{Step-by-Step Process}
+Starting from the basic equation:
+\[
+f(x) = ax + b
+\]
+
+Taking the derivative:
+\[
+\frac{df}{dx} = a
+\]
+
+\textbf{Result:} The derivative is constant.
+
+\subsection{Example Calculation}
+Given data points $(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)$:
+
+\textbf{Step 1:} Calculate the mean
+\[
+\bar{x} = \frac{1}{n}\sum_{i=1}^{n} x_i
+\]
+
+\textbf{Step 2:} Expanding the sum:
+\[
+= \frac{1}{n}[x_1 + x_2 + \ldots + x_n]
+\]
+
+\section{Algorithms}
+
+\subsection{Algorithm Name}
+\textbf{Input:} Data, parameters, etc.
+
+\textbf{Algorithm:}
+\begin{enumerate}
+ \item Initialize parameters $\theta = 0$
+ \item While not converged:
+ \begin{itemize}
+ \item Update: $\theta := \theta - \alpha \nabla J(\theta)$
+ \item Check convergence: $|J(\theta^{(t+1)}) - J(\theta^{(t)})| < \epsilon$
+ \end{itemize}
+ \item Return $\hat{\theta}$
+\end{enumerate}
+
+\textbf{Output:} Optimized parameters
+
+\section{Visual Elements}
+
+\subsection{Including Figures}
+% Uncomment and modify as needed:
+% \begin{figure}[h]
+% \centering
+% \includegraphics[width=0.8\textwidth]{your-image.png}
+% \caption{Your caption here}
+% \end{figure}
+
+\subsection{Key Insights}
+\textbf{Important Note:} Always remember that...
+
+\textbf{Practical Tip:} In practice, you should...
+
+\section{Summary}
+
+\textbf{Main Takeaways:}
+\begin{itemize}
+ \item Summary point 1
+ \item Summary point 2
+ \item Summary point 3
+\end{itemize}
+
+\textbf{Next Steps:} What to study next...
+
+\end{document}
\ No newline at end of file