import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import expm
from ipywidgets import interact, FloatSlider

# 中文字体设置
plt.rcParams['font.sans-serif'] = ['Noto Sans CJK SC']


def format_axis(ax, xticks=None, yticks=None, fmt='.1f'):
    """统一的坐标轴格式化函数：隐藏右侧和顶部边框，将坐标轴移到原点"""
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_position(('data', 0))
    ax.spines['left'].set_position(('data', 0))
    ax.tick_params(direction='inout', length=8)
    ax.grid(True, alpha=0.3)
    
    # 设置刻度（隐藏原点标签）
    if xticks is not None:
        ax.set_xticks(xticks)
        ax.set_xticklabels([f'{x:{fmt}}' if x != 0 else '' for x in xticks])
    if yticks is not None:
        ax.set_yticks(yticks)
        ax.set_yticklabels([f'{y:{fmt}}' if y != 0 else '' for y in yticks])

np.outer(np.arange(1, 11), np.arange(1, 13))

flag = np.outer(np.array([1, 0.1, 2]), np.ones(6))
print(flag)

flag2 = np.outer(np.array([1, 0.1, 2]), np.array([1, 1, 1, 3, 3, 3]))
print(flag2)

def show_image(M, cmap='rainbow'):
    """显示矩阵为图像"""
    plt.figure(figsize=(6, 4))
    plt.imshow(M, cmap=cmap, aspect='auto')
    plt.axis('off')
    plt.show()

print("flag 的可视化:")
show_image(flag)

print("flag2 的可视化:")
show_image(flag2)

np.random.seed(42)
w = 300

# 创建随机秩-1矩阵
v = np.concatenate([[1, 0.4], np.random.rand(50)])
u = np.random.rand(w)
image = np.outer(v, u)

print("随机秩-1矩阵:")
show_image(image)

image2 = np.outer(np.concatenate([[1, 0.4], np.random.rand(50)]), np.random.rand(w)) + \
         np.outer(np.random.rand(52), np.random.rand(w))

print("随机秩-2矩阵:")
show_image(image2)

noisy_image = image + 0.03 * np.random.randn(*image.shape)

print("原始秩-1矩阵:")
show_image(image)

print("带噪声的矩阵:")
show_image(noisy_image)

print("图像的前两行（前20列）:")
show_image(image[:2, :20])
print(image[:2, :20])

xx = image[0, :]
yy = image[1, :]
xs = noisy_image[0, :]
ys = noisy_image[1, :]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(xs, ys, label='带噪声', alpha=0.3, s=16, marker='.')
ax.scatter(xx, yy, label='秩-1', alpha=0.3, s=9, marker='s', c='red')

ax.set_title('绘制秩-1矩阵会得到一条直线')
ax.set_xlim(-0.1, 1.1)
ax.set_ylim(-0.1, 0.65)
ax.legend(loc='upper right', facecolor='white', edgecolor='black', frameon=True)

format_axis(ax, xticks=[0.25, 0.50, 0.75, 1.00], yticks=[0.2, 0.4, 0.6])
plt.show()

xs_centered = xs - np.mean(xs)
ys_centered = ys - np.mean(ys)

fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
ax.scatter(xs_centered, ys_centered, s=25, alpha=0.5)
ax.axis('equal')

format_axis(ax, xticks=[-0.4, -0.2, 0.2, 0.4], yticks=[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3])
plt.show()

fig, ax = plt.subplots(figsize=(6, 4), dpi=100)

# 绘制投影线
for i in range(len(xs_centered)):
    plt.plot([xs_centered[i], xs_centered[i]], [ys_centered[i], 0], 
             'k--', alpha=0.05)

ax.scatter(xs_centered, ys_centered, s=25, alpha=0.5)
ax.scatter(xs_centered, np.zeros_like(xs_centered), s=15, alpha=0.1, c='brown')
ax.axis('equal')

format_axis(ax, xticks=[-0.4, -0.2, 0.2, 0.4], yticks=[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3])
plt.show()

print(f"平均绝对偏差: {np.mean(np.abs(xs_centered))}")

sigma_x = np.sqrt(np.mean(xs_centered**2))
sigma_y = np.sqrt(np.mean(ys_centered**2))

print(f"σ_x = {sigma_x}")
print(f"σ_y = {sigma_y}")

fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
ax.scatter(xs_centered, ys_centered, s=25, alpha=0.5)
ax.axis('equal')

# 绘制标准差边界
ax.axvline(x=2*sigma_x, color='green', linestyle='--', linewidth=2)
ax.axvline(x=-2*sigma_x, color='green', linestyle='--', linewidth=2)
ax.axhline(y=2*sigma_y, color='blue', linestyle='--', linewidth=2)
ax.axhline(y=-2*sigma_y, color='blue', linestyle='--', linewidth=2)

# 标注标准差值
ax.text(2*sigma_x - 0.02, 0.02, r'$2\sigma_x$', color='green', fontsize=16, ha='right', va='bottom')
ax.text(-2*sigma_x + 0.02, 0.02, r'$-2\sigma_x$', color='green', fontsize=16, ha='left', va='bottom')
ax.text(0.02, 2*sigma_y + 0.02, r'$2\sigma_y$', color='blue', fontsize=16, ha='left', va='bottom')
ax.text(0.02, -2*sigma_y - 0.02, r'$-2\sigma_y$', color='blue', fontsize=16, ha='left', va='top')

format_axis(ax, xticks=[-0.4, -0.2, 0.2, 0.4], yticks=[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3])
plt.show()

M = np.vstack([xs_centered, ys_centered])
print(f"数据矩阵 M 的形状: {M.shape}")
print(M[:, :5])

def rotation_matrix(theta):
    """创建旋转矩阵"""
    return np.array([
        [np.cos(theta), np.sin(theta)],
        [-np.sin(theta), np.cos(theta)]
    ])

def plot_rotation_effects(degrees=28):
    """展示旋转与投影的效果"""
    theta = np.pi * degrees / 180  # 转换为弧度
    
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    
    # 左图：原始数据和投影
    ax1.scatter(M[0, :], M[1, :], s=10, alpha=0.5)
    direction = np.array([[np.cos(theta), np.sin(theta)]])
    projections = (direction @ M) * direction.T
    ax1.scatter(projections[0, :], projections[1, :], s=5, alpha=0.1, c='green')
    
    # 绘制投影线
    for i in range(M.shape[1]):
        ax1.plot([M[0, i], projections[0, i]], [M[1, i], projections[1, i]], 
                 'k--', alpha=0.02)
    
    # 绘制方向箭头
    ax1.annotate('', xy=(0.5*np.cos(theta), 0.5*np.sin(theta)), 
                xytext=(-0.5*np.cos(theta), -0.5*np.sin(theta)),
                arrowprops=dict(arrowstyle='->', color='red', lw=2, alpha=0.3))
    
    ax1.axis('tight')
    ax1.set_aspect('equal')
    format_axis(ax1, xticks=[-0.50, -0.25, 0.25, 0.50], yticks=[-0.50, -0.25, 0.25, 0.50], fmt='.2f')
    
    # 右图：旋转后的数据
    M_rotated = rotation_matrix(theta) @ M
    sigma = np.std(M_rotated[0, :])
    
    ax2.scatter(M_rotated[0, :], M_rotated[1, :], s=10, alpha=0.3)
    ax2.scatter(M_rotated[0, :], np.zeros(M.shape[1]), s=8, alpha=0.1, c='green')
    
    # 绘制投影线和标准差边界
    for i in range(M.shape[1]):
        ax2.plot([M_rotated[0, i], M_rotated[0, i]], [M_rotated[1, i], 0], 
                 'k--', alpha=0.02)
    ax2.axvline(x=2*sigma, color='green', linestyle='--', linewidth=2)
    ax2.axvline(x=-2*sigma, color='green', linestyle='--', linewidth=2)
    ax2.text(2*sigma + 0.02, 0.02, r'$2\sigma$', color='green', fontsize=16, ha='left', va='bottom')
    ax2.text(-2*sigma - 0.02, 0.02, r'$-2\sigma$', color='green', fontsize=16, ha='right', va='bottom')
    
    ax2.set_title(f'σ = {sigma:.4f}', fontsize=20)
    ax2.axis('tight')
    ax2.set_aspect('equal')
    format_axis(ax2, xticks=[-0.50, -0.25, 0.25, 0.50], yticks=[-0.50, -0.25, 0.25, 0.50], fmt='.2f')
    
    plt.tight_layout()
    plt.show()

interact(plot_rotation_effects, 
         degrees=FloatSlider(min=0, max=360, step=1, value=28, description='角度'));

def variance_in_direction(theta):
    """计算给定方向上的方差"""
    M_rotated = rotation_matrix(theta) @ M
    return np.var(M_rotated[0, :])

# 计算所有角度的方差
angles = np.linspace(0, 2*np.pi, 361)
variances = [variance_in_direction(a) for a in angles]

plt.figure(figsize=(6, 4))
plt.plot(np.degrees(angles), variances)
plt.xlabel('θ (度)')
plt.ylabel('θ 方向上的方差')
plt.grid(True, alpha=0.3)
plt.show()

theta_values = np.arange(0, 2*np.pi, 0.0001)
f_values = [variance_in_direction(t) for t in theta_values]

theta_max = theta_values[np.argmax(f_values)]
theta_min = theta_values[np.argmin(f_values)]

f_max = variance_in_direction(theta_max)
f_min = variance_in_direction(theta_min)

print(f"最大方差方向: θ = {np.degrees(theta_max):.2f}°")
print(f"最小方差方向: θ = {np.degrees(theta_min):.2f}°")
print(f"最大方差: {f_max:.6f}")
print(f"最小方差: {f_min:.6f}")

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(xs_centered, ys_centered, s=25, alpha=0.3)

# 绘制主方向箭头
max_arrow = 2 * np.sqrt(f_max)
min_arrow = 2 * np.sqrt(f_min)
ax.annotate('', xy=(max_arrow*np.cos(theta_max), max_arrow*np.sin(theta_max)), 
            xytext=(0, 0),
            arrowprops=dict(arrowstyle='->', color='red', lw=3))
ax.annotate('', xy=(min_arrow*np.cos(theta_min), min_arrow*np.sin(theta_min)), 
            xytext=(0, 0),
            arrowprops=dict(arrowstyle='->', color='red', lw=3))

ax.axis('tight')
format_axis(ax, xticks=[-0.4, -0.2, 0.2, 0.4], yticks=[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3])
plt.show()

# 构建椭圆
theta_ellipse = np.linspace(0, 2*np.pi, 100)
circle = np.vstack([np.cos(theta_ellipse), np.sin(theta_ellipse)])
stretch = np.array([[2 * np.sqrt(f_max), 0], [0, 2 * np.sqrt(f_min)]])
ellipse = rotation_matrix(-theta_max) @ stretch @ circle

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(xs_centered, ys_centered, s=25, alpha=0.3)
ax.fill(ellipse[0, :], ellipse[1, :], alpha=0.4, color='orange', label='拟合椭圆')

# 绘制主方向箭头
ax.annotate('', xy=(max_arrow*np.cos(theta_max), max_arrow*np.sin(theta_max)), 
            xytext=(0, 0),
            arrowprops=dict(arrowstyle='->', color='red', lw=3))
ax.annotate('', xy=(min_arrow*np.cos(theta_min), min_arrow*np.sin(theta_min)), 
            xytext=(0, 0),
            arrowprops=dict(arrowstyle='->', color='red', lw=3))

ax.axis('tight')
ax.legend()
format_axis(ax, xticks=[-0.4, -0.2, 0.2, 0.4], yticks=[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3])
plt.show()

# 三维数据
data_3d = noisy_image[:3, :]

fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data_3d[0, :], data_3d[1, :], data_3d[2, :], alpha=0.5, s=10)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()

M_example = np.array([[2, 1], [1, 1]])
U, S, Vt = np.linalg.svd(M_example)

print("原始矩阵 M:")
print(M_example)
print("\nU (左奇异向量):")
print(U)
print("\n奇异值 Σ:")
print(S)
print("\nV^T (右奇异向量):")
print(Vt)

print("\n验证: U @ diag(Σ) @ Vt:")
print(U @ np.diag(S) @ Vt)

# 生成单位圆盘中的随机点
np.random.seed(42)
n_points = 2000
points = []
while len(points) < n_points:
    p = np.random.uniform(-1, 1, 2)
    if p[0]**2 + p[1]**2 < 1:
        points.append(p)
unit_disc = np.array(points).T

plt.figure(figsize=(6, 6))
plt.scatter(unit_disc[0, :], unit_disc[1, :], alpha=0.5, s=3)
plt.axis('equal')
plt.grid(True, alpha=0.3)
plt.show()

def plot_svd_effect(t=0.5):
    """展示 SVD 的效果：旋转 + 拉伸"""
    M = np.array([[1 + t, t], [t, 1]])
    transformed = M @ unit_disc
    U, S, Vt = np.linalg.svd(M)
    stretched = np.diag(S) @ unit_disc
    
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    
    # 左图：拉伸效果
    ax1.scatter(unit_disc[0, :], unit_disc[1, :], alpha=0.5, s=3, label='原始')
    ax1.scatter(stretched[0, :], stretched[1, :], alpha=0.2, s=3, label='拉伸后')
    ax1.set_xlim(-3, 3)
    ax1.set_ylim(-3, 3)
    ax1.set_title('拉伸效果')
    ax1.set_aspect('equal')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 右图：拉伸 + 旋转效果
    ax2.scatter(unit_disc[0, :], unit_disc[1, :], alpha=0.5, s=3, label='原始')
    ax2.scatter(transformed[0, :], transformed[1, :], alpha=0.2, s=3, label='拉伸+旋转后')
    ax2.set_xlim(-3, 3)
    ax2.set_ylim(-3, 3)
    ax2.set_title(f'拉伸 + 旋转效果 (t = {t:.2f})')
    ax2.set_aspect('equal')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

interact(plot_svd_effect, 
         t=FloatSlider(min=0, max=1, step=0.01, value=0.5, description='t'));

# 创建反对称矩阵
dim = M.shape[1]
A = np.random.randn(dim, dim)
A = A - A.T  # 反对称化

print(f"反对称矩阵 A 的形状: {A.shape}")
print(f"验证 A = -A^T: {np.allclose(A, -A.T)}")

def plot_high_dim_rotation(t=0.0):
    """展示高维旋转的效果"""
    orthogonal_matrix = expm(t * A)  # 矩阵指数给出正交矩阵
    M_rotated = M @ orthogonal_matrix
    
    plt.figure(figsize=(6, 4))
    plt.scatter(M[0, :], M[1, :], alpha=0.5, s=10, label='原始')
    plt.scatter(M_rotated[0, :], M_rotated[1, :], alpha=0.5, s=10, label='旋转后')
    plt.xlim(-0.6, 0.6)
    plt.ylim(-0.3, 0.3)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

interact(plot_high_dim_rotation, 
         t=FloatSlider(min=0, max=1, step=0.0002, value=0.0, description='t'));

# 计算完整的 SVD
U, S, Vt = np.linalg.svd(M, full_matrices=True)

print(f"奇异值: {S}")
print(f"\nU 的形状: {U.shape}")
print(f"奇异值数组 S 的形状: {S.shape}")
print(f"V^T 的形状: {Vt.shape}")

# 应用 V 变换：将数据投影到主成分方向
M_transformed = M @ Vt.T

plt.figure(figsize=(6, 4))
plt.scatter(M_transformed[0, :], M_transformed[1, :], alpha=0.5, s=10)
plt.xlabel('第一主成分')
plt.ylabel('第二主成分')
plt.title('PCA 变换后的数据')
plt.axis('equal')
plt.xlim(-5, 5)
plt.grid(True, alpha=0.3)
plt.show()

程序设计与计算思维¶

第 9 讲：主成分分析¶

问题引入¶

准备工作¶

矩阵的秩¶

外积与乘法表的可视化¶

矩阵的秩与外积的关系¶

噪声的影响¶

视角转换：图像作为数据¶

从图像到数据的散点图¶

测量数据云的“大小”¶

测量投影宽度¶

均方根距离：标准差¶

变量的相关性 (Correlation)¶

旋转坐标系与投影¶

交互式展示：旋转与投影¶

寻找方差最大的方向¶

主成分 (Principal Component)¶

拟合数据椭圆¶

推广到高维空间¶

奇异值分解 (SVD)¶

高维旋转的反向视角¶

本讲小结¶