# ---------------------- PyTorch版(推荐)----------------------
import torch
def cosine_similarity_torch(vec1: torch.Tensor, vec2: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """Cosine similarity in PyTorch (GPU-capable, batch-aware).

    :param vec1: first tensor, shape [n] or [batch1, n]
    :param vec2: second tensor, shape [n] or [batch2, n]
    :param eps: small constant added to each norm to avoid division by zero
    :return: a [batch1, batch2] similarity matrix when both inputs are 2-D,
             otherwise the paired similarity reduced over the last axis
    """
    # L2-normalize along the feature axis; the dot product of unit vectors
    # equals the cosine of the angle between them.
    unit1 = vec1 / (vec1.norm(dim=-1, keepdim=True) + eps)
    unit2 = vec2 / (vec2.norm(dim=-1, keepdim=True) + eps)
    if vec1.ndim == 2 and vec2.ndim == 2:
        # Pairwise matrix: [b1, n] @ [n, b2] -> [b1, b2] (retrieval use case).
        return unit1 @ unit2.t()
    # Paired vectors: element-wise product summed over the feature axis.
    return (unit1 * unit2).sum(dim=-1)
# ---------------------- TensorFlow版 ----------------------
import tensorflow as tf
def cosine_similarity_tf(vec1: tf.Tensor, vec2: tf.Tensor, eps: float = 1e-8) -> tf.Tensor:
    """Cosine similarity in TensorFlow (batch-aware).

    Fix: the original called ``tf.nn.l2_normalize`` and silently ignored the
    ``eps`` parameter. Normalization is now done explicitly so ``eps`` guards
    the division exactly as in the PyTorch variant, keeping the two framework
    implementations numerically consistent.

    :param vec1: first tensor, shape [n] or [batch1, n]
    :param vec2: second tensor, shape [n] or [batch2, n]
    :param eps: small constant added to each norm to avoid division by zero
    :return: a [batch1, batch2] similarity matrix when both inputs are 2-D,
             otherwise the paired similarity reduced over the last axis
    """
    # Explicit L2 normalization so eps is actually honored.
    vec1_norm = vec1 / (tf.norm(vec1, axis=-1, keepdims=True) + eps)
    vec2_norm = vec2 / (tf.norm(vec2, axis=-1, keepdims=True) + eps)
    if len(vec1.shape) == 2 and len(vec2.shape) == 2:
        # Pairwise matrix: [b1, n] @ [n, b2] -> [b1, b2] (retrieval use case).
        return tf.matmul(vec1_norm, tf.transpose(vec2_norm))
    # Paired vectors: element-wise product summed over the feature axis.
    return tf.reduce_sum(vec1_norm * vec2_norm, axis=-1)
# ---------------------- 框架版测试 ----------------------
if __name__ == "__main__":
    # --- PyTorch demo (runs on the GPU when one is available) ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    a = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32).to(device)
    b = torch.tensor([[1, 2, 3], [7, 8, 9]], dtype=torch.float32).to(device)
    print("PyTorch批量相似度矩阵:\n", cosine_similarity_torch(a, b).cpu().numpy())
    # --- TensorFlow demo ---
    a_tf = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32)
    b_tf = tf.constant([[1, 2, 3], [7, 8, 9]], dtype=tf.float32)
    print("TensorFlow批量相似度矩阵:\n", cosine_similarity_tf(a_tf, b_tf).numpy())
💬 评论