OpenCV 和 PyTorch3D 相机投影

OpenCV 和 PyTorch3D 相机投影

OpenCV Pinhole Camera: https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html

Pytorch3D Camera: https://pytorch3d.org/docs/cameras

OpenCV 坐标系

OpenCV 的世界坐标系、相机坐标系都是右手系,+X 指向右侧,+Y 指向下方,+Z 指向相机前方。
图像(像素)坐标系 +X 指向右侧,+Y 指向下方,原点在图像左上角,主点位于 (cx,cy)。

OpenCV Pinhole Camera Model

Pytorch3D 坐标系

Pytorch3D 的世界坐标系、相机坐标系、NDC 坐标系都是右手系,+X 指向左侧,+Y 指向上方。
屏幕坐标系 +X 指向右侧,+Y 指向下方,原点在左上方角落。

Pytorch3D Coordinate Systems

1
2
3
4
5
import torch
from pytorch3d.renderer import PerspectiveCameras, camera_conversions

# Identity extrinsics (no rotation, no translation), each with a leading
# batch dimension of one camera.
_R = torch.eye(3).unsqueeze(0)  # (1, 3, 3)
_T = torch.zeros((1, 3))  # (1, 3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def opencv_project(points, opencv_camera):
    """
    Project 3D points with an OpenCV pinhole camera.

    Args:
        points: Tensor of shape (P, 3) or (N, P, 3) holding (x, y, z).
        opencv_camera: Intrinsic matrix of shape (3, 3) or (N, 3, 3). A
            single (3, 3) matrix is broadcast over a batch of points.
    Returns:
        Tensor whose last dimension holds (u, v, 1 / z): the first two
        channels are the camera-matrix product divided by its third
        channel, the last channel is the reciprocal of that third channel
        (kept to match the PyTorch3D output this file compares against).
        The shape equals that of ``points``, except that (P, 3) points with
        an (N, 3, 3) camera broadcast to (N, P, 3).
    Raises:
        ValueError: if ``points`` is not 2- or 3-dimensional or its last
            dimension is not 3.
    """
    # Reject bad shapes when ANY requirement fails; chaining the checks
    # with ``and`` would let e.g. a (P, 4) tensor slip through.
    if points.dim() not in (2, 3) or points.shape[-1] != 3:
        msg = "Expected points to have shape (P, 3) or (N, P, 3): got shape %r"
        raise ValueError(msg % repr(points.shape))

    # (..., 3, 3) @ (..., 3, P) -> (..., 3, P) -> (..., P, 3).
    # matmul handles both the unbatched and batched cases and broadcasts a
    # single camera matrix without materialising N copies of it.
    homogeneous = torch.matmul(
        opencv_camera, points.transpose(-1, -2)
    ).transpose(-1, -2)

    # Build the result out of place so the input is never touched and the
    # function stays usable under autograd.
    depth = homogeneous[..., 2:]
    return torch.cat((homogeneous[..., :2] / depth, 1.0 / depth), dim=-1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Image size in pixels; the principal point sits at the image centre.
W = H = 256
cx = 0.5 * W
cy = 0.5 * H
f = 1000  # focal length, used for both the x and y entries of cam_mat

# Pinhole intrinsic matrix, shape (3, 3).
cam_mat = torch.tensor(
    [
        [f, 0, cx],
        [0, f, cy],
        [0, 0, 1],
    ]
)

# Sample 3D points to project, shape (2, 3).
points = torch.tensor(
    [
        [2.0, 1.0, 3.0],
        [1.2, 3.2, 1.5],
    ]
)
# Alternative batched input of shape (2, 2, 3):
# points = torch.rand((2, 2, 3))


def test_projection():
    """
    Compare three projections of the module-level ``points``.

    Projects ``points`` with ``opencv_project``, with a PyTorch3D camera
    built from the OpenCV parameters, and with a hand-built
    ``PerspectiveCameras`` (after flipping x and y), then prints each
    result and whether the PyTorch3D outputs match the OpenCV one.
    """
    print('Input 3D points\n', points.shape)

    ocvcam_pix = opencv_project(points, cam_mat)
    print('\nOpenCV projection\n', ocvcam_pix.shape, '\n', ocvcam_pix)

    # PyTorch3D documents image sizes as (height, width), not (width, height).
    # The two orders only coincide here because the demo image is square.
    image_size = torch.tensor([[H, W]])
    p3docv_cam = camera_conversions._cameras_from_opencv_projection(_R, _T, cam_mat[None], image_size)
    p3docv_pix = p3docv_cam.transform_points_screen(points)
    print('\nPytorch3D opencv projection\n', p3docv_pix.shape, '\n', p3docv_pix)

    # in_ndc=False: focal length and principal point are given in pixels.
    p3dpsp_cam = PerspectiveCameras(focal_length=f, principal_point=((cx, cy),), image_size=((H, W),), in_ndc=False)
    # Convert points from OpenCV (+X right, +Y down) to Pytorch3D (+X left, +Y up) camera space
    points_flipxy = points.clone()
    points_flipxy[..., 0:2] = -points_flipxy[..., 0:2]
    p3dpsp_pix = p3dpsp_cam.transform_points_screen(points_flipxy)
    print('\nPytorch3D PerspectiveCameras projection\n', p3dpsp_pix.shape, '\n', p3dpsp_pix)

    print('\nCompare OpenCV projection and Pytorch3D opencv projection:', ocvcam_pix.allclose(p3docv_pix))
    print('Compare OpenCV projection and PerspectiveCameras projection:', ocvcam_pix.allclose(p3dpsp_pix))


# Run the comparison at module level: prints each projection result and the
# two allclose checks against the OpenCV projection.
test_projection()
Input 3D points
 torch.Size([2, 3])

OpenCV projection
 torch.Size([2, 3]) 
 tensor([[7.9467e+02, 4.6133e+02, 3.3333e-01],
        [9.2800e+02, 2.2613e+03, 6.6667e-01]])

Pytorch3D opencv projection
 torch.Size([2, 3]) 
 tensor([[7.9467e+02, 4.6133e+02, 3.3333e-01],
        [9.2800e+02, 2.2613e+03, 6.6667e-01]])

Pytorch3D PerspectiveCameras projection
 torch.Size([2, 3]) 
 tensor([[7.9467e+02, 4.6133e+02, 3.3333e-01],
        [9.2800e+02, 2.2613e+03, 6.6667e-01]])

Compare OpenCV projection and Pytorch3D opencv projection: True
Compare OpenCV projection and PerspectiveCameras projection: True