在CentOS上部署PyTorch模型可以通过多种方法实现,以下是一些常见的步骤和方法:
# Create and activate an isolated environment first, so the packages below
# are installed into it rather than into the system interpreter.
# CentOS ships Python 3 as `python3`; the unversioned `python` is Python 2
# (CentOS 7) or absent (CentOS 8+).
python3 -m venv myenv
source myenv/bin/activate
pip install torch torchvision torchaudio
Tracing:适用于不含数据相关控制流(如依赖输入值的 if/循环)的模型,通过示例输入执行一次前向推理并记录运算路径,使用 torch.jit.trace 生成 TorchScript 模块。
import torch
import torchvision
# Build the model and switch it to inference mode before tracing: the trace
# records the operations exactly as executed, so layers such as BatchNorm
# must already be in eval mode or their training behaviour gets baked in.
model = torchvision.models.resnet18()
model.eval()
# Tracing runs the model once on this example input and records the ops.
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("traced_model.pt")
Scripting:适用于包含数据相关控制流(如依赖输入值的 if/循环)的模型,直接在Python脚本中编写模型并使用 torch.jit.script 编译,分支和循环会被完整保留。
class MyModule(torch.nn.Module):
    """Toy module whose forward pass branches on the values of its input.

    Holds a single learnable ``(n, m)`` weight matrix initialised with
    uniform random values.
    """

    def __init__(self, n, m):
        """Create the ``(n, m)`` weight parameter."""
        super().__init__()
        self.weight = torch.nn.Parameter(torch.rand(n, m))

    def forward(self, input):
        """Return ``weight @ input`` if ``input`` sums to > 0, else ``weight + input``.

        The branch depends on the input data, which is why this module is
        compiled with ``torch.jit.script`` rather than traced.
        """
        if input.sum() > 0:
            output = self.weight.mv(input)
        else:
            output = self.weight + input
        return output
# Instantiate the module with a 10x20 weight, compile it with
# torch.jit.script, and save the compiled module to scripted_model.pt.
my_module = MyModule(10, 20)
sm = torch.jit.script(my_module)
sm.save("scripted_model.pt")
为了减小模型体积并提升推理速度,可以对模型进行量化(可能带来少量精度损失):
import torch.quantization as quantization
# Post-training static quantization (eager mode).
# The model must be in eval mode before observers are inserted.
model.eval()
model.qconfig = quantization.get_default_qconfig('fbgemm')
quantized_model = quantization.prepare(model, inplace=False)
# NOTE: calibration belongs here -- run representative inputs through
# `quantized_model` under torch.no_grad() so the observers record activation
# ranges; converting without calibration yields meaningless scales.
# NOTE(review): eager-mode static quantization also expects QuantStub /
# DeQuantStub around the float model (torchvision ships quantizable variants
# under torchvision.models.quantization) -- confirm for the model used here.
quantization.convert(quantized_model, inplace=True)
# A plain nn.Module has no .save() method (only TorchScript modules do), so
# persist the quantized weights with torch.save instead.
torch.save(quantized_model.state_dict(), "quantized_model.pt")
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# Example model definition
class SimpleCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution, ReLU, 2x2 max-pool, linear classifier.

    The linear layer is sized for 3-channel 32x32 inputs: the padded
    convolution keeps the 32x32 spatial size, pooling halves it to 16x16,
    and 16 channels give 16 * 16 * 16 flattened features. The output has
    10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(16 * 16 * 16, 10)

    def forward(self, x):
        """Map a ``(batch, 3, 32, 32)`` tensor to ``(batch, 10)`` scores."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
# Instantiate the model and load the trained weights.
model = SimpleCNN()
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only host; the model itself lives on the CPU here either way.
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
# Switch to inference mode before running predictions or exporting.
model.eval()
# Data preprocessing
def preprocess_image(image_path):
    """Load an image file and return a normalized ``(1, 3, 32, 32)`` tensor.

    The image is converted to RGB first, so grayscale, RGBA, and palette
    files still produce the three channels that the normalization constants
    and the model's first convolution expect.
    """
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # convert() returns a fully loaded copy, so the file can be closed
    # as soon as the with-block exits.
    with Image.open(image_path) as image:
        rgb_image = image.convert("RGB")
    image_tensor = transform(rgb_image).unsqueeze(0)  # add the batch dimension
    return image_tensor
# Inference
image_path = 'test_image.jpg'
input_tensor = preprocess_image(image_path)
# No gradients are needed for prediction.
with torch.no_grad():
    output = model(input_tensor)
# Index of the highest score along the class dimension; this replaces the
# legacy `.data` access and the discarded max value from torch.max.
predicted = output.argmax(dim=1)
print(f"Predicted class: {predicted.item()}")
# Export to ONNX, using a detached copy of the real input as the example
# that the exporter runs through the model.
dummy_input = input_tensor.detach().clone()
torch.onnx.export(
    model,
    dummy_input,
    "simple_cnn.onnx",
    verbose=True,
)
pip install onnxruntime
import onnx
import onnxruntime as ort
# Load the ONNX model and validate its structure.
onnx_model = onnx.load("simple_cnn.onnx")
onnx.checker.check_model(onnx_model)
# Create the inference session.
ort_session = ort.InferenceSession("simple_cnn.onnx")
# Inference: feed the preprocessed image as a NumPy array.
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name
ort_inputs = {input_name: input_tensor.numpy()}
ort_outs = ort_session.run([output_name], ort_inputs)
# ort_outs[0] holds the scores for the batch; take the argmax of the first
# (only) sample instead of printing the raw score vector.
print(f"Predicted class: {int(ort_outs[0][0].argmax())}")
pip install tensorrt
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Parse the exported ONNX model into a TensorRT network definition.
# Input and output shapes come from the ONNX file itself; OnnxParser has no
# register_input/register_output step.
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)
with open("simple_cnn.onnx", "rb") as onnx_file:
    if not parser.parse(onnx_file.read()):
        parse_errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
        raise RuntimeError("Failed to parse simple_cnn.onnx:\n" + "\n".join(parse_errors))

# Build the engine. build_cuda_engine() was removed in TensorRT 8; the
# supported path is to build a serialized plan and deserialize it.
config = builder.create_builder_config()
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    raise RuntimeError("TensorRT failed to build an engine from simple_cnn.onnx")
runtime = trt.Runtime(TRT_LOGGER)
engine = runtime.deserialize_cuda_engine(serialized_engine)

# Create the execution context.
context = engine.create_execution_context()

# Inference. TensorRT reads and writes device memory, so the buffers are CUDA
# tensors passed by address. The input keeps the shape it was exported with
# (no reshape to 224x224), and the output is (1, 10) to match the model's
# ten class scores.
device_input = input_tensor.to(device="cuda", dtype=torch.float32).contiguous()
device_output = torch.empty((1, 10), dtype=torch.float32, device="cuda")
# Bindings are ordered input first, then output, for this single-input,
# single-output network.
if not context.execute_v2(bindings=[device_input.data_ptr(), device_output.data_ptr()]):
    raise RuntimeError("TensorRT inference failed")
output_tensor = device_output.cpu().numpy()
predicted_class = int(output_tensor.argmax())
print(f"Predicted class: {predicted_class}")
通过以上步骤,你可以在CentOS上成功部署PyTorch模型,并根据需要选择使用ONNX Runtime或TensorRT进行推理。
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>