强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

LM Studio 本地模型使用指南 / 06 - 开发者集成

开发者集成

使用 Python、JavaScript 等语言将 LM Studio 集成到你的开发工作流中。

6.1 集成概述

集成方式

LM Studio 提供的集成方式:

1. OpenAI 兼容 API(最推荐)
   ├── 直接使用 OpenAI SDK
   ├── 现有调用代码几乎无需修改
   └── 只需更改 base_url

2. REST API
   ├── 使用 HTTP 客户端直接调用
   ├── 适合任意编程语言
   └── 最大的灵活性

3. LangChain 集成
   ├── 使用 LangChain 的 OpenAI 兼容接口
   ├── 适合构建复杂的 LLM 应用
   └── 支持链式调用、代理等高级功能

代码迁移策略

从 OpenAI API 迁移到本地模型:

原始代码(OpenAI):
# Original setup: only an API key is passed, no base_url override.
client = OpenAI(api_key="sk-xxx")

修改后(LM Studio):
# Same constructor, now pointed at the local LM Studio endpoint.
client = OpenAI(
    base_url="http://localhost:1234/v1",  # address of the local server
    api_key="lm-studio"  # placeholder; the local server accepts any value
)

仅需修改两行代码,其余完全不变!

6.2 Python 集成

环境准备

# Create and activate a virtual environment
python -m venv lmstudio-env
source lmstudio-env/bin/activate  # Linux/macOS
# On Windows, activate with: lmstudio-env\Scripts\activate

# Install dependencies
pip install openai requests httpx

# Optional: install LangChain and its OpenAI integration package
pip install langchain langchain-openai

OpenAI SDK 完整示例

"""LM Studio Python 集成完整示例"""

from openai import OpenAI
from typing import Generator

class LMStudioClient:
    """Thin wrapper around the OpenAI SDK client for an LM Studio server.

    The instance owns one ``OpenAI`` client bound to ``base_url`` and a
    default model name that requests use unless told otherwise.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:1234/v1",
        model: str = "qwen2.5-7b-instruct"
    ):
        """Create the underlying SDK client.

        Args:
            base_url: Root URL of the OpenAI-compatible endpoint.
            model: Model identifier sent with each request.
        """
        self.client = OpenAI(
            base_url=base_url,
            api_key="lm-studio"
        )
        self.model = model

    @staticmethod
    def _build_messages(message: str, system_prompt: str = None) -> list:
        """Return the message list: optional system turn, then the user turn."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": message})
        return messages

    def chat(
        self,
        message: str,
        system_prompt: str = None,
        temperature: float = 0.7,
        max_tokens: int = 2048
    ) -> str:
        """Send one user message and return the complete reply.

        Args:
            message: Text of the user turn.
            system_prompt: Optional system turn; omitted when falsy.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Returns:
            The content of the first choice, or ``""`` when the server
            returns no text content.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self._build_messages(message, system_prompt),
            temperature=temperature,
            max_tokens=max_tokens
        )
        # The SDK types ``content`` as optional; honour the ``-> str`` contract.
        return response.choices[0].message.content or ""

    def chat_stream(
        self,
        message: str,
        system_prompt: str = None,
        temperature: float = 0.7,
        max_tokens: int = 2048
    ) -> Generator[str, None, None]:
        """Send one user message and yield the reply piece by piece.

        Args:
            message: Text of the user turn.
            system_prompt: Optional system turn; omitted when falsy.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Yields:
            Each non-empty content fragment, in arrival order.
        """
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=self._build_messages(message, system_prompt),
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        )
        for chunk in stream:
            # Some OpenAI-compatible servers emit chunks with an empty
            # ``choices`` list (e.g. a trailing usage chunk); skip them
            # instead of failing on ``choices[0]``.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                yield content

    def embedding(self, text: str, model: str = None) -> list:
        """Return the embedding vector for ``text``.

        Args:
            text: Input text to embed.
            model: Embedding model identifier. Falls back to ``self.model``
                when omitted, which keeps the earlier behaviour.
                NOTE(review): the default is a chat model; embedding
                requests presumably need a dedicated embedding model to be
                loaded and named here - confirm against the server setup.

        Returns:
            The embedding of the first (only) input item.
        """
        response = self.client.embeddings.create(
            model=model or self.model,
            input=text
        )
        return response.data[0].embedding


# Demo entry point
if __name__ == "__main__":
    studio = LMStudioClient()

    # One-shot request: the whole answer is returned at once
    answer = studio.chat("什么是微服务架构?")
    print(f"回复: {answer}\n")

    # Streaming request: echo every fragment as soon as it is yielded
    print("流式回复: ", end="")
    for piece in studio.chat_stream("用 Python 写一个快速排序"):
        print(piece, end="", flush=True)
    print()

异步客户端

"""异步 LM Studio 客户端"""

import asyncio
from openai import AsyncOpenAI

class AsyncLMStudioClient:
    """Asynchronous LM Studio client built on ``AsyncOpenAI``."""

    def __init__(self, base_url: str = "http://localhost:1234/v1"):
        """Create the underlying async SDK client.

        Args:
            base_url: Root URL of the OpenAI-compatible endpoint.
        """
        self.client = AsyncOpenAI(
            base_url=base_url,
            api_key="lm-studio"
        )

    async def chat(self, message: str, model: str = "qwen2.5-7b-instruct") -> str:
        """Send one user message and return the reply text.

        Args:
            message: Text of the user turn.
            model: Model identifier sent with the request.

        Returns:
            The content of the first choice, or ``""`` when the server
            returns no text content.
        """
        response = await self.client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": message}]
        )
        # The SDK types ``content`` as optional; honour the ``-> str`` contract.
        return response.choices[0].message.content or ""

    async def batch_chat(
        self,
        messages: list[str],
        model: str = "qwen2.5-7b-instruct",
        max_concurrency: int = None,
    ) -> list[str]:
        """Process several messages concurrently.

        Args:
            messages: User messages, one request each.
            model: Model identifier used for every request.
            max_concurrency: Upper bound on requests in flight at once.
                ``None`` (the default) starts all requests together, which
                is the earlier behaviour; a local server with limited
                hardware usually wants a small number here.

        Returns:
            Replies in the same order as ``messages``.

        Raises:
            ValueError: If ``max_concurrency`` is given and is less than 1.
        """
        if max_concurrency is None:
            return await asyncio.gather(*(self.chat(msg, model) for msg in messages))
        if max_concurrency < 1:
            raise ValueError("max_concurrency must be at least 1")

        limiter = asyncio.Semaphore(max_concurrency)

        async def _limited(msg: str) -> str:
            # Hold a semaphore slot for the duration of one request.
            async with limiter:
                return await self.chat(msg, model)

        return await asyncio.gather(*(_limited(msg) for msg in messages))


async def main():
    """Run one single request and one concurrent batch, printing the replies."""
    client = AsyncLMStudioClient()

    # Single request
    reply = await client.chat("你好")
    print(f"单个请求: {reply}")

    # Concurrent requests
    questions = [
        "什么是 Docker?",
        "什么是 Kubernetes?",
        "什么是微服务?"
    ]
    replies = await client.batch_chat(questions)
    for q, r in zip(questions, replies):
        print(f"Q: {q}\nA: {r}\n")


# Guarded so that importing this module does not fire network requests.
if __name__ == "__main__":
    asyncio.run(main())

LangChain 集成

"""使用 LangChain 集成 LM Studio"""

from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Chat model bound to the local LM Studio endpoint
llm = ChatOpenAI(
    model="qwen2.5-7b-instruct",
    temperature=0.7,
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
)

# Direct invocation with explicit message objects
messages = [
    SystemMessage(content="你是一个有帮助的助手"),
    HumanMessage(content="什么是 RAG?"),
]
response = llm.invoke(messages)
print(response.content)

# Prompt template -> model -> plain-string parser, composed into one chain
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个{role},请用{style}的方式回答"),
        ("user", "{question}"),
    ]
)
chain = prompt | llm | StrOutputParser()

# Blocking call: the template variables are filled from the dict
expert_inputs = {
    "role": "Python 专家",
    "style": "简洁明了",
    "question": "列表推导式和生成器表达式有什么区别?",
}
result = chain.invoke(expert_inputs)
print(result)

# Streaming call: print each fragment as the chain produces it
writer_inputs = {
    "role": "技术作家",
    "style": "通俗易懂",
    "question": "解释一下 REST API",
}
for chunk in chain.stream(writer_inputs):
    print(chunk, end="", flush=True)

6.3 JavaScript / TypeScript 集成

Node.js 环境

# Initialise the project
mkdir lmstudio-demo && cd lmstudio-demo
npm init -y

# Install the OpenAI SDK
npm install openai

# Optional: TypeScript support
npm install -D typescript @types/node

完整 Node.js 示例

// lmstudio-client.js
const OpenAI = require('openai');

/**
 * Convenience wrapper around the OpenAI SDK client for an LM Studio server.
 */
class LMStudioClient {
  /**
   * @param {{baseURL?: string, model?: string}} [options]
   *   `baseURL` is the endpoint root; `model` is the default model name.
   */
  constructor(options = {}) {
    this.client = new OpenAI({
      baseURL: options.baseURL || 'http://localhost:1234/v1',
      apiKey: 'lm-studio',
    });
    this.model = options.model || 'qwen2.5-7b-instruct';
  }

  /**
   * Build the message array: optional system turn, then the user turn.
   * @param {string} message
   * @param {string} [systemPrompt]
   * @returns {Array<{role: string, content: string}>}
   */
  _buildMessages(message, systemPrompt) {
    const messages = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({ role: 'user', content: message });
    return messages;
  }

  /**
   * Send one user message and resolve with the complete reply.
   * @param {string} message
   * @param {{model?: string, systemPrompt?: string, temperature?: number, maxTokens?: number}} [options]
   * @returns {Promise<string>} content of the first choice
   */
  async chat(message, options = {}) {
    const response = await this.client.chat.completions.create({
      model: options.model || this.model,
      messages: this._buildMessages(message, options.systemPrompt),
      // `??` instead of `||`: 0 is a legitimate temperature and must not be
      // replaced by the default.
      temperature: options.temperature ?? 0.7,
      // `||` is kept on purpose: a limit of 0 tokens is not meaningful.
      max_tokens: options.maxTokens || 2048,
    });

    return response.choices[0].message.content;
  }

  /**
   * Send one user message and consume the reply as a stream.
   * @param {string} message
   * @param {{model?: string, systemPrompt?: string, temperature?: number, maxTokens?: number, onChunk?: (chunk: string) => void}} [options]
   *   `onChunk` is invoked with every non-empty fragment as it arrives.
   * @returns {Promise<string>} the concatenation of all fragments
   */
  async chatStream(message, options = {}) {
    const request = {
      model: options.model || this.model,
      messages: this._buildMessages(message, options.systemPrompt),
      stream: true,
    };
    // Forward sampling options only when the caller supplied them, so calls
    // that pass none behave exactly as before.
    if (options.temperature !== undefined) {
      request.temperature = options.temperature;
    }
    if (options.maxTokens !== undefined) {
      request.max_tokens = options.maxTokens;
    }

    const stream = await this.client.chat.completions.create(request);

    const chunks = [];
    for await (const chunk of stream) {
      const content = chunk.choices[0]?.delta?.content || '';
      if (content) {
        chunks.push(content);
        if (options.onChunk) options.onChunk(content);
      }
    }
    return chunks.join('');
  }
}

// Demo: one blocking call followed by one streamed call
async function main() {
  const studio = new LMStudioClient();

  // Non-streaming request
  const answer = await studio.chat('什么是 GraphQL?');
  console.log('回复:', answer);

  // Streaming request: write each fragment to stdout as it arrives
  console.log('\n流式输出:');
  const echo = (piece) => process.stdout.write(piece);
  await studio.chatStream('写一首关于 JavaScript 的诗', { onChunk: echo });
  console.log();
}

main().catch(console.error);

TypeScript 类型定义

// types.ts
/** One turn of a chat conversation. */
export interface ChatMessage {
  /** Speaker of the turn. */
  role: 'system' | 'user' | 'assistant';
  /** Text of the turn. */
  content: string;
}

/** Per-call options; every field is optional. */
export interface ChatOptions {
  /** Model name for this call; the client falls back to its own default. */
  model?: string;
  /** When set, sent as a leading `system` message. */
  systemPrompt?: string;
  /** Sampling temperature. */
  temperature?: number;
  /** Completion token limit (the JavaScript client sends it as `max_tokens`). */
  maxTokens?: number;
  /** Callback for streamed fragments (the JavaScript client calls it per non-empty chunk). */
  onChunk?: (chunk: string) => void;
}

/** Constructor configuration for the client. */
export interface LMStudioConfig {
  /** Endpoint root; the client defaults to `http://localhost:1234/v1`. */
  baseURL?: string;
  /** Default model name; the client defaults to `qwen2.5-7b-instruct`. */
  model?: string;
}

// client.ts
import OpenAI from 'openai';
import type { ChatMessage, ChatOptions, LMStudioConfig } from './types';

/**
 * Typed wrapper around the OpenAI SDK client for an LM Studio server.
 */
export class LMStudioClient {
  private client: OpenAI;
  private model: string;

  /**
   * @param config Endpoint root and default model; both have local defaults.
   */
  constructor(config: LMStudioConfig = {}) {
    this.client = new OpenAI({
      baseURL: config.baseURL || 'http://localhost:1234/v1',
      apiKey: 'lm-studio',
    });
    this.model = config.model || 'qwen2.5-7b-instruct';
  }

  /** Build the message array: optional system turn, then the user turn. */
  private buildMessages(message: string, systemPrompt?: string): ChatMessage[] {
    const messages: ChatMessage[] = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({ role: 'user', content: message });
    return messages;
  }

  /**
   * Send one user message and resolve with the complete reply.
   *
   * @param message Text of the user turn.
   * @param options Per-call overrides; `maxTokens` is forwarded as
   *   `max_tokens` only when provided.
   * @returns Content of the first choice, or `''` when it is null.
   */
  async chat(message: string, options: ChatOptions = {}): Promise<string> {
    const response = await this.client.chat.completions.create({
      model: options.model || this.model,
      messages: this.buildMessages(message, options.systemPrompt),
      temperature: options.temperature ?? 0.7,
      // Left undefined when not supplied, so the field is dropped from the
      // JSON body and the server default applies as before.
      max_tokens: options.maxTokens,
    });

    return response.choices[0].message.content ?? '';
  }

  /**
   * Send one user message and consume the reply as a stream.
   *
   * @param message Text of the user turn.
   * @param options Per-call overrides; `onChunk` receives every non-empty
   *   fragment as it arrives.
   * @returns The concatenation of all fragments.
   */
  async chatStream(message: string, options: ChatOptions = {}): Promise<string> {
    const stream = await this.client.chat.completions.create({
      model: options.model || this.model,
      messages: this.buildMessages(message, options.systemPrompt),
      temperature: options.temperature,
      max_tokens: options.maxTokens,
      stream: true,
    });

    let text = '';
    for await (const chunk of stream) {
      const piece = chunk.choices[0]?.delta?.content ?? '';
      if (piece) {
        text += piece;
        options.onChunk?.(piece);
      }
    }
    return text;
  }
}

6.4 REST API 直接调用

使用 curl

# Basic chat request
curl http://localhost:1234/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen2.5-7b-instruct",
    "messages": [
      {"role": "user", "content": "Hello"}
    ]
  }'

# Streaming request (-N disables curl's output buffering)
curl -N http://localhost:1234/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen2.5-7b-instruct",
    "messages": [
      {"role": "user", "content": "写一个故事"}
    ],
    "stream": true
  }'

# List the available models
curl http://localhost:1234/v1/models

使用 Go

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

// ChatRequest is the JSON body sent to the chat completions endpoint.
// Stream has no omitempty tag, so it is always serialized.
type ChatRequest struct {
	Model    string    `json:"model"`
	Messages []Message `json:"messages"`
	Stream   bool      `json:"stream"`
}

// Message is one conversation turn inside a ChatRequest.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// ChatResponse decodes only the fields this example reads from the reply:
// the message content of each choice.
type ChatResponse struct {
	Choices []struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
}

// chat sends message as a single user turn to the local chat completions
// endpoint and returns the content of the first choice.
//
// It returns an error when the request cannot be encoded or sent, when the
// response cannot be read or decoded, when the server answers with a status
// other than 200, or when the response carries no choices.
func chat(message string) (string, error) {
	reqBody := ChatRequest{
		Model: "qwen2.5-7b-instruct",
		Messages: []Message{
			{Role: "user", Content: message},
		},
	}

	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return "", fmt.Errorf("encode request: %w", err)
	}

	resp, err := http.Post(
		"http://localhost:1234/v1/chat/completions",
		"application/json",
		bytes.NewBuffer(jsonData),
	)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read response: %w", err)
	}
	// Surface server-side failures with their body instead of decoding an
	// error payload into an empty ChatResponse.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status %s: %s", resp.Status, body)
	}

	var chatResp ChatResponse
	if err := json.Unmarshal(body, &chatResp); err != nil {
		return "", fmt.Errorf("decode response: %w", err)
	}
	// Indexing an empty slice would panic; report it as an error instead.
	if len(chatResp.Choices) == 0 {
		return "", fmt.Errorf("response contained no choices")
	}

	return chatResp.Choices[0].Message.Content, nil
}

// main asks one question and prints either the answer or the error.
func main() {
	answer, chatErr := chat("什么是容器化?")
	if chatErr == nil {
		fmt.Println(answer)
		return
	}
	fmt.Println("Error:", chatErr)
}

使用 Rust

use serde::{Deserialize, Serialize};
use reqwest;

/// JSON body sent to the chat completions endpoint.
#[derive(Serialize)]
struct ChatRequest {
    model: String,
    messages: Vec<Message>,
}

/// One conversation turn inside a `ChatRequest`.
#[derive(Serialize)]
struct Message {
    role: String,
    content: String,
}

/// Subset of the reply that this example reads: the list of choices.
#[derive(Deserialize)]
struct ChatResponse {
    choices: Vec<Choice>,
}

/// One entry of `ChatResponse::choices`.
#[derive(Deserialize)]
struct Choice {
    message: MessageContent,
}

/// The message of a `Choice`; only its text content is decoded.
#[derive(Deserialize)]
struct MessageContent {
    content: String,
}

/// Sends one user message to the local chat completions endpoint and prints
/// the content of the first choice.
///
/// # Errors
///
/// Returns an error when the request fails, when the server answers with an
/// error status, when the body cannot be decoded into `ChatResponse`, or
/// when the response carries no choices.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = reqwest::Client::new();

    let request = ChatRequest {
        model: "qwen2.5-7b-instruct".to_string(),
        messages: vec![
            Message {
                role: "user".to_string(),
                content: "什么是 Rust 的所有权系统?".to_string(),
            }
        ],
    };

    let response: ChatResponse = client
        .post("http://localhost:1234/v1/chat/completions")
        .json(&request)
        .send()
        .await?
        // Turn 4xx/5xx replies into errors instead of trying to decode an
        // error payload as a ChatResponse.
        .error_for_status()?
        .json()
        .await?;

    // `first()` avoids the index panic when the server returns no choices.
    let choice = response
        .choices
        .first()
        .ok_or("response contained no choices")?;
    println!("{}", choice.message.content);
    Ok(())
}

6.5 构建实际应用

应用一:文档问答系统

"""基于 LM Studio 的简单文档问答系统"""

from openai import OpenAI

class DocumentQA:
    def __init__(self):
        self.client = OpenAI(
            base_url="http://localhost:1234/v1",
            api_key="lm-studio"
        )
        self.documents: list[str] = []

    def add_document(self, text: str):
        """添加文档到知识库"""
        # 简单的分块策略
        chunks = [text[i:i+500] for i in range(0, len(text), 500)]
        self.documents.extend(chunks)

    def ask(self, question: str) -> str:
        """基于文档回答问题"""
        # 简单的关键词匹配检索(生产环境应使用向量检索)
        relevant_docs = []
        keywords = question.lower().split()
        for doc in self.documents:
            if any(kw in doc.lower() for kw in keywords):
                relevant_docs.append(doc[:300])

        context = "\n---\n".join(relevant_docs[:3]) if relevant_docs else "无相关文档"

        response = self.client.chat.completions.create(
            model="qwen2.5-7b-instruct",
            messages=[
                {
                    "role": "system",
                    "content": "根据提供的文档内容回答问题。如果文档中没有相关信息,请说明。"
                },
                {
                    "role": "user",
                    "content": f"文档内容:\n{context}\n\n问题:{question}"
                }
            ]
        )
        return response.choices[0].message.content

# Demo: load two short passages, then ask one question about each
qa = DocumentQA()

language_overview = """
Python 是一种高级编程语言,由 Guido van Rossum 于 1991 年创建。
Python 以其简洁的语法和丰富的库生态系统而闻名。
Python 支持多种编程范式,包括面向对象、函数式和过程式编程。
"""
release_notes = """
Python 3.12 引入了多项新特性:
- 改进的错误消息
- f-string 的改进
- 类型参数语法
- 性能提升
"""
for passage in (language_overview, release_notes):
    qa.add_document(passage)

for question in ("Python 是谁创建的?", "Python 3.12 有什么新特性?"):
    print(qa.ask(question))

应用二:代码审查助手

"""代码审查助手"""

from openai import OpenAI

# Shared SDK client for the local endpoint, used by review_code below
client = OpenAI(api_key="lm-studio", base_url="http://localhost:1234/v1")

def review_code(code: str, language: str = "python") -> str:
    """Ask the model to review ``code`` and return its report.

    Args:
        code: Source text to review.
        language: Language name, used in the reviewer persona and as the
            fence tag around the code.

    Returns:
        The content of the model's first choice.
    """
    # Reviewer persona plus the five-point rubric the report should follow.
    reviewer_brief = f"""你是一位资深的 {language} 开发者。
请审查以下代码,从以下方面给出评估:
1. 代码质量(1-10 分)
2. 潜在 bug
3. 性能问题
4. 安全隐患
5. 改进建议

请用 Markdown 格式输出。"""
    fenced_source = f"```{language}\n{code}\n```"
    conversation = [
        {"role": "system", "content": reviewer_brief},
        {"role": "user", "content": fenced_source},
    ]

    completion = client.chat.completions.create(
        model="qwen2.5-7b-instruct",
        messages=conversation,
        temperature=0.3,  # low temperature to keep the assessment objective
    )
    return completion.choices[0].message.content

# Demo input: a function that builds its SQL query with an f-string
code = """
def get_user(id):
    query = f"SELECT * FROM users WHERE id = {id}"
    result = db.execute(query)
    return result
"""

report = review_code(code, language="python")
print(report)

应用三:自动文档生成

"""自动为函数生成文档"""

from openai import OpenAI

# Shared SDK client for the local endpoint, used by generate_docstring below
client = OpenAI(api_key="lm-studio", base_url="http://localhost:1234/v1")

def generate_docstring(function_code: str) -> str:
    """Ask the model for a Google-style docstring for ``function_code``.

    Args:
        function_code: Source text of the Python function to document.

    Returns:
        The content of the model's first choice.
    """
    # Instructions: Google style, listed sections, docstring text only.
    writer_brief = """你是一个 Python 文档生成专家。
为给定的函数生成 Google 风格的 docstring。
包括:功能描述、参数说明、返回值说明、异常说明(如有)。
只输出 docstring 内容,不要包含其他文字。"""
    fenced_source = f"```python\n{function_code}\n```"
    conversation = [
        {"role": "system", "content": writer_brief},
        {"role": "user", "content": fenced_source},
    ]

    completion = client.chat.completions.create(
        model="qwen2.5-7b-instruct",
        messages=conversation,
        temperature=0.2,
    )
    return completion.choices[0].message.content

# Demo input: a small typed function with one validation branch
func = """
def calculate_bmi(weight_kg: float, height_m: float) -> float:
    if height_m <= 0:
        raise ValueError("Height must be positive")
    return weight_kg / (height_m ** 2)
"""

# Generate the docstring, then show it
doc = generate_docstring(function_code=func)
print(doc)

6.6 注意事项

| 注意事项 | 说明 |
| --- | --- |
| 错误处理 | 始终处理连接失败、超时等异常 |
| 超时设置 | 长文本生成可能需要较长的超时时间 |
| 并发控制 | 避免过多并发请求导致服务器过载 |
| 模型预热 | 首次请求可能较慢(模型需要先加载到内存) |
| API Key | 本地服务器接受任意值,但代码中应保持一致 |
| 速率限制 | 本地没有速率限制,但硬件资源有限 |

6.7 本章小结

| 要点 | 内容 |
| --- | --- |
| 集成方式 | OpenAI SDK 是最简单的方式 |
| 语言支持 | Python、JavaScript、Go、Rust 等均支持 |
| 异步支持 | 使用 AsyncOpenAI 或对应语言的异步 HTTP 客户端 |
| LangChain | 可通过 ChatOpenAI 类无缝集成 |
| 实际应用 | 文档问答、代码审查、文档生成等 |

扩展阅读