初めに

超高速&格安LLMプラットフォームがあるということなので、Google Colabで試してみます。

環境

Google Colab CPUのみ(ハイメモリ)

動かしてみる

まずは必要なパッケージをインストールします

!pip install --upgrade fireworks-ai

import time
import fireworks.client

# Demo script: send one prompt to a Fireworks-hosted model, report how long
# the call took, and print the generated text.

# Set the API key. "api key" is a placeholder; substitute your own key and
# avoid committing a real key to a shared notebook.
fireworks.client.api_key = "api key"

# Record the start of the call. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals, whereas
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()

# Call the completion API
completion = fireworks.client.Completion.create(
    model="accounts/fireworks/models/elyza-japanese-llama-2-7b-fast-instruct",
    prompt="まどマギでかわいいキャラクターとその特徴は？",
    n=1,
    max_tokens=150,
    temperature=0.1,
    top_p=0.9,
)

# Record the end of the call
end_time = time.perf_counter()

# Compute and display the elapsed time (network round trip plus generation)
response_time = end_time - start_time
print(f"API Response Time: {response_time} seconds")

# Print the text of the first returned choice
print(completion.choices[0].text)