Environment
- Google Colab A100
Preparation
Install the required libraries.
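If you want to confirm which GPU the runtime was actually assigned, the usual check from a notebook cell is:

!nvidia-smi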
!pip install transformers==4.32.0 accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib
Run
fp16 and bf16 do not seem to be supported and did not work for me (a sketch of the attempted half-precision load is shown below for reference). The audio data used is the sample file.
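For reference, a half-precision load would look roughly like the sketch below, which just adds the standard torch_dtype argument to from_pretrained. Something along these lines is what failed to run in this setup, so the working example that follows loads the model in the default precision.

import torch
from transformers import AutoModelForCausalLM

# Attempted half-precision load (did NOT run in this environment; for reference only)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-Audio-Chat",
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # or torch.float16
).eval()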
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import torch

torch.manual_seed(1234)

# Note: The default behavior now has injection attack prevention off.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-Audio-Chat", trust_remote_code=True)

# use cuda device
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-Audio-Chat", device_map="cuda", trust_remote_code=True).eval()

# Specify hyperparameters for generation (No need to do this if you are using transformers>4.32.0)
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-Audio-Chat", trust_remote_code=True)

# 1st dialogue turn
query = tokenizer.from_list_format([
    {'audio': 'https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Audio/1272-128104-0000.flac'},  # Either a local path or an url
    {'text': '話している人は何と言っていますか?'},  # "What is the speaker saying?"
])
response, history = model.chat(tokenizer, query=query, history=None)
print(response)
# The person says: "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel".

# 2nd dialogue turn
response, history = model.chat(tokenizer, '会話の中で大事な部分を要約してください', history=history)  # "Summarize the important part of the conversation"
print(response)
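As the comment in the code notes, the audio entry also accepts a local path instead of a URL. Below is a minimal sketch of running the same first turn on a locally downloaded copy of the sample file, reusing the tokenizer and model loaded above (the file name sample.flac is arbitrary):

import urllib.request

# Download the sample audio into the local filesystem (any writable path works)
audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Audio/1272-128104-0000.flac"
urllib.request.urlretrieve(audio_url, "sample.flac")

# Same query as above, but pointing at the local file
query = tokenizer.from_list_format([
    {'audio': 'sample.flac'},
    {'text': '話している人は何と言っていますか?'},  # "What is the speaker saying?"
])
response, history = model.chat(tokenizer, query=query, history=None)
print(response)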