跳到主要内容

数据集文件格式

对话数据集 & dataset_info.json

[
{
"instruction": "user instruction (required)",
"input": "user input (optional)",
"output": "model response (required)",
"system": "system prompt (optional)",
"history": [
["user instruction in the first round (optional)", "model response in the first round (optional)"],
["user instruction in the second round (optional)", "model response in the second round (optional)"]
]
}
]

以上为数据文件示例;以下为该数据集在数据集配置文件中的对应描述:

"dataset_name": {
"file_name": "data.json",
"columns": {
"prompt": "instruction",
"query": "input",
"response": "output",
"system": "system",
"history": "history"
}
}

图像数据集 & dataset_info.json

{
"conversations": [
{
"from": "human",
"value": "<image>\n请告诉我关于这张照片的整体理解,假设你是自动驾驶汽车。"
},
{
"from": "gpt",
"value": "自驾车辆正在高速直行。现场没有交通信号灯。天气晴朗。汽车正在高速公路上行驶。似乎没有行人出现。自驾车辆的驾驶员应该注意的是,要与道路上的其他车辆保持安全距离,以避免任何潜在的碰撞。"
}
],
"images": [
"/ts-lf/ts-llamafactory/app_data/CoVLA/qa_images/images/2022-10-31--15-02-03--37_first/0002.png"
]
},

以上为数据文件中的一条样本;以下为该数据集在数据集配置文件中的对应描述:

"CoVLA_zh": {
"file_name": "./CoVLA/CoVLA_llamafactory_zh_split.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"images": "images"
}
},