28 lines
868 B
Python
28 lines
868 B
Python
import json
|
|
import os
|
|
|
|
# Absolute directory containing this script; every input and output path
# below is built relative to it.
this_dir = os.path.split(os.path.abspath(__file__))[0]
|
|
|
|
|
|
def mkdir(path):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched. Creation is attempted directly instead of
    checking first, so a directory that appears between a check and the
    creation cannot cause a spurious failure.

    Args:
        path: Path of the directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
|
|
|
|
|
|
# Make sure the output directory (two levels above this script, under
# data/helpsteer) exists before anything is written into it.
mkdir(this_dir + '/../../data')
mkdir(this_dir + '/../../data/helpsteer')

# Convert each split from flat prompt/response records into chat-style
# {"messages": [...]} records, written one JSON object per line.
for filename in ['train.jsonl', 'validation.jsonl']:
    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default.
    with open(this_dir + '/' + filename, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. a trailing empty line at end of file),
        # which json.loads would otherwise reject.
        lines = [json.loads(line) for line in f if line.strip()]

    # Each record becomes a two-turn conversation: the prompt as the user
    # message and the response as the assistant message.
    conversations = [
        {
            'messages': [
                {'role': 'user', 'content': line['prompt']},
                {'role': 'assistant', 'content': line['response']},
            ]
        }
        for line in lines
    ]

    # Print one converted sample as a sanity check; an empty split has none.
    if conversations:
        print(conversations[0])

    # 'train.jsonl' -> 'helpsteer_train_all.jsonl', and likewise for validation.
    split_name = os.path.splitext(filename)[0]
    with open(this_dir + '/../../data/helpsteer/helpsteer_' + split_name + '_all.jsonl',
              'w', encoding='utf-8') as f:
        f.writelines(json.dumps(conv) + '\n' for conv in conversations)
|