Added a speaker `name` field to the training-sample conversion for HelpSteer and OASST
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
|
||||
this_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
user_names = ['Adam', 'Alice', 'Anne', 'Bob', 'Charlie', 'Cody', 'Corinna', 'Cynthia', 'Fred', 'Grace', 'Jane', 'Paul',
|
||||
'Rachel', 'Ramesh']
|
||||
|
||||
|
||||
def mkdir(path):
|
||||
@@ -9,6 +11,7 @@ def mkdir(path):
|
||||
os.mkdir(path)
|
||||
|
||||
|
||||
this_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
mkdir(this_dir + '/../../data')
|
||||
mkdir(this_dir + '/../../data/helpsteer')
|
||||
|
||||
@@ -18,8 +21,10 @@ for filename in ['train.jsonl', 'validation.jsonl']:
|
||||
|
||||
role_dict = {'prompt': 'user', 'response': 'assistant'}
|
||||
lines = [json.loads(line) for line in lines]
|
||||
conversations = [{'messages': [{'role': 'user', 'content': line['prompt']},
|
||||
{'role': 'assistant', 'content': line['response']}]} for line in lines]
|
||||
# Build one two-turn conversation per record. The user turn is tagged with a
# randomly picked name from user_names; the assistant turn always uses the
# fixed name 'assistant'.
conversations = [
    {
        'messages': [
            {'role': 'user', 'name': random.choice(user_names), 'content': line['prompt']},
            {'role': 'assistant', 'name': 'assistant', 'content': line['response']},
        ]
    }
    for line in lines
]
|
||||
|
||||
print(conversations[0])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user