Files
llm/utils/reduce_conversations.py

38 lines
1.0 KiB
Python

import json
import os
# Directory containing this script; the conversations tree is looked up
# relative to it rather than to the current working directory.
this_dir = os.path.dirname(os.path.abspath(__file__))
conversations_dir = this_dir + '/../conversations/'

# Every entry of the conversations directory that is itself a directory,
# expressed as a path relative to the current working directory.
dirs = [
    candidate
    for candidate in (
        os.path.relpath(conversations_dir + entry_name)
        for entry_name in os.listdir(conversations_dir)
    )
    if os.path.isdir(candidate)
]
def read_json(path):
    """Return the parsed JSON content of the file at *path*.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
def _reduce_directory(directory):
    """Delete conversation files that are a prefix of a later file.

    The regular files directly inside *directory* are sorted
    lexicographically by path.  A file is removed from disk when its parsed
    content equals the leading slice of the content of some file that sorts
    after it (identical content counts, so the earlier of two duplicates is
    removed).  The last file in sort order is never removed.

    NOTE(review): the slice comparison assumes each file holds a JSON array
    (a list of messages) -- confirm against the code that writes them.

    Args:
        directory: Path of the directory whose files are reduced.
    """
    files = sorted(
        path
        for path in (directory + '/' + name for name in os.listdir(directory))
        if os.path.isfile(path)
    )

    parsed = {}  # path -> parsed conversation, filled on first use

    def load(path):
        # Parse each file at most once; without the cache every candidate
        # would re-read itself and all later files from disk.
        if path not in parsed:
            parsed[path] = read_json(path)
        return parsed[path]

    i = 0
    while i < len(files) - 1:
        current = load(files[i])
        # any() stops at the first later file that extends the current one.
        is_prefix_of_later = any(
            len(current) <= len(load(later))
            and current == load(later)[0:len(current)]
            for later in files[i + 1:]
        )
        if is_prefix_of_later:
            print(' Dropping ' + files[i])
            os.remove(files[i])
            parsed.pop(files[i], None)
            # Do not advance: the next candidate has shifted into slot i.
            del files[i]
        else:
            i += 1


for conversation_dir in dirs:
    print('Reducing ' + conversation_dir)
    _reduce_directory(conversation_dir)