"""Shuffle a JSON Lines file and split it into N roughly equal shard files.

Usage:
    python <script> <path/to/name_all.jsonl> <number_of_shards>

The shards are written next to the input file. If the input file name
contains ``_all``, that marker is replaced by ``_<index>`` (``foo_all.jsonl``
-> ``foo_0.jsonl``, ``foo_1.jsonl``, ...). Otherwise ``_<index>`` is appended
to the stem (``data.jsonl`` -> ``data_0.jsonl``, ...), so shards never
overwrite each other or the input file.
"""
import json
import os
import random
import sys

_JSONL_SUFFIX = '.jsonl'
_ALL_MARKER = '_all'


def _shard_filename(original, index):
    """Return the file name (no directory) of shard ``index`` for ``original``."""
    stem = os.path.basename(original)[:-len(_JSONL_SUFFIX)]
    if _ALL_MARKER in stem:
        return stem.replace(_ALL_MARKER, '_' + str(index)) + _JSONL_SUFFIX
    # Without the marker a plain replace() would be a no-op and every shard
    # would get the input's own name; append the index to keep names unique.
    return stem + '_' + str(index) + _JSONL_SUFFIX


def main(argv=None):
    """Shuffle the input JSONL file and write the shards beside it.

    Args:
        argv: ``[input_path, shard_count]``; defaults to ``sys.argv[1:]``.
            Extra trailing arguments are ignored.

    Returns:
        Process exit status: 0 on success, 1 on invalid arguments or input.
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        print('Usage: <file.jsonl> <number_of_shards>', file=sys.stderr)
        return 1

    original = argv[0]
    try:
        no_dataset = int(argv[1])
    except ValueError:
        print('Number of shards must be an integer', file=sys.stderr)
        return 1
    if no_dataset < 1:
        print('Number of shards must be at least 1', file=sys.stderr)
        return 1

    if not original.endswith(_JSONL_SUFFIX) or not os.path.isfile(original):
        print('Not a jsonl file')
        return 1

    out_dir = os.path.dirname(os.path.abspath(original))

    # JSON Lines is defined as UTF-8; do not depend on the locale encoding.
    with open(original, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # A final record without a newline would be glued to the following
    # record once shuffling moves it away from the end of the file.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'

    random.shuffle(lines)

    total = len(lines)
    for i in range(no_dataset):
        # Integer arithmetic gives the same floor boundaries as the float
        # form without any rounding risk; the slices cover every line once.
        lower = i * total // no_dataset
        upper = (i + 1) * total // no_dataset
        out_path = os.path.join(out_dir, _shard_filename(original, i))
        with open(out_path, 'w', encoding='utf-8') as f:
            f.writelines(lines[lower:upper])
    return 0


if __name__ == '__main__':
    sys.exit(main())