-
Notifications
You must be signed in to change notification settings - Fork 17
/
config.yaml
72 lines (62 loc) · 2.38 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Configuration in three parts: (1) general run settings, (2) datasets and the
# evaluators applied to each, (3) the models to be evaluated.
# ─── General Configuration ────────────────────────────────────────────────────
enable_log_saving: true  # Persist run logs to disk when true
processes: 3  # Number of worker processes used for parallel evaluation
num_blocks: 170  # Total number of blocks to process in this run
start_block: 0  # Zero-based index of the first block to process (allows resuming)
seed: 22  # Random seed, for reproducible runs
# ─── Datasets To Be Evaluated On And The Evaluator It Uses ────────────────────
# Each list entry maps a "module.ClassName" dataset identifier to its data file
# path and the list of "module.ClassName" evaluators to run against it.
# NOTE(review): these strings are resolved to classes by the loader — their
# spelling must match the code exactly; do not "fix" them here without checking.
dataset:
    - "xinhua.XinhuaHallucinations":
        path: "data/Xinhua/XinhuaHallucinations.json"
        evaluator:
            - "discriminative.DiscriminativeEvaluatorKeywordLevel"
            - "discriminative.DiscriminativeEvaluatorSentenceLevel"
            - "generative.GenerativeEvaluator"
            - "selective.SelectiveEvaluator"
    # NOTE(review): "TruthfunQA" / "TruthfunQAGeneration" / "TruthQAEvaluator"
    # look like typos of "TruthfulQA", but presumably mirror the actual class
    # names in the codebase — verify against the dataset/evaluator modules.
    - "truthfulqa.TruthfunQAGeneration":
        path: "data/TruthfulQA/TruthfulQA.csv"
        evaluator:
            - "TruthQAEvaluator.GenerativeEvaluatorTrutufulQA"
    # MC1 and MC2 share the same multiple-choice data file but use different
    # scoring evaluators.
    - "truthfulqa.TruthfunQAMC1":
        path: "data/TruthfulQA/mc_task.json"
        evaluator:
            - "TruthQAEvaluator.SelectiveEvaluatorMC1"
    - "truthfulqa.TruthfunQAMC2":
        path: "data/TruthfulQA/mc_task.json"
        evaluator:
            - "TruthQAEvaluator.SelectiveEvaluatorMC2"
    # HaluEval: three task variants (QA, dialogue, summarization), each with a
    # dedicated data file and evaluator.
    - "halueval.HaluEvalQA":
        path: "data/HaluEval/qa_data.json"
        evaluator:
            - "HaluEvalEvaluator.HaluEvalQAEvaluator"
    - "halueval.HaluEvalDialogue":
        path: "data/HaluEval/dialogue_data.json"
        evaluator:
            - "HaluEvalEvaluator.HaluEvalDialogueEvaluator"
    - "halueval.HaluEvalSummarization":
        path: "data/HaluEval/summarization_data.json"
        evaluator:
            - "HaluEvalEvaluator.HaluEvalSummarizationEvaluator"
    - "halluqa.HalluQAMC":
        path: "data/HalluQA/HalluQA_mc.json"
        evaluator:
            - "HalluQAEvaluator.HalluQAMCEvaluator"
# ─── Models To Be Evaluated ───────────────────────────────────────────────────
# Models are grouped under three access methods: api, local, remote.
# NOTE(review): the exact semantics of local vs. remote (in-process weights vs.
# a served endpoint, presumably) are defined by the loader — verify there.
# Each model key is an identifier resolved by the code; values are per-model
# sampling/config options.
llm:
    api:
        - GPT:
            model_name: gpt-3.5-turbo
            report: true  # NOTE(review): meaning not shown here — presumably include this model in the report; confirm in loader
    local:
        - Baichuan2_13B_Chat:
            temperature: 0.8
            top_p: 0.6
        - ChatGLM3_6B_Chat:
            temperature: 0.8
            top_p: 0.6
    remote:
        - Qwen_14B_Chat:
            temperature: 0.8
        - Xinyu_7B_Chat:
            temperature: 0.8