Event JSON
{
"id": "a6163d93445e8c7573b6960b8c7ed87beb5ef773ed4dd3d20a39b310cb65010d",
"pubkey": "36a30783457602c172cb404d77ab5e299f0b2f486ca63ca30b3a4bf06e5c0447",
"created_at": 1723142108,
"kind": 1,
"tags": [
[
"imeta",
"url https://media.mas.to/media_attachments/files/112/927/840/987/920/460/original/d5c62fb29d44dabc.png",
"m image/png"
],
[
"proxy",
"https://mas.to/@assaf/112927841245854818",
"web"
],
[
"proxy",
"https://mas.to/users/assaf/statuses/112927841245854818",
"activitypub"
],
[
"L",
"pink.momostr"
],
[
"l",
"pink.momostr.activitypub:https://mas.to/users/assaf/statuses/112927841245854818",
"pink.momostr"
],
[
"-"
]
],
"content": "Reinforcement Learning from Human Feedback (RLHF) is just a vibe check\n\n\"You'd train it to agree with the human judgement on average. Once we have a Reward Model vibe check, you run RL with respect to it, learning to play the moves that lead to good vibes. Clearly, this would not have led anywhere too interesting in Go.”\n\nhttps://x.com/karpathy/status/1821277264996352246\nhttps://media.mas.to/media_attachments/files/112/927/840/987/920/460/original/d5c62fb29d44dabc.png\n",
"sig": "2ec02a0d70b75f25d9fee00216f63988d87dba9abf047d699dca7a31419f7034034221e24c39359d497e3a337311a19b58fc8c36119cae70b72322e29b77902d"
}