From 395fa5a70b9df8a944abca43b8f71f7801b0e0e5 Mon Sep 17 00:00:00 2001 From: ravenscroftj Date: Tue, 21 Mar 2023 06:30:05 +0000 Subject: [PATCH] Add 'brainsteam/content/annotations/2023/03/21/1679380149.md' --- .../annotations/2023/03/21/1679380149.md | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 brainsteam/content/annotations/2023/03/21/1679380149.md diff --git a/brainsteam/content/annotations/2023/03/21/1679380149.md b/brainsteam/content/annotations/2023/03/21/1679380149.md new file mode 100644 index 0000000..306dd08 --- /dev/null +++ b/brainsteam/content/annotations/2023/03/21/1679380149.md @@ -0,0 +1,68 @@ +--- +date: '2023-03-21T06:29:09' +hypothesis-meta: + created: '2023-03-21T06:29:09.945605+00:00' + document: + title: + - 'GPT-4 and professional benchmarks: the wrong answer to the wrong question' + flagged: false + group: __world__ + hidden: false + id: sFZzLMexEe2M2r_i759OiA + links: + html: https://hypothes.is/a/sFZzLMexEe2M2r_i759OiA + incontext: https://hyp.is/sFZzLMexEe2M2r_i759OiA/aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks + json: https://hypothes.is/api/annotations/sFZzLMexEe2M2r_i759OiA + permissions: + admin: + - acct:ravenscroftj@hypothes.is + delete: + - acct:ravenscroftj@hypothes.is + read: + - group:__world__ + update: + - acct:ravenscroftj@hypothes.is + tags: + - openai + - gpt + - ModelEvaluation + target: + - selector: + - endContainer: /div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/article[1]/div[4]/div[1]/div[1]/p[8]/span[2] + endOffset: 199 + startContainer: /div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/article[1]/div[4]/div[1]/div[1]/p[8]/span[1] + startOffset: 0 + type: RangeSelector + - end: 7439 + start: 7071 + type: TextPositionSelector + - exact: "Still, we can look for telltale signs. Another symptom of memorization\ + \ is that GPT is highly sensitive to the phrasing of the question. 
Melanie\ + \ Mitchell gives an example of an MBA test question where changing some details\ + \ in a way that wouldn\u2019t fool a person is enough to fool ChatGPT (running\ + \ GPT-3.5). A more elaborate experiment along these lines would be valuable." + prefix: ' how performance varies by date.' + suffix: "Because of OpenAI\u2019s lack of tran" + type: TextQuoteSelector + source: https://aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks + text: OpenAI has memorised MBA tests- when these are rephrased or certain details + are changed, the system fails to answer + updated: '2023-03-21T06:29:09.945605+00:00' + uri: https://aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks + user: acct:ravenscroftj@hypothes.is + user_info: + display_name: James Ravenscroft +in-reply-to: https://aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks +tags: +- openai +- gpt +- ModelEvaluation +- hypothesis +type: annotation +url: /annotations/2023/03/21/1679380149 + +--- + + + +
Still, we can look for telltale signs. Another symptom of memorization is that GPT is highly sensitive to the phrasing of the question. Melanie Mitchell gives an example of an MBA test question where changing some details in a way that wouldn’t fool a person is enough to fool ChatGPT (running GPT-3.5). A more elaborate experiment along these lines would be valuable.
OpenAI has memorised MBA tests — when these are rephrased or certain details are changed, the system fails to answer \ No newline at end of file