From 1ba009adb625e0cdca867d5063ea7cd6966e9945 Mon Sep 17 00:00:00 2001 From: ravenscroftj Date: Tue, 21 Mar 2023 06:30:10 +0000 Subject: [PATCH] Add 'brainsteam/content/annotations/2023/03/21/1679380079.md' --- .../annotations/2023/03/21/1679380079.md | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 brainsteam/content/annotations/2023/03/21/1679380079.md diff --git a/brainsteam/content/annotations/2023/03/21/1679380079.md b/brainsteam/content/annotations/2023/03/21/1679380079.md new file mode 100644 index 0000000..24c2751 --- /dev/null +++ b/brainsteam/content/annotations/2023/03/21/1679380079.md @@ -0,0 +1,68 @@ +--- +date: '2023-03-21T06:27:59' +hypothesis-meta: + created: '2023-03-21T06:27:59.825632+00:00' + document: + title: + - 'GPT-4 and professional benchmarks: the wrong answer to the wrong question' + flagged: false + group: __world__ + hidden: false + id: hoqyasexEe2ZnQ_nOVgRxA + links: + html: https://hypothes.is/a/hoqyasexEe2ZnQ_nOVgRxA + incontext: https://hyp.is/hoqyasexEe2ZnQ_nOVgRxA/aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks + json: https://hypothes.is/api/annotations/hoqyasexEe2ZnQ_nOVgRxA + permissions: + admin: + - acct:ravenscroftj@hypothes.is + delete: + - acct:ravenscroftj@hypothes.is + read: + - group:__world__ + update: + - acct:ravenscroftj@hypothes.is + tags: + - openai + - gpt + - ModelEvaluation + target: + - selector: + - endContainer: /div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/article[1]/div[4]/div[1]/div[1]/p[6]/span[2] + endOffset: 42 + startContainer: /div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/article[1]/div[4]/div[1]/div[1]/p[6]/span[1] + startOffset: 0 + type: RangeSelector + - end: 6591 + start: 6238 + type: TextPositionSelector + - exact: 'In fact, we can definitively show that it has memorized problems in + its training set: when prompted with the title of a Codeforces problem, GPT-4 + includes a link to the exact contest where the problem appears (and the round + number is almost correct: it is off by one). Note that GPT-4 cannot access + the Internet, so memorization is the only explanation.' + prefix: the problems after September 12. + suffix: GPT-4 memorizes Codeforces probl + type: TextQuoteSelector + source: https://aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks + text: GPT4 knows the link to the coding exams that it was evaluated against but + doesn't have "internet access" so it appears to have memorised this as well + updated: '2023-03-21T06:27:59.825632+00:00' + uri: https://aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks + user: acct:ravenscroftj@hypothes.is + user_info: + display_name: James Ravenscroft +in-reply-to: https://aisnakeoil.substack.com/p/gpt-4-and-professional-benchmarks +tags: +- openai +- gpt +- ModelEvaluation +- hypothesis +type: annotation +url: /annotations/2023/03/21/1679380079 + +--- + + + +
In fact, we can definitively show that it has memorized problems in its training set: when prompted with the title of a Codeforces problem, GPT-4 includes a link to the exact contest where the problem appears (and the round number is almost correct: it is off by one). Note that GPT-4 cannot access the Internet, so memorization is the only explanation.
GPT-4 knows the links to the Codeforces problems that it was evaluated against, but since it doesn't have internet access, it appears to have memorised these links as well.
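
A minimal sketch of how one might reproduce this kind of memorisation probe, assuming access to the OpenAI chat API — the model name, prompt wording, and problem title here are illustrative placeholders, not the authors' exact setup:

```python
# Hypothetical memorisation probe: ask the model for the source of a
# Codeforces problem by title and check whether it volunteers a contest link.
# Assumes the `openai` Python package (v1+) and OPENAI_API_KEY in the environment.
from openai import OpenAI

client = OpenAI()

def probe_memorisation(problem_title: str) -> str:
    """Prompt the model with a problem title and return its reply."""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "user",
                "content": (
                    f"Which Codeforces contest does the problem "
                    f'"{problem_title}" come from? Include a link.'
                ),
            }
        ],
        temperature=0,  # deterministic output makes memorised links easier to spot
    )
    return response.choices[0].message.content

# A reply containing a codeforces.com/contest/... URL for a pre-cutoff problem
# (but not for post-cutoff ones) would be the memorisation signal described above.
print(probe_memorisation("Placeholder Problem Title"))
```

The interesting comparison is between problems published before and after the model's training cutoff: a link reproduced only for pre-cutoff problems cannot have come from browsing.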