From 293d26a1bed98f226f8afd16b7ee01dfc8713f88 Mon Sep 17 00:00:00 2001 From: ravenscroftj Date: Mon, 19 Dec 2022 15:00:07 +0000 Subject: [PATCH] Add 'brainsteam/content/annotations/2022/12/19/1671461828.md' --- .../annotations/2022/12/19/1671461828.md | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 brainsteam/content/annotations/2022/12/19/1671461828.md diff --git a/brainsteam/content/annotations/2022/12/19/1671461828.md b/brainsteam/content/annotations/2022/12/19/1671461828.md new file mode 100644 index 0000000..a3878d2 --- /dev/null +++ b/brainsteam/content/annotations/2022/12/19/1671461828.md @@ -0,0 +1,77 @@ +--- +date: '2022-12-19T14:57:08' +hypothesis-meta: + created: '2022-12-19T14:57:08.575784+00:00' + document: + title: + - My AI Safety Lecture for UT Effective Altruism + flagged: false + group: __world__ + hidden: false + id: aQ51un-tEe29v2MBjEX6Xw + links: + html: https://hypothes.is/a/aQ51un-tEe29v2MBjEX6Xw + incontext: https://hyp.is/aQ51un-tEe29v2MBjEX6Xw/scottaaronson.blog/?p=6823 + json: https://hypothes.is/api/annotations/aQ51un-tEe29v2MBjEX6Xw + permissions: + admin: + - acct:ravenscroftj@hypothes.is + delete: + - acct:ravenscroftj@hypothes.is + read: + - group:__world__ + update: + - acct:ravenscroftj@hypothes.is + tags: + - explainability + - nlproc + target: + - selector: + - endContainer: /div[2]/div[2]/div[2]/div[1]/p[99] + endOffset: 386 + startContainer: /div[2]/div[2]/div[2]/div[1]/p[99] + startOffset: 0 + type: RangeSelector + - end: 40910 + start: 40524 + type: TextPositionSelector + - exact: "Anyway, we actually have a working prototype of the watermarking scheme,\ + \ built by OpenAI engineer Hendrik Kirchner. It seems to work pretty well\u2014\ + empirically, a few hundred tokens seem to be enough to get a reasonable signal\ + \ that yes, this text came from GPT. In principle, you could even take a\ + \ long text and isolate which parts probably came from GPT and which parts\ + \ probably didn\u2019t." 
+ prefix: 'irst hundred prime numbers). + + + + + ' + suffix: ' + + + + + Now, this can all be defeate' + type: TextQuoteSelector + source: https://scottaaronson.blog/?p=6823 + text: Scott's team hsas already developed a prototype watermarking scheme at OpenAI + and it works pretty well + updated: '2022-12-19T14:57:08.575784+00:00' + uri: https://scottaaronson.blog/?p=6823 + user: acct:ravenscroftj@hypothes.is + user_info: + display_name: James Ravenscroft +in-reply-to: https://scottaaronson.blog/?p=6823 +tags: +- explainability +- nlproc +- hypothesis +type: annotation +url: /annotations/2022/12/19/1671461828 + +--- + + + +
Anyway, we actually have a working prototype of the watermarking scheme, built by OpenAI engineer Hendrik Kirchner. It seems to work pretty well—empirically, a few hundred tokens seem to be enough to get a reasonable signal that yes, this text came from GPT. In principle, you could even take a long text and isolate which parts probably came from GPT and which parts probably didn’t.
Scott's team has already developed a prototype watermarking scheme at OpenAI and it works pretty well \ No newline at end of file