Add 'brainsteam/content/annotations/2022/11/28/1669635285.md'

2022-11-28 11:45:15 +00:00 · 2022-11-28 11:45:15 +00:00 · 7e37d7de78
parent e9cc6f1886
commit 7e37d7de78
1 changed files with 62 additions and 0 deletions
--- a/brainsteam/content/annotations/2022/11/28/1669635285.md
+++ b/brainsteam/content/annotations/2022/11/28/1669635285.md
@ -0,0 +1,62 @@
+---
+date: '2022-11-28T11:34:45'
+hypothesis-meta:
+  created: '2022-11-28T11:34:45.963292+00:00'
+  document:
+    title:
+    - 1809.09672.pdf
+  flagged: false
+  group: __world__
+  hidden: false
+  id: qMPVfG8QEe2WJWufCDu9ww
+  links:
+    html: https://hypothes.is/a/qMPVfG8QEe2WJWufCDu9ww
+    incontext: https://hyp.is/qMPVfG8QEe2WJWufCDu9ww/arxiv.org/pdf/1809.09672.pdf
+    json: https://hypothes.is/api/annotations/qMPVfG8QEe2WJWufCDu9ww
+  permissions:
+    admin:
+    - acct:ravenscroftj@hypothes.is
+    delete:
+    - acct:ravenscroftj@hypothes.is
+    read:
+    - group:__world__
+    update:
+    - acct:ravenscroftj@hypothes.is
+  tags:
+  - rl
+  - bandit
+  - nlproc
+  - summarization
+  target:
+  - selector:
+    - end: 10089
+      start: 9945
+      type: TextPositionSelector
+    - exact: andit is a decision-making formal-ization in which an agent repeatedly
+        chooses oneof several actions, and receives a reward based onthis choice.
+      prefix: dient reinforcementlearning. A b
+      suffix: " The agent\u2019s goal is to quickly "
+      type: TextQuoteSelector
+    source: https://arxiv.org/pdf/1809.09672.pdf
+  text: 'Definition for contextual bandit: an agent that repeatedly chooses one of
+    several actions and receives a reward based on this choice.'
+  updated: '2022-11-28T11:34:45.963292+00:00'
+  uri: https://arxiv.org/pdf/1809.09672.pdf
+  user: acct:ravenscroftj@hypothes.is
+  user_info:
+    display_name: James Ravenscroft
+in-reply-to: https://arxiv.org/pdf/1809.09672.pdf
+tags:
+- rl
+- bandit
+- nlproc
+- summarization
+- hypothesis
+type: annotation
+url: /annotations/2022/11/28/1669635285
+
+---
+
+
+
+ <blockquote>[A b]andit is a decision-making formalization in which an agent repeatedly chooses one of several actions, and receives a reward based on this choice.</blockquote>Definition for contextual bandit: an agent that repeatedly chooses one of several actions and receives a reward based on this choice.