Add 'brainsteam/content/annotations/2022/11/28/1669635285.md'
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
This commit is contained in:
parent
e9cc6f1886
commit
7e37d7de78
|
@ -0,0 +1,62 @@
|
||||||
|
---
|
||||||
|
date: '2022-11-28T11:34:45'
|
||||||
|
hypothesis-meta:
|
||||||
|
created: '2022-11-28T11:34:45.963292+00:00'
|
||||||
|
document:
|
||||||
|
title:
|
||||||
|
- 1809.09672.pdf
|
||||||
|
flagged: false
|
||||||
|
group: __world__
|
||||||
|
hidden: false
|
||||||
|
id: qMPVfG8QEe2WJWufCDu9ww
|
||||||
|
links:
|
||||||
|
html: https://hypothes.is/a/qMPVfG8QEe2WJWufCDu9ww
|
||||||
|
incontext: https://hyp.is/qMPVfG8QEe2WJWufCDu9ww/arxiv.org/pdf/1809.09672.pdf
|
||||||
|
json: https://hypothes.is/api/annotations/qMPVfG8QEe2WJWufCDu9ww
|
||||||
|
permissions:
|
||||||
|
admin:
|
||||||
|
- acct:ravenscroftj@hypothes.is
|
||||||
|
delete:
|
||||||
|
- acct:ravenscroftj@hypothes.is
|
||||||
|
read:
|
||||||
|
- group:__world__
|
||||||
|
update:
|
||||||
|
- acct:ravenscroftj@hypothes.is
|
||||||
|
tags:
|
||||||
|
- rl
|
||||||
|
- bandit
|
||||||
|
- nlproc
|
||||||
|
- summarization
|
||||||
|
target:
|
||||||
|
- selector:
|
||||||
|
- end: 10089
|
||||||
|
start: 9945
|
||||||
|
type: TextPositionSelector
|
||||||
|
- exact: andit is a decision-making formal-ization in which an agent repeatedly
|
||||||
|
chooses oneof several actions, and receives a reward based onthis choice.
|
||||||
|
prefix: dient reinforcementlearning. A b
|
||||||
|
suffix: " The agent\u2019s goal is to quickly "
|
||||||
|
type: TextQuoteSelector
|
||||||
|
source: https://arxiv.org/pdf/1809.09672.pdf
|
||||||
|
text: 'Definition for contextual bandit: an agent that repeatedly chooses one of
|
||||||
|
several actions and receives a reward based on this choice.'
|
||||||
|
updated: '2022-11-28T11:34:45.963292+00:00'
|
||||||
|
uri: https://arxiv.org/pdf/1809.09672.pdf
|
||||||
|
user: acct:ravenscroftj@hypothes.is
|
||||||
|
user_info:
|
||||||
|
display_name: James Ravenscroft
|
||||||
|
in-reply-to: https://arxiv.org/pdf/1809.09672.pdf
|
||||||
|
tags:
|
||||||
|
- rl
|
||||||
|
- bandit
|
||||||
|
- nlproc
|
||||||
|
- summarization
|
||||||
|
- hypothesis
|
||||||
|
type: annotation
|
||||||
|
url: /annotations/2022/11/28/1669635285
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<blockquote>[A b]andit is a decision-making formalization in which an agent repeatedly chooses one of several actions, and receives a reward based on this choice.</blockquote>Definition for contextual bandit: an agent that repeatedly chooses one of several actions and receives a reward based on this choice.
|
Loading…
Reference in New Issue