abstracts[] |
{'sha1': 'b1ae07184c46b5a5253cf3263c4f383ae85b8d13', 'content': 'We consider a dynamic assortment selection problem where the goal is to offer\na sequence of assortments of cardinality at most K, out of N items, to\nminimize the expected cumulative regret (loss of revenue). The feedback is\ngiven by a multinomial logit (MNL) choice model. This sequential decision\nmaking problem is studied under the MNL contextual bandit framework. The\nexisting algorithms for MNL contexual bandit have frequentist regret guarantees\nas Õ(κ√(T)), where κ is an instance\ndependent constant. κ could be arbitrarily large, e.g. exponentially\ndependent on the model parameters, causing the existing regret guarantees to be\nsubstantially loose. We propose an optimistic algorithm with a carefully\ndesigned exploration bonus term and show that it enjoys\nÕ(√(T)) regret. In our bounds, the κ factor only\naffects the poly-log term and not the leading term of the regret bounds.', 'mimetype': 'text/plain', 'lang': 'en'}
{'sha1': 'a2b0f0d581f87b894b3ad2c78a911124662faae7', 'content': 'We consider a dynamic assortment selection problem where the goal is to offer\na sequence of assortments of cardinality at most $K$, out of $N$ items, to\nminimize the expected cumulative regret (loss of revenue). The feedback is\ngiven by a multinomial logit (MNL) choice model. This sequential decision\nmaking problem is studied under the MNL contextual bandit framework. The\nexisting algorithms for MNL contexual bandit have frequentist regret guarantees\nas $\\tilde{\\mathrm{O}}(\\kappa\\sqrt{T})$, where $\\kappa$ is an instance\ndependent constant. $\\kappa$ could be arbitrarily large, e.g. exponentially\ndependent on the model parameters, causing the existing regret guarantees to be\nsubstantially loose. We propose an optimistic algorithm with a carefully\ndesigned exploration bonus term and show that it enjoys\n$\\tilde{\\mathrm{O}}(\\sqrt{T})$ regret. In our bounds, the $\\kappa$ factor only\naffects the poly-log term and not the leading term of the regret bounds.', 'mimetype': 'application/x-latex', 'lang': 'en'}
|
container |
|
container_id |
|
contribs[] |
{'index': 0, 'creator_id': None, 'creator': None, 'raw_name': 'Priyank Agrawal', 'given_name': None, 'surname': None, 'role': 'author', 'raw_affiliation': None, 'extra': None}
{'index': 1, 'creator_id': None, 'creator': None, 'raw_name': 'Vashist Avadhanula', 'given_name': None, 'surname': None, 'role': 'author', 'raw_affiliation': None, 'extra': None}
{'index': 2, 'creator_id': None, 'creator': None, 'raw_name': 'Theja Tulabandhula', 'given_name': None, 'surname': None, 'role': 'author', 'raw_affiliation': None, 'extra': None}
|
ext_ids |
{'doi': None, 'wikidata_qid': None, 'isbn13': None, 'pmid': None, 'pmcid': None, 'core': None, 'arxiv': '2011.14033v1', 'jstor': None, 'ark': None, 'mag': None, 'doaj': None, 'dblp': None, 'oai': None, 'hdl': None}
|
files[] |
{'state': 'active', 'ident': 'sdobkh5rf5a63mt5xxfq3t2aqq', 'revision': '8dfaae37-6d9e-41b7-99b5-825d81e7574c', 'redirect': None, 'extra': None, 'edit_extra': None, 'size': 784803, 'md5': '7a381106a3f789b92f3f635377c38067', 'sha1': '3cd42135b6625622534811e0ec829cfbdfc22454', 'sha256': '273dec7fec8876a5e8c69477b5f2219d07b68904006f539ca70f929e6e056c8f', 'urls': [{'url': 'https://arxiv.org/pdf/2011.14033v1.pdf', 'rel': 'repository'}, {'url': 'https://web.archive.org/web/20201204011121/https://arxiv.org/pdf/2011.14033v1.pdf', 'rel': 'webarchive'}], 'mimetype': 'application/pdf', 'content_scope': None, 'release_ids': ['abp3jwxqdne77bsizabococ4uy'], 'releases': None}
|
filesets |
[]
|
issue |
|
language |
en
|
license_slug |
ARXIV-1.0
|
number |
|
original_title |
|
pages |
|
publisher |
|
refs |
[]
|
release_date |
2020-11-28
|
release_stage |
submitted
|
release_type |
article
|
release_year |
2020
|
subtitle |
|
title |
Improved Optimistic Algorithm For The Multinomial Logit Contextual Bandit
|
version |
v1
|
volume |
|
webcaptures |
[]
|
withdrawn_date |
|
withdrawn_status |
|
withdrawn_year |
|
work_id |
hcwvigug4faopl4zuw2yfmvk2q
|