diff --git a/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/index.md b/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/index.md index 38de34d..7d59c01 100644 --- a/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/index.md +++ b/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/index.md @@ -35,6 +35,8 @@ tags: - [Defining the Interface with ELI5](#defining-the-interface-with-eli5) - [Getting an Explanation](#getting-an-explanation-1) - [ELI5 and a Remotely Hosted Model / API](#eli5-and-a-remotely-hosted-model--api) + - [Setting up](#setting-up) + - [Building a Remote Model Adapter](#building-a-remote-model-adapter) @@ -355,4 +357,63 @@ Et voila! Hopefully you will get some output that looks like the below: {{
}} -## ELI5 and a Remotely Hosted Model / API \ No newline at end of file + +## ELI5 and a Remotely Hosted Model / API + +This one is quite fun and exciting. Since LIME is model agnostic, we can get an explanation for a remotely hosted model assuming we have access to +the full probability distribution over its labels (and assuming you have enough API credits to train your local model). + +In this example I'm using Huggingface's [inference api](https://api-inference.huggingface.co/docs/python/html/quicktour.html) where they host transformer models on your behalf - you can pay to have your models run on GPUs for higher throughput. I made this guide with the free tier allowance which gives you 30k tokens per month - if you are using LIME with default settings you could easily eat through this whilst generating a single explanation so this is yet again a contrived example that gives you a taster of what is possible. + +### Setting up + +For this part of the tutorial you will need the Python [requests](https://docs.python-requests.org/en/latest/) library and we are also going to make use of [scipy](https://docs.scipy.org), [numpy](https://numpy.org/) and [pandas](https://pandas.pydata.org/). You will also need a huggingface account and you will need to set up your API key as described in the [documentation](https://api-inference.huggingface.co/docs/python/html/quicktour.html). + +### Building a Remote Model Adapter + +Firstly we need to build a model adapter function that allows ELI5 to interface with huggingface's models. 
+ +```python + +import json +from typing import List + +import numpy as np +import pandas as pd +import requests +from scipy.special import softmax + +MODEL="nlptown/bert-base-multilingual-uncased-sentiment" +API_TOKEN="YOUR API KEY HERE" +API_URL = f"https://api-inference.huggingface.co/models/{MODEL}" +headers = {"Authorization": f"Bearer {API_TOKEN}"} + +def query(payload): + data = json.dumps(payload) + response = requests.request("POST", API_URL, headers=headers, data=data) + return json.loads(response.content.decode("utf-8")) + +def result_to_df(result): + rows = [] + + for result_row in result: + row = {} + for lbl_score in result_row: + row[lbl_score['label']] = lbl_score['score'] + + rows.append(row) + + return pd.DataFrame(rows) + +def remote_model_adapter(texts: List[str]): + + all_scores = [] + + for text in texts: + + data = query(text) + all_scores.extend(result_to_df(data).values) + + return softmax(np.array(all_scores), axis=1) +``` + diff --git a/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/test.ipynb b/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/test.ipynb index fe1b77f..b4d31e6 100644 --- a/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/test.ipynb +++ b/brainsteam/content/posts/2022/01/13-01-painless-explainability-for-text-models-with-eli5/test.ipynb @@ -1182,7 +1182,7 @@ "outputs": [], "source": [ "import json\n", - "\n", + "from scipy.special import softmax\n", "import requests\n", "\n", "MODEL=\"nlptown/bert-base-multilingual-uncased-sentiment\"\n", @@ -1297,35 +1297,440 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 136, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/james/miniconda3/envs/pgesg/lib/python3.7/site-packages/sklearn/base.py:213: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. 
Previously it would return None.\n", - " FutureWarning)\n" - ] - }, { "data": { + "text/html": [ + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " \n", + " y=1 star\n", + " \n", + "\n", + "\n", + " \n", + " (probability 0.001, score -7.683)\n", + "\n", + "top features\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + " Contribution?\n", + " Feature
\n", + " -0.075\n", + " \n", + " <BIAS>\n", + "
\n", + " -7.608\n", + " \n", + " Highlighted in text (sum)\n", + "
\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "

\n", + " the restaurant was amazing, the quality of their food was exceptional. the waiters were so polite.\n", + "

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " \n", + " y=2 stars\n", + " \n", + "\n", + "\n", + " \n", + " (probability 0.021, score -3.995)\n", + "\n", + "top features\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + " Contribution?\n", + " Feature
\n", + " -0.306\n", + " \n", + " <BIAS>\n", + "
\n", + " -3.689\n", + " \n", + " Highlighted in text (sum)\n", + "
\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "

\n", + " the restaurant was amazing, the quality of their food was exceptional. the waiters were so polite.\n", + "

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " \n", + " y=3 stars\n", + " \n", + "\n", + "\n", + " \n", + " (probability 0.858, score 0.965)\n", + "\n", + "top features\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + " Contribution?\n", + " Feature
\n", + " +1.079\n", + " \n", + " Highlighted in text (sum)\n", + "
\n", + " -0.114\n", + " \n", + " <BIAS>\n", + "
\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "

\n", + " the restaurant was amazing, the quality of their food was exceptional. the waiters were so polite.\n", + "

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " \n", + " y=4 stars\n", + " \n", + "\n", + "\n", + " \n", + " (probability 0.016, score -4.281)\n", + "\n", + "top features\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + " Contribution?\n", + " Feature
\n", + " -0.294\n", + " \n", + " <BIAS>\n", + "
\n", + " -3.987\n", + " \n", + " Highlighted in text (sum)\n", + "
\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "

\n", + " the restaurant was amazing, the quality of their food was exceptional. the waiters were so polite.\n", + "

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " \n", + " y=5 stars\n", + " \n", + "\n", + "\n", + " \n", + " (probability 0.104, score -2.343)\n", + "\n", + "top features\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + " Contribution?\n", + " Feature
\n", + " -0.173\n", + " \n", + " <BIAS>\n", + "
\n", + " -2.170\n", + " \n", + " Highlighted in text (sum)\n", + "
\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "

\n", + " the restaurant was amazing, the quality of their food was exceptional. the waiters were so polite.\n", + "

\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + "\n" + ], "text/plain": [ - "TextExplainer(char_based=False,\n", - " clf=SGDClassifier(alpha=0.001, loss='log', penalty='elasticnet',\n", - " random_state=RandomState(MT19937) at 0x7FE4409B88D0),\n", - " n_samples=20, random_state=42,\n", - " sampler=MaskingTextSamplers(random_state=RandomState(MT19937) at 0x7FE4409B88D0,\n", - " sampler_params=None,\n", - " token_pattern='(?u)\\\\b\\\\w+\\\\b',\n", - " weights=array([0.7, 0.3])),\n", - " token_pattern='(?u)\\\\b\\\\w+\\\\b',\n", - " vec=CountVectorizer(ngram_range=(1, 2),\n", - " token_pattern='(?u)\\\\b\\\\w+\\\\b'))" + "" ] }, + "execution_count": 136, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [