{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7a6fce2d-4468-408e-b79d-4a95ef50a06d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import GPT2LMHeadModel, GPT2Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e20c5ac1-d87e-4c37-a78d-d96fcf1d1519",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_path = r\"C:/Users/IT/Desktop/Advanced NLP/gpt2\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "27035535-83c1-4bed-b003-cd44c14e28e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tokenizer\n",
    "tokenizer = GPT2Tokenizer.from_pretrained(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e587a465-ada7-41cd-83b2-7815f8b61e13",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the GPT-2 model\n",
    "model = GPT2LMHeadModel.from_pretrained(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5810e38e-7363-4a5e-8f10-c0a3ae5301c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test the model\n",
    "from transformers import pipeline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cf5d3e9b-f111-4c28-b5c6-e5aa1e52b095",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a text generation pipeline\n",
    "text_generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c10d7ec4-a53d-4121-b8e8-f91c43a4dbcd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n",
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Artificial Intelligence is a new frontier where the future of technology is at stake... We are at a time when companies need to be smarter and smarter as well to make it more profitable for companies to innovate, innovate and build smarter products and software\" S\n"
     ]
    }
   ],
   "source": [
    "# Test generating text\n",
    "input_text = \"Artificial Intelligence is\"\n",
    "generated_text = text_generator(input_text, max_length=50, num_return_sequences=1)\n",
    "print(generated_text[0]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c8f5185a-0e93-43de-9daf-7c9d4a86c12b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.llms.base import LLM\n",
    "from typing import Optional\n",
    "from pydantic import BaseModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "a4d86571-eadd-4a34-91b9-2605833a554a",
   "metadata": {},
   "outputs": [],
   "source": [
    "class GPT2LangChainWrapper(LLM, BaseModel):\n",
    "    model: object  # Define model as a field\n",
    "    tokenizer: object  # Define tokenizer as a field\n",
    "    max_length: int = 200  # Default value for max_length\n",
    "\n",
    "    def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:\n",
    "        inputs = self.tokenizer.encode(prompt, return_tensors=\"pt\")\n",
    "        outputs = self.model.generate(\n",
    "            inputs,\n",
    "            max_length=self.max_length,\n",
    "            num_return_sequences=1,\n",
    "            pad_token_id=self.tokenizer.eos_token_id,\n",
    "        )\n",
    "        result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "        return result\n",
    "\n",
    "    @property\n",
    "    def _llm_type(self):\n",
    "        return \"custom\"  # Specify the type of LLM being used\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "8e12c891-f315-40df-96bd-643309404356",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import PromptTemplate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "0c27903d-53da-4068-a00e-a2d17fd2b8c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define a simple prompt template\n",
    "template = \"\"\"\n",
    "You are a helpful assistant. Answer the question based on the input below.\n",
    "\n",
    "Input: {input}\n",
    "Answer:\n",
    "\"\"\"\n",
    "\n",
    "prompt = PromptTemplate(input_variables=[\"input\"], template=template)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "214167ad-0621-44fa-b31a-5d744425c49b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\IT\\AppData\\Local\\Temp\\ipykernel_6948\\2148348883.py:5: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 1.0. Use :meth:`~RunnableSequence, e.g., `prompt | llm`` instead.\n",
      "  chain = LLMChain(llm=llm, prompt=prompt)\n",
      "C:\\Users\\IT\\AppData\\Local\\Temp\\ipykernel_6948\\2148348883.py:9: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. Use :meth:`~invoke` instead.\n",
      "  response = chain.run(input_text)\n",
      "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Response: \n",
      "You are a helpful assistant. Answer the question based on the input below.\n",
      "\n",
      "Input: What is Artificial Intelligence?\n",
      "Answer:\n",
      "\n",
      "The term Artificial Intelligence is used to describe the ability to understand and understand the world around us. It\n"
     ]
    }
   ],
   "source": [
    "from langchain.chains import LLMChain\n",
    "\n",
    "# Create the chain\n",
    "llm = GPT2LangChainWrapper(model=model, tokenizer=tokenizer)\n",
    "chain = LLMChain(llm=llm, prompt=prompt)\n",
    "\n",
    "# Test the chain\n",
    "input_text = \"What is Artificial Intelligence?\"\n",
    "response = chain.run(input_text)\n",
    "print(\"Response:\", response)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "eaa27e88-4ece-4e09-a7d2-54da703a0911",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Summary: \n",
      "You are a summarization bot. Summarize the text below in one sentence.\n",
      "\n",
      "Text:  \n",
      "AI enhancing decision-making processes, and enabling technologies like self-driving cars and voice assistants.\n",
      "\n",
      "Summary:\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Define a prompt for summarization\n",
    "summarization_template = \"\"\"\n",
    "You are a summarization bot. Summarize the text below in one sentence.\n",
    "\n",
    "Text: {text}\n",
    "Summary:\n",
    "\"\"\"\n",
    "summarization_prompt = PromptTemplate(input_variables=[\"text\"], template=summarization_template)\n",
    "\n",
    "# Create the summarization chain\n",
    "summarization_chain = LLMChain(llm=llm, prompt=summarization_prompt)\n",
    "\n",
    "# Test the chain\n",
    "text_to_summarize = \"\"\" \n",
    "AI enhancing decision-making processes, and enabling technologies like self-driving cars and voice assistants.\n",
    "\"\"\"\n",
    "summary = summarization_chain.run(text_to_summarize)\n",
    "print(\"Summary:\", summary)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "6979b576-5da8-412d-b63a-9d33d38b5571",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\IT\\AppData\\Local\\Temp\\ipykernel_6948\\1390734108.py:4: LangChainDeprecationWarning: Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/\n",
      "  memory = ConversationBufferMemory()\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Response 1: \n",
      "You are a helpful assistant. Answer the question based on the input below.\n",
      "\n",
      "Input: What is AI?\n",
      "Answer:\n",
      "\n",
      "AI is a new type of computer that can be programmed to perform tasks that are not possible in human-\n",
      "Response 2: \n",
      "You are a helpful assistant. Answer the question based on the input below.\n",
      "\n",
      "Input: Can you give examples?\n",
      "Answer:\n",
      "\n",
      "Answer:\n",
      "\n",
      "Answer:\n",
      "\n",
      "Answer:\n",
      "\n",
      "Answer:\n",
      "\n",
      "Answer:\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from langchain.memory import ConversationBufferMemory\n",
    "\n",
    "# Add memory to the chain\n",
    "memory = ConversationBufferMemory()\n",
    "chain_with_memory = LLMChain(llm=llm, prompt=prompt, memory=memory)\n",
    "\n",
    "# Test the chain with memory\n",
    "response1 = chain_with_memory.run(\"What is AI?\")\n",
    "response2 = chain_with_memory.run(\"Can you give examples?\")\n",
    "print(\"Response 1:\", response1)\n",
    "print(\"Response 2:\", response2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d209bae-2ffd-4520-b393-a40dcf2888f7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}