import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Demo: word tokenization -> stop-word removal -> Porter stemming.

# Sample sentence
sentence = "Welcome to NLP Course, NLTK is a leading platform for building Python programs to work with human language data"

# word_tokenize and stopwords.words rely on NLTK *data* packages that are
# not installed with the library itself; on a fresh environment they raise
# LookupError. Fetch each missing resource once, quietly (no-op if present).
for resource, data_path in (
    ("punkt", "tokenizers/punkt"),
    ("stopwords", "corpora/stopwords"),
):
    try:
        nltk.data.find(data_path)
    except LookupError:
        nltk.download(resource, quiet=True)

# Tokenize the sentence into words
tokens = word_tokenize(sentence)

# Remove stop words: case-insensitive membership test against the English
# stop-word list (a set, so each lookup is O(1)).
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]

# Reduce each remaining word to its Porter stem.
stemmer = PorterStemmer()
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]

# Print the original tokens, filtered tokens, and stemmed tokens
print("Original tokens:", tokens)
print("Filtered tokens (without stop words):", filtered_tokens)
print("Stemmed tokens:", stemmed_tokens)

import nltk
from nltk.tokenize import sent_tokenize

# Demo: sentence tokenization.

# Sample text made up of several short sentences.
text = "NLTK is a leading platform for building. Python programs to work with human language data. Python is great. I love AI"

# Delegate sentence-boundary detection to NLTK's pre-trained Punkt model,
# then show the header and the resulting list (one print, newline-separated).
sentences = sent_tokenize(text)
print("Sentence Tokenization:", sentences, sep="\n")


# Demo: character tokenization.

# Sample text
text = "NLTK is a leading platform for building Python programs to work with human language data."

# A Python string is already an iterable of its characters, so iterating
# it directly yields one single-character string per position.
characters = [char for char in text]
print("\nCharacter Tokenization:")
print(characters)
