import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

# Toy corpus: five labelled sentences tiled 100 times to simulate a larger
# dataset. Label convention: 1 = positive, 0 = negative.
_REPEATS = 100
_LABELLED_SENTENCES = [
    ("I am happy", 1),
    ("I hate my life, it's so bad", 0),
    ("what a fantastic moments", 1),
    ("I am bored of this situation, it's time to end", 0),
    ("I am lucky", 1),
]

# Column-oriented view of the corpus, in the layout pandas expects.
data = {
    'text': [sentence for sentence, _ in _LABELLED_SENTENCES] * _REPEATS,
    'sentiment': [label for _, label in _LABELLED_SENTENCES] * _REPEATS,
}

df = pd.DataFrame(data)


# Text -> integer sequences.
# The tokenizer is fitted on the whole corpus with num_words=1000, then every
# sentence is encoded and padded/truncated to a fixed length of 10 tokens.
corpus = df['text']
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(corpus)
sequences = tokenizer.texts_to_sequences(corpus)
padded_sequences = pad_sequences(sequences, maxlen=10)

# Labels -> one-hot vectors, as required by the categorical cross-entropy loss
# the model is compiled with.
labels = to_categorical(df['sentiment'])

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
split_arrays = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_arrays

# Build the neural network: embedding -> bidirectional LSTM -> dense head ->
# 2-way softmax (one output unit per sentiment class).
model = Sequential()
# input_dim=1000 matches the tokenizer's num_words so every emitted word index
# has an embedding row. The sequence length is not declared here: the
# `input_length` argument is deprecated in Keras 3 (it only triggers a
# warning), and the recurrent layer below infers the length from the data
# passed to fit()/predict().
model.add(Embedding(input_dim=1000, output_dim=64))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit on the training split for 10 epochs in mini-batches of 32, holding out
# 20% of the training data for per-epoch validation.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

# Score the trained model on the held-out test split. evaluate() yields the
# loss followed by the metrics requested at compile time (accuracy).
test_scores = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_scores
print(f'Loss: {loss}, Accuracy: {accuracy}')

# Human-readable name for each class index produced by the softmax layer.
label_map = {
    0: 'negative sentiments',
    1: 'Positive sentiments',
}

# Unseen sentences to classify. They are encoded with the already-fitted
# tokenizer and padded to the same length (10) used for the training data.
new_texts = ["I love my life", "my life is a trash"]
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_padded_sequences = pad_sequences(new_sequences, maxlen=10)

# Each prediction row holds one probability per class; the most probable
# class index is taken as the predicted label.
predictions = model.predict(new_padded_sequences)
predicted_labels = predictions.argmax(axis=1)
predicted_sentiments = [label_map[class_index] for class_index in predicted_labels]

# Report every input sentence next to its predicted sentiment.
for text, sentiment in zip(new_texts, predicted_sentiments):
    print(f'Text: "{text}" - Sentiment: {sentiment}')

