Pandas

Mon 21 July 2025

import pandas as pd  
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import streamlit as st
# Load the raw dataset (decoded as latin-1) and print a quick preview.
data = pd.read_csv('C:/Users/HP/Desktop/OIB-SIP/emailspam/dataset-mail/spam.csv', encoding='latin-1')
print("Original Shape:", data.shape)
print(data.head())

# Keep only the label (v1) and text (v2) columns. The explicit copy makes the
# in-place clean-up below operate on an independent frame instead of a
# selection of the original, which can otherwise raise SettingWithCopyWarning.
data = data[['v1', 'v2']].copy()
data.drop_duplicates(inplace=True)
data.dropna(inplace=True)
print("\nCleaned Shape:", data.shape)

# Switch to human-readable labels and column names.
data['v1'] = data['v1'].replace(['ham', 'spam'], ['Not Spam', 'Spam'])
data.rename(columns={'v1': 'Category', 'v2': 'Message'}, inplace=True)
print("\nLabel Distribution:\n", data['Category'].value_counts())

# Inputs and targets for the split. These names were previously used without
# being defined, which made the script fail with NameError at this point.
mess = data['Message']
cat = data['Category']

# Stratified 80/20 split so both sets keep the same label proportions.
mess_train, mess_test, cat_train, cat_test = train_test_split(
    mess, cat, test_size=0.2, random_state=0, stratify=cat
)

# Bag-of-words features: the vocabulary is learned from the training messages
# only, then reused unchanged to encode the test messages.
cv = CountVectorizer(stop_words='english')
features_train = cv.fit_transform(mess_train)
features_test = cv.transform(mess_test)

# Train the Naive Bayes classifier and report accuracy on the held-out set.
model = MultinomialNB()
model.fit(features_train, cat_train)
print("Accuracy:", model.score(features_test, cat_test))
def predict(message):
    """Classify a single message with the module-level vectorizer and model.

    Args:
        message: The text to classify.

    Returns:
        Whatever ``model.predict`` returns for the one-row input built from
        ``message``.
    """
    # The vectorizer works on an iterable of documents, so the single message
    # is wrapped in a list; the encoded row is then converted to a dense array.
    dense_features = cv.transform([message]).toarray()
    return model.predict(dense_features)
st.header("Email Spam Detector")

# Run one sample prediction and print it to the console.
output = predict("WINNER!! This is the secret code to unlock the money: C3421.")
print(output)

input_message = st.text_input("Enter a message")
if st.button('Go'):
    if input_message.strip():
        output = predict(input_message)
        # predict() returns a one-element array; show the label itself rather
        # than the array's repr (e.g. "['Spam']").
        st.text(output[0])
    else:
        # Nothing meaningful to classify: ask for input instead of predicting
        # on an empty string.
        st.warning("Please enter a message to classify.")


Score: 20

Category: basics