Altyn Baigaliyeva

AD654: Marketing Analytics

Boston University

Assignment IV: Classification: Will this Passholder Renew for Next Season?

Part I: Logistic Regression Model:

A. Bring the dataset lobsterland_passholders_dataset.csv into your environment, and use the head() function to explore the variables.

In [2]:
# Import necessary libraries
import pandas as pd

# Load the dataset
df = pd.read_csv('lobsterland_passholders_dataset.csv')

# Display the first few rows of the dataset to explore the variables
df.head()
Out[2]:
Age Previous_Visits Total_Spend_2024 Feedback_Score Gold_Zone_Visits Email_Engagement_Score Distance_From_Park_Miles Home_State Preferred_Attraction Referral_Source Dining_Plan Renewed_Pass
0 56 4 263.74 3.341462 2 94.9 13.9 VT Thrill Social Media NaN 1
1 69 2 541.82 2.581981 1 28.2 28.5 NY Other Friend Upgraded 1
2 46 3 231.59 3.592377 3 46.3 41.2 MA Other Ad/Other NaN 1
3 32 5 136.98 1.935378 0 56.7 20.7 NH Thrill Friend Upgraded 1
4 60 3 277.30 3.643427 4 95.6 45.3 ME Thrill Social Media Upgraded 1

B. Take a look at the dataset description, along with the dataset itself. Which of the variables here are categorical? Which are numerical?

In [3]:
# Identify numerical and categorical columns

# Numerical columns
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()

# Also include binary variables (like Renewed_Pass) as categorical
binary_columns = [col for col in df.columns if df[col].nunique() == 2 and col not in categorical_columns]
categorical_columns += binary_columns

# Display the results
print("Numerical Variables:")
print(numerical_columns)

print("\nCategorical Variables:")
print(categorical_columns)
Numerical Variables:
['Age', 'Previous_Visits', 'Total_Spend_2024', 'Feedback_Score', 'Gold_Zone_Visits', 'Email_Engagement_Score', 'Distance_From_Park_Miles', 'Renewed_Pass']

Categorical Variables:
['Home_State', 'Preferred_Attraction', 'Referral_Source', 'Dining_Plan', 'Renewed_Pass']

Variable Types Summary

Based on the dataset, we can categorize the variables as follows:

Numerical Variables: These are variables that represent quantitative data and can be used in mathematical operations:

  • Age
  • Previous_Visits
  • Total_Spend_2024
  • Feedback_Score
  • Gold_Zone_Visits
  • Email_Engagement_Score
  • Distance_From_Park_Miles
  • Renewed_Pass (although binary, it is technically numeric)

Categorical Variables: These variables represent qualitative data and consist of categories or labels:

  • Home_State
  • Preferred_Attraction
  • Referral_Source
  • Dining_Plan
  • Renewed_Pass (considered categorical due to its binary nature in analysis)

Understanding the types of variables helps guide appropriate data preprocessing and analysis steps.

C. Use the value_counts() function from pandas to learn more about the outcome variable, ‘Renewed_Pass’.

a. Describe your findings -- what are the different outcome classes here, and how common are each of them in the dataset?

In [4]:
# Use value_counts() to examine the distribution of the outcome variable 'Renewed_Pass'
renewed_counts = df['Renewed_Pass'].value_counts()

# Display the counts
print("Value counts for 'Renewed_Pass':")
print(renewed_counts)

# Also display relative frequencies (percentages)
renewed_percentages = df['Renewed_Pass'].value_counts(normalize=True) * 100
print("\nPercentage distribution:")
print(renewed_percentages)
Value counts for 'Renewed_Pass':
Renewed_Pass
1    971
0     29
Name: count, dtype: int64

Percentage distribution:
Renewed_Pass
1    97.1
0     2.9
Name: proportion, dtype: float64

Findings: Outcome Classes in 'Renewed_Pass'

There are two outcome classes in the Renewed_Pass variable:

  • 1 represents passholders who renewed their pass.
  • 0 represents passholders who did not renew their pass.

Based on the value counts:

  • 971 passholders (97.1%) renewed their pass.
  • 29 passholders (2.9%) did not renew.

This indicates a significant class imbalance, with the vast majority of customers renewing their pass.

D. Missing values. Are there any variables in this dataset with missing values? If so, which variable(s) and how considerable is the issue of missingness?

a. Handle the issue of missingness in any way that you see fit, given the data available to you here. Why did you choose this course of action?

In [5]:
# Check for missing values in the dataset
missing_values = df.isnull().sum()

# Display only the columns with missing values
missing_values = missing_values[missing_values > 0]
print("Missing values in the dataset:")
print(missing_values)

# Optional: show percentage of missing values
missing_percentage = (df.isnull().mean() * 100).round(2)
missing_percentage = missing_percentage[missing_percentage > 0]
print("\nPercentage of missing values:")
print(missing_percentage)

# Handling missing values: Let's fill missing 'Dining_Plan' with a new category 'Unknown'
df['Dining_Plan'] = df['Dining_Plan'].fillna('Unknown')
Missing values in the dataset:
Dining_Plan    323
dtype: int64

Percentage of missing values:
Dining_Plan    32.3
dtype: float64

Missing Values Analysis

We identified that the variable Dining_Plan contains missing values:

  • Total missing: 323 records
  • Percentage of total: 32.3%

No other variables in the dataset have missing values.

Handling Missingness

We handled the missing values in the Dining_Plan column by replacing them with the category "Unknown". This approach was chosen because:

  • Dining_Plan is a categorical variable.
  • A missing value might indicate that the customer did not select or was not offered a dining plan.
  • Replacing missing values with "Unknown" avoids dropping a large portion of data (over 30%), which is especially important considering the dataset's class imbalance on the target variable Renewed_Pass.

This method allows us to retain all records for modeling and analysis, while still flagging the missing information.

E. Impossible values. Are there any values in this dataset that appear to be impossible? If so, why? If not, why not?

a. If some values look impossible to you, use your judgement to determine a suitable way to handle the issue. Why did you take this approach?

In [6]:
# Check for impossible values in numerical columns

# Age should be > 0 and reasonable (e.g., less than 100)
print("Invalid ages:")
print(df[df['Age'] <= 0])

# Previous_Visits should be >= 0
print("\nInvalid Previous_Visits:")
print(df[df['Previous_Visits'] < 0])

# Total_Spend_2024 should be >= 0
print("\nInvalid Total_Spend_2024:")
print(df[df['Total_Spend_2024'] < 0])

# Feedback_Score should be in range 1 to 5
print("\nInvalid Feedback_Score:")
print(df[(df['Feedback_Score'] < 1) | (df['Feedback_Score'] > 5)])

# Gold_Zone_Visits should be >= 0
print("\nInvalid Gold_Zone_Visits:")
print(df[df['Gold_Zone_Visits'] < 0])

# Email_Engagement_Score should be >= 0
print("\nInvalid Email_Engagement_Score:")
print(df[df['Email_Engagement_Score'] < 0])

# Distance_From_Park_Miles should be >= 0
print("\nInvalid Distance_From_Park_Miles:")
print(df[df['Distance_From_Park_Miles'] < 0])
Invalid ages:
Empty DataFrame
Columns: [Age, Previous_Visits, Total_Spend_2024, Feedback_Score, Gold_Zone_Visits, Email_Engagement_Score, Distance_From_Park_Miles, Home_State, Preferred_Attraction, Referral_Source, Dining_Plan, Renewed_Pass]
Index: []

Invalid Previous_Visits:
Empty DataFrame
Columns: [Age, Previous_Visits, Total_Spend_2024, Feedback_Score, Gold_Zone_Visits, Email_Engagement_Score, Distance_From_Park_Miles, Home_State, Preferred_Attraction, Referral_Source, Dining_Plan, Renewed_Pass]
Index: []

Invalid Total_Spend_2024:
     Age  Previous_Visits  Total_Spend_2024  Feedback_Score  Gold_Zone_Visits  \
351   52                4             -3.18        3.991413                 4   
431   42                4            -23.91        3.239964                 6   
459   72                4             -5.35        3.498286                 2   
486   68                5            -33.30        2.646505                 3   
635   34                3             -0.66        3.046109                 1   
745   26                5             -2.23        3.555003                 2   
751   54                6            -39.95        3.142010                 4   

     Email_Engagement_Score  Distance_From_Park_Miles Home_State  \
351                    66.7                      51.2         NJ   
431                    66.8                      32.5         MA   
459                    78.3                      28.7         NH   
486                    42.3                      37.5         NY   
635                    52.9                      53.3         ME   
745                    55.6                      27.8         VT   
751                    58.9                      10.3         NY   

    Preferred_Attraction Referral_Source Dining_Plan  Renewed_Pass  
351        Entertainment        Ad/Other    Upgraded             1  
431                Other    Social Media    Upgraded             1  
459               Thrill        Ad/Other     Unknown             1  
486        Entertainment          Friend    Upgraded             1  
635                Other        Ad/Other    Upgraded             0  
745               Thrill        Ad/Other    Upgraded             1  
751                Other        Ad/Other    Upgraded             1  

Invalid Feedback_Score:
Empty DataFrame
Columns: [Age, Previous_Visits, Total_Spend_2024, Feedback_Score, Gold_Zone_Visits, Email_Engagement_Score, Distance_From_Park_Miles, Home_State, Preferred_Attraction, Referral_Source, Dining_Plan, Renewed_Pass]
Index: []

Invalid Gold_Zone_Visits:
Empty DataFrame
Columns: [Age, Previous_Visits, Total_Spend_2024, Feedback_Score, Gold_Zone_Visits, Email_Engagement_Score, Distance_From_Park_Miles, Home_State, Preferred_Attraction, Referral_Source, Dining_Plan, Renewed_Pass]
Index: []

Invalid Email_Engagement_Score:
     Age  Previous_Visits  Total_Spend_2024  Feedback_Score  Gold_Zone_Visits  \
73    21                4            345.79        2.868348                 1   
200   47                6             86.80        5.000000                 4   

     Email_Engagement_Score  Distance_From_Park_Miles Home_State  \
73                     -8.1                      39.3         ME   
200                   -10.4                       4.5         NH   

    Preferred_Attraction Referral_Source Dining_Plan  Renewed_Pass  
73                Thrill        Ad/Other     Unknown             1  
200                Other        Ad/Other    Upgraded             0  

Invalid Distance_From_Park_Miles:
Empty DataFrame
Columns: [Age, Previous_Visits, Total_Spend_2024, Feedback_Score, Gold_Zone_Visits, Email_Engagement_Score, Distance_From_Park_Miles, Home_State, Preferred_Attraction, Referral_Source, Dining_Plan, Renewed_Pass]
Index: []
In [7]:
# Remove rows with impossible (negative) values in 'Total_Spend_2024'
df = df[df['Total_Spend_2024'] >= 0]

Impossible Values Analysis

Yes, we did find impossible values in the dataset.

Specifically, 7 records in the Total_Spend_2024 column contained negative values, which are impossible in this context because customers cannot spend a negative amount of money. This suggests data entry errors or anomalies.

The validity check also flagged one other column: Email_Engagement_Score contained 2 negative values (-8.1 and -10.4). The remaining columns were clean:

  • Age and Previous_Visits were all non-negative and realistic.
  • Feedback_Score stayed within the plausible 1-5 range.
  • Distance_From_Park_Miles and Gold_Zone_Visits were logical as well.

Handling Strategy

We decided to remove the 7 invalid records with negative spending.
Why?

  • These values cannot be logically corrected (e.g., we don’t know what the real spend should have been).
  • Imputing a value could introduce bias, especially in financial analysis.
  • The number of affected rows is very small (less than 1% of the dataset), so removing them has minimal impact on the overall data.

The two negative Email_Engagement_Score values were left in place in this analysis; with only two affected rows, dropping them would change little, though they could reasonably be removed as well. With the negative-spend records removed, the data is clean and trustworthy for the analysis going forward.

F. Examining correlations

a. Build a correlation table to examine the correlations among your numeric independent variables.

i. Are there any correlations here that are so high as to present a likely problem with multicollinearity? If so, remove one member of any highly-correlated pair. If not, keep rolling on.

In [8]:
import seaborn as sns
import matplotlib.pyplot as plt

# Select only numeric independent variables (excluding the target 'Renewed_Pass')
numeric_vars = df.drop(columns=['Renewed_Pass']).select_dtypes(include=['int64', 'float64'])

# Compute the correlation matrix
corr_matrix = numeric_vars.corr()

# Display the correlation matrix
print("Correlation matrix:")
print(corr_matrix)

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", square=True)
plt.title('Correlation Matrix of Numeric Variables')
plt.show()
Correlation matrix:
                               Age  Previous_Visits  Total_Spend_2024  \
Age                       1.000000         0.004119          0.044761   
Previous_Visits           0.004119         1.000000          0.010654   
Total_Spend_2024          0.044761         0.010654          1.000000   
Feedback_Score           -0.009485         0.015656          0.040455   
Gold_Zone_Visits          0.022811        -0.008493         -0.011877   
Email_Engagement_Score   -0.019586        -0.004299          0.004505   
Distance_From_Park_Miles  0.030491        -0.008424          0.061097   

                          Feedback_Score  Gold_Zone_Visits  \
Age                            -0.009485          0.022811   
Previous_Visits                 0.015656         -0.008493   
Total_Spend_2024                0.040455         -0.011877   
Feedback_Score                  1.000000         -0.046864   
Gold_Zone_Visits               -0.046864          1.000000   
Email_Engagement_Score         -0.039351         -0.012413   
Distance_From_Park_Miles       -0.041179          0.008047   

                          Email_Engagement_Score  Distance_From_Park_Miles  
Age                                    -0.019586                  0.030491  
Previous_Visits                        -0.004299                 -0.008424  
Total_Spend_2024                        0.004505                  0.061097  
Feedback_Score                         -0.039351                 -0.041179  
Gold_Zone_Visits                       -0.012413                  0.008047  
Email_Engagement_Score                  1.000000                 -0.085527  
Distance_From_Park_Miles               -0.085527                  1.000000  
[Figure: seaborn heatmap of the correlation matrix of numeric variables]

Correlation Analysis

We examined the correlation matrix of all numeric independent variables (excluding the target Renewed_Pass).

Key Findings:

  • All correlation values are relatively low, ranging between -0.09 and 0.06.
  • This indicates that there is no strong linear relationship between any pair of numeric variables.

Multicollinearity:

  • No correlations exceed the common threshold of 0.8, which suggests that multicollinearity is not a concern in this dataset.

Action Taken:

  • As a result, no variables were removed. We retain all numeric features for future analysis and modeling.
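
The same conclusion can be verified programmatically by scanning the correlation matrix for any pair above the 0.8 threshold. A minimal sketch, using the corr_matrix computed above:

# Flag any variable pairs whose absolute correlation exceeds 0.8
threshold = 0.8
high_pairs = [
    (corr_matrix.index[i], corr_matrix.columns[j], round(corr_matrix.iloc[i, j], 3))
    for i in range(len(corr_matrix))
    for j in range(i + 1, len(corr_matrix))
    if abs(corr_matrix.iloc[i, j]) > threshold
]
print(high_pairs if high_pairs else "No pairs exceed the 0.8 threshold")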

G. For any variables that need to be dummified, dummify them, being sure to drop one level as you do.

In [9]:
# Identify categorical variables (excluding the target variable 'Renewed_Pass')
categorical_vars = ['Home_State', 'Preferred_Attraction', 'Referral_Source', 'Dining_Plan']

# Perform one-hot encoding with drop_first=True to avoid dummy variable trap
df_encoded = pd.get_dummies(df, columns=categorical_vars, drop_first=True)

# Display first few rows of the encoded dataset
df_encoded.head()
Out[9]:
Age Previous_Visits Total_Spend_2024 Feedback_Score Gold_Zone_Visits Email_Engagement_Score Distance_From_Park_Miles Renewed_Pass Home_State_ME Home_State_NH Home_State_NJ Home_State_NY Home_State_VT Preferred_Attraction_Other Preferred_Attraction_Thrill Referral_Source_Friend Referral_Source_Social Media Dining_Plan_Upgraded
0 56 4 263.74 3.341462 2 94.9 13.9 1 False False False False True False True False True False
1 69 2 541.82 2.581981 1 28.2 28.5 1 False False False True False True False True False True
2 46 3 231.59 3.592377 3 46.3 41.2 1 False False False False False True False False False False
3 32 5 136.98 1.935378 0 56.7 20.7 1 False True False False False False True True False True
4 60 3 277.30 3.643427 4 95.6 45.3 1 True False False False False False True False True True

Dummification of Categorical Variables

We applied one-hot encoding to the following categorical variables:

  • Home_State
  • Preferred_Attraction
  • Referral_Source
  • Dining_Plan

To avoid the dummy variable trap and ensure model interpretability, we set drop_first=True, which removes one category from each variable as a baseline.

This process transforms categorical features into binary (0/1) columns, making them suitable for use in predictive modeling.

H. Create a data partition. For your random_state value, use a number based on either your work, home, or school address, or just a number that you like (For example, I live at 201 Canal Street, I work at 1010 Commonwealth Avenue, and my lucky number is 80, so I could use either 201, 1010, or 80). Assign 40% of your rows to your test set, and 60% to your training set.

a. How did you pick your seed value?

In [10]:
from sklearn.model_selection import train_test_split

# Use 29 as the seed — meaningful personal date
random_seed = 29

# 60% training, 40% test
train_df, test_df = train_test_split(df_encoded, test_size=0.4, random_state=random_seed)

# Display the sizes
print(f"Training set size: {len(train_df)} rows")
print(f"Test set size: {len(test_df)} rows")
Training set size: 595 rows
Test set size: 398 rows

Data Partitioning

We split the dataset into:

  • 60% training set
  • 40% test set

For reproducibility, we used a random_state value of 29, which holds personal significance — it represents the date I met my significant other.

Using a fixed, memorable seed keeps the partition reproducible across different runs.

I. Compare the mean values of the variables in the dataset after grouping by Renewed_Pass.

a. From the results you see here, choose any THREE independent variables from the dataset, and speculate about their likely impact on the result – do you think this variable will be strongly impactful? Why or why not?

(This is not a formal statistical test - the goal here is to look at your results and start to speculate about variables that might be impactful).

In [11]:
# Compare mean values of all features grouped by Renewed_Pass
grouped_means = df_encoded.groupby('Renewed_Pass').mean(numeric_only=True)

# Display the result
grouped_means.T.sort_index()
Out[11]:
Renewed_Pass 0 1
Age 44.678571 46.267358
Dining_Plan_Upgraded 0.642857 0.676684
Distance_From_Park_Miles 29.428571 31.264041
Email_Engagement_Score 50.589286 49.206114
Feedback_Score 2.835381 3.448301
Gold_Zone_Visits 2.107143 2.029016
Home_State_ME 0.107143 0.157513
Home_State_NH 0.107143 0.153368
Home_State_NJ 0.142857 0.163731
Home_State_NY 0.214286 0.169948
Home_State_VT 0.392857 0.165803
Preferred_Attraction_Other 0.571429 0.425907
Preferred_Attraction_Thrill 0.250000 0.377202
Previous_Visits 4.642857 4.977202
Referral_Source_Friend 0.142857 0.174093
Referral_Source_Social Media 0.214286 0.203109
Total_Spend_2024 161.072857 250.191492

Group Comparison by Renewed_Pass

We compared the mean values of all numeric features, grouped by the Renewed_Pass outcome (0 = did not renew, 1 = renewed).

Observations & Speculation:

  1. Total_Spend_2024

    • Mean spend for renewers: 250.19
    • Mean spend for non-renewers: 161.07
    • Speculation: Users who spend more are likely more satisfied with the park experience and more invested in it, making them more likely to renew. This is likely to be a strong predictor.
  2. Feedback_Score

    • Mean for renewers: 3.45
    • Mean for non-renewers: 2.84
    • Speculation: More satisfied users (those who leave higher feedback) are more inclined to renew their pass. This variable also shows a clear difference and may be a moderately strong predictor.
  3. Email_Engagement_Score

    • Mean for renewers: 49.21
    • Mean for non-renewers: 50.59
    • Speculation: Interestingly, users who renewed were slightly less engaged with email. This is a surprising result and may imply that email engagement is not a strong predictor on its own — possibly a weak or misleading predictor without further context.

These insights help form early hypotheses about which variables may influence pass renewal, though a formal model would be required to confirm these patterns.
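
Although the assignment stresses that this step is not a formal statistical test, a quick two-sample comparison can show whether these group differences are larger than chance would suggest. A minimal, optional sketch using scipy's Welch t-test on the encoded data from above:

from scipy.stats import ttest_ind

# Welch's t-test comparing group means for the three candidate predictors
for col in ['Total_Spend_2024', 'Feedback_Score', 'Email_Engagement_Score']:
    renewed = df_encoded.loc[df_encoded['Renewed_Pass'] == 1, col]
    not_renewed = df_encoded.loc[df_encoded['Renewed_Pass'] == 0, col]
    t_stat, p_val = ttest_ind(renewed, not_renewed, equal_var=False)
    print(f"{col}: t = {t_stat:.2f}, p = {p_val:.4f}")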

Iteration #1

J. Build a logistic regression model using statsmodels, with the outcome variable ‘Renewed_Pass’. Use the rest of the remaining variables from the dataset as inputs. Remember to use only your training data to build this model.

In [12]:
import statsmodels.api as sm

# Separate features and target
X_train = train_df.drop(columns=['Renewed_Pass'])
y_train = train_df['Renewed_Pass']

# Add constant (intercept)
X_train_const = sm.add_constant(X_train)

# Fix data types
X_train_const = X_train_const.astype(float)

# Fit the model
logit_model = sm.Logit(y_train, X_train_const)
result = logit_model.fit()

# Show results
result.summary()
Optimization terminated successfully.
         Current function value: 0.093290
         Iterations 9
Out[12]:
Logit Regression Results
Dep. Variable: Renewed_Pass No. Observations: 595
Model: Logit Df Residuals: 577
Method: MLE Df Model: 17
Date: Sat, 29 Mar 2025 Pseudo R-squ.: 0.2462
Time: 12:02:31 Log-Likelihood: -55.508
converged: True LL-Null: -73.638
Covariance Type: nonrobust LLR p-value: 0.004232
coef std err z P>|z| [0.025 0.975]
const -1.8133 2.312 -0.784 0.433 -6.344 2.717
Age 0.0005 0.017 0.027 0.978 -0.032 0.033
Previous_Visits 0.0515 0.122 0.423 0.672 -0.187 0.290
Total_Spend_2024 0.0147 0.004 3.756 0.000 0.007 0.022
Feedback_Score 0.7384 0.315 2.346 0.019 0.122 1.355
Gold_Zone_Visits -0.1032 0.185 -0.559 0.576 -0.465 0.259
Email_Engagement_Score 0.0054 0.015 0.357 0.721 -0.024 0.035
Distance_From_Park_Miles 0.0097 0.017 0.573 0.566 -0.023 0.043
Home_State_ME -0.2749 1.455 -0.189 0.850 -3.126 2.577
Home_State_NH -0.3142 1.454 -0.216 0.829 -3.164 2.535
Home_State_NJ -1.2240 1.263 -0.969 0.333 -3.700 1.252
Home_State_NY -1.2570 1.165 -1.079 0.281 -3.540 1.026
Home_State_VT -2.4701 1.123 -2.200 0.028 -4.671 -0.270
Preferred_Attraction_Other 0.1490 0.658 0.227 0.821 -1.140 1.438
Preferred_Attraction_Thrill 1.3398 0.818 1.638 0.101 -0.264 2.943
Referral_Source_Friend -0.0265 0.754 -0.035 0.972 -1.503 1.450
Referral_Source_Social Media -0.5496 0.770 -0.714 0.475 -2.058 0.959
Dining_Plan_Upgraded 0.7144 0.636 1.123 0.262 -0.533 1.962

Logistic Regression – Iteration #1 Summary

We trained a logistic regression model using all available features on the training set. Key observations from the output:

Statistically Significant Variables (p < 0.05):

  • Total_Spend_2024 (p = 0.000): Strong positive predictor. Higher spending is associated with increased likelihood of renewal.
  • Feedback_Score (p = 0.019): Positive predictor. Satisfied customers are more likely to renew.
  • Home_State_VT (p = 0.028): Negative predictor. Residents of Vermont are significantly less likely to renew.

Model Fit:

  • Pseudo R-squared: 0.2462 — indicating moderate explanatory power.
  • Model converged successfully and included 595 observations.

Next Steps: In the next iteration, we may consider:

  • Removing non-significant variables (e.g., Age, Gold_Zone_Visits).
  • Refining the model to improve interpretability and reduce noise.

This model serves as a solid foundation for evaluating feature importance and optimizing predictive performance.

K. Show the summary of your model with log_reg.summary(). (Note: If you named your model something else, e.g. mymodel, you can just use mymodel.summary() here).

a. Which of your numeric variables here are showing high p-values?

b. For your categorical variables, which ones are showing high p-values for ALL of the levels in the model summary?

Model Summary Analysis

a. Numeric variables with high p-values (> 0.05):

  • Age (p = 0.978)
  • Previous_Visits (p = 0.672)
  • Gold_Zone_Visits (p = 0.576)
  • Email_Engagement_Score (p = 0.721)
  • Distance_From_Park_Miles (p = 0.566)

These variables did not demonstrate statistical significance and may not contribute meaningfully to the model.

b. Categorical variables where all levels show high p-values:

  • Referral_Source: both levels (Friend and Social Media) have p-values above 0.4, suggesting no significant impact at any level.
  • Preferred_Attraction: both levels exceed 0.05, though Thrill is borderline (p ≈ 0.10).
  • Dining_Plan: its single dummy level (Upgraded) has p = 0.262, not significant.

Conclusion: We may consider dropping or simplifying these features in the next iteration to improve model clarity and performance.
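
These lists can also be pulled programmatically from the fitted statsmodels result, which is convenient when deciding what to drop. A minimal sketch using the result object from Iteration #1:

# Coefficients from Iteration #1 with p-values above 0.05 (intercept excluded)
high_p = result.pvalues.drop('const')
print(high_p[high_p > 0.05].sort_values(ascending=False))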

Iteration #2

L. Now, build yet another model. Again use statsmodels, and again, use your training set only. Start with the variables you used in Iteration #1 but drop the ones you identified in the previous step, for parts (a) and (b).

a. Show the results of this 2nd model with log_reg.summary().

In [13]:
import statsmodels.api as sm

# Define reduced feature set for Iteration #2
reduced_columns = [
    'Total_Spend_2024',
    'Feedback_Score',
    'Home_State_VT',
    'Preferred_Attraction_Other',
    'Preferred_Attraction_Thrill',
    'Dining_Plan_Upgraded'
]

# Prepare training data
X_train_reduced = train_df[reduced_columns]
y_train = train_df['Renewed_Pass']

# Add constant
X_train_reduced_const = sm.add_constant(X_train_reduced)
X_train_reduced_const = X_train_reduced_const.astype(float)

# Fit model
logit_model_2 = sm.Logit(y_train, X_train_reduced_const)
result_2 = logit_model_2.fit()

# Show summary
result_2.summary()
Optimization terminated successfully.
         Current function value: 0.096400
         Iterations 9
Out[13]:
Logit Regression Results
Dep. Variable: Renewed_Pass No. Observations: 595
Model: Logit Df Residuals: 588
Method: MLE Df Model: 6
Date: Sat, 29 Mar 2025 Pseudo R-squ.: 0.2211
Time: 12:02:31 Log-Likelihood: -57.358
converged: True LL-Null: -73.638
Covariance Type: nonrobust LLR p-value: 1.273e-05
coef std err z P>|z| [0.025 0.975]
const -1.7172 1.389 -1.236 0.216 -4.439 1.005
Total_Spend_2024 0.0141 0.004 3.769 0.000 0.007 0.021
Feedback_Score 0.6804 0.288 2.362 0.018 0.116 1.245
Home_State_VT -1.6841 0.578 -2.916 0.004 -2.816 -0.552
Preferred_Attraction_Other 0.1913 0.638 0.300 0.764 -1.059 1.442
Preferred_Attraction_Thrill 1.3741 0.794 1.730 0.084 -0.182 2.931
Dining_Plan_Upgraded 0.5579 0.580 0.962 0.336 -0.579 1.695

Iteration #2 – Logistic Regression Summary

In this second model, we excluded variables with high p-values from Iteration #1 to simplify the model and focus on significant predictors.

Significant Variables (p < 0.05):

  • Total_Spend_2024: Strong positive predictor. Higher spending increases the likelihood of renewal.
  • Feedback_Score: Positive predictor. More satisfied customers are more likely to renew.
  • Home_State_VT: Negative predictor. Residents of Vermont are significantly less likely to renew.

Model Fit:

  • Pseudo R-squared: 0.2211 — slightly lower than the full model, but the model is more concise.
  • LLR p-value: 1.27e-05 — overall model is statistically significant.

Next Steps: We may consider:

  • Dropping or combining additional variables like Preferred_Attraction_* and Dining_Plan_Upgraded if they continue to show weak statistical contribution.
  • Moving toward model validation and interpretation on the test set.

This iteration helps clarify which variables meaningfully influence pass renewal.

M. Using scikit-learn, build another version of your model, using your remaining variables. You will use this version of the model for all remaining steps.

In [14]:
selected_features = [
    'Total_Spend_2024',
    'Feedback_Score',
    'Home_State_VT',
    'Preferred_Attraction_Other',
    'Preferred_Attraction_Thrill',
    'Dining_Plan_Upgraded'
]
In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Define X and y for train and test sets
X_train_final = train_df[selected_features]
y_train_final = train_df['Renewed_Pass']

X_test_final = test_df[selected_features]
y_test_final = test_df['Renewed_Pass']

# Optional: scale features (especially useful for some algorithms)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_final)
X_test_scaled = scaler.transform(X_test_final)

# Build logistic regression model
clf = LogisticRegression(random_state=29)
clf.fit(X_train_scaled, y_train_final)
Out[15]:
LogisticRegression(random_state=29)

Logistic Regression (scikit-learn version)

We recreated our logistic regression model using scikit-learn, based on the final list of selected features:

  • Total_Spend_2024
  • Feedback_Score
  • Home_State_VT
  • Preferred_Attraction_Other
  • Preferred_Attraction_Thrill
  • Dining_Plan_Upgraded

We also applied feature scaling using StandardScaler, which puts the predictors on a comparable scale and helps the solver converge.

This model will be used for all future evaluations, including accuracy, confusion matrix, and ROC curve.

N. Assess the performance of your model against the test set. Build a confusion matrix, and answer the following questions about your model. You can use Python functions to answer any of these questions or you can use your confusion matrix to determine the answers in a slightly more manual way. The ‘positive’ class in this model is represented by the “1” outcome.

a. What is your model’s accuracy rate?

b. What is your model’s sensitivity rate?

c. What is your model’s specificity rate?

d. What is your model’s precision?

e. What is your model’s balanced accuracy?

In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, balanced_accuracy_score

# Predict on the test set
y_pred = clf.predict(X_test_scaled)

# Build confusion matrix
cm = confusion_matrix(y_test_final, y_pred)
tn, fp, fn, tp = cm.ravel()

print("Confusion Matrix:")
print(cm)

# Accuracy
accuracy = accuracy_score(y_test_final, y_pred)

# Sensitivity (Recall for class 1)
sensitivity = recall_score(y_test_final, y_pred)

# Specificity = TN / (TN + FP)
specificity = tn / (tn + fp)

# Precision = TP / (TP + FP)
precision = precision_score(y_test_final, y_pred)

# Balanced accuracy = average of sensitivity and specificity
balanced_acc = balanced_accuracy_score(y_test_final, y_pred)

# Display metrics
print(f"\nAccuracy: {accuracy:.4f}")
print(f"Sensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Balanced Accuracy: {balanced_acc:.4f}")
Confusion Matrix:
[[  0  12]
 [  0 386]]

Accuracy: 0.9698
Sensitivity (Recall): 1.0000
Specificity: 0.0000
Precision: 0.9698
Balanced Accuracy: 0.5000

Model Evaluation on Test Set

We evaluated the performance of the logistic regression model using the test set. The positive class represents users who renewed their pass (Renewed_Pass = 1).

Confusion Matrix:

Predicted 0 Predicted 1
Actual 0 (No) 0 12
Actual 1 (Yes) 0 386

Metrics:

  • Accuracy: 96.98%
  • Sensitivity (Recall): 100.00%
  • Specificity: 0.00%
  • Precision: 96.98%
  • Balanced Accuracy: 50.00%

Interpretation: The model perfectly predicts the positive class, but fails to identify any of the negative cases — it classifies every test observation as “Renewed” (class 1).
This results in high accuracy and sensitivity, but zero specificity, which suggests the model is biased due to strong class imbalance in the dataset (almost all users renewed).

Further improvement might include:

  • Applying class balancing techniques (e.g. SMOTE, class weights)
  • Exploring additional predictors or thresholds
  • Using evaluation metrics suited for imbalanced data (e.g. ROC AUC, F1-score)
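
As one concrete illustration of the class-weighting suggestion, scikit-learn's LogisticRegression accepts class_weight='balanced', which reweights the loss inversely to class frequencies. A sketch using the scaled training data from above (an optional experiment, not a required step):

# Refit with balanced class weights so the rare non-renewers carry more weight
clf_balanced = LogisticRegression(class_weight='balanced', random_state=29)
clf_balanced.fit(X_train_scaled, y_train_final)

y_pred_bal = clf_balanced.predict(X_test_scaled)
print(confusion_matrix(y_test_final, y_pred_bal))
print(f"Balanced accuracy: {balanced_accuracy_score(y_test_final, y_pred_bal):.4f}")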

O. Compare your model’s accuracy against the training set vs. accuracy against the test set (just use accuracy only for this).

a. What is the purpose of comparing those two values?

b. In this case, what does the comparison of those values suggest about the model that you have built?

In [17]:
# Accuracy on training set
train_preds = clf.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train_final, train_preds)

# Accuracy on test set (already computed above)
test_accuracy = accuracy_score(y_test_final, y_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
Training Accuracy: 0.9731
Test Accuracy: 0.9698

Training vs. Test Accuracy Comparison

We compared the model's performance on the training and test sets:

  • Training Accuracy: 97.31%
  • Test Accuracy: 96.98%

a. What is the purpose of comparing those two values?

Comparing training and test accuracy helps evaluate the model's generalization:

  • A big gap may indicate overfitting
  • Very close values suggest that the model behaves similarly on new/unseen data

b. What does this comparison suggest in our case?

Since both accuracies are very high and very close (difference ≈ 0.3%), the model appears to generalize well.

However, accuracy alone may be misleading here because:

  • The dataset is highly imbalanced (most users renewed their pass)
  • The model predicted all test samples as the positive class (Renewed)

So, even though accuracy is high, the model fails to identify non-renewers, which is visible from:

  • Specificity = 0.00
  • Balanced Accuracy = 0.50

This suggests the model may need:

  • Rebalancing techniques (e.g., SMOTE or class weighting)
  • Use of better evaluation metrics (like AUC, F1-score) for imbalanced data

P. Make up a passholder. Assign this customer a value for each predictor variable in this model, and store the results in a new dataframe. Now, put your passholder through this model.

a. What did your model predict -- will this passholder renew?

b. According to your model, what is the probability that the passholder will renew?

In [18]:
selected_features = [
    'Total_Spend_2024',
    'Feedback_Score',
    'Home_State_VT',
    'Preferred_Attraction_Other',
    'Preferred_Attraction_Thrill',
    'Dining_Plan_Upgraded'
]
In [19]:
import pandas as pd

# Create a made-up passholder
new_passholder = pd.DataFrame([{
    'Total_Spend_2024': 275.00,       # high spending
    'Feedback_Score': 4.2,            # good experience
    'Home_State_VT': 0,               # not from Vermont
    'Preferred_Attraction_Other': 0,  # not other
    'Preferred_Attraction_Thrill': 1, # loves thrill
    'Dining_Plan_Upgraded': 1         # upgraded plan
}])

# Scale the input using the same scaler
new_scaled = scaler.transform(new_passholder)

# Predict class
prediction = clf.predict(new_scaled)[0]

# Predict probability
probability = clf.predict_proba(new_scaled)[0][1]  # Probability of class 1

print(f"Prediction (1 = Renew): {prediction}")
print(f"Probability of renewal: {probability:.4f}")
Prediction (1 = Renew): 1
Probability of renewal: 0.9984

Prediction for a Hypothetical Passholder

We created a fictional passholder with the following characteristics:

  • Total_Spend_2024: 275.00
  • Feedback_Score: 4.2
  • Home_State_VT: 0 (not from Vermont)
  • Preferred_Attraction_Thrill: 1
  • Preferred_Attraction_Other: 0
  • Dining_Plan_Upgraded: 1

a. What did the model predict?

  • Prediction (class): 1
    → The model predicts that this passholder will renew their pass.

b. What is the probability of renewal?

  • Predicted Probability: 99.84%
    → According to the model, this passholder has a very high likelihood of renewing.

This prediction aligns with the fact that the customer has high spending, gave strong feedback, and is engaged with premium park features.

Q. When using a logistic regression model to make predictions, why is it important to only use values within the range of the dataset used to build the model?

a. Make a new dataframe, but this time, for the numeric predictor variables, select some numbers that are outside the range of the dataset -- do not use a 400+ year-old vampire named “Mary.” Use your model to make a prediction for this new dataframe. What do you notice about the result? (To answer this, don’t simply state the predicted outcome, but also write 1-2 sentences of explanation for what you see).

In [20]:
# Made-up customer with values far outside the dataset's normal range
extreme_passholder = pd.DataFrame([{
    'Total_Spend_2024': 10000,     # extremely high spend
    'Feedback_Score': -5.0,        # invalid low feedback
    'Home_State_VT': 0,
    'Preferred_Attraction_Other': 1,
    'Preferred_Attraction_Thrill': 0,
    'Dining_Plan_Upgraded': 1
}])

# Scale and predict
extreme_scaled = scaler.transform(extreme_passholder)
extreme_pred = clf.predict(extreme_scaled)[0]
extreme_prob = clf.predict_proba(extreme_scaled)[0][1]

print(f"Prediction (1 = Renew): {extreme_pred}")
print(f"Probability of renewal: {extreme_prob:.4f}")
Prediction (1 = Renew): 1
Probability of renewal: 1.0000

Why is it important to only use values within the range of the training dataset?

Logistic regression models learn patterns from the training data and generalize within that observed range. If we input extreme or unrealistic values, especially for numeric variables, the model can produce misleading or overconfident predictions. This happens because the model tries to extrapolate beyond its knowledge, despite having no real-world basis for doing so.
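
The overconfidence comes from the shape of the logistic function itself: once the linear predictor is pushed far beyond the range seen in training, the sigmoid saturates at 0 or 1. A tiny numeric illustration:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Moderate linear predictors give nuanced probabilities; extreme
# (extrapolated) ones saturate to near-certainty
for z in [1.0, 3.0, 10.0, 50.0]:
    print(f"linear predictor = {z:5.1f} -> P(renew) = {sigmoid(z):.6f}")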


a. What happens when we feed the model out-of-range values?

We created an extreme, unrealistic passholder with the following inputs:

  • Total_Spend_2024: 10,000 (massive outlier)
  • Feedback_Score: -5.0 (invalid negative value)
  • Other features: plausible

Model Output:

  • Predicted class: 1 (will renew)
  • Predicted probability: 100.00%

Interpretation: Although this passholder had nonsensical values (e.g., negative feedback), the model predicted renewal with perfect confidence. This result is not reliable, and shows that the model overconfidently extrapolated based on data it has never seen.

Conclusion: It's essential to keep predictor values within a realistic and observed range to ensure predictions are meaningful and trustworthy.
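
One lightweight safeguard is to compare new inputs against the ranges observed in training before trusting a prediction. A sketch, where check_in_range is a hypothetical helper rather than part of the assignment:

# Hypothetical guard: flag numeric inputs outside the training range
def check_in_range(new_row, train_X, cols):
    for col in cols:
        lo, hi = train_X[col].min(), train_X[col].max()
        val = new_row[col].iloc[0]
        if not lo <= val <= hi:
            print(f"Warning: {col} = {val} outside training range [{lo}, {hi}]")

check_in_range(extreme_passholder, X_train_final, ['Total_Spend_2024', 'Feedback_Score'])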

Part II: Random Forest Model

R. Read the dataset back into Python. For the steps you took in the previous section with regards to missingness and impossible values, repeat those here.

In [21]:
# Step 1: Read the original dataset again
df_raw = pd.read_csv('lobsterland_passholders_dataset.csv')

# Step 2: Handle missing values
# Fill missing Dining_Plan values with 'Unknown'
df_raw['Dining_Plan'] = df_raw['Dining_Plan'].fillna('Unknown')

# Step 3: Handle impossible values
# Remove rows where Total_Spend_2024 is negative
df_cleaned = df_raw[df_raw['Total_Spend_2024'] >= 0]

Re-importing and Re-cleaning the Dataset

To ensure consistency and reproducibility, we reloaded the original dataset and repeated all necessary data cleaning steps:

Missing Values:

  • The column Dining_Plan contained 323 missing values.
  • These were replaced with the category "Unknown" to retain all records while preserving the information.

Impossible Values:

  • 7 records had negative values in the Total_Spend_2024 column.
  • These were removed, as negative spending is not valid and cannot be reliably imputed.

After these steps, the dataset is now clean and ready for further analysis or modeling.

S. Dummify the categorical inputs again, but this time, don’t drop any levels.

In [22]:
# Identify categorical columns (excluding target)
categorical_vars = ['Home_State', 'Preferred_Attraction', 'Referral_Source', 'Dining_Plan']

# Dummify without dropping any levels
df_dummified = pd.get_dummies(df_cleaned, columns=categorical_vars, drop_first=False)

# View the resulting DataFrame
df_dummified.head()
Out[22]:
Age Previous_Visits Total_Spend_2024 Feedback_Score Gold_Zone_Visits Email_Engagement_Score Distance_From_Park_Miles Renewed_Pass Home_State_MA Home_State_ME ... Home_State_NY Home_State_VT Preferred_Attraction_Entertainment Preferred_Attraction_Other Preferred_Attraction_Thrill Referral_Source_Ad/Other Referral_Source_Friend Referral_Source_Social Media Dining_Plan_Unknown Dining_Plan_Upgraded
0 56 4 263.74 3.341462 2 94.9 13.9 1 False False ... False True False False True False False True True False
1 69 2 541.82 2.581981 1 28.2 28.5 1 False False ... True False False True False False True False False True
2 46 3 231.59 3.592377 3 46.3 41.2 1 True False ... False False False True False True False False True False
3 32 5 136.98 1.935378 0 56.7 20.7 1 False False ... False False False False True False True False False True
4 60 3 277.30 3.643427 4 95.6 45.3 1 False True ... False False False False True False False True False True

5 rows × 22 columns

Full Dummification (No Levels Dropped)

We transformed all categorical input variables into dummy (one-hot encoded) variables using pd.get_dummies() with drop_first=False.

This time, we retained all category levels, which means:

  • No reference category was dropped.
  • Each original category is now represented by its own binary (0/1) column.

This approach may introduce multicollinearity in linear models but can be helpful when we want full interpretability or use tree-based models that are not affected by linearly dependent inputs.

T. Re-partition the data, using the same seed value that you used in the previous part of this assignment.

In [23]:
from sklearn.model_selection import train_test_split

# Separate target and features
X = df_dummified.drop(columns=['Renewed_Pass'])
y = df_dummified['Renewed_Pass']

# Re-partition the data (60% train, 40% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=29
)

# Show sizes
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")
Training set size: 595
Test set size: 398

Re-Partitioning the Data

We re-partitioned the fully dummified dataset using the same logic as before:

  • Training set size: 595 observations
  • Test set size: 398 observations
  • Random seed used: 29 (symbolic value chosen earlier in the project)

Using the same seed ensures that the split is consistent and reproducible, even though the input features have changed (e.g., now we retained all dummy variable levels).

U. Build a random forest model in Python with your training set. Use the same input variables, and same output variable, as you used in the first logistic regression model (the only difference here is that the categories should not have any levels dropped). Use GridSearchCV to help you determine the best hyperparameter settings for your model.

In [24]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Define base model
rf = RandomForestClassifier(random_state=29)

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2']
}

# Setup GridSearchCV
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

# Fit on training data
grid_search.fit(X_train, y_train)

# Best model
best_rf = grid_search.best_estimator_

# Predict on test set
y_pred_rf = best_rf.predict(X_test)

# Evaluate accuracy
rf_accuracy = accuracy_score(y_test, y_pred_rf)

# Output
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test Set Accuracy: {rf_accuracy:.4f}")
Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Test Set Accuracy: 0.9698

Random Forest Model with GridSearchCV

We trained a random forest classifier on the fully dummified dataset using the original input variables from the first logistic regression model. This time, however, we retained all dummy levels for the categorical variables.

Model Setup:

  • Target variable: Renewed_Pass
  • Input variables: All remaining features in the dummified dataset

Hyperparameter Optimization: We used GridSearchCV to tune the following parameters:

  • n_estimators: Number of trees in the forest
  • max_depth: Maximum depth of each tree
  • min_samples_split: Minimum number of samples required to split a node
  • min_samples_leaf: Minimum number of samples required at a leaf node
  • max_features: Number of features to consider when looking for the best split

Best Hyperparameters Found:

{
    'n_estimators': 100,
    'max_depth': None,
    'max_features': 'sqrt',
    'min_samples_split': 2,
    'min_samples_leaf': 1
}

Model Performance:
Test Set Accuracy: 96.98%

The model's test accuracy exactly matches that of the logistic regression model. As the confusion matrix in the next step shows, both models classify every test observation as the majority (renewing) class, so the headline accuracy largely reflects the class imbalance rather than the forest's ability to capture complex interactions and nonlinear relationships.

V. How did your random forest model rank the variables in order of importance, from highest to lowest? For a random forest model, how can you interpret feature importance?

Feature Importance in Random Forest

a. Ranking of Variables (Highest to Lowest)

Based on the trained Random Forest model, here are the top features ranked by importance:

  1. Total_Spend_2024
  2. Feedback_Score
  3. Email_Engagement_Score
  4. Previous_Visits
  5. Dining_Plan_Upgraded
  6. Gold_Zone_Visits
  7. Distance_From_Park_Miles
  8. Preferred_Attraction_Thrill
  9. Referral_Source_Social Media
  10. Home_State_NY
  11. (other features with lower importance...)

These importance values reflect how much each variable contributed to reducing impurity (e.g., Gini index) in the decision trees that make up the forest.
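
A ranking like the one above can be extracted from the fitted model's feature_importances_ attribute. A minimal sketch using best_rf and X_train from the previous cells:

# Impurity-based importances from the fitted forest, highest to lowest
importances = pd.Series(best_rf.feature_importances_, index=X_train.columns)
print(importances.sort_values(ascending=False))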


b. How to Interpret Feature Importance in a Random Forest Model

Random Forest models estimate feature importance by measuring how much each feature improves the quality of the splits across all trees in the forest.

  • A feature is more "important" if it is frequently used for splits and contributes to a significant reduction in node impurity (like Gini or entropy).
  • The values are relative and sum to 1.0 across all features.
  • Higher values mean the feature plays a more critical role in making accurate predictions.

Important Notes:

  • Feature importance does not imply causality — it simply reflects predictive contribution.
  • Correlated features can share importance, which may dilute individual rankings.
  • These values are especially helpful in interpreting black-box models like Random Forests and guiding feature selection in future iterations.
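
Because impurity-based importances can be diluted by correlated features, permutation importance offers a complementary, model-agnostic view. An optional sketch using scikit-learn:

from sklearn.inspection import permutation_importance

# Drop in test-set score when each feature is shuffled, averaged over repeats
perm = permutation_importance(best_rf, X_test, y_test, n_repeats=10, random_state=29)
perm_mean = pd.Series(perm.importances_mean, index=X_test.columns)
print(perm_mean.sort_values(ascending=False).head(10))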

W. Assess the performance of your model against the test set. Build a confusion matrix to do this. You can use Python functions to answer any of these questions or you can use your confusion matrix to determine the answers in a slightly more manual way. The ‘positive’ class in this model is represented by the “1” outcome.

a. What is your model’s accuracy rate?

b. What is your model’s sensitivity rate?

c. What is your model’s specificity rate?

d. What is your model’s precision?

e. What is your model’s balanced accuracy?

In [25]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, balanced_accuracy_score

# Predict using the best Random Forest model
y_pred_rf = best_rf.predict(X_test)

# Confusion matrix
cm_rf = confusion_matrix(y_test, y_pred_rf)
tn, fp, fn, tp = cm_rf.ravel()

print("Confusion Matrix:")
print(cm_rf)

# Accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Sensitivity (Recall for class 1)
sensitivity_rf = recall_score(y_test, y_pred_rf)

# Specificity = TN / (TN + FP)
specificity_rf = tn / (tn + fp)

# Precision = TP / (TP + FP)
precision_rf = precision_score(y_test, y_pred_rf)

# Balanced accuracy
balanced_acc_rf = balanced_accuracy_score(y_test, y_pred_rf)

# Display all metrics
print(f"\nAccuracy: {accuracy_rf:.4f}")
print(f"Sensitivity (Recall): {sensitivity_rf:.4f}")
print(f"Specificity: {specificity_rf:.4f}")
print(f"Precision: {precision_rf:.4f}")
print(f"Balanced Accuracy: {balanced_acc_rf:.4f}")
Confusion Matrix:
[[  0  12]
 [  0 386]]

Accuracy: 0.9698
Sensitivity (Recall): 1.0000
Specificity: 0.0000
Precision: 0.9698
Balanced Accuracy: 0.5000

Model Evaluation Using Confusion Matrix

We evaluated the performance of the Random Forest model on the test set.

Confusion Matrix:

Predicted 0 Predicted 1
Actual 0 (No) 0 12
Actual 1 (Yes) 0 386

a. Accuracy Rate:

  • Accuracy = (TP + TN) / Total = (386 + 0) / 398 = 96.98%

b. Sensitivity (Recall for class 1):

  • Sensitivity = TP / (TP + FN) = 386 / (386 + 0) = 1.0000 (100%)

c. Specificity (Recall for class 0):

  • Specificity = TN / (TN + FP) = 0 / (0 + 12) = 0.0000 (0%)

d. Precision:

  • Precision = TP / (TP + FP) = 386 / (386 + 12) = 0.9698 (96.98%)

e. Balanced Accuracy:

  • Balanced Accuracy = (Sensitivity + Specificity) / 2 = (1.0 + 0.0) / 2 = 0.5000 (50%)

Interpretation:

While the model shows very high overall accuracy (96.98%) and perfect recall for the positive class, it fails to correctly classify any negative examples — it predicts everyone will renew their pass.
This is likely due to severe class imbalance in the dataset (most users renewed), which leads the model to favor the majority class.

To improve performance on the minority class (non-renewers), we could:

  • Apply class balancing techniques (e.g., SMOTE, class weights)
  • Explore alternative metrics (F1-score, ROC AUC)
  • Adjust the decision threshold for classification
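
As an illustration of the threshold-adjustment idea, the forest's predicted probabilities can be cut at a stricter value than the default 0.5, so that low-confidence cases are classified as non-renewers. A sketch, where the 0.9 cutoff is an arbitrary example that would need tuning:

# Require a very high predicted probability before classifying as "renew"
proba_renew = best_rf.predict_proba(X_test)[:, 1]
y_pred_thresh = (proba_renew >= 0.9).astype(int)

print(confusion_matrix(y_test, y_pred_thresh))
print(f"Balanced accuracy: {balanced_accuracy_score(y_test, y_pred_thresh):.4f}")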

X. Compare your model’s accuracy against the training set vs. your model’s accuracy against the test set. How different were these results?

In [26]:
# Accuracy on training set
y_train_pred_rf = best_rf.predict(X_train)
train_accuracy_rf = accuracy_score(y_train, y_train_pred_rf)

# Accuracy on test set (already computed above)
print(f"Training Accuracy: {train_accuracy_rf:.4f}")
print(f"Test Accuracy: {accuracy_rf:.4f}")
Training Accuracy: 1.0000
Test Accuracy: 0.9698

Training vs. Test Accuracy Comparison (Random Forest)

Accuracy Results:

  • Training Accuracy: 100.00%
  • Test Accuracy: 96.98%

Interpretation:

The model achieved perfect accuracy on the training set but slightly lower accuracy on the test set.

This gap indicates overfitting: the forest has memorized the training data, including its handful of non-renewers, yet performs worse on new, unseen data.

Moreover, the 96.98% test accuracy is less reassuring than it looks. It equals the share of renewers in the test set, and the confusion matrix from the previous step shows the model classifies every test observation as a renewer, so the apparent generalization owes more to the class imbalance than to a learned signal for the minority class.

To improve:

  • Applying regularization or hyperparameter tuning to prevent overfitting
  • Using other metrics like ROC AUC to evaluate model performance in more detail
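
The ROC AUC suggestion is straightforward to act on, since it evaluates the ranking of predicted probabilities across all thresholds rather than only at 0.5. A minimal sketch:

from sklearn.metrics import roc_auc_score

# AUC scores the ranking of predicted probabilities, not the hard 0/1 labels
auc_rf = roc_auc_score(y_test, best_rf.predict_proba(X_test)[:, 1])
print(f"Random Forest test ROC AUC: {auc_rf:.4f}")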

Y. Use the predict() function with your model to classify the person who you invented in the previous section. Does the model think that the passholder will renew?

(Note: This question says to “classify the person.” It does not say that the dataframe will be set up in the exact same way).

In [30]:
# Step 1: Create a DataFrame with all columns from training data, filled with zeros
new_passholder_full = pd.DataFrame(0, index=[0], columns=X_train.columns)

# Step 2: Set realistic values for numeric variables
new_passholder_full['Age'] = 35                      # Example age
new_passholder_full['Previous_Visits'] = 3           # Example visits
new_passholder_full['Total_Spend_2024'] = 275.00     # High spending
new_passholder_full['Feedback_Score'] = 4.2          # Positive feedback
new_passholder_full['Gold_Zone_Visits'] = 2          # Example visits
new_passholder_full['Email_Engagement_Score'] = 60   # Example engagement
new_passholder_full['Distance_From_Park_Miles'] = 25 # Example distance

# Step 3: Set categorical variables explicitly
new_passholder_full['Home_State_VT'] = 0                  # Not from Vermont
new_passholder_full['Home_State_MA'] = 1                  # From Massachusetts
new_passholder_full['Preferred_Attraction_Thrill'] = 1    # Prefers thrill attractions
new_passholder_full['Preferred_Attraction_Entertainment'] = 0  # Does not prefer entertainment
new_passholder_full['Preferred_Attraction_Other'] = 0         # Not 'Other'
new_passholder_full['Referral_Source_Social Media'] = 1       # Referred by Social Media
new_passholder_full['Dining_Plan_Upgraded'] = 1               # Dining plan upgraded

# Step 4: Predict using the Random Forest model
prediction_rf = best_rf.predict(new_passholder_full)[0]

# Step 5: Probability of renewal
probability_rf = best_rf.predict_proba(new_passholder_full)[0][1]

# Output results
print(f"Prediction (1 = Renew): {prediction_rf}")
print(f"Probability of renewal: {probability_rf:.4f}")
Prediction (1 = Renew): 1
Probability of renewal: 1.0000

Model Prediction for the Invented Passholder

We classified a fictional passholder using the trained Random Forest model. The passholder had the following characteristics:

  • Age: 35
  • Previous Visits: 3
  • Total Spend (2024): $275.00 (high)
  • Feedback Score: 4.2 (positive)
  • Home State: Massachusetts (not Vermont)
  • Preferred Attraction: Thrill
  • Referral Source: Social Media
  • Dining Plan: Upgraded

Prediction Results:

  • Predicted class (1 = Renew): 1
  • Probability of renewal: 100.00%

Interpretation: The Random Forest model strongly predicts that this passholder will renew their pass, assigning an extremely high confidence (100%) to this prediction. This result aligns with the passholder’s positive attributes, such as high spending, excellent feedback, and engagement with premium offerings at the park.

Z. For this question, no Python code is required -- just use a Markdown cell to answer. Write a 3-5 sentence paragraph that speculates about how Lobster Land might be able to use the results that you’ve obtained from these models (LR and/or RF) for a practical purpose.

Practical Application of the Model Results for Lobster Land

Based on our analysis, Lobster Land can effectively use insights from the predictive models to boost passholder retention. For example, our Random Forest model identified Total_Spend_2024 and Feedback_Score as the two most important factors influencing renewal decisions. Given that a customer spending around $275 with a high feedback score of 4.2 had a nearly 100% probability of renewal, Lobster Land could strategically incentivize moderate-spending customers (for example, those spending around the overall mean of $250) to slightly increase their expenditure, significantly improving their likelihood to renew. Conversely, customers from Vermont (Home_State_VT) showed notably lower renewal rates, with a negative coefficient of approximately -1.68 in the logistic regression model, indicating a targeted regional campaign or special discounts could address this specific geographic disadvantage. Furthermore, our confusion matrix revealed a sensitivity (recall) of 100% but specificity of 0%, indicating the current model tends to overlook non-renewers; thus, introducing more balanced incentives or alternative outreach methods specifically for "at-risk" customers could enhance overall retention. By grounding these strategies directly in our quantified model outcomes, Lobster Land can confidently tailor and prioritize initiatives for maximum effectiveness and measurable impact.

Part III: Using Tableau to Build a Dashboard (1 point):

In [2]:
from IPython.display import Image, display

display(Image(filename='dashboard.png'))
[Image: dashboard.png, the Lobster Land summer 2024 Tableau dashboard]

Dashboard Description

This dashboard presents a multi-faceted view of visitor activity and operational patterns at Lobster Land, based on data from summer 2024. It consists of four distinct visualizations, each created with intention and assembled into a single dashboard to provide a comprehensive snapshot.

The first chart, "Daily Total Visitors Over Time", is a line graph built by placing the Date field on the Columns shelf and Total_Visitors on the Rows shelf. This allowed us to visualize trends across time and quickly identify spikes or drops in attendance. The second chart, "Average Arcade Revenue by Day of Week", was created as a bar chart by placing Day_of_Week on Columns and aggregating Arcade_Revenue (as Average) on Rows. This chart helps compare how weekdays and weekends influence arcade earnings.

For deeper operational insights, we built a scatter plot titled "Relationship between Total Labor Hours and Total Purchases" by placing Total_Purchases on the X-axis, Total_Labor_Hours on the Y-axis, and coloring points by Weather_Type to examine environmental effects on business efficiency. Lastly, the "Customer Complaints by Weather Type" visualization was created using the Treemap/Bubble chart format by placing Weather_Type on Detail and Customer_Complaints (aggregated as Sum) on Size and Color. This highlights which weather types are most associated with guest dissatisfaction.

All four visualizations were then brought together in a single dashboard using Tableau's drag-and-drop dashboard editor. Each plot was sized and positioned carefully for clarity, and individual titles were added to make the purpose of each visualization clear. The dashboard layout was designed to balance trends over time (top half) with cross-variable relationships (bottom half), providing a strategic and readable format for decision-making.