Introduction¶

Evident is a Python-based software package designed specifically for power analysis and sample size calculation in microbiome research.

Although the original preprint and the introductory tutorial in the GitHub repository provide valuable background, this article focuses on a practical use case adapted from the developers’ example. The goal is to offer clearer, more accessible documentation by walking through the analysis step by step and explicitly mapping each code snippet to its corresponding stage in the analytical workflow.

The adapted use case demonstrates how to perform power analysis using Evident, with a focus on alpha diversity metrics in microbiome data. Along the way, it highlights the core functionality of the package and illustrates how Evident can be applied in a real-world microbiome study setting.

Step 1: Define the research question¶

This analysis aims to estimate the statistical power required to detect differences in gut microbiome alpha diversity between Crohn’s disease phenotypes.

Specifically, we compare Faith’s Phylogenetic Diversity (Faith’s PD) between patients with B1 (non-stricturing, non-penetrating) and non-B1 (stricturing or penetrating) disease behavior.

Understanding the required sample size for detecting meaningful differences in microbial diversity is critical for study planning, particularly in microbiome research where cohorts are often limited.

Step 2: Choose the primary outcome¶

The primary outcome is Faith’s Phylogenetic Diversity (Faith’s PD), a continuous alpha diversity metric that incorporates phylogenetic relationships among taxa.

Because Faith’s PD reflects both richness and evolutionary diversity, it is commonly used in gut microbiome studies and is appropriate for parametric comparison when distributional assumptions are met.

Step 3: Specify the statistical test¶

To compare mean alpha diversity between two independent groups (B1 vs non-B1), we use a two-sample t-test, assuming approximate normality of the outcome variable.

This test choice determines the effect size metric and power calculation framework used in later steps.
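The underlying comparison can be sketched with SciPy’s `ttest_ind`. The groups below are simulated stand-ins for the two disease-behavior groups (the means and standard deviations are illustrative, not the study values):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)

# Hypothetical alpha diversity values for two independent groups
group_b1 = rng.normal(loc=13.5, scale=3.5, size=40)
group_non_b1 = rng.normal(loc=9.8, scale=3.9, size=40)

# Two-sample t-test assuming equal variances: the classical framework
# behind Cohen's d and the power calculations in later steps
t_stat, p_value = stats.ttest_ind(group_b1, group_non_b1)
print(f"t = {t_stat:.2f}, p = {p_value:.4f}")
```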

Step 4: Specify effect size¶

Effect size is quantified using Cohen’s d, which measures the standardized difference in means between two groups.

In microbiome research, even modest effect sizes (e.g., d ≈ 0.3–0.5) may be biologically meaningful due to high inter-individual variability.
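For reference, Cohen’s d is the difference in group means divided by the pooled standard deviation. A minimal sketch (the toy arrays are invented for illustration):

```python
import numpy as np

def cohens_d(x, y):
    """Cohen's d using the pooled standard deviation."""
    nx, ny = len(x), len(y)
    pooled_var = ((nx - 1) * np.var(x, ddof=1)
                  + (ny - 1) * np.var(y, ddof=1)) / (nx + ny - 2)
    return (np.mean(x) - np.mean(y)) / np.sqrt(pooled_var)

# Toy example: group means differ by exactly one pooled SD
x = np.array([10.0, 12.0, 14.0])
y = np.array([8.0, 10.0, 12.0])
print(cohens_d(x, y))
```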

Step 5: Specify design parameters¶

The following design parameters are specified for the power analysis:

  • Significance level (α): 0.01, 0.05, and 0.10
  • Target power: typically ≥ 0.80
  • Two-sided hypothesis test
  • No correction for multiple testing (single primary outcome)
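Under these parameters, the required per-group sample size can be approximated analytically with the standard normal-approximation formula n ≈ 2(z₁₋α/₂ + z₁₋β)² / d². The effect size d = 0.5 below is purely illustrative, not an estimate from the data:

```python
from scipy import stats

d = 0.5       # illustrative effect size
power = 0.80  # target power

n_per_group = {}
for alpha in (0.01, 0.05, 0.10):
    z_alpha = stats.norm.ppf(1 - alpha / 2)
    z_beta = stats.norm.ppf(power)
    # Normal-approximation sample size per group for a two-sided test
    n_per_group[alpha] = 2 * (z_alpha + z_beta) ** 2 / d ** 2
    print(f"alpha={alpha}: ~{n_per_group[alpha]:.0f} per group")
```

As expected, stricter α levels demand larger groups for the same power.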

Step 6: Perform the power analysis¶

6.1 Install and import required packages¶

We use the evident Python package, which provides tools for power analysis tailored to microbiome and ecological data.

Additional libraries are used for data manipulation and visualization.

In [54]:
import evident 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from evident.plotting import plot_power_curve

6.2 Load metadata and alpha diversity values¶

The dataset is loaded directly from the evident GitHub repository.

In [55]:
# URL to the metadata file in the evident GitHub repository
metadata_url = "https://raw.githubusercontent.com/biocore/evident/9538cc2b6d736dfb2b0a6db5e2cac00be701dda0/evident/tests/data/metadata.tsv" # Raw equivalent URL

# Read the metadata file
metadata = pd.read_table(metadata_url, sep="\t", index_col=0)

# Display top rows of the metadata
print(metadata.head()) 
                   bmi  body_habitat  body_product     body_site calprotectin  \
1629.SubjectIBD001  25  UBERON:feces  UBERON:feces  UBERON:feces         63.0   
1629.SubjectIBD002  25  UBERON:feces  UBERON:feces  UBERON:feces        212.0   
1629.SubjectIBD003  25  UBERON:feces  UBERON:feces  UBERON:feces        258.0   
1629.SubjectIBD004  25  UBERON:feces  UBERON:feces  UBERON:feces        499.0   
1629.SubjectIBD005  25  UBERON:feces  UBERON:feces  UBERON:feces        104.0   

                                              cd_behavior   cd_location  \
1629.SubjectIBD001  Non-stricturing, non-penetrating (B1)  Colonic (L2)   
1629.SubjectIBD002  Non-stricturing, non-penetrating (B1)  Colonic (L2)   
1629.SubjectIBD003  Non-stricturing, non-penetrating (B1)  Colonic (L2)   
1629.SubjectIBD004  Non-stricturing, non-penetrating (B1)  Colonic (L2)   
1629.SubjectIBD005  Non-stricturing, non-penetrating (B1)  Colonic (L2)   

                   cd_resection collection_timestamp    description  ...  \
1629.SubjectIBD001           no           06/17/2010  SubjectIBD001  ...   
1629.SubjectIBD002           no           09/16/2010  SubjectIBD002  ...   
1629.SubjectIBD003           no           01/04/2011  SubjectIBD003  ...   
1629.SubjectIBD004           no           04/07/2011  SubjectIBD004  ...   
1629.SubjectIBD005           no           06/28/2011  SubjectIBD005  ...   

                   sample_type       scientific_name   sex study study_id  \
1629.SubjectIBD001       stool  human gut metagenome  male   IBD     1629   
1629.SubjectIBD002       stool  human gut metagenome  male   IBD     1629   
1629.SubjectIBD003       stool  human gut metagenome  male   IBD     1629   
1629.SubjectIBD004       stool  human gut metagenome  male   IBD     1629   
1629.SubjectIBD005       stool  human gut metagenome  male   IBD     1629   

                   timepoint       uc_extent year_diagnosed   faith_pd  \
1629.SubjectIBD001         2  not applicable           1988   9.798095   
1629.SubjectIBD002         3  not applicable           1988  12.967986   
1629.SubjectIBD003         4  not applicable           1988  18.984788   
1629.SubjectIBD004         5  not applicable           1988  12.328921   
1629.SubjectIBD005         6  not applicable           1988  11.603600   

                   classification  
1629.SubjectIBD001             B1  
1629.SubjectIBD002             B1  
1629.SubjectIBD003             B1  
1629.SubjectIBD004             B1  
1629.SubjectIBD005             B1  

[5 rows x 40 columns]

The metadata table contains Faith’s PD values for 220 individuals, along with clinical and demographic variables such as disease behavior classification and sampling time points.

6.3 Subset Faith's PD and assess distribution¶

We extract Faith’s PD as the outcome variable and examine its summary statistics.

Assessing distributional properties helps evaluate whether the normality assumption required for a t-test is reasonable.

In [56]:
faith_pd = metadata["faith_pd"]

faith_pd.describe()
Out[56]:
count    220.000000
mean      11.472170
std        4.144242
min        3.652244
25%        7.793526
50%       11.341452
75%       14.304098
max       22.203336
Name: faith_pd, dtype: float64

The mean and median are similar, with no extreme skewness, suggesting approximate normality.
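A formal normality check such as the Shapiro-Wilk test can complement these summaries. The snippet below uses simulated data standing in for `faith_pd` so it runs standalone; with the real series you would call `stats.shapiro(faith_pd)` directly:

```python
import numpy as np
from scipy import stats

# Simulated stand-in for faith_pd (same n, similar mean/SD)
rng = np.random.default_rng(42)
simulated_pd = rng.normal(loc=11.5, scale=4.1, size=220)

# Shapiro-Wilk test of normality; a small p-value would indicate
# a departure from normality
stat, p = stats.shapiro(simulated_pd)
print(f"Shapiro-Wilk W = {stat:.3f}, p = {p:.3f}")
```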

A histogram with kernel density estimation (KDE) is plotted to visually assess the distribution of Faith’s PD values.

In [58]:
# Draw histogram of faith_pd with kernel density estimate (KDE)
plt.hist(faith_pd, bins=20, density=True)
faith_pd.plot(kind='kde')
plt.xlabel("Faith's PD")
plt.ylabel("Density")
plt.show()
[Figure: histogram of Faith's PD values with KDE overlay]

The KDE curve shows a roughly symmetric distribution centered around 10–12, supporting the use of parametric methods.

6.4 Create a UnivariateDataHandler object¶

The UnivariateDataHandler object stores the outcome variable and metadata, enabling effect size calculation and power simulations within the evident framework.

In [ ]:
adh = evident.UnivariateDataHandler(faith_pd, metadata)

6.5 Compare group-level summary statistics¶

We summarize Faith’s PD by disease behavior group to quantify observed differences between B1 and non-B1 patients.

In [9]:
metadata.groupby('classification')["faith_pd"].agg(["count", "mean", "std"]).round(2)
Out[9]:
                count   mean   std
classification
B1                 99  13.57  3.46
Non-B1            121   9.76  3.87

Mean Faith’s PD is higher in the B1 group than in the non-B1 group, indicating a potentially meaningful difference.
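As a quick sanity check, the pooled-SD Cohen’s d can be reproduced directly from these group summary statistics before evident computes it:

```python
import math

# Group summary statistics from the table above
n1, m1, s1 = 99, 13.57, 3.46    # B1
n2, m2, s2 = 121, 9.76, 3.87    # Non-B1

# Pooled standard deviation and standardized mean difference
pooled_sd = math.sqrt(((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2))
d = (m1 - m2) / pooled_sd
print(round(d, 2))  # → 1.03
```

This agrees with the effect size evident reports in the next step.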

6.6 Calculate effect size (Cohen's d)¶

Cohen’s d is calculated based on the observed group differences and pooled variability.

In [33]:
print(f"The effect size is: {adh.calculate_effect_size(column='classification').effect_size}")
The effect size is: 1.0311033633149984

The estimated effect size (~1.03) is considered large, suggesting strong separation between groups in this dataset.

6.7 Perform power analysis across sample sizes and alpha levels¶

Power is simulated across a range of total sample sizes (10–100) and significance thresholds to evaluate study design trade-offs.

In [ ]:
# Define alpha levels and observation counts for power analysis
alpha_vals = [0.01, 0.05, 0.1]
obs_vals = np.arange(10, 101, step=10)  # 10 to 100 observations

# Perform power analysis
results = adh.power_analysis(
    "classification",
    alpha=alpha_vals,
    total_observations=obs_vals
)

6.8 Examine tabular results¶

In [41]:
# Transform results to a DataFrame for easier viewing
df_results = results.to_dataframe()

print(df_results.head())
   alpha  total_observations     power  effect_size difference    metric  \
0   0.01                  10  0.104221     1.031103       None  cohens_d   
1   0.05                  10  0.301049     1.031103       None  cohens_d   
2   0.10                  10  0.439851     1.031103       None  cohens_d   
3   0.01                  20  0.315771     1.031103       None  cohens_d   
4   0.05                  20  0.587731     1.031103       None  cohens_d   

           column  
0  classification  
1  classification  
2  classification  
3  classification  
4  classification  

For example, with α = 0.05 and a total sample size of 10, power is approximately 0.30, indicating insufficient sensitivity at small sample sizes.

6.9 Plot power curves¶

Power curves visualize how statistical power increases with sample size under different significance thresholds.

In [59]:
plot_power_curve(results, target_power=0.8, style="alpha", markers=True)
plt.show()
[Figure: power curves by alpha level, with the 0.8 target power indicated]

Step 7: Interpret results¶

Statistical power increases monotonically with sample size across all significance thresholds.

More stringent α levels require larger cohorts to achieve equivalent power, illustrating the trade-off between false-positive control and sensitivity.

In this example, given the observed effect size of approximately 1.03 and α = 0.05, a total sample size of roughly 30–40 participants (15–20 per group) is sufficient to achieve 80% power.
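That conclusion can be cross-checked analytically. The sketch below uses statsmodels’ `TTestIndPower` (an independent library, not part of evident) to solve for the per-group sample size at the observed effect size:

```python
from statsmodels.stats.power import TTestIndPower

# Solve for the per-group n needed for 80% power at alpha = 0.05,
# using the effect size estimated from the data (d ≈ 1.031)
analysis = TTestIndPower()
n_per_group = analysis.solve_power(effect_size=1.031, alpha=0.05,
                                   power=0.80, alternative="two-sided")
print(f"~{n_per_group:.1f} participants per group")
```

The analytic answer lands in the same 15–20 per group range as the evident power curves.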