Let's say I have data of a chemistry expriment: I have a sample of 11 probe coming from comparable material (ID01 to ID11, the random effect). I make process them with 2 different reactants (reactant A and Reactant B, 2 different fixed effect). There are 10 difference concentrations of reactant A and 3 differents concentrations of reactant B. I finally read a measurement and want to determine the effect of reactant A and reactant B. The assumption is that the concentration of reactant A has an effect on the measurement, whereas reactant B does not. I simulated data accordingly. In the results, it seem though that there is also an effect of reactant B.
Here are my questions:
- Why is there a difference of effect with different reactant B.
- Is there a most simple way of integrating all the dataset and having one answer showing the presence of effect of reactant A on the whole dataset and the absence of effect of reactant B on the whole dataset?
- Is there a way to simplify and code everything in Python (instead of calling R from python with pyper package)?
Here is the code
from pyper import *
import numpy as np
import pandas as pd
import pdb
import copy
scaling = 10
increment = 2
dim1 = 11
dim2 = 10
numNA = 8
def createList00_to_XX(dim, string):
Alist = []
for idx1 in range(0,dim):
if idx1 < 9:
Alist.append(string + "0" + str(idx1 + 1))
if idx1 >= 9 and idx1 < 100:
Alist.append(string + str(idx1 + 1))
return Alist
listID = createList00_to_XX(dim1, "ID")
listReac_A_C = createList00_to_XX(dim2, "Reac_A_C")
def genData(vec, scaling, dim2):
data = np.array([])
data = np.empty((dim1,dim2))
data[:] = np.nan
for i in range(0,dim1):
data[i,:] = vec + (i * increment) + np.random.rand(1,dim2).reshape(-1)*scaling * 5
df = pd.DataFrame(
data,
index = listID)
for nna in range(0,numNA):
df.iloc[np.random.randint(0,dim1),np.random.randint(0,dim2)] = np.nan
df.columns = listReac_A_C
return df
def compareReac_A(df_Reac_B):
selectedreac_A_C01 = copy.copy(df_Reac_B.iloc[:,0].values)
selectedreac_A_C05 = copy.copy(df_Reac_B.iloc[:,4].values)
selectedreac_A_C10 = copy.copy(df_Reac_B.iloc[:,9].values)
r = R(use_pandas=True)
r.assign('selectedreac_A_C01', selectedreac_A_C01)
r.assign('selectedreac_A_C05', selectedreac_A_C05)
r.assign('selectedreac_A_C10', selectedreac_A_C10)
expr1 = """
library(broom)
library(multcomp)
library(lme4)
sizet = 11
ID = rep(c('ID1','ID2','ID3','ID4','ID5','ID6','ID7','ID8','ID9', 'ID10', 'ID11'), 3)
steps = c(rep("reac_A_C01", sizet), rep("reac_A_C05", sizet), rep("reac_A_C10", sizet))
selectedreacs = c(selectedreac_A_C01, selectedreac_A_C05, selectedreac_A_C10)
df = data.frame(ID, steps, selectedreacs)
selectedreacs_lmm_mod = lmer(selectedreacs ~ steps + (1|ID), data = df)
selectedreacs_lmm_mod = glht(selectedreacs_lmm_mod, linfct = mcp(steps = "Tukey"))
selectedreacs_lmm_mod = summary(selectedreacs_lmm_mod)
selectedreacs_lmm_mod = tidy(selectedreacs_lmm_mod)
"""
r(expr1)
lmm = r.get('selectedreacs_lmm_mod')
print(lmm)
def compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, stringC):
selectedReac_B_C1 = copy.copy(df_Reac_B_C01["Reac_A_C" + stringC].values)
selectedReac_B_C2 = copy.copy(df_Reac_B_C02["Reac_A_C" + stringC].values)
selectedReac_B_C3 = copy.copy(df_Reac_B_C03["Reac_A_C" + stringC].values)
r = R(use_pandas=True)
r.assign('selectedReac_B_C1', selectedReac_B_C1)
r.assign('selectedReac_B_C2', selectedReac_B_C2)
r.assign('selectedReac_B_C3', selectedReac_B_C3)
expr1 = """
library(broom)
library(multcomp)
library(lme4)
sizet = 11
ID = rep(c('ID1','ID2','ID3','ID4','ID5','ID6','ID7','ID8','ID9', 'ID10', 'ID11'), 3)
steps = c(rep("Reac_B_C1", sizet), rep("Reac_B_C2", sizet), rep("Reac_B_C3", sizet))
selectedReacs = c(selectedReac_B_C1, selectedReac_B_C2, selectedReac_B_C3)
df = data.frame(ID, steps, selectedReacs)
selectedReacs_lmm_mod = lmer(selectedReacs ~ steps + (1|ID), data = df)
selectedReacs_lmm_mod = glht(selectedReacs_lmm_mod, linfct = mcp(steps = "Tukey"))
selectedReacs_lmm_mod = summary(selectedReacs_lmm_mod)
selectedReacs_lmm_mod = tidy(selectedReacs_lmm_mod)
"""
r(expr1)
lmm = r.get('selectedReacs_lmm_mod')
print(lmm)
np.random.seed(1)
vec_Reac_B_C01 = np.ones(dim2)*100 + np.random.rand(1,dim2).reshape(-1)*scaling * 10
df_Reac_B_C01 = genData(vec_Reac_B_C01, scaling, dim2)
vec_Reac_B_C02 = np.ones(dim2)*100 + np.random.rand(1,dim2).reshape(-1)*scaling * 10
df_Reac_B_C02 = genData(vec_Reac_B_C02, scaling, dim2)
vec_Reac_B_C03 = np.ones(dim2)*100 + np.random.rand(1,dim2).reshape(-1)*scaling * 10
df_Reac_B_C03 = genData(vec_Reac_B_C03, scaling, dim2)
print("Reactant B at concentration C01:")
print(df_Reac_B_C01)
print("Reactant B at concentration C02:")
print(df_Reac_B_C02)
print("Reactant B at concentration C03:")
print(df_Reac_B_C03)
print("Compare Reactant A (reactant B kept constant)")
compareReac_A(df_Reac_B_C01)
compareReac_A(df_Reac_B_C02)
compareReac_A(df_Reac_B_C03)
print("Compare Reactant B (reactant A kept constant)")
compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, "01")
compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, "05")
compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, "10")
Here is the output:
Reactant B at concentration C01:
Reac_A_C01 Reac_A_C02 Reac_A_C03 Reac_A_C04 Reac_A_C05 Reac_A_C06 Reac_A_C07 Reac_A_C08 Reac_A_C09 Reac_A_C10
ID01 162.661926 206.293424 110.234050 NaN 116.044969 142.757235 139.491261 162.490564 146.696094 163.786748
ID02 183.739429 NaN 117.682646 166.849388 160.495047 155.964193 124.878232 138.508812 150.168268 199.788799
ID03 150.619542 197.087831 151.905914 160.891522 153.269445 129.009641 156.951068 180.287356 144.591161 195.388889
ID04 197.145255 215.440732 120.033637 175.697224 125.836889 137.628536 170.055796 155.236780 160.065514 166.383102
ID05 150.670548 213.974226 NaN 151.510590 147.254247 119.901987 155.331901 149.892501 177.142024 196.869591
ID06 156.818922 202.735249 144.731445 160.942221 NaN 146.028680 161.815753 170.300528 NaN 193.209425
ID07 198.872296 190.906185 118.975255 182.602822 146.559431 129.501569 177.001450 163.944366 189.217353 202.181573
ID08 199.867505 217.216060 151.558559 161.678174 142.171984 168.028170 154.030581 196.798075 186.848822 198.966459
ID09 163.439499 235.506912 138.507044 175.152738 151.082429 137.085208 179.794997 NaN 155.820264 200.738919
ID10 176.034446 216.385354 162.308542 166.096745 178.102347 158.401865 137.417083 199.027934 192.221593 221.747816
ID11 170.319226 198.889237 166.641211 185.074165 137.975598 167.007012 176.319831 200.707299 195.252985 180.095221
Reactant B at concentration C02:
Reac_A_C01 Reac_A_C02 Reac_A_C03 Reac_A_C04 Reac_A_C05 Reac_A_C06 Reac_A_C07 Reac_A_C08 Reac_A_C09 Reac_A_C10
ID01 128.672134 191.995986 174.435513 214.340035 175.339963 159.222737 162.009339 134.396792 NaN 117.071475
ID02 162.488720 192.481994 169.854011 236.855185 219.053604 169.616411 NaN 138.989555 155.583391 114.294758
ID03 128.226237 188.211627 167.103320 NaN 207.154736 175.492144 167.076330 138.979030 135.141404 136.002599
ID04 166.564149 220.172164 158.032042 195.670927 199.216374 193.109899 130.154223 149.629547 NaN 123.470943
ID05 171.535156 198.382234 178.759108 220.587701 NaN 176.788862 124.943577 150.745067 158.743247 157.845980
ID06 179.896031 191.408098 160.504169 227.193103 232.162980 213.132829 151.478182 161.772086 154.212656 133.955574
ID07 159.596961 222.926044 178.263460 244.663257 232.648836 187.367804 173.808649 126.689552 130.450822 123.209146
ID08 162.580536 195.786760 200.184575 241.709959 189.463113 178.433836 144.248701 126.541611 166.608880 135.692020
ID09 186.302802 225.811229 NaN 244.590908 235.981780 194.618037 156.962863 161.921949 142.420288 144.967864
ID10 171.252943 199.487183 186.460135 226.038003 233.080191 189.386264 176.289958 NaN 139.334856 NaN
ID11 173.898986 203.405983 179.795748 240.307627 240.642253 175.644125 182.483480 144.820785 180.823522 154.690993
Reactant B at concentration C03:
Reac_A_C01 Reac_A_C02 Reac_A_C03 Reac_A_C04 Reac_A_C05 Reac_A_C06 Reac_A_C07 Reac_A_C08 Reac_A_C09 Reac_A_C10
ID01 177.007374 166.422444 155.377587 180.963075 184.147873 176.930492 195.327927 156.535140 209.998543 210.524986
ID02 195.464611 187.195448 168.834701 179.738261 168.098540 172.282590 192.634836 168.310221 216.514653 173.190557
ID03 189.975088 153.068698 191.351131 NaN NaN 179.486477 200.612599 175.499448 209.428780 195.124090
ID04 190.337482 162.339392 173.835869 162.694627 191.963236 183.220171 181.047851 154.569292 213.028344 198.759298
ID05 211.508162 163.497116 158.611953 191.664729 168.940394 170.572648 NaN 156.123090 231.173059 199.637138
ID06 184.712461 186.177705 175.410781 183.788002 157.375235 179.314785 183.294394 178.992318 246.645009 178.896611
ID07 195.297002 200.733512 NaN 180.003165 195.478372 NaN 215.512287 197.730499 214.317378 212.345306
ID08 216.254728 189.599434 192.191038 156.809696 192.659895 182.324825 189.176856 169.878192 242.995374 199.044962
ID09 222.908313 183.394705 193.347746 206.604724 171.160375 154.888686 223.257359 186.102696 262.117226 215.610309
ID10 234.774530 190.390934 182.531572 NaN 175.933217 151.049323 NaN 198.306587 216.900192 204.107244
ID11 230.196192 NaN 192.757382 170.512975 182.243172 180.817265 231.679522 173.220038 229.766853 200.888743
Compare Reactant A (reactant B kept constant)
term contrast null.value estimate std.error statistic adj.p.value
0 b'steps' b'reac_A_C05 - reac_A_C01' 0.0 -28.112065 6.661587 -4.220025 6.939832e-05
1 b'steps' b'reac_A_C10 - reac_A_C01' 0.0 18.997086 6.469154 2.936564 9.155645e-03
2 b'steps' b'reac_A_C10 - reac_A_C05' 0.0 47.109151 6.661587 7.071761 2.115419e-12
term contrast null.value estimate std.error statistic adj.p.value
0 b'steps' b'reac_A_C05 - reac_A_C01' 0.0 54.772586 5.609275 9.764647 0.000000
1 b'steps' b'reac_A_C10 - reac_A_C01' 0.0 -27.853128 5.609275 -4.965549 0.000002
2 b'steps' b'reac_A_C10 - reac_A_C05' 0.0 -82.625714 5.796750 -14.253799 0.000000
term contrast null.value estimate std.error statistic adj.p.value
0 b'steps' b'reac_A_C05 - reac_A_C01' 0.0 -25.946954 5.949879 -4.360921 0.000038
1 b'steps' b'reac_A_C10 - reac_A_C01' 0.0 -5.482427 5.780588 -0.948420 0.609528
2 b'steps' b'reac_A_C10 - reac_A_C05' 0.0 20.464527 5.949879 3.439486 0.001719
Compare Reactant B (reactant A kept constant)
term contrast null.value estimate std.error statistic adj.p.value
0 b'steps' b'Reac_B_C2 - Reac_B_C1' 0.0 -10.833995 7.002837 -1.547087 2.690180e-01
1 b'steps' b'Reac_B_C3 - Reac_B_C1' 0.0 30.749759 7.002837 4.391043 3.998929e-05
2 b'steps' b'Reac_B_C3 - Reac_B_C2' 0.0 41.583754 7.002837 5.938130 6.426461e-09
term contrast null.value estimate std.error statistic adj.p.value
0 b'steps' b'Reac_B_C2 - Reac_B_C1' 0.0 70.587355 7.914481 8.918759 0.000000
1 b'steps' b'Reac_B_C3 - Reac_B_C1' 0.0 32.931581 7.914481 4.160927 0.000114
2 b'steps' b'Reac_B_C3 - Reac_B_C2' 0.0 -37.655774 7.914481 -4.757832 0.000006
term contrast null.value estimate std.error statistic adj.p.value
0 b'steps' b'Reac_B_C2 - Reac_B_C1' 0.0 -58.136206 6.136439 -9.473932 0.000000
1 b'steps' b'Reac_B_C3 - Reac_B_C1' 0.0 6.270246 5.972221 1.049902 0.545293
2 b'steps' b'Reac_B_C3 - Reac_B_C2' 0.0 64.406452 6.136439 10.495737 0.000000