0

Let's say I have data of a chemistry expriment: I have a sample of 11 probe coming from comparable material (ID01 to ID11, the random effect). I make process them with 2 different reactants (reactant A and Reactant B, 2 different fixed effect). There are 10 difference concentrations of reactant A and 3 differents concentrations of reactant B. I finally read a measurement and want to determine the effect of reactant A and reactant B. The assumption is that the concentration of reactant A has an effect on the measurement, whereas reactant B does not. I simulated data accordingly. In the results, it seem though that there is also an effect of reactant B.

Here are my questions:

  1. Why is there a difference of effect with different reactant B.
  2. Is there a most simple way of integrating all the dataset and having one answer showing the presence of effect of reactant A on the whole dataset and the absence of effect of reactant B on the whole dataset?
  3. Is there a way to simplify and code everything in Python (instead of calling R from python with pyper package)?

Here is the code

from pyper import *
import numpy as np
import pandas as pd
import pdb
import copy


scaling = 10
increment = 2
dim1 = 11
dim2 = 10
numNA = 8

def createList00_to_XX(dim, string):
  Alist = []
  for idx1 in range(0,dim):
    if idx1 < 9:
      Alist.append(string + "0" + str(idx1 + 1))
    if idx1 >= 9 and idx1 < 100:
      Alist.append(string + str(idx1 + 1))
  return Alist

listID = createList00_to_XX(dim1, "ID")
listReac_A_C = createList00_to_XX(dim2, "Reac_A_C")

def genData(vec, scaling, dim2):
  data = np.array([]) 
  data = np.empty((dim1,dim2))
  data[:] = np.nan
  for i in range(0,dim1):
    data[i,:] = vec + (i * increment) + np.random.rand(1,dim2).reshape(-1)*scaling * 5

  df = pd.DataFrame(
      data,
      index = listID) 

  for nna in range(0,numNA):
    df.iloc[np.random.randint(0,dim1),np.random.randint(0,dim2)] = np.nan
  df.columns = listReac_A_C 
  return df

def compareReac_A(df_Reac_B):
  selectedreac_A_C01 = copy.copy(df_Reac_B.iloc[:,0].values)
  selectedreac_A_C05 = copy.copy(df_Reac_B.iloc[:,4].values)
  selectedreac_A_C10 = copy.copy(df_Reac_B.iloc[:,9].values)

  r = R(use_pandas=True)
  r.assign('selectedreac_A_C01', selectedreac_A_C01)
  r.assign('selectedreac_A_C05', selectedreac_A_C05)
  r.assign('selectedreac_A_C10', selectedreac_A_C10)

  expr1 = """
  library(broom)
  library(multcomp)
  library(lme4)
  sizet = 11
  ID = rep(c('ID1','ID2','ID3','ID4','ID5','ID6','ID7','ID8','ID9', 'ID10', 'ID11'), 3)
  steps = c(rep("reac_A_C01", sizet), rep("reac_A_C05", sizet), rep("reac_A_C10", sizet))
  
  selectedreacs = c(selectedreac_A_C01, selectedreac_A_C05, selectedreac_A_C10)
  
  df = data.frame(ID, steps, selectedreacs)
  
  selectedreacs_lmm_mod = lmer(selectedreacs ~ steps + (1|ID), data = df)
  selectedreacs_lmm_mod = glht(selectedreacs_lmm_mod, linfct = mcp(steps = "Tukey"))
  selectedreacs_lmm_mod = summary(selectedreacs_lmm_mod)
  selectedreacs_lmm_mod = tidy(selectedreacs_lmm_mod)
  """
  r(expr1)
  
  lmm = r.get('selectedreacs_lmm_mod')
  print(lmm)


def compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, stringC):
  selectedReac_B_C1 = copy.copy(df_Reac_B_C01["Reac_A_C" + stringC].values)
  selectedReac_B_C2 = copy.copy(df_Reac_B_C02["Reac_A_C" + stringC].values)
  selectedReac_B_C3 = copy.copy(df_Reac_B_C03["Reac_A_C" + stringC].values)

  r = R(use_pandas=True)
  r.assign('selectedReac_B_C1', selectedReac_B_C1)
  r.assign('selectedReac_B_C2', selectedReac_B_C2)
  r.assign('selectedReac_B_C3', selectedReac_B_C3)
  
  expr1 = """
  library(broom)
  library(multcomp)
  library(lme4)
  sizet = 11
  ID = rep(c('ID1','ID2','ID3','ID4','ID5','ID6','ID7','ID8','ID9', 'ID10', 'ID11'), 3)
  steps = c(rep("Reac_B_C1", sizet), rep("Reac_B_C2", sizet), rep("Reac_B_C3", sizet))
  
  selectedReacs = c(selectedReac_B_C1, selectedReac_B_C2, selectedReac_B_C3)
  
  df = data.frame(ID, steps, selectedReacs)
  
  selectedReacs_lmm_mod = lmer(selectedReacs ~ steps + (1|ID), data = df)
  selectedReacs_lmm_mod = glht(selectedReacs_lmm_mod, linfct = mcp(steps = "Tukey"))
  selectedReacs_lmm_mod = summary(selectedReacs_lmm_mod)
  selectedReacs_lmm_mod = tidy(selectedReacs_lmm_mod)
  """
  
  r(expr1)
  
  lmm = r.get('selectedReacs_lmm_mod')
  print(lmm)

np.random.seed(1)
vec_Reac_B_C01 = np.ones(dim2)*100 + np.random.rand(1,dim2).reshape(-1)*scaling * 10
df_Reac_B_C01 = genData(vec_Reac_B_C01, scaling, dim2)

vec_Reac_B_C02 = np.ones(dim2)*100 + np.random.rand(1,dim2).reshape(-1)*scaling * 10
df_Reac_B_C02 = genData(vec_Reac_B_C02, scaling, dim2)

vec_Reac_B_C03 = np.ones(dim2)*100 + np.random.rand(1,dim2).reshape(-1)*scaling * 10
df_Reac_B_C03 = genData(vec_Reac_B_C03, scaling, dim2)

print("Reactant B at concentration C01:")
print(df_Reac_B_C01)
print("Reactant B at concentration C02:")
print(df_Reac_B_C02)
print("Reactant B at concentration C03:")
print(df_Reac_B_C03)

print("Compare Reactant A (reactant B kept constant)")
compareReac_A(df_Reac_B_C01)
compareReac_A(df_Reac_B_C02)
compareReac_A(df_Reac_B_C03)

print("Compare Reactant B (reactant A kept constant)")
compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, "01")
compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, "05")
compareReac_B(df_Reac_B_C01, df_Reac_B_C02, df_Reac_B_C03, "10")

Here is the output:

Reactant B at concentration C01:
      Reac_A_C01  Reac_A_C02  Reac_A_C03  Reac_A_C04  Reac_A_C05  Reac_A_C06  Reac_A_C07  Reac_A_C08  Reac_A_C09  Reac_A_C10
ID01  162.661926  206.293424  110.234050         NaN  116.044969  142.757235  139.491261  162.490564  146.696094  163.786748
ID02  183.739429         NaN  117.682646  166.849388  160.495047  155.964193  124.878232  138.508812  150.168268  199.788799
ID03  150.619542  197.087831  151.905914  160.891522  153.269445  129.009641  156.951068  180.287356  144.591161  195.388889
ID04  197.145255  215.440732  120.033637  175.697224  125.836889  137.628536  170.055796  155.236780  160.065514  166.383102
ID05  150.670548  213.974226         NaN  151.510590  147.254247  119.901987  155.331901  149.892501  177.142024  196.869591
ID06  156.818922  202.735249  144.731445  160.942221         NaN  146.028680  161.815753  170.300528         NaN  193.209425
ID07  198.872296  190.906185  118.975255  182.602822  146.559431  129.501569  177.001450  163.944366  189.217353  202.181573
ID08  199.867505  217.216060  151.558559  161.678174  142.171984  168.028170  154.030581  196.798075  186.848822  198.966459
ID09  163.439499  235.506912  138.507044  175.152738  151.082429  137.085208  179.794997         NaN  155.820264  200.738919
ID10  176.034446  216.385354  162.308542  166.096745  178.102347  158.401865  137.417083  199.027934  192.221593  221.747816
ID11  170.319226  198.889237  166.641211  185.074165  137.975598  167.007012  176.319831  200.707299  195.252985  180.095221
Reactant B at concentration C02:
      Reac_A_C01  Reac_A_C02  Reac_A_C03  Reac_A_C04  Reac_A_C05  Reac_A_C06  Reac_A_C07  Reac_A_C08  Reac_A_C09  Reac_A_C10
ID01  128.672134  191.995986  174.435513  214.340035  175.339963  159.222737  162.009339  134.396792         NaN  117.071475
ID02  162.488720  192.481994  169.854011  236.855185  219.053604  169.616411         NaN  138.989555  155.583391  114.294758
ID03  128.226237  188.211627  167.103320         NaN  207.154736  175.492144  167.076330  138.979030  135.141404  136.002599
ID04  166.564149  220.172164  158.032042  195.670927  199.216374  193.109899  130.154223  149.629547         NaN  123.470943
ID05  171.535156  198.382234  178.759108  220.587701         NaN  176.788862  124.943577  150.745067  158.743247  157.845980
ID06  179.896031  191.408098  160.504169  227.193103  232.162980  213.132829  151.478182  161.772086  154.212656  133.955574
ID07  159.596961  222.926044  178.263460  244.663257  232.648836  187.367804  173.808649  126.689552  130.450822  123.209146
ID08  162.580536  195.786760  200.184575  241.709959  189.463113  178.433836  144.248701  126.541611  166.608880  135.692020
ID09  186.302802  225.811229         NaN  244.590908  235.981780  194.618037  156.962863  161.921949  142.420288  144.967864
ID10  171.252943  199.487183  186.460135  226.038003  233.080191  189.386264  176.289958         NaN  139.334856         NaN
ID11  173.898986  203.405983  179.795748  240.307627  240.642253  175.644125  182.483480  144.820785  180.823522  154.690993
Reactant B at concentration C03:
      Reac_A_C01  Reac_A_C02  Reac_A_C03  Reac_A_C04  Reac_A_C05  Reac_A_C06  Reac_A_C07  Reac_A_C08  Reac_A_C09  Reac_A_C10
ID01  177.007374  166.422444  155.377587  180.963075  184.147873  176.930492  195.327927  156.535140  209.998543  210.524986
ID02  195.464611  187.195448  168.834701  179.738261  168.098540  172.282590  192.634836  168.310221  216.514653  173.190557
ID03  189.975088  153.068698  191.351131         NaN         NaN  179.486477  200.612599  175.499448  209.428780  195.124090
ID04  190.337482  162.339392  173.835869  162.694627  191.963236  183.220171  181.047851  154.569292  213.028344  198.759298
ID05  211.508162  163.497116  158.611953  191.664729  168.940394  170.572648         NaN  156.123090  231.173059  199.637138
ID06  184.712461  186.177705  175.410781  183.788002  157.375235  179.314785  183.294394  178.992318  246.645009  178.896611
ID07  195.297002  200.733512         NaN  180.003165  195.478372         NaN  215.512287  197.730499  214.317378  212.345306
ID08  216.254728  189.599434  192.191038  156.809696  192.659895  182.324825  189.176856  169.878192  242.995374  199.044962
ID09  222.908313  183.394705  193.347746  206.604724  171.160375  154.888686  223.257359  186.102696  262.117226  215.610309
ID10  234.774530  190.390934  182.531572         NaN  175.933217  151.049323         NaN  198.306587  216.900192  204.107244
ID11  230.196192         NaN  192.757382  170.512975  182.243172  180.817265  231.679522  173.220038  229.766853  200.888743
Compare Reactant A (reactant B kept constant)
       term                    contrast   null.value    estimate    std.error    statistic    adj.p.value
0  b'steps'  b'reac_A_C05 - reac_A_C01'           0.0  -28.112065     6.661587    -4.220025   6.939832e-05
1  b'steps'  b'reac_A_C10 - reac_A_C01'           0.0   18.997086     6.469154     2.936564   9.155645e-03
2  b'steps'  b'reac_A_C10 - reac_A_C05'           0.0   47.109151     6.661587     7.071761   2.115419e-12
       term                    contrast   null.value    estimate    std.error    statistic    adj.p.value
0  b'steps'  b'reac_A_C05 - reac_A_C01'           0.0   54.772586     5.609275     9.764647       0.000000
1  b'steps'  b'reac_A_C10 - reac_A_C01'           0.0  -27.853128     5.609275    -4.965549       0.000002
2  b'steps'  b'reac_A_C10 - reac_A_C05'           0.0  -82.625714     5.796750   -14.253799       0.000000
       term                    contrast   null.value    estimate    std.error    statistic    adj.p.value
0  b'steps'  b'reac_A_C05 - reac_A_C01'           0.0  -25.946954     5.949879    -4.360921       0.000038
1  b'steps'  b'reac_A_C10 - reac_A_C01'           0.0   -5.482427     5.780588    -0.948420       0.609528
2  b'steps'  b'reac_A_C10 - reac_A_C05'           0.0   20.464527     5.949879     3.439486       0.001719
Compare Reactant B (reactant A kept constant)
       term                  contrast   null.value    estimate    std.error    statistic    adj.p.value
0  b'steps'  b'Reac_B_C2 - Reac_B_C1'           0.0  -10.833995     7.002837    -1.547087   2.690180e-01
1  b'steps'  b'Reac_B_C3 - Reac_B_C1'           0.0   30.749759     7.002837     4.391043   3.998929e-05
2  b'steps'  b'Reac_B_C3 - Reac_B_C2'           0.0   41.583754     7.002837     5.938130   6.426461e-09
       term                  contrast   null.value    estimate    std.error    statistic    adj.p.value
0  b'steps'  b'Reac_B_C2 - Reac_B_C1'           0.0   70.587355     7.914481     8.918759       0.000000
1  b'steps'  b'Reac_B_C3 - Reac_B_C1'           0.0   32.931581     7.914481     4.160927       0.000114
2  b'steps'  b'Reac_B_C3 - Reac_B_C2'           0.0  -37.655774     7.914481    -4.757832       0.000006
       term                  contrast   null.value    estimate    std.error    statistic    adj.p.value
0  b'steps'  b'Reac_B_C2 - Reac_B_C1'           0.0  -58.136206     6.136439    -9.473932       0.000000
1  b'steps'  b'Reac_B_C3 - Reac_B_C1'           0.0    6.270246     5.972221     1.049902       0.545293
2  b'steps'  b'Reac_B_C3 - Reac_B_C2'           0.0   64.406452     6.136439    10.495737       0.000000

ecjb
  • 539
  • 1
  • 5
  • 16

0 Answers0