C R & Python

C.1 Simulations et modèles démographiques

Nous mettons ici à disposition une partie des codes qui ont servi à produire les simulations.

C.1.1 Modèle en îles

if (!file.exists("ms/ms")) {
  system("gcc -o ms/ms ms/ms.c ms/streec.c ms/rand1.c -lm")
}

### ms : list of parameters ###
nb_demes <- 3
nCHR_per_POP <- 50
nCHR <- nb_demes * nCHR_per_POP
nIND <- nCHR / 2
nb_neutral <- 100
nb_adaptive <- 25
mig_rate_neutral <- 10
mig_rate_adaptive <- 0.1
###############################

nCHR_per_POP_string <- nCHR_per_POP
for (k in 1:(nb_demes - 1)) {
  nCHR_per_POP_string <- paste(nCHR_per_POP_string, nCHR_per_POP)
}

mig_rate_neutral_string <- "x"
mig_rate_adaptive_string <- "x"
for (k in 1:nb_demes){
  for (j in 1:nb_demes){
    if ((k == j) && (k > 1)){
      mig_rate_neutral_string <- paste(mig_rate_neutral_string, "x")
      mig_rate_adaptive_string <- paste(mig_rate_adaptive_string, "x")
    } else if (k != j) {
      mig_rate_neutral_string <- paste(mig_rate_neutral_string,
                                       mig_rate_neutral)
      mig_rate_adaptive_string <- paste(mig_rate_adaptive_string,
                                        mig_rate_adaptive)
    }
  }
}

cmd_neutral <- paste("ms/./ms",
                     nCHR,
                     nb_neutral,
                     "-s 1 -I",
                     nb_demes,
                     nCHR_per_POP_string,
                     "-ma",
                     mig_rate_neutral_string,
                     ">",
                     "data/neutral.txt")

cmd_adaptive <- paste("ms/./ms",
                      nCHR,
                      nb_adaptive,
                      "-s 1 -I",
                      nb_demes,
                      nCHR_per_POP_string,
                      "-ma",
                      mig_rate_adaptive_string,
                      ">",
                      "data/adaptive.txt")

system(cmd_neutral)
system(cmd_adaptive)

file.neutral <- scan(file = "data/neutral.txt",
              what = "character",
              sep = "\n",
              skip = 2)

g.neutral <- NULL

for (locus in 1:nb_neutral){
  res.locus1 <- file.neutral[4:(nCHR + 3)]
  file.neutral <- file.neutral[-(1:(nCHR+3))]
  g.neutral <- cbind(g.neutral, as.numeric(as.factor(res.locus1)))
}

file.adaptive <- scan(file = "data/adaptive.txt",
                      what = "character",
                      sep = "\n",
                      skip = 2)

g.adaptive <- NULL

for (locus in 1:nb_adaptive){
  res.locus1 <- file.adaptive[4:(nCHR + 3)]
  file.adaptive <- file.adaptive[-(1:(nCHR+3))]
  g.adaptive <- cbind(g.adaptive, as.numeric(as.factor(res.locus1)))
}

g <- cbind(g.neutral, g.adaptive)

x <- pcadapt::pcadapt(t(g), K = 2)
pop <- c(rep("A", 50), rep("B", 50), rep("C", 50))
plot(x, option = "scores", pop = pop)

C.1.2 Modèle de divergence

Nous adaptons une version du script Python utilisé dans (Roux et al., 2012), basé sur le module de simulation simuPOP (B. Peng & Kimmel, 2005).

#!/usr/bin/env python
from __future__ import division
import simuOpt, types, os, sys, time
simuOpt.setOptions(alleleType = 'long')
from operator import itemgetter
import numpy as np
from simuPOP import *
from simuPOP.utils import *
from simuPOP.sampling import drawRandomSample

def simulate(Ne, Nsam, T1, T2, T3, s10, s11):
    pop = Population(size = Ne,
                     ploidy = 2,
                     loci = [1],
                     infoFields = ['fitness', 'migrate_to'])

    def getfitness10(geno):
        if geno[0] + geno[1] == 0 :
            return 1 - 2 * s10
        if geno[0] + geno[1] == 1 :
            return 1 - s10
        else :
            return 1

    def getfitness11(geno):
        if geno[0] + geno[1] == 0 :
            return 1 - 2 * s11
        if geno[0] + geno[1] == 1 :
            return 1 - s11
        else :
            return 1

    pop.evolve(
        initOps = [
            InitSex(),
            InitGenotype(loci = ALL_AVAIL,
                         freq = [0.5, 0.5],
                         begin = 0,
                         end = 1)
        ],

        preOps = [
            # resize the ancestral population at the time immediatly
            # before the split
            ResizeSubPops([0],
                          sizes = [Ne + Ne],
                          at = T1 - 1),

            ResizeSubPops(["S1_1"],
                          sizes = [Ne + Ne],
                          at = T1 + T2 - 1),

            # split populations in 2 subpopulations
            SplitSubPops(subPops = [0],
                         sizes = [Ne, Ne],
                         names = ["S1_0", "S1_1"],
                         at = T1),

            SplitSubPops(subPops = ["S1_1"],
                         sizes = [Ne, Ne],
                         at = T1 + T2,
                         names = ["S2_0", "S2_1"]),

            # apply selection by invoking function getfitness
            PySelector(loci = [0],
                       func = getfitness11,
                       begin = T1 + T2,
                       subPops = ["S2_1"]),

            PySelector(loci = [0],
                       func = getfitness10,
                       begin = T1,
                       subPops = ["S1_0"],
                       end = T1 + T2 + T3 - 1)
        ],

        matingScheme = RandomMating(ops = [
                                        Recombinator(intensity = 1)
                                    ]),

        gen = T1 + T2 + T3

    )

    sample = drawRandomSample(pop, sizes = [Nsam, Nsam, Nsam])

    return sample


Ne = 1000
Nsam = 25
T1 = 10
T2 = 100
T3 = 100
s = 0.1
nSNP = 10

G  = np.zeros([3 * Nsam, nSNP])

for i in range(nSNP):
    if i < 1:
        s10 = 2 * s
        s11 = 0.0
    elif i < 2:
        s10 = 0.0
        s11 = s
    else:
        s10 = 0.0
        s11 = 0.0
    res = simulate(Ne, Nsam, T1, T2, T3, s10, s11)
    for j in range(3):
        Sj = res.genotype(j)
        for k in range(int(len(Sj) / 2)):
            idx = j * int(len(Sj) / 2) + k
            G[idx][i] = Sj[2 * k] + Sj[2 * k + 1]

np.savetxt('data/simuPOP.pcadapt', G, fmt = '%i')

References

Roux, C., Pauwels, M., Ruggiero, M.-V., Charlesworth, D., Castric, V., & Vekemans, X. (2012). Recent and ancient signature of balancing selection around the s-locus in arabidopsis halleri and a. lyrata. Molecular Biology and Evolution, 30(2), 435–447.

Peng, B., & Kimmel, M. (2005). SimuPOP: A forward-time population genetics simulation environment. Bioinformatics, 21(18), 3686–3687.