C R & Python
C.1 Simulations et modèles démographiques
Nous mettons ici à disposition une partie des codes qui ont servi à produire les simulations.
C.1.1 Modèle en îles
if (!file.exists("ms/ms")) {
system("gcc -o ms/ms ms/ms.c ms/streec.c ms/rand1.c -lm")
}
### ms : list of parameters ###
nb_demes <- 3
nCHR_per_POP <- 50
nCHR <- nb_demes * nCHR_per_POP
nIND <- nCHR / 2
nb_neutral <- 100
nb_adaptive <- 25
mig_rate_neutral <- 10
mig_rate_adaptive <- 0.1
###############################
nCHR_per_POP_string <- nCHR_per_POP
for (k in 1:(nb_demes - 1)) {
nCHR_per_POP_string <- paste(nCHR_per_POP_string, nCHR_per_POP)
}
mig_rate_neutral_string <- "x"
mig_rate_adaptive_string <- "x"
for (k in 1:nb_demes){
for (j in 1:nb_demes){
if ((k == j) && (k > 1)){
mig_rate_neutral_string <- paste(mig_rate_neutral_string, "x")
mig_rate_adaptive_string <- paste(mig_rate_adaptive_string, "x")
} else if (k != j) {
mig_rate_neutral_string <- paste(mig_rate_neutral_string,
mig_rate_neutral)
mig_rate_adaptive_string <- paste(mig_rate_adaptive_string,
mig_rate_adaptive)
}
}
}
cmd_neutral <- paste("ms/./ms",
nCHR,
nb_neutral,
"-s 1 -I",
nb_demes,
nCHR_per_POP_string,
"-ma",
mig_rate_neutral_string,
">",
"data/neutral.txt")
cmd_adaptive <- paste("ms/./ms",
nCHR,
nb_adaptive,
"-s 1 -I",
nb_demes,
nCHR_per_POP_string,
"-ma",
mig_rate_adaptive_string,
">",
"data/adaptive.txt")
system(cmd_neutral)
system(cmd_adaptive)
file.neutral <- scan(file = "data/neutral.txt",
what = "character",
sep = "\n",
skip = 2)
g.neutral <- NULL
for (locus in 1:nb_neutral){
res.locus1 <- file.neutral[4:(nCHR + 3)]
file.neutral <- file.neutral[-(1:(nCHR+3))]
g.neutral <- cbind(g.neutral, as.numeric(as.factor(res.locus1)))
}
file.adaptive <- scan(file = "data/adaptive.txt",
what = "character",
sep = "\n",
skip = 2)
g.adaptive <- NULL
for (locus in 1:nb_adaptive){
res.locus1 <- file.adaptive[4:(nCHR + 3)]
file.adaptive <- file.adaptive[-(1:(nCHR+3))]
g.adaptive <- cbind(g.adaptive, as.numeric(as.factor(res.locus1)))
}
g <- cbind(g.neutral, g.adaptive)
x <- pcadapt::pcadapt(t(g), K = 2)
pop <- c(rep("A", 50), rep("B", 50), rep("C", 50))
plot(x, option = "scores", pop = pop)
C.1.2 Modèle de divergence
Nous adaptons une version du script Python utilisé dans (Roux et al., 2012), basé sur le module de simulation simuPOP (B. Peng & Kimmel, 2005).
#!/usr/bin/env python
from __future__ import division
import simuOpt, types, os, sys, time
simuOpt.setOptions(alleleType = 'long')
from operator import itemgetter
import numpy as np
from simuPOP import *
from simuPOP.utils import *
from simuPOP.sampling import drawRandomSample
def simulate(Ne, Nsam, T1, T2, T3, s10, s11):
pop = Population(size = Ne,
ploidy = 2,
loci = [1],
infoFields = ['fitness', 'migrate_to'])
def getfitness10(geno):
if geno[0] + geno[1] == 0 :
return 1 - 2 * s10
if geno[0] + geno[1] == 1 :
return 1 - s10
else :
return 1
def getfitness11(geno):
if geno[0] + geno[1] == 0 :
return 1 - 2 * s11
if geno[0] + geno[1] == 1 :
return 1 - s11
else :
return 1
pop.evolve(
initOps = [
InitSex(),
InitGenotype(loci = ALL_AVAIL,
freq = [0.5, 0.5],
begin = 0,
end = 1)
],
preOps = [
# resize the ancestral population at the time immediatly
# before the split
ResizeSubPops([0],
sizes = [Ne + Ne],
at = T1 - 1),
ResizeSubPops(["S1_1"],
sizes = [Ne + Ne],
at = T1 + T2 - 1),
# split populations in 2 subpopulations
SplitSubPops(subPops = [0],
sizes = [Ne, Ne],
names = ["S1_0", "S1_1"],
at = T1),
SplitSubPops(subPops = ["S1_1"],
sizes = [Ne, Ne],
at = T1 + T2,
names = ["S2_0", "S2_1"]),
# apply selection by invoking function getfitness
PySelector(loci = [0],
func = getfitness11,
begin = T1 + T2,
subPops = ["S2_1"]),
PySelector(loci = [0],
func = getfitness10,
begin = T1,
subPops = ["S1_0"],
end = T1 + T2 + T3 - 1)
],
matingScheme = RandomMating(ops = [
Recombinator(intensity = 1)
]),
gen = T1 + T2 + T3
)
sample = drawRandomSample(pop, sizes = [Nsam, Nsam, Nsam])
return sample
Ne = 1000
Nsam = 25
T1 = 10
T2 = 100
T3 = 100
s = 0.1
nSNP = 10
G = np.zeros([3 * Nsam, nSNP])
for i in range(nSNP):
if i < 1:
s10 = 2 * s
s11 = 0.0
elif i < 2:
s10 = 0.0
s11 = s
else:
s10 = 0.0
s11 = 0.0
res = simulate(Ne, Nsam, T1, T2, T3, s10, s11)
for j in range(3):
Sj = res.genotype(j)
for k in range(int(len(Sj) / 2)):
idx = j * int(len(Sj) / 2) + k
G[idx][i] = Sj[2 * k] + Sj[2 * k + 1]
np.savetxt('data/simuPOP.pcadapt', G, fmt = '%i')
References
Roux, C., Pauwels, M., Ruggiero, M.-V., Charlesworth, D., Castric, V., & Vekemans, X. (2012). Recent and ancient signature of balancing selection around the s-locus in arabidopsis halleri and a. lyrata. Molecular Biology and Evolution, 30(2), 435–447.
Peng, B., & Kimmel, M. (2005). SimuPOP: A forward-time population genetics simulation environment. Bioinformatics, 21(18), 3686–3687.