· 6 years ago · Nov 21, 2019, 04:40 PM
1X <- read.table("Global_25_PCA_pop_averages.txt", row.names=1, head = T, stringsAsFactors=T, na.strings=c('',' ','NA'))
2
3single <- function(Y,k){
4NPOPS<-dim(X)[1]
5
6ID <- vector(length=NPOPS)
7ID[1:NPOPS] <- c(as.matrix(row.names(X)))
8
9X <- X[,1:25]
10
11if (length(Y)==1)
12Y <- X[which(ID==Y),]
13
14YDIST<-vector(length=NPOPS)
15
16for (i in 1:NPOPS)
17YDIST[i] <- sqrt(sum((Y-X[i,])^2))
18
19
20ORDER<-order(YDIST)
21ORDER<-cbind(ID[ORDER[1:k]],round(YDIST[ORDER[1:k]],4))
22print(ORDER)
23}
24
25
26
27twoWay <- function(Y,k){
28NPOPS<-dim(X)[1]
29
30ID <- vector(length=NPOPS)
31ID[1:NPOPS] <- c(as.matrix(row.names(X)))
32
33
34 X <- X[,1:25]
35
36if (length(Y)==1)
37Y <- X[which(ID==Y),]
38
39
40YDIST<-vector(length=NPOPS+NPOPS*(NPOPS-1)/2)
41
42
43
44
45XDIST <- as.matrix(dist(X))
46
47for (i in 1:NPOPS)
48YDIST[i] <- sqrt(sum((Y-X[i,])^2))
49
50ID2 <- vector()
51YDIST2 <- vector()
52COUNT <- NPOPS
53
54 for (i in 1:(NPOPS-1))
55 for (j in (i+1):NPOPS) {
56 COUNT<-COUNT+1
57 if (abs(YDIST[i]-YDIST[j])<XDIST[i,j] & YDIST[i]>0 & YDIST[j]>0) {
58 FRAC <- (XDIST[i,j]^2+YDIST[i]^2-YDIST[j]^2)/(2*XDIST[i,j]^2)
59 if (FRAC>=0 & FRAC<=1) {
60 YDIST2[COUNT] <- sqrt(YDIST[i]^2-FRAC^2*XDIST[i,j]^2)
61 ID2[COUNT] <- paste(round(100*(1-FRAC),1),"% ",ID[i]," + ",round(100*FRAC,1),"% ",ID[j],sep=" ")
62 }
63 else
64 YDIST2[COUNT]<- 100
65 }
66 else
67 YDIST2[COUNT]<- 100
68 }
69
70
71ORDER<-order(YDIST2)
72ORDER<-cbind(ID2[ORDER[1:k]],round(YDIST2[ORDER[1:k]],4))
73print(ORDER)
74}
75
76
77
78fifty <- function (Y,k)
79 {
80 NPOPS<-dim(X)[1]
81 ID <- vector()
82ID[1:NPOPS] <- c(as.matrix(row.names(X)))
83 X <- X[,1:25]
84
85if (length(Y)==1)
86Y <- X[which(ID==Y),]
87
88
89YDIST<-vector()
90XDIST <- as.matrix(dist(X))
91
92for (i in 1:NPOPS)
93YDIST[i] <- sqrt(sum((Y-X[i,])^2))
94ID2 <- vector()
95YDIST2 <- vector()
96
97 COUNT <- NPOPS
98 for (i in 1:(NPOPS-1))
99 for (j in (i+1):NPOPS) {
100 COUNT<-COUNT+1
101 YDIST2[COUNT] <- sqrt(max(YDIST[i],YDIST[j])^2-(0.5)^2*XDIST[i,j]^2)
102 ID2[COUNT] <- paste("50%","% ",ID[i]," + ","50%","% ",ID[j])
103 }
104
105
106ORDER<-order(YDIST2)
107ORDER<-cbind(ID2[ORDER[1:k]],round(YDIST2[ORDER[1:k]],4))
108print(ORDER)
109}
110
111
112
113#*******************************************************
114# R script nMonte.R
115# Find mixture composition which minimizes
116# the averaged genetic distance to target.
117# activate with: source('nMonte.R')
118# Use: getMonte(datafile,targetfile);
119# both files should be comma-separated csv.
120# Utilities:
121# subset_data(): Collecting rows from datasheet
122# aggr_pops(): Average populations
123# tab2comma(): tab-separated to comma-separated
124# last modified: averaging, median, percentage
125# v9.0 Huijbregts 29 nov 2017
126#*******************************************************
127
128# default global constants
129batch_0 = 1000 # default rows of sample randomly drawn from data file
130cycles_0 = 1000 # default number of cycles
131
132# START OF GETMONTE FUNCTION
133getMonte <- function(targetfile,omit='',Nbatch=batch_0,Ncycles=cycles_0,save=F) {
134
135 # define AlGORITHM embedded function
136 do_algorithm <- function(selection, targ) {
137 mySel <- as.matrix(selection, rownames.force = NA)
138 myTg <- as.matrix(targ, rownames.force = NA)
139 dif2targ <- sweep(mySel, 2, myTg, '-') # data - target
140 Ndata <- nrow(dif2targ)
141 kcol <- ncol(dif2targ)
142 rowLabels <- rownames(dif2targ)
143 # preallocate data
144 matPop <- matrix(NA_integer_, Nbatch, 1, byrow=T)
145 dumPop <- matrix(NA_integer_, Nbatch, 1, byrow=T)
146 matAdmix <- matrix(NA_real_, Nbatch, kcol, byrow=T)
147 dumAdmix <- matrix(NA_real_, Nbatch, kcol, byrow=T)
148 # initialize data for first cycle
149 ## a. start with nearest neighbor:
150 # distQQ <- rowSums(dif2targ^2)
151 # minPop <- as.integer(which(distQQ==min(distQQ))[1])
152 # matPop <- rep(minPop, Nbatch)
153 # b. start with random pops:
154 matPop <- sample(1:Ndata,Nbatch,replace=T)
155 # fill matPop with random row numbers 1:Ndata from datafile
156 matAdmix <- dif2targ[matPop,]
157 colM1 <- colMeans(matAdmix) # rowvector of column means
158 eval1 <- sum(colM1^2) # sum of squared distances
159 # Ncycles iterations
160 for (c in 1:Ncycles) {
161 # fill batch data
162 dumPop <- sample(1:Ndata, Nbatch, replace=T)
163 dumAdmix <- dif2targ[dumPop,]
164 # loop thru batch
165 # minimize Eucl dist of batch mean to target
166 for (b in 1:Nbatch) {
167 # test alternative pop
168 store <- matAdmix[b,]
169 matAdmix[b,] <- dumAdmix[b,]
170 colM2 <- colMeans(matAdmix)
171 eval2 <- sum(colM2^2)
172 # conditional adjust
173 if (eval2 <= eval1) {
174 matPop[b] <- dumPop[b]
175 colM1 <- colM2
176 eval1 <- eval2
177 } else {matAdmix[b,] <- store}
178 } # end batch
179 } # end cycles
180 # Collect output
181 # get fit of target
182 fitted <- t(colMeans(matAdmix) + myTg[1,])
183 # collect sampled populations
184 popX<- matPop
185 # substute row numbers by row names
186 populations <- factor(rowLabels[popX],levels=rowLabels)
187 # return list of 2 objects
188 return(list('estimated'=fitted, 'pops'=populations))
189 } # end do_algorithm
190
191 # define OUTPUT embedded function
192 # except pop correlations
193 do_output <- function(estim, pops){
194 # set stdOut to sinkFile
195 if (save!=F) {
196 sinkFile <- nameIsFree(save)
197 sink(sinkFile, append=T, split=T)
198 }
199 #print(paste('Ncycles=',Ncycles,sep=' '))
200 #print target and estimation by col
201 dif <- estim - myTarget
202
203 #matrix_out <- rbind(myTarget, estim, dif)
204 #rownames(matrix_out)[2:3] <- c('fitted','dif')
205 #print(matrix_out)
206 # distance
207 dist1_2 <- sqrt(sum(dif^2))
208 dist1_2 <- dist1_2/PCT
209 print(paste('distance%=',round(100*dist1_2,4),sep=''))
210 write('',stdout())
211 # table percentages by pop
212 tgname <- row.names(myTarget)[1]
213 write(paste('\t',tgname),stdout())
214 write('',stdout())
215 tb <- table(pops)
216 tb <- tb[order(tb, decreasing=T)]
217 tb <- as.matrix(100*tb/Nbatch)
218
219 i <- 1
220 while(tb[i]>0)
221 {
222 i <- i+1
223 }
224 tb <- tb[1:(i-1), ,drop=FALSE]
225
226
227 write.table(tb,sep=',',quote=F,col.names=F,dec='.')
228 # reset sinkFile to stdOut
229 if (save!=F) {sink()}
230 } # end do_output
231
232 # MAIN code of getMonte
233 # set environment for embedded functions
234 # proces input
235 tempData <- X
236
237 myData <- tempData[rownames(tempData)!=omit,]
238
239if (length(targetfile)==1){
240NPOPS<-dim(X)[1]
241
242ID <- vector(length=NPOPS)
243ID[1:NPOPS] <- c(as.matrix(row.names(X)))
244
245
246myTarget <- X[which(ID==targetfile),]
247
248myData <- myData[-c(which(ID==targetfile)), ]
249}
250
251else{
252myTarget <-targetfile
253names(myTarget) <- c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9","PC10","PC11","PC12","PC13","PC14","PC15","PC16","PC17","PC18","PC19","PC20","PC21","PC22","PC23","PC24","PC25")
254myTarget <- t(myTarget)
255row.names(myTarget) <- " "
256}
257
258 check_formats(myData, myTarget)
259 check_omit(myData, omit) # single item distances
260 PCT <- ifelse(max(myData[1, ]>2), 100, 1)
261 #print('1. CLOSEST SINGLE ITEM DISTANCE%')
262 #print(nearestItems(myData, myTarget)*100/PCT)
263 #cat('\n')
264
265 # full table nMonte
266 print('nMONTE 1')
267 results <- do_algorithm(myData, myTarget)
268 fitted <- results$estimated
269 populations <- results$pops
270 do_output(fitted, populations)
271 cat('\n')
272
273 #print('CORRELATION OF ADMIXTURE POPULATIONS')
274 #tb <- table(populations)
275 #selFinal <- names(tb[tb>0])
276 #adFinal <- myData[selFinal,,drop=F]
277 # catch error
278 #if (nrow(adFinal)==1) {print('Only 1 population, no correlations.')}
279 #else {
280 # corPops <- cor(t(adFinal))
281 # round(corPops, digits=2)
282 #}
283
284} # end getMonte function
285
286#===================================utilities===================================
287#-------------------------------------------------------------------------------
288# function subset_data()
289# utility for selecting rows from datasheet
290# Use: subset_data('DavidMadeThis.csv', 'IselectedThis.csv' ,'Abkhasian', 'Adygei', 'Afanasievo', 'Altai_IA')
291# In: name of primary dataFile, name of output file, list of selected pops from primary datafile
292# Out: secondary datasheet with selected subset
293# USE WITH ONE SELECTED POPULATION TO CREATE TARGET FILE
294# Error message 1: non-existence or misspelled name of selected pop
295# Error message 2: no pops selected
296# Error message 3: output file exists, choose new name
297#-------------------------------------------------------------------------------
298subset_data <- function(dataFile, saveFile, ...) {
299 input <- read.csv(dataFile, head=T, row.names=1, stringsAsFactor=F)
300 selection <- list(...)
301 selError <- selection[!selection %in% rownames(input)]
302 # test selection
303 if (length(selError)>0) {
304 cat(paste(selError,' is not a valid rowname\n',sep='')) }
305 # output
306 else {output <- input[rownames(input) %in% selection,]
307 print(output) # print to screen
308 write.csv(output, nameIsFree(saveFile), quote=F) # save to file
309 }
310}
311
312#-------------------------------------------------------------------------------
313# function aggr_pops()
314# In the files of Davidski rowlabel has the form 'pop:ID'
315# This function drops the part after the colon
316# and collects the mean of the pop before the colon.
317# Use for mean: aggr_pops(fileName)
318# Use for median: aggr_pops(fileName, myFunc=median)
319#-------------------------------------------------------------------------------
320aggr_pops <- function(fileName, myFunc=mean) {
321 myData <- read.csv(fileName, head=T, row.names=1, stringsAsFactors=FALSE)
322 splitted <- strsplit(rownames(myData),split=':',fixed=T)
323 splitmat <- t(matrix(unlist(splitted), length(splitted[[1]]), length(splitted)))
324 aggrData <- aggregate(myData, by=list(splitmat[,1]),myFunc)
325 temp <- as.matrix(aggrData[,-1]); rownames(temp) <- aggrData[,1]
326 return(round(temp, 7))
327}
328
329#-------------------------------------------------------------------------------
330# function tab2comma()
331# Convert tab-separated csv to comma-separated csv
332# Use: tab2comma(tabFile,commaFile)
333#-------------------------------------------------------------------------------
334tab2comma <- function(tabFile,commaFile) {
335 data <- read.csv(tabFile, head=T, sep='\t', row.names=1, stringsAsFactor=F)
336 nameIsFree(commaFile)
337 write.csv(data, commaFile, row.names=T)
338 }
339
340#-------------------------------------------------------------------------------
341# function nearestItems()
342# Find n best matches.
343# Use: inData <- read file; inTarget <- read target
344# nearestItems(inData, inTarget, maxFits=8)
345# This is not the nearest neighbor algorithm;
346# when the number of items is smaller than maxFits,
347# functions returns all the items.
348#-------------------------------------------------------------------------------
349nearestItems <- function(inData, inTarget, maxFits=8) {
350 totArr <- rbind(inTarget, inData)
351 distMat <- as.matrix(dist(totArr, method='euclidean'))
352 dist1 <- distMat[,1]
353 sortDist <- dist1[order(dist1)]
354 nFits <- min(nrow(inData), maxFits)
355 return(sortDist[2:(nFits+1)])
356 }
357
358#==================================internal stuff===============================
359nameIsFree <- function(newFile) {
360 while (file.exists(newFile)) {
361 newFile <- readline('select new filename for saving (without quotes): ')
362 }
363 return(newFile)
364 }
365
366check_formats <- function(sheet, target) {
367 if (any(is.na(sheet))) {err_row <- as.integer(which(rowSums(is.na(sheet))>0))
368 print(sheet[err_row, ])
369 stop(paste('Missing value in row ',err_row))}
370 if (!identical(colnames(sheet), colnames(target)))
371 {print(colnames(sheet)); print(colnames(target))
372 stop('Colnames input not identical')}
373 }
374
375check_omit <- function(sheet, dropInfo) {
376 if (dropInfo != '' & !dropInfo %in% rownames(sheet)) {
377 print('!!!! WARNING: Request to omit unknown popName. !!!!')
378 }
379 }
380#*******************************************************
381# R script nMonte.R
382# Find mixture composition which minimizes
383# the averaged genetic distance to target.
384# Penalizing of distant admixtures.
385# Activate with: source('nMonte3_temp.R')
386# Use: getMonte(datafile, targetfile)
387# both files should be comma-separated csv.
388# Utilities:
389# subset_data(): Collecting rows from datasheet
390# aggr_pops(): Average populations
391# tab2comma(): tab-separated to comma-separated
392# last modified: headStrings
393# v10.4 Huijbregts 8 jan 2018
394#*******************************************************
395
396# default global constants
397batch_def = 500 # default rows of sample randomly drawn from data file
398cycles_def = 1000 # default number of cycles
399pen_def = 0.001 # default penalty
400
401# START OF GETMONTE FUNCTION
402getMonte3 <- function(targetfile,
403 omit='',Nbatch=batch_def,Ncycles=cycles_def,save=F,pen=pen_def) {
404
405 # define AlGORITHM embedded function
406 do_algorithm <- function(selection, targ) {
407 mySel <- as.matrix(selection, rownames.force = NA)
408 myTg <- as.matrix(targ, rownames.force = NA)
409 dif2targ <- sweep(mySel, 2, myTg, '-') # data - target
410 Ndata <- nrow(dif2targ)
411 kcol <- ncol(dif2targ)
412 rowLabels <- rownames(dif2targ)
413 # preallocate data
414 matPop <- matrix(NA_integer_, Nbatch, 1, byrow=T)
415 dumPop <- matrix(NA_integer_, Nbatch, 1, byrow=T)
416 matAdmix <- matrix(NA_real_, Nbatch, kcol, byrow=T)
417 dumAdmix <- matrix(NA_real_, Nbatch, kcol, byrow=T)
418 matPop <- sample(1:Ndata,Nbatch,replace=T)
419 # fill matPop with random row numbers 1:Ndata from datafile
420 matAdmix <- dif2targ[matPop,]
421 # iniatialize objective function
422 colM1 <- colMeans(matAdmix)
423 eval1 <- (1+pen) * sum(colM1^2)
424 # Ncycles iterations
425 for (c in 1:Ncycles) {
426 # fill batch data
427 dumPop <- sample(1:Ndata, Nbatch, replace=T)
428 dumAdmix <- dif2targ[dumPop,]
429 # loop thru batch
430 # penalty is squared distance of sample to target
431 # objective function =
432 # squared dist of batch mean to target + coef*penalty
433 # minimize objective function
434 for (b in 1:Nbatch) {
435 # test alternative pop
436 store <- matAdmix[b,]
437 matAdmix[b,] <- dumAdmix[b,]
438 colM2 <- colMeans(matAdmix)
439 eval2 <- sum(colM2^2) + pen*sum(matAdmix[b, ]^2)
440 # conditional adjust
441 if (eval2 <= eval1) {
442 matPop[b] <- dumPop[b]
443 colM1 <- colM2
444 eval1 <- eval2
445 } else {matAdmix[b,] <- store}
446 } # end batch
447 } # end cycles
448 # Collect output
449 # get fit of target
450 fitted <- t(colMeans(matAdmix) + myTg[1,])
451 # collect sampled populations
452 # split labels of reference samples
453 popl <- headStrings(rowLabels[matPop], mySep=':')
454 populations <- factor(popl)
455 # return list of 2 objects
456 return(list('estimated'=fitted, 'pops'=populations))
457 } # end do_algorithm
458
459 # define OUTPUT embedded function
460 # except pop correlations
461 do_output <- function(estim, pops){
462 # set stdOut to sinkFile
463 if (save!=F) {
464 sinkFile <- nameIsFree(save)
465 sink(sinkFile, append=T, split=T)
466 }
467 #print(paste('Ncycles=',Ncycles,sep=' '))
468 # print target and estimation by col
469 dif <- estim - myTarget
470 #matrix_out <- rbind(myTarget, estim, dif)
471 #rownames(matrix_out)[2:3] <- c('fitted','dif')
472 #print(matrix_out)
473 # distance
474 dist1_2 <- sqrt(sum(dif^2))
475 dist1_2 <- dist1_2/PCT
476 print(paste('distance%=',round(100*dist1_2,4),sep=''))
477 write('',stdout())
478 # table percentages by pop
479 tgname <- row.names(myTarget)[1]
480 write(paste('\t',tgname),stdout())
481 write('',stdout())
482 tb <- table(pops)
483 tb <- tb[order(tb, decreasing=T)]
484 tb <- as.matrix(100*tb/Nbatch)
485
486
487
488 write.table(tb,sep=',',quote=F,col.names=F,dec='.')
489 # reset sinkFile to stdOut
490 if (save!=F) {sink()}
491 } # end do_output
492
493 # MAIN code of getMonte
494 # set environment for embedded functions
495 # proces input
496 tempData <- X
497
498 myData <- tempData[rownames(tempData)!=omit,]
499if (length(targetfile)==1){
500NPOPS<-dim(X)[1]
501
502ID <- vector(length=NPOPS)
503ID[1:NPOPS] <- c(as.matrix(row.names(X)))
504
505myTarget <- X[which(ID==targetfile),]
506
507myData <- myData[-c(which(ID==targetfile)), ]
508}
509
510else{
511myTarget <-targetfile
512names(myTarget) <- c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9","PC10","PC11","PC12","PC13","PC14","PC15","PC16","PC17","PC18","PC19","PC20","PC21","PC22","PC23","PC24","PC25")
513myTarget <- t(myTarget)
514row.names(myTarget) <- " "
515}
516
517 check_formats(myData, myTarget)
518 check_omit(myData, omit) # single item distances
519 PCT <- ifelse(max(myData[1, ]>2), 100, 1)
520 #print('1. CLOSEST SINGLE ITEM DISTANCE%')
521 #print(nearestItems(myData, myTarget)*100/PCT)
522 #cat('\n')
523
524 # full table nMonte
525 print('nMONTE 3')
526 results <- do_algorithm(myData, myTarget)
527 fitted <- results$estimated
528 populations <- results$pops
529 do_output(fitted, populations)
530 cat('\n')
531
532 #print('CORRELATION OF ADMIXTURE POPULATIONS')
533 #tb <- table(populations)
534 #selFinal <- names(tb[tb>0])
535 #adFinal <- myData[selFinal,,drop=F]
536 # catch error
537 #if (nrow(adFinal)==1) {print('Only 1 population, no correlations.')}
538 #else {
539 # corPops <- cor(t(adFinal))
540 # round(corPops, digits=2)
541 #}
542
543} # end getMonte function
544#===================================utilities===================================
545#-------------------------------------------------------------------------------
546# function subset_data()
547# utility for selecting rows from datasheet
548# Use: subset_data('DavidMadeThis.csv', 'IselectedThis.csv' ,'Abkhasian', 'Adygei', 'Afanasievo', 'Altai_IA')
549# In: name of primary dataFile, name of output file, list of selected pops from primary datafile
550# Out: secondary datasheet with selected subset
551# USE WITH ONE SELECTED POPULATION TO CREATE TARGET FILE
552# Error message 1: non-existence or misspelled name of selected pop
553# Error message 2: no pops selected
554# Error message 3: output file exists, choose new name
555#-------------------------------------------------------------------------------
556subset_data <- function(dataFile, saveFile, ...) {
557 input <- read.csv(dataFile, head=T, row.names=1, stringsAsFactor=F)
558 selection <- list(...)
559 selError <- selection[!selection %in% rownames(input)]
560 # test selection
561 if (length(selError)>0) {
562 cat(paste(selError,' is not a valid rowname\n',sep='')) }
563 # output
564 else {output <- input[rownames(input) %in% selection,]
565 print(output) # print to screen
566 write.csv(output, nameIsFree(saveFile), quote=F) # save to file
567 }
568}
569
570#-------------------------------------------------------------------------------
571# function aggr_pops()
572# In the files of Davidski rowlabel has the form 'pop:ID'
573# This function drops the part after the colon
574# and collects the mean of the pop before the colon.
575# Use for mean: aggr_pops(fileName)
576# Use for median: aggr_pops(fileName, myFunc=median)
577#-------------------------------------------------------------------------------
578aggr_pops <- function(fileName, myFunc=mean) {
579 myData <- read.csv(fileName, head=T, row.names=1, stringsAsFactors=FALSE)
580 splitted <- headStrings(rownames(myData), mySep=':')
581 aggrData <- aggregate(myData, by=list(splitted), myFunc)
582 temp <- as.matrix(aggrData[,-1]); rownames(temp) <- aggrData[,1]
583 return(round(temp, 7))
584}
585
586#-------------------------------------------------------------------------------
587# function tab2comma()
588# Convert tab-separated csv to comma-separated csv
589# Use: tab2comma(tabFile,commaFile)
590#-------------------------------------------------------------------------------
591tab2comma <- function(tabFile,commaFile) {
592 data <- read.csv(tabFile, head=T, sep='\t', row.names=1, stringsAsFactor=F)
593 nameIsFree(commaFile)
594 write.csv(data, commaFile, row.names=T)
595 }
596
597#-------------------------------------------------------------------------------
598# function nearestItems()
599# Find n best matches.
600# Use: inData <- read file; inTarget <- read target
601# nearestItems(inData, inTarget, maxFits=8)
602# This is not the nearest neighbor algorithm;
603# when the number of items is smaller than maxFits,
604# functions returns all the items.
605#-------------------------------------------------------------------------------
606nearestItems <- function(inData, inTarget, maxFits=8) {
607 totArr <- rbind(inTarget, inData)
608 distMat <- as.matrix(dist(totArr, method='euclidean'))
609 dist1 <- distMat[,1]
610 sortDist <- dist1[order(dist1)]
611 nFits <- min(nrow(inData), maxFits)
612 return(sortDist[2:(nFits+1)])
613 }
614
615#==================================internal stuff===============================
616# split head from vector of strings, out vector of heads
617headStrings <- function(strVec, mySep=':') {
618 unlist(lapply(strsplit(strVec, mySep), function(x) x[1]))
619 }
620
621nameIsFree <- function(newFile) {
622 while (file.exists(newFile)) {
623 newFile <- readline('select new filename for saving (without quotes): ')
624 }
625 return(newFile)
626 }
627
628check_formats <- function(sheet, target) {
629 if (any(is.na(sheet))) {err_row <- as.integer(which(rowSums(is.na(sheet))>0))
630 print(sheet[err_row, ])
631 stop(paste('Missing value in row ',err_row))}
632 if (!identical(colnames(sheet), colnames(target)))
633 {print(colnames(sheet)); print(colnames(target))
634 stop('Colnames input not identical')}
635 }
636
637check_omit <- function(sheet, dropInfo) {
638 if (dropInfo != '' & !dropInfo %in% rownames(sheet)) {
639 print('!!!! WARNING: Request to omit unknown popName. !!!!')
640 }
641 }
642
643oracle <- function(Y,TABLE='table.txt',k=20,mode="single")
644 {
645if(mode=="single" | mode=="all")
646{
647single(Y,k)
648}
649
650if(mode=="2way" | mode=="all")
651{
652twoWay(Y,k)
653}
654
655if(mode=="fifty" | mode=="all")
656{
657fifty(Y,k)
658}
659
660if(mode=="nmonte1" | mode=="all")
661{
662getMonte(Y)
663}
664
665if(mode=="nmonte3" | mode=="all")
666{
667getMonte3(Y)
668}
669
670}