6n0WYiwK

· 6 years ago · Nov 11, 2019, 04:14 PM
1X <- read.table("k36ancient.txt", row.names=1, head = T, stringsAsFactors=T, na.strings=c('',' ','NA'))
2
3single <- function(Y,k){
4NPOPS<-dim(X)[1]
5
6ID <- vector(length=NPOPS) 
7ID[1:NPOPS] <- c(as.matrix(row.names(X)))
8
9X <- X[,1:36]
10
11if (length(Y)==1) 
12Y <- X[which(ID==Y),]
13
14YDIST<-vector(length=NPOPS)
15
16for (i in 1:NPOPS)
17YDIST[i] <- sqrt(sum((Y-X[i,])^2))
18
19
20ORDER<-order(YDIST)
21ORDER<-cbind(ID[ORDER[1:k]],round(YDIST[ORDER[1:k]],4))
22print(ORDER)
23}
24
25
26
27twoWay <- function(Y,k){
28NPOPS<-dim(X)[1]
29
30ID <- vector(length=NPOPS) 
31ID[1:NPOPS] <- c(as.matrix(row.names(X)))
32 
33
34 X <- X[,1:36] 
35
36if (length(Y)==1) 
37Y <- X[which(ID==Y),]
38
39
40YDIST<-vector(length=NPOPS+NPOPS*(NPOPS-1)/2)
41
42
43
44
45XDIST <- as.matrix(dist(X))
46
47for (i in 1:NPOPS)
48YDIST[i] <- sqrt(sum((Y-X[i,])^2))
49
50ID2 <- vector()
51YDIST2 <- vector()
52COUNT <- NPOPS
53
54		for (i in 1:(NPOPS-1))
55		for (j in (i+1):NPOPS) { 
56			COUNT<-COUNT+1
57			if (abs(YDIST[i]-YDIST[j])<XDIST[i,j] & YDIST[i]>0 & YDIST[j]>0) {
58				FRAC <- (XDIST[i,j]^2+YDIST[i]^2-YDIST[j]^2)/(2*XDIST[i,j]^2)
59				if (FRAC>=0 & FRAC<=1) { 
60					YDIST2[COUNT] <- sqrt(YDIST[i]^2-FRAC^2*XDIST[i,j]^2)
61					ID2[COUNT] <- paste(round(100*(1-FRAC),1),"% ",ID[i]," + ",round(100*FRAC,1),"% ",ID[j],sep=" ")
62					}
63				else 
64				YDIST2[COUNT]<- 100 
65				} 
66			else
67			YDIST2[COUNT]<- 100
68			}
69		
70
71ORDER<-order(YDIST2)
72ORDER<-cbind(ID2[ORDER[1:k]],round(YDIST2[ORDER[1:k]],4))
73print(ORDER)
74}
75
76
77
78fifty <- function (Y,k)
79 {
80 NPOPS<-dim(X)[1]
81 ID <- vector()
82ID[1:NPOPS] <- c(as.matrix(row.names(X)))
83 X <- X[,1:36] 
84
85if (length(Y)==1) 
86Y <- X[which(ID==Y),]
87
88
89YDIST<-vector()
90XDIST <- as.matrix(dist(X))
91
92for (i in 1:NPOPS)
93YDIST[i] <- sqrt(sum((Y-X[i,])^2))
94ID2 <- vector()
95YDIST2 <- vector()
96
97		COUNT <- NPOPS
98		for (i in 1:(NPOPS-1))
99		for (j in (i+1):NPOPS) {
100			COUNT<-COUNT+1
101			YDIST2[COUNT] <- sqrt(max(YDIST[i],YDIST[j])^2-(0.5)^2*XDIST[i,j]^2)
102			ID2[COUNT] <- paste("50%","% ",ID[i]," + ","50%","% ",ID[j])
103			}
104
105
106ORDER<-order(YDIST2) 
107ORDER<-cbind(ID2[ORDER[1:k]],round(YDIST2[ORDER[1:k]],4))
108print(ORDER)
109}
110
111
112
113#*******************************************************
114# R script nMonte.R
115# Find mixture composition which minimizes
116# the averaged genetic distance to target.
117# activate with: source('nMonte.R')
118# Use: getMonte(datafile,targetfile);
119# both files should be comma-separated csv.
120# Utilities:
121# subset_data(): Collecting rows from datasheet
122# aggr_pops(): Average populations
123# tab2comma(): tab-separated to comma-separated
124# last modified: averaging, median, percentage
125# v9.0 Huijbregts 29 nov 2017
126#*******************************************************
127
128# default global constants
129batch_0  = 1000    # default rows of sample randomly drawn from data file
130cycles_0 = 1000   # default number of cycles
131
132# START OF GETMONTE FUNCTION
133getMonte <- function(targetfile,omit='',Nbatch=batch_0,Ncycles=cycles_0,save=F) {
134    
135    # define AlGORITHM embedded function
136    do_algorithm <- function(selection, targ) {
137        mySel <- as.matrix(selection, rownames.force = NA)       
138        myTg <- as.matrix(targ, rownames.force = NA)
139        dif2targ <- sweep(mySel, 2, myTg, '-')    # data - target       
140        Ndata <- nrow(dif2targ)
141        kcol <- ncol(dif2targ)
142        rowLabels <- rownames(dif2targ)
143        # preallocate data
144        matPop    <- matrix(NA_integer_,  Nbatch, 1, byrow=T)
145        dumPop    <- matrix(NA_integer_,  Nbatch, 1, byrow=T) 
146        matAdmix  <- matrix(NA_real_, Nbatch, kcol, byrow=T)
147        dumAdmix  <- matrix(NA_real_, Nbatch, kcol, byrow=T)
148        # initialize data for first cycle
149        ## a. start with nearest neighbor: 
150        # distQQ <- rowSums(dif2targ^2)
151        # minPop <- as.integer(which(distQQ==min(distQQ))[1])
152        # matPop <- rep(minPop, Nbatch)        
153        # b. start with random pops:
154        matPop <- sample(1:Ndata,Nbatch,replace=T)        
155        # fill matPop with random row numbers 1:Ndata from datafile
156        matAdmix   <- dif2targ[matPop,]
157        colM1 <- colMeans(matAdmix)           # rowvector of column means
158        eval1 <- sum(colM1^2)    # sum of squared distances   
159        # Ncycles iterations
160        for (c in 1:Ncycles) {
161            # fill batch data
162            dumPop <- sample(1:Ndata, Nbatch, replace=T)
163            dumAdmix   <- dif2targ[dumPop,]
164            # loop thru batch
165            # minimize Eucl dist of batch mean to target
166            for (b in 1:Nbatch) {
167                # test alternative pop
168                store <- matAdmix[b,]
169                matAdmix[b,] <- dumAdmix[b,]
170                colM2 <- colMeans(matAdmix)
171                eval2 <- sum(colM2^2)
172                # conditional adjust
173                if (eval2 <= eval1) {
174                    matPop[b] <- dumPop[b]
175                    colM1 <- colM2
176                    eval1 <- eval2
177                } else {matAdmix[b,] <- store} 
178            } # end batch
179        } # end cycles
180        # Collect output
181        # get fit of target
182        fitted <- t(colMeans(matAdmix) + myTg[1,])
183        # collect sampled populations
184        popX<- matPop
185        # substute row numbers by row names
186        populations <- factor(rowLabels[popX],levels=rowLabels)
187        # return list of 2 objects
188        return(list('estimated'=fitted, 'pops'=populations))
189    }   # end do_algorithm    
190    
191    # define OUTPUT embedded function
192    # except pop correlations
193    do_output <- function(estim, pops){
194        # set stdOut to sinkFile
195        if (save!=F) {
196            sinkFile <- nameIsFree(save)            
197            sink(sinkFile, append=T, split=T)
198        }
199        #print(paste('Ncycles=',Ncycles,sep=' '))    
200        #print target and estimation by col
201        dif <- estim - myTarget
202	
203        #matrix_out <- rbind(myTarget, estim, dif)
204        #rownames(matrix_out)[2:3] <- c('fitted','dif')
205        #print(matrix_out)    
206        # distance
207        dist1_2 <- sqrt(sum(dif^2))
208        dist1_2 <- dist1_2/PCT
209        print(paste('distance%=',round(100*dist1_2,4),sep=''))
210        write('',stdout())
211        # table percentages by pop
212        tgname <- row.names(myTarget)[1]
213        write(paste('\t',tgname),stdout())
214        write('',stdout())
215        tb <- table(pops)
216        tb <- tb[order(tb, decreasing=T)]
217        tb <- as.matrix(100*tb/Nbatch)
218	
219	i <- 1
220	while(tb[i]>0)
221         {
222	i <- i+1
223	}
224	tb <- tb[1:(i-1), ,drop=FALSE]
225	
226
227        write.table(tb,sep=',',quote=F,col.names=F,dec='.')
228        # reset sinkFile to stdOut   
229        if (save!=F) {sink()}
230    }   # end do_output
231
232    # MAIN code of getMonte
233    # set environment for embedded functions
234    # proces input
235    tempData <- X
236
237    myData <- tempData[rownames(tempData)!=omit,]
238
239
240myTarget <-targetfile
241names(myTarget) <- c("Amerindian", "Arabian", "Armenian", "Basque", "Central_African", "Central_Euro", "East_African", "East_Asian", "East_Balkan", "East_Central_Asian", "East_Central_Euro", "East_Med", "Eastern_Euro", "Fennoscandian", "French", "Iberian", "Indo_Chinese", "Italian", "Malayan", "Near_Eastern", "North_African", "North_Atlantic", "North_Caucasian", "North_Sea", "Northeast_African", "Oceanian", "Omotic", "Pygmy", "Siberian", "South_Asian", "South_Central_Asian", "South_Chinese", "Volga_Ural", "West_African", "West_Caucasian", "West_Med")
242myTarget <- t(myTarget)
243row.names(myTarget) <- " "
244    
245    check_formats(myData, myTarget)
246    check_omit(myData, omit)    # single item distances
247    PCT <- ifelse(max(myData[1, ]>2), 100, 1)
248    #print('1. CLOSEST SINGLE ITEM DISTANCE%')
249    #print(nearestItems(myData, myTarget)*100/PCT)
250    #cat('\n')
251
252    # full table nMonte
253    print('nMONTE 1')
254    results <- do_algorithm(myData, myTarget)
255    fitted <- results$estimated
256    populations <- results$pops
257    do_output(fitted, populations)
258    cat('\n')
259    
260    #print('CORRELATION OF ADMIXTURE POPULATIONS')
261    #tb <- table(populations)
262    #selFinal <- names(tb[tb>0])
263    #adFinal <- myData[selFinal,,drop=F]
264    # catch error
265    #if (nrow(adFinal)==1) {print('Only 1 population, no correlations.')}
266    #else {
267    #    corPops <- cor(t(adFinal))
268    #    round(corPops, digits=2)
269    #}
270
271} # end getMonte function
272
273#===================================utilities===================================
274#-------------------------------------------------------------------------------
275# function subset_data()
276# utility for selecting rows from datasheet
277# Use: subset_data('DavidMadeThis.csv', 'IselectedThis.csv' ,'Abkhasian', 'Adygei', 'Afanasievo', 'Altai_IA')
278# In: name of primary dataFile, name of output file, list of selected pops from primary datafile
279# Out: secondary datasheet with selected subset
280# USE WITH ONE SELECTED POPULATION TO CREATE TARGET FILE
281# Error message 1: non-existence or misspelled name of selected pop
282# Error message 2: no pops selected
283# Error message 3: output file exists, choose new name     
284#-------------------------------------------------------------------------------
285subset_data <- function(dataFile, saveFile, ...) {
286    input <- read.csv(dataFile, head=T, row.names=1, stringsAsFactor=F)
287    selection <- list(...)
288    selError <- selection[!selection %in% rownames(input)]
289    # test selection
290    if (length(selError)>0) {
291        cat(paste(selError,' is not a valid rowname\n',sep='')) }
292    # output
293    else {output <- input[rownames(input) %in% selection,]
294          print(output)                                       # print to screen
295          write.csv(output, nameIsFree(saveFile), quote=F)    # save to file 
296         }
297}
298
299#-------------------------------------------------------------------------------
300# function aggr_pops()
301# In the files of Davidski rowlabel has the form 'pop:ID'
302# This function drops the part after the colon
303# and collects the mean of the pop before the colon.
304# Use for mean:   aggr_pops(fileName)
305# Use for median: aggr_pops(fileName, myFunc=median)
306#-------------------------------------------------------------------------------
307aggr_pops <- function(fileName, myFunc=mean) {
308    myData <- read.csv(fileName, head=T, row.names=1, stringsAsFactors=FALSE)
309    splitted <- strsplit(rownames(myData),split=':',fixed=T)
310    splitmat <- t(matrix(unlist(splitted), length(splitted[[1]]), length(splitted)))
311    aggrData <- aggregate(myData, by=list(splitmat[,1]),myFunc)
312    temp <- as.matrix(aggrData[,-1]); rownames(temp) <- aggrData[,1]
313    return(round(temp, 7))
314}
315
316#-------------------------------------------------------------------------------
317# function tab2comma()
318# Convert tab-separated csv to comma-separated csv
319# Use: tab2comma(tabFile,commaFile)
320#-------------------------------------------------------------------------------
321tab2comma <- function(tabFile,commaFile) {
322    data <- read.csv(tabFile, head=T, sep='\t', row.names=1, stringsAsFactor=F)
323    nameIsFree(commaFile)
324    write.csv(data, commaFile, row.names=T)
325    }
326
327#-------------------------------------------------------------------------------
328# function nearestItems()
329# Find n best matches.
330# Use: inData <- read file; inTarget <- read target
331#       nearestItems(inData, inTarget, maxFits=8)
332# This is not the nearest neighbor algorithm;
333# when the number of items is smaller than maxFits,
334# functions returns all the items.
335#-------------------------------------------------------------------------------
336nearestItems <- function(inData, inTarget, maxFits=8) {
337    totArr <- rbind(inTarget, inData)
338    distMat <- as.matrix(dist(totArr, method='euclidean'))
339    dist1 <- distMat[,1]
340    sortDist <- dist1[order(dist1)]
341    nFits <- min(nrow(inData), maxFits)
342    return(sortDist[2:(nFits+1)])    
343    }
344
345#==================================internal stuff===============================
346nameIsFree <- function(newFile) {
347    while (file.exists(newFile)) {
348        newFile <- readline('select new filename for saving (without quotes): ')
349        }
350    return(newFile)
351    }
352    
353check_formats <- function(sheet, target) {
354    if (any(is.na(sheet)))  {err_row <- as.integer(which(rowSums(is.na(sheet))>0))
355                            print(sheet[err_row, ])
356                            stop(paste('Missing value in row ',err_row))}
357    if (!identical(colnames(sheet), colnames(target)))
358        {print(colnames(sheet)); print(colnames(target))
359         stop('Colnames input not identical')}
360    }
361
362check_omit <- function(sheet, dropInfo) {
363    if (dropInfo != '' & !dropInfo %in% rownames(sheet)) {
364        print('!!!! WARNING: Request to omit unknown popName. !!!!')
365        }
366    }
367#*******************************************************
368# R script nMonte.R
369# Find mixture composition which minimizes
370# the averaged genetic distance to target.
371# Penalizing of distant admixtures.
372# Activate with: source('nMonte3_temp.R')
373# Use: getMonte(datafile, targetfile)
374# both files should be comma-separated csv.
375# Utilities:
376# subset_data(): Collecting rows from datasheet
377# aggr_pops(): Average populations
378# tab2comma(): tab-separated to comma-separated
379# last modified: headStrings 
380# v10.4 Huijbregts 8 jan 2018
381#*******************************************************
382
383# default global constants
384batch_def  =  500   # default rows of sample randomly drawn from data file
385cycles_def = 1000   # default number of cycles
386pen_def    = 0.001  # default penalty
387
388# START OF GETMONTE FUNCTION
389getMonte3 <- function(targetfile,
390            omit='',Nbatch=batch_def,Ncycles=cycles_def,save=F,pen=pen_def) {
391
392    # define AlGORITHM embedded function
393    do_algorithm <- function(selection, targ) {
394        mySel <- as.matrix(selection, rownames.force = NA)       
395        myTg <- as.matrix(targ, rownames.force = NA)
396        dif2targ <- sweep(mySel, 2, myTg, '-')    # data - target       
397        Ndata <- nrow(dif2targ)
398        kcol <- ncol(dif2targ)
399        rowLabels <- rownames(dif2targ)
400        # preallocate data
401        matPop    <- matrix(NA_integer_,  Nbatch, 1, byrow=T)
402        dumPop    <- matrix(NA_integer_,  Nbatch, 1, byrow=T) 
403        matAdmix  <- matrix(NA_real_, Nbatch, kcol, byrow=T)
404        dumAdmix  <- matrix(NA_real_, Nbatch, kcol, byrow=T)
405        matPop    <- sample(1:Ndata,Nbatch,replace=T)        
406        # fill matPop with random row numbers 1:Ndata from datafile
407        matAdmix   <- dif2targ[matPop,]
408        # iniatialize objective function         
409        colM1 <- colMeans(matAdmix)
410        eval1 <- (1+pen) * sum(colM1^2)  
411        # Ncycles iterations
412        for (c in 1:Ncycles) {
413            # fill batch data
414            dumPop <- sample(1:Ndata, Nbatch, replace=T)
415            dumAdmix   <- dif2targ[dumPop,]
416            # loop thru batch
417            # penalty is squared distance of sample to target
418            # objective function = 
419            #     squared dist of batch mean to target + coef*penalty
420            # minimize objective function 
421            for (b in 1:Nbatch) {
422                # test alternative pop
423                store <- matAdmix[b,]
424                matAdmix[b,] <- dumAdmix[b,]
425                colM2 <- colMeans(matAdmix)
426                eval2 <- sum(colM2^2) + pen*sum(matAdmix[b, ]^2)
427                # conditional adjust
428                if (eval2 <= eval1) {
429                    matPop[b] <- dumPop[b]
430                    colM1 <- colM2
431                    eval1 <- eval2
432                } else {matAdmix[b,] <- store}
433            } # end batch
434        } # end cycles
435        # Collect output
436        # get fit of target
437        fitted <- t(colMeans(matAdmix) + myTg[1,])
438        # collect sampled populations
439        # split labels of reference samples
440        popl <- headStrings(rowLabels[matPop], mySep=':')
441        populations <- factor(popl)
442        # return list of 2 objects
443        return(list('estimated'=fitted, 'pops'=populations))
444    }   # end do_algorithm    
445    
446    # define OUTPUT embedded function
447    # except pop correlations
448    do_output <- function(estim, pops){
449        # set stdOut to sinkFile
450        if (save!=F) {
451            sinkFile <- nameIsFree(save)            
452            sink(sinkFile, append=T, split=T)
453        }
454        #print(paste('Ncycles=',Ncycles,sep=' '))    
455        # print target and estimation by col
456        dif <- estim - myTarget
457        #matrix_out <- rbind(myTarget, estim, dif)
458        #rownames(matrix_out)[2:3] <- c('fitted','dif')
459        #print(matrix_out)    
460        # distance
461        dist1_2 <- sqrt(sum(dif^2))
462        dist1_2 <- dist1_2/PCT
463        print(paste('distance%=',round(100*dist1_2,4),sep=''))
464        write('',stdout())
465        # table percentages by pop
466        tgname <- row.names(myTarget)[1]
467        write(paste('\t',tgname),stdout())
468        write('',stdout())
469        tb <- table(pops)
470        tb <- tb[order(tb, decreasing=T)]
471        tb <- as.matrix(100*tb/Nbatch)
472
473	
474
475        write.table(tb,sep=',',quote=F,col.names=F,dec='.')
476        # reset sinkFile to stdOut   
477        if (save!=F) {sink()}
478    }   # end do_output
479
480    # MAIN code of getMonte
481    # set environment for embedded functions
482    # proces input
483    tempData <- X
484
485    myData <- tempData[rownames(tempData)!=omit,]
486
487
488myTarget <-targetfile
489names(myTarget) <- c("Amerindian", "Arabian", "Armenian", "Basque", "Central_African", "Central_Euro", "East_African", "East_Asian", "East_Balkan", "East_Central_Asian", "East_Central_Euro", "East_Med", "Eastern_Euro", "Fennoscandian", "French", "Iberian", "Indo_Chinese", "Italian", "Malayan", "Near_Eastern", "North_African", "North_Atlantic", "North_Caucasian", "North_Sea", "Northeast_African", "Oceanian", "Omotic", "Pygmy", "Siberian", "South_Asian", "South_Central_Asian", "South_Chinese", "Volga_Ural", "West_African", "West_Caucasian", "West_Med")
490myTarget <- t(myTarget)
491row.names(myTarget) <- " "
492    
493    check_formats(myData, myTarget)
494    check_omit(myData, omit)    # single item distances
495    PCT <- ifelse(max(myData[1, ]>2), 100, 1)
496    #print('1. CLOSEST SINGLE ITEM DISTANCE%')
497    #print(nearestItems(myData, myTarget)*100/PCT)
498    #cat('\n')
499
500    # full table nMonte
501    print('nMONTE 3')
502    results <- do_algorithm(myData, myTarget)
503    fitted <- results$estimated
504    populations <- results$pops
505    do_output(fitted, populations)
506    cat('\n')
507    
508    #print('CORRELATION OF ADMIXTURE POPULATIONS')
509    #tb <- table(populations)
510    #selFinal <- names(tb[tb>0])
511    #adFinal <- myData[selFinal,,drop=F]
512    # catch error
513    #if (nrow(adFinal)==1) {print('Only 1 population, no correlations.')}
514    #else {
515    #    corPops <- cor(t(adFinal))
516    #    round(corPops, digits=2)
517    #}
518
519} # end getMonte function
520#===================================utilities===================================
521#-------------------------------------------------------------------------------
522# function subset_data()
523# utility for selecting rows from datasheet
524# Use: subset_data('DavidMadeThis.csv', 'IselectedThis.csv' ,'Abkhasian', 'Adygei', 'Afanasievo', 'Altai_IA')
525# In: name of primary dataFile, name of output file, list of selected pops from primary datafile
526# Out: secondary datasheet with selected subset
527# USE WITH ONE SELECTED POPULATION TO CREATE TARGET FILE
528# Error message 1: non-existence or misspelled name of selected pop
529# Error message 2: no pops selected
530# Error message 3: output file exists, choose new name     
531#-------------------------------------------------------------------------------
532subset_data <- function(dataFile, saveFile, ...) {
533    input <- read.csv(dataFile, head=T, row.names=1, stringsAsFactor=F)
534    selection <- list(...)
535    selError <- selection[!selection %in% rownames(input)]
536    # test selection
537    if (length(selError)>0) {
538        cat(paste(selError,' is not a valid rowname\n',sep='')) }
539    # output
540    else {output <- input[rownames(input) %in% selection,]
541          print(output)                                       # print to screen
542          write.csv(output, nameIsFree(saveFile), quote=F)    # save to file 
543         }
544}
545
546#-------------------------------------------------------------------------------
547# function aggr_pops()
548# In the files of Davidski rowlabel has the form 'pop:ID'
549# This function drops the part after the colon
550# and collects the mean of the pop before the colon.
551# Use for mean:   aggr_pops(fileName)
552# Use for median: aggr_pops(fileName, myFunc=median)
553#-------------------------------------------------------------------------------
554aggr_pops <- function(fileName, myFunc=mean) {
555    myData <- read.csv(fileName, head=T, row.names=1, stringsAsFactors=FALSE)
556    splitted <- headStrings(rownames(myData), mySep=':') 
557    aggrData <- aggregate(myData, by=list(splitted), myFunc)
558    temp <- as.matrix(aggrData[,-1]); rownames(temp) <- aggrData[,1]
559    return(round(temp, 7))
560}
561
562#-------------------------------------------------------------------------------
563# function tab2comma()
564# Convert tab-separated csv to comma-separated csv
565# Use: tab2comma(tabFile,commaFile)
566#-------------------------------------------------------------------------------
567tab2comma <- function(tabFile,commaFile) {
568    data <- read.csv(tabFile, head=T, sep='\t', row.names=1, stringsAsFactor=F)
569    nameIsFree(commaFile)
570    write.csv(data, commaFile, row.names=T)
571    }
572
573#-------------------------------------------------------------------------------
574# function nearestItems()
575# Find n best matches.
576# Use: inData <- read file; inTarget <- read target
577#       nearestItems(inData, inTarget, maxFits=8)
578# This is not the nearest neighbor algorithm;
579# when the number of items is smaller than maxFits,
580# functions returns all the items.
581#-------------------------------------------------------------------------------
582nearestItems <- function(inData, inTarget, maxFits=8) {
583    totArr <- rbind(inTarget, inData)
584    distMat <- as.matrix(dist(totArr, method='euclidean'))
585    dist1 <- distMat[,1]
586    sortDist <- dist1[order(dist1)]
587    nFits <- min(nrow(inData), maxFits)
588    return(sortDist[2:(nFits+1)])    
589    }
590
591#==================================internal stuff===============================
592# split head from vector of strings, out vector of heads
593headStrings <- function(strVec, mySep=':') {
594        unlist(lapply(strsplit(strVec, mySep), function(x) x[1]))
595        }
596
597nameIsFree <- function(newFile) {
598    while (file.exists(newFile)) {
599        newFile <- readline('select new filename for saving (without quotes): ')
600        }
601    return(newFile)
602    }
603    
604check_formats <- function(sheet, target) {
605    if (any(is.na(sheet)))  {err_row <- as.integer(which(rowSums(is.na(sheet))>0))
606                            print(sheet[err_row, ])
607                            stop(paste('Missing value in row ',err_row))}
608    if (!identical(colnames(sheet), colnames(target)))
609        {print(colnames(sheet)); print(colnames(target))
610         stop('Colnames input not identical')}
611    }
612
613check_omit <- function(sheet, dropInfo) {
614    if (dropInfo != '' & !dropInfo %in% rownames(sheet)) {
615        print('!!!! WARNING: Request to omit unknown popName. !!!!')
616        }
617    }
618
619oracle <- function(Y,TABLE='table.txt',k=20,mode="single") 
620 {
621if(mode=="single" | mode=="all")
622{
623single(Y,k)
624}
625
626if(mode=="2way" | mode=="all")
627{
628twoWay(Y,k)
629}
630
631if(mode=="fifty" | mode=="all")
632{
633fifty(Y,k)
634}
635
636if(mode=="nmonte1" | mode=="all")
637{
638getMonte(Y)
639}
640
641if(mode=="nmonte3" | mode=="all")
642{
643getMonte3(Y)
644}
645
646}