compute.ld <- function( haps, focus.i = 1:nrow(haps), variant.names = rownames( haps ) ) {
# haps is a 0-1 matrix with L SNPs (in rows) and N haplotypes (in columns).
# Since the values are 0, 1, we rely on the fact that a*b=1 iff a and b are 1.
# assume focus.i contains l values in range 1..L
L = nrow( haps )
l = length( focus.i )
# p11 = lxL matrix. i,jth entry is probability of 11 haplotype for ith focal SNP against jth SNP.
focus.hap = haps[ focus.i, , drop = FALSE ]
p11 <- ( focus.hap %*% t( haps )) / ncol( haps )
# p1. = lxL matrix. ith row is filled with the frequency of ith focal SNP.
p1. <- matrix( rep( rowSums( focus.hap ) / ncol( haps ), L ), length( focus.i ), L, byrow = FALSE )
# p.1 = lxL matrix. jth column is filled with the frequency of jth SNP.
frequency = rowSums( haps ) / ncol( haps )
p.1 <- matrix( rep( frequency, length( focus.i ) ), length( focus.i ), L, byrow = TRUE )
# Compute D
D <- p11 - p1. * p.1
# Compute D'
denominator = pmin( p1.*(1-p.1), (1-p1.)*p.1 )
wNeg = (D < 0)
denominator[ wNeg ] = pmin( p1.*p.1, (1-p1.)*(1-p.1) )[wNeg]
denominator[ denominator == 0 ] = NA
Dprime = D / denominator
# Compute correlation, this result should agree with cor( t(haps ))
R = D / sqrt( p1. * ( 1 - p1. ) * p.1 * ( 1 - p.1 ))
if( !is.null( variant.names )) {
rownames( D ) = rownames( Dprime ) = rownames( R ) = variant.names[ focus.i ]
colnames( D ) = colnames( Dprime ) = colnames( R ) = variant.names
names( frequency ) = variant.names
}
return( list( D = D, Dprime = Dprime, frequency = frequency, R = R ) ) ;
}