/*********************************************************
** Copyright (c) 2005
** University of Washington
** Licensed under the terms set forth by University of
** Washington. If you did not sign such a license, you
** are using this software/code illegally and you do not
** have permission to use, modify, or redistribute
** this or any files in this software package.
**
** File: VectorDataFunctions.cpp
**
**********************************************************/
#include "VectorDataFunctions.h"
#include <cmath>
#include "SafeInt.h"
#include "ClusterException.h"

// Tolerance for floating-point rounding error: CCorrelation::GetCorrelation
// snaps a correlation whose magnitude exceeds 1.0 by less than this amount
// back to exactly +/-1.0 instead of rejecting it as out of range.
#define ROUNDERROR_FACTOR 0.0000000001

/*
 * CAddVectors::CombineData
 * Produces the data for a combined node as the element-wise sum of the
 * two nodes' data vectors.
 *
 * pNode1, pNode2: nodes whose GetData() result is treated as a
 *                 vector<double>*.
 *
 * Returns a vector<double> allocated with new (returned as void*); this
 * function does not free it.
 *
 * Raises CLUSTEX_NULLARG if either node or either node's data is NULL,
 * and CLUSTEX_OUTOFRANGE if the two vectors differ in length.
 */
void* CAddVectors::CombineData(CClusterNode* pNode1, CClusterNode* pNode2)
{
  if (NULL == pNode1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode1 is NULL");
  if (NULL == pNode2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode2 is NULL");

  vector<double>* pData1 = (vector<double>*)pNode1->GetData();
  vector<double>* pData2 = (vector<double>*)pNode2->GetData();

  // Validate the data pointers before touching them, as the other
  // functions in this file do.
  if (NULL == pData1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData1 is NULL");
  if (NULL == pData2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData2 is NULL");
  if (pData1->size() != pData2->size())
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "pData1->size() != pData2->size()");

  // Allocate only after every check has passed so that a failed check
  // cannot leak the result.  Sizing the vector up front means the fill
  // loop below never reallocates (and therefore cannot throw).
  const vector<double>::size_type count = pData1->size();
  vector<double>* pResult = new vector<double>(count);

  vector<double>::iterator i1 = pData1->begin();
  vector<double>::iterator i2 = pData2->begin();
  vector<double>::iterator out = pResult->begin();
  for(; i1 != pData1->end() ; ++i1, ++i2, ++out)
    {
      *out = *i1 + *i2;
    }
  return pResult;
}

/*
 * CCorrelation constructor
 * Allocates the per-node cache used by GetAverageAndStdDev: a "computed"
 * flag, an average and a standard deviation for each of the
 * 2*iDataCount - 1 node indices.
 *
 * iDataCount: number of data items; must be at least 1.
 *
 * Raises CLUSTEX_OUTOFRANGE if iDataCount < 1.  Anything raised by
 * CSafeUIntMax::Multiply or by operator new propagates; no array is
 * left allocated in that case.
 */
CCorrelation::CCorrelation(int iDataCount)
{
  // A count below 1 would give a non-positive array length.
  if (iDataCount < 1)
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "iDataCount must be at least 1");

  int iNodeCount = 2*iDataCount - 1;

  // Work out the memory requirement before allocating anything, so a
  // failure in Multiply cannot leak the arrays.
  m_memReq = CSafeUIntMax::Multiply((sizeof(char) + 2*sizeof(double)), iNodeCount);

  m_pComputed = NULL;
  m_pAv = NULL;
  m_pStdDev = NULL;
  try
    {
      // The trailing () zero-initializes the flags: nothing is cached yet.
      m_pComputed = new char[iNodeCount]();
      // The averages and deviations are only read once the matching
      // flag has been set, so they are left uninitialized.
      m_pAv = new double[iNodeCount];
      m_pStdDev = new double[iNodeCount];
    }
  catch (...)
    {
      // The destructor does not run for a partially constructed object,
      // so release whatever was allocated before re-raising.
      delete [] m_pComputed;
      delete [] m_pAv;
      delete [] m_pStdDev;
      throw;
    }
}

/*
 * CCorrelation destructor
 * Frees the three per-node arrays allocated by the constructor, in the
 * reverse of their allocation order.
 */
CCorrelation::~CCorrelation()
{
  delete [] m_pStdDev;
  delete [] m_pAv;
  delete [] m_pComputed;
}

/*
 * CCorrelation::Distance
 * Correlation distance between two nodes: 1 - correlation, so a
 * correlation in [-1,1] maps to a distance in [0,2].
 *
 * Raises CLUSTEX_OUTOFRANGE if the result is not within [0,2]; anything
 * raised by GetCorrelation propagates.
 */
double CCorrelation::Distance(CClusterNode* pNode1, CClusterNode* pNode2)
{
  const double correlation = GetCorrelation(pNode1, pNode2);
  const double distance = 1.0 - correlation;

  // Test "inside the range" and negate it, rather than testing
  // "outside the range": a NaN fails both comparisons and so is
  // rejected here as well.
  const bool inRange = (distance >= 0.0) && (distance <= 2.0);
  if (!inRange)
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "distance was out of the range [0,2]");

  return distance;
}

/*
 * CCorrelation::GetCorrelation
 * Computes the correlation coefficient of the two nodes' data vectors:
 *
 *   sum[ (xi - avgx) * (yi - avgy) ] / (sx * sy)
 *
 * where sx and sy are the values produced by GetAverageAndStdDev (which
 * also caches them per node index).
 *
 * Returns a value in [-1,1].  A result whose magnitude exceeds 1 by
 * less than ROUNDERROR_FACTOR is treated as rounding error and snapped
 * to +/-1.
 *
 * Raises CLUSTEX_NULLARG if a node or its data is NULL, and
 * CLUSTEX_OUTOFRANGE if the vectors differ in length, if the
 * correlation is undefined (zero denominator), or if the result falls
 * outside [-1,1].
 */
double CCorrelation::GetCorrelation(CClusterNode* pNode1, CClusterNode* pNode2)
{
  if (NULL == pNode1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode1 is NULL");
  if (NULL == pNode2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode2 is NULL");

  TDoubleVector* pData1 = (TDoubleVector*)pNode1->GetData();
  TDoubleVector* pData2 = (TDoubleVector*)pNode2->GetData();

  if (NULL == pData1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData1 is NULL");
  if (NULL == pData2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData2 is NULL");
  if (pData1->size() != pData2->size())
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "pData1->size() != pData2->size()");

  // Average and deviation of each vector.
  double avg1 = 0.0;
  double avg2 = 0.0;
  double s1 = 0.0;
  double s2 = 0.0;
  GetAverageAndStdDev(pNode1, pData1, &avg1, &s1);
  GetAverageAndStdDev(pNode2, pData2, &avg2, &s2);

  // A zero denominator (empty vector, or a vector whose elements are
  // all equal) makes the correlation undefined.  Dividing by it would
  // yield NaN or infinity, which the range check below would reject
  // with a misleading message, so report it explicitly instead.
  const double denom = s1*s2;
  if (denom == 0.0)
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "correlation is undefined: a vector is empty or has zero deviation");

  TDoubleVector::iterator i1 = pData1->begin();
  TDoubleVector::iterator i2 = pData2->begin();

  double corr = 0;
  for(;i1 != pData1->end(); ++i1, ++i2)
    {
     corr += ( ((*i1) - avg1)* ((*i2) - avg2) );
    }

  corr /= denom;

  // Account for rounding error.  fabs is used rather than abs: an
  // unqualified abs can resolve to the C int overload, which would
  // truncate the value and defeat this check.
  double abscorr = fabs(corr);
  if(abscorr > 1.0 && abscorr < (1.0+ROUNDERROR_FACTOR))
    corr = (corr < 0 ? -1.0 : 1.0);

  // Negated in-range test so that NaN is rejected too.
  if(!(corr >= -1.0 && corr <= 1.0))
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "correlation value was outside the range (-1, 1)");

  return corr;
}

/*
 * GetStandardDeviation
 * Returns the square root of the summed squared deviations of the
 * elements of pData from avg:
 *
 *   sqrt( sum[ (xi - avg)^2 ] )
 *
 * Note: the sum is not divided by n-1 before the square root is taken.
 *
 * Raises CLUSTEX_NULLARG if pData is NULL.
 */
double CCorrelation::GetStandardDeviation(TDoubleVector* pData, double avg)
{
  if (NULL == pData)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData is NULL");

  double sumOfSquares = 0;
  TDoubleVector::iterator it = pData->begin();
  while (it != pData->end())
    {
      sumOfSquares += pow(((*it) - avg),2);
      it++;
    }

  return sqrt(sumOfSquares);
}

/*
 * CCorrelation::GetAverage
 * Returns the arithmetic mean of the elements of pData (their sum
 * divided by pData->size()).
 *
 * Raises CLUSTEX_NULLARG if pData is NULL.
 */
double CCorrelation::GetAverage(TDoubleVector* pData)
{
  if (NULL == pData)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData is NULL");

  double total = 0;
  TDoubleVector::iterator it = pData->begin();
  while (it != pData->end())
    {
      total += *it;
      it++;
    }

  return total / pData->size();
}

/*
 * CCorrelation::GetAverageAndStdDev
 * Supplies the average and standard deviation (as defined by
 * GetAverage and GetStandardDeviation) of a node's data, caching the
 * pair under the node's index so it is computed only once per index.
 *
 * pNode:   node whose GetIndex() selects the cache slot.
 * pData:   the node's data vector.
 * pAvg:    receives the average.
 * pStdDev: receives the standard deviation.
 *
 * Raises CLUSTEX_NULLARG if any argument is NULL.
 */
void CCorrelation::GetAverageAndStdDev(CClusterNode* pNode, TDoubleVector* pData, double* pAvg, double* pStdDev)
{
  if (NULL == pData)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData is NULL");
  if (NULL == pNode)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode is NULL");
  if (NULL == pAvg)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pAvg is NULL");
  if (NULL == pStdDev)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pStdDev is NULL");

  int index = pNode->GetIndex();

  // NOTE(review): Computed() is defined elsewhere; presumably it tests
  // the m_pComputed flag that is set at the bottom of this function.
  if(Computed(pNode))
    {
      // Cache hit: hand back the stored values and stop.  Without this
      // return the values were recomputed on every call, which made the
      // cache pointless.
      *pAvg = m_pAv[index];
      *pStdDev = m_pStdDev[index];
      return;
    }

  // Cache miss: compute, store, and mark the slot as filled.
  m_pAv[index] = *pAvg = GetAverage(pData);
  m_pStdDev[index] = *pStdDev = GetStandardDeviation(pData, *pAvg);
  m_pComputed[index] = 1;
}

/*
 * CCentroidLinkage::CombineData
 * Produces the data for a combined node as the weighted average of the
 * two nodes' data vectors.  Each node is weighted by 1 if it is a leaf
 * and by its GetLeafCount() otherwise.
 *
 * Returns a TDoubleVector allocated with new (returned as void*); this
 * function does not free it.
 *
 * Raises CLUSTEX_NULLARG if either node or either node's data is NULL,
 * and CLUSTEX_OUTOFRANGE if the two vectors differ in length.
 */
void* CCentroidLinkage::CombineData(CClusterNode* pNode1, CClusterNode* pNode2)
{
  if (NULL == pNode1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode1 is NULL");
  if (NULL == pNode2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode2 is NULL");

  TDoubleVector* pData1 = (TDoubleVector*)pNode1->GetData();
  TDoubleVector* pData2 = (TDoubleVector*)pNode2->GetData();
  if (NULL == pData1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData1 is NULL");
  if (NULL == pData2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData2 is NULL");
  if (pData1->size() != pData2->size())
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "pData1->size() != pData2->size()");

  // Weight of each side in the average.
  int weight1 = 1;
  if (!pNode1->IsLeaf())
    weight1 = pNode1->GetLeafCount();
  int weight2 = 1;
  if (!pNode2->IsLeaf())
    weight2 = pNode2->GetLeafCount();
  int weightSum = weight1+weight2;

  TDoubleVector* pCentroid = new TDoubleVector();

  TDoubleVector::iterator it1 = pData1->begin();
  TDoubleVector::iterator it2 = pData2->begin();
  while (it1 != pData1->end())
    {
      pCentroid->push_back( (weight1 * (*it1) + weight2 * (*it2)) / weightSum );
      it1++;
      it2++;
    }

  return (void*)pCentroid;
}


/*
 * CEuclidean::Distance
 * Euclidean distance between the two nodes' data vectors:
 *
 *   sqrt( sum[ (yi - xi)^2 ] )
 *
 * Raises CLUSTEX_NULLARG if either node or either node's data is NULL,
 * and CLUSTEX_OUTOFRANGE if the first vector is empty or the two
 * vectors differ in length.
 */
double CEuclidean::Distance(CClusterNode* pNode1, CClusterNode* pNode2)
{
  if (NULL == pNode1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode1 is NULL");
  if (NULL == pNode2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pNode2 is NULL");

  TDoubleVector* pData1 = (TDoubleVector*)pNode1->GetData();
  TDoubleVector* pData2 = (TDoubleVector*)pNode2->GetData();
  if (NULL == pData1)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData1 is NULL");
  if (NULL == pData2)
    __throw_cluster_ex(CLUSTEX_NULLARG, "pData2 is NULL");
  if (pData1->size() == 0)
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "pData1->size() == 0");
  if (pData1->size() != pData2->size())
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "pData1->size() != pData2->size()");

  // Accumulate the squared per-element differences.
  double sumOfSquares = 0;
  TDoubleVector::iterator it1 = pData1->begin();
  TDoubleVector::iterator it2 = pData2->begin();
  while (it1 != pData1->end())
    {
      sumOfSquares += pow(*it2 - *it1, 2);
      it1++;
      it2++;
    }

  return sqrt(sumOfSquares);
}

/*
 * CAddVectors::GetMemoryRequirement
 * Returns a fixed byte count: the size of two TDoubleVector iterators
 * plus ten doubles.
 * NOTE(review): presumably an estimate of this object's working
 * memory; confirm against the callers.
 */
uintmax_t CAddVectors::GetMemoryRequirement()
{
  const uintmax_t iteratorBytes = 2*sizeof(TDoubleVector::iterator);
  const uintmax_t doubleBytes = 10*sizeof(double);
  return iteratorBytes + doubleBytes;
}

/*
 * CCentroidLinkage::GetMemoryRequirement
 * Returns a fixed byte count: the size of two TDoubleVector iterators
 * plus ten doubles.
 * NOTE(review): presumably an estimate of this object's working
 * memory; confirm against the callers.
 */
uintmax_t CCentroidLinkage::GetMemoryRequirement()
{
  const uintmax_t iteratorBytes = 2*sizeof(TDoubleVector::iterator);
  const uintmax_t doubleBytes = 10*sizeof(double);
  return iteratorBytes + doubleBytes;
}

/*
 * CEuclidean::GetMemoryRequirement
 * Returns a fixed byte count: the size of two TDoubleVector iterators
 * plus ten doubles.
 * NOTE(review): presumably an estimate of this object's working
 * memory; confirm against the callers.
 */
uintmax_t CEuclidean::GetMemoryRequirement()
{
  const uintmax_t iteratorBytes = 2*sizeof(TDoubleVector::iterator);
  const uintmax_t doubleBytes = 10*sizeof(double);
  return iteratorBytes + doubleBytes;
}

/*
 * CCorrelation::GetMemoryRequirement
 * Returns the byte count stored in m_memReq by the constructor: one
 * char plus two doubles for each node index.
 */
uintmax_t CCorrelation::GetMemoryRequirement()
{
  const uintmax_t cacheBytes = m_memReq;
  return cacheBytes;
}
