/*********************************************************
** Copyright (c) 2005
** University of Washington
** Licensed under the terms set forth by University of
** Washington. If you did not sign such a license, you
** are using this software/code illegally and you do not
** have permission to use, modify, or redistribute
** this or any files in this software package.
**
** File: BasicCluster.cpp
**
**********************************************************/
#include "BasicCluster.h"
#include "ClusterException.h"
#include "Perf.h"
#include "MsgIds.h"
#include "SafeInt.h"

#include <climits>
#include <stdexcept>
#include <vector>
using namespace std;

// Message text attached to every CLUSTEX_OUTOFMEMORY exception raised in this file.
#define MEMORY_MSG "Not enough memory to complete clustering."

/*
 * Releases the node-pointer array and the distance matrix created by
 * Cluster(). Only the array of pointers is freed here; the CClusterNode
 * objects it points to are not deleted by this destructor.
 */
CBasicCluster::~CBasicCluster()
{
  // m_ppNodes comes from "new CClusterNode*[n]" in Cluster(), so it must be
  // released with the array form of delete. Deleting a null pointer is a
  // no-op, so no explicit NULL test is needed.
  delete[] m_ppNodes;

  // m_pDistances is a single object ("new CUpperDiagonalMatrix(n)").
  delete m_pDistances;
}

/*
 * Hands back the distance matrix pointer held by this object, exactly as
 * stored in m_pDistances (no copy is made).
 */
CMatrix* CBasicCluster::GetDistanceMatrix()
{
  CMatrix* pMatrix = m_pDistances;
  return pMatrix;
}

/*
 * Builds a binary cluster tree over the supplied data items.
 *
 * Every data item is first wrapped in its own leaf CClusterNode. The
 * distance between each pair of leaves is computed with the configured
 * distance function and stored in m_pDistances. The two closest nodes that
 * have not been merged yet are then combined (their data is produced by the
 * configured combine-data function) into a new parent node, and this is
 * repeated until a single root remains.
 *
 * ppData     - array holding iDataCount opaque data pointers; must not be
 *              NULL.
 * iDataCount - number of entries in ppData; must be at least 1.
 *
 * Returns the root node of the tree. When iDataCount is 1 the only leaf is
 * returned as the root and no merge is performed.
 *
 * Errors are reported as cluster exceptions:
 *   CLUSTEX_REUSEDMETHOD       - Cluster() was already called on this object
 *   CLUSTEX_NULLARG            - ppData is NULL, or no combine-data function
 *                                is configured while a merge is required
 *   CLUSTEX_OUTOFRANGE         - iDataCount < 1, or an invalid index was hit
 *   CLUSTEX_MISSINGDISTANCEFUN - no distance function is configured
 *   CLUSTEX_OUTOFMEMORY        - an allocation failed or the input is too
 *                                large for the node count to fit in an int
 */
CClusterNode* CBasicCluster::Cluster(void **ppData, int iDataCount) 
{
  if (NULL != m_ppNodes)
    {
      throw new CClusterException(CLUSTEX_REUSEDMETHOD, 
                                  "cannot call Cluster more than once");
    }

  // Validate the arguments before anything is allocated so that a rejected
  // call leaves the object untouched.
  if (NULL == ppData)
    __throw_cluster_ex(CLUSTEX_NULLARG, "ppData is NULL");
  if (iDataCount < 1)
    __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "iDataCount must be at least 1");
  // 2*iDataCount - 1 must be representable as an int; anything larger could
  // never be allocated anyway, so report it as an out-of-memory condition.
  if (iDataCount > INT_MAX / 2)
    __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);

  // The number of nodes that will ultimately be in the cluster tree:
  // iDataCount leaves plus iDataCount - 1 merged nodes.
  const int iNodeCount = 2 * iDataCount - 1;

  // size: (sizeof(CClusterNode*) + data size)*iNodeCount
  __catch_badalloc(m_ppNodes = new CClusterNode*[iNodeCount]);
  if (NULL == m_ppNodes)
    __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);

  // used[k] != 0 once node k has been merged into a parent. A vector is
  // used so the storage is released on every exit path, including
  // exceptions.
  // size: sizeof(char)*iNodeCount
  std::vector<char> used;
  __catch_badalloc(used.resize(iNodeCount, 0));

  MARK_TIME("Create single clusters");
  // Create CClusterNode objects around each data item
  for(int i = 0 ; i < iDataCount; i++)
    {
      CClusterNode *pNode = NULL;
      // size: sizeof(ClusterNode)
      try {
        pNode = new CClusterNode(ppData[i], i);
      }
      catch(bad_alloc&) {
        __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);
      }
      catch(invalid_index&) {
        __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "Invalid index when creating cluster node.");
      }
      if (NULL == pNode)
        __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);
      SetNodeByIndex(i, pNode);
    }

  CDistanceFunction* pDist = GetDistanceFunction();
  if (pDist == 0)
    {
      throw new CClusterException(CLUSTEX_MISSINGDISTANCEFUN,
                                  "no distance function provided");
    }

  // size: CUpperDiagonalMatrix::GetMemoryRequirement(iNodeCount)
  __catch_badalloc(m_pDistances = new CUpperDiagonalMatrix(iNodeCount));
  if (NULL == m_pDistances)
    __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);

  // A single item has nothing to merge with: its leaf is the whole tree.
  // Without this early exit the code below would index the node array with
  // -1 and write one element past its end.
  if (1 == iDataCount)
    {
      MARK_TIME("Done clustering.");
      return GetNodeByIndex(0);
    }

  // Every merge below needs the combine-data function, so fail before the
  // (expensive) distance pass instead of dereferencing NULL later.
  if (NULL == GetCombineDataFunction())
    __throw_cluster_ex(CLUSTEX_NULLARG, "no combine data function provided");

  // Progress totals are computed in uintmax_t: the products overflow int
  // once iDataCount exceeds a few tens of thousands.
  const uintmax_t uDataCount = static_cast<uintmax_t>(iDataCount);

  CClusterNode *ti, *tj;
  double fDist;
  int iCu = 0;   // iterations since the last progress callback
  double current = 0;
  double total = static_cast<double>(uDataCount * uDataCount / 2);

  MARK_TIME("Create distance matrix");
  // fMinDist < 0 means "no minimum recorded yet".
  double fMinDist = -1; int mini = -1, minj = -1;
  /* Create distance "matrix", saving the initial minimum */
  for(int i = 0; i < iDataCount; i++)
    {
      ti = GetNodeByIndex(i);
      for(int j = i+1; j < iDataCount; j++, current++, iCu++)
        {
          // Report progress on the first iteration and every 1000 after it.
          if(0 == iCu % 1000)
            {
              DoCallback(total, current, BCLUST_CREATE_DISTANCE_MATRIX);
              iCu = 0;
            }

          // get the distance measure between data point i and data point j
          tj = GetNodeByIndex(j);
          fDist = pDist->Distance(ti, tj);
          if (fMinDist < 0 || fDist < fMinDist)
            {
              fMinDist = fDist;
              mini = i;
              minj = j;
            }

          m_pDistances->SetValue(i, j, fDist);
        }
    }

  MARK_TIME("Begin cluster creation");
  
  // Index at which the next merged node is stored in m_ppNodes.
  int iCurrent = iDataCount;

  bool searchMin = true;
  void* pNewData;
  iCu = 0; 
  total = static_cast<double>(uDataCount * (uDataCount - 1)
                              + (uDataCount - 1) * uDataCount / 2);
  current = 0;

  /* Merge the closest pair repeatedly; each new "cluster" is stored at   */
  /* index iCurrent. The final merge (the root) is done after this loop.  */
  for(;iCurrent < iNodeCount-1 ; iCurrent++)
    {
      if (mini < 0)
        __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "mini < 0");
      if (minj < 0)
        __throw_cluster_ex(CLUSTEX_OUTOFRANGE, "minj < 0");

      pNewData = GetCombineDataFunction()->CombineData
                       (GetNodeByIndex(mini), 
                        GetNodeByIndex(minj));

      // size: sizeof(ClusterNode*) + data size
      CClusterNode* pNode = NULL;
      __catch_badalloc(pNode = new CClusterNode(pNewData, iCurrent, 
                                                GetNodeByIndex(mini), 
                                                GetNodeByIndex(minj)));
      if (NULL == pNode)
        __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);
      SetNodeByIndex(iCurrent, pNode);

      // Both children are now part of the new node and take no further
      // part in the minimum search.
      used[minj] = 1;
      used[mini] = 1;

      searchMin = true;
      // compute the distance from the new node to all of the other nodes
      for(int i = 0; i < iCurrent ; i++, current++, iCu++)
        {
          if(0 == iCu % 1000)
            {
              DoCallback(total, current, BCLUST_CREATE_CLUSTERS);
              iCu = 0;
            }

          if (used[i] == 0)
            {
              fDist =  pDist->Distance(GetNodeByIndex(i), 
                                       GetNodeByIndex(iCurrent));
              m_pDistances->SetValue(i, iCurrent, fDist);

              // fMinDist still holds the distance of the pair just merged,
              // which was the smallest among all unmerged pairs. A new
              // distance below it is therefore below every remaining stored
              // distance, so the full rescan can be skipped.
              if (fMinDist < 0 || fDist < fMinDist)
                {
                  fMinDist = fDist;
                  mini = i;
                  minj = iCurrent;
                  searchMin = false;
                }
            }
        }

      // No new distance beat the old minimum: rescan all unmerged pairs
      // (including the new node at iCurrent) for the next closest pair.
      if (searchMin)
        {
          fMinDist = -1;
          for (int i = 0; i < iCurrent ; i++)
            {
              if (used[i] == 0)
                {
                  for ( int j = i+1; j <= iCurrent ; j++)
                    {
                      if (used[j] == 0)
                        {
                          fDist = m_pDistances->GetValue(i, j);
                          if (fMinDist < 0 || fDist < fMinDist)
                            {
                              fMinDist = fDist;
                              mini = i;
                              minj = j;
                            }
                        }
                    }
                }
            }
        }
    }

  // Exactly two unmerged nodes remain; joining them yields the root.
  pNewData = GetCombineDataFunction()->CombineData(GetNodeByIndex(mini), 
                                                   GetNodeByIndex(minj));

  // Last node to be added is the root of the clustering tree.
  CClusterNode* pRoot = NULL;
  __catch_badalloc(pRoot = new CClusterNode(pNewData, iCurrent, 
                                            GetNodeByIndex(mini), 
                                            GetNodeByIndex(minj)));
  if (NULL == pRoot)
    __throw_cluster_ex(CLUSTEX_OUTOFMEMORY, MEMORY_MSG);

  SetNodeByIndex(iCurrent, pRoot);

  DoCallback(total, total, BCLUST_CREATE_CLUSTERS);
  MARK_TIME("Done clustering.");

  return pRoot;
}


/*
 * Looks up the distance stored in m_pDistances for the pair of nodes,
 * addressed by each node's GetIndex() value.
 *
 * Raises a CLUSTEX_NULLARG cluster exception when either node pointer is
 * NULL (pNode1 is checked first).
 */
double CBasicCluster::GetCachedDistance(CClusterNode* pNode1, CClusterNode* pNode2)
{
  if (!pNode1)
    {
      __throw_cluster_ex(CLUSTEX_NULLARG, "pNode1 is NULL");
    }
  if (!pNode2)
    {
      __throw_cluster_ex(CLUSTEX_NULLARG, "pNode2 is NULL");
    }

  const double fCached = m_pDistances->GetValue(pNode1->GetIndex(), pNode2->GetIndex());
  return fCached;
}

/*
 * Reads the node pointer stored at the given position of m_ppNodes.
 * The index is not range-checked.
 */
CClusterNode* CBasicCluster::GetNodeByIndex(int index)
{
  CClusterNode* pNode = *(m_ppNodes + index);
  return pNode;
}

/*
 * Stores pNode at the given position of m_ppNodes, overwriting whatever
 * pointer was there. The index is not range-checked.
 */
void CBasicCluster::SetNodeByIndex(int index, CClusterNode *pNode)
{
  *(m_ppNodes + index) = pNode;
}

/*
 * Returns number of bytes required to cluster data with given size & count.
 *
 * iDataSize  - size in bytes of one data item.
 * iDataCount - number of data items to be clustered.
 *
 * The estimate is the sum of:
 *   - the distance matrix, as reported by CUpperDiagonalMatrix;
 *   - per tree node: sizeof(CClusterNode), sizeof(CClusterNode*) for the
 *     pointer that refers to it, and the data size;
 *   - one char per node for the "used" flags kept by Cluster();
 *   - a fixed fudge factor of 50 doubles for other allocations;
 *   - whatever the distance and combine-data functions report, when set.
 *
 * Arithmetic goes through CSafeUIntMax (presumably so that overflow is
 * reported rather than silently wrapped; see SafeInt.h).
 */
uintmax_t CBasicCluster::GetMemoryRequirement(uintmax_t iDataSize, uintmax_t iDataCount)
{
  // There are iNodeCount = 2*iDataCount - 1 nodes in the clustering tree.
  // The doubling goes through the checked multiply like every other step,
  // and an empty data set yields zero nodes instead of wrapping around to
  // the largest uintmax_t value.
  uintmax_t iNodeCount = 0;
  if (0 != iDataCount)
    iNodeCount = CSafeUIntMax::Multiply(uintmax_t(2), iDataCount) - 1;

  // For the matrix, ask UpperDiagonalMatrix for its size.
  uintmax_t memReq = CUpperDiagonalMatrix::GetMemoryRequirement(uintmax_t(iNodeCount));

  // Clustering tree: node object + pointer to it + data, for every node.
  uintmax_t size1 = CSafeUIntMax::Add(sizeof(CClusterNode) + sizeof(CClusterNode*), iDataSize);
  size1 = CSafeUIntMax::Multiply(size1, iNodeCount);
  memReq = CSafeUIntMax::Add(memReq, size1);

  // Cluster also allocates a character array with iNodeCount chars.
  size1 = CSafeUIntMax::Multiply(iNodeCount, sizeof(char));
  memReq = CSafeUIntMax::Add(memReq, size1);

  // Educated fudge factor for other memory allocated by the function.
  memReq = CSafeUIntMax::Add(memReq, sizeof(double)*50);

  // Memory requirements from the distance function & combine data function.
  memReq = CSafeUIntMax::Add(memReq, (NULL == GetDistanceFunction() ? 0 : GetDistanceFunction()->GetMemoryRequirement()));
  memReq = CSafeUIntMax::Add(memReq, (NULL == GetCombineDataFunction() ? 0 : GetCombineDataFunction()->GetMemoryRequirement()));

  return memReq;
}

