!     Last change: JD 18 Nov 2007.
!     Author: Jonas Debosscher
!     Contact: jonas@ster.kuleuven.be
      Program GMCLASS
!-------------------------------------
!     Declarations.
!-------------------------------------
      parameter (idef=10)
      parameter (iobj=20)
      parameter (init=30)
      parameter (iout=40)


      character*50 deffile
      character*50 objfile
      character*50 initfile
      character*50 outfile

      parameter (MAXCLASS=50)
      parameter (MAXOBJ=400000)
      parameter (MAXNATTS=50)

      character*100 DEFATTFORMAT,ATTFORMAT
      character*10 ATTDS(MAXNATTS)
      character*10 CLASSATTDS(MAXNATTS)
      integer CLASSATTNUM(MAXNATTS)

      character*6 CLASSCODE(MAXCLASS),CLASS


      integer NCLASS,NDEFOBJ,NOBJ,NATT,NCLASSATT
      integer NATTMT

      character*50 DEFOBJNAMES(MAXOBJ)
      character*50 OBJNAMES(MAXOBJ)
      character*6 DEFOBJCLASS(MAXOBJ)

      real*8 DEFATTS(MAXOBJ,MAXNATTS)
      real*8 ATTS(MAXOBJ,MAXNATTS)
      real*8 CLASSDEFATTS(MAXOBJ,MAXNATTS)
      real*8 CLASSATTS(MAXOBJ,MAXNATTS)
      real*8 CLASSATTS1(MAXNATTS)
      integer CLASSNDEFOBJ(MAXCLASS)
      real*8 DEFCLASSDAT(MAXOBJ,MAXNATTS)
      integer itel,itottel
      real*8 mean(MAXNATTS)
      real*8 classmean(MAXCLASS,MAXNATTS)
      real*8 S(MAXNATTS,MAXNATTS)
      integer INDX1(MAXNATTS)
      real*8 D,DET(MAXCLASS)
      real*8 INVCOV(MAXCLASS,MAXNATTS,MAXNATTS)
      real*8 INVS(MAXNATTS,MAXNATTS)
      real*8 TEMP(MAXNATTS,MAXNATTS)
      real*8 TEST(MAXNATTS,MAXNATTS)
      real*8 dist,sigdist,minsigdist,prob,mindist,maxprob
      real*8 C(MAXNATTS,MAXNATTS)

      real*8 AMP(MAXNATTS),PHASEDIFF(MAXNATTS)

      integer objtel
      integer CONF(MAXCLASS)
      real*8 CLASSPROB(MAXCLASS),CLASSPROBS(MAXCLASS)
      real*8 CLASSDIST(MAXCLASS),CLASSDISTS(MAXCLASS)
      character*6 CLASSCODES(MAXCLASS)
      real*8 PROBSUM

*****************************************************************************
! Main Program.
*****************************************************************************
!---------------------------------------------------------------------------
! Command line arguments.
!---------------------------------------------------------------------------
! Filename of the training set.
      Call getarg(1,deffile)
! Filename of the dataset to be classified.
      Call getarg(2,objfile)
! Initialization file.
      Call getarg(3,initfile)
! Output filename for classification results.
      Call getarg(4,outfile)
!---------------------------------------------------------------------------
! Reading the training set (check the format!).
!---------------------------------------------------------------------------
! Optional screen output.
      write(6,*)'Reading the class definitions...'
! Open the initialization file.
      open(init,file=initfile,status='old')
! Read the number of classes considered for the classification and test if not too many.
      read(init,*)NCLASS
      if(NCLASS.gt.MAXCLASS)STOP 'NCLASS > MAXCLASS'
! Read the classcodes.
      Do i=1,NCLASS
       read(init,fmt='(a6)')CLASSCODE(i)
      end do
! Read the total number of attributes (number of columns for every objectline) and test if not too many.
      read(init,*)NATT
      if(NATT.gt.MAXNATTS)STOP 'NATT > MAXNATTS'
! Number of all the numeric attributes (objectname and classcode now exluded).
      NATTMT=NATT-2
! Read the short descriptions of the attributes.
      Do i=1,NATT
       read(init,fmt='(a10)')ATTDS(i)

      end do
! Number of attributes that will be used for classifying.
      read(init,*)NCLASSATT
! Read the short descriptions of the attributes that will be used for classifying.
      Do i=1,NCLASSATT
       read(init,fmt='(a10)')CLASSATTDS(i)
      end do
! Format of the lines with training objects.
      read(init,fmt='(a100)')DEFATTFORMAT
! Format of the lines with objects to be classified.
      read(init,fmt='(a100)')ATTFORMAT
! Close the initialization file.
      close(init)
! Open the file with the training set.
      open(idef,file=deffile,status='old')
! Read all the attributes for the training objects (one line per object).
      Do i=1,MAXOBJ
       read(idef,fmt=DEFATTFORMAT,end=10)
     &  DEFOBJNAMES(i),(DEFATTS(i,j),j=1,NATTMT),DEFOBJCLASS(i)
      end do
! Total number of training objects.
   10 NDEFOBJ=i-1
! Closing the file with the training set.
      close(idef)
! Test if not too many objects.
      if(NDEFOBJ.eq.MAXOBJ)STOP 'NDEFOBJ = MAXOBJ'
! Find and store the numbers of the attributes that will be used for classifying.
      Do i=1,NCLASSATT
       Do j=1,NATT
        if(CLASSATTDS(i).eq.ATTDS(j))then
         CLASSATTNUM(i)=j
        end if
       end do
      end do
! Fill an array with only the attributes that will be used for classifying.
      do i=1,NDEFOBJ
       do j=1,NCLASSATT
         CLASSDEFATTS(i,j)=DEFATTS(i,CLASSATTNUM(j)-1)
       end do
      end do
!-----------------------------------------------
! Reading the data to be classified.
!-----------------------------------------------
! Optional screen output.
      write(6,*)'Reading the data to be classified...'
! Open the file with the database to be classified.
      open(iobj,file=objfile,status='old')
! Read the data (attributes).
      Do i=1,MAXOBJ
       read(iobj,fmt=ATTFORMAT,end=20)OBJNAMES(i),
     &  (ATTS(i,j),j=1,NATTMT)
      end do
! Number of objects to be classified.
   20 NOBJ=i-1
! Closing the file with the database.
      close(iobj)
! Check if not too many objects.
      if(NOBJ.eq.MAXOBJ)STOP 'NOBJ = MAXOBJ'
! Fill an array with only the attributes that will be used for classifying.
      do i=1,NOBJ
       do j=1,NCLASSATT
         CLASSATTS(i,j)=ATTS(i,CLASSATTNUM(j)-1)
       end do
      end do
!----------------------------------------------------------------------------------------------------
! Construction of training classes.
!----------------------------------------------------------------------------------------------------
! Optional screen output.
      write(6,*)'Constructing the classes...'
! Initializing the total number of training objects (total (itottel) and separately for every class (array CLASSNDEFATTS)).
      CLASSNDEFOBJ=0
      itottel=0
! Select the training objects for every class, put their classification attributes in a separate array and update the number of objects for every class.
      Do i=1,NCLASS
       itel=0
       DEFCLASSDAT=0.D0
       Do j=1,NDEFOBJ
        if(DEFOBJCLASS(j).eq.CLASSCODE(i))then
        itel=itel+1
        itottel=itottel+1
        CLASSNDEFOBJ(i)=CLASSNDEFOBJ(i)+1 
        DEFCLASSDAT(itel,1:NCLASSATT)=CLASSDEFATTS(j,1:NCLASSATT)
        end if
       end do
! Check if there are no less training objects for the class than classification attributes. This would cause numerical problems.
      if(CLASSNDEFOBJ(i).le.NCLASSATT)STOP 'CLASSNDEFOBJ <= NCLASSATT'
! Call subroutine for the computation of mean attribute values and the Variance-Covariance matrix of the class.
      call covar(MAXOBJ,MAXNATTS,itel,NCLASSATT,DEFCLASSDAT,S,mean)
! Store the mean attribute values of the class in a separate array.
      classmean(i,1:NCLASSATT)=mean(1:NCLASSATT)
! Store the Variance-Covariance matrix of the class in a separate array.
      TEMP(1:NCLASSATT,1:NCLASSATT)=S(1:NCLASSATT,1:NCLASSATT)
! Compute the determinant of the Variance-Covariance matrix.
      call DTRM(MAXNATTS,TEMP,NCLASSATT,D,INDX1)
! Store the value in an array.
      DET(i)=D
! Optional output to screen: determinant of the Variance-Covariance matrix, the classcode and the number of objects used to define the class.
!      write(6,*)det(i),classcode(i),CLASSNDEFOBJ(i)
! Invert the Variance-Covariance matrix and store the resulting matrix.
      call matrix_inversion(MAXNATTS,S,NCLASSATT)
      INVCOV(i,1:NCLASSATT,1:NCLASSATT)=S(1:NCLASSATT,1:NCLASSATT)
! End of the loop over the training classes.
      end do
!-------------------------------------------------------------------------------------------
! Classification.
!-------------------------------------------------------------------------------------------
! Optional screen output.
      write(6,*)'Classifying the dataset...'
! Open outputfile for classification results.
      open(iout,file=outfile,status='unknown')
! Initialization of object counters.
      itel=0
      objtel=0
      CONF=0.D0
! Loop over the objects to be classified.
      do i=1,NOBJ
! Initializing the minimum statistical distance, the minimum statistical distance in number of sigmas (Mahalanobis distance) and the maximum probability density.
       mindist=10.D10
       minsigdist=10.D10
       maxprob=-1.d0
! Put the object attributes in a separate array.
       CLASSATTS1(1:NCLASSATT)=CLASSATTS(i,1:NCLASSATT)
! Loop over the training classes.
       do j=1,NCLASS
! Put the inverse of the Variance-Covariance matrix and the class mean vector in separate arrays.
        INVS(1:NCLASSATT,1:NCLASSATT)=
     &   INVCOV(j,1:NCLASSATT,1:NCLASSATT)
        mean(1:NCLASSATT)=classmean(j,1:NCLASSATT)
! Call subroutine for the calculation of the statistical distance of the object to this class.
        call covdist(MAXNATTS,NCLASSATT,INVS,CLASSATTS1,mean,
     &        det(j),dist,sigdist,prob)
! Store the probability density and the distance.
        CLASSPROB(j)=prob
        CLASSDIST(j)=dist
! Check if the minimum distance needs to be updated (and the corresponding probability density, Mahalanobis distance, and the classcode).
        if(dist.lt.mindist)then
         mindist=dist
         minsigdist=sigdist
         maxprob=prob
         CLASS=CLASSCODE(j)
        end if
! End loop over the classes.
       end do
! Optional for resampling test: check how many training objects are correctly classified (for the construction of confusion matrices).
!        Do k=1,NCLASS
!         if(CLASS.eq.CLASSCODE(k))CONF(k)=CONF(k)+1
!        end do
!       end if
! Sort the class probabilities in decreasing order.
      call sort(NCLASS,CLASSDIST,CLASSDISTS,CLASSCODE,CLASSCODES,
     & CLASSPROB,CLASSPROBS)
! Computation of normalized relative class probabilities in case the highest probability is not zero.
      if (CLASSPROBS(1).ne.0.d0)then
       PROBSUM=sum(CLASSPROBS(1:NCLASS))
       CLASSPROBS=CLASSPROBS/PROBSUM
      end if 
! Write classification results to the output file: name of the object, codes of the 3 most probable classes, Mahalanobis distance for the most probable class, normalized relative probabilities for the 3 most probable classes.
      write(iout,fmt='(a30,1x,3(a6,1x),f8.2,1x,3(f8.6,1x))')
     & objnames(i),CLASSCODES(1:3),DSQRT(minsigdist),CLASSPROBS(1:3)
! End of the classification loop over all the objects.
      end do
! Close the output file.
      close(iout)
! Optional screen output.
      write(6,*)'Done!'
! End program!
      end program
**************************************************************************************
! Subroutines.
**************************************************************************************
!-----------------------------------------------
! Subroutine for square matrix multiplication.
!-----------------------------------------------
      Subroutine multiply(mna,n,m,l,A,B,C)

      integer mna
      real*8 A(mna,mna),B(mna,mna)
      real*8 C(mna,mna)

      C=0.D0

       do i=1,n
        do j=1,l
         do k=1,m
         C(i,j)=C(i,j)+A(i,k)*B(k,j)
         end do
        end do
       end do
       return
       end
!-------------------------------------------------------------
! Subroutine for the calculation of the statistical distance.
!-------------------------------------------------------------
       Subroutine covdist(mna,k,INVS,x,y,detrm,dist,sigdist,prob)

       integer mna
       integer k
       real*8 INVS(mna,mna)
       real*8 x(mna),y(mna)
       real*8 xt(mna)
       real*8 dist,sigdist
       real*8 detrm
       real*8 prob
       real*8 TPI


       TPI=6.2831853071795865D0

       xt=0.D0
       dist=0.D0
       do i=1,k
        do j=1,k
         xt(i)=xt(i)+(x(j)-y(j))*INVS(i,j)
        end do
       end do
       do l=1,k
        dist=dist+xt(l)*(x(l)-y(l))
       end do
       sigdist=dist
       prob=(1.D0/(DSQRT(detrm)*((DSQRT(TPI))**k)))*
     &       (dexp(-0.5D0*dist))

       dist=dist+dlog(detrm)

       return
       end

!--------------------------------------------------------------------
! Subroutine for the calculation of the variance-covariance matrix.
!--------------------------------------------------------------------

       Subroutine covar(mo,mna,n,k,G,S,mean)

       integer mo,mna,n,k
       real*8 G(mo,mna)
       real*8 S(mna,mna)
       real*8 mean(mna)

       S=0.D0
       mean=0.D0
       do i=1,n
        mean(1:k)=mean(1:k)+G(i,1:k)
       end do
       mean(1:k)=mean(1:k)/dfloat(n)
       do j=1,n
        G(j,1:k)=G(j,1:k)-mean(1:k)
       end do 
       do i=1,k
        do j=1,k
         do l=1,n
          S(i,j)=S(i,j)+G(l,i)*G(l,j)
         end do
        end do 
       end do

       S(1:k,1:k)=S(1:k,1:k)/Dfloat(n-1)

       return
       end

!---------------------------------
! Subroutine for sorting.
!---------------------------------

       Subroutine sort(N,A,B,C,D,E,F)
       real*8 A(N),B(N) 
       character*6 C(N),D(N) 
       real*8 E(N),F(N)
       integer IND
       real*8 maxim
       real*8 minim
       maxim=A(1)
       IND=1
       do i=1,N
        if(A(i).gt.maxim)then
         maxim=A(i)
         IND=i
        end if
       end do 
       do j=1,N
        minim=maxim
         do k=1,N
          if((A(k).le.minim).and.(A(k).ne.-1))then
           minim=A(k)
           IND=k
          end if
         end do
         D(j)=C(IND)
         B(j)=minim
         F(j)=E(IND)
         A(IND)=-1
       end do
       return
       end

!-----------------------------------
! Subroutine for matrix inversion.
!-----------------------------------
      subroutine matrix_inversion(mna,A,NP)

      integer mna,NP
      PARAMETER (NMAX=100)

      real*8 A(mna,mna)
      real*8 INDXR(NMAX),INDXC(NMAX)
      real*8 BIG
      integer n,IPIV(NMAX) 
      n = np

      DO 11 J=1,N
        IPIV(J)=0
11    CONTINUE
      DO 22 I=1,N
        BIG=0.D0
        DO 13 J=1,N
          IF(IPIV(J).NE.1)THEN
            DO 12 K=1,N
              IF (IPIV(K).EQ.0) THEN
                IF (DABS(A(J,K)).GE.BIG)THEN
                  BIG=DABS(A(J,K))
                  IROW=J
                  ICOL=K
                ENDIF
              ELSE IF (IPIV(K).GT.1) THEN
                PAUSE 'Singular matrix'
              ENDIF
12          CONTINUE
          ENDIF
13      CONTINUE

        IPIV(ICOL)=IPIV(ICOL)+1
        IF (IROW.NE.ICOL) THEN
          DO 14 L=1,N
            DUM=A(IROW,L)
            A(IROW,L)=A(ICOL,L)
            A(ICOL,L)=DUM
14        CONTINUE
        ENDIF
        INDXR(I)=IROW
        INDXC(I)=ICOL
        IF (A(ICOL,ICOL).EQ.0.D0) PAUSE 'Singular matrix.'
        PIVINV=1.D0/A(ICOL,ICOL)
        A(ICOL,ICOL)=1.D0
        DO 16 L=1,N
          A(ICOL,L)=A(ICOL,L)*PIVINV
16      CONTINUE
        DO 21 LL=1,N
          IF(LL.NE.ICOL)THEN
            DUM=A(LL,ICOL)
            A(LL,ICOL)=0.D0
            DO 18 L=1,N
              A(LL,L)=A(LL,L)-A(ICOL,L)*DUM
18          CONTINUE
          END IF
21      CONTINUE
22    CONTINUE
      DO 24 L=N,1,-1
        IF(INDXR(L).NE.INDXC(L))THEN
          DO 23 K=1,N
            DUM=A(K,INDXR(L))
            A(K,INDXR(L))=A(K,INDXC(L))
            A(K,INDXC(L))=DUM
23        CONTINUE
        ENDIF
24    CONTINUE

      RETURN
      END

!---------------------------------------------------------------
! Subroutine for evaluating the determinant of a matrix using
! the partial-pivoting Gaussian elimination scheme.
!---------------------------------------------------------------

      SUBROUTINE DTRM(mna,A,N,D,INDX1)

      integer mna
      integer INDX1(mna)
      real*8 A(mna,mna)
      real*8 D,MSGN
      integer J

      CALL ELGS(mna,A,N,INDX1)
      D = 1.D0
      DO     100 I = 1, N
         D = D*A(INDX1(I),I)

  100 CONTINUE

      MSGN = 1.D0
      DO     200 I = 1, N
        DO   150 WHILE (I.NE.INDX1(I))
          MSGN = -MSGN
          J = INDX1(I)
          INDX1(I) = INDX1(J)
          INDX1(J) = J
  150   END DO
  200 CONTINUE
      D = MSGN*D

      RETURN
      END
!-------------------------------------------------------------------
! Subroutine to perform the partial-pivoting Gaussian elimination.
! A(N,N) is the original matrix in the input and transformed
! matrix plus the pivoting element ratios below the diagonal in
! the output. INDX1(N) records the pivoting order.
!-------------------------------------------------------------------
      SUBROUTINE ELGS(mna,A,N,INDX1)


      integer mna 
      integer INDX1(mna)
      real*8 A(mna,mna),C(mna)
      integer ITMP,K
      real*8 PI,PI1,PJ,C1

! Initialize the index.

      DO     50    I = 1, N
        INDX1(I) = I

   50 CONTINUE

! Find the rescaling factors, one from each row.

        DO     100   I = 1, N
          C1= 0.D0
          DO    90   J = 1, N
            C1 = DMAX1(C1,DABS(A(I,J)))
   90     CONTINUE
          C(I) = C1

  100   CONTINUE

! Search the pivoting (largest) element from each column.

      DO     200   J = 1, N-1
        PI1 = 0.D0
        DO   150   I = J, N
          PI = DABS(A(INDX1(I),J))/C(INDX1(I))
          IF (PI.GT.PI1) THEN
            PI1 = PI
            K   = I
          ELSE
          ENDIF
  150   CONTINUE

! Interchange the rows via INDX1(N) to record pivoting order.

        ITMP    = INDX1(J)
        INDX1(J) = INDX1(K)
        INDX1(K) = ITMP
        DO   170   I = J+1, N
          PJ = A(INDX1(I),J)/A(INDX1(J),J)

! Record pivoting ratios below the diagonal.

          A(INDX1(I),J) = PJ

! Modify other elements accordingly.

          DO 160   K = J+1, N
            A(INDX1(I),K) = A(INDX1(I),K)-PJ*A(INDX1(J),K)
  160     CONTINUE
  170   CONTINUE
  200 CONTINUE

      RETURN
      END
!--------------------------------------------------------------------------