function agrupa, data_in, kclusters, ini=ini
;does clustering of data_in
;A. Sainz Dalda and L. Kleint
;data_in: arr[nprofiles,nwavelengths]
;kclusters: number of clusters wanted
;ini=initial guess
;mod sep 28, 2017 to return matrix with numbers cluster association in gfin

  
dim=double(size(data_in))
kclusters=double(kclusters)
print,'clusters: ',kclusters

c_ini=intarr(kclusters)-1
cond=0
ctr=0d

while cond eq 0 do begin
      ran=abs(floor(randomu(s)*dim[1])) ;index of random profile within range [0,#profiles]
      diff=(where(c_ini eq ran))(0)     ;find if randomly chosen profile was chosen before
      if diff eq -1 then  begin         ;-1 -> new random profile
         c_ini[ctr]=ran                 ;assign index of random profile to c_ini
         if ctr eq kclusters-1 then cond=1
         ctr++
      endif
endwhile  
kmean=float(data_in[c_ini,*])  ;kmean contains randomly selected starting profiles


if keyword_set(ini) then begin
   ini=reform(ini)  ;LK added to avoid crash if single profile in array
   dim_ini=size(ini)
   if dim_ini[0] eq 1 then begin
              kmean[0,*]=ini               ;single profile initial guess
              c_ini[0] = -1
   endif
   if dim_ini[0] eq 2 then begin
              kmean[0:dim_ini[1]-1,*]=ini  ;multiple profiles initial guess
              c_ini[0:dim_ini[1]-1] = -1
   endif
endif 


d  = fltarr(dim[1], kclusters)  ;array (#profiles,#clusters), least sq values
daux = fltarr(dim[1], kclusters)
;g = fltarr(dim[1], kclusters)  ;old
g = fltarr(dim[1])  ;array (#profiles), contains number of cluster
gfin = fltarr(dim[1], kclusters)+1.
h=0d

while (where(g ne gfin))[0] ne -1  do begin
      gfin=g
;      g=fltarr(dim[1], kclusters) ;old
      g=fltarr(dim[1])
;      for k=0d, kclusters-1d do begin   ;for each cluster (randomly chosen profile)
;          for i=0d, dim[1]-1d do begin  ;for each profile
;              d[i,k]=sqrt(total((data_in[i,*]-kmean[k,*])^2.)) ;"least squares"
;          endfor
;      endfor
;vectorize
      for k=0d, kclusters-1d do begin   ;for each cluster (randomly chosen profile)
       tmp = (data_in - transpose(rebin(reform(kmean[k,*]),dim[2],dim[1])))^2.
       d[0,k] = sqrt(total(tmp,2))  ;use 0 for faster execution (will assign full vector)
      endfor



      for i=0d, dim[1]-1d do begin
          w=where(d[i,*] eq min(d[i,*]))  ;look for minimum least square value
;          g[i,w[0]]=1.  ;g matrix has 1 if profile belongs to cluster
          g[i]=w  ;g matrix has number of cluster
      endfor

      for k=0, kclusters-1 do begin
 ;         w=where(g[*,k] eq 1., cuan)
          w=where(g eq k, cuan)
;         if w[0] ne -1 then kmean[k,*]=total(float(data_in[w,*]),1)/float(cuan) ;save average profile of cluster
         if w[0] ne -1 then kmean[k,*]=avg(float(data_in[w,*]),0) ;save avg
      endfor 
      h++
print,h
endwhile

gfin=g

return, {gfin:gfin, kmean:kmean, d:d, c_ini:c_ini} 
;gfin: matrix with 0 and 1 for cluster association
;kmean: average profile for each cluster
;d: least squares values
;c_ini: positions of randomly chosen profiles (-1 for initial guesses)

end
