
/*  KOMPRESUJE/DEKOMPRESUJE pliki tekstowe ORAZ binarne :-)   */
/*  >> Wersja Z TABLICA mieszajaca do zliczania znakow <<  */

/*  ---  Autor: ADAM RYCERZ, WFAIS UJ, Grudzien 2023  ---  */

/*
    ----------------------------------------------------------
    Program kompresuje plik zliczajac znaki i budujac drzewo
    Huffmana. Poprawne uzycie:
   
    ./hufmann [-u | -i | -t] source_file target_file
  
    (Jesli nie podano argumentu "target_file" wynik zostanie
    wypisany na standardowe wyjscie.)
    Przelaczniki opcjonalne:
      -u :  tryb dekompresji 
      -i :  wypisanie dodatkowych informacji na  stderr
      -t :  wynik kompresji i dekompresji wypisany na  stderr
    ----------------------------------------------------------
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>

#define NCHARS (01 << 8)
#define CHAR_SEP '|'
#define TEST1 0  /* Testujemy zliczanie i sortowanie... */
#define TEST2 0  /* ... efekt laczenia 2 elementow  */
#define TEST3 0  /* ... generowanie kodow ... */
#define TEST4 0  /* ... oraz dekompresje!  */

#define print_var(a) printf("## " #a " = %u\n", a) 


struct node {
  unsigned char c;
  unsigned count;
  char code[NCHARS+1];
  int len;
  struct node *left;
  struct node *right;
} *char_map[NCHARS+1], *char_tree[NCHARS+1];  /* Na koncu - 'NULL' */


/* --- Prototypy funkcji wolanych w MAIN() --- */
int CountChars(FILE *fnam, struct node *tab[],
	       unsigned *file_size, int *last_char);
void BuildTree(struct node *tab[], int nchars, int last, int info);
void BuildCodes(struct node *root, char new_code[]);
unsigned PrintCodes(FILE *file, struct node *tab[], int last,
		    char sep, int info,
		    unsigned *comp_size);
unsigned Compress(FILE *ifile, struct node *tab[], FILE *ofile);

/*  F-cje dla opcji '-u':  */
struct node *ReadTree(FILE *file, unsigned tsize, char sep);
unsigned UCompress(FILE *ifile, struct node *root, unsigned file_size,
		   FILE *ofile); 




/*  ############################  MAIN  ############################  */

int main(int narg, char *argv[])
{
  char *prog = argv[0], *opts = "-u | -i | -t";
  FILE *ifile, *ofile;
  unsigned xsize[3], csize1; 
  int nchars, nx = sizeof(xsize)/sizeof(xsize[0]),
    c, last, ucomp = 0, info = 0, test = 0;
  
  /* Rozpoznawanie opcji: */
  while (--narg > 0 && (*++argv)[0]=='-')
    while ((c = *++argv[0]))
      switch (c) {
      case 'u':
	ucomp = 1; break;
      case 'i':
	info = 1; break;
      case 't':
	test = 1; break;
      default:
	fprintf(stderr, "%s: illegal option "
		"-%c\n", prog, c);
	narg = 0;
	break;
      }
    
  if (narg < 1) {
    fprintf(stderr,"Usage: "
	    "%s [%s] source_file target_file\n" "       "
	    "%s [%s] source_file > target_file\n",
	    prog, opts, prog, opts);
    exit(1);
  }
  else if (narg < 2)
    ofile = stdout;
  else
    assert(ofile=fopen(argv[1], "wb"));
  assert(ifile=fopen(argv[0], "rb"));
    
  if (!ucomp) {
    
    /* ZLICZANIE ZNAKOW I SORTOWANIE */
    nchars = CountChars(ifile, char_map, &xsize[0], &last);
  
    memcpy(char_tree, char_map,
	   (last+1)*sizeof(struct node*));
    BuildTree(char_tree, nchars, last, info);
    assert(xsize[0] == char_tree[0]->count);
  
    BuildCodes(char_tree[0], ""); 
    xsize[1] = PrintCodes(NULL, char_map, last, CHAR_SEP, info, &xsize[2]);
    
    if (TEST1 || info) {
      fprintf(stderr, "# Rozmiar pliku wejsciowego:  %u\n"
	      "# Rozmiar drzewa (z separatorami):  %u\n"
	      "# Rozmiar po kompresji (bez naglowka):  %u\n"
	      "# CALKOWITY rozmiar po kompresji:  ==>  %u  <==\n\n",
	      xsize[0], xsize[1], xsize[2],
	      (unsigned)sizeof(xsize) + xsize[1] + xsize[2]);
    }

    fwrite(xsize, sizeof(xsize[0]), nx, ofile);
    PrintCodes(ofile, char_map, last, CHAR_SEP, 0, &xsize[2]);
    /* print_var(xsize[2]); */
    rewind(ifile);
    csize1 = Compress(ifile, char_map, ofile);
    /* print_var(csize1); */
    assert(csize1 == xsize[2]); 
  
    if ((TEST4 || test) && argv[1]) {
      fclose(ifile);
      fflush(ofile);
      assert(ifile=fopen(argv[1], "rb"));
      fseek(ifile, (unsigned)sizeof(xsize) + xsize[1], SEEK_SET);
      
      if (UCompress(ifile, char_tree[0], xsize[0], stderr))
	fprintf(stderr, "## Blad dekompresji !!!\n");
      
    }
  }
  else { /* ucomp == 1 */
    
    assert(nx == fread(xsize, sizeof(xsize[0]), nx, ifile));

    char_tree[0] = ReadTree(ifile,  xsize[1], CHAR_SEP);    
    if (NULL == char_tree[0]) {
      fprintf(stderr, "%s: failed reading tree from %s\n",
	      prog, argv[0]);
      exit(3); 
    }
    if (UCompress(ifile, char_tree[0], xsize[0], ofile))
      fprintf(stderr, "%s: decompression failed.\n", prog);
  }
  
  fclose(ifile);
  if (ofile != stdout) 
    fclose(ofile);
  
  return 0;
}

/*  ################################################################  */




/* --> TUTAJ prototypy 'niskiego poziomu' <-- */ 
struct node *NewNode(int c);
void Print1Leaf(FILE *fnam, struct node *t);
void PrintCounts(FILE *fnam, struct node *tab[]); 
int comp(const void *a, const void *b);
void SortCounts(struct node *tab[], int nchars);
struct node *Join2Nodes(struct node *left, struct node *right); 
int put1bit(FILE *f, int bit);
int get1bit(FILE *f);


int CountChars(FILE *fnam, struct node *tab[], 
	       unsigned *isize, int *last_char)
{
  int c, j, n=0;
  
  *isize = *last_char = 0;
  while ( EOF != (c = getc(fnam)) ) {    
    if ((*isize)++ >= UINT_MAX) {
      fprintf(stderr, "ERROR: maximum file size exceeded.\n");
      exit(2);
    }
    if (tab[c] == NULL) {
      tab[c] = NewNode(c);
      n++;
      if (c > *last_char) 
	*last_char = c;
    }
    else {
      tab[c]->count++;
    }
  }

  return n;
}


void BuildTree(struct node *tab[], int n, int last, int info)
{
  if (TEST1 || info) {
    fprintf(stderr,
	    "## Zidentyfikowano %d rozn%s\n", n,
	    (2<=(n%10) && (n%10)<=4) ? "e znaki" : "ych znakow");
    fprintf(stderr, "\n# ... a teraz sortujemy ...\n\n");
  }
  
  /* SortCounts(tab, n); */
  SortCounts(tab, last+1); 
  
  if (TEST1 || info) {  
    PrintCounts(stderr, tab);
    fprintf(stderr, "\n");
  }

  while (n-- > 1) {
    tab[n-1] = Join2Nodes(tab[n-1], tab[n]);
    tab[n] = NULL;
    
    SortCounts(tab, n);
    
    if (TEST2) {
      PrintCounts(stderr, tab);
    }
  }
}


void BuildCodes(struct node *t, char new_code[])
{
  if (t == NULL)
    return;

  strcat(t->code, new_code);
  t->len = strlen(t->code); 
  
  if (t->left == NULL && t->right == NULL) 
    return;

  strcpy(t->left->code, t->code);
  BuildCodes(t->left, "0");
  strcpy(t->right->code, t->code);
  BuildCodes(t->right, "1");
}


unsigned PrintCodes(FILE *fnam, struct node *tab[], int last,
		    char sep, int info,
		    unsigned *csize)
{
  unsigned tsize = *csize = 0, offset = 0;
  int j, clen;

  info = TEST3 || info;
  if (!fnam && info)
    fprintf(stderr, "# Kody Huffmana:\n");
  for (j=0; j<=last; j++)
    if (tab[j]) {
      if (fnam != NULL) {  
	fputs(tab[j]->code, fnam);
	putc(sep, fnam);
	putc(tab[j]->c, fnam);
	/* DRUGI 'SEP' NIEPOTRZEBNY... */
      }
      if (!fnam && info)
	Print1Leaf(stderr, tab[j]);
    
      tsize += tab[j]->len + 2;  /* -->patrz wyzej<-- */
      clen = tab[j]->count * tab[j]->len;
      *csize += clen / 8;
      offset += clen % 8;
      *csize += offset/8;
      offset %= 8;
    }
  if (!fnam && info)
    fprintf(stderr, "\n");
  
  *csize += (offset > 0);
  return tsize;
}


unsigned Compress(FILE *ifile, struct node *tab[], FILE *ofile)
{
  unsigned csize = 0, offset = 0;
  int c, j, bit, last;

  while ( EOF != (c = getc(ifile)) )
    if (tab[c]) {
    
      csize += (tab[c]->len) / 8;
      offset += (tab[c]->len) % 8;
      csize += offset/8;
      offset %= 8;

      if (ofile != NULL) {
	j=0;
	while ((bit = tab[c]->code[j++]) != '\0')
	  last = put1bit(ofile, bit);	  
      }
      
    }
    else {
      fprintf(stderr, "ERROR: incomplete tree. \n");
      return 0;
    }
  if (offset > 0) {
    csize++;
    putc(last, ofile);
  }
  
  return csize;
}


struct node *ReadTree(FILE *file, unsigned tsize, char sep)
{
  struct node *root = NewNode('\0'), *p;
  unsigned char *codes, *ptr, c;

  assert( (codes = (unsigned char*)malloc(tsize)) );
  assert(tsize == fread(codes, 1, tsize, file));
    
  p = root;
  ptr = codes;
  while (tsize-- > 0) {
    c = *ptr++;
    if (c == '0') {
      if (p->left == NULL)
	p->left = NewNode('\0');
      p = p->left;
    }
    else if (c == '1') {
      if (p->right == NULL)
	p->right = NewNode('\0');
      p = p->right;
    }
    else if (c == sep) {
      if (tsize-- < 1) { /* Brakuje znaku do wczytania */
	p = NULL;
	break;
      }
      p->c = *ptr++;
      p = root; 
    }
    else { /* Nieprawidlowy znak */
      p = NULL;
      break;
    }
  }
  free(codes);
  
  if (p != root)  /* Cos poszlo nie tak ... */
    return NULL;
  
  return root;
}


unsigned UCompress(FILE *ifile, struct node *root, unsigned file_size,
	      FILE *ofile)
{
  int bit;
  struct node *p = root;

  while (file_size && EOF != (bit = get1bit(ifile))) {
    if (bit == '0')
      p = p->left;
    else if (bit == '1')
      p = p->right;
    if (p->left == NULL && p->right == NULL) {
      putc(p->c, ofile);
      p = root;
      file_size--;
    }
    
  }
  
  return file_size;
}


/*  ################################################################  */
/*  ---- >>>  F U N K C J E   N I S K O P O Z I O M O W E  <<< -----  */
/*  ################################################################  */


struct node *NewNode(int c)
{
  struct node *t;

  t = (struct node*)malloc(sizeof(struct node));
  if (NULL == t) return NULL;
  t->c = c;
  t->count = 1;
  t->code[0] = '\0';
  t->len = 0;
  t->left = NULL;
  t->right = NULL;
  
  return t;
}


void Print1Leaf(FILE *fnam, struct node *t)
{
  int c = t->c;

  fprintf(fnam, "%s", t->code); 
  fprintf(fnam, "  --> ");
  if (c == '\n')
    fprintf(fnam, "\'\\n\': ");
  else if (c == '\t')
    fprintf(fnam, "\'\\t\': ");
  else if (c == '\0')
    fprintf(fnam, "\'\\0\': ");
  else
    fprintf(fnam, "\'%c\': ", c);
  fprintf(fnam, "%u\n", t->count); 
}


void PrintCounts(FILE *fnam, struct node *tab[])
/*
  Pozwala testowac program na plikach tekstowych
*/ 
{  
  fprintf(fnam, "# Wyniki zliczania znakow: \n");
  while (*tab) {
    Print1Leaf(fnam, *tab);
    tab++;
  }
}


int comp(const void *a, const void *b)
{
  struct node **px = (struct node **)a, **py = (struct node **)b;
  int x = (*px)?(*px)->count:0, y = (*py)?(*py)->count:0;
  return (y-x);
}


void SortCounts(struct node *tab[], int nchars)
{
  qsort(tab, nchars, sizeof(struct node *), comp); 
}


struct node *Join2Nodes(struct node *left, struct node *right)
{
  struct node *newnode = NewNode('\0');

  if (NULL == newnode) return NULL;
  newnode->count = left->count + right->count;
  newnode->left = left;
  newnode->right = right;
  return newnode;
}


int put1bit(FILE *f, int bit)
{
  static unsigned char b = 00u;
  static int n=0, out;

  if (bit == '1')
    b += 01u << n;
  n++;
  
  if (n == 8) {
    out = putc(b, f);
    b =  00u;
    n = 0;
    return out;
  }
  
  return b;
}


int get1bit(FILE *f)
{
  static unsigned char b;
  static int n=0, out;

  if (n == 0) {
    if (EOF == (out = getc(f)))
	return EOF;
    b = out;
  }
  if (b & 01u)
    out = '1';
  else
    out = '0';
  b >>= 1;
  if (++n == 8)
    n = 0;  

  return out; 
}


