/***************************** LICENSE START ***********************************

 Copyright 2012 ECMWF and INPE. This software is distributed under the terms
 of the Apache License version 2.0. In applying this license, ECMWF does not
 waive the privileges and immunities granted to it by virtue of its status as
 an Intergovernmental Organization or submit itself to any jurisdiction.

 ***************************** LICENSE END *************************************/

#include <string>
#include "inc_iostream.h"
#include "inc_stl.h"
#include "mars.h"

#include "MvScanFileType.h"

#include <sys/types.h>
#include <dirent.h>
#include <errno.h>

#include "netcdfcpp.h"


//-- Coarse file classification returned by the local function
//-- 'SearchMarsParserBreakers'.
enum eFileType
{
   eMissingFile,        //-- the file could not be opened
   eBinaryFile,         //-- a non-printable, non-space character was found
   eNumericTextFile,    //-- only "numbers only" characters were found
   eOtherTextFile       //-- any other text file
};

static const char* MvNetcdfType(const char* file,bool checkStatus=false);

//_______________________________________________________________________
//
// Tells whether a character may appear in a file that contains only
// numeric values. Such files cannot be Metview requests, but they can
// break MARS parser if allowed to be parsed (late 2006 MARS parser was
// modified to allow IDENT to contain '+' and spaces => too long IDENT!).
//
// Accepted characters: digits, white space, decimal point, plus, minus
// and the exponent characters e/E.
//
// Values outside the unsigned char range (for instance a negative value
// produced by widening a plain 'char' that holds a byte >= 0x80, or EOF)
// are rejected before any <ctype.h> function is called: those functions
// are only defined for unsigned char values and EOF.
//

static bool isValidForNumbersOnly( int c )
{
   if( c < 0 || c > 0xFF )            //-- not a character value at all
      return false;

   switch( c )
   {
      case '.':
      case '-':
      case '+':
      case 'e':
      case 'E':
         return true;

      default:
         break;
   }

   return isdigit(c) || isspace(c);
}


//_______________________________________________________________________

#ifndef DOXYGEN_SHOULD_SKIP_THIS
//-- Small owning wrapper around a C stdio stream: the constructor opens
//-- the file with fopen() and the destructor closes it if it is open.
//-- f_ is NULL when the open failed; users test and use f_ directly.
struct File {

  FILE *f_;

  File(const char *name, const char *mode) : f_( fopen(name,mode) ) {}
  ~File()                     { if(f_) fclose(f_); }

private:
  //-- Not copyable: a copy would share f_ and both destructors would
  //-- fclose() the same stream. Declared only, intentionally not defined.
  File(const File&);
  File& operator=(const File&);
};
#endif


//_______________________________________________________________________
//
// Classifies a file by inspecting (at most) its first CHECKLEN characters,
// in order to find files that may break MARS parser, e.g. binary files and
// numeric-values-only text files (like the one produced by Cross Section).
//
// Returns:
//   eMissingFile      the file could not be opened
//   eBinaryFile       a character that is neither printable nor white
//                     space was found
//   eNumericTextFile  every inspected character is valid for a
//                     numbers-only file (this is also the result when
//                     nothing could be read, e.g. for an empty file)
//   eOtherTextFile    anything else

static eFileType SearchMarsParserBreakers( const char* fileName )
{
   File f(fileName, "r");             //-- will be closed automatically

   if(!f.f_)
     return eMissingFile;             //-- missing file

   const int CHECKLEN = 4000;         //-- may be a huge PostScript file
                                      //-- => check beginning only is ok
   bool isNumbersOnly = true;

   for( int ci=0; ci<CHECKLEN; ++ci ) //-- look for "binary" chars
     {
       //-- fgetc() hands back the byte as an unsigned char value (or EOF),
       //-- which is exactly the domain the <ctype.h> functions are defined
       //-- for; a plain 'char' would turn bytes >= 0x80 into negative ints
       const int chr = fgetc(f.f_);

       if( chr == EOF )               //-- end of file or read error
         break;

       if( ! isprint(chr) && ! isspace(chr) )
         return eBinaryFile;          //-- binary files break MARS parser

       if( isNumbersOnly )
           isNumbersOnly = isValidForNumbersOnly(chr);
     }

   if( isNumbersOnly )
      return eNumericTextFile;        //-- 'numbers only' may break MARS parser

   return eOtherTextFile;             //-- other files should be parsable
}


// scan_file_extension
// - tells whether the file name 'file' (whose length is 'length') ends
//   with the extension 'ext'; the comparison ignores case.
//   Returns true on a match, false otherwise (including when the name
//   is shorter than the extension).

inline static bool scan_file_extension (const char *file, int length, const char *ext)
{
  const int extLen = strlen (ext);

  if (length < extLen)
    return false;

  const char *tail = file + (length - extLen);

  return strcasecmp (tail, ext) == 0;
}

// Checks if a directory is an ODB database: returns true as soon as the
// directory contains an entry whose name ends with ".sch" and has at
// least one character in front of that suffix.
//
// Returns false when no such entry exists, or (after printing a message
// to cout) when the directory cannot be opened.

static bool scanDirForOdb(const char *dir)
{
    DIR *dp = opendir(dir);
    if(dp == NULL)
    {
        const int err = errno;   //-- save it: the stream output may change errno
        cout << "scanDirForOdb() --> Error(" << err << ") opening " << dir << endl;
        return false;
    }

    const string::size_type suffixLen = 4;   //-- length of ".sch"

    bool retval = false;
    struct dirent *dirp;
    while((dirp = readdir(dp)) != NULL)
    {
        const string name(dirp->d_name);

        //-- compare the tail of the name only; searching for the first
        //-- ".sch" would reject names such as "a.sch.sch"
        if(name.size() > suffixLen &&
           name.compare(name.size() - suffixLen, suffixLen, ".sch") == 0)
        {
            retval = true;
            break;
        }
    }

    closedir(dp);
    return retval;
}


//_______________________________________________________________________

//
// NOTE: a close cousin of scan_file(...) is in ./src/libMars/guess.c !!!
//

// scan_file_has_magic
// - tells whether the bytes at 'buf' start with the string 'magic'.
//   The caller must make sure that strlen(magic) bytes are readable.

static bool scan_file_has_magic(const char *buf, const char *magic)
{
  return strncmp(buf, magic, strlen(magic)) == 0;
}

// scan_file
// - works out the Metview class name of a file. The checks are made in
//   this order and the first match wins:
//     1. file name extension (case-insensitive)
//     2. known strings at the start of files classified as binary
//     3. numbers-only text files -> "NOTE" (must not reach MARS parser)
//     4. known strings at the start of the file (text types)
//     5. keywords within the first 311 characters
//        (FLEXTRA, IFS namelist, RTTOV)
//   Anything else is "BINARY" (if classified as binary) or "NOTE".
//   Returns "BAD" if 'file' is NULL or the file cannot be opened.
//   Every returned pointer, except the one forwarded from MvNetcdfType,
//   refers to a string literal.

static const char *scan_file(const char *file)
{
  if(file == NULL)
    return "BAD";                 //-- nothing to open

  const int cMAXSHIFT = 8;  //-- search tolerance for word "GRIB" or "BUFR"
  const int cMINREAD  = 4;  //-- need to be able to read at least this much
  const int cHEADLEN  = 16; //-- bytes read for the "magic" tests; must be
                            //-- >= cMAXSHIFT + 4 and >= the longest string
                            //-- compared below ("#obstat scatter", 15 chars)
  const int cSCANLEN  = 311;//-- we need at least 311 characters for flextra!

  const int length = strlen (file);

  eFileType fileType = SearchMarsParserBreakers( file ); //-- binary? only numeric values?

  File f(file,"r");
  if(!f.f_)
    return "BAD";                 //-- missing or non-readable file

  struct Rule {
    const char *key;              //-- extension or leading string to look for
    const char *type;             //-- Metview class name returned on a match
  };

  // check the file extension first - remember to put the longest
  // ones at the start.
  static const Rule extRules[] = {
    { ".tar.gz",  "TAR_GZ"         },
    { ".tar.bz",  "TAR_BZ2"        },
    { ".tar.bz2", "TAR_BZ2"        },
    { ".tar",     "TAR"            },
    { ".magml",   "MAGML"          },
    { ".html",    "HTML"           },
    { ".zip",     "ZIP"            },
    { ".gif",     "GIF"            },
    { ".tiff",    "TIFF"           },
    { ".tif",     "TIFF"           },
    { ".svg",     "SVG"            },
    { ".kml",     "KML"            },
    { ".kmz",     "KML"            },
    { ".mv",      "MACRO"          },
    { ".csv",     "TABLE"          },
    { ".tsv",     "TABLE"          },
    { ".vdf",     "VAPOR_VDF_FILE" },
    { ".XPIF",    "XPIF"           },
    { ".gtif",    "GEOTIFF"        }
  };
  const int extCount = (int)(sizeof(extRules) / sizeof(extRules[0]));

  for( int i=0; i < extCount; ++i )
  {
    if (scan_file_extension (file, length, extRules[i].key))
      return extRules[i].type;
  }

  //if (scan_file_extension (file, length, ".odb"))     return "ODB_DB";


  char head[ cHEADLEN ];          //-- zero filled, so short files compare safely
  memset(head,0,sizeof(head));

  int readCount = fread(head,1,sizeof(head),f.f_);

  if( readCount < cMINREAD )      //-- if not enough bytes for safe tests
    {
      return fileType == eBinaryFile ? "BINARY" : "NOTE";
    }


  if( fileType == eBinaryFile )           //-- first check for binary types
  {
      for( int s=0; s <= cMAXSHIFT; ++s ) //-- for GRIB & BUFR allow some leading rubbish
      {
        if(scan_file_has_magic(head+s,"GRIB"))
          return "GRIB";

        if(scan_file_has_magic(head+s,"BUFR"))
          return "BUFR";

        if(scan_file_has_magic(head+s,"ODA"))
          return "ODB_DB";
      }

      if(scan_file_has_magic(head,"TIDE"))
        return "GRIB";

      if(scan_file_has_magic(head,"BUDG"))
        return "GRIB";

      if(scan_file_has_magic(head,"CDF"))
        return MvNetcdfType(file);

      if(scan_file_has_magic(head+1,"HDF"))
        return "NETCDF";

      if(scan_file_has_magic(head+1,"PNG"))
        return "PNG";

      if(scan_file_has_magic(head+6,"JFIF"))
        return "JPEG";                //--  JPEG - JFIF

      if(scan_file_has_magic(head+6,"Exif"))
        return "JPEG";                //--  JPEG - EXIF/DCF

      if(scan_file_has_magic(head,"%PDF"))   // PDF can be binary
        return "PDF";
  }

  //An unknown binary file is NOT reported as "BINARY" here: that has to
  //wait until after the FLEXTRA check (see below) since
  //FLEXTRA v5 generates ASCII files with some rubbish in them and
  //we identify them as BINARY files!!

  if( fileType == eNumericTextFile )
      return "NOTE";   //"NUMERIC"; //-- need to avoid MARS parser!

                                  //-- check for text types that cannot be requests
  static const Rule textRules[] = {
    { "%!",              "PSFILE"         },
    { "%PDF",            "PDF"            },   // PDF can be text
    { "#!",              "SHELL"          },
    { "#GEO",            "GEOPOINTS"      },
    { "#LLM",            "LLMATRIX"       },
    { "#LLV",            "LLVALUE"        },
    { "#MACRO",          "MACRO"          },
    { "# Macro",         "MACRO"          },
    { "#Metview",        "MACRO"          },
    { "# Metview",       "MACRO"          },
    { "<magics",         "MAGML"          },
    { "#obstat scatter", "OBSTAT_SCATTER" },   // must come before "#obstat"
    { "#obstat",         "OBSTAT_CURVE"   }
  };
  const int textCount = (int)(sizeof(textRules) / sizeof(textRules[0]));

  for( int i=0; i < textCount; ++i )
  {
    if(scan_file_has_magic(head,textRules[i].key))
      return textRules[i].type;
  }

  //-- The file is not parsed as a Metview request here (that code was
  //-- disabled: WARNING (2004-05-10): a file NOT ending in CR can crash
  //-- the parser!!!)

  rewind(f.f_);

  //-- Check for custom ASCII files
  char asciiBuf[ cSCANLEN + 1 ];
  const size_t got = fread(asciiBuf,1,cSCANLEN,f.f_);
  asciiBuf[got] = '\0';           //-- strstr needs a terminated string

  //Flextra output files
  if(strstr(asciiBuf,"FLEXTRA MODEL OUTPUT") != 0)
    return "FLEXTRA_FILE";

  //IFS namelist file
  if(strstr(asciiBuf,"&NAM") != 0 ||
     strstr(asciiBuf,"&NAE") != 0 ||
     strstr(asciiBuf,"&NEM") != 0)
    return "NAMELIST";

  // RTTOV output files - we may need to adjust this test in the future
  if(strstr(asciiBuf, "# RTTOV OUTPUT") != 0)
    return "RTTOV_OUTPUT_FILE";

  //We have to do this check at this point, after
  //FLEXTRA was checked!
  if( fileType == eBinaryFile )
      return "BINARY";

  return "NOTE";                  //-- unknown text file; make it NOTE
}

//_______________________________________________________________________

//-- ScanFileType (old name in MetviewUI: guess_file)
//-- Returns the Metview class name for the given path:
//--   "BAD"               if stat() fails
//--   "ODB_DB" / "FOLDER" for a directory, depending on scanDirForOdb()
//--   result of scan_file() for a regular file
//--   "SPECIAL"           for any other kind of file

string ScanFileType(const char* file)
{
  struct stat info;

  if(stat(file,&info) < 0)
    return "BAD";

  const mode_t kind = info.st_mode & S_IFMT;

  if(kind == S_IFDIR)
    return scanDirForOdb(file) ? "ODB_DB" : "FOLDER";

  if(kind == S_IFREG)
    return scan_file(file);

  return "SPECIAL";
}

//_______________________________________________________________________

//----
//--- Check if a file is a binary file or a missing file i.e.
//--  does the file have any chance to contain a Metview request!
//-

bool
IsBinaryOrMissingFile( const char* fileName )
{
   eFileType t = SearchMarsParserBreakers( fileName );
   return t == eBinaryFile || t == eMissingFile;
}

//_______________________________________________________________________

//-- Returns the value of octet 8 of the given file (0..255), which the
//-- function name implies is the GRIB version. The file is not checked
//-- to start with "GRIB".
//-- Returns -13 if the file is missing or non-readable, or if it is too
//-- short to contain octet 8.
int
MvGribVersion( const char* fileName )
{
  const int cBADFILE = -13;

  File f( fileName, "r" );
  if( ! f.f_ )
    return cBADFILE;            //-- missing or non-readable file

  //-- unsigned: an octet >= 0x80 must not turn into a negative number
  //-- (0xF3 read through a signed char would equal the error code -13)
  unsigned char head[ 8 ];

  if( fread( head, 1, sizeof(head), f.f_ ) != sizeof(head) )
    return cBADFILE;            //-- octet 8 is not there

  return head[7];               //-- octet 8
}

//----------------------------------------------------------------------------
//
// Finds out the kind of ODB a path refers to. Unless scanBeforeCheck is
// set, the path is supposed to have been identified as ODB previously.
//
// If scanBeforeCheck is set the path is passed to ScanFileType() first
// and, when the result is not "ODB_DB", that result is returned as is.
//
// Otherwise: "BAD" if stat() fails, "NOTE" if the size is zero,
// "ODB_OLD" for a directory, "ODB_NEW" for a regular file and
// "SPECIAL" for anything else.
//
//----------------------------------------------------------------------------

string MvOdbType(const char* file,bool scanBeforeCheck)
{
	if(scanBeforeCheck)
	{
		const string scanned = ScanFileType(file);
		if(scanned != "ODB_DB")
			return scanned;
	}

	//At this point type has to be ODB_DB
	//We check if it is a file or directory

	struct stat info;

	if(stat(file,&info) < 0)
		return "BAD";

	if(info.st_size == 0)
		return "NOTE";

	const mode_t kind = info.st_mode & S_IFMT;

	if(kind == S_IFDIR)
		return "ODB_OLD";

	if(kind == S_IFREG)
		return "ODB_NEW";

	return "SPECIAL";
}

// Works out the Metview class name of a file that is expected to be NetCDF.
//
// If checkStatus is set the file is examined with stat() first: "BAD" is
// returned if that fails and "NOTE" if the file is empty.
//
// Returns "BINARY" if the file cannot be opened as NetCDF. Otherwise the
// first value of the global attribute "dataID" selects the type by its
// prefix:
//    "scm_" or "SCM_INPUT"   -> "SCM_INPUT_DATA"
//    "SCM_OUTPUT"            -> "SCM_OUTPUT_DATA"
//    "NETCDF_RTTOV_INPUT"    -> "NETCDF_RTTOV_INPUT"
// and "NETCDF" is returned when no attribute matches.
// All returned pointers refer to string literals.

const char* MvNetcdfType(const char* file,bool checkStatus)
{
	if(checkStatus)
	{
		struct stat buf;
		if(stat(file,&buf) <0)
			return "BAD";
		else if(buf.st_size ==0 )
			return "NOTE";
	}

	NcFile nc(file);
	if(!nc.is_valid())
	{
		return "BINARY";
	}

	const char* type = 0;            //-- set as soon as a dataID prefix matches

	//Check for SCM input data
	const int n=nc.num_atts();
	for(int i=0; i < n && type == 0; i++)
	{
		NcAtt* att=nc.get_att(i);
		if(att == NULL)
			continue;

		if(att->name() != NULL &&
		   strcmp(att->name(),"dataID") == 0 && att->num_vals() > 0)
		{
			//-- as_string() hands over a newly allocated array:
			//-- copy it and release it straight away
			char* v=att->as_string(0);
			if(v)
			{
				const string vStr(v);
				delete [] v;

				//-- rfind(prefix,0) == 0  <=>  vStr starts with prefix
				if(vStr.rfind("scm_",0) == 0 || vStr.rfind("SCM_INPUT",0) == 0)
				{
					type = "SCM_INPUT_DATA";
				}
				else if(vStr.rfind("SCM_OUTPUT",0) == 0)
				{
					type = "SCM_OUTPUT_DATA";
				}
				else if(vStr.rfind("NETCDF_RTTOV_INPUT",0) == 0)
				{
					type = "NETCDF_RTTOV_INPUT";
				}
			}
		}

		delete att;                  //-- get_att() result is owned by the caller,
		                             //-- so release it on every path
	}

	return type ? type : "NETCDF";
}
