Logo Search packages:      
Sourcecode: kbib version File versions  Download package

bibfile.cpp

/***************************************************************************
                          bibfile.cpp  -  description
                             -------------------
    begin                : Sun May 25 2003
    copyright            : (C) 2003 by Thach Nguyen
    email                : thach@dragon.thach.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/


#include "bibfile.h"
#include "bibentrydeftable.h"


// Tells whether c is a blank or an ASCII control character, i.e. whether it
// lies in the range [0, ' '].
// The explicit lower bound keeps characters above 127 (which are negative
// when char is signed) from being mistaken for control chars.
// Returns 1 when c is in that range, 0 otherwise.
int lt_space ( char c )
{
      if ( c < 0 ) return 0;
      if ( c > ' ' ) return 0;
      return 1;
}


// Tells whether c may appear in a BibTeX identifier as this parser reads
// them: a letter, a digit, '-' or '_'.
// Returns 1 when it may, 0 otherwise.
int ispseudoalpha ( char c )
{
      // The <ctype.h> classifiers are only defined for EOF and values
      // representable as unsigned char; a plain char holding a byte >127 is
      // negative where char is signed, so convert before classifying.
      const unsigned char uc = static_cast<unsigned char> ( c );

      if ( isalpha ( uc ) ) return 1;
      if ( isdigit ( uc ) ) return 1;
      return ( ( c=='-' ) || ( c=='_' ) );
}


// Read one field value from inf, i.e. everything inside its delimiters.
//
// inf            stream being parsed.
// c              in/out.  On entry, the current (not yet processed)
//                character.  On return it is ' ' when reading stopped at the
//                closing brace/paren of the value; otherwise it is left at
//                the character where scanning stopped.
// asStringMacro  out.  false when the value was enclosed in a matching
//                {...}, "..." or (...) pair (the delimiters are stripped from
//                the result); true when it was not enclosed that way.
//
// Returns a pointer into a function-local static buffer, so the result is
// overwritten by the next call.  Returns 0 when end of file is reached while
// braces are still open (broken file).  Values longer than the buffer are
// truncated.
char *read_field ( FILE *inf, char &c, bool &asStringMacro )
{
      enum { BUF_SIZE = 20000 };

      asStringMacro = false;
      int i=0, braceCount = 0, quoteCount = 0, parenCount=0;
      bool wasslash = false, is_brace, is_quote, is_paren, no_delim;
      static char buf[BUF_SIZE];

      buf[0] = '\0';

      // Eat spaces
      while ( lt_space ( c ) && !feof ( inf ) ) c=getc ( inf );

      // The first non-blank character decides which delimiter encloses the value
      is_brace = bool ( c == '{' );
      is_quote = bool ( c == '"' );
      is_paren = bool ( c == '(' );
      no_delim = bool ( !is_brace && !is_quote && !is_paren );

      while ( !feof ( inf ) )
      {
            // A comma or newline ends the value when no delimiter is open
            if ( ( c==',' || c=='\n' ) && ( no_delim || ( is_brace && braceCount==0 ) ||
                                            ( is_quote && quoteCount==0 ) || ( is_paren && parenCount==0 ) ) )
            {
                  buf[i] = 0;
                  break;
            }

            if ( c=='{' ) braceCount++;

            if ( c=='}' )
            {
                  braceCount--;
                  // Closing brace of a brace-delimited value: done
                  if ( braceCount==0 && is_brace )
                  {
                        buf[i] = c;
                        c=' ';
                        break;
                  }
            }

            // A quote preceded by a backslash does not toggle the quote state
            if ( c=='"' && !wasslash && is_quote )
            {
                  quoteCount = 1 - quoteCount;
                  if ( quoteCount==0 )
                  {
                        buf[i] = '"';
                        break;
                  }
            }

            if ( c=='(' ) parenCount++;

            if ( c==')' )
            {
                  parenCount--;
                  // Closing paren of a paren-delimited value: done
                  if ( parenCount==0 && is_paren )
                  {
                        buf[i] = c;
                        c=' ';
                        break;
                  }
            }

            buf[i++] = c;
            // Keep the buffer terminated at all times.  buf is static, so
            // without this an exit through EOF or the size limit would leave
            // buf[i] holding a byte from a previous call, which the trimming
            // and delimiter checks below would then inspect.
            buf[i] = 0;

            // Size Limit: i stops at BUF_SIZE-1, the last valid index, so the
            // terminator written below can never land outside the buffer.
            if ( i > BUF_SIZE - 2 ) break;

            wasslash = ( c == '\\' );
            c = getc ( inf );
      } // main while

      // Broken file
      if ( feof ( inf ) && braceCount>0 ) return 0;

      // Eat right spaces (the terminator itself counts as one for lt_space)
      while ( lt_space ( buf[i] ) && i>0 ) i--;

      if ( ( is_brace && buf[0]=='{' && buf[i]=='}' ) ||
              ( is_quote && buf[0]=='"' && buf[i]=='"' ) ||
              ( is_paren && buf[0]=='(' && buf[i]==')' ) )
      {
            // Strip the enclosing delimiters
            buf[i]=0;
            asStringMacro = false;
            return &buf[1];
      }
      asStringMacro = true;
      buf[i+1]=0;

      return &buf[0];
}



// Parse the first BibTeX entry found in the NUL-terminated string st.
//
// st                     text to parse.
// last_char              out.  Set to i-1, where i is the scan position
//                        reached, on every return path except the early
//                        returns for @preamble/@comment/@string and for an
//                        ignored undefined type, which leave it untouched.
//                        NOTE(review): callers presumably handle those cases
//                        themselves - confirm before relying on last_char there.
// ignoreUndefinedType    when true, an entry whose type is not known to
//                        BibEntryDefTable is skipped (0 is returned); when
//                        false it is created with type "other".
// ignoredUndefinedField  not used by this function.
//
// Returns a newly allocated BibEntry (the caller owns it), or 0 when no entry
// could be read.
BibEntry* processBibEntryFromString ( const char *st, int &last_char,bool ignoreUndefinedType, bool ignoredUndefinedField )
{
      // The length never changes while parsing; computing it once avoids a
      // full strlen() on every loop iteration.
      const size_t len = strlen ( st );
      const int fieldBufSize = 20000;

      bool undefinedType = false;
      unsigned int i,j;
      char c, name[256], key[256], fieldname[256];
      BibEntry *entry = 0;
      Delimiter delim=UNKNOWN;
      char rd='}';
      bool asStringMacro = false;

      // Eat space
      i = 0;
      do
      {
            c = st[i];
            i++;
      }
      while ( lt_space ( c ) && i<len );


      // Eat everything before the @
      while ( c!='@' && i<len )
      {
            // skip comments: a '%' hides the rest of its line
            if ( c=='%' )
                  while ( i<len && st[i]!='\n' )
                        i++;

            c = st[i];
            i++;
      }

      // This is not a BibTeX entry!
      if ( c != '@' )
      {
            last_char = i-1;
            return 0;
      }

      // Eat everything before the name
      do
      {
            c=st[i];
            i++;
      }
      while ( !ispseudoalpha ( c ) && i<len );

      // read the entry type name
      for ( j=0; ispseudoalpha ( c ) && i < len && j < 255; j++ )
      {
            name[j] = c;
            c = st[i];
            i++;
      }
      name[j] = 0;

      // Non-case sensitive.  lName must be as large as name: a 255-character
      // name plus its terminator needs 256 bytes.
      char lName[256];
      for ( j=0; name[j] > ' '; j++ )
            lName[j] = tolower ( static_cast<unsigned char> ( name[j] ) );
      lName[j] = '\0';

      // Check that the name exist, otherwise it's a special case
      if ( !BibEntryDefTable::self()->getBibEntryDef ( QString::fromLatin1 ( lName ) ) )
      {
            // These are not entries; nothing is built for them here
            if ( strcmp ( lName, "preamble" ) ==0 ||
                    strcmp ( lName, "comment" ) == 0 ||
                    strcmp ( lName, "string" ) == 0 )
                  return 0;

            cerr << "Entry type: " << name << " is not found\n";
            undefinedType = true;
            if ( ignoreUndefinedType )
                  return 0;

            cerr << "Using other type\n";
      }

      // Eat everything before the key, remembering which character opened
      // the entry body
      while ( ( c==' ' || c=='{' || c=='(' || c=='\n' || c=='\t' || c=='\r' ) && i<len )
      {
            if ( c=='{' )
                  delim=BRACE;
            if ( c=='(' )
                  delim=PAREN;
            c=st[i];
            i++;
      }

      // Pick the character that closes the entry body
      switch ( delim )
      {
            case BRACE:
                  rd='}';
                  break;
            case QUOTE:
                  rd='"';
                  break;
            case PAREN:
                  rd=')';
                  break;
            case UNKNOWN:
                  // No opening delimiter was seen
                  last_char = i-1;
                  return 0;
      }

      // The key runs up to the first comma
      for ( j=0; ( c!=',' && i<len ) && j < 255; j++ )
      {
            key[j] = c;
            c = st[i];
            i++;
      }
      key[j] = 0;

      // The text ended with the key: there are no fields to read
      if ( i>=len )
      {
            last_char = i-1;
            return 0;
      }

      //Eat right white space
      while ( lt_space ( key[j] ) && j > 0 ) j--;
      key[j+1] = 0;

      cerr << "key = " << key << "\n";

      // Reaching this point with an undefined type means the caller asked
      // not to ignore it, so it is stored under the type "other".
      if ( undefinedType )
            entry = new BibEntry ( QString::fromLatin1 ( "other" ), key );
      else
            entry = new BibEntry ( lName, key );

      // Read fields
      do
      {
            // Eat any non-alpha, all field names start with alpha
            do
            {
                  c = st[i];
                  i++;
            }
            while ( !isalpha ( static_cast<unsigned char> ( c ) ) && c!=rd && i<len );

            // Field names are stored in lower case.  The bound is on the scan
            // position i, like every other loop in this function.
            for ( j=0; ispseudoalpha ( c ) && i<len && j<255; j++ )
            {
                  fieldname[j] = tolower ( static_cast<unsigned char> ( c ) );
                  c= st[i];
                  i++;
            }
            fieldname[j] = 0;


            while ( lt_space ( c ) && i<len )
            {
                  c = st[i];
                  i++;
            }

            if ( c=='=' )
            {
                  c = st[i];
                  i++;

                  // Read everything inside delimiters (same rules as
                  // read_field, but reading from st instead of a stream)
                  char *s;
                  int k=0, braceCount = 0, quoteCount = 0, parenCount=0;
                  bool wasslash = false, is_brace, is_quote, is_paren, no_delim;
                  char buf[fieldBufSize];

                  buf[0] = '\0';

                  // Eat spaces
                  while ( lt_space ( c ) && i<len )
                  {
                        c=st[i];
                        i++;
                  }

                  // The first non-blank character decides the delimiter
                  is_brace = bool ( c == '{' );
                  is_quote = bool ( c == '"' );
                  is_paren = bool ( c == '(' );
                  no_delim = bool ( !is_brace && !is_quote && !is_paren );

                  while ( i<len )
                  {
                        // A comma or newline ends the value when no delimiter is open
                        if ( ( c==',' || c=='\n' ) && ( no_delim || ( is_brace && braceCount==0 ) ||
                                                        ( is_quote && quoteCount==0 ) || ( is_paren && parenCount==0 ) ) )
                        {
                              buf[k] = 0;
                              break;
                        }

                        if ( c=='{' )
                              braceCount++;

                        if ( c=='}' )
                        {
                              braceCount--;
                              if ( braceCount==0 && is_brace )
                              {
                                    buf[k] = c;
                                    c=' ';
                                    break;
                              }
                        }

                        // A quote preceded by a backslash does not toggle the quote state
                        if ( c=='"' && !wasslash && is_quote )
                        {
                              quoteCount = 1 - quoteCount;
                              if ( quoteCount==0 )
                              {
                                    buf[k] = '"';
                                    break;
                              }
                        }

                        if ( c=='(' )
                              parenCount++;

                        if ( c==')' )
                        {
                              parenCount--;
                              if ( parenCount==0 && is_paren )
                              {
                                    buf[k] = c;
                                    c=' ';
                                    break;
                              }
                        }

                        buf[k++] = c;
                        // Keep the buffer terminated at all times: buf is an
                        // uninitialised local, and an exit through end of
                        // text or the size limit would otherwise leave buf[k]
                        // indeterminate for the checks below.
                        buf[k] = 0;

                        // Size Limit: k stops at the last valid index
                        if ( k > fieldBufSize - 2 )
                              break;

                        wasslash = ( c == '\\' );
                        c= st[i];
                        i++;
                  } // main while

                  // Broken file: the text ended inside an open brace
                  if ( i>=len && braceCount>0 )
                  {
                        // The entry is not handed to the caller, so release it
                        delete entry;
                        last_char = i-1;
                        return 0;
                  }


                  // Eat right spaces (the terminator counts as one for lt_space)
                  while ( lt_space ( buf[k] ) && k>0 )
                        k--;

                  if ( ( is_brace && buf[0]=='{' && buf[k]=='}' ) ||
                          ( is_quote && buf[0]=='"' && buf[k]=='"' ) ||
                          ( is_paren && buf[0]=='(' && buf[k]==')' ) )
                  {
                        // Strip the enclosing delimiters
                        buf[k]=0;
                        s= &buf[1];
                        asStringMacro = false;
                  }
                  else
                  {
                        // Not enclosed in a matching pair: flagged as a string macro
                        buf[k+1]=0;
                        s= &buf[0];
                        asStringMacro = true;
                  }

                  if ( strlen ( s ) >0 && entry )
                  {
                        // "kbibnote" is stored under the field name "comment"
                        if ( strcmp ( fieldname, "kbibnote" ) ==0 )
                        {
                              entry->setField ( "comment", s );
                              entry->setStringMacroIndicator ( QString::fromLatin1 ( "comment" ), asStringMacro );
                        }
                        else
                        {
                              entry->setField ( fieldname, s );
                              entry->setStringMacroIndicator ( fieldname, asStringMacro );
                        }
                  }
            }

      }
      while ( c != rd && i<len );
      last_char = i-1;

      return entry;

}


Generated by  Doxygen 1.6.0   Back to index