[eros-cvs] cvs commit: eros/src/base/cross/bin/capidl capidl-baseline-grammar.txt capidl-revised-grammar.txt

shap@eros.cs.jhu.edu shap@eros.cs.jhu.edu
Tue, 23 Jan 2001 16:21:33 -0500


shap        01/01/23 16:21:32

  Added:       src/base/cross/bin/capidl capidl-baseline-grammar.txt
                        capidl-revised-grammar.txt
  Log:
  Keep copies of baseline and revised grammars

Revision  Changes    Path
1.1                  eros/src/base/cross/bin/capidl/capidl-baseline-grammar.txt

Index: capidl-baseline-grammar.txt
===================================================================
/*
This file is hereby placed in the public domain
*/

/*
  A YACC Grammar for CapIDL

  By Mark Miller, with much feedback from the eros-arch list,
  especially Jonathan Shapiro.

  Distantly derived from Corba IDL, by the OMG.
*/

/* Categorical terminals */
%token Identifier
%token IntegerLiteral
%token CharLiteral
%token FloatingPtLiteral
%token StringLiteral
 


/* Corba Keywords */
%token BOOLEAN CASE CHAR CONST DEFAULT 
%token DOUBLE ENUM EXCEPTION FALSE FLOAT 
%token INTERFACE LONG MODULE OBJECT OCTET ONEWAY OUT RAISES
%token SHORT STRING STRUCT SWITCH TRUE TYPEDEF
%token UNSIGNED UNION VOID WCHAR WSTRING

/* Other Keywords */
%token INTEGER REPR 

/* Reserved Corba Keywords */
%token ANY ATTRIBUTE CONTEXT FIXED IN INOUT NATIVE 
%token READONLY SEQUENCE TRUNCATABLE VALUETYPE 

/* Other Reserved Keywords */
%token ABSTRACT AN AS BEGIN BEHALF BIND 
%token CATCH CLASS CONSTRUCTOR 
%token DECLARE DEF DEFINE DEFMACRO DELEGATE DEPRECATED DISPATCH DO
%token ELSE END ENSURE EVENTUAL ESCAPE EVENTUALLY EXPORT EXTENDS 
%token FACET FINALLY FOR FORALL FUNCTION
%token IMPLEMENTS IS   /* IN is already declared with the reserved Corba keywords */
%token LAMBDA LET LOOP MATCH META METHOD METHODS 
%token NAMESPACE ON 
%token PACKAGE PRIVATE PROTECTED PUBLIC 
%token RELIANCE RELIANT RELIES RELY REVEAL
%token SAKE SIGNED STATIC
%token SUPPORTS SUSPECT SUSPECTS SYNCHRONIZED
%token THIS THROWS TO TRANSIENT TRY 
%token USES USING UTF8 UTF16 
%token VIRTUAL VOLATILE WHEN WHILE

/* operators */
%token OpScope /* :: */


/* Grammar follows */
%%

start:
        definitions
 ;

definitions:
        /*empty*/
 |      definitions definition
 ;

/**
 * Defines a name to have a specified meaning in a scope.  The name
 * may be defined as a type, a scope, or a constant value.
 */
definition:
        ';'                                     // empty definition
 |      MODULE name_def '{' definitions '}'     // a scope
 |      struct_dcl                              // a struct
 |      except_dcl                              // a struct to throw
 |      union_dcl                               // a discriminated union
 |      enum_dcl                                // a set of const unsigneds
 |      TYPEDEF type name_def ';'               // names a type
 |      TYPEDEF name_def ';'                    // forward declaration
 |      CONST type name_def '=' const_expr ';'  // names a constant value
 |      interface_dcl                           // a capability
 |      repr_dcl                                // advises on representation
 ;



/********************** Names ***************************/



/**
 * A defining-occurrence of an identifier.  The identifier is defined
 * as a name within the scope in which it textually appears.
 */
name_def:
        ident
 ;

/**
 * A use-occurrence of a name.  The name may be unqualified, fully
 * qualified, or partially qualified.  Corba scoping rules are used to
 * associate a use-occurrence with a defining-occurrence.
 */
scoped_name:
        ident                           // unqualified
 |      OpScope ident                   // global?
 |      scoped_name OpScope ident       // qualified
 ;

/**
 * These extra productions exist so that a better diagnostic can be
 * given when a reserved keyword is used where a normal identifier is
 * expected.  The reserved: production should eventually list all the
 * reserved keywords.
 */
ident:
        Identifier
 |      reserved                        { yyerror("reserved: " + $1); }
 ;
reserved:
        ANY | ATTRIBUTE | CONTEXT | FIXED | IN | INOUT | NATIVE 
 |      READONLY | SEQUENCE | TRUNCATABLE | VALUETYPE 
 ;




/********************** Types ***************************/



/**
 * Can be a capability type, a pure data type, or a mixed type.  Most
 * of the contexts where a mixed type is syntactically accepted
 * actually require a pure data type, but this is enforced after
 * parsing.
 */
type:
        scalar_type                     // atomic pure data value
 |      seq_type                        // sequences of other data types
 |      OBJECT                          // generic capability
 |      scoped_name                     // must name a defined type
 ;

scalar_type:
        integer_type                    // subranges of INTEGER
 |      floating_pt_type                // various IEEE precisions
 |      char_type                       // subranges of Unicode WCHAR
 |      BOOLEAN                         // TRUE or FALSE
 ;

/**
 * The only values of these types are integers.  The individual types
 * differ regarding what subrange of integer they accept.  Not all
 * syntactically expressible variations will be supported in the
 * foreseeable future.  We expect to initially support only INTEGER<N>
 * and UNSIGNED<N> for N == 16, 32, 64, as well as UNSIGNED<8>.
 */
integer_type:
        INTEGER '<' const_expr ',' const_expr '>'       // inclusive subrange
 |      INTEGER '<' const_expr '>'      // == INTEGER<-2**(N-1),2**(N-1)-1>
 |      INTEGER                         // all integers
 |      SHORT                           // == INTEGER<16>
 |      LONG                            // == INTEGER<32>
 |      LONG LONG                       // == INTEGER<64>

 |      UNSIGNED '<' const_expr '>'     // == INTEGER<0,2**N-1>
 |      UNSIGNED                        // all non-negative integers
 |      OCTET                           // == UNSIGNED<8>
 |      UNSIGNED SHORT                  // == UNSIGNED<16>
 |      UNSIGNED LONG                   // == UNSIGNED<32>
 |      UNSIGNED LONG LONG              // == UNSIGNED<64>
 ;

/**
 * The only values of these types are real numbers, positive and
 * negative infinity, and the NaNs defined by IEEE.  As each IEEE
 * precision is a unique beast, the sizes may only be those defined as
 * standard IEEE precisions.  We expect to initially support only
 * FLOAT<32> and FLOAT<64>.
 */
floating_pt_type:
        FLOAT '<' const_expr '>'        // == IEEE std floating precision N
 |      FLOAT                           // == FLOAT<32>
 |      DOUBLE                          // == FLOAT<64>
 |      LONG DOUBLE                     // == FLOAT<128>
 ;

/**
 * The only values of these types are 32 bit Unicode characters.
 * These types differ regarding the subrange of Unicode character
 * codes they will accept.  We expect to initially support only
 * WCHAR<7> (ascii), WCHAR<8> (latin-1), WCHAR<16> (java-unicode), and
 * WCHAR<32> (full unicode). 
 */
char_type:
        WCHAR '<' const_expr ',' const_expr '>'
                                   // inclusive subrange of Unicode characters
 |      WCHAR '<' const_expr '>'        // == WCHAR<0,2**N-1>
 |      CHAR                            // == WCHAR<8>
 |      WCHAR                           // == WCHAR<32> == Unicode character
 ;

/**
 * A sequence is some number of repetitions of some base type.  The
 * number of repetitions may be bounded or unbounded.  Strings are
 * simply sequences of characters, but are singled out as special for
 * three reasons: 1) The subrange of character they repeat does not
 * include '\0'. 2) The marshalled representation includes an extra
 * '\0' character after the end of the string. 3) Many languages
 * have a special String data type to which this must be bound.
 */
seq_type:
        type '[' ']'                    // some number of repetitions of type
 |      type '[' const_expr ']'         // no more than N repetitions of type
 |      STRING                          // == WCHAR<1,255>[]
 |      STRING '<' const_expr '>'       // == WCHAR<1,255>[N]
 |      WSTRING                         // == WCHAR<1,2**32-1>[]
 |      WSTRING '<' const_expr '>'      // == WCHAR<1,2**32-1>[N]
 ;



/********************** Structs ***************************/



/**
 * Like a C struct, a struct defines an aggregate type consisting of
 * each of the member types in order.  Whereas the members of a
 * sequence are accessed by numeric index, the members of a structure
 * are accessed by member name (ie, field name). <p>
 *
 * Like a Corba or CapIDL module, a CapIDL struct also defines a named
 * scope, such that definitions between the curly brackets define
 * names in that scope.
 */
struct_dcl:
        STRUCT name_def '{' members '}'
 ;

members:
        /*empty*/
 |      members member
 ;

member:
        definition              // defines a name inside this scope
 |      type name_def ';'       // defines an actual member (ie, field)
 ;



/********************** Exceptions ***************************/


/**
 * Structs that are sent (as in RAISES) to explain problems
 */
except_dcl:
        EXCEPTION name_def '{' members '}'
 ;



/********************** Discriminated Unions ***************************/


/**
 * Like the Corba discriminated union, this has a typed scalar that is
 * compared against the case labels to determine what the rest of the
 * data is.  So this is an aggregate data type consisting of the value
 * to be switched on followed by the element determined by this
 * value.  Unlike the Corba union, we also name the field holding the
 * value switched on. 
 *
 * The union as a whole creates a named nested scope for further name
 * definitions (as does module, struct, and interface), but the
 * individual case labels do not create further subscopes.
 */
union_dcl:
        UNION name_def '{' 
            SWITCH '(' switch_type name_def ')' '{' 
                cases 
            '}'
        '}'
 ;

/**
 * One may only switch on scalar types other than floating point.
 * Any objections?  We expect to initially support only small enough
 * subranges of these types that an array lookup implementation is
 * reasonable. Let's say 0..255.
 */
switch_type:
        integer_type            // subranges of INTEGER
 |      char_type               // subranges of Unicode WCHAR
 |      BOOLEAN                 // TRUE or FALSE
 |      scoped_name             // must name one of the other switch_types
 ;

cases:
        /*empty*/
 |      cases case
 ;

/**
 * Each case consists of one or more case labels, zero or more name
 * definitions (scoped to the union as a whole), and one element
 * declaration.  (Note: I would like to scope these definitions to the
 * case, but the case has no natural name.)
 */
case:
        case_labels definitions element_dcl
 ;

case_labels:
        case_label
 |      case_labels case_label
 ;

case_label:
        CASE const_expr ':'
 |      DEFAULT ':'
 ;

element_dcl:
        type name_def ';'
 ;



/********************** Enums ***************************/


/**
 * Just as characters have character codes but the character is not
 * its character code (it is only represented by its character code),
 * so an enumerated type consists of a set of named enumerated values,
 * each of which happens to be represented by a unique integer.  This
 * declaration declares the name of the type and the names of the
 * values of that type.  <p>
 *
 * Are the value names scoped to the type name?  In other words, given
 * "enum Color { RED, GREEN, BLUE }", must one then say "Color::RED"
 * or simply "RED"?  What's Corba's answer? 
 */
enum_dcl:
        ENUM name_def '{' enum_defs '}'
 ;

enum_defs:
        name_def
 |      enum_defs ',' name_def
 ;



/********************** Constant Expressions ***************************/


/**
 * Eventually, we expect to support the full set of operators that
 * Corba constant expressions support, although we will interpret these
 * operators in a precision unlimited fashion.  (It is not appropriate
 * for a language neutral framework to use a standard for arithmetic
 * other than mathematics, unless motivated by efficiency.  Since
 * constant expressions are evaluated at CapIDL compile time,
 * efficiency isn't an issue.)
 */
const_expr:
        scoped_name
 |      literal
 |      '(' const_expr ')'
 ;

literal:
        IntegerLiteral
 |      StringLiteral
 |      CharLiteral
 |      FloatingPtLiteral
 |      TRUE
 |      FALSE
 ;



/******************* Interfaces / Capabilities **********************/


/**
 * Defines a capability type.  The type of a capability is defined by
 * what you can send it.  By far the most common convention is to send
 * it a message, which consists of an order code, a sequence of
 * capability arguments, and a sequence of pure data arguments.  (For
 * present purposes, it would be inappropriate to try to support
 * an argument type that mixed data and capabilities.)  One of the
 * capability arguments is also special -- the resume argument.  When
 * the invoker of a capability does an EROS CALL, the resume argument
 * position is filled in by the OS with a Resume key which, when
 * invoked, will cause the caller to continue with the arguments to
 * the Resume key invocation. <p>
 *
 * CapIDL supports three levels of description, from most convenient,
 * conventional, and high level, to most flexible and low level. <p>
 *
 * The lowest level description is the "struct level", in which a
 * capability is defined as a one argument procedure with no return,
 * where the argument describes the capabilities and data that may be
 * passed to that capability.  At this level, there are not
 * necessarily any resume parameters.  However, when CALLing such a
 * capability, the OS generated Resume key will be passed in the first
 * capability parameter position.  At this level, there are not
 * necessarily any order codes.  However, the type will often be a
 * discriminated union switching on an enum, in which case the values
 * of this enum are the moral equivalent of order codes.  The struct
 * level is the only context in CapIDL where structs and unions can mix
 * data and capabilities. <p>
 *
 * Next is the "oneway message level" or just the "oneway level".
 * Here, a capability is described by explicitly declaring the
 * separate messages you can send to the capability.  This level
 * expands to the struct level by turning each message name into an
 * enum value (of an enum type specific to this capability type),
 * gathering all the arguments for each message in order into a
 * struct, and gathering all these enum values and argument structs
 * into one big discriminated union.  At this level, order codes are
 * implicit and built in, but there are still not necessarily any resume
 * parameters. <p>
 *
 * Next is the "twoway message level" or just the "twoway level".
 * Here, a capability is still described as a set of messages, but any
 * one of these messages may instead be declared as twoway.  At the
 * twoway level, the resume parameter is implicit and built in as
 * well.  Instead, OUT parameters and a list of RAISEd Exceptions is
 * added to the oneway message declaration.  These effectively declare
 * the type of the Resume parameter, at the price of imposing some
 * conventional restrictions.  Specifically, the Resume parameter type
 * may only have one success order code, and all other order codes
 * must either be well known system exception codes, or must pass only
 * an Exception (which will be one of those in the RAISES clause).
 * The twoway level expands into the oneway level by turning these
 * extra declarations into an explicit type on an explicit Resume
 * parameter. 
 */
interface_dcl:
        INTERFACE name_def '(' type name_def ')'   // struct-level
 |      INTERFACE name_def '{' messages '}'        // message-levels
 ;

/**
 * Message declarations, interspersed with name definitions for this
 * scope 
 */
messages:
        /*empty*/
 |      messages message
 ;
message:
        definition
 |      message_dcl
 ;

message_dcl:
        ONEWAY name_def '(' params ')'                      // oneway level
 |      ret_type name_def '(' param_2s ')' opt_raises       // twoway level
 ;

/**
 * In expanding to the struct level, all the params are gathered into
 * a struct.  First come all the capability arguments, and then all
 * the pure data parameters.  A param cannot be of mixed data and
 * capability type.
 */
params:
        /*empty*/
 |      param_list
 ;

param_list:
        param
 |      param_list ',' param
 ;

/**
 * The parameter name is defined within the scope of this message
 * name.  In this one regard, each individual named message is also a
 * named nested scope.  This allows, for example, a following REPR
 * clause to give placement advice on a parameter by referring to it as
 * "messageName::parameterName" 
 */
param:
        type name_def
 ;

/**
 * In expanding to the oneway level, a non-VOID return type becomes a
 * first OUT argument of the same type, and the message is left with a
 * VOID return type.  The generated out parameter will be named
 * "_result", and so REPR advice can refer to the result by this name
 * even when the return type syntax is used.
 */
ret_type:
        type
 |      VOID
 ;

param_2s:
        /*empty*/
 |      param_2_list
 ;

param_2_list:
        param_2
 |      param_2_list ',' param_2
 ;

/**
 * In expanding to the oneway level, a normal (IN) parameter is left
 * alone, but the out parameters are gathered together to form the
 * normal parameter list of the oneway success message to the Resume
 * parameter. 
 */
param_2:
        param
 |      OUT param
 ;

/**
 * In expanding to the oneway level, each Exception listed in the
 * RAISES clause becomes a separate oneway problem reporting message
 * on the Resume parameter type, whose argument is just that
 * exception.  Should the order codes for these problem messages come
 * from the Exception declaration or the RAISES clause?  The first
 * would seem to make more sense, except there's no natural way to
 * coordinate uniqueness.  How do EROS or KeyKOS currently assign
 * order codes for reporting problems?
 */
opt_raises:
        /*empty*/
 |      raises
 ;

raises:
        RAISES '(' exceptions ')'
 ;

/**
 * The possibly empty list of exception names inside a RAISES clause.
 * A non-empty list is one scoped_name followed by zero or more
 * comma-prefixed scoped_names, so that "RAISES (A)" and
 * "RAISES (A, B)" parse without requiring a leading comma.
 */
exceptions:
        /*empty*/
 |      exception_list
 ;

exception_list:
        scoped_name
 |      exception_list ',' scoped_name
 ;




/********************** Representation ***************************/


/**
 * A placeholder for representation advice, including placement
 * advice.  Because, by Corba scoping rules, names in subscopes can be
 * referred to using paths, REPR advice could appear anywhere in the
 * compilation unit (ie, source file) after the declaration of the
 * thing being advised.  (Is there a possible security problem with
 * this?)  However, good style is for the REPR advice to follow as
 * closely as possible the declarations it is advising. <p>
 *
 * The reason the rest of the spec was careful never to refer to bit
 * representation, but rather speaks in terms of subranges (except for
 * floating point, where it's unavoidable), is that the rest of the
 * spec is only about semantics, not representation.  The REPR advice can
 * therefore be only about representation, not semantics. <p>
 *
 * Conflicting advice, or advice that specifies a representation not
 * able to preserve the semantics (such as insufficient bits for a
 * given subrange) must be caught and reported statically, and must
 * cause a failure to compile. <p>
 *
 * In the absence of advice, default advice applies.  This is
 * appropriate for human written source, but is a poor way for
 * programs to speak to other programs.  Instead, there needs to be a
 * tool for turning source capidl files into fully-advised capidl
 * files.  The fully advised files are likely to be canonicalized in
 * other ways as well.  These will be written once and read many
 * times, so the goal is to make it easier on the reading program.
 * The getAllegedType query may even return a fast binary equivalent
 * to a fully advised and somewhat canonicalized capIDL file.
 */
repr_dcl:
        REPR '{' advisories '}'
 |      REPR advice
 ;

advisories:
        /*empty*/
 |      advisories advice
 ;

/**
 * The side to the right of the colon says how the thing named on the
 * left side should be represented, and where it should be placed.
 * This production is currently a placeholder until we figure out what
 * kind of advice we'd like to express.  As we figure this out, expect
 * the right side to grow. <p><pre>
 *
 * Some plausible meanings:
 *      enum_value: integer     // defines this enum value to be this integer
 *      message_name: integer   // gives the message this order code
 *      a_wstring: "UTF-8"      // represents the wide string in UTF-8
 *      a_wstring: "UTF-16"     // represents the wide string in UTF-16
 *      a_module: "LITTLE_ENDIAN" // inherited unless overridden?
 * </pre>
 */
advice:
        scoped_name ':' const_expr ';'
 ;


%%



1.1                  eros/src/base/cross/bin/capidl/capidl-revised-grammar.txt

Index: capidl-revised-grammar.txt
===================================================================
/*
This file is hereby placed in the public domain
*/

/*
  A YACC Grammar for CapIDL

  By Mark Miller, with much feedback from the eros-arch list,
  especially Jonathan Shapiro.

  Distantly derived from Corba IDL, by the OMG.
*/

/* Categorical terminals */
%token Identifier
%token IntegerLiteral
%token CharLiteral
%token FloatingPtLiteral
%token StringLiteral

%token ReservedWord
 


/* Corba Keywords */
%token BOOLEAN CASE CHAR CONST DEFAULT 
%token DOUBLE ENUM EXCEPTION FALSE FLOAT 
%token INTERFACE LONG MODULE OBJECT OCTET ONEWAY OUT RAISES
%token SHORT STRING STRUCT SWITCH TRUE TYPEDEF
%token UNSIGNED UNION VOID WCHAR WSTRING

/* Other Keywords */
%token INTEGER REPR 

/* Other Reserved Keywords */
%token ABSTRACT AN AS BEGIN BEHALF BIND 
%token CATCH CLASS CONSTRUCTOR 
%token DECLARE DEF DEFINE DEFMACRO DELEGATE DEPRECATED DISPATCH DO
%token ELSE END ENSURE EVENTUAL ESCAPE EVENTUALLY EXPORT EXTENDS 
%token FACET FINALLY FOR FORALL FUNCTION
%token IMPLEMENTS IN IS
%token LAMBDA LET LOOP MATCH META METHOD METHODS 
%token NAMESPACE ON 
%token PACKAGE PRIVATE PROTECTED PUBLIC 
%token RELIANCE RELIANT RELIES RELY REVEAL
%token SAKE SIGNED STATIC
%token SUPPORTS SUSPECT SUSPECTS SYNCHRONIZED
%token THIS THROWS TO TRANSIENT TRY 
%token USES USING UTF8 UTF16 
%token VIRTUAL VOLATILE WHEN WHILE

/* operators */
%token OpScope /* :: */


/* Grammar follows */
%%

start:
        definitions
 ;

definitions:
        /*empty*/
 |      definitions definition
 ;

/**
 * Defines a name to have a specified meaning in a scope.  The name
 * may be defined as a type, a scope, or a constant value.
 */
definition:
        ';'                                     // empty definition
 |      MODULE name_def '{' definitions '}'     // a scope
 |      struct_dcl                              // a struct
 |      except_dcl                              // a struct to throw
 |      union_dcl                               // a discriminated union
 |      enum_dcl                                // a set of const unsigneds
 |      TYPEDEF type name_def ';'               // names a type
 |      TYPEDEF name_def ';'                    // forward declaration
 |      const_dcl                               // names a constant value
 |      interface_dcl                           // a capability
 |      repr_dcl                                // advises on representation
 ;



/********************** Names ***************************/



/**
 * A defining-occurrence of an identifier.  The identifier is defined
 * as a name within the scope in which it textually appears.
 */
name_def:
        ident
 ;

/**
 * A use-occurrence of a name.  The name may be unqualified, fully
 * qualified, or partially qualified.  Corba scoping rules are used to
 * associate a use-occurrence with a defining-occurrence.
 */
scoped_name:
        ident                           // unqualified
 |      OpScope ident                   // global?
 |      scoped_name OpScope ident       // qualified
 ;

/**
 * The extra ReservedWord alternative exists so that a better
 * diagnostic can be given when a reserved keyword is used where a
 * normal identifier is expected.  Unlike the baseline grammar, there
 * is no reserved: production here; reserved keywords are presumably
 * all reported by the lexer as the single ReservedWord token.
 */
ident:
        Identifier
 |      ReservedWord                     { yyerror("reserved: " + $1); }
 ;




/********************** Types ***************************/



/**
 * Can be a capability type, a pure data type, or a mixed type.  Most
 * of the contexts where a mixed type is syntactically accepted
 * actually require a pure data type, but this is enforced after
 * parsing.
 */
type:
        scalar_type                     // atomic pure data value
 |      seq_type                        // sequences of other data types
 |      OBJECT                          // generic capability
 |      scoped_name                     // must name a defined type
 ;

scalar_type:
        integer_type                    // subranges of INTEGER
 |      floating_pt_type                // various IEEE precisions
 |      char_type                       // subranges of Unicode WCHAR
 |      BOOLEAN                         // TRUE or FALSE
 ;

/**
 * The only values of these types are integers.  The individual types
 * differ regarding what subrange of integer they accept.  Not all
 * syntactically expressible variations will be supported in the
 * foreseeable future.  We expect to initially support only INTEGER<N>
 * and UNSIGNED<N> for N == 16, 32, 64, as well as UNSIGNED<8>.
 */
integer_type:
        INTEGER '<' const_expr ',' const_expr '>'       // inclusive subrange
 |      INTEGER '<' const_expr '>'      // == INTEGER<-2**(N-1),2**(N-1)-1>
 |      INTEGER                         // all integers
 |      SHORT                           // == INTEGER<16>
 |      LONG                            // == INTEGER<32>
 |      LONG LONG                       // == INTEGER<64>

 |      UNSIGNED '<' const_expr '>'     // == INTEGER<0,2**N-1>
 |      UNSIGNED                        // all non-negative integers
 |      OCTET                           // == UNSIGNED<8>
 |      UNSIGNED SHORT                  // == UNSIGNED<16>
 |      UNSIGNED LONG                   // == UNSIGNED<32>
 |      UNSIGNED LONG LONG              // == UNSIGNED<64>
 ;

/**
 * The only values of these types are real numbers, positive and
 * negative infinity, and the NaNs defined by IEEE.  As each IEEE
 * precision is a unique beast, the sizes may only be those defined as
 * standard IEEE precisions.  We expect to initially support only
 * FLOAT<32> and FLOAT<64>.
 */
floating_pt_type:
        FLOAT '<' const_expr '>'        // == IEEE std floating precision N
 |      FLOAT                           // == FLOAT<32>
 |      DOUBLE                          // == FLOAT<64>
 |      LONG DOUBLE                     // == FLOAT<128>
 ;

/**
 * The only values of these types are 32 bit Unicode characters.
 * These types differ regarding the subrange of Unicode character
 * codes they will accept.  We expect to initially support only
 * WCHAR<7> (ascii), WCHAR<8> (latin-1), WCHAR<16> (java-unicode), and
 * WCHAR<32> (full unicode). 
 */
char_type:
        WCHAR '<' const_expr ',' const_expr '>'
                                   // inclusive subrange of Unicode characters
 |      WCHAR '<' const_expr '>'        // == WCHAR<0,2**N-1>
 |      CHAR                            // == WCHAR<8>
 |      WCHAR                           // == WCHAR<32> == Unicode character
 ;

/**
 * A sequence is some number of repetitions of some base type.  The
 * number of repetitions may be bounded or unbounded.  Strings are
 * simply sequences of characters, but are singled out as special for
 * three reasons: 1) The subrange of character they repeat does not
 * include '\0'. 2) The marshalled representation includes an extra
 * '\0' character after the end of the string. 3) Many languages
 * have a special String data type to which this must be bound.
 */
seq_type:
        type '[' ']'                    // some number of repetitions of type
 |      type '[' const_expr ']'         // no more than N repetitions of type
 |      STRING                          // == WCHAR<1,255>[]
 |      STRING '<' const_expr '>'       // == WCHAR<1,255>[N]
 |      WSTRING                         // == WCHAR<1,2**32-1>[]
 |      WSTRING '<' const_expr '>'      // == WCHAR<1,2**32-1>[N]
 ;



/********************** Structs ***************************/



/**
 * Like a C struct, a struct defines an aggregate type consisting of
 * each of the member types in order.  Whereas the members of a
 * sequence are accessed by numeric index, the members of a structure
 * are accessed by member name (ie, field name). <p>
 *
 * Like a Corba or CapIDL module, a CapIDL struct also defines a named
 * scope, such that definitions between the curly brackets define
 * names in that scope.
 */
struct_dcl:
        STRUCT name_def '{' members '}'
 ;

members:
        /*empty*/
 |      members member
 ;

member:
        definition              // defines a name inside this scope
 |      type name_def ';'       // defines an actual member (ie, field)
 ;



/********************** Exceptions ***************************/


/**
 * Structs that are sent (as in RAISES) to explain problems
 */
except_dcl:
        EXCEPTION name_def '{' members '}'
 ;



/********************** Discriminated Unions ***************************/


/**
 * Like the Corba discriminated union, this has a typed scalar that is
 * compared against the case labels to determine what the rest of the
 * data is.  So this is an aggregate data type consisting of the value
 * to be switched on followed by the element determined by this
 * value.  Unlike the Corba union, we also name the field holding the
 * value switched on. 
 *
 * The union as a whole creates a named nested scope for further name
 * definitions (as does module, struct, and interface), but the
 * individual case labels do not create further subscopes.
 */
union_dcl:
        UNION name_def '{' 
            SWITCH '(' switch_type name_def ')' '{' 
                cases 
            '}'
        '}'
 ;

/**
 * One may only switch on scalar types other than floating point.
 * Any objections?  We expect to initially support only small enough
 * subranges of these types that an array lookup implementation is
 * reasonable. Let's say 0..255.
 */
switch_type:
        integer_type            // subranges of INTEGER
 |      char_type               // subranges of Unicode WCHAR
 |      BOOLEAN                 // TRUE or FALSE
 |      scoped_name             // must name one of the other switch_types
 ;

cases:
        /*empty*/
 |      cases case
 ;

/**
 * Each case consists of one or more case labels, zero or more name
 * definitions (scoped to the union as a whole), and one element
 * declaration.  (Note: I would like to scope these definitions to the
 * case, but the case has no natural name.)
 */
case:
        case_labels definitions element_dcl
 ;

case_labels:
        case_label
 |      case_labels case_label
 ;

case_label:
        CASE const_expr ':'
 |      DEFAULT ':'
 ;

element_dcl:
        type name_def ';'
 ;



/********************** Enums ***************************/


/**
 * Just as characters have character codes but the character is not
 * its character code (it is only represented by its character code),
 * so an enumerated type consists of a set of named enumerated values,
 * each of which happens to be represented by a unique integer.  This
 * declaration declares the name of the type and the names of the
 * values of that type.  <p>
 *
 * Are the value names scoped to the type name?  In other words, given
 * "enum Color { RED, GREEN, BLUE }", must one then say "Color::RED"
 * or simply "RED"?  What's Corba's answer? 
 */
enum_dcl
        : ENUM name_def         /* name of the enumerated type */
          '{' enum_defs '}'     /* names of the values of that type */
        ;

/*
 * A non-empty, comma-separated list of enumerated value names.  Any
 * entry, including the first, may be followed by "= const_expr".
 */
enum_defs:
        name_def
 |      name_def '=' const_expr
 |      enum_defs ',' name_def
 |      enum_defs ',' name_def '=' const_expr
 ;


/********************** Constant Declarations ***************************/
/*
 * A named constant of the given type whose value is given by a
 * constant expression.
 */
const_dcl:
        CONST type name_def '=' const_expr ';'
 ;

/********************** Constant Expressions ***************************/


/**
 * Eventually, we expect to support the full set of operators that
 * Corba constant expressions support, although we will interpret these
 * operators in a precision unlimited fashion.  (It is not appropriate
 * for a language neutral framework to use a standard for arithmetic
 * other than mathematics, unless motivated by efficiency.  Since
 * constant expressions are evaluated at CapIDL compile time,
 * efficiency isn't an issue.)
 */
const_expr
        : scoped_name           /* a scoped name */
        | literal               /* a literal value */
        | '(' const_expr ')'    /* a parenthesized sub-expression */
        ;

/* Any one of the categorical literal tokens, or a boolean keyword. */
literal
        : IntegerLiteral
        | StringLiteral
        | CharLiteral
        | FloatingPtLiteral
        | TRUE
        | FALSE
        ;



/******************* Interfaces / Capabilities **********************/


/**
 * Defines a capability type.  The type of a capability is defined by
 * what you can send it.  By far the most common convention is to send
 * it a message, which consists of an order code, a sequence of
 * capability arguments, and a sequence of pure data arguments.  (For
 * present purposes, it would be inappropriate to try to support
 * an argument type that mixed data and capabilities.)  One of the
 * capability arguments is also special -- the resume argument.  When
 * the invoker of a capability does an EROS CALL, the resume argument
 * position is filled in by the OS with a Resume key which, when
 * invoked, will cause the caller to continue with the arguments to
 * the Resume key invocation. <p>
 *
 * CapIDL supports three levels of description, from most convenient,
 * conventional, and high level, to most flexible and low level. <p>
 *
 * The lowest level description is the "struct level", in which a
 * capability is defined as a one argument procedure with no return,
 * where the argument describes the capabilities and data that may be
 * passed to that capability.  At this level, there are not
 * necessarily any resume parameters.  However, when CALLing such a
 * capability, the OS generated Resume key will be passed in the first
 * capability parameter position.  At this level, there are not
 * necessarily any order codes.  However, the type will often be a
 * discriminated union switching on an enum, in which case the values
 * of this enum are the moral equivalent of order codes.  The struct
 * level is the only context in CapIDL where structs and unions can mix
 * data and capabilities. <p>
 *
 * Next is the "oneway message level" or just the "oneway level".
 * Here, a capability is described by explicitly declaring the
 * separate messages you can send to the capability.  This level
 * expands to the struct level by turning each message name into an
 * enum value (of an enum type specific to this capability type),
 * gathering all the arguments for each message in order into a
 * struct, and gathering all these enum values and argument structs
 * into one big discriminated union.  At this level, order codes are
 * implicit and built in, but there are still not necessarily any resume
 * parameters. <p>
 *
 * Next is the "twoway message level" or just the "twoway level".
 * Here, a capability is still described as a set of messages, but any
 * one of these messages may instead be declared as twoway.  At the
 * twoway level, the resume parameter is implicit and built in as
 * well.  Instead, OUT parameters and a list of RAISEd Exceptions are
 * added to the oneway message declaration.  These effectively declare
 * the type of the Resume parameter, at the price of imposing some
 * conventional restrictions.  Specifically, the Resume parameter type
 * may only have one success order code, and all other order codes
 * must either be well known system exception codes, or must pass only
 * an Exception (which will be one of those in the RAISES clause).
 * The twoway level expands into the oneway level by turning these
 * extra declarations into an explicit type on an explicit Resume
 * parameter. 
 */
/*
 * Two syntactic forms: a single typed, named argument in parentheses
 * (struct level), or a braced body of messages (message levels).
 * Annotations use C-style comments, the only comment form portable
 * across yacc implementations.
 */
interface_dcl:
        INTERFACE name_def '(' type name_def ')'   /* struct-level */
 |      INTERFACE name_def '{' messages '}'        /* message-levels */
 ;

/**
 * Message declarations, interspersed with name definitions for this
 * scope 
 */
messages
        : /* empty */
        | messages message      /* left-recursive accumulation */
        ;
/* One entry of an interface body: a name definition or a message declaration. */
message
        : definition
        | message_dcl
        ;

/*
 * A message declaration in one of two forms: ONEWAY with plain
 * parameters, or a return type with param_2s and an optional RAISES
 * clause.  Annotations use C-style comments, the only comment form
 * portable across yacc implementations.
 */
message_dcl:
        ONEWAY name_def '(' params ')'                      /* oneway level */
 |      ret_type name_def '(' param_2s ')' opt_raises       /* twoway level */
 ;

/**
 * In expanding to the struct level, all the params are gathered into
 * a struct.  First come all the capability arguments, and then all
 * the pure data parameters.  A param cannot be of mixed data and
 * capability type.
 */
params
        : /* empty */           /* no parameters at all */
        | param_list            /* one or more, comma-separated */
        ;

/* A non-empty, comma-separated list of parameters. */
param_list
        : param
        | param_list ',' param
        ;

/**
 * The parameter name is defined within the scope of this message
 * name.  In this one regard, each individual named message is also a
 * named nested scope.  This allows, for example, a following REPR
 * clause to give placement advice on a parameter by referring to it as
 * "messageName::parameterName" 
 */
param
        : type name_def         /* parameter type, then parameter name */
        ;

/**
 * In expanding to the oneway level, a non-VOID return type becomes a
 * first OUT argument of the same type, and the message is left with a
 * VOID return type.  The generated out parameter will be named
 * "_result", and so REPR advice can refer to the result by this name
 * even when the return type syntax is used.
 */
ret_type
        : type                  /* a non-VOID return type */
        | VOID                  /* no return value */
        ;

/* The parameter list of a twoway-level message; may be empty. */
param_2s
        : /* empty */
        | param_2_list
        ;

/* A non-empty, comma-separated list of twoway-level parameters. */
param_2_list
        : param_2
        | param_2_list ',' param_2
        ;

/**
 * In expanding to the oneway level, a normal (IN) parameter is left
 * alone, but the out parameters are gathered together to form the
 * normal parameter list of the oneway success message to the Resume
 * parameter. 
 */
param_2
        : param                 /* normal (IN) parameter */
        | OUT param             /* out parameter */
        ;

/**
 * In expanding to the oneway level, each Exception listed in the
 * RAISES clause becomes a separate oneway problem reporting message
 * on the Resume parameter type, whose argument is just that
 * exception.  Should the order codes for these problem messages come
 * from the Exception declaration or the RAISES clause?  The first
 * would seem to make more sense, except there's no natural way to
 * coordinate uniqueness.  How do EROS or KeyKOS currently assign
 * order codes for reporting problems?
 */
opt_raises
        : /* empty */           /* no RAISES clause given */
        | raises
        ;

/* The RAISES keyword followed by a parenthesized list of exceptions. */
raises
        : RAISES '(' exceptions ')'
        ;

/*
 * The contents of a RAISES clause: either nothing, or a
 * comma-separated list of scoped names.  The non-empty list has its
 * own rule (mirroring params / param_list) so that the first name is
 * not preceded by a comma.
 */
exceptions:
        /*empty*/
 |      exception_list
 ;

exception_list:
        scoped_name
 |      exception_list ',' scoped_name
 ;




/********************** Representation ***************************/


/**
 * A placeholder for representation advice, including placement
 * advice.  Because, by Corba scoping rules, names in subscopes can be
 * referred to using paths, REPR advice could appear anywhere in the
 * compilation unit (ie, source file) after the declaration of the
 * thing being advised.  (Is there a possible security problem with
 * this?)  However, good style is for the REPR advice to follow as
 * closely as possible the declarations it is advising. <p>
 *
 * The reason the rest of the spec was careful never to refer to bit
 * representation, but rather speaks in terms of subranges (except for
 * floating point, where it's unavoidable), is that the rest of the
 * spec is only about semantics, not representation.  The REPR advice can
 * therefore be only about representation, not semantics. <p>
 *
 * Conflicting advice, or advice that specifies a representation not
 * able to preserve the semantics (such as insufficient bits for a
 * given subrange) must be caught and reported statically, and must
 * cause a failure to compile. <p>
 *
 * In the absence of advice, default advice applies.  This is
 * appropriate for human written source, but is a poor way for
 * programs to speak to other programs.  Instead, there needs to be a
 * tool for turning source capidl files into fully-advised capidl
 * files.  The fully advised files are likely to be canonicalized in
 * other ways as well.  These will be written once and read many
 * times, so the goal is to make it easier on the reading program.
 * The getAllegedType query may even return a fast binary equivalent
 * to a fully advised and somewhat canonicalized capIDL file.
 */
repr_dcl
        : REPR '{' advisories '}'       /* braced group of advice entries */
        | REPR advice                   /* a single advice entry */
        ;

/* Zero or more advice entries, accumulated left-recursively. */
advisories
        : /* empty */
        | advisories advice
        ;

/**
 * The side to the right of the colon says how the thing named on the
 * left side should be represented, and where it should be placed.
 * This production is currently a placeholder until we figure out what
 * kind of advice we'd like to express.  As we figure this out, expect
 * the right side to grow. <p><pre>
 *
 * Some plausible meanings:
 *      enum_value: integer     // defines this enum value to be this integer
 *      message_name: integer   // gives the message this order code
 *      a_wstring: "UTF-8"      // represents the wide string in UTF-8
 *      a_wstring: "UTF-16"     // represents the wide string in UTF-16
 *      a_module: "LITTLE_ENDIAN" // inherited unless overridden?
 * </pre>
 */
advice
        : scoped_name           /* the thing being advised */
          ':' const_expr        /* how it should be represented */
          ';'
        ;


%%