Syntax Specification
The grammar and lexical specification for the standard ODIN syntax is shown below in ANTLR4 form.
//
// description: Antlr4 grammar for Object Data Instance Notation (ODIN)
// author: Thomas Beale <thomas.beale@openehr.org>
// contributors:Pieter Bos <pieter.bos@nedap.com>
// support: openEHR Specifications PR tracker <https://openehr.atlassian.net/projects/SPECPR/issues>
// copyright: Copyright (c) 2015- openEHR Foundation <http://www.openEHR.org>
// license: Apache 2.0 License <http://www.apache.org/licenses/LICENSE-2.0.html>
//
grammar odin;
import odin_values;
//
// -------------------------- Parse Rules --------------------------
//
odin_text :
attr_vals
| object_value_block
;
attr_vals : ( attr_val ';'? )+ ;
attr_val : odin_object_key '=' object_block ;
odin_object_key : ALPHA_UC_ID | ALPHA_UNDERSCORE_ID | rm_attribute_id ;
object_block :
object_value_block
| object_reference_block
;
object_value_block : ( '(' rm_type_id ')' )? '<' ( primitive_object | attr_vals? | keyed_object* ) '>' | EMBEDDED_URI;
keyed_object : '[' key_id ']' '=' object_block ;
key_id :
string_value
| integer_value
| date_value
| time_value
| date_time_value
;
object_reference_block : '<' odin_path_list '>' ;
odin_path_list : odin_path ( ( ',' odin_path )+ | SYM_LIST_CONTINUE )? ;
odin_path : '/' | ADL_PATH ;
rm_type_id : ALPHA_UC_ID ( '<' rm_type_id ( ',' rm_type_id )* '>' )? ;
rm_attribute_id : ALPHA_LC_ID ;
The following grammar defines ODIN terminal value syntax, and can be imported by any parser needing the ODIN values.
//
// grammar defining ODIN terminal value types, including atoms, lists and intervals
// author: Pieter Bos <pieter.bos@nedap.com>
// support: openEHR Specifications PR tracker <https://openehr.atlassian.net/projects/SPECPR/issues>
// copyright: Copyright (c) 2018- openEHR Foundation <http://www.openEHR.org>
//
grammar odin_values;
import base_lexer;
//
// ========================= Parser ============================
//
primitive_object :
primitive_value
| primitive_list_value
| primitive_interval_value
;
primitive_value :
string_value
| integer_value
| real_value
| boolean_value
| character_value
| term_code_value
| date_value
| time_value
| date_time_value
| duration_value
;
primitive_list_value :
string_list_value
| integer_list_value
| real_list_value
| boolean_list_value
| character_list_value
| term_code_list_value
| date_list_value
| time_list_value
| date_time_list_value
| duration_list_value
;
primitive_interval_value :
integer_interval_value
| real_interval_value
| date_interval_value
| time_interval_value
| date_time_interval_value
| duration_interval_value
;
string_value : STRING ;
string_list_value : string_value ( ( ',' string_value )+ | ',' SYM_LIST_CONTINUE ) ;
integer_value : ( '+' | '-' )? INTEGER ;
integer_list_value : integer_value ( ( ',' integer_value )+ | ',' SYM_LIST_CONTINUE ) ;
integer_interval_value :
'|' SYM_GT? integer_value '..' SYM_LT? integer_value '|'
| '|' relop? integer_value '|'
| '|' integer_value SYM_PLUS_OR_MINUS integer_value '|'
;
integer_interval_list_value : integer_interval_value ( ( ',' integer_interval_value )+ | ',' SYM_LIST_CONTINUE ) ;
real_value : ( '+' | '-' )? REAL ;
real_list_value : real_value ( ( ',' real_value )+ | ',' SYM_LIST_CONTINUE ) ;
real_interval_value :
'|' SYM_GT? real_value '..' SYM_LT? real_value '|'
| '|' relop? real_value '|'
| '|' real_value SYM_PLUS_OR_MINUS real_value '|'
;
real_interval_list_value : real_interval_value ( ( ',' real_interval_value )+ | ',' SYM_LIST_CONTINUE ) ;
boolean_value : SYM_TRUE | SYM_FALSE ;
boolean_list_value : boolean_value ( ( ',' boolean_value )+ | ',' SYM_LIST_CONTINUE ) ;
character_value : CHARACTER ;
character_list_value : character_value ( ( ',' character_value )+ | ',' SYM_LIST_CONTINUE ) ;
date_value : ISO8601_DATE ;
date_list_value : date_value ( ( ',' date_value )+ | ',' SYM_LIST_CONTINUE ) ;
date_interval_value :
'|' SYM_GT? date_value '..' SYM_LT? date_value '|'
| '|' relop? date_value '|'
| '|' date_value SYM_PLUS_OR_MINUS duration_value '|'
;
date_interval_list_value : date_interval_value ( ( ',' date_interval_value )+ | ',' SYM_LIST_CONTINUE ) ;
time_value : ISO8601_TIME ;
time_list_value : time_value ( ( ',' time_value )+ | ',' SYM_LIST_CONTINUE ) ;
time_interval_value :
'|' SYM_GT? time_value '..' SYM_LT? time_value '|'
| '|' relop? time_value '|'
| '|' time_value SYM_PLUS_OR_MINUS duration_value '|'
;
time_interval_list_value : time_interval_value ( ( ',' time_interval_value )+ | ',' SYM_LIST_CONTINUE ) ;
date_time_value : ISO8601_DATE_TIME ;
date_time_list_value : date_time_value ( ( ',' date_time_value )+ | ',' SYM_LIST_CONTINUE ) ;
date_time_interval_value :
'|' SYM_GT? date_time_value '..' SYM_LT? date_time_value '|'
| '|' relop? date_time_value '|'
| '|' date_time_value SYM_PLUS_OR_MINUS duration_value '|'
;
date_time_interval_list_value : date_time_interval_value ( ( ',' date_time_interval_value )+ | ',' SYM_LIST_CONTINUE ) ;
duration_value : ISO8601_DURATION ;
duration_list_value : duration_value ( ( ',' duration_value )+ | ',' SYM_LIST_CONTINUE ) ;
duration_interval_value :
'|' SYM_GT? duration_value '..' SYM_LT? duration_value '|'
| '|' relop? duration_value '|'
| '|' duration_value SYM_PLUS_OR_MINUS duration_value '|'
;
duration_interval_list_value : duration_interval_value ( ( ',' duration_interval_value )+ | ',' SYM_LIST_CONTINUE ) ;
term_code_value : TERM_CODE_REF ;
term_code_list_value : term_code_value ( ( ',' term_code_value )+ | ',' SYM_LIST_CONTINUE ) ;
relop : SYM_GT | SYM_LT | SYM_LE | SYM_GE ;
The following grammar defines lexer patterns of generic lexical tokens.
//
// General purpose patterns used in all openEHR parser and lexer tools
// author: Pieter Bos <pieter.bos@nedap.com>
// support: openEHR Specifications PR tracker <https://openehr.atlassian.net/projects/SPECPR/issues>
// copyright: Copyright (c) 2018- openEHR Foundation <http://www.openEHR.org>
//
lexer grammar base_lexer;
// ---------- whitespace & comments ----------
WS : [ \t\r]+ -> channel(HIDDEN) ;
LINE : '\r'? EOL -> channel(HIDDEN) ; // increment line count
CMT_LINE : '--' .*? '\r'? EOL -> skip ; // (increment line count)
fragment EOL : '\n' ;
// -------------- template overlay cannot be handled in a more simple way because it includes the comment line
SYM_TEMPLATE_OVERLAY : H_CMT_LINE (WS|LINE)* SYM_TEMPLATE_OVERLAY_ONLY;
fragment H_CMT_LINE : '--------' '-'*? ('\n'|'\r''\n'|'\r') ; // special type of comment for splitting template overlays
fragment SYM_TEMPLATE_OVERLAY_ONLY : [Tt][Ee][Mm][Pp][Ll][Aa][Tt][Ee]'_'[Oo][Vv][Ee][Rr][Ll][Aa][Yy] ;
// ---------- path patterns -----------
ADL_PATH : ADL_ABSOLUTE_PATH | ADL_RELATIVE_PATH;
fragment ADL_ABSOLUTE_PATH : ('/' ADL_PATH_SEGMENT)+;
fragment ADL_RELATIVE_PATH : ADL_PATH_SEGMENT ('/' ADL_PATH_SEGMENT)+;
fragment ADL_PATH_SEGMENT : ALPHA_LC_ID ('[' ADL_PATH_ATTRIBUTE ']')?;
fragment ADL_PATH_ATTRIBUTE : ID_CODE | STRING | INTEGER | ARCHETYPE_REF | ARCHETYPE_HRID;
// ---------- ISO8601-based date/time/duration constraint patterns
DATE_CONSTRAINT_PATTERN : YEAR_PATTERN '-' MONTH_PATTERN '-' DAY_PATTERN ;
TIME_CONSTRAINT_PATTERN : HOUR_PATTERN ':' MINUTE_PATTERN ':' SECOND_PATTERN TZ_PATTERN? ;
DATE_TIME_CONSTRAINT_PATTERN : DATE_CONSTRAINT_PATTERN 'T' TIME_CONSTRAINT_PATTERN ;
fragment YEAR_PATTERN : 'yyyy' | 'YYYY' | 'yyy' | 'YYY' ;
fragment MONTH_PATTERN : 'mm' | 'MM' | '??' | 'XX' | 'xx' ;
fragment DAY_PATTERN : 'dd' | 'DD' | '??' | 'XX' | 'xx' ;
fragment HOUR_PATTERN : 'hh' | 'HH' | '??' | 'XX' | 'xx' ;
fragment MINUTE_PATTERN : 'mm' | 'MM' | '??' | 'XX' | 'xx' ;
fragment SECOND_PATTERN : 'ss' | 'SS' | '??' | 'XX' | 'xx' ;
fragment TZ_PATTERN : '±' ('hh' | 'HH') (':'? ('mm' | 'MM'))? | 'Z' ;
DURATION_CONSTRAINT_PATTERN : 'P' [yY]?[mM]?[Ww]?[dD]? ( 'T' [hH]?[mM]?[sS]? )? ;
// ---------- Delimited Regex matcher ------------
// In ADL, a regexp can only exist between {}.
// allows for '/' or '^' delimiters
// logical form - REGEX: '/' ( '\\/' | ~'/' )+ '/' | '^' ( '\\^' | ~'^' )+ '^';
// The following is used to ensure REGEXes don't get mixed up with paths, which use '/' chars
//a
// regexp can only exist between {}. It can optionally have an assumed value, by adding ;"value"
CONTAINED_REGEXP: '{'WS* (SLASH_REGEXP | CARET_REGEXP) WS* (';' WS* STRING)? WS* '}';
fragment SLASH_REGEXP: '/' SLASH_REGEXP_CHAR+ '/';
fragment SLASH_REGEXP_CHAR: ~[/\n\r] | ESCAPE_SEQ | '\\/';
fragment CARET_REGEXP: '^' CARET_REGEXP_CHAR+ '^';
fragment CARET_REGEXP_CHAR: ~[^\n\r] | ESCAPE_SEQ | '\\^';
// ---------- various ADL2 codes -------
ROOT_ID_CODE : 'id1' '.1'* ;
ID_CODE : 'id' CODE_STR ;
AT_CODE : 'at' CODE_STR ;
AC_CODE : 'ac' CODE_STR ;
fragment CODE_STR : ('0' | [1-9][0-9]*) ( '.' ('0' | [1-9][0-9]* ))* ;
// ---------- ISO8601 Date/Time values ----------
ISO8601_DATE : YEAR '-' MONTH ( '-' DAY )? | YEAR '-' MONTH '-' UNKNOWN_DT | YEAR '-' UNKNOWN_DT '-' UNKNOWN_DT ;
ISO8601_TIME : ( HOUR ':' MINUTE ( ':' SECOND ( SECOND_DEC_SEP DIGIT+ )?)? | HOUR ':' MINUTE ':' UNKNOWN_DT | HOUR ':' UNKNOWN_DT ':' UNKNOWN_DT ) TIMEZONE? ;
ISO8601_DATE_TIME : ( YEAR '-' MONTH '-' DAY 'T' HOUR (':' MINUTE (':' SECOND ( SECOND_DEC_SEP DIGIT+ )?)?)? | YEAR '-' MONTH '-' DAY 'T' HOUR ':' MINUTE ':' UNKNOWN_DT | YEAR '-' MONTH '-' DAY 'T' HOUR ':' UNKNOWN_DT ':' UNKNOWN_DT ) TIMEZONE? ;
fragment TIMEZONE : 'Z' | ('+'|'-') HOUR_MIN ; // hour offset, e.g. `+0930`, or else literal `Z` indicating +0000.
fragment YEAR : [0-9][0-9][0-9][0-9] ; // Year in ISO8601:2004 is 4 digits with 0-filling as needed
fragment MONTH : ( [0][1-9] | [1][0-2] ) ; // month in year
fragment DAY : ( [0][1-9] | [12][0-9] | [3][0-1] ) ; // day in month
fragment HOUR : ( [01]?[0-9] | [2][0-3] ) ; // hour in 24 hour clock
fragment MINUTE : [0-5][0-9] ; // minutes
fragment HOUR_MIN : ( [01]?[0-9] | [2][0-3] ) [0-5][0-9] ; // hour / minutes quad digit pattern
fragment SECOND : [0-5][0-9] ; // seconds
fragment SECOND_DEC_SEP : '.' | ',' ;
fragment UNKNOWN_DT : '??' ; // any unknown date/time value, except years.
// ISO8601 DURATION PnYnMnWnDTnnHnnMnn.nnnS
// here we allow a deviation from the standard to allow weeks to be // mixed in with the rest since this commonly occurs in medicine
// TODO: the following will incorrectly match just 'P'
ISO8601_DURATION : '-'?'P' (DIGIT+ [yY])? (DIGIT+ [mM])? (DIGIT+ [wW])? (DIGIT+[dD])? ('T' (DIGIT+[hH])? (DIGIT+[mM])? (DIGIT+ (SECOND_DEC_SEP DIGIT+)?[sS])?)? ;
// ------------------- special word symbols --------------
SYM_TRUE : [Tt][Rr][Uu][Ee] ;
SYM_FALSE : [Ff][Aa][Ll][Ss][Ee] ;
// ---------------------- Identifiers ---------------------
ARCHETYPE_HRID : ARCHETYPE_HRID_ROOT '.v' ARCHETYPE_VERSION_ID ;
ARCHETYPE_REF : ARCHETYPE_HRID_ROOT '.v' INTEGER ( '.' DIGIT+ )* ;
fragment ARCHETYPE_HRID_ROOT : (NAMESPACE '::')? IDENTIFIER '-' IDENTIFIER '-' IDENTIFIER '.' LABEL ;
fragment ARCHETYPE_VERSION_ID: DIGIT+ ('.' DIGIT+ ('.' DIGIT+ ( ( '-rc' | '-alpha' | '-beta' ) ( '.' DIGIT+ )? )?)?)? ;
VERSION_ID : DIGIT+ '.' DIGIT+ '.' DIGIT+ ( ( '-rc' | '-alpha' | '-beta' ) ( '.' DIGIT+ )? )? ;
fragment IDENTIFIER : ALPHA_CHAR WORD_CHAR* ;
// --------------------- composed primitive types -------------------
// e.g. [ICD10AM(1998)::F23]; [ISO_639-1::en]
TERM_CODE_REF : '[' TERM_CODE_CHAR+ ( '(' TERM_CODE_CHAR+ ')' )? '::' TERM_CODE_CHAR+ ']' ;
fragment TERM_CODE_CHAR: NAME_CHAR | '.';
// --------------------- URIs --------------------
// URI recogniser based on https://tools.ietf.org/html/rfc3986 and
// http://www.w3.org/Addressing/URL/5_URI_BNF.html
EMBEDDED_URI: '<' ([ \t\r\n]|CMT_LINE)* URI ([ \t\r\n]|CMT_LINE)* '>';
fragment URI : URI_SCHEME ':' URI_HIER_PART ( '?' URI_QUERY )? ('#' URI_FRAGMENT)? ;
fragment URI_HIER_PART : ( '//' URI_AUTHORITY ) URI_PATH_ABEMPTY
| URI_PATH_ABSOLUTE
| URI_PATH_ROOTLESS
| URI_PATH_EMPTY;
fragment URI_SCHEME : ALPHA_CHAR ( ALPHA_CHAR | DIGIT | '+' | '-' | '.')* ;
fragment URI_AUTHORITY : ( URI_USERINFO '@' )? URI_HOST ( ':' URI_PORT )? ;
fragment URI_USERINFO: (URI_UNRESERVED | URI_PCT_ENCODED | URI_SUB_DELIMS | ':' )* ;
fragment URI_HOST : URI_IP_LITERAL | URI_IPV4_ADDRESS | URI_REG_NAME ; //TODO: ipv6
fragment URI_PORT: DIGIT*;
fragment URI_IP_LITERAL : '[' URI_IPV6_LITERAL ']'; //TODO, if needed: IPvFuture
fragment URI_IPV4_ADDRESS : URI_DEC_OCTET '.' URI_DEC_OCTET '.' URI_DEC_OCTET '.' URI_DEC_OCTET ;
fragment URI_IPV6_LITERAL : HEX_QUAD (':' HEX_QUAD )* ':' ':' HEX_QUAD (':' HEX_QUAD )* ;
fragment URI_DEC_OCTET : DIGIT | [1-9] DIGIT | '1' DIGIT DIGIT | '2' [0-4] DIGIT | '25' [0-5];
fragment URI_REG_NAME: (URI_UNRESERVED | URI_PCT_ENCODED | URI_SUB_DELIMS)*;
fragment HEX_QUAD : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT ;
fragment URI_PATH_ABEMPTY: ('/' URI_SEGMENT ) *;
fragment URI_PATH_ABSOLUTE: '/' ( URI_SEGMENT_NZ ( '/' URI_SEGMENT )* )?;
fragment URI_PATH_NOSCHEME: URI_SEGMENT_NZ_NC ( '/' URI_SEGMENT )*;
fragment URI_PATH_ROOTLESS: URI_SEGMENT_NZ ( '/' URI_SEGMENT )*;
fragment URI_PATH_EMPTY: ;
fragment URI_SEGMENT: URI_PCHAR*;
fragment URI_SEGMENT_NZ: URI_PCHAR+;
fragment URI_SEGMENT_NZ_NC: ( URI_UNRESERVED | URI_PCT_ENCODED | URI_SUB_DELIMS | '@' )+; //non-zero-length segment without any colon ":"
fragment URI_PCHAR: URI_UNRESERVED | URI_PCT_ENCODED | URI_SUB_DELIMS | ':' | '@';
//fragment URI_PATH : '/' | ( '/' URI_XPALPHA+ )+ ('/')?;
fragment URI_QUERY : (URI_PCHAR | '/' | '?')*;
fragment URI_FRAGMENT : (URI_PCHAR | '/' | '?')*;
fragment URI_PCT_ENCODED : '%' HEX_DIGIT HEX_DIGIT ;
fragment URI_UNRESERVED: ALPHA_CHAR | DIGIT | '-' | '.' | '_' | '~';
fragment URI_RESERVED: URI_GEN_DELIMS | URI_SUB_DELIMS;
fragment URI_GEN_DELIMS: ':' | '/' | '?' | '#' | '[' | ']' | '@'; //TODO: migrate to [/?#...] notation
fragment URI_SUB_DELIMS: '!' | '$' | '&' | '\'' | '(' | ')'
| '*' | '+' | ',' | ';' | '=';
// According to IETF http://tools.ietf.org/html/rfc1034[RFC 1034] and http://tools.ietf.org/html/rfc1035[RFC 1035],
// as clarified by http://tools.ietf.org/html/rfc2181[RFC 2181] (section 11)
fragment NAMESPACE : LABEL ('.' LABEL)* ;
fragment LABEL : ALPHA_CHAR (NAME_CHAR|URI_PCT_ENCODED)* ;
GUID : HEX_DIGIT+ '-' HEX_DIGIT+ '-' HEX_DIGIT+ '-' HEX_DIGIT+ '-' HEX_DIGIT+ ;
ALPHA_UC_ID : ALPHA_UCHAR WORD_CHAR* ; // used for type ids
ALPHA_LC_ID : ALPHA_LCHAR WORD_CHAR* ; // used for attribute / method ids
ALPHA_UNDERSCORE_ID : '_' WORD_CHAR* ; // usually used for meta-model ids
// --------------------- atomic primitive types -------------------
INTEGER : DIGIT+ E_SUFFIX? ;
REAL : DIGIT+ '.' DIGIT+ E_SUFFIX? ;
fragment E_SUFFIX : [eE][+-]? DIGIT+ ;
STRING : '"' STRING_CHAR*? '"' ;
fragment STRING_CHAR : ~["\\] | ESCAPE_SEQ | UTF8CHAR ; // strings can be multi-line
CHARACTER : '\'' CHAR '\'' ;
fragment CHAR : ~['\\\r\n] | ESCAPE_SEQ | UTF8CHAR ;
fragment ESCAPE_SEQ: '\\' ['"?abfnrtv\\] ;
// ------------------- character fragments ------------------
fragment NAME_CHAR : WORD_CHAR | '-' ;
fragment WORD_CHAR : ALPHANUM_CHAR | '_' ;
fragment ALPHANUM_CHAR : ALPHA_CHAR | DIGIT ;
fragment ALPHA_CHAR : [a-zA-Z] ;
fragment ALPHA_UCHAR : [A-Z] ;
fragment ALPHA_LCHAR : [a-z] ;
fragment UTF8CHAR : '\\u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT ;
fragment DIGIT : [0-9] ;
fragment HEX_DIGIT : [0-9a-fA-F] ;
// -------------------- common symbols ---------------------
SYM_COMMA: ',' ;
SYM_SEMI_COLON : ';' ;
SYM_LPAREN : '(';
SYM_RPAREN : ')';
SYM_LBRACKET : '[';
SYM_RBRACKET : ']';
SYM_LCURLY : '{' ;
SYM_RCURLY : '}' ;
// --------- symbols ----------
SYM_ASSIGNMENT: ':=' | '::=' ;
SYM_NE : '/=' | '!=' | '≠' ;
SYM_EQ : '=' ;
SYM_GT : '>' ;
SYM_LT : '<' ;
SYM_LE : '<=' | '≤' ;
SYM_GE : '>=' | '≥' ;
// TODO: remove when [] path predicates supported
VARIABLE_WITH_PATH: VARIABLE_ID ADL_ABSOLUTE_PATH ;
VARIABLE_ID: '$' ALPHA_LC_ID ;
//
// ========================= Lexer ============================
//
// -------------------- symbols for lists ------------------------
SYM_LIST_CONTINUE: '...' ;
// ------------------ symbols for intervals ----------------------
SYM_PLUS : '+' ;
SYM_MINUS : '-' ;
SYM_PLUS_OR_MINUS : '+/-' | '±' ;
SYM_PERCENT : '%' ;
SYM_CARAT: '^' ;
SYM_IVL_DELIM: '|' ;
SYM_IVL_SEP : '..' ;