[Mulgara-svn] r603 - branches/mgr-61-sparql/src/jar/sparql/grammar
pag at mulgara.org
pag at mulgara.org
Sun Dec 9 20:42:41 UTC 2007
Author: pag
Date: 2007-12-09 14:42:41 -0600 (Sun, 09 Dec 2007)
New Revision: 603
Modified:
branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex
branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar
Log:
Expanded on the lexer capability, including some grammar updates. Not yet compiling.
Modified: branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex
===================================================================
--- branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex 2007-12-09 20:39:03 UTC (rev 602)
+++ branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex 2007-12-09 20:42:41 UTC (rev 603)
@@ -3,7 +3,7 @@
import beaver.Symbol;
import beaver.Scanner;
-import org.mulgara.sparql.compiler.ExpressionParser.Terminals;
+import static org.mulgara.sparql.compiler.ExpressionParser.Terminals.*;
%%
@@ -21,128 +21,155 @@
%column
%{
StringBuffer string = new StringBuffer(128);
- private Symbol newToken(short id)
- {
+
+ private Symbol newToken(short id) {
return new Symbol(id, yyline + 1, yycolumn + 1, yylength());
}
- private Symbol newToken(short id, Object value)
- {
+ private Symbol newToken(short id, Object value) {
return new Symbol(id, yyline + 1, yycolumn + 1, yylength(), value);
}
+
+ private String chop(String str) {
+ return str.substring(0, str.length() - 1);
+ }
+
+ private String unquote(String str) {
+ return str.substring(1, str.length() - 1);
+ }
+
%}
+
LineTerminator = \r|\n|\r\n
WhiteSpace = {LineTerminator} | [ \t\f]
+
+IriRef = "<" [^<>{}\"\\|^`\x00-\x20]* ">"
+BlankPrefix = "_:"
+LangtagStart = "@"
+Langtag = [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
+Integer = [0-9]+
+Decimal = [0-9]+ "." [0-9]* | "." [0-9]+
+Exponent = [eE][+-]?[0-9]+
+Double = [0-9]+ "." [0-9]* {Exponent} | "." ([0-9])+ {Exponent} | ([0-9])+ {Exponent}
+IntegerPositive = "+" {Integer}
+DecimalPositive = "+" {Decimal}
+DoublePositive = "+" {Double}
+IntegerNegative = "-" {Integer}
+DecimalNegative = "-" {Decimal}
+DoubleNegative = "-" {Double}
+StringLiteralDelim1 = "'"
+StringLiteralDelim2 = \"
+StringLiteralLongDelim1 = "'''"
+StringLiteralLongDelim2 = \"\"\"
+Echar = "\\" [tbnrf'\"\\]
+LiteralChar1 = (([^\x27\x5C\x0A\x0D]) | {Echar})*
+LiteralChar2 = (([^\x22\x5C\x0A\x0D]) | {Echar})*
WS = \u0020 | \u0009 | \u000D | \u000A
-StringCharacter = [^\r\n\"\\]
-
-Number = [:digit:] [:digit:]*
-Identifier = [:jletter:][:jletterdigit:]*
-Variable = [$?]{Identifier}
+Nil = "(" {WS}* ")"
+Anon = "[" {WS}* "]"
+PnCharsBase = [A-Z] | [a-z] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD] | [\U010000-\U0EFFFF]
+PnCharsU = {PnCharsBase} | "_"
VariableStart = [$?]
-NIL = \({WS}*\)
-Q_IRI_REF = <[^<>{}]*>
-NCCHAR1P = [a-zA-Z]
-NCCHAR1 = {NCCHAR1P} | "_"
-NCCHAR = {NCCHAR1} | "-" | [0-9]
-NCNAME_PREFIX = {NCCHAR1P} (({NCCHAR} | ".")* {NCCHAR})?
-NCNAME = {NCCHAR1}(({NCCHAR} | ".")* {NCCHAR})?
-QNAME_NS = {NCNAME_PREFIX}?[ ]*:
-QNAME_LN = {QNAME_NS}{NCNAME}
+Varname = ({PnCharsU} | [0-9]) ({PnCharsU} | [0-9] | \u00B7 | [\u0300-\u036F] | [\u203F-\u2040])*
+PnChars = {PnCharsU} | "-" | [0-9] | \u00B7 | [\u0300-\u036F] | [\u203F-\u2040]
+PnPrefix = {PnCharsBase} (({PnChars} | ".")* {PnChars})?
+PnLocal = ({PnCharsU} | [0-9]) (({PnChars} | ".")* {PnChars})?
-NCNAME = {NCCHAR1} (({NCCHAR} | ".")* {NCCHAR})?
-ORDER_BY = ORDER{WhiteSpace}+BY
+OrderBy = "ORDER" {WhiteSpace}+ "BY"
-SignlessInteger = {Number}
-SignlessDecimal = [.]{Number} | {Number}[.][:digit:]*
-Exponent = [eE][+-]?{Number}
-SignlessDouble = [.]{Number}{Exponent} | {Number}[.][:digit:]*{Exponent} | {Number}{Exponent}
-SignedInteger = [+-]{SignlessInteger}
-SignedDecimal = [+-]{SignlessDecimal}
-SignedDouble = [+-]{SignlessDouble}
-Integer = {SignlessInteger} | {SignedInteger}
-Decimal = {SignlessDecimal} | {SignedDecimal}
-Double = {SignlessDouble} | {SignedDouble}
+%state YYSTRING_LITERAL_LONG1
+%state YYSTRING_LITERAL_LONG2
-
-%state STRING
-%state VARIABLE
-
%%
{WhiteSpace}+ { /* ignore */ }
<YYINITIAL> {
- {Integer} { return newToken(Terminals.INTEGER, new Integer(yytext())); }
- {Decimal} { return newToken(Terminals.DECIMAL, new Float(yytext())); }
- {Double} { return newToken(Terminals.DOUBLE, new Double(yytext())); }
+ {Double} { return newToken(DOUBLE, new Double(yytext())); }
+ {Decimal} { return newToken(DECIMAL, new Float(yytext())); }
+ {Integer} { return newToken(INTEGER, new Integer(yytext())); }
+ {DoublePositive} { return newToken(DOUBLE_POSITIVE, new Double(yytext())); }
+ {DecimalPositive} { return newToken(DECIMAL_POSITIVE, new Float(yytext())); }
+ {IntegerPositive} { return newToken(INTEGER_POSITIVE, new Integer(yytext())); }
+ {DoubleNegative} { return newToken(DOUBLE_NEGATIVE, new Double(yytext())); }
+ {DecimalNegative} { return newToken(DECIMAL_NEGATIVE, new Float(yytext())); }
+ {IntegerNegative} { return newToken(INTEGER_NEGATIVE, new Integer(yytext())); }
- "(" { return newToken(Terminals.LPAREN, yytext()); }
- ")" { return newToken(Terminals.RPAREN, yytext()); }
- "*" { return newToken(Terminals.MULT, yytext()); }
- "/" { return newToken(Terminals.DIV, yytext()); }
- "+" { return newToken(Terminals.PLUS, yytext()); }
- "-" { return newToken(Terminals.MINUS, yytext()); }
- "SELECT"|"select" { return newToken(Terminals.SELECT, yytext()); }
- "DISTINCT"|"distinct" {return newToken(Terminals.DISTINCT, yytext()); }
- "REDUCED"|"reduced" { return newToken(Terminals.REDUCED, yytext()); }
- "FROM"|"from" { return newToken(Terminals.FROM, yytext()); }
- "WHERE"|"where" { System.out.println("flex: doing the where"); return newToken(Terminals.WHERE, yytext()); }
- "{" { System.out.println("flex: Got the lbrace"); return newToken(Terminals.LBRACE, yytext()); }
- "}" { System.out.println("flex: Got the rbrace");return newToken(Terminals.RBRACE, yytext()); }
- "." { return newToken(Terminals.DOT, yytext()); }
- "OPTIONAL"|"optional" { return newToken(Terminals.OPTIONAL, yytext()); }
- "FILTER" | "filter" { return newToken(Terminals.FILTER, yytext()); }
- "||" { return newToken(Terminals.OR, yytext()); }
- "&&" { return newToken(Terminals.AND, yytext()); }
- "=" { return newToken(Terminals.EQUALS, yytext()); }
- "!=" { return newToken(Terminals.NOT_EQUALS, yytext()); }
- "<" { return newToken(Terminals.LESS_THAN, yytext()); }
- "<=" { return newToken(Terminals.LESS_THAN_EQUALS, yytext()); }
- ">" { return newToken(Terminals.GREATER_THAN, yytext()); }
- ">=" { return newToken(Terminals.GREATER_THAN_EQUALS, yytext()); }
- "!" { return newToken(Terminals.NOT, yytext()); }
- "STR" | "str" { return newToken(Terminals.STR, yytext()); }
- "LANG" | "lang" { return newToken(Terminals.LANG, yytext()); }
- "LANGMATCHES" | "langmatches" { return newToken(Terminals.LANGMATCHES, yytext()); }
- "DATATYPE" | "datatype" { return newToken(Terminals.DATATYPE, yytext()); }
- "BOUND" | "bound" { return newToken(Terminals.BOUND, yytext()); }
- "SAMETERM" | "sameterm" | "sameTerm" { return newToken(Terminals.SAME_TERM, yytext()); }
- "ISIRI" | "isiri" | "isIRI" { return newToken(Terminals.IS_IRI, yytext()); }
- "ISURI" | "isuri" | "isURI" { return newToken(Terminals.IS_URI, yytext()); }
- "ISBLANK" | "isblank" | "isBLANK" { return newToken(Terminals.IS_BLANK, yytext()); }
- "ISLITERAL" | "isliteral" | "isLITERAL" { return newToken(Terminals.IS_LITERAL, yytext()); }
- "REGEX" | "regex" { return newToken(Terminals.REGEX, yytext()); }
- ";" { return newToken(Terminals.SEMI_COLON, yytext()); }
- "," { return newToken(Terminals.COMMA, yytext()); }
- "a" { return newToken(Terminals.LETTER_A, yytext()); }
- "NAMED" | "named" { return newToken(Terminals.NAMED, yytext()); }
- "UNION" | "union" { return newToken(Terminals.UNION, yytext()); }
- "GRAPH" | "graph" { return newToken(Terminals.GRAPH, yytext()); }
- {VariableStart} { string.setLength(0); yybegin(VARIABLE); }
- \" { yybegin(STRING); string.setLength(0); }
- {Q_IRI_REF} { return newToken(Terminals.Q_IRI_REF, yytext()); }
- {QNAME_NS} { return newToken(Terminals.QNAME_NS, yytext()); }
- {QNAME_LN} { return newToken(Terminals.QNAME_LN, yytext()); }
- {NIL} { return newToken(Terminals.NIL, yytext()); }
- {ORDER_BY} { return newToken(Terminals.ORDER_BY, yytext()); }
- "ASC" | "asc" { return newToken(Terminals.ASC, yytext()); }
- "DESC" | "desc" { return newToken(Terminals.DESC, yytext()); }
- "LIMIT" | "limit" { return newToken(Terminals.LIMIT, yytext()); }
- "OFFSET" | "offset" { return newToken(Terminals.OFFSET, yytext()); }
+ {PnPrefix}? ":" { return newToken(PNAME_NS, chop(yytext())); }
+ {VariableStart}{Varname} { return newToken(VARNAME, yytext().substring(1)); }
+ {LangtagStart}{Langtag} { return newToken(LANGTAG, yytext().substring(1)); }
+ {BlankPrefix}{PnLocal} { return newToken(BLANK_NODE, yytext().substring(2)); }
+ {IriRef} { return newToken(IRI_REF, unquote(yytext())); }
+ {Anon} { return newToken(ANON); }
+ {Nil} { return newToken(NIL); }
+ "'" {LiteralChar1} "'" { return newToken(STRING_LITERAL, unquote(yytext())); }
+ \" {LiteralChar2} \" { return newToken(STRING_LITERAL, unquote(yytext())); }
+ {StringLiteralLongDelim1} { string.setLength(0); yybegin(YYSTRING_LITERAL_LONG1); }
+ {StringLiteralLongDelim2} { string.setLength(0); yybegin(YYSTRING_LITERAL_LONG2); }
+
+ "(" { return newToken(LPAREN); }
+ ")" { return newToken(RPAREN); }
+ "*" { return newToken(MULT); }
+ "/" { return newToken(DIV); }
+ "+" { return newToken(PLUS); }
+ "-" { return newToken(MINUS); }
+ "SELECT"|"select" { return newToken(SELECT); }
+ "DISTINCT"|"distinct" { return newToken(DISTINCT); }
+ "REDUCED"|"reduced" { return newToken(REDUCED); }
+ "FROM"|"from" { return newToken(FROM); }
+ "WHERE"|"where" { System.out.println("flex: doing the where"); return newToken(WHERE); }
+ "{" { System.out.println("flex: Got the lbrace"); return newToken(LBRACE); }
+ "}" { System.out.println("flex: Got the rbrace");return newToken(RBRACE); }
+ "." { return newToken(DOT); }
+ "OPTIONAL"|"optional" { return newToken(OPTIONAL); }
+ "FILTER" | "filter" { return newToken(FILTER); }
+ "||" { return newToken(OR); }
+ "&&" { return newToken(AND); }
+ "=" { return newToken(EQUALS); }
+ "!=" { return newToken(NOT_EQUALS); }
+ "<" { return newToken(LESS_THAN); }
+ "<=" { return newToken(LESS_THAN_EQUALS); }
+ ">" { return newToken(GREATER_THAN); }
+ ">=" { return newToken(GREATER_THAN_EQUALS); }
+ "!" { return newToken(NOT); }
+ "^^" { return newToken(LITERAL_TYPE_SEP); }
+ "STR" | "str" { return newToken(STR); }
+ "DATATYPE" | "datatype" { return newToken(DATATYPE); }
+ "BOUND" | "bound" { return newToken(BOUND); }
+ "LANG" | "lang" { return newToken(LANG); }
+ "LANGMATCHES" | "langmatches" { return newToken(LANGMATCHES); }
+ "SAMETERM" | "sameterm" | "sameTerm" { return newToken(SAME_TERM); }
+ "ISIRI" | "isiri" | "isIRI" { return newToken(IS_IRI); }
+ "ISURI" | "isuri" | "isURI" { return newToken(IS_URI); }
+ "ISBLANK" | "isblank" | "isBLANK" { return newToken(IS_BLANK); }
+ "ISLITERAL" | "isliteral" | "isLITERAL" { return newToken(IS_LITERAL); }
+ "REGEX" | "regex" { return newToken(REGEX); }
+ ";" { return newToken(SEMI_COLON); }
+ "," { return newToken(COMMA); }
+ "a" { return newToken(LETTER_A); }
+ "NAMED" | "named" { return newToken(NAMED); }
+ "UNION" | "union" { return newToken(UNION); }
+ "GRAPH" | "graph" { return newToken(GRAPH); }
+ "ASC" | "asc" { return newToken(ASC); }
+ "DESC" | "desc" { return newToken(DESC); }
+ "LIMIT" | "limit" { return newToken(LIMIT); }
+ "OFFSET" | "offset" { return newToken(OFFSET); }
+ {OrderBy} { return newToken(ORDER_BY); }
+
}
-<VARIABLE> {
- {Identifier} { yybegin(YYINITIAL); return newToken(Terminals.VARIABLE, yytext()); }
+<YYSTRING_LITERAL_LONG1> {
+ {StringLiteralLongDelim1} { /* closing delimiter: leave the string state and emit the accumulated text */ yybegin(YYINITIAL); return newToken(STRING_LITERAL, string.toString()); }
+ ([^'\\] | {Echar})+ { /* one or more body characters or escapes; '+' keeps the rule from matching the empty string */ string.append(yytext()); }
+ "'" { /* a single quote that does not start the closing delimiter is ordinary content */ string.append("'"); }
+}
-<STRING> {
- \" {
- }
- {StringCharacter}+ { string.append(yytext()); }
+<YYSTRING_LITERAL_LONG2> {
+ {StringLiteralLongDelim2} { /* closing delimiter: leave the string state and emit the accumulated text */ yybegin(YYINITIAL); return newToken(STRING_LITERAL, string.toString()); }
+ ([^\"\\] | {Echar})+ { /* one or more body characters or escapes; '+' keeps the rule from matching the empty string */ string.append(yytext()); }
+ \" { /* a double quote that does not start the closing delimiter is ordinary content */ string.append("\""); }
+}
-
.|\n { throw new Scanner.Exception("unexpected character '" + yytext() + "'"); }
Modified: branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar
===================================================================
--- branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar 2007-12-09 20:39:03 UTC (rev 602)
+++ branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar 2007-12-09 20:42:41 UTC (rev 603)
@@ -35,7 +35,7 @@
%typeof verb_object_list_list, verb_object_list_list_element = "ArrayList<Triple>";
%typeof prop_list_not_empty = "ArrayList<Triple>";
%typeof triples_block = "ArrayList<Triple>";
-%typeof var_list = "ArrayList<>";
+%typeof var_list = "ArrayList<String>";
%typeof var_or_term, var, iri_ref = "String";
%typeof verb = "Predicate";
%typeof INTEGER = "Integer";
@@ -60,9 +60,9 @@
%goal select_query;
select_query
- = SELECT distinct_or_reduced.d select_vars dataset_clause.f where_clause.w solution_modifier.s
+ = SELECT distinct_or_reduced.d select_vars.v dataset_clause.f where_clause.w solution_modifier.s
{:
- return new SelectQuery(d, f, w, s);
+ return new SelectQuery(v, d, f, w, s);
:}
;
@@ -78,10 +78,19 @@
;
var_list
- = var
- | var_list var
- ;
+ = var.v {:
+ ArrayList<String> variables = new ArrayList<String>();
+ variables.add(v);
+ return new Symbol(variables);
+ :}
+ | var_list.vl var.v
+ {:
+ vl.add(v);
+ return _symbol_vl;
+ :}
+ ;
+
dataset_clause
= from_clause
|
@@ -524,18 +533,33 @@
var_or_term
= var
+ | graph_term
;
var
- = VARIABLE.e {: return new Symbol(e); :}
+ = VARIABLE.e {: return new Variable(e); :}
;
+
+graph_term
+ = iri_ref.ref {: return new IRIReference(ref); :}
+ | rdf_literal // not yet implemented
+ | numeric_literal
+ | boolean_literal // not yet implemented
+ | blank_node
+ | NIL;
+rdf_literal // a string literal with either a language tag or an optional datatype IRI
+ = STRING_LITERAL.str LANGTAG.lang {: return Literal.newLang(str, lang); :}
+ | STRING_LITERAL.str ( LITERAL_TYPE_SEP iri_ref.type )? {: return Literal.newTyped(str, type); :} // NOTE(review): confirm Beaver accepts a parenthesised optional group, and what "type" holds when it is absent
+ ;
+
iri_ref
- = Q_IRI_REF
- | Q_Name
- ;
-
-Q_Name
- = QNAME_NS
- | QNAME_LN
- ;
+ = IRI_REF
+ | PNAME_LN
+ | PNAME_NS
+ ;
+
+blank_node
+ = BLANK_NODE.bn {: return new BlankNode(bn); :}
+ | ANON {: return new BlankNode(); :}
+ ;
\ No newline at end of file
More information about the Mulgara-svn mailing list