[Mulgara-svn] r603 - branches/mgr-61-sparql/src/jar/sparql/grammar

pag at mulgara.org pag at mulgara.org
Sun Dec 9 20:42:41 UTC 2007


Author: pag
Date: 2007-12-09 14:42:41 -0600 (Sun, 09 Dec 2007)
New Revision: 603

Modified:
   branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex
   branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar
Log:
Expanded on the lexer capability, including some grammar updates.  Not yet compiling.

Modified: branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex
===================================================================
--- branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex	2007-12-09 20:39:03 UTC (rev 602)
+++ branches/mgr-61-sparql/src/jar/sparql/grammar/expr.flex	2007-12-09 20:42:41 UTC (rev 603)
@@ -3,7 +3,7 @@
 import beaver.Symbol;
 import beaver.Scanner;
 
-import org.mulgara.sparql.compiler.ExpressionParser.Terminals;
+import static org.mulgara.sparql.compiler.ExpressionParser.Terminals.*;
 
 %%
 
@@ -21,128 +21,155 @@
 %column
 %{
 	StringBuffer string = new StringBuffer(128);
-	private Symbol newToken(short id)
-	{
+
+	private Symbol newToken(short id) {
 		return new Symbol(id, yyline + 1, yycolumn + 1, yylength());
 	}
 
-	private Symbol newToken(short id, Object value)
-	{
+	private Symbol newToken(short id, Object value) {
 		return new Symbol(id, yyline + 1, yycolumn + 1, yylength(), value);
 	}
+	
+	private String chop(String str) {
+	  return str.substring(0, str.length() - 1);
+	}
+
+  private String unquote(String str) {
+    return str.substring(1, str.length() - 1);
+  }
+
 %}
+
 LineTerminator = \r|\n|\r\n
 WhiteSpace     = {LineTerminator} | [ \t\f]
+
+IriRef = "<" [^<>{}\"\\|^`\x00-\x20]* ">"
+BlankPrefix = "_:"
+LangtagStart = "@"
+Langtag = [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
+Integer = [0-9]+
+Decimal = [0-9]+ "." [0-9]* | "." [0-9]+
+Exponent = [eE][+-]?[0-9]+
+Double = [0-9]+ "." [0-9]* {Exponent} | "." ([0-9])+ {Exponent} | ([0-9])+ {Exponent}
+IntegerPositive = "+" {Integer}
+DecimalPositive = "+" {Decimal}
+DoublePositive = "+" {Double}
+IntegerNegative = "-" {Integer}
+DecimalNegative = "-" {Decimal}
+DoubleNegative = "-" {Double}
+StringLiteralDelim1 = "'"
+StringLiteralDelim2 = \"
+StringLiteralLongDelim1 = "'''"
+StringLiteralLongDelim2 = \"\"\"
+Echar = "\\" [tbnrf'\"\\]
+LiteralChar1 = (([^\x27\x5C\x0A\x0D]) | {Echar})*
+LiteralChar2 = (([^\x22\x5C\x0A\x0D]) | {Echar})*
 WS = \u0020 | \u0009 | \u000D | \u000A
-StringCharacter = [^\r\n\"\\]
-
-Number = [:digit:] [:digit:]*
-Identifier = [:jletter:][:jletterdigit:]*
-Variable = [$?]{Identifier}
+Nil = "(" {WS}* ")"
+Anon = "[" {WS}* "]"
+PnCharsBase = [A-Z] | [a-z] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD] | [\u10000-\uEFFFF]
+PnCharsU = {PnCharsBase} | "_"
 VariableStart = [$?]
-NIL = \({WS}*\)
-Q_IRI_REF = <[^<>{}]*>
-NCCHAR1P = [a-zA-Z]
-NCCHAR1 = {NCCHAR1P} | "_"
-NCCHAR = {NCCHAR1} | "-" | [0-9]
-NCNAME_PREFIX = {NCCHAR1P} (({NCCHAR} | ".")* {NCCHAR})?
-NCNAME = {NCCHAR1}(({NCCHAR} | ".")* {NCCHAR})?
-QNAME_NS = {NCNAME_PREFIX}?[ ]*:
-QNAME_LN = {QNAME_NS}{NCNAME}
+Varname = ({PnCharsU} | [0-9]) ({PnCharsU} | [0-9] | \u00B7 | [\u0300-\u036F] | [\u203F-\u2040])*
+PnChars = {PnCharsU} | "-" | [0-9] | \u00B7 | [\u0300-\u036F] | [\u203F-\u2040]
+PnPrefix = {PnCharsBase} (({PnChars} | ".")* {PnChars})?
+PnLocal = ({PnCharsU} | [0-9]) (({PnChars} | ".")* {PnChars})?
 
-NCNAME = {NCCHAR1} (({NCCHAR} | ".")* {NCCHAR})?
 
-ORDER_BY = ORDER{WhiteSpace}+BY 
+OrderBy = "ORDER" {WhiteSpace}+ "BY" 
 
-SignlessInteger = {Number}
-SignlessDecimal = [.]{Number} | {Number}[.][:digit:]*
-Exponent = [eE][+-]?{Number}
-SignlessDouble = [.]{Number}{Exponent} | {Number}[.][:digit:]*{Exponent} | {Number}{Exponent}
-SignedInteger = [+-]{SignlessInteger}
-SignedDecimal = [+-]{SignlessDecimal}
-SignedDouble = [+-]{SignlessDouble}
 
-Integer = {SignlessInteger} | {SignedInteger}
-Decimal = {SignlessDecimal} | {SignedDecimal}
-Double = {SignlessDouble} | {SignedDouble}
+%state YYSTRING_LITERAL_LONG1
+%state YYSTRING_LITERAL_LONG2
 
-
-%state STRING
-%state VARIABLE
-
 %%
 
 {WhiteSpace}+   { /* ignore */ }
 
 <YYINITIAL> {
-	{Integer}  	{ return newToken(Terminals.INTEGER, new Integer(yytext())); }
-	{Decimal}   	{ return newToken(Terminals.DECIMAL, new Float(yytext())); }
-	{Double}    	{ return newToken(Terminals.DOUBLE, new Double(yytext())); }
+  {Double}                    { return newToken(DOUBLE, new Double(yytext())); }
+  {Decimal}                   { return newToken(DECIMAL, new Float(yytext())); }
+	{Integer}  	                { return newToken(INTEGER, new Integer(yytext())); }
+  {DoublePositive}            { return newToken(DOUBLE_POSITIVE, new Double(yytext())); }
+  {DecimalPositive}           { return newToken(DECIMAL_POSITIVE, new Float(yytext())); }
+  {IntegerPositive}           { return newToken(INTEGER_POSITIVE, new Integer(yytext())); }
+  {DoubleNegative}            { return newToken(DOUBLE_NEGATIVE, new Double(yytext())); }
+  {DecimalNegative}           { return newToken(DECIMAL_NEGATIVE, new Float(yytext())); }
+  {IntegerNegative}           { return newToken(INTEGER_NEGATIVE, new Integer(yytext())); }
 
-	"("		{ return newToken(Terminals.LPAREN, yytext()); }
-	")"        	{ return newToken(Terminals.RPAREN, yytext()); }
-	"*"        	{ return newToken(Terminals.MULT,   yytext()); }
-	"/"        	{ return newToken(Terminals.DIV,    yytext()); }
-	"+"        	{ return newToken(Terminals.PLUS,   yytext()); }
-	"-"        	{ return newToken(Terminals.MINUS,  yytext()); }
-	"SELECT"|"select"	{ return newToken(Terminals.SELECT, yytext()); }
-	"DISTINCT"|"distinct"	{return newToken(Terminals.DISTINCT, yytext()); }
-	"REDUCED"|"reduced"	{ return newToken(Terminals.REDUCED, yytext()); }
-	"FROM"|"from"	{ return newToken(Terminals.FROM, yytext()); }
-	"WHERE"|"where"	{ System.out.println("flex: doing the where"); return newToken(Terminals.WHERE, yytext()); }
-	"{"		{ System.out.println("flex: Got the lbrace"); return newToken(Terminals.LBRACE, yytext()); }
-	"}"		{ System.out.println("flex: Got the rbrace");return newToken(Terminals.RBRACE, yytext()); }
-	"."		{ return newToken(Terminals.DOT, yytext()); }
-	"OPTIONAL"|"optional" { return newToken(Terminals.OPTIONAL, yytext()); }
-	"FILTER" | "filter"   { return newToken(Terminals.FILTER, yytext()); }
-	"||"	   	{ return newToken(Terminals.OR, yytext()); }
-	"&&"	   	{ return newToken(Terminals.AND, yytext()); }
-	"="	   	{ return newToken(Terminals.EQUALS, yytext()); }
-	"!="	   	{ return newToken(Terminals.NOT_EQUALS, yytext()); }
-	"<"		{ return newToken(Terminals.LESS_THAN, yytext()); }
-	"<="		{ return newToken(Terminals.LESS_THAN_EQUALS, yytext()); }
-	">"		{ return newToken(Terminals.GREATER_THAN, yytext()); }
-	">="		{ return newToken(Terminals.GREATER_THAN_EQUALS, yytext()); }
-	"!"		{ return newToken(Terminals.NOT, yytext()); }
-	"STR" | "str"	{ return newToken(Terminals.STR, yytext()); }
-	"LANG" | "lang"	{ return newToken(Terminals.LANG, yytext()); }
-	"LANGMATCHES" | "langmatches"	{ return newToken(Terminals.LANGMATCHES, yytext()); }
-	"DATATYPE" | "datatype"	{ return newToken(Terminals.DATATYPE, yytext()); }
-	"BOUND" | "bound"	{ return newToken(Terminals.BOUND, yytext()); }
-	"SAMETERM" | "sameterm" | "sameTerm"	{ return newToken(Terminals.SAME_TERM, yytext()); }
-	"ISIRI" | "isiri" | "isIRI"	{ return newToken(Terminals.IS_IRI, yytext()); }
-	"ISURI" | "isuri" | "isURI"	{ return newToken(Terminals.IS_URI, yytext()); }
-	"ISBLANK" | "isblank" | "isBLANK"	{ return newToken(Terminals.IS_BLANK, yytext()); }
-	"ISLITERAL" | "isliteral" | "isLITERAL"	{ return newToken(Terminals.IS_LITERAL, yytext()); }
-	"REGEX" | "regex"	{ return newToken(Terminals.REGEX, yytext()); }
-	";"		{ return newToken(Terminals.SEMI_COLON, yytext()); }
-	","		{ return newToken(Terminals.COMMA, yytext()); }
-	"a"		{ return newToken(Terminals.LETTER_A, yytext()); }
-	"NAMED" | "named"	{ return newToken(Terminals.NAMED, yytext()); }
-	"UNION" | "union"	{ return newToken(Terminals.UNION, yytext()); }
-	"GRAPH" | "graph"	{ return newToken(Terminals.GRAPH, yytext()); }
-	{VariableStart} { string.setLength(0); yybegin(VARIABLE); }
-	\"	        { yybegin(STRING); string.setLength(0); }
-	{Q_IRI_REF}	{ return newToken(Terminals.Q_IRI_REF, yytext()); }
-	{QNAME_NS}	{ return newToken(Terminals.QNAME_NS, yytext()); }
-	{QNAME_LN}	{ return newToken(Terminals.QNAME_LN, yytext()); }
-	{NIL}		{ return newToken(Terminals.NIL, yytext()); }
-	{ORDER_BY}	{ return newToken(Terminals.ORDER_BY, yytext()); }
-	"ASC" | "asc" { return newToken(Terminals.ASC, yytext()); }
-	"DESC" | "desc" { return newToken(Terminals.DESC, yytext()); }
-	"LIMIT" | "limit" { return newToken(Terminals.LIMIT, yytext()); }
-	"OFFSET" | "offset" { return newToken(Terminals.OFFSET, yytext()); }
+  {PnPrefix}? ":"             { return newToken(PNAME_NS, chop(yytext())); }
+  {VariableStart}{Varname}    { return newToken(VARNAME, yytext().substring(1)); }
+  {LangtagStart}{Langtag}     { return newToken(LANGTAG, yytext().substring(1)); }
+  {BlankPrefix}{PnLocal}      { return newToken(BLANK_NODE, yytext().substring(2)); }
+  {IriRef}                    { return newToken(IRI_REF, unquote(yytext())); }
+  {Anon}                      { return newToken(ANON); }
+  {Nil}                       { return newToken(NIL); }
+  "'" {LiteralChar1} "'"      { return newToken(STRING_LITERAL, unquote(yytext())); }
+  \" {LiteralChar2} \"        { return newToken(STRING_LITERAL, unquote(yytext())); }
+  {StringLiteralLongDelim1}   { string.setLength(0); yybegin(YYSTRING_LITERAL_LONG1); }
+  {StringLiteralLongDelim2}   { string.setLength(0); yybegin(YYSTRING_LITERAL_LONG2); }
+
+	"("                         { return newToken(LPAREN); }
+	")"                         { return newToken(RPAREN); }
+	"*"                         { return newToken(MULT); }
+	"/"                         { return newToken(DIV); }
+	"+"                         { return newToken(PLUS); }
+	"-"                         { return newToken(MINUS); }
+	"SELECT"|"select"           { return newToken(SELECT); }
+	"DISTINCT"|"distinct"       { return newToken(DISTINCT); }
+	"REDUCED"|"reduced"         { return newToken(REDUCED); }
+	"FROM"|"from"	              { return newToken(FROM); }
+	"WHERE"|"where"	            { System.out.println("flex: doing the where"); return newToken(WHERE); }
+	"{"                         { System.out.println("flex: Got the lbrace"); return newToken(LBRACE); }
+	"}"                         { System.out.println("flex: Got the rbrace");return newToken(RBRACE); }
+	"."                         { return newToken(DOT); }
+	"OPTIONAL"|"optional"       { return newToken(OPTIONAL); }
+	"FILTER" | "filter"         { return newToken(FILTER); }
+	"||"                        { return newToken(OR); }
+	"&&"                        { return newToken(AND); }
+	"="                         { return newToken(EQUALS); }
+	"!="                        { return newToken(NOT_EQUALS); }
+	"<"                         { return newToken(LESS_THAN); }
+	"<="                        { return newToken(LESS_THAN_EQUALS); }
+	">"	                        { return newToken(GREATER_THAN); }
+	">="	                      { return newToken(GREATER_THAN_EQUALS); }
+	"!"		                      { return newToken(NOT); }
+	"^^"                        { return newToken(LITERAL_TYPE_SEP); }
+	"STR" | "str"             	{ return newToken(STR); }
+	"DATATYPE" | "datatype"	    { return newToken(DATATYPE); }
+	"BOUND" | "bound"	          { return newToken(BOUND); }
+  "LANG" | "lang"             { return newToken(LANG); }
+  "LANGMATCHES" | "langmatches"            { return newToken(LANGMATCHES); }
+	"SAMETERM" | "sameterm" | "sameTerm"	   { return newToken(SAME_TERM); }
+	"ISIRI" | "isiri" | "isIRI"	             { return newToken(IS_IRI); }
+	"ISURI" | "isuri" | "isURI"	             { return newToken(IS_URI); }
+	"ISBLANK" | "isblank" | "isBLANK"	       { return newToken(IS_BLANK); }
+	"ISLITERAL" | "isliteral" | "isLITERAL"	 { return newToken(IS_LITERAL); }
+	"REGEX" | "regex"	          { return newToken(REGEX); }
+	";"	                      	{ return newToken(SEMI_COLON); }
+	","                      		{ return newToken(COMMA); }
+	"a"		                      { return newToken(LETTER_A); }
+	"NAMED" | "named"	          { return newToken(NAMED); }
+	"UNION" | "union"          	{ return newToken(UNION); }
+	"GRAPH" | "graph"	          { return newToken(GRAPH); }
+	"ASC" | "asc"               { return newToken(ASC); }
+	"DESC" | "desc"             { return newToken(DESC); }
+	"LIMIT" | "limit"           { return newToken(LIMIT); }
+	"OFFSET" | "offset"         { return newToken(OFFSET); }
+  {OrderBy}                   { return newToken(ORDER_BY); }
+
 }
 
-<VARIABLE> {
-	{Identifier} { yybegin(YYINITIAL); return newToken(Terminals.VARIABLE, yytext()); }
+<YYSTRING_LITERAL_LONG1> {
+  {StringLiteralLongDelim1}      { yybegin(YYINITIAL); return newToken(STRING_LITERAL, string.toString()); }
+  ([^'\\] | {Echar}))*           { string.append(yytext()); }
+  "'"                            { string.append("'"); }
 }
 
-<STRING> {
-	\"		{ 	
-			}
-	{StringCharacter}+	{ string.append(yytext()); }
+<YYSTRING_LITERAL_LONG2> {
+  {StringLiteralLongDelim2}      {yybegin(YYINITIAL); return newToken(STRING_LITERAL, string.toString()); }
+  ([^\"\\] | {Echar}))*          { string.append(yytext()); }
+  \"                             { string.append("\""); }
 }
 
-
 .|\n            { throw new Scanner.Exception("unexpected character '" + yytext() + "'"); }

Modified: branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar
===================================================================
--- branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar	2007-12-09 20:39:03 UTC (rev 602)
+++ branches/mgr-61-sparql/src/jar/sparql/grammar/expr.grammar	2007-12-09 20:42:41 UTC (rev 603)
@@ -35,7 +35,7 @@
 %typeof verb_object_list_list, verb_object_list_list_element = "ArrayList<Triple>";
 %typeof prop_list_not_empty = "ArrayList<Triple>";
 %typeof triples_block = "ArrayList<Triple>";
-%typeof var_list = "ArrayList<>";
+%typeof var_list = "ArrayList<String>";
 %typeof var_or_term, var, iri_ref = "String";
 %typeof verb = "Predicate";
 %typeof INTEGER = "Integer";
@@ -60,9 +60,9 @@
 %goal select_query;
 
 select_query
-	= SELECT distinct_or_reduced.d select_vars dataset_clause.f where_clause.w solution_modifier.s 
+	= SELECT distinct_or_reduced.d select_vars.v dataset_clause.f where_clause.w solution_modifier.s 
 		{: 
-			return new SelectQuery(d, f, w, s); 
+			return new SelectQuery(v, d, f, w, s); 
 		:}
 	;
 	
@@ -78,10 +78,19 @@
 	;
 	
 var_list
-	= var
-	| var_list var
-	;
+  = var.v   {:
+            ArrayList<String> variables = new ArrayList<String>();
+            variables.add(v);
+            return new Symbol(variables);
+          :}
+  | var_list.vl var.v
+          {:
+            vl.add(v);
+            return _symbol_vl;
+          :}
+  ;
 
+
 dataset_clause
 	= from_clause
 	|
@@ -524,18 +533,33 @@
 
 var_or_term
 	= var
+	| graph_term
 	;
 
 var
-	= VARIABLE.e		{: return new Symbol(e); :}
+	= VARIABLE.e		{: return new Variable(e); :}
 	;
+	
+graph_term
+  = iri_ref.ref      {: return new IRIReference(ref); :}
+  | rdf_literal // not yet implemented
+  | numeric_literal
+  | boolean_literal // not yet implemented
+  | blank_node
+  | NIL;
 
+rdf_literal =
+  | STRING_LITERAL.str LANGTAG.lang                         {: return Literal.newLang(str, lang); :}
+  | STRING_LITERAL.str ( LITERAL_TYPE_SEP iri_ref.type )?   {: return Literal.newTyped(str, type); :}
+  ;
+  
 iri_ref
-	= Q_IRI_REF
-	| Q_Name
-	;
-
-Q_Name
-	= QNAME_NS
-	| QNAME_LN
-	;
+  = IRI_REF
+  | PNAME_LN
+  | PNAME_NS
+  ;
+  
+blank_node
+  = BLANK_NODE.bn     {: return new BlankNode(bn); :}
+  | ANON              {: return new BlankNode(); :}
+  ;
\ No newline at end of file




More information about the Mulgara-svn mailing list