dlvhex  2.5.0
src/DLVresultParserDriver.cpp
Go to the documentation of this file.
00001 /* dlvhex -- Answer-Set Programming with external interfaces.
00002  * Copyright (C) 2005-2007 Roman Schindlauer
00003  * Copyright (C) 2006-2015 Thomas Krennwallner
00004  * Copyright (C) 2009-2016 Peter Schüller
00005  * Copyright (C) 2011-2016 Christoph Redl
00006  * Copyright (C) 2015-2016 Tobias Kaminski
00007  * Copyright (C) 2015-2016 Antonius Weinzierl
00008  * Copyright (C) 2009 Peter Schüller
00009  *
00010  * This file is part of dlvhex.
00011  *
00012  * dlvhex is free software; you can redistribute it and/or modify it
00013  * under the terms of the GNU Lesser General Public License as
00014  * published by the Free Software Foundation; either version 2.1 of
00015  * the License, or (at your option) any later version.
00016  *
00017  * dlvhex is distributed in the hope that it will be useful, but
00018  * WITHOUT ANY WARRANTY; without even the implied warranty of
00019  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00020  * Lesser General Public License for more details.
00021  *
00022  * You should have received a copy of the GNU Lesser General Public
00023  * License along with dlvhex; if not, write to the Free Software
00024  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00025  * 02110-1301 USA.
00026  */
00027 
00039 // activate benchmarking if activated by configure option --enable-debug
00040 #ifdef HAVE_CONFIG_H
00041 #  include "config.h"
00042 #endif
00043 
00044 // use this for developing the parser
00045 #undef CWDEBUG
00046 #ifdef CWDEBUG
00047 # ifndef _GNU_SOURCE
00048 #  define _GNU_SOURCE
00049 # endif
00050 # include <libcwd/sys.h>
00051 # include <libcwd/debug.h>
00052 #endif
00053 
00054 //#include "dlvhex2/DLVResultParser.hpp"
00055 #include "dlvhex2/DLVresultParserDriver.h"
00056 
00057 // use this for debugging parser progress (XML style)
00058 #undef BOOST_SPIRIT_DEBUG
00059 
00060 #include "dlvhex2/Benchmarking.h"
00061 #include "dlvhex2/ID.h"
00062 #include "dlvhex2/Term.h"
00063 #include "dlvhex2/Atoms.h"
00064 #include "dlvhex2/Registry.h"
00065 #include "dlvhex2/Printer.h"
00066 #include "dlvhex2/ProgramCtx.h"
00067 
00068 #include <boost/optional.hpp>
00069 #include <boost/spirit/include/qi.hpp>
00070 #include <boost/spirit/include/phoenix_core.hpp>
00071 #include <boost/spirit/include/phoenix_operator.hpp>
00072 #include <boost/spirit/include/phoenix_object.hpp>
00073 #include <boost/spirit/include/phoenix_fusion.hpp>
00074 #include <boost/spirit/include/support_multi_pass.hpp>
00075 #include <boost/algorithm/string/trim.hpp>
00076 
00077 #include <sstream>
00078 #include <iostream>
00079 
00080 namespace spirit = boost::spirit;
00081 namespace qi = boost::spirit::qi;
00082 namespace ascii = boost::spirit::ascii;
00083 namespace fusion = boost::fusion;
00084 namespace phoenix = boost::phoenix;
00085 
00086 DLVHEX_NAMESPACE_BEGIN
00087 
00088 DLVResultParser::DLVResultParser(RegistryPtr reg):
00089 reg(reg),
00090 pMode(FirstOrder)
00091 {
00092 }
00093 
00094 
00095 DLVResultParser::DLVResultParser(RegistryPtr reg, ParseMode mode):
00096 reg(reg),
00097 pMode(mode)
00098 {
00099 }
00100 
00101 
00102 DLVResultParser::~DLVResultParser()
00103 {
00104 }
00105 
00106 
00107 void
00108 DLVResultParser::setParseMode(ParseMode mode)
00109 {
00110     pMode = mode;
00111 }
00112 
00113 
00114 struct ParserState
00115 {
00116     RegistryPtr registry;
00117     AnswerSet::Ptr current;
00118     DLVResultParser::AnswerSetAdder adder;
00119     bool dropPredicates;
00120 
00121     ParserState(
00122         RegistryPtr registry,
00123         DLVResultParser::AnswerSetAdder adder,
00124         bool dropPredicates):
00125     registry(registry),
00126         current(new AnswerSet(registry)),
00127         adder(adder),
00128         dropPredicates(dropPredicates) {}
00129 };
00130 
00131 #if 1
00132 //
00133 // useful phoenix operator debugging stuff:
00134 // -> just add [ handle_dbg("message") ] to any rule/parser to get information
00135 //
/**
 * Generic debug printer for a parsed attribute.
 *
 * Only produces output when CWDEBUG is defined; in that case the demangled
 * type name of the attribute is written to std::cerr. Otherwise this is a
 * no-op.
 */
template<typename Attrib>
void houtput(Attrib const& a)
{
#ifdef CWDEBUG
    std::cerr << "XXX handling attribute " << libcwd::type_info_of(a).demangled_name() << std::endl;
#else
    // nothing to print without libcwd; silence the unused-parameter warning
    (void)a;
#endif
}
00143 
00144 template<>
00145 void houtput(std::vector<char> const& a)
00146 {
00147     std::cerr << "XXX got string  attribute '" << std::string(a.begin(), a.end()) << "'" << std::endl;
00148 }
00149 
00150 
00151 template<typename Content>
00152 void houtput(boost::optional<Content> const& a)
00153 {
00154     if( !a ) {
00155         std::cerr << "XXX optional (unset):" << std::endl;
00156     }
00157     else {
00158         std::cerr << "XXX optional:" << std::endl;
00159     }
00160     houtput(a.get());
00161 }
00162 
00163 
00164 struct handle_dbg
00165 {
00166     handle_dbg(std::string s): s(s) {}
00167     std::string s;
00168     template<typename Attrib>
00169         void operator()(Attrib& a, qi::unused_type, qi::unused_type) const
00170     {
00171         std::cerr << "DBG=" << s << std::endl;
00172         houtput(a);
00173     }
00174 };
00175 #endif
00176 
00177 struct handle_int
00178 {
00179     template <typename Context>
00180         void operator()(int i, Context& ctx, qi::unused_type) const
00181     {
00182         ID& ruleAttr = fusion::at_c<0>(ctx.attributes);
00183 
00184         ruleAttr = ID::termFromInteger(i);
00185         //std::cerr << "created int " << i << std::endl;
00186     }
00187 };
00188 
00189 namespace
00190 {
00191     inline ID getOrRegisterTerm(RegistryPtr registry, const std::string& s) {
00192         ID id = registry->terms.getIDByString(s);
00193         if( id == ID_FAIL ) {
00194             id = registry->preds.getIDByString(s);
00195             if( id == ID_FAIL ) {
00196                 Term term(ID::MAINKIND_TERM, s);
00197                 // we can only get strings or constants
00198                 assert(s[0] == '"' || islower(s[0]));
00199                 id = registry->terms.storeAndGetID(term);
00200             }
00201         }
00202         return id;
00203     }
00204 }
00205 
00206 
00207 struct handle_ident
00208 {
00209     handle_ident(ParserState& state): state(state) {}
00210 
00211     template <typename Context>
00212         void operator()(const std::string& s, Context& ctx, qi::unused_type) const
00213     {
00214         ID& ruleAttr = fusion::at_c<0>(ctx.attributes);
00215 
00216         ID id = getOrRegisterTerm(state.registry, s);
00217         ruleAttr = id;
00218         //std::cerr << "created ident '" << s << "'" << std::endl;
00219     }
00220 
00221     ParserState& state;
00222 };
00223 
00224 struct handle_finished_answerset
00225 {
00226     handle_finished_answerset(ParserState& state): state(state) {}
00227 
00228     void operator()(qi::unused_type, qi::unused_type, qi::unused_type) const
00229     {
00230         // add current answer set as full answer set
00231         DBGLOG(DBG,"handling parsed answer set " << *state.current);
00232         state.adder(state.current);
00233         // create empty answer set for subsequent parsing
00234         state.current.reset(new AnswerSet(state.registry));
00235     }
00236 
00237     ParserState& state;
00238 };
00239 
00240 struct handle_fact
00241 {
00242     handle_fact(ParserState& state): state(state) {}
00243 
00244     void operator()(
00245         boost::fusion::vector3<
00246         boost::optional<char>,
00247         std::string,
00248         boost::optional<Tuple> >& attr,
00249         qi::unused_type, qi::unused_type) const
00250     {
00251         // alias for fusion input
00252         bool strong_neg = !!fusion::at_c<0>(attr);
00253         assert(!strong_neg);
00254         const std::string& predicate = fusion::at_c<1>(attr);
00255         ID predid = getOrRegisterTerm(state.registry, predicate);
00256         OrdinaryAtom atom(ID::MAINKIND_ATOM);
00257         atom.tuple.push_back(predid);
00258 
00259         // aux predicates create aux atoms
00260         if( (predid & ID::PROPERTY_AUX) != 0 )
00261             atom.kind |= ID::PROPERTY_AUX;
00262 
00263         boost::optional<Tuple>& tup = fusion::at_c<2>(attr);
00264         if( !!tup )
00265             atom.tuple.insert(atom.tuple.end(), tup.get().begin(), tup.get().end());
00266 
00267         // TODO lookup by string in registry, then by tuple
00268         ID id = state.registry->ogatoms.getIDByTuple(atom.tuple);
00269         if( id == ID_FAIL ) { {
00270                 WARNING("parsing efficiency problem see HexGrammarPTToASTConverter")
00271                     std::stringstream ss;
00272                 RawPrinter printer(ss, state.registry);
00273                 Tuple::const_iterator it = atom.tuple.begin();
00274                 printer.print(*it);
00275                 it++;
00276                 if( it != atom.tuple.end() ) {
00277                     ss << "(";
00278                     printer.print(*it);
00279                     it++;
00280                     while(it != atom.tuple.end()) {
00281                         ss << ",";
00282                         printer.print(*it);
00283                         it++;
00284                     }
00285                     ss << ")";
00286                 }
00287                 atom.text = ss.str();
00288             }
00289             //DBGLOG(DBG,"storing atom " << atom);
00290             id = state.registry->ogatoms.storeAndGetID(atom);
00291         }
00292         //TODO make more efficient (cache pointer to interpretation or even function object)
00293         //DBGLOG(DBG,"setting fact " << id);
00294         state.current->interpretation->setFact(id.address);
00295     }
00296 
00297     ParserState& state;
00298 };
00299 
00300 // "The Grammar"
00301 template<typename Iterator>
00302 struct DLVResultGrammar:
00303 public qi::grammar<Iterator, ascii::space_type>
00304 {
00305     DLVResultGrammar(ParserState& state):
00306     DLVResultGrammar::base_type(dlvline), state(state) {
00307         using spirit::int_;
00308         using spirit::_val;
00309         using spirit::_1;
00310         using qi::lexeme;
00311         using qi::char_;
00312         using qi::omit;
00313         using qi::lit;
00314 
00315         ident
00316             = lexeme[char_('"') > *(char_ - '"') > char_('"')]
00317         // @todo test performance
00318         //| lexeme[char_("a-z") >> *char_("a-zA-Z0-9_")];
00319             | lexeme[ascii::lower > *(ascii::alnum|char_('_'))];
00320         groundterm
00321             = int_ [ handle_int() ]
00322             | ident [ handle_ident(state) ]
00323             ;
00324         fact
00325         // char_ synthesizes a char attribute!
00326             = ( -char_('-') > ident > -params ) [ handle_fact(state) ] ;
00327         params
00328             %= '(' > groundterm > *(',' > groundterm) > ')';
00329         answerset
00330             = (lit('{') >> '}') [ handle_finished_answerset(state) ]
00331             | (lit('{') > fact > *(',' > fact) > lit('}') [ handle_finished_answerset(state) ]);
00333         costline
00334             = lit("Cost") > +(ascii::alnum|char_("[]<>():"));
00335         dlvline
00336             = (-lit("Best model:") >> answerset)
00337             |
00338             costline;
00339 
00340         #ifdef BOOST_SPIRIT_DEBUG
00341         BOOST_SPIRIT_DEBUG_NODE(dlvline);
00342         BOOST_SPIRIT_DEBUG_NODE(answerset);
00343         BOOST_SPIRIT_DEBUG_NODE(costline);
00344         BOOST_SPIRIT_DEBUG_NODE(fact);
00345         BOOST_SPIRIT_DEBUG_NODE(groundterm);
00346         BOOST_SPIRIT_DEBUG_NODE(ident);
00347         #endif
00348     }
00349 
00350     qi::rule<Iterator, ascii::space_type>                  dlvline,
00351         answerset,
00352         fact,
00353         costline;
00354     qi::rule<Iterator, ID(), ascii::space_type>            groundterm;
00355     qi::rule<Iterator, std::string(), ascii::space_type>   ident;
00356     qi::rule<Iterator, Tuple(), ascii::space_type>         params;
00357 
00358     ParserState& state;
00359 };
00360 
00361 void
00362 DLVResultParser::parse(
00363 std::istream& is,
00364 AnswerSetAdder adder) throw (SyntaxError)
00365 {
00366     DLVHEX_BENCHMARK_REGISTER_AND_SCOPE(sid,"DLVResultParser::parse");
00367 
00368     bool dropPredicates =
00369         (pMode == DLVResultParser::HO);
00370     ParserState state(reg, adder, dropPredicates);
00371 
00372     typedef std::string::const_iterator forward_iterator_type;
00373     DLVResultGrammar<forward_iterator_type> grammar(state);
00374     unsigned errors = 0;
00375     do {
00376         // @todo: read each line to the next endl, then parse, then get exactly one answer set per parse, maybe reuse the grammar (state can be avoided by using attributes, AtomSetPtr and AtomPtr or just use a new AST with integers instead)
00377 
00378         // get next input line
00379         std::string input;
00380         std::getline(is, input);
00381 
00382         // break silently
00383         if( input.empty() )
00384             break;
00385 
00386         DBGLOG(DBG,"obtained " << input.size() << " characters from input stream via getline");
00387 
00388         if( is.bad() ) {
00389             DBGLOG(DBG,"leaving DLVResultParser loop, stream bits are: "
00390                 "fail " << is.fail() << ", bad " << is.bad() << ", eof " << is.eof());
00391             break;
00392         }
00393 
00394         // TODO allocate answer set here, just set bits in parser
00395         LOG(DBG,"parsing input from DLV: '" << input << "'");
00396 
00397         // convert input iterator to forward iterator, usable by spirit parser
00398         forward_iterator_type fwd_begin = input.begin();
00399         forward_iterator_type fwd_end = input.end();
00400 
00401         try
00402         {
00403             bool r = qi::phrase_parse(fwd_begin, fwd_end, grammar, ascii::space);
00404 
00405             // @todo: add better error message with position iterator
00406             if (!r || fwd_begin != fwd_end) {
00407                 LOG(ERROR,"for input '" << input << "': r=" << r << " (begin!=end)=" << (fwd_begin != fwd_end));
00408                 errors++;
00409             }
00410         }
00411         catch(const qi::expectation_failure<forward_iterator_type>& e) {
00412             LOG(ERROR,"for input '" << input << "': could not parse DLV output(expectation failure) " << e.what_);
00413             errors++;
00414         }
00415     }
00416     while(errors < 20);
00417 
00418     if( errors != 0  ) {
00419         LOG(ERROR,"error count for parsing DLV output = " << errors);
00420         throw SyntaxError("Could not parse complete DLV output! (see error log messages)");
00421     }
00422 }
00423 
00424 
00425 DLVHEX_NAMESPACE_END
00426 
00427 
00428 // vim:expandtab:ts=4:sw=4:
00429 // mode: C++
00430 // End: