all: wikiq
-wikiq: wikiq.c $(OBJECTS)
- $(CXX) $(CFLAGS) wikiq.c $(OBJECTS) -o wikiq -lexpat
+wikiq: wikiq.cpp $(OBJECTS)
+ $(CXX) $(CFLAGS) wikiq.cpp $(OBJECTS) -o wikiq -lexpat
disorder.o: disorder.c disorder.h
$(CXX) $(CFLAGS) -c disorder.c
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_DIFF_H
+#define DTL_DIFF_H
+
+namespace dtl {
+
+ /**
+ * diff class template
+ * sequence must support random_access_iterator.
+ */
+ template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
+ class Diff
+ {
+ private :
+ dtl_typedefs(elem, sequence)
+ sequence A;
+ sequence B;
+ size_t M;
+ size_t N;
+ size_t delta;
+ size_t offset;
+ long long *fp;
+ long long editDistance;
+ Lcs< elem > lcs;
+ Ses< elem > ses;
+ editPath path;
+ editPathCordinates pathCordinates;
+ bool reverse;
+ bool huge;
+ bool unserious;
+ bool onlyEditDistance;
+ uniHunkVec uniHunks;
+ comparator cmp;
+ public :
+ Diff () {}
+
+ Diff (const sequence& a,
+ const sequence& b) : A(a), B(b) {
+ init();
+ }
+
+ Diff (const sequence& a,
+ const sequence& b,
+ const comparator& comp) : A(a), B(b), cmp(comp) {
+ init();
+ }
+
+ ~Diff() {}
+
+ long long getEditDistance () const {
+ return editDistance;
+ }
+
+ Lcs< elem > getLcs () const {
+ return lcs;
+ }
+
+ elemVec getLcsVec () const {
+ return lcs.getSequence();
+ }
+
+ Ses< elem > getSes () const {
+ return ses;
+ }
+
+ uniHunkVec getUniHunks () const {
+ return uniHunks;
+ }
+
+ bool isHuge () const {
+ return huge;
+ }
+
+ void onHuge () {
+ this->huge = true;
+ }
+
+ void offHuge () {
+ this->huge = false;
+ }
+
+ bool isUnserious () const {
+ return unserious;
+ }
+
+ void onUnserious () {
+ this->unserious = true;
+ }
+
+ void offUnserious () {
+ this->unserious = false;
+ }
+
+ void onOnlyEditDistance () {
+ this->onlyEditDistance = true;
+ }
+
+ /**
+ * patching with Unified Format Hunks
+ */
+ sequence uniPatch (const sequence& seq) {
+ elemList seqLst(seq.begin(), seq.end());
+ sesElemVec shunk;
+ sesElemVec_iter vsesIt;
+ elemList_iter lstIt = seqLst.begin();
+ long long inc_dec_total = 0;
+ long long gap = 1;
+ for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
+ joinSesVec(shunk, it->common[0]);
+ joinSesVec(shunk, it->change);
+ joinSesVec(shunk, it->common[1]);
+ it->a += inc_dec_total;
+ inc_dec_total += it->inc_dec_count;
+ for (long long i=0;i<it->a - gap;++i) {
+ ++lstIt;
+ }
+ gap = it->a + it->b + it->inc_dec_count;
+ vsesIt = shunk.begin();
+ while (vsesIt!=shunk.end()) {
+ switch (vsesIt->second.type) {
+ case SES_ADD :
+ seqLst.insert(lstIt, vsesIt->first);
+ break;
+ case SES_DELETE :
+ if (lstIt != seqLst.end()) {
+ lstIt = seqLst.erase(lstIt);
+ }
+ break;
+ case SES_COMMON :
+ if (lstIt != seqLst.end()) {
+ ++lstIt;
+ }
+ break;
+ default :
+ // no through
+ break;
+ }
+ ++vsesIt;
+ }
+ shunk.clear();
+ }
+
+ sequence patchedSeq(seqLst.begin(), seqLst.end());
+ return patchedSeq;
+ }
+
+ /**
+ * patching with Shortest Edit Script
+ */
+ sequence patch (const sequence& seq) const {
+ sesElemVec sesSeq = ses.getSequence();
+ elemList seqLst(seq.begin(), seq.end());
+ elemList_iter lstIt = seqLst.begin();
+ for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
+ switch (sesIt->second.type) {
+ case SES_ADD :
+ seqLst.insert(lstIt, sesIt->first);
+ break;
+ case SES_DELETE :
+ lstIt = seqLst.erase(lstIt);
+ break;
+ case SES_COMMON :
+ ++lstIt;
+ break;
+ default :
+ // no through
+ break;
+ }
+ }
+ sequence patchedSeq(seqLst.begin(), seqLst.end());
+ return patchedSeq;
+ }
+
+ /**
+ * compose Longest Common Subsequence and Shortest Edit Script.
+ * The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
+ * described by Sun Wu, Udi Manber and Gene Myers
+ */
+ void compose() {
+
+ if (isHuge()) {
+ pathCordinates.reserve(MAX_CORDINATES_SIZE);
+ }
+
+ long long p = -1;
+ fp = new long long[M + N + 3];
+ fill(&fp[0], &fp[M + N + 3], -1);
+ path = editPath(M + N + 3);
+ fill(path.begin(), path.end(), -1);
+ ONP:
+ do {
+ ++p;
+ for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
+ fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
+ }
+ for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
+ fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
+ }
+ fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
+ } while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
+
+ editDistance += static_cast<long long>(delta) + 2 * p;
+ long long r = path[delta+offset];
+ P cordinate;
+ editPathCordinates epc(0);
+
+ // only recoding editdistance
+ if (onlyEditDistance) {
+ delete[] this->fp;
+ return;
+ }
+
+ while(r != -1) {
+ cordinate.x = pathCordinates[(size_t)r].x;
+ cordinate.y = pathCordinates[(size_t)r].y;
+ epc.push_back(cordinate);
+ r = pathCordinates[(size_t)r].k;
+ }
+
+ // record Longest Common Subsequence & Shortest Edit Script
+ if (!recordSequence(epc)) {
+ pathCordinates.resize(0);
+ epc.resize(0);
+ p = -1;
+ goto ONP;
+ }
+ delete[] this->fp;
+ }
+
+ /**
+ * print difference between A and B with SES
+ */
+ template < typename stream >
+ void printSES (stream& out) const {
+ sesElemVec ses_v = ses.getSequence();
+ for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
+ }
+
+ void printSES (ostream& out = cout) const {
+ printSES< ostream >(out);
+ }
+
+ /**
+ * print difference with given SES
+ */
+ template < typename stream >
+ static void printSES (const Ses< elem >& s, stream& out) {
+ sesElemVec ses_v = s.getSequence();
+ for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
+ }
+
+ static void printSES (const Ses< elem >& s, ostream& out = cout) {
+ printSES< ostream >(s, out);
+ }
+
+ /**
+ * print difference between A and B with the format such as Unified Format
+ */
+ template < typename stream >
+ void printUnifiedFormat (stream& out) const {
+ for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
+ }
+
+ void printUnifiedFormat (ostream& out = cout) const {
+ printUnifiedFormat< ostream >(out);
+ }
+
+ /**
+ * print unified format difference with given unified format hunks
+ */
+ template < typename stream >
+ static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
+ for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
+ }
+
+ static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
+ printUnifiedFormat< ostream >(hunks, out);
+ }
+
+ /**
+ * compose Unified Format Hunks from Shortest Edit Script
+ */
+ void composeUnifiedHunks () {
+ sesElemVec common[2];
+ sesElemVec change;
+ sesElemVec ses_v = ses.getSequence();
+ long long l_cnt = 1;
+ long long length = distance(ses_v.begin(), ses_v.end());
+ long long middle = 0;
+ bool isMiddle, isAfter;
+ elem e;
+ elemInfo einfo;
+ long long a, b, c, d; // @@ -a,b +c,d @@
+ long long inc_dec_count = 0;
+ uniHunk< sesElem > hunk;
+ sesElemVec adds;
+ sesElemVec deletes;
+
+ isMiddle = isAfter = false;
+ a = b = c = d = 0;
+
+ for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
+ e = it->first;
+ einfo = it->second;
+ switch (einfo.type) {
+ case SES_ADD :
+ middle = 0;
+ ++inc_dec_count;
+ adds.push_back(*it);
+ if (!isMiddle) isMiddle = true;
+ if (isMiddle) ++d;
+ if (l_cnt >= length) {
+ joinSesVec(change, deletes);
+ joinSesVec(change, adds);
+ isAfter = true;
+ }
+ break;
+ case SES_DELETE :
+ middle = 0;
+ --inc_dec_count;
+ deletes.push_back(*it);
+ if (!isMiddle) isMiddle = true;
+ if (isMiddle) ++b;
+ if (l_cnt >= length) {
+ joinSesVec(change, deletes);
+ joinSesVec(change, adds);
+ isAfter = true;
+ }
+ break;
+ case SES_COMMON :
+ ++b;++d;
+ if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
+ if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
+ if (a == 0 && c == 0) {
+ a = einfo.beforeIdx;
+ c = einfo.afterIdx;
+ }
+ common[0].push_back(*it);
+ } else {
+ rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
+ common[0].pop_back();
+ common[0].push_back(*it);
+ ++a;++c;
+ --b;--d;
+ }
+ }
+ if (isMiddle && !isAfter) {
+ ++middle;
+ joinSesVec(change, deletes);
+ joinSesVec(change, adds);
+ change.push_back(*it);
+ if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
+ isAfter = true;
+ }
+ adds.clear();
+ deletes.clear();
+ }
+ break;
+ default :
+ // no through
+ break;
+ }
+ // compose unified format hunk
+ if (isAfter && !change.empty()) {
+ sesElemVec_iter cit = it;
+ long long cnt = 0;
+ for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
+ if (cit->second.type == SES_COMMON) {
+ ++cnt;
+ }
+ }
+ if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
+ middle = 0;
+ isAfter = false;
+ continue;
+ }
+ if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
+ long long c0size = static_cast<long long>(common[0].size());
+ rotate(common[0].begin(),
+ common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
+ common[0].end());
+ for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
+ common[0].pop_back();
+ }
+ a += c0size - DTL_SEPARATE_SIZE;
+ c += c0size - DTL_SEPARATE_SIZE;
+ }
+ if (a == 0) ++a;
+ if (c == 0) ++c;
+ if (isReverse()) swap(a, c);
+ hunk.a = a;hunk.b = b;hunk.c = c;hunk.d = d;
+ hunk.common[0] = common[0];
+ hunk.change = change;
+ hunk.common[1] = common[1];
+ hunk.inc_dec_count = inc_dec_count;
+ uniHunks.push_back(hunk);
+ isMiddle = false;
+ isAfter = false;
+ common[0].clear();
+ common[1].clear();
+ adds.clear();
+ deletes.clear();
+ change.clear();
+ a = b = c = d = middle = inc_dec_count = 0;
+ }
+ }
+ }
+
+ /**
+ * compose ses from stream
+ */
+ template <typename stream>
+ static Ses< elem > composeSesFromStream (stream& st)
+ {
+ elem line;
+ Ses< elem > ret;
+ long long x_idx, y_idx;
+ x_idx = y_idx = 1;
+ while (getline(st, line)) {
+ elem mark(line.begin(), line.begin() + 1);
+ elem e(line.begin() + 1, line.end());
+ if (mark == SES_MARK_DELETE) {
+ ret.addSequence(e, x_idx, 0, SES_DELETE);
+ ++x_idx;
+ } else if (mark == SES_MARK_ADD) {
+ ret.addSequence(e, y_idx, 0, SES_ADD);
+ ++y_idx;
+ } else if (mark == SES_MARK_COMMON) {
+ ret.addSequence(e, x_idx, y_idx, SES_COMMON);
+ ++x_idx;
+ ++y_idx;
+ }
+ }
+ return ret;
+ }
+
+ private :
+ /**
+ * initialize
+ */
+ void init () {
+ M = distance(A.begin(), A.end());
+ N = distance(B.begin(), B.end());
+ if (M < N) {
+ reverse = false;
+ } else {
+ swap(A, B);
+ swap(M, N);
+ reverse = true;
+ }
+ editDistance = 0;
+ delta = N - M;
+ offset = M + 1;
+ huge = false;
+ unserious = false;
+ onlyEditDistance = false;
+ fp = NULL;
+ }
+
+ /**
+ * search shortest path and record the path
+ */
+ long long snake(const long long& k, const long long& above, const long long& below) {
+ long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
+ long long y = max(above, below);
+ long long x = y - k;
+ while ((size_t)x < M && (size_t)y < N && cmp.impl(A[(size_t)x], B[(size_t)y])) {
+ ++x;++y;
+ }
+
+ path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
+ if (!onlyEditDistance) {
+ P p;
+ p.x = x;p.y = y;p.k = r;
+ pathCordinates.push_back(p);
+ }
+ return y;
+ }
+
+ /**
+ * record SES and LCS
+ */
+ bool recordSequence (const editPathCordinates& v) {
+ sequence_const_iter x(A.begin());
+ sequence_const_iter y(B.begin());
+ long long x_idx, y_idx; // line number for Unified Format
+ long long px_idx, py_idx; // cordinates
+ bool complete = false;
+ x_idx = y_idx = 1;
+ px_idx = py_idx = 0;
+ for (size_t i=v.size()-1;!complete;--i) {
+ while(px_idx < v[i].x || py_idx < v[i].y) {
+ if (v[i].y - v[i].x > py_idx - px_idx) {
+ if (!isReverse()) {
+ ses.addSequence(*y, y_idx, 0, SES_ADD);
+ } else {
+ ses.addSequence(*y, y_idx, 0, SES_DELETE);
+ }
+ ++y;
+ ++y_idx;
+ ++py_idx;
+ } else if (v[i].y - v[i].x < py_idx - px_idx) {
+ if (!isReverse()) {
+ ses.addSequence(*x, x_idx, 0, SES_DELETE);
+ } else {
+ ses.addSequence(*x, x_idx, 0, SES_ADD);
+ }
+ ++x;
+ ++x_idx;
+ ++px_idx;
+ } else {
+ lcs.addSequence(*x);
+ ses.addSequence(*x, x_idx, y_idx, SES_COMMON);
+ ++x;
+ ++y;
+ ++x_idx;
+ ++y_idx;
+ ++px_idx;
+ ++py_idx;
+ }
+ }
+ if (i == 0) complete = true;
+ }
+
+ if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
+ // all recording success
+ } else {
+ // unserious difference
+ if (isUnserious()) {
+ if (!isReverse()) {
+ recordOddSequence(x_idx, M, x, SES_DELETE);
+ recordOddSequence(y_idx, N, y, SES_ADD);
+ } else {
+ recordOddSequence(x_idx, M, x, SES_ADD);
+ recordOddSequence(y_idx, N, y, SES_DELETE);
+ }
+ return true;
+ }
+
+ // decent difference
+ sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
+ sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
+ A = A_;
+ B = B_;
+ M = distance(A.begin(), A.end());
+ N = distance(B.begin(), B.end());
+ delta = N - M;
+ offset = M + 1;
+ delete[] fp;
+ fp = new long long[M + N + 3];
+ fill(&fp[0], &fp[M + N + 3], -1);
+ fill(path.begin(), path.end(), -1);
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * record odd sequence to ses
+ */
+ void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
+ while(idx < length){
+ ses.addSequence(*it, idx, 0, et);
+ ++it;
+ ++idx;
+ ++editDistance;
+ }
+ ses.addSequence(*it, idx, 0, et);
+ ++editDistance;
+ }
+
+ /**
+ * join ses vectors
+ */
+ void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
+ if (!s2.empty()) {
+ for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
+ s1.push_back(*vit);
+ }
+ }
+ }
+
+ /**
+ * check sequence is replaced each other
+ */
+ bool inline isReverse () const {
+ return reverse;
+ }
+
+ };
+}
+
+#endif // DTL_DIFF_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_DIFF3_H
+#define DTL_DIFF3_H
+
+namespace dtl {
+
+ /**
+ * diff3 class template
+ * sequence must support random_access_iterator.
+ */
+ template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
+ class Diff3
+ {
+ private:
+ dtl_typedefs(elem, sequence)
+ sequence A;
+ sequence B;
+ sequence C;
+ sequence S;
+ Diff< elem, sequence, comparator > diff_ba;
+ Diff< elem, sequence, comparator > diff_bc;
+ bool conflict;
+ elem csepabegin;
+ elem csepa;
+ elem csepaend;
+ public :
+ Diff3 () {}
+ Diff3 (const sequence& a,
+ const sequence& b,
+ const sequence& c) : A(a), B(b), C(c),
+ diff_ba(b, a), diff_bc(b, c),
+ conflict(false) {}
+
+ ~Diff3 () {}
+
+ bool isConflict () const {
+ return conflict;
+ }
+
+ sequence getMergedSequence () const {
+ return S;
+ }
+
+ /**
+ * merge changes B and C to A
+ */
+ bool merge () {
+ if (diff_ba.getEditDistance() == 0) { // A == B
+ if (diff_bc.getEditDistance() == 0) { // A == B == C
+ S = B;
+ return true;
+ }
+ S = C;
+ return true;
+ } else { // A != B
+ if (diff_bc.getEditDistance() == 0) { // A != B == C
+ S = A;
+ return true;
+ } else { // A != B != C
+ S = merge_();
+ if (isConflict()) { // conflict occured
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
+ * compose differences
+ */
+ void compose () {
+ diff_ba.compose();
+ diff_bc.compose();
+ }
+
+ private :
+ /**
+ * merge implementation
+ */
+ sequence merge_ () {
+ elemVec seq;
+ Ses< elem > ses_ba = diff_ba.getSes();
+ Ses< elem > ses_bc = diff_bc.getSes();
+ sesElemVec ses_ba_v = ses_ba.getSequence();
+ sesElemVec ses_bc_v = ses_bc.getSequence();
+ sesElemVec_iter ba_it = ses_ba_v.begin();
+ sesElemVec_iter bc_it = ses_bc_v.begin();
+ sesElemVec_iter ba_end = ses_ba_v.end();
+ sesElemVec_iter bc_end = ses_bc_v.end();
+
+ while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
+ while (true) {
+ if (!isEnd(ba_end, ba_it) &&
+ !isEnd(bc_end, bc_it) &&
+ ba_it->first == bc_it->first &&
+ ba_it->second.type == SES_COMMON &&
+ bc_it->second.type == SES_COMMON) {
+ // do nothing
+ } else {
+ break;
+ }
+ if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
+ else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
+ forwardUntilEnd(ba_end, ba_it);
+ forwardUntilEnd(bc_end, bc_it);
+ }
+ if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
+ if ( ba_it->second.type == SES_COMMON
+ && bc_it->second.type == SES_DELETE) {
+ forwardUntilEnd(ba_end, ba_it);
+ forwardUntilEnd(bc_end, bc_it);
+ } else if (ba_it->second.type == SES_COMMON &&
+ bc_it->second.type == SES_ADD) {
+ seq.push_back(bc_it->first);
+ forwardUntilEnd(bc_end, bc_it);
+ } else if (ba_it->second.type == SES_DELETE &&
+ bc_it->second.type == SES_COMMON) {
+ forwardUntilEnd(ba_end, ba_it);
+ forwardUntilEnd(bc_end, bc_it);
+ } else if (ba_it->second.type == SES_DELETE &&
+ bc_it->second.type == SES_DELETE) {
+ if (ba_it->first == bc_it->first) {
+ forwardUntilEnd(ba_end, ba_it);
+ forwardUntilEnd(bc_end, bc_it);
+ } else {
+ // conflict
+ conflict = true;
+ return B;
+ }
+ } else if (ba_it->second.type == SES_DELETE &&
+ bc_it->second.type == SES_ADD) {
+ // conflict
+ conflict = true;
+ return B;
+ } else if (ba_it->second.type == SES_ADD &&
+ bc_it->second.type == SES_COMMON) {
+ seq.push_back(ba_it->first);
+ forwardUntilEnd(ba_end, ba_it);
+ } else if (ba_it->second.type == SES_ADD &&
+ bc_it->second.type == SES_DELETE) {
+ // conflict
+ conflict = true;
+ return B;
+ } else if (ba_it->second.type == SES_ADD &&
+ bc_it->second.type == SES_ADD) {
+ if (ba_it->first == bc_it->first) {
+ seq.push_back(ba_it->first);
+ forwardUntilEnd(ba_end, ba_it);
+ forwardUntilEnd(bc_end, bc_it);
+ } else {
+ // conflict
+ conflict = true;
+ return B;
+ }
+ }
+ }
+
+ if (isEnd(ba_end, ba_it)) {
+ addDecentSequence(bc_end, bc_it, seq);
+ } else if (isEnd(bc_end, bc_it)) {
+ addDecentSequence(ba_end, ba_it, seq);
+ }
+
+ sequence mergedSeq(seq.begin(), seq.end());
+ return mergedSeq;
+ }
+
+ /**
+ * join elem vectors
+ */
+ void inline joinElemVec (elemVec& s1, elemVec& s2) const {
+ if (!s2.empty()) {
+ for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
+ s1.push_back(*vit);
+ }
+ }
+ }
+
+ /**
+ * check sequence is end
+ */
+ template <typename T_iter>
+ bool inline isEnd (const T_iter& end, const T_iter& it) const {
+ return it == end ? true : false;
+ }
+
+ /**
+ * increment iterator until iterator is end
+ */
+ template <typename T_iter>
+ void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
+ if (!isEnd(end, it)) ++it;
+ }
+
+ /**
+ * add elements which SES's type is ADD
+ */
+ void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
+ while (!isEnd(end, it)) {
+ if (it->second.type == SES_ADD) seq.push_back(it->first);
+ ++it;
+ }
+ }
+
+ };
+}
+
+#endif // DTL_DIFF3_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_LCS_H
+#define DTL_LCS_H
+
+namespace dtl {
+
+ /**
+ * Longest Common Subsequence template class
+ */
+ template <typename elem>
+ class Lcs : public Sequence< elem >
+ {
+ public :
+ Lcs () {}
+ ~Lcs () {}
+ };
+}
+
+#endif // DTL_LCS_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_SEQUENCE_H
+#define DTL_SEQUENCE_H
+
+namespace dtl {
+
+ /**
+ * sequence class template
+ */
+ template <typename elem>
+ class Sequence
+ {
+ public :
+ typedef vector< elem > elemVec;
+ Sequence () {}
+ virtual ~Sequence () {}
+
+ elemVec getSequence () const {
+ return sequence;
+ }
+ void addSequence (elem e) {
+ sequence.push_back(e);
+ }
+ protected :
+ elemVec sequence;
+ };
+}
+
+#endif // DTL_SEQUENCE_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_SES_H
+#define DTL_SES_H
+
+namespace dtl {
+
+ /**
+ * Shortest Edit Script template class
+ */
+ template <typename elem>
+ class Ses : public Sequence< elem >
+ {
+ private :
+ typedef pair< elem, elemInfo > sesElem;
+ typedef vector< sesElem > sesElemVec;
+ public :
+
+ Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true) { }
+ ~Ses () {}
+
+ bool isOnlyAdd () const {
+ return onlyAdd;
+ }
+
+ bool isOnlyDelete () const {
+ return onlyDelete;
+ }
+
+ bool isOnlyCopy () const {
+ return onlyCopy;
+ }
+
+ bool isOnlyOneOperation () const {
+ return isOnlyAdd() || isOnlyAdd() || isOnlyCopy();
+ }
+
+ bool isChange () const {
+ return !onlyCopy;
+ }
+
+ using Sequence< elem >::addSequence;
+ void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
+ elemInfo info;
+ info.beforeIdx = beforeIdx;
+ info.afterIdx = afterIdx;
+ info.type = type;
+ sesElem pe(e, info);
+ sequence.push_back(pe);
+ switch (type) {
+ case SES_DELETE:
+ onlyCopy = false;
+ onlyAdd = false;
+ break;
+ case SES_COMMON:
+ onlyAdd = false;
+ onlyDelete = false;
+ break;
+ case SES_ADD:
+ onlyDelete = false;
+ onlyCopy = false;
+ break;
+ }
+ }
+
+ sesElemVec getSequence () const {
+ return sequence;
+ }
+ private :
+ sesElemVec sequence;
+ bool onlyAdd;
+ bool onlyDelete;
+ bool onlyCopy;
+ };
+}
+
+#endif // DTL_SES_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DTL_H
+#define DTL_H
+
+#include "variables.hpp"
+#include "functors.hpp"
+#include "Sequence.hpp"
+#include "Lcs.hpp"
+#include "Ses.hpp"
+#include "Diff.hpp"
+#include "Diff3.hpp"
+
+#endif // DTL_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_FUNCTORS_H
+#define DTL_FUNCTORS_H
+
+namespace dtl {
+
+ /**
+ * printer class template
+ */
+ template <typename sesElem, typename stream = ostream >
+ class Printer
+ {
+ public :
+ Printer () : out_(cout) {}
+ Printer (stream& out) : out_(out) {}
+ virtual ~Printer () {}
+ virtual void operator() (const sesElem& se) const = 0;
+ protected :
+ stream& out_;
+ };
+
+ /**
+ * common element printer class template
+ */
+ template <typename sesElem, typename stream = ostream >
+ class CommonPrinter : public Printer < sesElem, stream >
+ {
+ public :
+ CommonPrinter () : Printer < sesElem, stream > () {}
+ CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
+ ~CommonPrinter () {}
+ void operator() (const sesElem& se) const {
+ this->out_ << SES_MARK_COMMON << se.first << endl;
+ }
+ };
+
+ /**
+ * ses element printer class template
+ */
+ template <typename sesElem, typename stream = ostream >
+ class ChangePrinter : public Printer < sesElem, stream >
+ {
+ public :
+ ChangePrinter () : Printer < sesElem, stream > () {}
+ ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
+ ~ChangePrinter () {}
+ void operator() (const sesElem& se) const {
+ switch (se.second.type) {
+ case SES_ADD:
+ this->out_ << SES_MARK_ADD << se.first << endl;
+ break;
+ case SES_DELETE:
+ this->out_ << SES_MARK_DELETE << se.first << endl;
+ break;
+ case SES_COMMON:
+ this->out_ << SES_MARK_COMMON << se.first << endl;
+ break;
+ }
+ }
+ };
+
+ /**
+ * unfiend format element printer class template
+ */
+ template <typename sesElem, typename stream = ostream >
+ class UniHunkPrinter
+ {
+ public :
+ UniHunkPrinter () : out_(cout) {}
+ UniHunkPrinter (stream& out) : out_(out) {}
+ ~UniHunkPrinter () {}
+ void operator() (const uniHunk< sesElem >& hunk) const {
+ out_ << "@@"
+ << " -" << hunk.a << "," << hunk.b
+ << " +" << hunk.c << "," << hunk.d
+ << " @@" << endl;
+
+ for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
+ for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
+ for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
+ }
+ private :
+ stream& out_;
+ };
+
+ /**
+ * compare class template
+ */
+ template <typename elem>
+ class Compare
+ {
+ public :
+ Compare () {}
+ virtual ~Compare () {}
+ virtual inline bool impl (const elem& e1, const elem& e2) const {
+ return e1 == e2;
+ }
+ };
+}
+
+#endif // DTL_FUNCTORS_H
--- /dev/null
+/**
+ dtl-1.12 -- Diff Template Library
+
+ In short, Diff Template Library is distributed under so called "BSD license",
+
+ Copyright (c) 2008-2011 Tatsuhiko Kubo <cubicdaiya@gmail.com>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the authors nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_VARIABLES_H
+#define DTL_VARIABLES_H
+
+#include <vector>
+#include <list>
+#include <string>
+#include <algorithm>
+#include <iostream>
+
+namespace dtl {
+
+ using std::vector;
+ using std::string;
+ using std::pair;
+ using std::ostream;
+ using std::list;
+ using std::for_each;
+ using std::distance;
+ using std::fill;
+ using std::cout;
+ using std::endl;
+ using std::rotate;
+ using std::swap;
+ using std::max;
+
+ /**
+ * type of edit for SES
+ */
+ typedef int edit_t;
+ const edit_t SES_DELETE = -1;
+ const edit_t SES_COMMON = 0;
+ const edit_t SES_ADD = 1;
+
+ /**
+ * mark of SES
+ */
+#define SES_MARK_DELETE "-"
+#define SES_MARK_COMMON " "
+#define SES_MARK_ADD "+"
+
+ /**
+ * info for Unified Format
+ */
+ typedef struct eleminfo {
+ long long beforeIdx; // index of prev sequence
+ long long afterIdx; // index of after sequence
+ edit_t type; // type of edit(Add, Delete, Common)
+ } elemInfo;
+
+ const long long DTL_SEPARATE_SIZE = 3;
+ const long long DTL_CONTEXT_SIZE = 3;
+
+ /**
+ * cordinate for registering route
+ */
+ typedef struct Point {
+ long long x; // x cordinate
+ long long y; // y cordinate
+ long long k; // vertex
+ } P;
+
+ /**
+ * limit of cordinate size
+ */
+ const unsigned long long MAX_CORDINATES_SIZE = 2000000;
+
+ typedef vector< long long > editPath;
+ typedef vector< P > editPathCordinates;
+
+ /**
+ * Structure of Unified Format Hunk
+ */
+ template <typename sesElem>
+ struct uniHunk {
+ long long a, b, c, d; // @@ -a,b +c,d @@
+ vector< sesElem > common[2]; // anteroposterior commons on changes
+ vector< sesElem > change; // changes
+ long long inc_dec_count; // count of increace and decrease
+ };
+
+#define dtl_typedefs(elem, sequence) \
+ typedef pair< elem, elemInfo > sesElem; \
+ typedef vector< sesElem > sesElemVec; \
+ typedef vector< uniHunk< sesElem > > uniHunkVec; \
+ typedef list< elem > elemList; \
+ typedef vector< elem > elemVec; \
+ typedef typename uniHunkVec::iterator uniHunkVec_iter; \
+ typedef typename sesElemVec::iterator sesElemVec_iter; \
+ typedef typename elemList::iterator elemList_iter; \
+ typedef typename sequence::iterator sequence_iter; \
+ typedef typename sequence::const_iterator sequence_const_iter; \
+ typedef typename elemVec::iterator elemVec_iter;
+
+
+}
+
+#endif // DTL_VARIABLES_H
*/
#include <stdio.h>
+#include <iostream>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <getopt.h>
#include "disorder.h"
#include "md5.h"
+#include "dtl/dtl.hpp"
+#include <vector>
+
+using namespace std;
// timestamp of the form 2003-11-07T00:43:23Z
#define DATE_LENGTH 10
char *editorid;
char *comment;
char *text;
+ vector<string> last_tokens;
// track string size of the elements, to prevent O(N^2) processing in charhndl
// when we have to take strlen for every character which we append to the buffer
free(data->editorid);
free(data->comment);
free(data->text);
+ data->last_tokens.clear();
}
void cleanup_revision(revisionData *data) {
void cleanup_article(revisionData *data) {
clean_data(data, 1);
+ data->last_tokens.clear();
}
sprintf(md5_hex_output + di * 2, "%02x", digest[di]);
}
+ string text = string(data->text, data->text_size);
+ vector<string> text_tokens;
+ size_t period_pos = 0;
+ size_t paragraph_pos = 0;
+ size_t start = 0;
+ while ((period_pos = text.find(".", period_pos + 1)) != string::npos &&
+ (paragraph_pos = text.find("\n\n", paragraph_pos + 1)) != string::npos) {
+ if (paragraph_pos < period_pos) {
+ text_tokens.push_back(text.substr(start, paragraph_pos - start));
+ start = paragraph_pos;
+ } else {
+ text_tokens.push_back(text.substr(start, period_pos - start));
+ start = period_pos;
+ }
+ }
+
+ vector<string> additions;
+ vector<string> deletions;
+
+ if (data->last_tokens.empty()) {
+ data->last_tokens = text_tokens;
+ } else {
+ // do the diff
+
+ dtl::Diff< string, vector<string> > d(data->last_tokens, text_tokens);
+ //d.onOnlyEditDistance();
+ d.compose();
+
+ vector<pair<string, dtl::elemInfo> > ses_v = d.getSes().getSequence();
+ for (vector<pair<string, dtl::elemInfo> >::iterator sit=ses_v.begin(); sit!=ses_v.end(); ++sit) {
+ switch (sit->second.type) {
+ case dtl::SES_ADD:
+ cout << "ADD: \"" << sit->first << "\"" << endl;
+ additions.push_back(sit->first);
+ break;
+ case dtl::SES_DELETE:
+ cout << "DEL: \"" << sit->first << "\"" << endl;
+ deletions.push_back(sit->first);
+ break;
+ }
+ }
+
+ // apply regex to the diff
+
+
+ data->last_tokens = text_tokens;
+ }
+
+
// print line of tsv output
- printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%s\t%s\t%i\t%f\t%s\n",
+ printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%s\t%s\t%i\t%f\t%s\t%i\t%i\n",
data->title,
data->articleid,
data->revid,
(data->minor) ? "1" : "0",
(unsigned int) data->text_size,
shannon_H(data->text, data->text_size),
- md5_hex_output
+ md5_hex_output,
+ (int) additions.size(),
+ (int) deletions.size()
);
//