using System;
using System.Linq;
using System.Collections.Generic;

namespace SpreadsheetLLM.Heuristic
{
    internal partial class TableDetectionHybrid
    {
        /// <summary>
        /// Drops every candidate box that overlaps the sheet's pivot table boxes, then
        /// appends each pivot table box to the candidate list.
        /// </summary>
        private void OverlapPivotFilter()
        {
            // Gather the colliding candidates first; they are removed in a single batch below.
            var collidingCandidates = new HashSet<Boundary>();
            foreach (var candidate in _boxes)
            {
                if (!Utils.isOverlap(candidate, _sheet.pivotBoxes))
                {
                    continue;
                }
                collidingCandidates.Add(candidate);
            }
            Utils.RemoveTheseCandidates(collidingCandidates, _boxes);

            // Pivot tables themselves always become candidates.
            foreach (var pivot in _sheet.pivotBoxes)
            {
                _boxes.Add(pivot);
            }
        }

        /// <summary>
        /// Removes candidate boxes that coincide with the sheet's merge boxes: either the
        /// candidate and the merge boxes contain each other according to
        /// <c>Utils.ContainsBox</c>, or the candidate is itself one of the merge boxes.
        /// </summary>
        private void MergeFilter()
        {
            var mergeLikeCandidates = new HashSet<Boundary>();
            foreach (var candidate in _boxes)
            {
                // NOTE(review): the trailing argument 2 is presumably a tolerance/step for the
                // containment test — confirm against Utils.ContainsBox.
                bool mutuallyContained = Utils.ContainsBox(candidate, _sheet.mergeBoxes, 2)
                    && Utils.ContainsBox(_sheet.mergeBoxes, candidate, 2);

                if (mutuallyContained || _sheet.mergeBoxes.Contains(candidate))
                {
                    mergeLikeCandidates.Add(candidate);
                }
            }

            Utils.RemoveTheseCandidates(mergeLikeCandidates, _boxes);
        }

        /// <summary>
        /// Removes candidate boxes that <c>Utils.isOverlap</c> reports as overlapping the sheet's
        /// small cohesion border regions, with the forward, backward and suppression
        /// exceptions all enabled.
        /// </summary>
        private void OverlapBorderCohensionFilter()
        {
            var rejected = new HashSet<Boundary>();
            for (int index = 0; index < _boxes.Count; index++)
            {
                Boundary candidate = _boxes[index];
                bool hitsBorderRegion = Utils.isOverlap(
                    candidate,
                    _sheet.smallCohensionBorderRegions,
                    exceptForward: true,
                    exceptBackward: true,
                    exceptSuppression: true);
                if (hitsBorderRegion)
                {
                    rejected.Add(candidate);
                }
            }
            Utils.RemoveTheseCandidates(rejected, _boxes);
        }

        /// <summary>
        /// Removes candidate boxes that <c>Utils.isOverlap</c> reports as overlapping the sheet's
        /// cohesion regions, with the forward and backward exceptions enabled.
        /// </summary>
        private void OverlapCohensionFilter()
        {
            var rejected = new HashSet<Boundary>();
            for (int index = 0; index < _boxes.Count; index++)
            {
                Boundary candidate = _boxes[index];

                // A box already queued for removal is not tested again.
                // (An earlier refinement based on forced cohesion regions and header checks
                // was disabled; every overlapping box is rejected unconditionally.)
                if (!rejected.Contains(candidate)
                    && Utils.isOverlap(candidate, _sheet.conhensionRegions, exceptForward: true, exceptBackward: true))
                {
                    rejected.Add(candidate);
                }
            }
            Utils.RemoveTheseCandidates(rejected, _boxes);
        }

        /// <summary>
        /// Resolves overlaps between candidate boxes pairwise, keeping the larger box.
        /// </summary>
        /// <remarks>
        /// Each box is compared against the boxes that follow it in <c>_boxes</c> and are not
        /// yet discarded. Overlapping followers whose area is not larger are tentatively
        /// beaten; as soon as a strictly larger overlapping follower is found, the current box
        /// is discarded instead and its tentative wins are dropped.
        /// </remarks>
        private void EliminateOverlaps()
        {
            var discarded = new HashSet<Boundary>();
            int lastIndex = _boxes.Count - 1;

            for (int first = 0; first < lastIndex; first++)
            {
                Boundary current = _boxes[first];
                if (discarded.Contains(current))
                {
                    continue;
                }

                var beaten = new List<Boundary>();
                bool currentLoses = false;

                for (int second = first + 1; second <= lastIndex; second++)
                {
                    Boundary rival = _boxes[second];
                    if (discarded.Contains(rival) || !Utils.isOverlap(current, rival))
                    {
                        continue;
                    }

                    if (Utils.AreaSize(current) >= Utils.AreaSize(rival))
                    {
                        beaten.Add(rival);
                        continue;
                    }

                    // A strictly larger overlapping rival wins; stop scanning.
                    currentLoses = true;
                    break;
                }

                if (currentLoses)
                {
                    discarded.Add(current);
                }
                else
                {
                    discarded.UnionWith(beaten);
                }
            }

            Utils.RemoveTheseCandidates(discarded, _boxes);
        }

        /// <summary>
        /// Keeps the candidate boxes that are identical to one of the sheet's small cohesion
        /// border regions and removes the other candidates that overlap them.
        /// </summary>
        private void ForcedBorderFilter()
        {
            // Step 1: border regions that also appear among the candidates.
            var matchedRegions = new List<Boundary>();
            var borderRegions = _sheet.smallCohensionBorderRegions;
            if (borderRegions != null)
            {
                foreach (var region in borderRegions)
                {
                    if (!_boxes.Contains(region))
                    {
                        continue;
                    }
                    matchedRegions.Add(region);
                }
            }

            // Step 2: candidates overlapping any matched region.
            // NOTE(review): exceptForward presumably exempts the matched boxes themselves — confirm in Utils.isOverlap.
            var rejected = new HashSet<Boundary>();
            for (int index = 0; index < _boxes.Count; index++)
            {
                Boundary candidate = _boxes[index];
                if (Utils.isOverlap(candidate, matchedRegions, exceptForward: true))
                {
                    rejected.Add(candidate);
                }
            }
            Utils.RemoveTheseCandidates(rejected, _boxes);
        }

        /// <summary>
        /// Removes candidate boxes that are too thin, too small, or too sparse.
        /// </summary>
        /// <remarks>
        /// Three groups of checks run for every box, in order:
        /// (1) thin boxes, (2) small boxes, (3) boxes with nearly empty inner rows/columns.
        /// Each check compares <c>_sheet.ContentExistValueDensity(box)</c> against a
        /// size-dependent threshold; failing boxes are queued and removed in one batch.
        /// Thresholds are written as <c>2 * x</c>; presumably the density scale tops out
        /// at 2 — TODO confirm against <c>ContentExistValueDensity</c>.
        /// </remarks>
        private void LittleBoxesFilter()
        {
            var removedBoxes = new HashSet<Boundary>();
            foreach (var box in _boxes)
            {
                // Extent of the box along each axis (difference of its boundary coordinates).
                int height = box.bottom - box.top;
                int width = box.right - box.left;

                // filter thin boxes: the thinner/smaller the box, the denser it must be
                #region
                if (height < 1 || width < 1)
                {
                    removedBoxes.Add(box);
                    continue;
                }
                else if ((height < 2 || width < 2) && Utils.AreaSize(box) < 8)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.7)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                else if ((height < 2 || width < 2) && Utils.AreaSize(box) < 24)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.6)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                else if (height < 2 || width < 2)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.55)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                else if (height < 3 || width < 3)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.35)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                #endregion

                // filter small boxes: the required density relaxes as the box grows
                #region
                if (Utils.AreaSize(box) < 7)
                {
                    removedBoxes.Add(box);
                    continue;
                }
                else if ((height < 5 && width < 3) || (height < 3 && width < 5))
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.55)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                else if (height < 5 && width < 5)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.4)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                else if (height < 8 && width < 8)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.35)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                else if (height < 14 && width < 14)
                {
                    if (_sheet.ContentExistValueDensity(box) < 2 * 0.25)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }

                #endregion

                // filter thin boxes with continuous (nearly) empty rows/cols
                #region
                if (height == 2)
                {
                    // The single middle row between the top and bottom rows.
                    Boundary boxWindow = new Boundary(box.top + 1, box.bottom - 1, box.left, box.right);
                    if (_sheet.sumContentExist.SubmatrixSum(boxWindow) <= 5 && _sheet.ContentExistValueDensity(box) < 2 * 0.45)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                if (width == 2)
                {
                    // The single middle column between the left and right columns.
                    Boundary boxWindow = new Boundary(box.top, box.bottom, box.left + 1, box.right - 1);
                    if (_sheet.sumContentExist.SubmatrixSum(boxWindow) <= 5 && _sheet.ContentExistValueDensity(box) < 2 * 0.45)
                    {
                        removedBoxes.Add(box);
                        continue;
                    }
                }
                // Row scan for boxes with height 3..4. The previous integer condition
                // (> 3 && < 4) could never hold; the bounds now mirror the column check below.
                if (height > 2 && height <= 4)
                {
                    for (int index = box.top + 1; index < box.bottom; index++)
                    {
                        // Two adjacent rows spanning the full width of the box.
                        Boundary boxWindow = new Boundary(index, index + 1, box.left, box.right);
                        if (_sheet.sumContentExist.SubmatrixSum(boxWindow) <= 3 && _sheet.ContentExistValueDensity(box) < 2 * 0.4)
                        {
                            removedBoxes.Add(box);
                            break;
                        }
                    }
                }
                // NOTE(review): the column scans start at left + 2 while the row scans start
                // at top + 1 — confirm the asymmetry is intended.
                if (width > 2 && width <= 4)
                {
                    for (int index = box.left + 2; index < box.right; index++)
                    {
                        // Two adjacent columns spanning the full height of the box.
                        Boundary boxWindow = new Boundary(box.top, box.bottom, index, index + 1);
                        if (_sheet.sumContentExist.SubmatrixSum(boxWindow) <= 3 && _sheet.ContentExistValueDensity(box) < 2 * 0.4)
                        {
                            removedBoxes.Add(box);
                            break;
                        }
                    }
                }
                if (width > 4 && width <= 7)
                {
                    for (int index = box.left + 2; index < box.right - 1; index++)
                    {
                        Boundary boxWindow = new Boundary(box.top, box.bottom, index, index + 1);
                        if (_sheet.sumContentExist.SubmatrixSum(boxWindow) <= 3 && _sheet.ContentExistValueDensity(box) < 2 * 0.5)
                        {
                            removedBoxes.Add(box);
                            break;
                        }
                    }
                }
                if (height > 4 && height <= 7)
                {
                    for (int index = box.top + 1; index < box.bottom - 1; index++)
                    {
                        Boundary boxWindow = new Boundary(index, index + 1, box.left, box.right);
                        if (_sheet.sumContentExist.SubmatrixSum(boxWindow) <= 3 && _sheet.ContentExistValueDensity(box) < 2 * 0.5)
                        {
                            removedBoxes.Add(box);
                            break;
                        }
                    }
                }
                #endregion
            }
            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        /// <summary>
        /// Removes candidate boxes that have at least one edge line (top row, bottom row,
        /// left column or right column) with neither content nor color.
        /// </summary>
        /// <remarks>
        /// The edge test is delegated to <see cref="VerifyBoxBorderValueNotNull"/>, which
        /// performs exactly the four-edge check this filter previously duplicated inline.
        /// </remarks>
        private void NoneBorderFilter()
        {
            var removedBoxes = new HashSet<Boundary>();
            foreach (var box in _boxes)
            {
                // A box fails when any of its four edge lines sums to zero content + color.
                if (!VerifyBoxBorderValueNotNull(box))
                {
                    removedBoxes.Add(box);
                }
            }

            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        /// <summary>
        /// Removes candidate boxes that fail <c>VerifyBoxSplit</c>, i.e. boxes in which
        /// continuous empty rows/columns split the box into two unrelated regions.
        /// </summary>
        private void SplittedEmptyLinesFilter()
        {
            var splitCandidates = new HashSet<Boundary>();
            for (int index = 0; index < _boxes.Count; index++)
            {
                Boundary candidate = _boxes[index];

                // Boxes already queued for removal are not verified a second time.
                if (!splitCandidates.Contains(candidate) && !VerifyBoxSplit(candidate))
                {
                    splitCandidates.Add(candidate);
                }
            }
            Utils.RemoveTheseCandidates(splitCandidates, _boxes);
        }

        #region overlap header filter
        // Merges pairs of overlapping candidate boxes that share a header line.
        //
        // Two boxes qualify when they overlap and either
        //   * start on the same top row, both have bottom - top > 4, and the top row spanning
        //     both boxes passes IsHeaderUp, or
        //   * start on the same left column, both have right - left > 4, and the left column
        //     spanning both boxes passes IsHeaderLeft.
        // The pair is replaced by the box returned by Utils.UnifyBox (presumably their bounding
        // union) only if no third candidate overlaps that merged box.
        private void AdjoinHeaderFilter()
        {
            // two candidate boxes whose headers overlap each other
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();

            // examine every unordered pair (i < j) of candidates
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                for (int j = i + 1; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];

                    // identical boxes have nothing to merge
                    if (box1.Equals(box2)) continue;
                    // the two boxes must overlap
                    if (!Utils.isOverlap(box1, box2)) continue;
                    // the two boxes must share an up header (same top row) or a left header (same left column);
                    // the header test runs on the shared line extended over both boxes
                    if (!((box1.top == box2.top && box2.bottom - box2.top > 4 && box1.bottom - box1.top > 4 && IsHeaderUp(new Boundary(box1.top, box1.top, Math.Min(box1.left, box2.left), Math.Max(box1.right, box2.right))))
                        || (box1.left == box2.left && box2.right - box2.left > 4 && box1.right - box1.left > 4 && IsHeaderLeft(new Boundary(Math.Min(box1.top, box2.top), Math.Max(box1.bottom, box2.bottom), box1.left, box1.left)))))
                        continue;
                    Boundary boxMerge = Utils.UnifyBox(box1, box2);
                    // make sure no other candidate overlaps the merged box
                    bool markOverlap = false;
                    foreach (var box3 in _boxes)
                    {
                        if (box1.Equals(box3) || box2.Equals(box3)) continue;
                        if (Utils.isOverlap(boxMerge, box3)) { markOverlap = true; break; }
                    }
                    // remove the two originals (unless one already equals the merged box) and append the merged box
                    if (!markOverlap)
                    {
                        if (!box1.Equals(boxMerge))
                        {
                            removedBoxes.Add(box1);
                        }
                        if (!box2.Equals(boxMerge))
                        {
                            removedBoxes.Add(box2);
                        }
                        appendBoxes.Add(boxMerge);
                    }
                }
            }

            // apply all removals and additions in one batch, after the pair scan is complete
            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
        }

        // Removes candidate boxes that conflict with the up headers found for the candidates.
        //
        // For every (box, header) pair three situations lead to removal:
        //   1. the box overlaps the row range just above the header, its top is more than one
        //      row away from that range's top, and it is horizontally aligned with the header
        //      (or its bottom lies within one row of the header's top);
        //   2. the box starts within one row of the header's top and its left edge falls strictly
        //      inside the header's columns, while the header cells just left of the box are compact;
        //   3. the mirror image of 2 for the box's right edge.
        private void OverlapUpHeaderFilter()
        {
            // boxes that overlap other boxes' up header
            var removedBoxes = new HashSet<Boundary>();
            // find out the up headers of all candidate boxes
            List<Boundary> upHeaders = _sheet.FindoutUpheaders(this, _boxes);

            #region filter _boxes based on headers
            foreach (var box in _boxes)
            {
                foreach (var headerBox in upHeaders)
                {
                    // the header shifted up by one row
                    Boundary upsideOfHeader = new Boundary(headerBox.top - 1, headerBox.bottom - 1, headerBox.left, headerBox.right);
                    // the box wrongly contains an up header inside its data area: the left/right boundaries match
                    // exactly, or within 1 (box.right - box.left > 5) or 2 (box.right - box.left > 10) columns,
                    // or the box bottom is within one row of the header top
                    if (((upsideOfHeader.left == box.left && upsideOfHeader.right == box.right)
                        || (Math.Abs(upsideOfHeader.left - box.left) <= 1 && Math.Abs(upsideOfHeader.right - box.right) <= 1 && box.right - box.left > 5)
                        || (Math.Abs(upsideOfHeader.left - box.left) <= 2 && Math.Abs(upsideOfHeader.right - box.right) <= 2 && box.right - box.left > 10)
                        || (Math.Abs(box.bottom - upsideOfHeader.top - 1) < 2 && upsideOfHeader.right - upsideOfHeader.left > 3)
                       )
                        && Utils.isOverlap(box, upsideOfHeader) && Math.Abs(upsideOfHeader.top - box.top) > 1)// && box.left == forcedbox.left && box.right == forcedbox.right)
                    {
                        removedBoxes.Add(box);
                        // the box is already rejected, no need to test the remaining headers
                        break;
                    }
                    // cases where a box only overlaps the right part of the header
                    if (Math.Abs(upsideOfHeader.top + 1 - box.top) <= 1 && Utils.isOverlap(box, headerBox) && box.left >= upsideOfHeader.left + 1 && box.left <= upsideOfHeader.right - 1)
                    {
                        // header rows, columns box.left - 2 .. box.left
                        Boundary deviationWindow = new Boundary(headerBox.top, headerBox.bottom, box.left - 2, box.left);
                        // verify that the deviation window is compact (enough content and a header rate of exactly 1)
                        if (_sheet.sumContentExist.SubmatrixSum(deviationWindow) >= 6 && HeaderRate(deviationWindow) == 1)
                        {
                            removedBoxes.Add(box);
                        }
                    }
                    // cases where a box only overlaps the left part of the header
                    if (Math.Abs(upsideOfHeader.top + 1 - box.top) <= 1 && Utils.isOverlap(box, headerBox) && box.right >= upsideOfHeader.left + 1 && box.right <= upsideOfHeader.right - 1)
                    {
                        // header rows, columns box.right .. box.right + 2
                        Boundary deviationWindow = new Boundary(headerBox.top, headerBox.bottom, box.right, box.right + 2);
                        // verify that the deviation window is compact (enough content and a header rate of exactly 1)
                        if (_sheet.sumContentExist.SubmatrixSum(deviationWindow) >= 6 && HeaderRate(deviationWindow) == 1)
                        {
                            removedBoxes.Add(box);
                        }
                    }
                }
            }
            #endregion
            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        /// <summary>
        /// Removes candidate boxes whose bottom band overlaps another box's up header, or whose
        /// right band overlaps another box's left header, provided an alternative candidate
        /// exists that overlaps the box but not that header.
        /// </summary>
        private void OverlapHeaderFilter()
        {
            var rejected = new HashSet<Boundary>();

            // Headers of all current candidates.
            List<Boundary> topHeaders = _sheet.FindoutUpheaders(this, _boxes);
            List<Boundary> sideHeaders = _sheet.FindoutLeftheaders(this, _boxes);

            for (int index = 0; index < _boxes.Count; index++)
            {
                Boundary candidate = _boxes[index];

                foreach (var topHeader in topHeaders)
                {
                    // A header fully contained in the candidate is not a conflict.
                    if (Utils.ContainsBox(candidate, topHeader))
                    {
                        continue;
                    }

                    // Last rows of the candidate (at most bottom - 4 .. bottom), columns shifted left by one.
                    var bottomBand = new Boundary(Math.Max(candidate.bottom - 4, candidate.top), candidate.bottom, candidate.left - 1, candidate.right - 1);
                    if (!Utils.isOverlap(bottomBand, topHeader))
                    {
                        continue;
                    }

                    // Look for an overlapping candidate with nearly the same left/right edges
                    // that stays clear of this header.
                    bool hasAlignedAlternative = false;
                    foreach (var other in _boxes)
                    {
                        if (Utils.isOverlap(candidate, other)
                            && !Utils.isOverlap(other, topHeader)
                            && Math.Abs(candidate.right - other.right) < 2
                            && Math.Abs(candidate.left - other.left) < 2)
                        {
                            hasAlignedAlternative = true;
                        }
                    }

                    if (hasAlignedAlternative)
                    {
                        rejected.Add(candidate);
                        break;
                    }
                }

                foreach (var sideHeader in sideHeaders)
                {
                    if (Utils.ContainsBox(candidate, sideHeader))
                    {
                        continue;
                    }

                    // Last columns of the candidate (at most right - 5 .. right), rows shifted up by one.
                    var rightBand = new Boundary(candidate.top - 1, candidate.bottom - 1, Math.Max(candidate.right - 5, candidate.left), candidate.right);
                    if (!Utils.isOverlap(rightBand, sideHeader))
                    {
                        continue;
                    }

                    // Any overlapping candidate that avoids this header counts as an alternative.
                    bool hasAlternative = false;
                    foreach (var other in _boxes)
                    {
                        if (Utils.isOverlap(candidate, other) && !Utils.isOverlap(other, sideHeader))
                        {
                            hasAlternative = true;
                        }
                    }

                    if (hasAlternative)
                    {
                        rejected.Add(candidate);
                        break;
                    }
                }
            }

            Utils.RemoveTheseCandidates(rejected, _boxes);
        }
        #endregion

        #region general filter related
        /// <summary>
        /// Runs the general validity checks on a single candidate box.
        /// </summary>
        /// <param name="box">The candidate box to verify.</param>
        /// <returns>
        /// <c>false</c> when the box is degenerate (bottom - top &lt; 1 or right - left &lt; 1)
        /// or fails any of the border/split verifications; <c>true</c> otherwise.
        /// </returns>
        private bool GeneralFilter(Boundary box)
        {
            bool isDegenerate = box.bottom - box.top < 1 || box.right - box.left < 1;
            if (isDegenerate)
            {
                return false;
            }

            // The checks short-circuit in this order:
            //   1. edge lines non-empty and outer neighbours not crowded,
            //   2. outer neighbours sparse relative to the box size,
            //   3. edge lines non-empty (NOTE(review): may wrongly reject boxes with an empty line),
            //   4. no continuous empty lines splitting the box into unrelated regions.
            return VerifyBoxBorderValueInOutSimple(box)
                && VerifyBoxBorderValueOutSparse(box)
                && VerifyBoxBorderValueNotNull(box)
                && VerifyBoxSplit(box);
        }

        /// <summary>
        /// Checks that none of the four edge lines of the box is completely blank.
        /// </summary>
        /// <param name="box">The box whose top row, bottom row, left column and right column are inspected.</param>
        /// <returns>
        /// <c>false</c> if, for any edge line, the <c>sumContent</c> and <c>sumColor</c> sums add up to zero;
        /// <c>true</c> otherwise.
        /// </returns>
        private bool VerifyBoxBorderValueNotNull(Boundary box)
        {
            // An edge is blank when it carries neither content nor color.
            Func<Boundary, bool> isBlank = edge =>
                _sheet.sumContent.SubmatrixSum(edge) + _sheet.sumColor.SubmatrixSum(edge) == 0;

            var topEdge = new Boundary(box.top, box.top, box.left, box.right);
            var bottomEdge = new Boundary(box.bottom, box.bottom, box.left, box.right);
            var leftEdge = new Boundary(box.top, box.bottom, box.left, box.left);
            var rightEdge = new Boundary(box.top, box.bottom, box.right, box.right);

            bool anyEdgeBlank = isBlank(topEdge) || isBlank(bottomEdge) || isBlank(leftEdge) || isBlank(rightEdge);
            return !anyEdgeBlank;
        }

        /// <summary>
        /// Checks that the lines just outside the box are sparse and the edge lines inside it are not blank.
        /// </summary>
        /// <param name="box">The box to verify.</param>
        /// <returns>
        /// <c>false</c> if any of the four lines adjacent to the box on the outside has a
        /// <c>sumContentExist</c> sum of 6 or more, or if any edge line of the box has zero
        /// content plus color; <c>true</c> otherwise.
        /// </returns>
        private bool VerifyBoxBorderValueInOutSimple(Boundary box)
        {
            // Outside: the neighbouring row/column on each side must not be crowded.
            Func<Boundary, bool> isCrowded = strip => _sheet.sumContentExist.SubmatrixSum(strip) >= 6;
            if (isCrowded(new Boundary(box.top - 1, box.top - 1, box.left, box.right))
                || isCrowded(new Boundary(box.bottom + 1, box.bottom + 1, box.left, box.right))
                || isCrowded(new Boundary(box.top, box.bottom, box.left - 1, box.left - 1))
                || isCrowded(new Boundary(box.top, box.bottom, box.right + 1, box.right + 1)))
            {
                return false;
            }

            // Inside: every edge line must carry some content or color.
            Func<Boundary, bool> isBlank = edge =>
                _sheet.sumContent.SubmatrixSum(edge) + _sheet.sumColor.SubmatrixSum(edge) == 0;

            var topEdge = new Boundary(box.top, box.top, box.left, box.right);
            var bottomEdge = new Boundary(box.bottom, box.bottom, box.left, box.right);
            var leftEdge = new Boundary(box.top, box.bottom, box.left, box.left);
            var rightEdge = new Boundary(box.top, box.bottom, box.right, box.right);

            return !(isBlank(topEdge) || isBlank(bottomEdge) || isBlank(leftEdge) || isBlank(rightEdge));
        }

        /// <summary>
        /// Checks that the lines just outside the box are sparse, with stricter limits for small boxes.
        /// </summary>
        /// <param name="box">The box to verify.</param>
        /// <returns>
        /// <c>false</c> if any outside line has a <c>sumContentExist</c> sum of 6 or more, or if
        /// a short/narrow box has outside lines reaching the tighter limits of 2 or 4 described
        /// in the body; <c>true</c> otherwise.
        /// </returns>
        private bool VerifyBoxBorderValueOutSparse(Boundary box)
        {
            var stripAbove = new Boundary(box.top - 1, box.top - 1, box.left, box.right);
            var stripBelow = new Boundary(box.bottom + 1, box.bottom + 1, box.left, box.right);
            var stripLeft = new Boundary(box.top, box.bottom, box.left - 1, box.left - 1);
            var stripRight = new Boundary(box.top, box.bottom, box.right + 1, box.right + 1);

            int above = _sheet.sumContentExist.SubmatrixSum(stripAbove);
            int rightSide = _sheet.sumContentExist.SubmatrixSum(stripRight);
            int below = _sheet.sumContentExist.SubmatrixSum(stripBelow);
            int leftSide = _sheet.sumContentExist.SubmatrixSum(stripLeft);

            int height = box.bottom - box.top;
            int width = box.right - box.left;

            // Limit that applies regardless of the box size.
            bool crowded = above >= 6 || below >= 6 || leftSide >= 6 || rightSide >= 6;

            // Short boxes tolerate less content beside them ...
            bool sidesTooDense =
                (height <= 2 && (leftSide >= 2 || rightSide >= 2))
                || (height <= 4 && (leftSide >= 4 || rightSide >= 4));

            // ... and narrow boxes tolerate less content above/below them.
            bool endsTooDense =
                (width <= 1 && (above >= 2 || below >= 2))
                || (width <= 4 && (above >= 4 || below >= 4));

            return !(crowded || sidesTooDense || endsTooDense);
        }

        private bool VerifyBoxSplit(Boundary box)
        {
            // find out continuous empty rows/cols that can split the box into two irrelevant regions
            int up = box.top;
            int down = box.bottom;
            int left = box.left;
            int right = box.right;
            // avoid up header, so from up + 2
            int upOffset = 0;
            int leftOffset = 0;
            if (box.bottom - box.top > 12) upOffset = 2;
            if (box.right - box.left > 12) leftOffset = 2;
            for (int i = up + 3 + upOffset; i < down - 4; i++)
            {
                // one row without format and  three continuous rows without contents
                Boundary edgeBox3 = new Boundary(i, i + 2, left, right);
                Boundary edgeBox1 = new Boundary(i + 1, i + 1, left, right);

                if (_sheet.sumContent.SubmatrixSum(edgeBox1) < 3)
                {
                    if (_sheet.sumContent.SubmatrixSum(edgeBox1) + _sheet.sumColor.SubmatrixSum(edgeBox1) == 0 && _sheet.sumContentExist.SubmatrixSum(edgeBox3) == 0)
                    {
                        #region find out the empty rows which are not empty in the upside and downside
                        int k = i + 3;
                        Boundary edgeBoxDown = new Boundary(k, k, left, right);
                        while (k < down)
                        {
                            edgeBoxDown = new Boundary(k, k, left, right);
                            if (_sheet.sumContent.SubmatrixSum(edgeBoxDown) > 5) break;
                            k++;
                        }
                        k = i - 1;
                        Boundary edgeBoxUp = new Boundary(k, k, left, right);
                        while (k > up)
                        {
                            edgeBoxUp = new Boundary(k, k, left, right);
                            if (_sheet.sumContent.SubmatrixSum(edgeBoxUp) > 5) break;
                            k--;
                        }
                        #endregion
                        // verify the relation of the up and down rows
                        /////// may exist some problem, may remove the right box
                        if (_sheet.sumContentExist.SubmatrixSum(edgeBoxUp) > 5 && _sheet.sumContentExist.SubmatrixSum(edgeBoxDown) > 5)
                        {
                            return false;
                        }
                    }
                    else if (_sheet.sumColor.SubmatrixSum(edgeBox1) + _sheet.sumBorderCol.SubmatrixSum(edgeBox1) < 5 && !Utils.isOverlap(edgeBox1, _sheet.conhensionRegions, exceptForward: true))
                    {
                        #region homogeneous of four corner regions in the box
                        Boundary BoxUpLeft = new Boundary(up, i + 1, left, left + 2);
                        Boundary BoxUpRight = new Boundary(up, i + 1, right - 2, right);
                        Boundary BoxDownLeft = new Boundary(i + 1, down, left, left + 2);
                        Boundary BoxDownRight = new Boundary(i + 1, down, right - 2, right);

                        double densityUpLeft = (_sheet.sumContent.SubmatrixSum(BoxUpLeft) + _sheet.sumColor.SubmatrixSum(BoxUpLeft) + _sheet.sumBorderCol.SubmatrixSum(BoxUpLeft)) / Utils.AreaSize(BoxUpLeft);
                        double densityUpRight = (_sheet.sumContent.SubmatrixSum(BoxUpRight) + _sheet.sumColor.SubmatrixSum(BoxUpRight) + _sheet.sumBorderCol.SubmatrixSum(BoxUpRight)) / Utils.AreaSize(BoxUpRight);
                        double densityDownLeft = (_sheet.sumContent.SubmatrixSum(BoxDownLeft) + _sheet.sumColor.SubmatrixSum(BoxDownLeft) + _sheet.sumBorderCol.SubmatrixSum(BoxDownLeft)) / Utils.AreaSize(BoxDownLeft);
                        double densityDownRight = (_sheet.sumContent.SubmatrixSum(BoxDownRight) + _sheet.sumColor.SubmatrixSum(BoxDownRight) + _sheet.sumBorderCol.SubmatrixSum(BoxDownRight)) / Utils.AreaSize(BoxDownRight);

                        if (densityUpLeft == 0 && densityDownLeft > 2 * 0.2)
                        {
                            return false;
                        }
                        if (densityUpRight == 0 && densityDownRight > 2 * 0.2)
                        {
                            return false;
                        }
                        if (densityDownLeft == 0 && densityUpLeft > 2 * 0.2)
                        {
                            return false;
                        }
                        if (densityDownRight == 0 && densityUpRight > 2 * 0.2)
                        {
                            return false;
                        }
                        #endregion
                    }
                }
            }
            for (int i = left + 3 + leftOffset; i < right - 4; i++)
            {
                Boundary edgeBox3 = new Boundary(up, down, i, i + 2);
                Boundary edgeBox1 = new Boundary(up, down, i + 1, i + 1);
                if (_sheet.sumContent.SubmatrixSum(edgeBox1) < 3)
                {

                    if (_sheet.sumContent.SubmatrixSum(edgeBox1) + _sheet.sumColor.SubmatrixSum(edgeBox1) == 0 && _sheet.sumContentExist.SubmatrixSum(edgeBox3) == 0)
                    {
                        #region find out the empty columns which are not empty in the leftside and rightside
                        int k = i + 3;
                        Boundary edgeBoxRight = new Boundary(up, down, k, k);
                        while (k < down)
                        {
                            edgeBoxRight = new Boundary(up, down, k, k);
                            if (_sheet.sumContent.SubmatrixSum(edgeBoxRight) > 5) break;
                            k++;
                        }

                        k = i - 1;
                        Boundary edgeBoxLeft = new Boundary(up, down, k, k);
                        while (k > up)
                        {
                            edgeBoxLeft = new Boundary(up, down, k, k);
                            if (_sheet.sumContent.SubmatrixSum(edgeBoxLeft) > 5) break;
                            k--;
                        }
                        #endregion
                        if (edgeBoxRight.right - edgeBoxLeft.right >= 3)
                        {
                            return false;
                        }

                    }
                    #region homogeneous of four corner regions in the box
                    else if (_sheet.sumColor.SubmatrixSum(edgeBox1) + _sheet.sumBorderRow.SubmatrixSum(edgeBox1) < 5 && !Utils.isOverlap(edgeBox1, _sheet.conhensionRegions, exceptForward: true))
                    {
                        Boundary BoxUpLeft = new Boundary(up, up + 2, left, i + 1);
                        Boundary BoxUpRight = new Boundary(up, up + 2, i + 1, right);
                        Boundary BoxDownLeft = new Boundary(down - 2, down, left, i + 1);
                        Boundary BoxDownRight = new Boundary(down - 2, down, i + 1, right);

                        double densityUpLeft = (_sheet.sumContent.SubmatrixSum(BoxUpLeft) + _sheet.sumColor.SubmatrixSum(BoxUpLeft) + _sheet.sumBorderRow.SubmatrixSum(BoxUpLeft)) / Utils.AreaSize(BoxUpLeft);
                        double densityUpRight = (_sheet.sumContent.SubmatrixSum(BoxUpRight) + _sheet.sumColor.SubmatrixSum(BoxUpRight) + _sheet.sumBorderRow.SubmatrixSum(BoxUpRight)) / Utils.AreaSize(BoxUpRight);
                        double densityDownLeft = (_sheet.sumContent.SubmatrixSum(BoxDownLeft) + _sheet.sumColor.SubmatrixSum(BoxDownLeft) + _sheet.sumBorderRow.SubmatrixSum(BoxDownLeft)) / Utils.AreaSize(BoxDownLeft);
                        double densityDownRight = (_sheet.sumContent.SubmatrixSum(BoxDownRight) + _sheet.sumColor.SubmatrixSum(BoxDownRight) + _sheet.sumBorderRow.SubmatrixSum(BoxDownRight)) / Utils.AreaSize(BoxDownRight);

                        if (densityUpLeft == 0 && densityUpRight / Utils.AreaSize(BoxUpRight) > 2 * 0.2)
                        {
                            return false;
                        }
                        if (densityUpRight == 0 && densityUpLeft / Utils.AreaSize(BoxUpLeft) > 2 * 0.2)
                        {
                            return false;
                        }
                        if (densityDownLeft == 0 && densityDownRight / Utils.AreaSize(BoxDownRight) > 2 * 0.2)
                        {
                            return false;
                        }
                        if (densityDownRight == 0 && densityDownLeft / Utils.AreaSize(BoxDownLeft) > 2 * 0.2)
                        {
                            return false;
                        }
                    }
                    #endregion
                }
            }

            return true;
        }
        #endregion

        #region suppression filter related
        // Compares two boxes by looking for header rows/columns on each edge where they differ.
        // Result convention used by the suppression filter:
        //   0 = undecided, 1 = prefer box1, 2 = prefer box2.
        private int CompareSuppressionBoxesHeader(Boundary box1, Boundary box2)
        {
            // top edge: a header on the first row of one box (or on the second row of box2) decides
            if (box1.top != box2.top)
            {
                bool firstTopIsHeader = IsHeaderUp(Utils.UpRow(box1));
                bool secondTopIsHeader = IsHeaderUp(Utils.UpRow(box2));
                bool secondShiftedTopIsHeader = IsHeaderUp(Utils.UpRow(box2, start: 1));

                if (firstTopIsHeader)
                {
                    if (!secondTopIsHeader)
                    {
                        return 1;
                    }
                }
                else if (secondTopIsHeader || secondShiftedTopIsHeader)
                {
                    return 2;
                }
            }

            // left edge: exactly one of the two left columns being a header decides
            if (box1.left != box2.left)
            {
                bool firstLeftIsHeader = IsHeaderLeft(Utils.LeftCol(box1));
                bool secondLeftIsHeader = IsHeaderLeft(Utils.LeftCol(box2));
                if (firstLeftIsHeader != secondLeftIsHeader)
                {
                    return firstLeftIsHeader ? 1 : 2;
                }
            }

            // bottom edge: box1 ends on a header-like row while box2 does not,
            // and the row at offset -1 from box2's bottom row is sparsely filled
            if (box1.bottom != box2.bottom
                && IsHeaderUp(Utils.DownRow(box1))
                && !IsHeaderUp(Utils.DownRow(box2))
                && _sheet.ContentExistValueDensity(Utils.DownRow(box2, start: -1)) < 0.2 * 2)
            {
                return 2;
            }

            // right edge: same test as the bottom edge, applied to columns
            if (box1.right != box2.right
                && IsHeaderLeft(Utils.RightCol(box1))
                && !IsHeaderLeft(Utils.RightCol(box2))
                && _sheet.ContentExistValueDensity(Utils.RightCol(box2, start: -1)) < 0.2 * 2)
            {
                return 2;
            }

            return 0;
        }

        // Prefers box2 (returns 2) when the last two columns or the last two rows of box1
        // contain merged cells while the same strip of box2 does not; otherwise returns 0.
        private int CompareSuppressionBoxesMerge(Boundary box1, Boundary box2)
        {
            bool rightEdgeFavoursSecond = _sheet.ExistsMerged(Utils.RightCol(box1, step: 2))
                && !_sheet.ExistsMerged(Utils.RightCol(box2, step: 2));

            // the bottom strip is only inspected when the right strip did not already decide
            bool favoursSecond = rightEdgeFavoursSecond
                || (_sheet.ExistsMerged(Utils.DownRow(box1, step: 2))
                    && !_sheet.ExistsMerged(Utils.DownRow(box2, step: 2)));

            return favoursSecond ? 2 : 0;
        }

        // Compares box1 with box2 by measuring how sparse the boundary rows/columns of box1 are.
        // Returns 2 when a boundary line of box1 is sparse (prefer box2), 1 when a boundary line
        // is well filled (prefer box1), and 0 when nothing conclusive was found.
        private int CompareSuppressionBoxesSparsity(Boundary box1, Boundary box2)
        {
            // verify the outermost up/down/left/right line first, on every side where the boxes differ
            if (box1.top != box2.top && CheckSparsityofUpRow(Utils.UpRow(box1), 1) == 2)
            {
                return 2;
            }
            if (box1.bottom != box2.bottom && CheckSparsityofDownRow(Utils.DownRow(box1), 1) == 2)
            {
                return 2;
            }
            if (box1.left != box2.left && CheckSparsityofCol(Utils.LeftCol(box1), 1) == 2)
            {
                return 2;
            }
            if (box1.right != box2.right && CheckSparsityofCol(Utils.RightCol(box1), 1) == 2)
            {
                return 2;
            }

            // then walk line by line from each edge of box1 towards the matching edge of box2;
            // the first conclusive line decides
            for (int i = 0; i < box2.top - box1.top; i++)
            {
                // depth is the 1-based step count from box1's edge, exactly as in the three loops below.
                // It used to be "i - box1.top + 1", which mixed the absolute row coordinate into the
                // offset and could flag a row deep inside the box as if it were the outermost one.
                int valid = CheckSparsityofUpRow(Utils.UpRow(box1, start: i), i + 1);
                if (valid != 0)
                {
                    return valid;
                }
            }
            for (int i = 0; i > box2.bottom - box1.bottom; i--)
            {
                int valid = CheckSparsityofDownRow(Utils.DownRow(box1, start: i), -i + 1);
                if (valid != 0)
                {
                    return valid;
                }
            }

            for (int i = 0; i < box2.left - box1.left; i++)
            {
                int valid = CheckSparsityofCol(Utils.LeftCol(box1, i), i + 1);
                if (valid != 0)
                {
                    return valid;
                }
            }
            for (int i = 0; i > box2.right - box1.right; i--)
            {
                int valid = CheckSparsityofCol(Utils.RightCol(box1, i), -i + 1);
                if (valid != 0)
                {
                    return valid;
                }
            }

            return 0;
        }

        // Runs the header, sparsity and merged-cell comparisons in that order and returns the
        // first non-zero verdict (1 = prefer box1, 2 = prefer box2). Identical boxes and
        // inconclusive comparisons yield 0.
        private int CompareSuppressionBoxes(Boundary box1, Boundary box2)
        {
            if (box1.Equals(box2))
            {
                return 0;
            }

            // headers on the differing edges have the highest priority
            int verdict = CompareSuppressionBoxesHeader(box1, box2);

            // next: sparsity of the boundary lines
            if (verdict == 0)
            {
                verdict = CompareSuppressionBoxesSparsity(box1, box2);
            }

            // last: merged cells on the down/right boundary
            if (verdict == 0)
            {
                verdict = CompareSuppressionBoxesMerge(box1, box2);
            }

            return verdict;
        }

        // Resolves suppression conflicts among the candidate boxes.
        // Direction 4 resolves all four directions together; directions 0..3 would instead
        // resolve up, down, left and right separately, which is currently not done.
        private void SuppressionSoftFilter()
        {
            const int allDirections = 4;
            SuppressionSoftFilter(ref _boxes, direction: allDirections);
        }

        // Resolves suppression conflicts between pairs of overlapping, similarly sized candidates.
        // Each box of a conflicting pair is compared with a reference box; the loser of the
        // comparison is dropped from _boxes. The list parameter is not read: the method works
        // on the _boxes field directly.
        private void SuppressionSoftFilter(ref List<Boundary> _, int direction)
        {
            var discarded = new HashSet<Boundary>();

            for (int first = 0; first < _boxes.Count; first++)
            {
                Boundary candidate = _boxes[first];
                if (discarded.Contains(candidate)) continue;

                for (int second = first + 1; second < _boxes.Count; second++)
                {
                    Boundary rival = _boxes[second];
                    if (discarded.Contains(rival)) continue;

                    // only distinct, overlapping boxes in a suppression relation are compared
                    bool inConflict = Utils.isSuppressionBox(candidate, rival, directionNum: direction)
                        && !candidate.Equals(rival)
                        && Utils.isOverlap(candidate, rival);
                    if (!inConflict) continue;

                    // skip pairs whose height or width differ by more than the 0.6 ratio
                    bool sizesTooDifferent =
                        Utils.Height(rival) < 0.6 * Utils.Height(candidate) || Utils.Height(candidate) < 0.6 * Utils.Height(rival)
                        || Utils.Width(rival) < 0.6 * Utils.Width(candidate) || Utils.Width(candidate) < 0.6 * Utils.Width(rival);
                    if (sizesTooDifferent) continue;

                    // reference boxes the two candidates are measured against
                    Boundary candidateReference;
                    Boundary rivalReference;
                    if (direction == 4)
                    {
                        candidateReference = Utils.OverlapBox(candidate, rival);
                        rivalReference = Utils.OverlapBox(candidate, rival);
                    }
                    else
                    {
                        // NOTE(review): each reference gets its own coordinate written back, so it
                        // appears to stay equal to its box - confirm this is intended
                        candidateReference = candidate;
                        rivalReference = rival;
                        candidateReference[direction] = candidate[direction];
                        rivalReference[direction] = rival[direction];
                    }

                    // 0 = undecided, 1 = choose the first argument, 2 = choose the second argument;
                    // the rival is only evaluated when the candidate's comparison was undecided
                    int candidateVerdict = CompareSuppressionBoxes(candidate, candidateReference);
                    int rivalVerdict = 0;
                    if (candidateVerdict == 0)
                    {
                        rivalVerdict = CompareSuppressionBoxes(rival, rivalReference);
                    }

                    if (candidateVerdict == 1 || rivalVerdict == 2)
                    {
                        discarded.Add(rival);
                    }
                    else if (candidateVerdict == 2 || rivalVerdict == 1)
                    {
                        // the candidate lost: stop comparing it with further boxes
                        discarded.Add(candidate);
                        break;
                    }
                }
            }

            Utils.RemoveTheseCandidates(discarded, _boxes);
        }

        // Forces a decision for every remaining pair of similarly sized boxes in a suppression
        // relation: the box with the smaller border-difference scores is dropped.
        private void SuppressionHardFilter()
        {
            var discarded = new HashSet<Boundary>();

            for (int first = 0; first < _boxes.Count; first++)
            {
                Boundary current = _boxes[first];
                if (discarded.Contains(current)) continue;

                for (int second = first + 1; second < _boxes.Count; second++)
                {
                    Boundary other = _boxes[second];
                    if (discarded.Contains(other)) continue;

                    // skip pairs whose height or width differ by more than the 0.6 ratio
                    bool sizesTooDifferent =
                        Utils.Height(other) < 0.6 * Utils.Height(current) || Utils.Height(current) < 0.6 * Utils.Height(other)
                        || Utils.Width(other) < 0.6 * Utils.Width(current) || Utils.Width(current) < 0.6 * Utils.Width(other);
                    if (sizesTooDifferent) continue;

                    if (current.Equals(other) || !Utils.isSuppressionBox(current, other)) continue;

                    // the current box survives when it is at least as good on the row score or on the column score
                    bool currentWins = _sheet.ComputeBorderDiffsRow(current) >= _sheet.ComputeBorderDiffsRow(other)
                        || _sheet.ComputeBorderDiffsCol(current) >= _sheet.ComputeBorderDiffsCol(other);

                    // note: the inner loop keeps running even after the current box itself was discarded
                    discarded.Add(currentWins ? other : current);
                }
            }

            Utils.RemoveTheseCandidates(discarded, _boxes);
        }

        // Classifies a column strip: 1 = well filled, 2 = sparse (only reported for depth 1),
        // 0 = inconclusive.
        private int CheckSparsityofCol(Boundary box, int depth)
        {
            double cellCount = Utils.AreaSize(box);

            // filled enough and with enough distinct texts
            bool dense = _sheet.ContentExistValueDensity(box) >= 2 * 0.3;
            if (dense && _sheet.TextDistinctCount(box) >= Math.Max(cellCount * 0.2, 3))
            {
                return 1;
            }

            // a strip of at least 5 cells with a content-exist sum of at most 4
            bool sparseOutermost = depth == 1
                && _sheet.sumContentExist.SubmatrixSum(box) <= 4
                && cellCount >= 5;

            return sparseOutermost ? 2 : 0;
        }

        // Classifies a row strip at the top of a box: 1 = header or well filled,
        // 2 = sparse (only reported for depth 1), 0 = inconclusive.
        private int CheckSparsityofUpRow(Boundary box, int depth)
        {
            double cellCount = Utils.AreaSize(box);

            if (IsHeaderUp(box))
            {
                return 1;
            }

            bool dense = _sheet.ContentExistValueDensity(box) >= 2 * 0.3;
            if (dense && _sheet.TextDistinctCount(box) >= Math.Max(0.2 * cellCount, 3))
            {
                return 1;
            }

            if (depth == 1)
            {
                // two size tiers: a content-exist sum of at most 4 in 6+ cells, or at most 6 in 10+ cells
                bool sparseSmall = _sheet.sumContentExist.SubmatrixSum(box) <= 4 && cellCount >= 6;
                if (sparseSmall || (_sheet.sumContentExist.SubmatrixSum(box) <= 6 && cellCount >= 10))
                {
                    return 2;
                }
            }

            return 0;
        }

        // Classifies a row strip at the bottom of a box: 1 = well filled,
        // 2 = sparse (only reported for depth 1), 0 = inconclusive.
        private int CheckSparsityofDownRow(Boundary box, int depth)
        {
            double cellCount = Utils.AreaSize(box);

            bool dense = _sheet.ContentExistValueDensity(box) >= 2 * 0.3;
            if (dense && _sheet.TextDistinctCount(box) >= Math.Max(cellCount * 0.2, 3))
            {
                return 1;
            }

            if (depth != 1)
            {
                return 0;
            }

            // tier for 6+ cells: a low content-exist sum, or fewer than two distinct texts
            bool sparseSmall = (_sheet.sumContentExist.SubmatrixSum(box) <= 4 || _sheet.TextDistinctCount(box) < 2)
                && cellCount >= 6;
            if (sparseSmall)
            {
                return 2;
            }

            // tier for 10+ cells: a slightly higher sum is tolerated, or a low density with few distinct texts
            bool sparseLarge = (_sheet.sumContentExist.SubmatrixSum(box) <= 6
                    || (_sheet.ContentExistValueDensity(box) <= 0.3 * 2 && _sheet.TextDistinctCount(box) < 3))
                && cellCount >= 10;

            return sparseLarge ? 2 : 0;
        }
        #endregion

        #region various contains filters
        // In nesting combination cases, filters out the intermediate candidates: boxes that
        // share roughly the same pair of boundary lines and are both contained in, and
        // containing, another box of the same group.
        private void NestingCombinationFilter()
        {
            List<Boundary> intermediates = new List<Boundary>();

            // vertical: group the boxes whose left/right edges sit near a pair of column boundary lines
            foreach (int left in _sheet.colBoundaryLines)
            {
                foreach (int right in _sheet.colBoundaryLines)
                {
                    if (left < right)
                    {
                        List<Boundary> sameColumns = _boxes.FindAll(candidate =>
                            candidate.left >= left - 1 && candidate.left <= left + 3
                            && candidate.right >= right - 1 && candidate.right <= right + 3);
                        intermediates.AddRange(FindInterCandidates(sameColumns));
                    }
                }
            }

            // horizontal: group the boxes whose top/bottom edges sit near a pair of row boundary lines
            foreach (int up in _sheet.rowBoundaryLines)
            {
                foreach (int down in _sheet.rowBoundaryLines)
                {
                    if (up < down)
                    {
                        List<Boundary> sameRows = _boxes.FindAll(candidate =>
                            candidate.top >= up - 1 && candidate.bottom >= down - 1
                            && candidate.top <= up + 3 && candidate.bottom <= down + 3);
                        intermediates.AddRange(FindInterCandidates(sameRows));
                    }
                }
            }

            Utils.RemoveTheseCandidates(intermediates, _boxes);
        }

        // Returns, in input order, the boxes of the group that are "in between": some other box
        // of the group contains them and they contain some other box of the group (step 2).
        private static List<Boundary> FindInterCandidates(List<Boundary> ranges)
        {
            return ranges.FindAll(middle =>
            {
                bool hasContainer = ranges.Exists(other =>
                    !middle.Equals(other) && Utils.ContainsBox(other, middle, step: 2));
                bool hasContent = ranges.Exists(other =>
                    !middle.Equals(other) && Utils.ContainsBox(middle, other, step: 2));
                return hasContainer && hasContent;
            });
        }

        // For overlapping boxes that nearly share two opposite edges, keeps the box whose
        // leading row/column looks like a header and drops the competing one.
        private void HeaderPriorityFilter()
        {
            var discarded = new HashSet<Boundary>();

            // pass 1: similar top and bottom edges, different left edges
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary current = _boxes[i];
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary other = _boxes[j];

                    bool comparable = !current.Equals(other)
                        && Utils.isOverlap(current, other)
                        && !Utils.ContainsBox(other, current, 2)
                        // with similar up and down boundaries
                        && Math.Abs(current.top - other.top) <= 2
                        && Math.Abs(current.bottom - other.bottom) <= 2
                        // the left edges must not be close to each other
                        && Math.Abs(current.left - other.left) > 1;
                    if (!comparable) continue;

                    Boundary currentLeftCol = Utils.LeftCol(current);
                    Boundary otherLeftCol = Utils.LeftCol(other);

                    // the contained box is a header while the matching part of the container's
                    // top row (above it, or left of it when the gap exceeds 3 columns) is not
                    bool containedHeader = Utils.ContainsBox(current, other) && IsHeaderUp(other);
                    if (containedHeader
                        && (!IsHeaderUp(new Boundary(current.top, current.top, other.left, other.right))
                            || (other.left - current.left > 3
                                && !IsHeaderUp(new Boundary(current.top, current.top, current.left, other.left - 1)))))
                    {
                        discarded.Add(current);
                        continue;
                    }

                    bool currentHasLeftHeader = IsHeaderLeft(currentLeftCol);
                    bool otherHasLeftHeader = IsHeaderLeft(otherLeftCol);
                    if (currentHasLeftHeader && !otherHasLeftHeader)
                    {
                        discarded.Add(other);
                    }
                    else if (otherHasLeftHeader && !currentHasLeftHeader)
                    {
                        discarded.Add(current);
                        break;
                    }
                }
            }

            // pass 2: similar left and right edges, different top edges
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary current = _boxes[i];
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary other = _boxes[j];

                    bool comparable = !other.Equals(current)
                        && Utils.isOverlap(current, other)
                        && Math.Abs(current.left - other.left) <= 2
                        && Math.Abs(current.right - other.right) <= 2
                        // the top edges must not be close to each other
                        && Math.Abs(current.top - other.top) > 1;
                    if (!comparable) continue;

                    bool currentHasTopHeader = IsHeaderUp(Utils.UpRow(current));
                    bool otherHasTopHeader = IsHeaderUp(Utils.UpRow(other));
                    if (currentHasTopHeader && !otherHasTopHeader)
                    {
                        discarded.Add(other);
                    }
                    else if (otherHasTopHeader && !currentHasTopHeader)
                    {
                        discarded.Add(current);
                        break;
                    }
                }
            }

            Utils.RemoveTheseCandidates(discarded, _boxes);
        }

        // Resolves contradicting "container / contained" pairs of candidates that share similar
        // left-right boundaries (vertical pass) or similar up-down boundaries (horizontal pass).
        // For each such pair (box1 contains box2) exactly one of the two boxes is marked for
        // removal, unless a third box supports the part of box1 that lies outside box2.
        // All marked boxes are removed from _boxes at the end.
        private void PairAlikeContainsFilter()
        {
            // solve the contradicting containing pairs with similar left-right boundaries or similar up-down boundaries
            var removedBoxes = new HashSet<Boundary>();
            // vertical: box1 extends below box2
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                for (int j = 0; j < _boxes.Count; j++)
                {
                    // find a box2 such that box1 contains box2 and reaches further down
                    Boundary box2 = _boxes[j];
                    if (!Utils.ContainsBox(box1, box2, 1) || box1.bottom <= box2.bottom) continue;
                    // similar left/right borders (within 2) and widths within a factor of two
                    if (Math.Abs(box1.left - box2.left) > 2 || Math.Abs(box1.right - box2.right) > 2
                        || box1.right - box1.left >= 2 * (box2.right - box2.left) || box1.right - box1.left <= 0.5 * (box2.right - box2.left))
                    {
                        continue;
                    }
                    // similar top edge (within 2)
                    if (Math.Abs(box1.top - box2.top) > 2)
                    {
                        continue;
                    }
                    // set to 1 as soon as one supporting third box is found
                    int cntInside = 0;

                    // the remaining box: the part of box1 below box2, 
                    // with leading rows whose content-exist sum is below 3 trimmed off
                    Boundary remainBox = new Boundary(box2.bottom + 1, box1.bottom, box1.left, box1.right);
                    while (remainBox.top < remainBox.bottom && _sheet.sumContentExist.SubmatrixSum(Utils.UpRow(remainBox)) < 3)
                    {
                        remainBox.top = remainBox.top + 1;
                    }
                    // look for a closely related third box among the boxes not yet marked for removal
                    for (int k = 0; k < _boxes.Count; k++)
                    {
                        Boundary box3 = _boxes[k];
                        if (removedBoxes.Contains(box3)) { continue; }
                        if (Utils.isOverlap(box2, box3) || box1.Equals(box3) || box2.Equals(box3)) { continue; }
                        // box3 covers the remaining box, or lies inside it while the remaining box starts with a header row
                        if (Utils.ContainsBox(box3, remainBox, 2) || (Utils.ContainsBox(remainBox, box3, 2) && IsHeaderUp(Utils.UpRow(remainBox))))
                        {
                            cntInside += 1;
                            break;
                        }

                        // box3 lies inside box1 and is aligned with its left or right edge
                        if ((Math.Abs(box3.left - box1.left) <= 2 || Math.Abs(box3.right - box1.right) <= 2) && Utils.ContainsBox(box1, box3, 1))
                        {
                            cntInside += 1;
                            break;
                        }
                    }

                    // no supporting box: decide between box1 and box2
                    if (cntInside == 0)
                    {
                        // the last rows of box2 (at most 13), compared against the remaining box
                        Boundary box2Bottom = new Boundary(Math.Max(box2.top, box2.bottom - 12), box2.bottom, box2.left, box2.right);
                        // the remaining box has no left header and is less than half as dense as the end of box2
                        if (!IsHeaderLeft(Utils.LeftCol(remainBox)) && _sheet.ContentExistValueDensity(box2Bottom) > 2 * _sheet.ContentExistValueDensity(remainBox)
                            && (_sheet.ContentExistValueDensity(remainBox) > 2 * 0.25 || _sheet.ContentExistValueDensity(box2Bottom) > 2 * 0.5))
                        {
                            removedBoxes.Add(box1);
                        }
                        // box2 is tall enough, its end is dense and the remaining box is not
                        else if (box2.bottom - box2.top >= 4 && _sheet.ContentExistValueDensity(remainBox) < 2 * 0.25 && _sheet.ContentExistValueDensity(box2Bottom) > 2 * 0.5)
                        {
                            removedBoxes.Add(box1);
                        }
                        // same idea with lower thresholds on both sides
                        else if (box2.bottom - box2.top >= 4 && _sheet.ContentExistValueDensity(remainBox) < 2 * 0.1 && _sheet.ContentExistValueDensity(box2Bottom) > 2 * 0.35)
                        {
                            removedBoxes.Add(box1);
                        }
                        // box1 adds at most 4 rows and only box2 has a left header column
                        else if (box1.bottom - box2.bottom <= 4 && box2.bottom - box2.top >= 5 && IsHeaderLeft(Utils.LeftCol(box2)) && !IsHeaderLeft(Utils.LeftCol(box1)))
                        {
                            removedBoxes.Add(box1);
                        }
                        // box1 ends on a header-like row while the rows at offsets 1..3 from it are sparse / not headers
                        else if (IsHeaderUp(Utils.DownRow(box1)) && _sheet.ContentExistValueDensity(Utils.DownRow(box1, start: 1)) < 0.2 * 2
                            && !IsHeaderUp(Utils.DownRow(box1, start: 3)) && !IsHeaderUp(Utils.DownRow(box1, start: 2)))
                        {
                            removedBoxes.Add(box1);
                        }
                        // box1 adds at most 4 rows holding few cells per column, much sparser than the end of box2
                        else if (box1.bottom - box2.bottom <= 4 && box2.bottom - box2.top >= 5 && remainBox.right - remainBox.left >= 4
                            && _sheet.sumContentExist.SubmatrixSum(remainBox) / Utils.Width(remainBox) <= 4
                            && _sheet.ContentExistValueDensity(remainBox) < 2 * 0.45 && _sheet.ContentExistValueDensity(box2Bottom) > 2 * 0.6)
                        {
                            removedBoxes.Add(box1);
                        }
                        // otherwise the larger box wins
                        else
                        {
                            removedBoxes.Add(box2);
                        }

                    }
                }

            }

            // horizontal: box1 extends to the right of box2
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];
                    if (!Utils.ContainsBox(box1, box2, 1)) continue;
                    // similar top/bottom borders (within 2), heights within a factor of two,
                    // similar left edge, and box1 must reach further right than box2
                    if (Math.Abs(box1.top - box2.top) > 2 || Math.Abs(box1.bottom - box2.bottom) > 2
                        || box1.bottom - box1.top >= 2 * (box2.bottom - box2.top) || box1.bottom - box1.top <= 0.5 * (box2.bottom - box2.top)
                        || Math.Abs(box1.left - box2.left) > 2 || box1.right <= box2.right)
                    {
                        continue;
                    }
                    // set to 1 as soon as one supporting third box is found
                    int cntInside = 0;
                    // the remaining box: the part of box1 to the right of box2,
                    // with leading columns whose content-exist sum is below 3 trimmed off
                    Boundary remainBox = new Boundary(box1.top, box1.bottom, box2.right + 1, box1.right);
                    while (remainBox.left < remainBox.right && _sheet.sumContentExist.SubmatrixSum(Utils.LeftCol(remainBox)) < 3)
                    {
                        remainBox.left = remainBox.left + 1;
                    }

                    // look for a closely related third box among the boxes not yet marked for removal
                    for (int k = 0; k < _boxes.Count; k++)
                    {
                        Boundary box3 = _boxes[k];
                        if (removedBoxes.Contains(box3)) { continue; }
                        if (Utils.isOverlap(box2, box3) || box1.Equals(box3) || box2.Equals(box3)) { continue; }
                        // box3 covers the remaining box, or lies inside it while the remaining box starts with a header column
                        if (Utils.ContainsBox(box3, remainBox, 2) || (Utils.ContainsBox(remainBox, box3, 2) && IsHeaderLeft(Utils.LeftCol(remainBox))))
                        {
                            cntInside = cntInside + 1;
                            break;
                        }
                        // box3 lies inside box1 and is aligned with its top or bottom edge
                        if ((Math.Abs(box3.top - box1.top) <= 2 || Math.Abs(box3.bottom - box1.bottom) <= 2) && Utils.ContainsBox(box1, box3, 1))
                        {
                            cntInside = cntInside + 1;
                            break;
                        }
                    }
                    // no supporting box: decide between box1 and box2
                    if (cntInside == 0)
                    {

                        // the last columns of box2 (at most 13), compared against the remaining box
                        Boundary box2Right = new Boundary(box2.top, box2.bottom, Math.Max(box2.left, box2.right - 12), box2.right);
                        // the remaining box has no top header and is less than half as dense as the end of box2
                        // NOTE(review): the 0.5 / 0.25 thresholds here lack the "2 *" factor used by the
                        // matching vertical test above (2 * 0.25 / 2 * 0.5) - confirm whether this is intended
                        if (!IsHeaderUp(Utils.UpRow(remainBox)) && _sheet.ContentExistValueDensity(box2Right) > 2 * _sheet.ContentExistValueDensity(remainBox)
                            && (_sheet.ContentExistValueDensity(box2Right) > 0.5 || _sheet.ContentExistValueDensity(remainBox) > 0.25))
                        {
                            removedBoxes.Add(box1);
                        }
                        // box2 is wide enough, its end is dense and the remaining box is not
                        else if (box2.right - box2.left >= 4 && _sheet.ContentExistValueDensity(remainBox) < 2 * 0.25 && _sheet.ContentExistValueDensity(box2Right) > 2 * 0.5)
                        {
                            removedBoxes.Add(box1);
                        }
                        // same idea with lower thresholds on both sides
                        else if (box2.right - box2.left >= 4 && _sheet.ContentExistValueDensity(remainBox) < 2 * 0.1 && _sheet.ContentExistValueDensity(box2Right) > 2 * 0.35)
                        {
                            removedBoxes.Add(box1);
                        }
                        // box1 adds at most 4 columns and only box2 has a top header row
                        else if ((box1.right - box2.right) <= 4 && box2.right - box2.left >= 5 && IsHeaderUp(Utils.UpRow(box2)) && !IsHeaderUp(Utils.UpRow(box1)))
                        {
                            removedBoxes.Add(box1);
                        }
                        // box1 ends on a header-like column while the columns at offsets 1..3 from it are sparse / not headers
                        else if (IsHeaderLeft(Utils.RightCol(box1)) && _sheet.ContentExistValueDensity(Utils.RightCol(box1, start: 1)) < 0.2 * 2
                            && !IsHeaderLeft(Utils.RightCol(box1, start: 2)) && !IsHeaderLeft(Utils.RightCol(box1, start: 3)))
                        {
                            removedBoxes.Add(box1);
                        }
                        // otherwise the larger box wins
                        else
                        {
                            removedBoxes.Add(box2);
                        }

                    }
                }
            }
            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        // Resolves "outer box contains inner box" pairs among the candidates.
        //
        // An outer candidate is only examined when it does not partially overlap any
        // candidate (overlap without containment in either direction). A pair is only
        // judged when no third, not-yet-discarded candidate also lies inside the outer
        // box. Every judged pair discards one box: the outer one when the margins around
        // the inner box are sparse while the inner box is dense, otherwise the inner one.
        // Independently of that, the inner box is also discarded when the outer box has a
        // left/top header where the inner box has none.
        private void PairContainsFilter()
        {
            var discarded = new HashSet<Boundary>();

            foreach (Boundary outer in _boxes)
            {
                bool crossesAnother = _boxes.Any(other =>
                    Utils.isOverlap(outer, other)
                    && !Utils.ContainsBox(outer, other)
                    && !Utils.ContainsBox(other, outer));
                if (crossesAnother)
                {
                    continue;
                }

                foreach (Boundary inner in _boxes)
                {
                    if (!Utils.ContainsBox(outer, inner) || outer.Equals(inner))
                    {
                        continue;
                    }

                    // Re-evaluated for every pair because `discarded` grows while iterating.
                    bool hasThirdInside = _boxes.Any(third =>
                        !discarded.Contains(third)
                        && !outer.Equals(third)
                        && !inner.Equals(third)
                        && Utils.ContainsBox(outer, third));
                    if (hasThirdInside)
                    {
                        continue;
                    }

                    // First row / first column of each box, used for the header tests.
                    var outerTopRow = new Boundary(outer.top, outer.top, outer.left, outer.right);
                    var innerTopRow = new Boundary(inner.top, inner.top, inner.left, inner.right);
                    var outerLeftCol = new Boundary(outer.top, outer.bottom, outer.left, outer.left);
                    var innerLeftCol = new Boundary(inner.top, inner.bottom, inner.left, inner.left);

                    if (IsHeaderLeft(outerLeftCol) && !IsHeaderLeft(innerLeftCol))
                    {
                        discarded.Add(inner);
                    }
                    if (IsHeaderUp(outerTopRow) && !IsHeaderUp(innerTopRow))
                    {
                        discarded.Add(inner);
                    }

                    // Four margins of the outer box around the inner one (below, right,
                    // left, above), each starting two cells away from the inner box.
                    Boundary[] margins =
                    {
                        new Boundary(inner.bottom + 2, outer.bottom, outer.left, outer.right),
                        new Boundary(outer.top, outer.bottom, inner.right + 2, outer.right),
                        new Boundary(outer.top, outer.bottom, outer.left, inner.left - 2),
                        new Boundary(outer.top, inner.top - 2, outer.left, outer.right),
                    };

                    double densestMargin = 0;
                    foreach (Boundary margin in margins)
                    {
                        bool isInverted = margin.bottom < margin.top || margin.right < margin.left;
                        if (isInverted || Utils.AreaSize(margin) == 0)
                        {
                            continue;
                        }
                        densestMargin = Math.Max(densestMargin, _sheet.ContentExistValueDensity(margin));
                    }

                    // Thresholds are written as 2 * x as in the original tuning; presumably
                    // ContentExistValueDensity ranges up to 2 -- TODO confirm.
                    bool innerWins =
                        (densestMargin < 2 * 0.3 && _sheet.ContentExistValueDensity(inner) > 2 * 0.7)
                        || (densestMargin < 2 * 0.2 && _sheet.ContentExistValueDensity(inner) > 2 * 0.5)
                        || (densestMargin < 2 * 0.4 && _sheet.ContentExistValueDensity(inner) > 2 * 0.9);

                    discarded.Add(innerWins ? outer : inner);
                }
            }

            Utils.RemoveTheseCandidates(discarded, _boxes);
        }

        // Removes a candidate (box1) that is filled by two or more overlapping candidates
        // which each carry their own header.
        //
        // Every other candidate box2 that overlaps box1 (and does not contain it, per
        // ContainsBox(box2, box1, 2)) is classified. It is rejected as "lacking a header" when
        //   * its top and bottom are both within 2 rows of box1's and its first column is
        //     not a left header, or
        //   * its left and right are both within 2 columns of box1's and its first row is
        //     not a top header, or
        //   * it is aligned in neither direction and is missing a top or a left header.
        // All remaining boxes are collected; box1 is removed when at least two were
        // collected and Utils.IsFillBox(box1, collected) holds.
        private void CombineContainsHeaderFilter()
        {
            var removedBoxes = new HashSet<Boundary>();

            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                List<Boundary> insideBoxes = new List<Boundary>();
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];
                    if (box2.Equals(box1) || !Utils.isOverlap(box1, box2) || Utils.ContainsBox(box2, box1, 2))
                    {
                        continue;
                    }

                    Boundary box2Left = new Boundary(box2.top, box2.bottom, box2.left, box2.left);
                    Boundary box2Up = new Boundary(box2.top, box2.top, box2.left, box2.right);

                    bool rowsAligned = Math.Abs(box1.top - box2.top) <= 2 && Math.Abs(box1.bottom - box2.bottom) <= 2;
                    bool colsAligned = Math.Abs(box1.left - box2.left) <= 2 && Math.Abs(box1.right - box2.right) <= 2;

                    // Short-circuit order matches the original if / else-if chain, so the
                    // header detectors are invoked for exactly the same boxes as before.
                    bool lacksHeader =
                        (rowsAligned && !IsHeaderLeft(box2Left))
                        || (colsAligned && !IsHeaderUp(box2Up))
                        || (!colsAligned && !rowsAligned && (!IsHeaderUp(box2Up) || !IsHeaderLeft(box2Left)));

                    if (!lacksHeader)
                    {
                        insideBoxes.Add(box2);
                    }
                }

                if (insideBoxes.Count >= 2 && Utils.IsFillBox(box1, insideBoxes))
                {
                    removedBoxes.Add(box1);
                }
            }

            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        // Arbitrates between a candidate (box1) and the candidates overlapping it, based on
        // how much of box1's area and content those overlaps account for.
        //
        // For each box1, every other candidate that overlaps it and does not contain it
        // (ContainsBox(box2, box1, 2)) contributes the area and the content sum of its
        // intersection with box1. The contributions are summed per box, so intersections
        // that overlap each other are counted more than once. When at least one such box
        // exists and the summed area is below 40% of box1's area:
        //   * summed content < 70% of box1's content: the overlapping boxes that box1
        //     contains (ContainsBox(box1, inBox, 2)) are removed;
        //   * summed content > 85% of box1's content: box1 itself is removed.
        private void CombineContainsFillAreaFilterSoft()
        {
            var removedBoxes = new HashSet<Boundary>();

            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                List<Boundary> insideBoxes = new List<Boundary>();
                double areaSizeCombine = 0;
                int valueSumInside = 0;

                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];
                    if (box2.Equals(box1) || Utils.ContainsBox(box2, box1, 2) || !Utils.isOverlap(box1, box2))
                    {
                        continue;
                    }

                    insideBoxes.Add(box2);
                    Boundary overlapRegion = Utils.OverlapBox(box2, box1);
                    areaSizeCombine += Utils.AreaSize(overlapRegion);
                    valueSumInside += _sheet.sumContentExist.SubmatrixSum(overlapRegion);
                }

                if (insideBoxes.Count > 0 && areaSizeCombine < 0.4 * Utils.AreaSize(box1))
                {
                    // Content sum of the whole candidate, shared by both thresholds below.
                    var box1ValueSum = _sheet.sumContentExist.SubmatrixSum(box1);

                    if (valueSumInside < 0.7 * box1ValueSum)
                    {
                        foreach (var inBox in insideBoxes)
                        {
                            if (Utils.ContainsBox(box1, inBox, 2))
                            {
                                removedBoxes.Add(inBox);
                            }
                        }
                    }
                    if (valueSumInside > 0.85 * box1ValueSum)
                    {
                        removedBoxes.Add(box1);
                    }
                }
            }

            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        // Removes a candidate (box1) that appears to be assembled from header-led
        // candidates ending at its bottom or right edge.
        //
        // For each box1 the other overlapping candidates are sorted into two groups:
        //   * colBoxes: box1 spans at least 3 columns, box2 does not stick out of box1 by
        //     more than one column on both sides at once, box2 starts at least 3 rows
        //     below box1's top, ends within 2 rows of box1's bottom, and its first row
        //     (clipped to box1's columns) is a top header. Its column indexes, padded by
        //     2 on each side, are recorded in `cols`.
        //   * rowBoxes: the transposed conditions, with a left header; row indexes padded
        //     by 2 are recorded in `rows`.
        // box1 is removed when Utils.IsFillBoxRowColLines(box1, rows, cols) holds, or when
        // for some collected box the strip of box1 spanned by that box holds fewer than 10
        // units of content outside the collected boxes of the same group.
        private void CombineContainsFillLineFilterSoft()
        {
            var removedBoxes = new HashSet<Boundary>();

            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                List<int> rows = new List<int>();
                List<int> cols = new List<int>();
                List<Boundary> rowBoxes = new List<Boundary>();
                List<Boundary> colBoxes = new List<Boundary>();
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];
                    if (box2.Equals(box1) || !Utils.isOverlap(box1, box2))
                    {
                        continue;
                    }

                    Boundary box2Left = new Boundary(Math.Max(box2.top, box1.top), Math.Min(box2.bottom, box1.bottom), box2.left, box2.left);
                    Boundary box2Up = new Boundary(box2.top, box2.top, Math.Max(box1.left, box2.left), Math.Min(box2.right, box1.right));
                    if (box1.right >= box1.left + 2 && !(box2.right > box1.right + 1 && box2.left < box1.left - 1) && box2.top >= box1.top + 3 && box2.bottom >= box1.bottom - 2 && box2.bottom <= box1.bottom + 2 && IsHeaderUp(box2Up))
                    {
                        colBoxes.Add(box2);
                        for (int k = box2.left - 2; k <= box2.right + 2; k++)
                        {
                            cols.Add(k);
                        }
                    }
                    if (box1.bottom >= box1.top + 2 && !(box2.bottom > box1.bottom + 1 && box2.top < box1.top - 1) && box2.left >= box1.left + 3 && box2.right >= box1.right - 2 && box2.right <= box1.right + 2 && IsHeaderLeft(box2Left))
                    {
                        rowBoxes.Add(box2);
                        for (int k = box2.top - 2; k <= box2.bottom + 2; k++)
                        {
                            rows.Add(k);
                        }
                    }
                }

                // Each test below can only mark box1 for removal, so evaluation stops at
                // the first one that succeeds.
                bool removeBox1 = Utils.IsFillBoxRowColLines(box1, rows, cols);

                if (!removeBox1)
                {
                    foreach (var rowBoxin1 in rowBoxes)
                    {
                        // Full-height strip of box1 limited to this box's columns.
                        Boundary rowBox1 = new Boundary(box1.top, box1.bottom, rowBoxin1.left, rowBoxin1.right);
                        List<Boundary> overlapBoxes = new List<Boundary>();
                        foreach (var rowBoxin2 in rowBoxes)
                        {
                            if (Utils.isOverlap(rowBoxin2, rowBox1))
                            {
                                overlapBoxes.Add(Utils.OverlapBox(rowBoxin2, rowBox1));
                            }
                        }
                        if (_sheet.sumContentExist.SubmatrixSum(rowBox1) - SheetMap.ValueSumRange(overlapBoxes, _sheet.sumContentExist) < 10)
                        {
                            removeBox1 = true;
                            break;
                        }
                    }
                }

                if (!removeBox1)
                {
                    foreach (var colBoxin1 in colBoxes)
                    {
                        // Full-width strip of box1 limited to this box's rows.
                        Boundary colBox1 = new Boundary(colBoxin1.top, colBoxin1.bottom, box1.left, box1.right);
                        List<Boundary> overlapBoxes = new List<Boundary>();
                        foreach (var colBoxin2 in colBoxes)
                        {
                            if (Utils.isOverlap(colBoxin2, colBox1))
                            {
                                overlapBoxes.Add(Utils.OverlapBox(colBoxin2, colBox1));
                            }
                        }
                        if (_sheet.sumContentExist.SubmatrixSum(colBox1) - SheetMap.ValueSumRange(overlapBoxes, _sheet.sumContentExist) < 10)
                        {
                            removeBox1 = true;
                            break;
                        }
                    }
                }

                if (removeBox1)
                {
                    removedBoxes.Add(box1);
                }
            }

            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        // For each candidate (box1), collects the regions where other candidates overlap
        // it and then removes either box1 or those regions:
        //   * box1 is removed when the collected regions fill it (see the long condition
        //     below), or when there are at least two regions and no two of them share a
        //     column;
        //   * otherwise every collected region is added to the removal set.
        private void CombineContainsFilterHard()
        {
            var removedBoxes = new HashSet<Boundary>();

            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                //if (removedBoxes.Contains(box1)) continue;
                // Row / column indexes covered by the collected regions, padded by 2 on each side.
                List<int> rows = new List<int>();
                List<int> cols = new List<int>();
                // Overlap regions (box1 ∩ box2) that box1 contains.
                List<Boundary> boxesIn = new List<Boundary>();
                bool containsHeader = false;
                // Total height of the collected regions (rows counted once per region).
                int rowsCount = 0;
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];

                    //if (removedBoxes.Contains(box2)) continue;
                    // There is no explicit self-skip here; an overlap region equal to box1 is
                    // excluded by the Equals test further down.
                    if (!Utils.isOverlap(box1, box2) || Utils.ContainsBox(box2, box1, 2)) continue;
                    Boundary overlapRegion = Utils.OverlapBox(box1, box2);
                    // NOTE(review): this only has an effect if Boundary is a reference type;
                    // with value semantics it is a no-op -- confirm.
                    if (box2.Equals(overlapRegion)) overlapRegion = box2;

                    // The region starts with a header row while box1 does not, the two top rows
                    // are more than 3 rows apart, and the region's top row holds more than 80%
                    // of the content found on that row across box1's full width.
                    if (IsHeaderUp(Utils.UpRow(overlapRegion)) && !IsHeaderUp(Utils.UpRow(box1)) &&
                        Math.Abs(Utils.UpRow(box1).top - Utils.UpRow(overlapRegion).top) > 3 && _sheet.sumContentExist.SubmatrixSum(Utils.UpRow(overlapRegion)) > 0.8 * _sheet.sumContentExist.SubmatrixSum(new Boundary(overlapRegion.top, overlapRegion.top, box1.left, box1.right)))
                    {
                        containsHeader = true;
                    }

                    if (!overlapRegion.Equals(box1) && Utils.ContainsBox(box1, overlapRegion, 2) && !Utils.isSuppressionBox(box1, overlapRegion))//if (box2.Equals(box1) || !Utils.isContainsBox(box1, box2))
                    {
                        boxesIn.Add(overlapRegion);
                        for (int k = overlapRegion.left - 2; k <= overlapRegion.right + 2; k++)
                        {
                            cols.Add(k);
                        }
                        for (int k = overlapRegion.top - 2; k <= overlapRegion.bottom + 2; k++)
                        {
                            rows.Add(k);
                        }
                        rowsCount += overlapRegion.bottom - overlapRegion.top + 1;
                    }
                }

                double sizeSum = Utils.AreaSize(boxesIn);

                int valueSum = SheetMap.ValueSumRange(boxesIn, _sheet.sumContentExist);

                // True when any two collected regions share at least one column.
                bool columnOverlap = false;
                for (int k = 0; k < boxesIn.Count; k++)
                {
                    for (int t = k + 1; t < boxesIn.Count; t++)
                    {
                        if (!(boxesIn[k].left > boxesIn[t].right || boxesIn[k].right < boxesIn[t].left))
                        {
                            columnOverlap = true;
                        }
                    }
                }

                // Remove box1 when (a header was found inside it, or the rows of box1 not
                // counted in rowsCount average more than 1.5 per gap between regions) and the
                // padded rows/cols pass Utils.IsFillLines for box1's extent and the regions
                // cover more than 60% of box1's area and more than 80% of its content.
                // NOTE(review): the divisor boxesIn.Count - 1 is 0 for a single region (float
                // division then yields +/-Infinity or NaN instead of throwing) and -1 for none;
                // confirm that a single region with uncovered rows is meant to pass this test.
                // sheet.valueSumRange(box1) - valueSum < 2 *Math.Max(box1.down - box1.up + 1, box1.right - box1.left + 1)
                if ((containsHeader || (float)(box1.bottom - box1.top + 1 - rowsCount) / (float)(boxesIn.Count - 1) > 1.5) && Utils.IsFillLines(box1.top, box1.bottom, rows) && Utils.IsFillLines(box1.left, box1.right, cols) && sizeSum > 0.6 * Utils.AreaSize(box1) && valueSum > 0.8 * _sheet.sumContentExist.SubmatrixSum(box1))
                {
                    removedBoxes.Add(box1);
                }
                // Remove the big box when it holds two or more regions and none of them share columns.
                else if (!columnOverlap && boxesIn.Count >= 2)
                {
                    removedBoxes.Add(box1);
                }
                else
                {
                    // NOTE(review): the entries are overlap regions, which equal the original
                    // candidate only when that candidate lies entirely inside box1; presumably a
                    // clipped region matches nothing in _boxes -- verify against
                    // Utils.RemoveTheseCandidates.
                    foreach (var box2 in boxesIn)
                    {
                        removedBoxes.Add(box2);
                    }
                }
            }
            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        // Removes small candidates that sit inside another candidate and are surrounded
        // by content.
        //
        // A contained box2 qualifies when its row span and column span (bottom - top,
        // right - left) are both below 6 and at least one of them is below 4. The four
        // one-cell-thick strips just outside box2 (above, below, left, right) are then
        // inspected: box2 is removed when all four hold content, or when at least three
        // hold content and the strip below or the strip to the right has a content sum
        // above 2.
        private void ContainsLittleFilter()
        {
            var removedBoxes = new HashSet<Boundary>();

            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box1 = _boxes[i];
                if (removedBoxes.Contains(box1)) continue;

                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary box2 = _boxes[j];
                    if (removedBoxes.Contains(box2)) continue;
                    if (!Utils.ContainsBox(box1, box2) || box2.Equals(box1))
                    {
                        continue;
                    }

                    // TODO: similar with surround
                    int rowSpan = box2.bottom - box2.top;
                    int colSpan = box2.right - box2.left;
                    bool isLittle = rowSpan < 6 && colSpan < 6 && (rowSpan < 4 || colSpan < 4);
                    if (!isLittle)
                    {
                        continue;
                    }

                    // Content sums of the strips directly outside each edge of box2.
                    var sumUp = _sheet.sumContentExist.SubmatrixSum(new Boundary(box2.top - 1, box2.top - 1, box2.left, box2.right));
                    var sumDown = _sheet.sumContentExist.SubmatrixSum(new Boundary(box2.bottom + 1, box2.bottom + 1, box2.left, box2.right));
                    var sumLeft = _sheet.sumContentExist.SubmatrixSum(new Boundary(box2.top, box2.bottom, box2.left - 1, box2.left - 1));
                    var sumRight = _sheet.sumContentExist.SubmatrixSum(new Boundary(box2.top, box2.bottom, box2.right + 1, box2.right + 1));

                    int cntNotNone = 0;
                    if (sumUp > 0) cntNotNone++;
                    if (sumDown > 0) cntNotNone++;
                    if (sumLeft > 0) cntNotNone++;
                    if (sumRight > 0) cntNotNone++;

                    // Only the bottom and right neighbours are checked for being "not sparse".
                    int cntNotSparse = 0;
                    if (sumDown > 2) cntNotSparse++;
                    if (sumRight > 2) cntNotSparse++;

                    if ((cntNotNone >= 3 && cntNotSparse >= 1) || cntNotNone == 4)
                    {
                        removedBoxes.Add(box2);
                    }
                }
            }

            Utils.RemoveTheseCandidates(removedBoxes, _boxes);
        }

        // For every candidate that has not been discarded yet, inspects the candidates it
        // contains (ContainsBox called with 1 as third argument -- presumably a tolerance,
        // TODO confirm):
        //   * a contained box is discarded right away when the outer box has a top header
        //     and the contained box does not, with the two top rows at least 2 apart, or
        //     the same holds for left headers with the left columns at least 2 apart;
        //   * the other contained boxes are kept as "nested"; if exactly one box is nested
        //     and no candidate partially overlaps the outer box, that nested box is
        //     discarded too.
        private void PairContainsFilterHard()
        {
            var discarded = new HashSet<Boundary>();

            foreach (Boundary outer in _boxes)
            {
                if (discarded.Contains(outer))
                {
                    continue;
                }

                var nested = new List<Boundary>();
                bool hasPartialOverlap = false;

                foreach (Boundary inner in _boxes)
                {
                    if (discarded.Contains(inner))
                    {
                        continue;
                    }

                    bool outerHoldsInner = Utils.ContainsBox(outer, inner, 1);
                    bool sameBox = inner.Equals(outer);

                    // Overlap without containment in either direction.
                    if (!outerHoldsInner && !Utils.ContainsBox(inner, outer, 1) && !sameBox && Utils.isOverlap(inner, outer))
                    {
                        hasPartialOverlap = true;
                    }

                    if (!outerHoldsInner || sameBox)
                    {
                        continue;
                    }

                    // TODO: similar with surround
                    var outerTopRow = new Boundary(outer.top, outer.top, outer.left, outer.right);
                    var innerTopRow = new Boundary(inner.top, inner.top, inner.left, inner.right);
                    var outerLeftCol = new Boundary(outer.top, outer.bottom, outer.left, outer.left);
                    var innerLeftCol = new Boundary(inner.top, inner.bottom, inner.left, inner.left);

                    bool topHeaderOnlyOnOuter =
                        IsHeaderUp(outerTopRow)
                        && !IsHeaderUp(innerTopRow)
                        && Math.Abs(outerTopRow.top - innerTopRow.top) >= 2;

                    // The left-column distance is read from the top-row boundaries, which
                    // carry the same `left` values as the boxes themselves.
                    bool headerOnlyOnOuter =
                        topHeaderOnlyOnOuter
                        || (IsHeaderLeft(outerLeftCol)
                            && !IsHeaderLeft(innerLeftCol)
                            && Math.Abs(outerTopRow.left - innerTopRow.left) >= 2);

                    if (headerOnlyOnOuter)
                    {
                        discarded.Add(inner);
                    }
                    else
                    {
                        nested.Add(inner);
                    }
                }

                if (nested.Count == 1 && !hasPartialOverlap)
                {
                    discarded.Add(nested[0]);
                }
            }

            Utils.RemoveTheseCandidates(discarded, _boxes);
        }
        #endregion

        #region formula related
        // Merges or drops candidates that are tied together by formula references.
        //
        // Every ordered pair (boxFrom, boxTo) of distinct candidates is examined unless the
        // boxes are more than 10 cells apart, or boxFrom sticks out of boxTo by 2 or more
        // cells both horizontally and vertically. IsFormulaCorrelation reports whether
        // formulas in boxFrom reference boxTo (0 = no relation, otherwise 1 or 2):
        //   * any non-zero relation removes boxFrom;
        //   * relation 2 additionally appends the union of both boxes, and also removes
        //     boxTo when boxFrom is not nested inside boxTo along the tested axis and the
        //     gap between the two boxes holds at most 10 units of content.
        // Finally the candidate list is updated and de-duplicated.
        private void FormulaCorrelationFilter()
        {
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary boxFrom = _boxes[i];
                for (int j = 0; j < _boxes.Count; j++)
                {
                    Boundary boxTo = _boxes[j];
                    if (boxFrom.Equals(boxTo))
                    {
                        continue;
                    }
                    // The two boxes are far apart vertically or horizontally.
                    if (boxTo.bottom < boxFrom.top - 10 || boxTo.right < boxFrom.left - 10 || boxFrom.bottom < boxTo.top - 10 || boxFrom.right < boxTo.left - 10)
                    {
                        continue;
                    }
                    // boxFrom extends beyond boxTo in both directions; the margin of 2 is for
                    // skipping the header region.
                    if ((boxFrom.left <= boxTo.left - 2 || boxFrom.right >= boxTo.right + 2)
                          && (boxFrom.top <= boxTo.top - 2 || boxFrom.bottom >= boxTo.bottom + 2))
                    {
                        continue;
                    }

                    // Vertical relation: boxFrom must be "thinner" than boxTo, which avoids
                    // some noise in combined tables.
                    if (boxFrom.left > boxTo.left - 2 && boxFrom.right < boxTo.right + 2)
                    {
                        int verticalCorrelation = IsFormulaCorrelation(boxFrom, boxTo, "vertical");
                        if (verticalCorrelation == 2 || verticalCorrelation == 1)
                        {
                            removedBoxes.Add(boxFrom);
                        }
                        if (verticalCorrelation == 2)
                        {
                            // Rows strictly between boxTo's bottom and boxFrom's top, leaving a
                            // 2-row margin on each side. Evaluated once; the original tested
                            // this condition and its negation in two separate branches.
                            bool detachedWithEmptyGap =
                                !(boxFrom.top >= boxTo.top && boxFrom.bottom <= boxTo.bottom)
                                && _sheet.sumContentExist.SubmatrixSum(new Boundary(boxTo.bottom + 3, boxFrom.top - 3, boxTo.left, boxTo.right)) <= 10;
                            if (detachedWithEmptyGap)
                            {
                                // boxFrom may be a combined box with its left border inside.
                                removedBoxes.Add(boxTo);
                            }
                            appendBoxes.Add(Utils.UnifyBox(boxFrom, boxTo));
                        }
                    }

                    // Horizontal relation: the cheap geometric test runs first so the formula
                    // scan is skipped for pairs that could never pass it.
                    if (boxFrom.top > boxTo.top - 2 && boxFrom.bottom < boxTo.bottom + 2)
                    {
                        int horizontalCorrelation = IsFormulaCorrelation(boxFrom, boxTo, "horizontal");
                        if (horizontalCorrelation != 0)
                        {
                            // Reference ranges overlap boxFrom, so it is removed directly.
                            removedBoxes.Add(boxFrom);
                            if (horizontalCorrelation == 2)
                            {
                                // Columns strictly between boxTo's right edge and boxFrom's
                                // left edge, leaving a 2-column margin on each side.
                                bool detachedWithEmptyGap =
                                    (boxFrom.left < boxTo.left || boxFrom.right > boxTo.right)
                                    && _sheet.sumContentExist.SubmatrixSum(new Boundary(boxTo.top, boxTo.bottom, boxTo.right + 3, boxFrom.left - 3)) <= 10;
                                if (detachedWithEmptyGap)
                                {
                                    removedBoxes.Add(boxTo);
                                }
                                appendBoxes.Add(Utils.UnifyBox(boxFrom, boxTo));
                            }
                        }
                    }
                }
            }

            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        // Scans the cells of boxFrom for formula reference ranges that overlap boxTo and
        // returns the first non-zero relation found, or 0 when there is none.
        //
        // direction: "vertical" classifies with IsFormulaRelatedUpDown and ignores cells
        //            whose row lies within boxTo's row span; any other value classifies
        //            with IsFormulaRelatedLeftRight, and "horizontal" additionally ignores
        //            cells whose column lies within boxTo's column span.
        // Returns:   0, or the non-zero value (1 or 2) returned by the classifier.
        private int IsFormulaCorrelation(Boundary boxFrom, Boundary boxTo, string direction)
        {
            bool isVertical = direction == "vertical";
            bool isHorizontal = direction == "horizontal";

            for (int row = boxFrom.top; row <= boxFrom.bottom; row++)
            {
                // For a vertical check only cells above or below boxTo are considered; the
                // test depends on the row alone, so the whole row is skipped at once.
                if (isVertical && boxTo.top <= row && row <= boxTo.bottom)
                {
                    continue;
                }

                for (int col = boxFrom.left; col <= boxFrom.right; col++)
                {
                    // For a horizontal check only cells left or right of boxTo are considered.
                    if (isHorizontal && boxTo.left <= col && col <= boxTo.right)
                    {
                        continue;
                    }

                    Boundary curCell = new Boundary(row, row, col, col);
                    foreach (var referRange in _sheet.formulaRanges[row, col])
                    {
                        if (!Utils.isOverlap(referRange, boxTo))
                        {
                            continue;
                        }

                        // Decide whether this reference ties the two boxes together.
                        int formulaRelation = isVertical
                            ? IsFormulaRelatedUpDown(boxFrom, boxTo, curCell, referRange)
                            : IsFormulaRelatedLeftRight(boxFrom, boxTo, curCell, referRange);
                        if (formulaRelation != 0)
                        {
                            return formulaRelation;
                        }
                    }
                }
            }
            return 0;
        }

        // Classifies a vertical formula relation between boxFrom (which holds the formula
        // cell) and boxTo (which the formula's reference range overlaps).
        //
        // Returns 0 when the boxes are not considered related, 1 when they are related
        // and `cell` lies inside boxTo, and 2 when they are related and it does not.
        private int IsFormulaRelatedUpDown(Boundary boxFrom, Boundary boxTo, Boundary cell, Boundary referRange)
        {
            Boundary referencedPart = Utils.OverlapBox(referRange, boxTo);

            // The referenced part must not already be contained by the source box.
            if (Utils.ContainsBox(boxFrom, referencedPart, 1))
            {
                return 0;
            }

            // boxFrom has to share columns with the referenced part: test it against the
            // full-height band of the sheet over those columns.
            Boundary referencedColumns = new Boundary(1, _sheet.Height, referencedPart.left, referencedPart.right);
            if (!Utils.isOverlap(boxFrom, referencedColumns))
            {
                return 0;
            }

            Boundary firstRow = Utils.UpRow(boxFrom);
            Boundary secondRow = Utils.UpRow(boxFrom, start: 1);

            bool fromReachesLower = boxFrom.bottom > boxTo.bottom || boxFrom.top > boxTo.top;
            bool referenceStartsAbove = referencedPart.top < boxFrom.top;
            bool columnsIntersect = boxFrom.right > boxTo.left && boxTo.right > boxFrom.left;
            if (!(fromReachesLower && referenceStartsAbove && columnsIntersect))
            {
                return 0;
            }

            // A header in either of the two rows obtained from UpRow rules the relation out.
            if (IsHeaderUp(firstRow) || IsHeaderUp(secondRow))
            {
                return 0;
            }

            return Utils.ContainsBox(boxTo, cell) ? 1 : 2;
        }

        // Classifies a horizontal formula relation between boxFrom (which holds the
        // formula cell) and boxTo (which the formula's reference range overlaps).
        //
        // Returns 0 when the boxes are not considered related, 1 when they are related
        // and `cell` lies inside boxTo, and 2 when they are related and it does not.
        private int IsFormulaRelatedLeftRight(Boundary boxFrom, Boundary boxTo, Boundary cell, Boundary referRange)
        {
            Boundary referencedPart = Utils.OverlapBox(referRange, boxTo);

            // The referenced part must not already be contained by the source box.
            if (Utils.ContainsBox(boxFrom, referencedPart, 1))
            {
                return 0;
            }

            // boxFrom has to share rows with the referenced part: test it against the
            // full-width band of the sheet over those rows.
            Boundary referencedRows = new Boundary(referencedPart.top, referencedPart.bottom, 1, _sheet.Width);
            if (!Utils.isOverlap(boxFrom, referencedRows))
            {
                return 0;
            }

            Boundary firstCol = Utils.LeftCol(boxFrom);
            Boundary secondCol = Utils.LeftCol(boxFrom, start: 1);

            bool fromReachesRight = boxFrom.right > boxTo.right || boxFrom.left > boxTo.left;
            // NOTE(review): this reads referRange.left, whereas IsFormulaRelatedUpDown tests
            // the top of the range clipped to boxTo -- confirm the asymmetry is intended.
            bool referenceStartsLeft = referRange.left < boxFrom.left;
            bool rowsIntersect = boxFrom.bottom > boxTo.top && boxTo.bottom > boxFrom.top;
            if (!(fromReachesRight && referenceStartsLeft && rowsIntersect))
            {
                return 0;
            }

            // A header in either of the two columns obtained from LeftCol rules the relation out.
            if (IsHeaderLeft(firstCol) || IsHeaderLeft(secondCol))
            {
                return 0;
            }

            return Utils.ContainsBox(boxTo, cell) ? 1 : 2;
        }
        #endregion
    }
}
