using System;
using System.Linq;
using System.Collections.Generic;

namespace SpreadsheetLLM.Heuristic
{
    internal partial class TableDetectionHybrid
    {
        #region retrive header
        /// <summary>
        /// Looks for a header row lying above a box (apart from its data region) and, when one is
        /// found with at least one sparse row above it, replaces the box with a copy whose top is
        /// moved up to that header.
        /// </summary>
        private void RetrieveDistantUpHeader()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box1 in _boxes)
            {
                // Skip boxes for which the row returned by Utils.UpRow(box1) is already accepted as a header.
                if (IsHeaderUpWithDataArea(Utils.UpRow(box1), box1))
                {
                    continue;
                }

                // Probe the rows at offsets -1 .. -4 and stop at the first compact one.
                // When none of them is compact, the row at offset -4 stays the candidate.
                Boundary headerRow = Utils.UpRow(box1, start: -1);
                int offset = 1;
                while (offset < 5)
                {
                    headerRow = Utils.UpRow(box1, start: -offset);
                    bool isCompactRow = _sheet.sumContentExist.SubmatrixSum(headerRow) >= 6
                        && _sheet.TextDistinctCount(headerRow) > 1
                        && _sheet.ContentExistValueDensity(headerRow) >= 2 * 0.5;
                    if (isCompactRow)
                    {
                        break;
                    }
                    offset++;
                }

                // The candidate row must itself qualify as a header with a header rate above 0.8.
                if (!IsHeaderUp(headerRow) || !(HeaderRate(headerRow) > 0.8))
                {
                    continue;
                }

                // Walk up over at most three further rows that pass the simple header check.
                for (int extraRows = 0; extraRows < 3; extraRows++)
                {
                    Boundary rowAbove = Utils.UpRow(headerRow, start: -1);
                    if (!IsHeaderUpSimple(rowAbove))
                    {
                        break;
                    }
                    headerRow = rowAbove;
                }

                // Accept only when at least one of the rows at offsets -1, -2, -3 from the header
                // holds fewer than 3 content cells.
                bool hasSparseRowAbove = false;
                for (int k = 1; k <= 3 && !hasSparseRowAbove; k++)
                {
                    hasSparseRowAbove = _sheet.sumContentExist.SubmatrixSum(Utils.UpRow(headerRow, start: -k)) < 3;
                }

                if (hasSparseRowAbove)
                {
                    boxesToRemove.Add(box1);
                    boxesToAppend.Add(new Boundary(headerRow.top, box1.bottom, box1.left, box1.right));
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
        }

        /// <summary>
        /// Tries to extend every box upward, starting <paramref name="step"/> rows above its current
        /// top, over rows that still look like part of the table. A box is replaced when at least
        /// one row was accepted and the scan stopped no higher than <c>box.top - 6</c>.
        /// </summary>
        /// <param name="step">Offset above <c>box.top</c> of the first row that is examined.</param>
        private void RetrieveUpHeader(int step)
        {
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();
            foreach (var box in _boxes)
            {
                // Whether the current top row of the box is already recognised as a header.
                bool markHeader = IsHeaderUp(new Boundary(box.top, box.top, box.left, box.right));

                int up;
                for (up = box.top - step; up > 0; up--)
                {
                    Boundary boxUp = new Boundary(up, up, box.left, box.right);

                    // Once the box has a header, only header rows or rows with merged cells may be added.
                    if (markHeader && !IsHeaderUp(boxUp) && !_sheet.ExistsMerged(boxUp))
                    {
                        break;
                    }

                    // A row with at least two distinct strings is always accepted.
                    if (Utils.DistinctStrs(_sheet.contentStrs, up, up, box.left, box.right) >= 2)
                    {
                        continue;
                    }

                    // From here on the row has fewer than two distinct strings. The former
                    // "merged" and "dense" branches also required >= 2 distinct strings and could
                    // therefore never be taken; they were removed as unreachable.
                    // The only remaining way to accept the row: the box is wide, the row is dense
                    // in split windows, and it is not a dense non-header row.
                    bool acceptedAsWideRow = box.right - box.left >= 8
                        && (_sheet.RowContentExistValueDensitySplit(boxUp, 4) >= 0.7 || _sheet.RowContentExistValueDensitySplit(boxUp, 8) >= 0.7)
                        && !(_sheet.ContentExistValueDensity(boxUp) >= 2 * 0.4 && !IsHeaderUp(boxUp));
                    if (!acceptedAsWideRow)
                    {
                        break;
                    }
                }

                // "up" is the first rejected row (or 0); the new top is the row right below it.
                if (up < box.top - step && up >= box.top - 6)
                {
                    removedBoxes.Add(box);
                    Boundary newBox = new Boundary(up + 1, box.bottom, box.left, box.right);
                    appendBoxes.Add(newBox);
                }
            }
            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Tries to extend every box to the left, starting <paramref name="step"/> columns left of
        /// its current left edge. A box is replaced when at least one column was accepted, the scan
        /// stopped no further than <c>box.left - 5</c>, and the added strip overlaps no box in
        /// <c>_boxes</c>.
        /// </summary>
        /// <param name="step">Offset left of <c>box.left</c> of the first column that is examined.</param>
        private void RetrieveLeft(int step)
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                int firstColumn = box.left - step;
                int left = firstColumn;
                while (left > 0)
                {
                    Boundary column = new Boundary(box.top, box.bottom, left, left);

                    // A column is accepted when it contains merged cells, or is dense with at least
                    // 4 content cells, or (for tall boxes) is dense in split windows and is not a
                    // dense non-header column.
                    bool accepted = _sheet.ExistsMerged(column)
                        || (_sheet.ContentExistValueDensity(column) >= 2 * 0.4 && _sheet.sumContentExist.SubmatrixSum(column) >= 4)
                        || (box.bottom - box.top >= 8
                            && (_sheet.ColContentExistValueDensitySplit(column, 4) == 1 || _sheet.ColContentExistValueDensitySplit(column, 8) >= 0.8)
                            && (!(_sheet.ContentExistValueDensity(column) >= 2 * 0.4) || IsHeaderLeft(column)));
                    if (!accepted)
                    {
                        break;
                    }
                    left--;
                }

                // "left" is the first rejected column (or 0); the new left edge is the column right of it.
                bool withinReach = left < firstColumn && left >= box.left - 5;
                if (withinReach && !Utils.isOverlap(new Boundary(box.top, box.bottom, left + 1, box.left - 1), _boxes))
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(new Boundary(box.top, box.bottom, left + 1, box.right));
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Repeatedly widens every box to the left by one or two columns while the columns next to
        /// it satisfy one of the expansion rules below, then replaces boxes that grew.
        /// </summary>
        private void RetrieveLeftHeader()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                Boundary grown = box;

                // Keep expanding until no rule applies or the left edge reaches column 1.
                while (grown.left > 1)
                {
                    Boundary edgeCol = Utils.LeftCol(grown);
                    Boundary outer1 = Utils.LeftCol(grown, start: -1);
                    Boundary outer2 = Utils.LeftCol(grown, start: -2);
                    Boundary outer3 = Utils.LeftCol(grown, start: -3);

                    // Never grow into the neighbourhood of another candidate box.
                    if (Utils.isOverlap(outer2, _boxes))
                    {
                        break;
                    }

                    bool edgeIsHeader = IsHeaderLeft(edgeCol);
                    int shift;
                    if (!edgeIsHeader
                        && _sheet.sumContent.SubmatrixSum(outer1) > 3
                        && _sheet.sumContent.SubmatrixSum(outer2) == 0)
                    {
                        // One populated column followed by an empty one.
                        shift = 1;
                    }
                    else if (!edgeIsHeader
                        && _sheet.sumContent.SubmatrixSum(outer1) + _sheet.sumContent.SubmatrixSum(outer2) > 3
                        && _sheet.sumContent.SubmatrixSum(outer3) == 0)
                    {
                        // Two columns that are populated together, followed by an empty one.
                        shift = 2;
                    }
                    else if (_sheet.sumContent.SubmatrixSum(outer1) > 5
                        && _sheet.ColContentExistValueDensitySplit(outer1, 5) > 0.2
                        && _sheet.sumBorderRow.SubmatrixSum(outer2) == 0)
                    {
                        // A well populated column whose outer neighbour has no row borders.
                        shift = 1;
                    }
                    else if (_sheet.sumBorderRow.SubmatrixSum(outer1) > 0)
                    {
                        // The adjacent column carries row borders.
                        shift = 1;
                    }
                    else
                    {
                        break;
                    }

                    grown = new Boundary(grown.top, grown.bottom, grown.left - shift, grown.right);
                }

                if (grown.Equals(box))
                {
                    continue;
                }
                boxesToRemove.Add(box);
                boxesToAppend.Add(grown);
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }
        #endregion

        /// <summary>
        /// Shrinks every range from all four sides while the outermost row or column has a
        /// content density below 0.8. Changed ranges are replaced in <paramref name="ranges"/>;
        /// a changed range that ends up with a single row or a single column is dropped.
        /// </summary>
        /// <param name="ranges">Candidate ranges; the list is modified in place.</param>
        /// <returns>The same <paramref name="ranges"/> instance after the update.</returns>
        private List<Boundary> ProProcessReduceToCompact(List<Boundary> ranges)
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in ranges)
            {
                var shrunk = box;
                bool changed = true;

                // Repeat the four-sided pass until a whole pass leaves the box untouched.
                while (changed && shrunk.left <= shrunk.right && shrunk.top <= shrunk.bottom)
                {
                    changed = false;

                    // top edge
                    while (shrunk.top < shrunk.bottom && _sheet.ContentExistValueDensity(Utils.UpRow(shrunk, start: 0)) < 0.8)
                    {
                        shrunk.top = shrunk.top + 1;
                        changed = true;
                    }

                    // bottom edge
                    while (shrunk.bottom > shrunk.top && _sheet.ContentExistValueDensity(Utils.DownRow(shrunk, start: 0)) < 0.8)
                    {
                        shrunk.bottom = shrunk.bottom - 1;
                        changed = true;
                    }

                    // left edge
                    while (shrunk.left < shrunk.right && _sheet.ContentExistValueDensity(Utils.LeftCol(shrunk, start: 0)) < 0.8)
                    {
                        shrunk.left = shrunk.left + 1;
                        changed = true;
                    }

                    // right edge
                    while (shrunk.right > shrunk.left && _sheet.ContentExistValueDensity(Utils.RightCol(shrunk, start: 0)) < 0.8)
                    {
                        shrunk.right = shrunk.right - 1;
                        changed = true;
                    }
                }

                if (box.Equals(shrunk))
                {
                    continue;
                }

                boxesToRemove.Add(box);
                bool hasArea = shrunk.left < shrunk.right && shrunk.top < shrunk.bottom;
                if (hasArea)
                {
                    boxesToAppend.Add(shrunk);
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, ranges);
            return ranges;
        }

        // Trims sparse edges off every box in _boxes, in all four directions.
        // For each direction the edge line is scanned inward over sparse lines; only if that scan
        // ends on a completely empty line is the box edge moved inward past the run of empty lines.
        // The four-direction pass is repeated per box until a pass changes nothing or the box is
        // reduced to a single row or column. Changed boxes are replaced; a changed box that no
        // longer spans more than one row and one column is dropped.
        private void SparseBoundariesTrim()
        {
            // for four directions, skip the sparse edges
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                Boundary newBox = new Boundary(box.top, box.bottom, box.left, box.right);
                bool markChange = true;
                while (markChange && newBox.left < newBox.right && newBox.top < newBox.bottom)
                {
                    markChange = false;
                    #region four directions
                    // left
                    if (newBox.left > 0)
                    {
                        Boundary lineBox = new Boundary(newBox.top, newBox.bottom, newBox.left, newBox.left);
                        // step right over sparse columns (fewer than 3 content cells) until an empty column is hit
                        // NOTE(review): this scan is not bounded by newBox.right; it relies on reaching
                        // an empty or non-sparse column - confirm SubmatrixSum tolerates out-of-range boxes
                        while (_sheet.sumContentExist.SubmatrixSum(lineBox) < 3)
                        {
                            lineBox = new Boundary(lineBox.top, lineBox.bottom, lineBox.left + 1, lineBox.left + 1);
                            if (_sheet.sumContentExist.SubmatrixSum(lineBox) == 0) { break; }
                        }
                        // if the scan ended on an empty column, move the left edge past the run of empty columns
                        while (lineBox.left < newBox.right && _sheet.sumContentExist.SubmatrixSum(lineBox) == 0)
                        {
                            lineBox = new Boundary(lineBox.top, lineBox.bottom, lineBox.left + 1, lineBox.left + 1);
                            markChange = true;
                            newBox.left = lineBox.left;
                        }
                    }
                    // right
                    if (newBox.right <= _sheet.Width)
                    {
                        Boundary lineBox = new Boundary(newBox.top, newBox.bottom, newBox.right, newBox.right);
                        // step left over sparse columns until an empty column is hit (unbounded, as for the left edge)
                        while (_sheet.sumContentExist.SubmatrixSum(lineBox) < 3)
                        {
                            lineBox = new Boundary(lineBox.top, lineBox.bottom, lineBox.left - 1, lineBox.right - 1);
                            if (_sheet.sumContentExist.SubmatrixSum(lineBox) == 0) { break; }
                        }
                        // if the scan ended on an empty column, move the right edge past the run of empty columns
                        while (lineBox.right > newBox.left && _sheet.sumContentExist.SubmatrixSum(lineBox) == 0)
                        {
                            lineBox = new Boundary(lineBox.top, lineBox.bottom, lineBox.left - 1, lineBox.right - 1);
                            markChange = true;
                            newBox.right = lineBox.right;
                        }
                    }
                    // up
                    if (newBox.top > 0)
                    {
                        Boundary lineBox = new Boundary(newBox.top, newBox.top, newBox.left, newBox.right);
                        // step down over non-header rows that are sparse: fewer than 3 content cells,
                        // density below 2 * 0.1, or fewer than 5 content cells in a box wider than 7 columns
                        while (!IsHeaderUp(lineBox) && (_sheet.sumContentExist.SubmatrixSum(lineBox) < 3 || _sheet.ContentExistValueDensity(lineBox) < 2 * 0.1
                            || (_sheet.sumContentExist.SubmatrixSum(lineBox) < 5 && newBox.right - newBox.left + 1 > 7)))
                        {
                            lineBox = new Boundary(lineBox.top + 1, lineBox.bottom + 1, lineBox.left, lineBox.right);
                            if (_sheet.sumContentExist.SubmatrixSum(lineBox) == 0) { break; }
                        }
                        // if the scan ended on an empty row, move the top edge past the run of empty rows
                        while (lineBox.top < newBox.bottom && _sheet.sumContentExist.SubmatrixSum(lineBox) == 0)
                        {
                            lineBox = new Boundary(lineBox.top + 1, lineBox.bottom + 1, lineBox.left, lineBox.right);
                            markChange = true;
                            newBox.top = lineBox.top;
                        }
                    }

                    // down
                    if (newBox.bottom <= _sheet.Height)
                    {
                        Boundary lineBox = new Boundary(newBox.bottom, newBox.bottom, newBox.left, newBox.right);
                        // step up over sparse rows until an empty row is hit (unbounded, as for the left edge)
                        while (_sheet.sumContentExist.SubmatrixSum(lineBox) < 3)
                        {
                            lineBox = new Boundary(lineBox.top - 1, lineBox.top - 1, newBox.left, newBox.right);
                            if (_sheet.sumContentExist.SubmatrixSum(lineBox) == 0) break;
                        }
                        // if the scan ended on an empty row, move the bottom edge past the run of empty rows
                        while (lineBox.bottom > newBox.top && _sheet.sumContentExist.SubmatrixSum(lineBox) == 0)
                        {
                            lineBox = new Boundary(lineBox.top - 1, lineBox.top - 1, newBox.left, newBox.right);
                            markChange = true;
                            newBox.bottom = lineBox.bottom;
                        }
                    }
                    #endregion
                }

                // replace the box if it changed; drop it when it no longer spans more than one row and one column
                if (!box.Equals(newBox))
                {
                    removedBoxes.Add(box);
                    if (newBox.left < newBox.right && newBox.top < newBox.bottom)
                    {
                        appendBoxes.Add(newBox);
                    }
                }
            }

            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Runs the boundary trimming passes over <c>_boxes</c> repeatedly until the number of
        /// distinct boxes at the start of a round equals the number left at its end.
        /// </summary>
        private void SurroundingBoudariesTrim()
        {
            int countAtRoundStart;
            do
            {
                _boxes = Utils.DistinctBoxes(_boxes);
                countAtRoundStart = _boxes.Count;

                // left edge: first column that is not sparse (simple rules)
                FindLeftBoundaryNotSparse();

                // top edge: first row with compact contents
                FindUpBoundaryNotSparse();

                // all four edges: skip sparse lines that are followed by empty lines
                SparseBoundariesTrim();

                // bottom edge: not sparse, and not overlapping the up headers of other boxes
                BottomBoundaryTrim();

                // top edge: skip leading rows that are much sparser than the row below them
                UpBoundaryCompactTrim();

                // drop boxes that have an empty edge in any of the four directions
                NoneBorderFilter();
            }
            while (_boxes.Count != countAtRoundStart);
        }

        /// <summary>
        /// Refines the upper boundary of every box so that it is compact, in particular when a
        /// header exists, by running the up-boundary passes in a fixed order.
        /// </summary>
        private void UpHeaderTrim()
        {
            // first not-sparse row (or header row) becomes the upper bound
            FindUpBoundaryNotSparse();
            // true header, if one exists
            FindUpBoundaryIsHeader();
            // true header that has a sparse row above it, if one exists
            FindUpBoundaryIsClearHeader();

            // first compact header row, tried with progressively looser density thresholds
            double[] lowThresholds = { 0.6, 0.4, 0.2 };
            double[] highThresholds = { 0.8, 0.7, 0.5 };
            for (int pass = 0; pass < lowThresholds.Length; pass++)
            {
                FindUpBoundaryIsCompactHeader(lowThresholds[pass], highThresholds[pass]);
            }
        }

        /// <summary>
        /// Moves the top of every box down to the first row that is not sparse. A box is replaced
        /// only when its top actually moved and it still spans more than one row.
        /// </summary>
        private void FindUpBoundaryNotSparse()
        {
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();
            for (int i = 0; i < _boxes.Count; i++)
            {
                Boundary box = _boxes[i];
                Boundary newBox = new Boundary(box.top, box.bottom, box.left, box.right);

                // Walk down from the top row until a row is found that is neither sparse nor
                // dwarfed by the row below it.
                Boundary upperBoundRowCandidate = new Boundary(box.top, box.top, box.left, box.right);
                while (upperBoundRowCandidate.top < newBox.bottom)
                {
                    // The row directly below the current candidate. It is rebuilt on every
                    // iteration; previously it was fixed at box.top + 1, so once the candidate had
                    // advanced it was compared against a stale row (or against itself).
                    Boundary upperBoundRowCandidateDown = new Boundary(upperBoundRowCandidate.top + 1, upperBoundRowCandidate.top + 1, box.left, box.right);

                    // Sparse: low density, or (for boxes at least 5 columns wide) at most one distinct text.
                    bool isSparseRow = _sheet.ContentExistValueDensity(upperBoundRowCandidate) < 2 * 0.2
                        || ((box.right - box.left + 1) >= 5 && (_sheet.TextDistinctCount(upperBoundRowCandidate) <= 1));

                    // Dwarfed: a non-header row in a wide box holding at most half the content cells
                    // of the row below while being small or of low density itself.
                    bool isDwarfedByRowBelow = !isSparseRow
                        && !IsHeaderUp(upperBoundRowCandidate) && box.right - box.left > 7
                        && 2 * _sheet.sumContentExist.SubmatrixSum(upperBoundRowCandidate) <= _sheet.sumContentExist.SubmatrixSum(upperBoundRowCandidateDown)
                        && (_sheet.sumContentExist.SubmatrixSum(upperBoundRowCandidate) < 7 || _sheet.ContentExistValueDensity(upperBoundRowCandidate) < 0.3 * 2);

                    if (!isSparseRow && !isDwarfedByRowBelow)
                    {
                        break;
                    }

                    upperBoundRowCandidate = new Boundary(upperBoundRowCandidate.top + 1, upperBoundRowCandidate.top + 1, newBox.left, newBox.right);
                }

                newBox.top = upperBoundRowCandidate.top;
                if (!box.Equals(newBox) && newBox.top < newBox.bottom)
                {
                    removedBoxes.Add(box);
                    appendBoxes.Add(newBox);
                }
            }
            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Moves the top of every box down to the first header row found within the first rows of
        /// the box, provided that row is confirmed as a header for the box's data area.
        /// </summary>
        private void FindUpBoundaryIsHeader()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                // Scan down from the top row; stop at the first header row, after passing
                // box.top + 3, or on reaching the bottom row.
                Boundary headerRow = new Boundary(box.top, box.top, box.left, box.right);
                while (!IsHeaderUp(headerRow) && headerRow.top <= box.top + 3 && headerRow.top < box.bottom)
                {
                    int nextTop = headerRow.top + 1;
                    headerRow = new Boundary(nextTop, nextTop, box.left, box.right);
                }

                Boundary trimmed = new Boundary(box.top, box.bottom, box.left, box.right);
                trimmed.top = headerRow.top;

                // Nothing moved: keep the box as it is.
                if (box.Equals(trimmed))
                {
                    continue;
                }

                bool confirmedHeader = IsHeaderUpWithDataArea(headerRow, box);
                if (confirmedHeader && trimmed.top < trimmed.bottom)
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(trimmed);
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// For boxes wider than 4 columns, moves the top down to the first row whose content
        /// density reaches <c>2 * threshDensityLow</c>, provided that row is confirmed as a
        /// header for the box's data area.
        /// </summary>
        /// <param name="threshDensityLow">Half of the minimum density a row needs to stop the scan.</param>
        /// <param name="threshDensityHigh">Not read anywhere in this method.</param>
        private void FindUpBoundaryIsCompactHeader(double threshDensityLow, double threshDensityHigh)
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();
            double minDensity = 2 * threshDensityLow;

            foreach (var box in _boxes)
            {
                // Narrow boxes are left alone.
                int width = box.right - box.left + 1;
                if (width <= 4)
                {
                    continue;
                }

                // Scan down from the top row while the row is too thin, for at most 6 rows and
                // never past the bottom row.
                Boundary headerRow = new Boundary(box.top, box.top, box.left, box.right);
                while (_sheet.ContentExistValueDensity(headerRow) < minDensity && headerRow.top < box.top + 6 && headerRow.top < box.bottom)
                {
                    int nextTop = headerRow.top + 1;
                    headerRow = new Boundary(nextTop, nextTop, box.left, box.right);
                }

                Boundary trimmed = new Boundary(box.top, box.bottom, box.left, box.right);
                trimmed.top = headerRow.top;

                // Skip when nothing moved or when the scan ran out without finding a dense row.
                if (box.Equals(trimmed))
                {
                    continue;
                }
                if (_sheet.ContentExistValueDensity(headerRow) < minDensity)
                {
                    continue;
                }

                bool confirmedHeader = IsHeaderUpWithDataArea(headerRow, box);
                if (confirmedHeader && trimmed.top < trimmed.bottom)
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(trimmed);
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Drops the left-most column of every box whose left-most column holds at most one
        /// distinct string.
        /// </summary>
        private void LeftHeaderTrim()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                int distinctInLeftColumn = Utils.DistinctStrs(_sheet.contentStrs, box.top, box.bottom, box.left, box.left);
                if (distinctInLeftColumn > 1)
                {
                    continue;
                }

                // NOTE(review): for a single-column box this produces left > right - confirm
                // that such boxes are filtered out downstream.
                boxesToRemove.Add(box);
                boxesToAppend.Add(new Boundary(box.top, box.bottom, box.left + 1, box.right));
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Moves the bottom of every box up over trailing rows that are thin (density below
        /// <c>0.3 * 2</c>) or, for boxes wider than 3 columns, hold at most one distinct string.
        /// The top row is never trimmed away.
        /// </summary>
        private void BottomTrim()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                bool isWide = box.right - box.left + 1 > 3;

                int bottom = box.bottom;
                for (; bottom > box.top; bottom--)
                {
                    Boundary row = new Boundary(bottom, bottom, box.left, box.right);
                    bool trimRow = _sheet.ContentExistValueDensity(row) < 0.3 * 2
                        || (isWide && Utils.DistinctStrs(_sheet.contentStrs, bottom, bottom, box.left, box.right) <= 1);
                    if (!trimRow)
                    {
                        break;
                    }
                }

                if (bottom < box.bottom)
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(new Boundary(box.top, bottom, box.left, box.right));
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Moves the top of every box down over leading rows that hold at most one distinct
        /// string. The bottom row is never trimmed away.
        /// </summary>
        private void UpTrimSimple()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                int top = box.top;
                for (; top < box.bottom; top++)
                {
                    int distinctInRow = Utils.DistinctStrs(_sheet.contentStrs, top, top, box.left, box.right);
                    if (distinctInRow > 1)
                    {
                        break;
                    }
                }

                if (top > box.top)
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(new Boundary(top, box.bottom, box.left, box.right));
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Moves the top of every box down over leading rows whose content density is at most half
        /// that of the row below them, stopping early at a header row that is followed by a
        /// non-header row. The move is kept only when the row it stops on has more than 0.6 times
        /// the density of the row below it.
        /// </summary>
        private void UpBoundaryCompactTrim()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                // upperRow is the candidate top row, lowerRow the row right below it.
                Boundary upperRow = new Boundary(box.top, box.top, box.left, box.right);
                Boundary lowerRow = new Boundary(box.top + 1, box.top + 1, box.left, box.right);

                // Slide the pair down, for at most 6 rows and without lowerRow reaching the bottom.
                while ((!IsHeaderUp(upperRow) || IsHeaderUp(lowerRow))
                    && 2 * _sheet.ContentExistValueDensity(upperRow) <= _sheet.ContentExistValueDensity(lowerRow)
                    && upperRow.top < box.top + 6 && lowerRow.top < box.bottom)
                {
                    int nextLowerTop = lowerRow.top + 1;
                    upperRow = new Boundary(upperRow.top + 1, upperRow.top + 1, box.left, box.right);
                    lowerRow = new Boundary(nextLowerTop, nextLowerTop, box.left, box.right);
                }

                Boundary trimmed = new Boundary(box.top, box.bottom, box.left, box.right);
                trimmed.top = upperRow.top;

                // Nothing moved: keep the box as it is.
                if (box.Equals(trimmed))
                {
                    continue;
                }
                // The row the scan stopped on is still too thin compared with the row below it.
                if (_sheet.ContentExistValueDensity(upperRow) <= 0.6 * _sheet.ContentExistValueDensity(lowerRow))
                {
                    continue;
                }

                if (trimmed.top < trimmed.bottom)
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(trimmed);
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Looks, near the top of every box, for a sparse row followed by a non-sparse row, and
        /// moves the top of the box to that non-sparse row when it is confirmed as a header for
        /// the box's data area.
        /// </summary>
        private void FindUpBoundaryIsClearHeader()
        {
            var boxesToRemove = new HashSet<Boundary>();
            var boxesToAppend = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                Boundary row = new Boundary(box.top, box.top, box.left, box.right);

                // Step 1: skip rows with more than 3 content cells, for at most 6 rows and never
                // past the bottom row, to reach the first sparse row.
                while (_sheet.sumContentExist.SubmatrixSum(row) > 3 && row.top < box.top + 6 && row.top < box.bottom)
                {
                    int nextTop = row.top + 1;
                    row = new Boundary(nextTop, nextTop, box.left, box.right);
                }

                // No sparse row was reached: leave the box alone.
                if (_sheet.sumContentExist.SubmatrixSum(row) > 3)
                {
                    continue;
                }

                // Step 2: skip rows with fewer than 3 content cells. This only advances while the
                // row index is still below box.top + 2 and above the bottom row.
                while (_sheet.sumContentExist.SubmatrixSum(row) < 3 && row.top < box.top + 2 && row.top < box.bottom)
                {
                    int nextTop = row.top + 1;
                    row = new Boundary(nextTop, nextTop, box.left, box.right);
                }

                Boundary trimmed = new Boundary(box.top, box.bottom, box.left, box.right);
                trimmed.top = row.top;

                // Skip when nothing moved or when the row reached is still sparse.
                if (box.Equals(trimmed))
                {
                    continue;
                }
                if (_sheet.sumContentExist.SubmatrixSum(row) < 3)
                {
                    continue;
                }

                bool confirmedHeader = IsHeaderUpWithDataArea(row, box);
                if (confirmedHeader && trimmed.top < trimmed.bottom)
                {
                    boxesToRemove.Add(box);
                    boxesToAppend.Add(trimmed);
                }
            }

            Utils.RemoveAndAppendCandidates(boxesToRemove, boxesToAppend, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Trims the bottom edge of every candidate box in <c>_boxes</c>.
        /// </summary>
        /// <remarks>
        /// Each box goes through two passes:
        /// <list type="number">
        /// <item>The bottom row is moved upwards for as long as
        /// <see cref="IsTrimmableBottomRow"/> accepts it and it is still below the top row.</item>
        /// <item>Starting from the new bottom, up to five header-like rows are skipped
        /// (never leaving the box). If fewer than three were skipped and the row reached
        /// is empty, the bottom is moved further up past the run of empty rows.</item>
        /// </list>
        /// A box whose bottom changed is removed from <c>_boxes</c>; the trimmed box is
        /// added back only when its top is still strictly above its bottom.
        /// </remarks>
        private void BottomBoundaryTrim()
        {
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                Boundary newBox = new Boundary(box.top, box.bottom, box.left, box.right);

                // Pass 1: find the lowest row that is neither sparse nor a merged-cell row.
                // Merged ranges usually sit at the top of a box, not at its bottom.
                int bottomLine = box.bottom;
                while (bottomLine > box.top && IsTrimmableBottomRow(box, bottomLine))
                {
                    bottomLine -= 1;
                }
                newBox.bottom = bottomLine;

                // Pass 2: if the bottom of the box overlaps another box's header, skip it.
                // The scan is bounded by the moving cursor so it never leaves the box
                // (the previous guard tested the loop-invariant bottomLine instead).
                int headerScanLine = bottomLine;
                int headerRowCount = 0;
                while (headerRowCount < 5 && headerScanLine > box.top
                    && _sheet.sumContentExist.SubmatrixSum(new Boundary(headerScanLine, headerScanLine, box.left, box.right)) >= 2
                    && HeaderRate(new Boundary(headerScanLine, headerScanLine, box.left, box.right), step: 0) > 0.6)
                {
                    headerRowCount += 1;
                    headerScanLine -= 1;
                }

                // Headers usually have fewer than three lines; only then, and only when
                // an empty row separates them from the rest, cut the box above that gap.
                if (headerRowCount < 3
                    && _sheet.sumContentExist.SubmatrixSum(new Boundary(headerScanLine, headerScanLine, box.left, box.right)) == 0)
                {
                    while (headerScanLine > box.top
                        && _sheet.sumContentExist.SubmatrixSum(new Boundary(headerScanLine, headerScanLine, box.left, box.right)) == 0)
                    {
                        headerScanLine -= 1;
                    }
                    newBox.bottom = headerScanLine;
                }

                if (box.Equals(newBox))
                {
                    continue;
                }

                removedBoxes.Add(box);
                if (newBox.top < newBox.bottom)
                {
                    appendBoxes.Add(newBox);
                }
            }

            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }

        /// <summary>
        /// Decides whether <paramref name="row"/> of <paramref name="box"/> should be cut
        /// off the bottom of the box by <see cref="BottomBoundaryTrim"/>.
        /// </summary>
        /// <param name="box">The candidate box being trimmed; only its edges are read.</param>
        /// <param name="row">Index of the row currently acting as the bottom of the box.</param>
        /// <returns>
        /// <c>true</c> when the row matches one of the merged-cell or sparsity rules
        /// (boxes wider than six columns only), or when the box has at least two columns
        /// and the row's <c>sumContentExist</c> sum is below three.
        /// </returns>
        private bool IsTrimmableBottomRow(Boundary box, int row)
        {
            int width = box.right - box.left + 1;
            Boundary fullRow = new Boundary(row, row, box.left, box.right);

            if (width > 6)
            {
                // Merged cells in the bottom row, ignoring the leftmost columns so that
                // a left header does not trigger the rule.
                if (_sheet.ExistsMerged(new Boundary(row, row, Math.Min(box.left + 5, box.right - 1), box.right)))
                {
                    return true;
                }

                // Merged cells in the bottom row that stand apart: none in the rows
                // from (row - 4, clamped to the box top) to (row - 2).
                if (_sheet.ExistsMerged(fullRow)
                    && !_sheet.ExistsMerged(new Boundary(Math.Max(box.top, row - 4), row - 2, box.left, box.right)))
                {
                    return true;
                }

                // Sparsity: empty first column and low density across the row.
                if (_sheet.sumContentExist.SubmatrixSum(new Boundary(row, row, box.left, box.left)) == 0
                    && _sheet.ContentExistValueDensity(fullRow) < 0.15 * 2)
                {
                    return true;
                }

                // Sparsity: empty first three columns and a looser density threshold.
                if (_sheet.sumContentExist.SubmatrixSum(new Boundary(row, row, box.left, box.left + 2)) == 0
                    && _sheet.ContentExistValueDensity(fullRow) < 0.3 * 2)
                {
                    return true;
                }
            }

            // Fallback for any box at least two columns wide: a nearly empty row.
            return width >= 2 && _sheet.sumContentExist.SubmatrixSum(fullRow) < 3;
        }

        /// <summary>
        /// Moves the left edge of every candidate box in <c>_boxes</c> to the right,
        /// one column at a time, while the leftmost column is judged sparse by a few
        /// simple rules.
        /// </summary>
        /// <remarks>
        /// A box whose left edge moved is removed from <c>_boxes</c>; the narrowed box is
        /// added back only when it still spans more than one column and more than one row.
        /// </remarks>
        private void FindLeftBoundaryNotSparse()
        {
            var removedBoxes = new HashSet<Boundary>();
            var appendBoxes = new HashSet<Boundary>();

            foreach (var box in _boxes)
            {
                // Top and bottom are never changed here, so the vertical extent is fixed.
                int rowSpan = box.bottom - box.top;
                int height = rowSpan + 1;

                Boundary trimmed = new Boundary(box.top, box.bottom, box.left, box.right);

                // Keep dropping the leftmost column while more than one column remains
                // and the left edge is a positive index.
                while (trimmed.left < trimmed.right && trimmed.left > 0)
                {
                    Boundary column = new Boundary(trimmed.top, trimmed.bottom, trimmed.left, trimmed.left);

                    bool dropColumn =
                        // at least five rows, density not reaching 0.7, at most one distinct text
                        (height >= 5
                            && !(_sheet.ContentExistValueDensity(column) >= 0.7)
                            && _sheet.TextDistinctCount(column) <= 1)
                        // more than four rows with a content sum below five
                        || (rowSpan > 3 && _sheet.sumContentExist.SubmatrixSum(column) < 5)
                        // more than ten rows with a small content sum or a low density
                        || (height > 10
                            && (_sheet.sumContentExist.SubmatrixSum(column) < 7
                                || _sheet.ContentExistValueDensity(column) < 2 * 0.15));

                    if (!dropColumn)
                    {
                        break;
                    }

                    trimmed.left += 1;
                }

                if (box.Equals(trimmed))
                {
                    continue;
                }

                removedBoxes.Add(box);
                if (trimmed.left < trimmed.right && trimmed.top < trimmed.bottom)
                {
                    appendBoxes.Add(trimmed);
                }
            }

            Utils.RemoveAndAppendCandidates(removedBoxes, appendBoxes, _boxes);
            _boxes = Utils.DistinctBoxes(_boxes);
        }
    }
}
