Option Explicit
' =========================================================
' SHEBANQ CSV -> phrase_summary.csv + clause_summary.csv
'              + appendix_b_clause_candidates_v3.csv
'              + appendix_b_verse_candidates_v3.csv
'              + count_by_clause_label.csv
'              + count_by_verse_label.csv
'              + count_by_reason.csv
'
' Version 3:
' - verse-level aggregation
' - free 'et vs suffixed 'et distinction
' - clause/verse provisional labels
' - automatic counts by label and reason
' =========================================================
' ---------------------------------------------------------
' Main script.
' Reads the input CSV, groups word rows into phrase and clause
' records, derives clause and verse labels, counts labels and
' reasons, and writes the seven output CSV files.
'
' Usage (both arguments optional, but they must be given together):
'   cscript //nologo shebanq_appendix_b_v3.vbs [input.csv output_folder]
' Without arguments the built-in defaults below are used.
' ---------------------------------------------------------
Dim inFile, outDir, dtsnow, fso
Dim lines, headerMap
Dim clauses, phrases, verses
Dim i, row
Dim book, chapter, verse, txtH, lexemeV, textT, gloss
Dim rela, subphraseNo, det, func, relaA
Dim phraseA, phraseNo, clauseA, clauseNo
Dim clauseKey, phraseKey, verseKey
Dim pKey, pRec, cKey, cRec, vKey, vRec
Dim clauseLabelCounts, verseLabelCounts, reasonCounts

inFile = "2021_q7273_data_book_01_Genesis.csv"
outDir = "output_folder_v3_book_01_Genesis"
If WScript.Arguments.Count >= 2 Then
    inFile = WScript.Arguments(0)
    outDir = WScript.Arguments(1)
End If

dtsnow = Now()
WScript.Echo "Start: " & dtsnow

Set fso = CreateObject("Scripting.FileSystemObject")
If Not fso.FileExists(inFile) Then
    WScript.Echo "Input file not found: " & inFile
    WScript.Quit 1
End If
' The writers save straight into outDir, so it has to exist beforehand.
If Not fso.FolderExists(outDir) Then fso.CreateFolder outDir

lines = ReadUtf8Lines(inFile)
dtsnow = Now()
WScript.Echo dtsnow & " Input file rows=" & UBound(lines)
If UBound(lines) < 1 Then
    WScript.Echo "Input file appears empty."
    WScript.Quit 1
End If

Set headerMap = CreateObject("Scripting.Dictionary")
headerMap.CompareMode = 1 ' TextCompare
BuildHeaderMap lines(0), headerMap

Set clauses = CreateObject("Scripting.Dictionary")
clauses.CompareMode = 1
Set phrases = CreateObject("Scripting.Dictionary")
phrases.CompareMode = 1
Set verses = CreateObject("Scripting.Dictionary")
verses.CompareMode = 1

' one input row = one word; fold each word into its clause and phrase record
For i = 1 To UBound(lines)
    If Trim(lines(i)) <> "" Then
        row = ParseCsvLine(lines(i))
        book        = GetField(row, headerMap, "book")
        chapter     = GetField(row, headerMap, "chapter")
        verse       = GetField(row, headerMap, "verse")
        txtH        = GetField(row, headerMap, "text-h")
        lexemeV     = GetField(row, headerMap, "lexeme-v")
        textT       = GetField(row, headerMap, "text-t")
        gloss       = GetField(row, headerMap, "gloss")
        rela        = GetField(row, headerMap, "rela")
        subphraseNo = GetField(row, headerMap, "subphrase#")
        det         = GetField(row, headerMap, "determination")
        func        = GetField(row, headerMap, "function")
        relaA       = GetField(row, headerMap, "rela_a")
        phraseA     = GetField(row, headerMap, "phrase_a#")
        phraseNo    = GetField(row, headerMap, "phrase#")
        clauseA     = GetField(row, headerMap, "clause_a#")
        clauseNo    = GetField(row, headerMap, "clause#")
        ' fall back to the atom numbers when the clause/phrase number is blank
        If Trim(clauseNo) = "" Then clauseNo = clauseA
        If Trim(phraseNo) = "" Then phraseNo = phraseA
        If Trim(book) <> "" And Trim(chapter) <> "" And Trim(verse) <> "" Then
            clauseKey = book & "|" & chapter & "|" & verse & "|" & clauseNo
            phraseKey = clauseKey & "|" & phraseNo
            If Not clauses.Exists(clauseKey) Then
                clauses.Add clauseKey, NewClauseRecord(book, chapter, verse, clauseNo)
            End If
            AddWordToClause clauses(clauseKey), txtH, func, det, gloss, lexemeV, textT
            If Not phrases.Exists(phraseKey) Then
                phrases.Add phraseKey, NewPhraseRecord(book, chapter, verse, clauseNo, phraseNo, func, det)
            End If
            AddWordToPhrase phrases(phraseKey), txtH, gloss, lexemeV, textT, rela, subphraseNo, relaA
        End If
    End If
Next

' attach phrase info back to clauses
For Each pKey In phrases.Keys
    Set pRec = phrases(pKey)
    cKey = pRec("clauseKey")
    If clauses.Exists(cKey) Then
        AddPhraseToClause clauses(cKey), pRec
    End If
Next

' finalize clauses (must run after every phrase has been attached)
For Each clauseKey In clauses.Keys
    Set cRec = clauses(clauseKey)
    FinalizeClause cRec
Next

' build verses from the finalized clauses
For Each clauseKey In clauses.Keys
    Set cRec = clauses(clauseKey)
    verseKey = cRec("book") & "|" & cRec("chapter") & "|" & cRec("verse")
    If Not verses.Exists(verseKey) Then
        verses.Add verseKey, NewVerseRecord(cRec("book"), cRec("chapter"), cRec("verse"))
    End If
    AddClauseToVerse verses(verseKey), cRec
Next

' finalize verses
For Each vKey In verses.Keys
    Set vRec = verses(vKey)
    FinalizeVerse vRec
Next

' counts
Set clauseLabelCounts = CreateObject("Scripting.Dictionary")
clauseLabelCounts.CompareMode = 1
Set verseLabelCounts = CreateObject("Scripting.Dictionary")
verseLabelCounts.CompareMode = 1
Set reasonCounts = CreateObject("Scripting.Dictionary")
reasonCounts.CompareMode = 1
For Each clauseKey In clauses.Keys
    Set cRec = clauses(clauseKey)
    AddCount clauseLabelCounts, cRec("clauseProvisionalLabel")
    AddPipeReasonsToCounts reasonCounts, "clause", cRec("clauseLabelReason")
Next
For Each vKey In verses.Keys
    Set vRec = verses(vKey)
    AddCount verseLabelCounts, vRec("verseProvisionalLabel")
    AddPipeReasonsToCounts reasonCounts, "verse", vRec("verseLabelReason")
Next

' write files
WritePhraseSummary outDir & "\phrase_summary.csv", phrases
WriteClauseSummary outDir & "\clause_summary.csv", clauses
WriteClauseCandidates outDir & "\appendix_b_clause_candidates_v3.csv", clauses
WriteVerseCandidates outDir & "\appendix_b_verse_candidates_v3.csv", verses
WriteCountFile outDir & "\count_by_clause_label.csv", "clause_provisional_label,count", clauseLabelCounts
WriteCountFile outDir & "\count_by_verse_label.csv", "verse_provisional_label,count", verseLabelCounts
WriteReasonCountFile outDir & "\count_by_reason.csv", reasonCounts

dtsnow = Now()
WScript.Echo dtsnow & " Done."
WScript.Echo "phrase_summary.csv"
WScript.Echo "clause_summary.csv"
WScript.Echo "appendix_b_clause_candidates_v3.csv"
WScript.Echo "appendix_b_verse_candidates_v3.csv"
WScript.Echo "count_by_clause_label.csv"
WScript.Echo "count_by_verse_label.csv"
WScript.Echo "count_by_reason.csv"
' =========================================================
' Record constructors
' =========================================================
' Creates the per-clause accumulator: a case-insensitive Dictionary holding
' the clause identity, empty text fields, zeroed counters/flags, default
' label values, and a nested "phraseKeys" Dictionary.
Function NewClauseRecord(book, chapter, verse, clauseNo)
    Dim rec, fieldName
    Set rec = CreateObject("Scripting.Dictionary")
    rec.CompareMode = 1

    ' identity
    rec.Add "book", book
    rec.Add "chapter", chapter
    rec.Add "verse", verse
    rec.Add "clauseNo", clauseNo

    ' accumulated text
    rec.Add "clauseTextH", ""
    rec.Add "functionsSeen", ""

    ' counters and 0/1 flags filled while words and phrases are added
    For Each fieldName In Array("phraseCount", "objcPhraseCount", "freeEtCount", _
                                "suffixedEtCount", "repeatedEt", "hasAppo", "likelyNaming", _
                                "hasCallGloss", "hasNameGloss", "detSeenDet", "detSeenUnd", _
                                "objcDetSeenDet", "objcDetSeenUnd", "possibleMixedDetermination")
        rec.Add fieldName, 0
    Next

    rec.Add "detClass", "EMPTY"
    rec.Add "objcDetSeq", ""
    rec.Add "objcPhraseTexts", ""

    ' counters and 0/1 flags derived from the object phrases / finalization
    For Each fieldName In Array("objcInternalParallel", "maxObjcSubphraseCount", _
                                "objcBareParallel", "objcLargeParallel", "clauseComplete", _
                                "possibleSplitChain", "isDetCandidate")
        rec.Add fieldName, 0
    Next

    ' labelling defaults
    rec.Add "clauseProvisionalLabel", "NOISE"
    rec.Add "clauseReviewNeeded", "N"
    rec.Add "clauseLabelReason", ""

    ' set of phrase numbers already attached to this clause
    Set rec("phraseKeys") = CreateObject("Scripting.Dictionary")
    rec("phraseKeys").CompareMode = 1

    Set NewClauseRecord = rec
End Function
' Creates the per-phrase accumulator: a case-insensitive Dictionary with the
' phrase identity (including the key of its owning clause), the function and
' determination passed in, zeroed flags, and a nested "subphrases" Dictionary.
Function NewPhraseRecord(book, chapter, verse, clauseNo, phraseNo, func, det)
    Dim rec, flagName
    Set rec = CreateObject("Scripting.Dictionary")
    rec.CompareMode = 1

    rec.Add "book", book
    rec.Add "chapter", chapter
    rec.Add "verse", verse
    rec.Add "clauseNo", clauseNo
    ' same key format the main loop uses for the clauses dictionary
    rec.Add "clauseKey", book & "|" & chapter & "|" & verse & "|" & clauseNo
    rec.Add "phraseNo", phraseNo
    rec.Add "function", func
    rec.Add "determination", det
    rec.Add "phraseTextH", ""

    For Each flagName In Array("subphraseCount", "containsFreeEt", "containsSuffixedEt", _
                               "startsWithWaw", "hasAppo", "hasPar")
        rec.Add flagName, 0
    Next

    rec.Add "relaValues", ""

    ' set of distinct subphrase numbers seen in this phrase
    Set rec("subphrases") = CreateObject("Scripting.Dictionary")
    rec("subphrases").CompareMode = 1

    Set NewPhraseRecord = rec
End Function
' Creates the per-verse accumulator: a case-insensitive Dictionary with the
' verse identity, zeroed totals/flags, default label values, and two empty
' "manual" columns that the script itself never fills.
Function NewVerseRecord(book, chapter, verse)
    Dim rec, counterName
    Set rec = CreateObject("Scripting.Dictionary")
    rec.CompareMode = 1

    rec.Add "book", book
    rec.Add "chapter", chapter
    rec.Add "verse", verse
    rec.Add "verseTextH", ""

    For Each counterName In Array("verseClauseCount", "verseCandidateClauseCount", _
                                  "verseFreeEtTotal", "verseSuffixedEtTotal", _
                                  "verseHasAppoCandidate", "verseHasPossibleCoreTarget", _
                                  "verseHasInternalMixNearMiss", "verseHasCoordinationOnly", _
                                  "verseHasSplitChain", "verseHasDetOnlyCandidate", _
                                  "verseHasUndOnlyCandidate")
        rec.Add counterName, 0
    Next

    rec.Add "verseClauseLabelSet", ""
    rec.Add "verseProvisionalLabel", "NOISE"
    rec.Add "verseReviewNeeded", "N"
    rec.Add "verseLabelReason", ""
    rec.Add "manualLabel", ""
    rec.Add "manualNotes", ""

    Set NewVerseRecord = rec
End Function
' =========================================================
' Clause / Phrase updaters
' =========================================================
' Folds one word row into its clause record.
'   cRec    - clause Dictionary created by NewClauseRecord (mutated in place)
'   txtH    - word text; appended to "clauseTextH" separated by a space
'   func    - phrase function code; collected once each in "functionsSeen"
'   det     - determination; "det"/"und" raise detSeenDet/detSeenUnd
'   gloss   - gloss; drives the 'et counters and the call/name flags
'   lexemeV - lexeme, forwarded to IsSuffixedEt
'   textT   - accepted for signature compatibility; not used here
Sub AddWordToClause(ByRef cRec, ByVal txtH, ByVal func, ByVal det, ByVal gloss, ByVal lexemeV, ByVal textT)
    Const SEP = " | "
    Dim seen, glossKey, detKey

    If Trim(txtH) <> "" Then
        If cRec("clauseTextH") = "" Then
            cRec("clauseTextH") = txtH
        Else
            cRec("clauseTextH") = cRec("clauseTextH") & " " & txtH
        End If
    End If

    If Trim(func) <> "" Then
        seen = cRec("functionsSeen")
        If seen = "" Then
            cRec("functionsSeen") = func
        ElseIf InStr(1, SEP & seen & SEP, SEP & func & SEP, vbTextCompare) = 0 Then
            ' Compare whole tokens: a plain substring search would drop a code
            ' that merely occurs inside a code stored earlier.
            cRec("functionsSeen") = seen & SEP & func
        End If
    End If

    ' a word is counted as free 'et or suffixed 'et, never both
    If IsFreeEt(gloss, txtH) Then
        cRec("freeEtCount") = cRec("freeEtCount") + 1
    ElseIf IsSuffixedEt(gloss, lexemeV, txtH) Then
        cRec("suffixedEtCount") = cRec("suffixedEtCount") + 1
    End If

    glossKey = LCase(Trim(gloss))
    If glossKey = "call" Then cRec("hasCallGloss") = 1
    If glossKey = "name" Then cRec("hasNameGloss") = 1

    detKey = LCase(Trim(det))
    If detKey = "det" Then cRec("detSeenDet") = 1
    If detKey = "und" Then cRec("detSeenUnd") = 1
End Sub
' Folds one word row into its phrase record.
'   pRec        - phrase Dictionary created by NewPhraseRecord (mutated in place)
'   txtH        - word text; appended to "phraseTextH" separated by a space
'   gloss       - gloss; used for the free/suffixed 'et flags
'   lexemeV     - lexeme, forwarded to IsSuffixedEt
'   textT       - accepted for signature compatibility; not used here
'   rela, relaA - relation codes, forwarded to AddRelaToPhrase
'   subphraseNo - subphrase number; distinct non-blank values are counted
Sub AddWordToPhrase(ByRef pRec, ByVal txtH, ByVal gloss, ByVal lexemeV, ByVal textT, ByVal rela, ByVal subphraseNo, ByVal relaA)
    Dim isFirstWord
    ' No text stored yet means this word opens the phrase. This is the same
    ' condition the original length comparison expressed after appending.
    isFirstWord = (pRec("phraseTextH") = "")

    If Trim(txtH) <> "" Then
        If isFirstWord Then
            pRec("phraseTextH") = txtH
        Else
            pRec("phraseTextH") = pRec("phraseTextH") & " " & txtH
        End If
    End If

    If IsFreeEt(gloss, txtH) Then pRec("containsFreeEt") = 1
    If IsSuffixedEt(gloss, lexemeV, txtH) Then pRec("containsSuffixedEt") = 1

    If isFirstWord Then
        ' ChrW(&H05D5) is the letter waw. Written as a code point so the check
        ' does not depend on the encoding the .vbs file was saved in.
        If Left(NormalizeHebrew(txtH), 1) = ChrW(&H05D5) Then
            pRec("startsWithWaw") = 1
        End If
    End If

    If Trim(subphraseNo) <> "" Then
        If Not pRec("subphrases").Exists(subphraseNo) Then
            pRec("subphrases").Add subphraseNo, True
            pRec("subphraseCount") = pRec("subphrases").Count
        End If
    End If

    AddRelaToPhrase pRec, rela
    AddRelaToPhrase pRec, relaA
End Sub
' Records one relation code on a phrase record.
' Blank and "NA" values are ignored. Every other value is appended once
' (case-insensitively) to the " | "-separated "relaValues" list; "APPO" and
' "PAR" additionally raise the hasAppo / hasPar flags.
Sub AddRelaToPhrase(ByRef pRec, ByVal r)
    Const SEP = " | "
    Dim code, known

    code = UCase(Trim(r))
    If code = "" Or code = "NA" Then Exit Sub

    known = pRec("relaValues")
    If known = "" Then
        pRec("relaValues") = r
    ElseIf InStr(1, SEP & known & SEP, SEP & r & SEP, vbTextCompare) = 0 Then
        ' Whole-token comparison: with a plain substring search a short code
        ' such as "par" was silently dropped once "Para" had been stored.
        pRec("relaValues") = known & SEP & r
    End If

    If code = "APPO" Then pRec("hasAppo") = 1
    If code = "PAR" Then pRec("hasPar") = 1
End Sub
' Merges a completed phrase record into its clause record.
' Every phrase updates the distinct-phrase count and propagates hasAppo.
' Phrases whose function is OBJC additionally update the object-phrase
' counters, the determination flags and sequence, the collected object
' phrase texts, the internal-parallel flag and the maximum subphrase count.
Sub AddPhraseToClause(ByRef cRec, ByRef pRec)
    Dim pKey, detKey, isFirstObjc

    pKey = pRec("phraseNo")
    If Not cRec("phraseKeys").Exists(pKey) Then
        cRec("phraseKeys").Add pKey, True
        cRec("phraseCount") = cRec("phraseKeys").Count
    End If

    If pRec("hasAppo") = 1 Then cRec("hasAppo") = 1

    If UCase(Trim(pRec("function"))) <> "OBJC" Then Exit Sub

    ' Decide "first object phrase" from the counter, not from objcDetSeq being
    ' empty: an empty determination on the first phrase used to leave the
    ' sequence empty, so the next phrase overwrote the collected phrase text.
    isFirstObjc = (CLng(cRec("objcPhraseCount")) = 0)
    cRec("objcPhraseCount") = cRec("objcPhraseCount") + 1

    detKey = LCase(Trim(pRec("determination")))
    If detKey = "det" Then cRec("objcDetSeenDet") = 1
    If detKey = "und" Then cRec("objcDetSeenUnd") = 1

    If isFirstObjc Then
        cRec("objcDetSeq") = pRec("determination")
        cRec("objcPhraseTexts") = pRec("phraseTextH")
    Else
        cRec("objcDetSeq") = cRec("objcDetSeq") & " | " & pRec("determination")
        cRec("objcPhraseTexts") = cRec("objcPhraseTexts") & " || " & pRec("phraseTextH")
    End If

    If pRec("hasPar") = 1 Then cRec("objcInternalParallel") = 1

    If CLng(pRec("subphraseCount")) > CLng(cRec("maxObjcSubphraseCount")) Then
        cRec("maxObjcSubphraseCount") = pRec("subphraseCount")
    End If
End Sub
' =========================================================
' Finalizers
' =========================================================
' Derives the final clause-level fields once all words and phrases have been
' added: repeatedEt, detClass / possibleMixedDetermination, likelyNaming,
' the objc parallel flags, clauseComplete, possibleSplitChain, isDetCandidate,
' clauseProvisionalLabel, clauseReviewNeeded and clauseLabelReason.
'
' The reasons are collected in a local variable and stored at the end.
' Passing cRec("clauseLabelReason") straight to AddReason's ByRef parameter
' only updates a temporary copy of the Dictionary item, so nothing was ever
' written back to the record.
Sub FinalizeClause(ByRef cRec)
    Dim reasons
    reasons = cRec("clauseLabelReason")

    If CLng(cRec("freeEtCount")) >= 2 Then
        cRec("repeatedEt") = 1
        AddReason reasons, "free_et_ge_2"
    End If

    ' determination class of the object phrases
    If cRec("objcDetSeenDet") = 1 And cRec("objcDetSeenUnd") = 1 Then
        cRec("possibleMixedDetermination") = 1
        cRec("detClass") = "MIXED"
        AddReason reasons, "det_mixed"
    ElseIf cRec("objcDetSeenDet") = 1 Then
        cRec("detClass") = "DET_ONLY"
    ElseIf cRec("objcDetSeenUnd") = 1 Then
        cRec("detClass") = "UND_ONLY"
    Else
        cRec("detClass") = "EMPTY"
    End If

    If cRec("hasCallGloss") = 1 And cRec("hasNameGloss") = 1 Then
        cRec("likelyNaming") = 1
        AddReason reasons, "likely_naming"
    End If

    If cRec("hasAppo") = 1 Then AddReason reasons, "apposition_present"
    If cRec("objcInternalParallel") = 1 Then AddReason reasons, "objc_internal_parallel"
    If CLng(cRec("maxObjcSubphraseCount")) >= 2 Then AddReason reasons, "objc_subphrases_ge_2"

    If cRec("objcInternalParallel") = 1 And CLng(cRec("freeEtCount")) = 0 And CLng(cRec("maxObjcSubphraseCount")) >= 2 Then
        cRec("objcBareParallel") = 1
    End If
    If cRec("objcInternalParallel") = 1 And CLng(cRec("maxObjcSubphraseCount")) >= 4 Then
        cRec("objcLargeParallel") = 1
    End If

    ' a clause that does not end the verse may continue in the next clause
    cRec("clauseComplete") = ClauseEndsWithSofPasuq(cRec("clauseTextH"))
    If cRec("clauseComplete") = 0 Then
        If CLng(cRec("freeEtCount")) >= 1 Or cRec("objcInternalParallel") = 1 Or CLng(cRec("objcPhraseCount")) >= 1 Then
            cRec("possibleSplitChain") = 1
            AddReason reasons, "possible_split_chain"
        End If
    End If

    If CLng(cRec("objcPhraseCount")) >= 1 Then
        If CLng(cRec("freeEtCount")) >= 1 Or cRec("objcInternalParallel") = 1 Or CLng(cRec("maxObjcSubphraseCount")) >= 2 Then
            cRec("isDetCandidate") = 1
        End If
    End If

    cRec("clauseLabelReason") = reasons

    ' Label precedence: the first matching rule wins.
    If cRec("likelyNaming") = 1 Then
        cRec("clauseProvisionalLabel") = "NOISE"
    ElseIf cRec("hasAppo") = 1 Then
        cRec("clauseProvisionalLabel") = "APPOSITIVE_CONTROL"
    ElseIf CLng(cRec("freeEtCount")) >= 2 _
       And cRec("objcInternalParallel") = 1 _
       And cRec("detClass") = "DET_ONLY" _
       And CLng(cRec("maxObjcSubphraseCount")) >= 2 _
       And CLng(cRec("maxObjcSubphraseCount")) <= 4 _
       And cRec("clauseComplete") = 1 Then
        cRec("clauseProvisionalLabel") = "POSSIBLE_CORE_TARGET"
    ElseIf cRec("detClass") = "MIXED" Then
        cRec("clauseProvisionalLabel") = "INTERNAL_MIX_NEAR_MISS"
    ElseIf CLng(cRec("freeEtCount")) >= 2 _
       And cRec("detClass") = "UND_ONLY" _
       And CLng(cRec("maxObjcSubphraseCount")) >= 2 _
       And cRec("hasAppo") = 0 Then
        cRec("clauseProvisionalLabel") = "INTERNAL_MIX_NEAR_MISS"
    ElseIf CLng(cRec("freeEtCount")) >= 2 _
       And cRec("hasAppo") = 0 Then
        cRec("clauseProvisionalLabel") = "COORDINATION_ONLY"
    ElseIf cRec("objcBareParallel") = 1 Then
        cRec("clauseProvisionalLabel") = "COORDINATION_ONLY"
    ElseIf cRec("objcLargeParallel") = 1 Then
        cRec("clauseProvisionalLabel") = "COORDINATION_ONLY"
    Else
        cRec("clauseProvisionalLabel") = "NOISE"
    End If

    If cRec("clauseProvisionalLabel") <> "NOISE" Or cRec("possibleSplitChain") = 1 Then
        cRec("clauseReviewNeeded") = "Y"
    Else
        cRec("clauseReviewNeeded") = "N"
    End If
End Sub
' Aggregates one finalized clause record into its verse record: clause count,
' concatenated text (" || " between clauses), 'et totals, candidate count,
' the verseHas* flags and the set of non-NOISE clause labels.
'
' The label set is read into a local variable, updated, and written back.
' Handing vRec("verseClauseLabelSet") directly to AddSetValue's ByRef
' parameter would update only a temporary copy and leave the record unchanged.
Sub AddClauseToVerse(ByRef vRec, ByRef cRec)
    Dim labelSet

    vRec("verseClauseCount") = CLng(vRec("verseClauseCount")) + 1
    If vRec("verseTextH") = "" Then
        vRec("verseTextH") = cRec("clauseTextH")
    Else
        vRec("verseTextH") = vRec("verseTextH") & " || " & cRec("clauseTextH")
    End If

    vRec("verseFreeEtTotal") = CLng(vRec("verseFreeEtTotal")) + CLng(cRec("freeEtCount"))
    vRec("verseSuffixedEtTotal") = CLng(vRec("verseSuffixedEtTotal")) + CLng(cRec("suffixedEtCount"))

    If cRec("clauseReviewNeeded") = "Y" Then
        vRec("verseCandidateClauseCount") = CLng(vRec("verseCandidateClauseCount")) + 1
    End If

    If cRec("hasAppo") = 1 Then vRec("verseHasAppoCandidate") = 1
    If cRec("possibleSplitChain") = 1 Then vRec("verseHasSplitChain") = 1
    If cRec("clauseProvisionalLabel") = "POSSIBLE_CORE_TARGET" Then vRec("verseHasPossibleCoreTarget") = 1
    If cRec("clauseProvisionalLabel") = "INTERNAL_MIX_NEAR_MISS" Then vRec("verseHasInternalMixNearMiss") = 1
    If cRec("clauseProvisionalLabel") = "COORDINATION_ONLY" Then vRec("verseHasCoordinationOnly") = 1

    If cRec("isDetCandidate") = 1 Then
        If cRec("detClass") = "DET_ONLY" Then vRec("verseHasDetOnlyCandidate") = 1
        If cRec("detClass") = "UND_ONLY" Then vRec("verseHasUndOnlyCandidate") = 1
    End If

    labelSet = vRec("verseClauseLabelSet")
    AddSetValue labelSet, cRec("clauseProvisionalLabel")
    vRec("verseClauseLabelSet") = labelSet
End Sub
' Chooses the verse-level provisional label from the flags collected by
' AddClauseToVerse, records the reason, and sets verseReviewNeeded.
'
' The reason is built in a local variable and stored afterwards. Passing
' vRec("verseLabelReason") directly to AddReason's ByRef parameter would
' modify only a temporary copy of the Dictionary item.
Sub FinalizeVerse(ByRef vRec)
    Dim reasons
    reasons = vRec("verseLabelReason")

    ' Label precedence: the first matching rule wins.
    If vRec("verseHasPossibleCoreTarget") = 1 And vRec("verseHasAppoCandidate") = 0 Then
        vRec("verseProvisionalLabel") = "POSSIBLE_CORE_TARGET"
        AddReason reasons, "possible_core_target_clause_present"
    ElseIf vRec("verseHasDetOnlyCandidate") = 1 _
       And vRec("verseHasUndOnlyCandidate") = 1 _
       And vRec("verseHasSplitChain") = 1 Then
        vRec("verseProvisionalLabel") = "INTERNAL_MIX_NEAR_MISS"
        AddReason reasons, "mixed_det_across_split_clauses"
    ElseIf vRec("verseHasInternalMixNearMiss") = 1 Then
        vRec("verseProvisionalLabel") = "INTERNAL_MIX_NEAR_MISS"
        AddReason reasons, "internal_mix_clause_present"
    ElseIf vRec("verseHasAppoCandidate") = 1 Then
        vRec("verseProvisionalLabel") = "APPOSITIVE_CONTROL"
        AddReason reasons, "apposition_present"
    ElseIf vRec("verseHasCoordinationOnly") = 1 Then
        vRec("verseProvisionalLabel") = "COORDINATION_ONLY"
        AddReason reasons, "coordination_only_clause_present"
    Else
        vRec("verseProvisionalLabel") = "NOISE"
    End If

    vRec("verseLabelReason") = reasons

    If vRec("verseProvisionalLabel") <> "NOISE" Then
        vRec("verseReviewNeeded") = "Y"
    Else
        vRec("verseReviewNeeded") = "N"
    End If
End Sub
' =========================================================
' Detection helpers
' =========================================================
' True when the word's gloss, trimmed and lower-cased, is exactly
' "<object marker>". txtH is accepted but not consulted.
Function IsFreeEt(ByVal gloss, ByVal txtH)
    IsFreeEt = (LCase(Trim(gloss)) = "<object marker>")
End Function
' True when the word is not glossed "<object marker>", its lexeme reduces to
' the consonants alef-tav, and its surface text reduces to something else
' (letters only, as returned by NormalizeHebrew).
Function IsSuffixedEt(ByVal gloss, ByVal lexemeV, ByVal txtH)
    Dim etConsonants
    ' alef + tav, spelled with code points so the comparison does not depend
    ' on the encoding the .vbs file was saved in
    etConsonants = ChrW(&H05D0) & ChrW(&H05EA)

    IsSuffixedEt = False
    If LCase(Trim(gloss)) = "<object marker>" Then Exit Function

    If NormalizeHebrew(lexemeV) = etConsonants Then
        If NormalizeHebrew(txtH) <> etConsonants Then IsSuffixedEt = True
    End If
End Function
' Returns 1 when the space-trimmed text ends with the sof pasuq character
' (U+05C3), otherwise 0 (also for empty text).
' NOTE(review): only spaces are trimmed; any other character after the
' sof pasuq makes the result 0 - confirm that matches the input data.
Function ClauseEndsWithSofPasuq(ByVal s)
    Dim t
    t = Trim(s)
    ' ChrW keeps the check independent of the encoding the .vbs file was
    ' saved in; Right("", 1) is "", so empty text falls through to 0.
    If Right(t, 1) = ChrW(&H05C3) Then
        ClauseEndsWithSofPasuq = 1
    Else
        ClauseEndsWithSofPasuq = 0
    End If
End Function
' Returns s reduced to the characters whose code point lies in 1488..1514
' (U+05D0..U+05EA, the Hebrew letters); everything else is dropped.
Function NormalizeHebrew(ByVal s)
    Const FIRST_LETTER = 1488
    Const LAST_LETTER = 1514
    Dim pos, letter, cp, kept

    kept = ""
    pos = 1
    Do While pos <= Len(s)
        letter = Mid(s, pos, 1)
        cp = AscW(letter)
        ' AscW yields a signed 16-bit value; map negatives back to 0..65535
        If cp < 0 Then cp = cp + 65536
        If Not (cp < FIRST_LETTER Or cp > LAST_LETTER) Then kept = kept & letter
        pos = pos + 1
    Loop
    NormalizeHebrew = kept
End Function
' =========================================================
' Generic helpers
' =========================================================
' Appends reasonText to the " | "-separated list in pipeString unless it is
' blank or already present (case-insensitive, whole entries only).
'
' pipeString is updated through the ByRef parameter, so the caller must pass
' a real variable. An expression such as dict("key") is passed as a temporary
' copy and the update is lost.
Sub AddReason(ByRef pipeString, ByVal reasonText)
    Const SEP = " | "
    If Trim(reasonText) = "" Then Exit Sub
    If pipeString = "" Then
        pipeString = reasonText
    ElseIf InStr(1, SEP & pipeString & SEP, SEP & reasonText & SEP, vbTextCompare) = 0 Then
        ' Wrapping both sides in separators matches whole entries only; a plain
        ' substring search treated "det" as present once "det_mixed" was stored.
        pipeString = pipeString & SEP & reasonText
    End If
End Sub
' Appends valueText to the " | "-separated set in pipeString unless it is
' blank, equals "NOISE" (case-insensitive), or is already present
' (case-insensitive, whole entries only).
'
' pipeString is updated through the ByRef parameter, so the caller must pass
' a real variable. An expression such as dict("key") is passed as a temporary
' copy and the update is lost.
Sub AddSetValue(ByRef pipeString, ByVal valueText)
    Const SEP = " | "
    If Trim(valueText) = "" Then Exit Sub
    If UCase(Trim(valueText)) = "NOISE" Then Exit Sub
    If pipeString = "" Then
        pipeString = valueText
    ElseIf InStr(1, SEP & pipeString & SEP, SEP & valueText & SEP, vbTextCompare) = 0 Then
        ' whole-entry match; a substring search would drop a value that merely
        ' occurs inside a longer stored value
        pipeString = pipeString & SEP & valueText
    End If
End Sub
' Increments the tally stored under keyText in dictionary d, creating the
' entry with 1 on first sight. Blank keys are tallied under "(blank)".
Sub AddCount(ByRef d, ByVal keyText)
    Dim bucket
    bucket = keyText
    If Trim(bucket) = "" Then bucket = "(blank)"

    If Not d.Exists(bucket) Then
        d.Add bucket, 1
        Exit Sub
    End If
    d(bucket) = CLng(d(bucket)) + 1
End Sub
' Splits a "|"-separated reason list and tallies each non-blank, trimmed
' entry in d under the key "<sourceName>|<reason>".
Sub AddPipeReasonsToCounts(ByRef d, ByVal sourceName, ByVal reasonText)
    Dim piece, reason
    If Trim(reasonText) = "" Then Exit Sub
    For Each piece In Split(reasonText, "|")
        reason = Trim(piece)
        If reason <> "" Then AddCount d, sourceName & "|" & reason
    Next
End Sub
' =========================================================
' Writers
' =========================================================
' Writes one CSV row per phrase record to filePath (header first, CRLF line
' ends, every cell quoted by Csv).
'
' Rows are collected in an array and joined once. Growing a single string
' with "sb = sb & row" re-copies the whole buffer for every phrase, which is
' quadratic in the number of rows.
Sub WritePhraseSummary(filePath, ByRef phrases)
    Dim fieldKeys, cells(), rows(), n, j, k, pRec

    ' record keys in output column order (matches the header below)
    fieldKeys = Array("book", "chapter", "verse", "clauseNo", "phraseNo", _
                      "function", "determination", "subphraseCount", _
                      "containsFreeEt", "containsSuffixedEt", "startsWithWaw", _
                      "hasAppo", "hasPar", "phraseTextH", "relaValues")
    ReDim cells(UBound(fieldKeys))
    ReDim rows(phrases.Count)

    rows(0) = "book,chapter,verse,clause#,phrase#,function,determination,subphrase_count,contains_free_et,contains_suffixed_et,starts_with_waw,has_appo,has_par,phrase_text_h,rela_values"
    n = 0
    For Each k In phrases.Keys
        Set pRec = phrases(k)
        For j = 0 To UBound(fieldKeys)
            cells(j) = Csv(pRec(fieldKeys(j)))
        Next
        n = n + 1
        rows(n) = Join(cells, ",")
    Next

    WriteUtf8Text filePath, Join(rows, vbCrLf) & vbCrLf
End Sub
' Writes one CSV row per clause record to filePath (header first, CRLF line
' ends, every cell quoted by Csv).
'
' Rows are collected in an array and joined once. Growing a single string
' with "sb = sb & row" re-copies the whole buffer for every clause, which is
' quadratic in the number of rows.
Sub WriteClauseSummary(filePath, ByRef clauses)
    Dim fieldKeys, cells(), rows(), n, j, k, cRec

    ' record keys in output column order (matches the header below)
    fieldKeys = Array("book", "chapter", "verse", "clauseNo", _
                      "phraseCount", "objcPhraseCount", "freeEtCount", "suffixedEtCount", _
                      "repeatedEt", "possibleMixedDetermination", "hasAppo", "likelyNaming", _
                      "objcInternalParallel", "maxObjcSubphraseCount", "detClass", "objcDetSeq", _
                      "possibleSplitChain", "clauseComplete", "clauseProvisionalLabel", _
                      "clauseReviewNeeded", "clauseLabelReason", "objcPhraseTexts", _
                      "functionsSeen", "clauseTextH")
    ReDim cells(UBound(fieldKeys))
    ReDim rows(clauses.Count)

    rows(0) = "book,chapter,verse,clause#,phrase_count,objc_phrase_count,free_et_count,suffixed_et_count,repeated_et,possible_mixed_determination,has_appo,likely_naming,objc_internal_parallel,max_objc_subphrase_count,det_class,objc_det_seq,possible_split_chain,clause_complete,clause_provisional_label,clause_review_needed,clause_label_reason,objc_phrase_texts,functions_seen,clause_text_h"
    n = 0
    For Each k In clauses.Keys
        Set cRec = clauses(k)
        For j = 0 To UBound(fieldKeys)
            cells(j) = Csv(cRec(fieldKeys(j)))
        Next
        n = n + 1
        rows(n) = Join(cells, ",")
    Next

    WriteUtf8Text filePath, Join(rows, vbCrLf) & vbCrLf
End Sub
' Writes the clauses whose clauseReviewNeeded is "Y" to filePath as CSV
' (header first, CRLF line ends, every cell quoted by Csv).
'
' Rows are collected in an array and joined once instead of growing one
' string per row, which re-copies the whole buffer on every append.
Sub WriteClauseCandidates(filePath, ByRef clauses)
    Dim fieldKeys, cells(), rows(), n, j, k, cRec

    ' record keys in output column order (matches the header below)
    fieldKeys = Array("book", "chapter", "verse", "clauseNo", _
                      "clauseProvisionalLabel", "clauseReviewNeeded", _
                      "freeEtCount", "suffixedEtCount", "repeatedEt", _
                      "possibleMixedDetermination", "hasAppo", "likelyNaming", _
                      "objcInternalParallel", "maxObjcSubphraseCount", "detClass", _
                      "objcDetSeq", "possibleSplitChain", "clauseLabelReason", _
                      "objcPhraseTexts", "clauseTextH")
    ReDim cells(UBound(fieldKeys))
    ' sized for the worst case (every clause selected); shrunk before joining
    ReDim rows(clauses.Count)

    rows(0) = "book,chapter,verse,clause#,clause_provisional_label,clause_review_needed,free_et_count,suffixed_et_count,repeated_et,possible_mixed_determination,has_appo,likely_naming,objc_internal_parallel,max_objc_subphrase_count,det_class,objc_det_seq,possible_split_chain,clause_label_reason,objc_phrase_texts,clause_text_h"
    n = 0
    For Each k In clauses.Keys
        Set cRec = clauses(k)
        If cRec("clauseReviewNeeded") = "Y" Then
            For j = 0 To UBound(fieldKeys)
                cells(j) = Csv(cRec(fieldKeys(j)))
            Next
            n = n + 1
            rows(n) = Join(cells, ",")
        End If
    Next
    ReDim Preserve rows(n)

    WriteUtf8Text filePath, Join(rows, vbCrLf) & vbCrLf
End Sub
' Writes the verses whose verseReviewNeeded is "Y" to filePath as CSV
' (header first, CRLF line ends, every cell quoted by Csv).
'
' Rows are collected in an array and joined once instead of growing one
' string per row, which re-copies the whole buffer on every append.
Sub WriteVerseCandidates(filePath, ByRef verses)
    Dim fieldKeys, cells(), rows(), n, j, k, vRec

    ' record keys in output column order (matches the header below)
    fieldKeys = Array("book", "chapter", "verse", _
                      "verseClauseCount", "verseCandidateClauseCount", _
                      "verseFreeEtTotal", "verseSuffixedEtTotal", _
                      "verseHasAppoCandidate", "verseHasPossibleCoreTarget", _
                      "verseHasInternalMixNearMiss", "verseHasCoordinationOnly", _
                      "verseHasSplitChain", "verseHasDetOnlyCandidate", _
                      "verseHasUndOnlyCandidate", "verseClauseLabelSet", _
                      "verseProvisionalLabel", "verseReviewNeeded", "verseLabelReason", _
                      "manualLabel", "manualNotes", "verseTextH")
    ReDim cells(UBound(fieldKeys))
    ' sized for the worst case (every verse selected); shrunk before joining
    ReDim rows(verses.Count)

    rows(0) = "book,chapter,verse,verse_clause_count,verse_candidate_clause_count,verse_free_et_total,verse_suffixed_et_total,verse_has_appo_candidate,verse_has_possible_core_target,verse_has_internal_mix_near_miss,verse_has_coordination_only,verse_has_split_chain,verse_has_det_only_candidate,verse_has_und_only_candidate,verse_clause_label_set,verse_provisional_label,verse_review_needed,verse_label_reason,manual_label,manual_notes,verse_text_h"
    n = 0
    For Each k In verses.Keys
        Set vRec = verses(k)
        If vRec("verseReviewNeeded") = "Y" Then
            For j = 0 To UBound(fieldKeys)
                cells(j) = Csv(vRec(fieldKeys(j)))
            Next
            n = n + 1
            rows(n) = Join(cells, ",")
        End If
    Next
    ReDim Preserve rows(n)

    WriteUtf8Text filePath, Join(rows, vbCrLf) & vbCrLf
End Sub
' Writes a two-column CSV to filePath: headerLine first, then one
' "key","count" row per entry of dictionary d, each line ending in CRLF.
Sub WriteCountFile(filePath, headerLine, ByRef d)
    Dim rows(), n, label
    ReDim rows(d.Count)
    rows(0) = headerLine
    n = 0
    For Each label In d.Keys
        n = n + 1
        rows(n) = Join(Array(Csv(label), Csv(d(label))), ",")
    Next
    WriteUtf8Text filePath, Join(rows, vbCrLf) & vbCrLf
End Sub
' Writes the reason tallies to filePath as a source,reason,count CSV.
' Each key of d is split at its first "|" into source and reason; a key
' without "|" is written with an empty source.
Sub WriteReasonCountFile(filePath, ByRef d)
    Dim rows(), n, entryKey, pieces, sourcePart, reasonPart
    ReDim rows(d.Count)
    rows(0) = "source,reason,count"
    n = 0
    For Each entryKey In d.Keys
        pieces = Split(entryKey, "|", 2)
        If UBound(pieces) = 1 Then
            sourcePart = pieces(0)
            reasonPart = pieces(1)
        Else
            sourcePart = ""
            reasonPart = entryKey
        End If
        n = n + 1
        rows(n) = Csv(sourcePart) & "," & Csv(reasonPart) & "," & Csv(d(entryKey))
    Next
    WriteUtf8Text filePath, Join(rows, vbCrLf) & vbCrLf
End Sub
' =========================================================
' CSV / header helpers
' =========================================================
' Fills dictionary d with header-name -> zero-based column index.
'   - A leading byte-order mark on the first header is removed.
'   - Blank headers are stored as "__blank_<index>".
'   - A repeated header keeps its first index under the plain name; later
'     occurrences are stored as "<name>__2", "<name>__3", ...
Sub BuildHeaderMap(ByVal headerLine, ByRef d)
    Dim cols, idx, label, blankKey, dupTally
    Set dupTally = CreateObject("Scripting.Dictionary")
    dupTally.CompareMode = 1

    cols = ParseCsvLine(headerLine)
    For idx = 0 To UBound(cols)
        label = cols(idx)
        If idx = 0 Then label = StripBom(label)
        label = Trim(label)

        If label = "" Then
            blankKey = "__blank_" & CStr(idx)
            If Not d.Exists(blankKey) Then d.Add blankKey, idx
        ElseIf d.Exists(label) Then
            ' repeated name: bump its tally and store under a numbered alias
            dupTally(label) = CLng(dupTally(label)) + 1
            d.Add label & "__" & CStr(dupTally(label)), idx
        Else
            d.Add label, idx
            dupTally.Add label, 1
        End If
    Next
End Sub
' Returns s without a leading U+FEFF byte-order mark; any other input
' (including the empty string) is returned unchanged.
Function StripBom(ByVal s)
    StripBom = s
    If Len(s) = 0 Then Exit Function
    If Left(s, 1) = ChrW(&HFEFF) Then StripBom = Mid(s, 2)
End Function
' Returns the cell of the parsed row arr that belongs to column fieldName
' according to map (name -> index). Yields "" when the name is unknown or
' the row has fewer cells than the column index.
Function GetField(ByVal arr, ByRef map, ByVal fieldName)
    Dim col
    GetField = ""
    If Not map.Exists(fieldName) Then Exit Function
    col = map(fieldName)
    If col <= UBound(arr) Then GetField = arr(col)
End Function
' Splits one CSV line into an array of cell strings.
' Commas inside double-quoted sections do not split; a doubled quote inside
' a quoted section yields one literal quote; quote characters themselves are
' not copied. The result always has at least one element.
Function ParseCsvLine(ByVal line)
    Dim fields(), lastIdx, buf, quoted, pos, total, c

    ReDim fields(0)
    lastIdx = 0
    buf = ""
    quoted = False
    total = Len(line)
    pos = 1

    Do While pos <= total
        c = Mid(line, pos, 1)
        Select Case c
            Case """"
                ' Mid past the end returns "", so no explicit bounds check is needed
                If quoted And Mid(line, pos + 1, 1) = """" Then
                    buf = buf & """"
                    pos = pos + 1       ' skip the second quote of the pair
                Else
                    quoted = Not quoted
                End If
            Case ","
                If quoted Then
                    buf = buf & c
                Else
                    fields(lastIdx) = buf
                    lastIdx = lastIdx + 1
                    ReDim Preserve fields(lastIdx)
                    buf = ""
                End If
            Case Else
                buf = buf & c
        End Select
        pos = pos + 1
    Loop

    fields(lastIdx) = buf
    ParseCsvLine = fields
End Function
' Formats a value as a quoted CSV cell: converts it to a string, doubles
' every embedded double quote, and wraps the result in double quotes.
Function Csv(ByVal s)
    Const DQ = """"
    Csv = DQ & Replace(CStr(s), DQ, DQ & DQ) & DQ
End Function
' =========================================================
' UTF-8 helpers
' =========================================================
' Loads filePath as UTF-8 text through ADODB.Stream and returns its lines as
' an array. CRLF and lone CR line ends are normalized to LF before splitting.
Function ReadUtf8Lines(filePath)
    Const adTypeText = 2
    Dim reader, content

    Set reader = CreateObject("ADODB.Stream")
    With reader
        .Type = adTypeText
        .Charset = "utf-8"
        .Open
        .LoadFromFile filePath
        content = .ReadText
        .Close
    End With
    Set reader = Nothing

    ' CRLF first, then any remaining lone CR
    content = Replace(Replace(content, vbCrLf, vbLf), vbCr, vbLf)
    ReadUtf8Lines = Split(content, vbLf)
End Function
' Saves text to filePath as UTF-8 through ADODB.Stream, replacing any
' existing file (SaveToFile option 2).
Sub WriteUtf8Text(filePath, text)
    Const adTypeText = 2
    Const adSaveCreateOverWrite = 2
    Dim writer

    Set writer = CreateObject("ADODB.Stream")
    With writer
        .Type = adTypeText
        .Charset = "utf-8"
        .Open
        .WriteText text
        .SaveToFile filePath, adSaveCreateOverWrite
        .Close
    End With
    Set writer = Nothing
End Sub