- Open the Excel file that you want to convert to single-line format.
- Save the file as an Excel Macro-Enabled Workbook.
- Back in this Word file, select all of the green text on pages 5 through 23and copy it.
- In Excel, right click on the tab at the bottom of the screen that corresponds to the data.
- In the menu that just opened, click View Code.
- This will open a new window that will look something like the one below.
- Arrange the new window and the normal Excel worksheet window so that you can see both of them.
- Paste the text you copied earlier into the big white part of the new window.
- Save the file.
- Click the green play button to start the program.
- Follow the prompts.
- If you mess up and want to start over, that is fine. Just click the green play button to restart the process.
Sub DatabaseCleanUp()
'From user input coverts data into single line format one header section at a time
'*****************************************************************************
'* This code was created by Jacob W. Roden-Foreman during his employment *
'* at Baylor Scott and White Research Institute *
'* and completed on June 2, 2016. *
'* *
'* It removes extra spaces to reduce file size, merges headers that are *
'* split across multiple rows, then converts the database into single *
'* line format one header section at a time. *
'*****************************************************************************
'********BLOCK START: PROGRESS BAR
Dim x As Long
Dim Timer As Long
Application.StatusBar = False
'********BLOCK END: PROGRESS BAR
'********BLOCK START: INTRO
Output = MsgBox("This program requires that you pay attention to and actually read ALL of the instructions that are displayed." & _
" If you don't follow the instructions carefully, you will end up ruining the data.", vbExclamation, "BEFORE YOU START")
If MsgBox("Are you actually going to follow the instructions TO THE LETTER?", vbYesNo) = vbNo Then
MsgBox ("Why did I even bother making this program then?")
Exit Sub
End If
MsgBox ("Good. Let's begin.")
'********BLOCK END: INTRO
'********BLOCK START: CHECK FOR FORMULAS
Dim UsedCells As Range
Dim FormulaCounter As Long
Dim onecell As Range
Set UsedCells = ActiveSheet.UsedRange
On Error Resume Next
FormulaCounter = UsedCells.SpecialCells(xlCellTypeFormulas, 23).Count
x = 1
If FormulaCounter > 0 Then
UsedCells.SpecialCells(xlCellTypeFormulas, 23).Interior.ColorIndex = 9
UsedCells.FormatConditions.Add Type:=xlExpression, Formula1:="=ISFORMULA(A1)"
UsedCells.FormatConditions(UsedCells.FormatConditions.Count).SetFirstPriority
With UsedCells.FormatConditions(1).Interior
.ColorIndex = 9
End With
UsedCells.FormatConditions(1).StopIfTrue = False
UsedCells.SpecialCells(xlCellTypeFormulas, 23).Select
MsgBox ("There are " & FormulaCounter & " cells containing formulas. They are selected & highlighted in dark red." & _
vbNewLine & vbNewLine & "Please change those cell formulas to values then restart the macro.")
Exit Sub
End If
'********BLOCK END: CHECK FOR FORMULAS
'********BLOCK START: COPY
Dim rng As Range
Dim SheetName As String
If MsgBox("Do you want to make a copy of the worksheet in case something goes wrong?", vbYesNo) = vbYes Then
SheetName = ActiveSheet.Name
Worksheets(SheetName).Copy After:=Sheets(SheetName)
Worksheets(SheetName).Activate
End If
'********BLOCK END: COPY
'********BLOCK START: TRIM
'This block removes all the extra spaces in the active spreadsheet
Dim rng1 As Range
Set rng1 = Range("A4")
Dim rng2 As Range
Dim rng3 As Range
ActiveWindow.ScrollColumn = 1
ActiveWindow.ScrollRow = 1
Cells(1, 1).Select
If MsgBox("Do you want to get rid of any extra spaces in the worksheet? This can reduce the file size and make " & _
"the program run faster.", vbYesNo) = vbYes Then
MsgBox ("The program is now going to go through and remove extra spaces in the worksheet to reduce file size." & vbNewLine & vbNewLine & _
"This part can take a while and it might freeze the program." & vbNewLine & vbNewLine & _
"Don't worry, it's working and it will give you a confirmation box at the end." & vbNewLine & vbNewLine & _
"Just don't close the file.")
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
.EnableEvents = False
Set rng1 = Cells.Find("*", [A1], , , xlByRows, xlPrevious)
Set rng2 = Cells.Find("*", [A1], , , xlByColumns, xlPrevious)
Set rng3 = Range([A1], Cells(rng1.Row, rng2.Column))
x = 1
Timer = UsedCells.Count
For Each onecell In UsedCells.Cells
onecell.Value = Trim(onecell.Value)
If x Mod 500 = 0 Then
Application.StatusBar = "Removing spaces: " & x & " of " & Timer & ": " & Format(x / Timer, "0.000%")
End If
x = x + 1
Next onecell
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
.EnableEvents = True
End With
Application.StatusBar = False
MsgBox "The cells " & rng3.Address(0, 0) & " have had the extra spaces removed."
End If
GoTo MergeHeaders
MergeHeaders:
ActiveWindow.ScrollColumn = 52
'*******BLOCK END: TRIM
'********BLOCK START: MERGE HEADERS
'This block merges headers that are split across multiple rows
Dim TopHeaders As Range
Dim NewHeader As Range
Dim BottomHeaders As Range
Dim HeadersWidth As Long
Dim Headers As Range
Dim FirstHeader As Range
Dim LastTopHeader As Range
Dim LastHeader As Range
Dim i As Long
ActiveWindow.ScrollColumn = 1
ActiveWindow.ScrollRow = 1
If MsgBox("Are all of the header labels on a single row?" & vbNewLine & vbNewLine & _
"Header labels are things like Patient Number and Risk Code." & _
vbNewLine & "Often they will be split accross two or three rows.", vbYesNo) = vbNo Then
Set Headers = Application.InputBox("Click and drag a box arround ALL of the cells that make up all of the column headers." & vbNewLine & _
"This could include multiple rows and columns.", Type:=8)
Set FirstHeader = Range("2D")
Set FirstHeader = Headers.Find("*", , , , xlByRows, xlNext)
Set FirstHeader = FirstHeader.Offset(0, -1) 'set to first cell
HeaderHeight = Headers.Rows.Count
HeaderWidth = Headers.Columns.Count
Set LastTopHeader = FirstHeader.Offset(0, (HeaderWidth - 1))
Set TopHeaders = Range(FirstHeader, LastTopHeader)
Set LastHeader = FirstHeader.Offset((HeaderHeight - 1), (HeaderWidth - 1))
Set BottomHeaders = Range(FirstHeader.Offset(1, 0), LastHeader)
x = 70
x = 1
Timer = TopHeaders.Count
For Each onecell In TopHeaders.Cells
i = 1
Do While i < HeaderHeight
onecell.Value = onecell.Value & " " & onecell.Offset(i, 0).Value
i = i + 1
Loop
Application.StatusBar = "Converting headers: " & x & " of " & Timer & ": " & Format(x / Timer, "0.0%")
x = x + 1
Next onecell
BottomHeaders.ClearContents
For Each onecell In TopHeaders.Cells
With onecell
.Value = Trim(.Value)
End With
Next onecell
TopHeaders.ColumnWidth = 17
Application.StatusBar = False
End If
'********BLOCK END: MERGE HEADERS
'********BLOCK START: MOVE
Dim lRow As Long
Dim LastRow As Range
Dim HeadersLength As Long
Dim HeadersHeight As Long
Dim RepeatNum As Long
Dim Label As Long
Dim MaxPtRow As Long
Dim CountCells As Range
Dim Count As Long
Dim MaxCount As Long
Dim MaxVal As Long
Dim CellsToMove As Range
Dim FirstPtTrigger As Long
Dim NewPtTrigger As Long
Dim iRow As Long
Dim iCol As Long
Dim tester As Range
Dim PtRow As Long
Dim CountA As Long
Dim MoveRange As Range
Dim TwoCell As Range
Dim Uniques As Range
If MsgBox("Are all of the cells in single line format (one row per patient)?", vbYesNo) = vbYes Then
GoTo DoneWithMove
End If
ActiveWindow.ScrollColumn = 1
ActiveWindow.ScrollRow = 1
Cells(1, 1).EntireColumn.Select
If MsgBox("In Column A (the column that is selected), is there something in the cells that indicates that there is a new " & _
"patient on that line (something like a patient number or name that only occurs at the top section for " & _
"each patient)?", vbYesNo) = vbNo Then
Cells(1, 1).Select
If MsgBox("Is there a column that indicates that there is a new patient on that line (something like a patient " & _
"number or name that only occurs at the top section for each patient) somewhere else?", vbYesNo) = vbNo Then
MsgBox ("There must be something like that for the code to work. Code execution is ending.")
Exit Sub
Else
Set Uniques = Application.InputBox("Select the column containing the new patient indicators.", Type:=8)
End If
End If
If IsNull(TopHeaders) Or TopHeaders Is Nothing Then
If MsgBox("This next part is going to ask you to select header sections." & vbNewLine & vbNewLine & _
"Header sections label cells that belong to the same category." & vbNewLine & _
"For example, under the header " & Chr(34) & "Risk Code" & Chr(34) & ", a value might be " & Chr(34) & "Other" & Chr(34) & "." & _
"The " & Chr(34) & "Risk Factor Desc" & Chr(34) & " on the same row might give more detail about that " & Chr(34) & "Other" & Chr(34) & ", so you would want " & _
"to keep the headers " & Chr(34) & "Risk Code" & Chr(34) & " and " & Chr(34) & "Risk Factor Desc" & Chr(34) & " together by selecting those two column headers " & _
"(the cells that contain the text " & Chr(34) & "Risk Code" & Chr(34) & " and " & Chr(34) & "Risk Factor Desc" & Chr(34) & "). " & vbNewLine & vbNewLine & _
"Another set of headers that make one section would be " & Chr(34) & "Providers Code" & Chr(34) & ", " & Chr(34) & "Providers Name" & Chr(34) & ", and " & Chr(34) & "Providers Type" & Chr(34) & ". " & _
"Other header sections are things like all of the vital signs info, all of the procedures info, or all of the " & _
"diagnosis info." & vbNewLine & vbNewLine & "Does that make sense?", vbYesNo) = vbNo Then
MsgBox ("Get in contact with Jake Roden-Foreman. He will help you.")
Exit Sub
End If
Else
If MsgBox("This next part is going to ask you to select header sections." & vbNewLine & vbNewLine & _
"Header sections label cells that belong to the same category." & vbNewLine & _
"For example, under the header " & Chr(34) & "Risk Code" & Chr(34) & ", a value might be " & Chr(34) & "Other" & Chr(34) & "." & _
"The " & Chr(34) & "Risk Factor Desc" & Chr(34) & " on the same row might give more detail about that " & Chr(34) & "Other" & Chr(34) & ", so you would want " & _
"to keep the headers " & Chr(34) & "Risk Code" & Chr(34) & " and " & Chr(34) & "Risk Factor Desc" & Chr(34) & " together by selecting those two column headers " & _
"(the cells that say " & Chr(34) & "Risk Code" & Chr(34) & " and " & Chr(34) & "Risk Factor Desc" & Chr(34) & " on row " & TopHeaders.Row & "). " & vbNewLine & vbNewLine & _
"Another set of headers that make one section would be " & Chr(34) & "Providers Code" & Chr(34) & ", " & Chr(34) & "Providers Name" & Chr(34) & ", and " & Chr(34) & "Providers Type" & Chr(34) & ". " & _
"Other header sections are things like all of the vital signs info, all of the procedures info, or all of the " & _
"diagnosis info." & vbNewLine & vbNewLine & "Does that make sense?", vbYesNo) = vbNo Then
MsgBox ("Get in contact with Jake Roden-Foreman. He will help you.")
Exit Sub
End If
End If
Cells(1, 1).Select
AnotherHeaderSection:
'get header section, insert columns, and add a count
Set Headers = Nothing
On Error Resume Next
Set Headers = Application.InputBox("Select the labels for a header section." & vbNewLine & _
"You only have to do this for columns that contain multiple data points per patient.", Type:=8)
Err.Clear
On Error GoTo MoreHeaders
HeadersHeight = Headers.Rows.Count
HeadersLength = Headers.Columns.Count
If HeadersHeight > 1000 Then Set Headers = Range(Cells(1, Headers.Column), Cells(1, (Headers.Column + HeadersLength - 1)))
If HeadersLength = 1 Then
If MsgBox("You selected the header " & Chr(34) & Cells(Headers.Row, Headers.Column) & Chr(34) & " at " & _
Cells(Headers.Row, Headers.Column).Address(RowAbsolute:=False, ColumnAbsolute:=False) & vbNewLine & vbNewLine & _
"Is that what you wanted?", vbYesNo) = vbNo Then
GoTo AnotherHeaderSection
End If
End If
If HeadersLength > 1 Then
If MsgBox("You selected the headers " & Chr(34) & Cells(Headers.Row, Headers.Column) & Chr(34) & " at " & _
Cells(Headers.Row, Headers.Column).Address(RowAbsolute:=False, ColumnAbsolute:=False) & " through " & Chr(34) & Cells(Headers.Row, (Headers.Column + HeadersLength - 1)) & _
Chr(34) & " at " & Cells(Headers.Row, (Headers.Column + HeadersLength - 1)).Address(RowAbsolute:=False, ColumnAbsolute:=False) & vbNewLine & vbNewLine & _
"Is that what you wanted?", vbYesNo) = vbNo Then
GoTo AnotherHeaderSection
End If
End If
Dim Dest As Range
Dim NewHeaders As Range
Dim C As Long
Dim DeleteCounter As Long
If Uniques Is Nothing Then
Cells(Headers.Row, Headers.Column).EntireColumn.Insert
Set LastRow = Cells.Find("*", [A1], , , xlByRows, xlPrevious)
lRow = LastRow.Row
Set CountCells = Range(Cells(Headers.Row + 1, Headers.Column - 1), Cells(lRow, Headers.Column - 1))
CountCells.ClearFormats
Set CellsToMove = Range(Headers.Offset(1, 0), Cells(lRow, Headers.Offset(1, 0).Column))
x = 1
Timer = CountCells.Rows.Count
MaxVal = 1
If Timer > 100000 Then
MsgBox "This might take a while and Excel might appear to freeze. Just give it a couple of minutes before you try anything."
End If
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
For Each onecell In CountCells 'Range(Headers.Offset(1, -1), Cells(lRow, Headers.Offset(1, -1).Column))
If x Mod 250 = 0 Then
Application.StatusBar = "Counting values: " & x & " of " & Timer & ": " & Format(x / Timer, "0.0%")
End If
x = x + 1
CountA = Application.WorksheetFunction.CountA(Range(onecell.Offset(0, 1), onecell.Offset(0, HeadersLength)))
If onecell.Offset(0, (1 - onecell.Column)) > "" Then onecell = 1
'if this is the first line for a patient, the count is 1
If onecell.Offset(0, (1 - onecell.Column)) = "" Then
onecell = onecell.Offset(-1, 0).Value + 1
End If
If onecell.Value > MaxVal Then
MaxVal = onecell.Value
End If
BothBlank:
Next onecell
MaxCount = MaxVal
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
End With
Application.StatusBar = False
'Using the input, repeat the header section MaxCount many times
i = 1
Label = MaxCount
x = 1
Timer = MaxCount
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
Do While i < MaxCount
Headers.EntireColumn.Offset(0, HeadersLength).Insert
Headers.EntireColumn.Offset(0, HeadersLength).ClearFormats
i = i + 1
For Each onecell In Headers.Cells
onecell.Offset(0, HeadersLength).Value = onecell.Value & " " & Label
Next onecell
Label = Label - 1
Application.StatusBar = "Making new header labels: " & x & " of " & Timer & ": " & Format(x / Timer, "0.0%")
x = x + 1
Loop
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
End With
Application.StatusBar = False
'Below is the move section
i = 1
iRow = 0
iCol = 0
FirstPtTrigger = 0
NewPtTrigger = 0
x = 1
Timer = CountCells.Rows.Count
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
For Each onecell In CountCells
If onecell.Row Mod 250 = 0 Then
Application.StatusBar = "Moving cells: " & onecell.Row & " of " & Timer & ": " & Format(onecell.Row / Timer, "0.0%")
End If
If onecell.Value = "" Or onecell.Value = 1 Or onecell.Offset(0, (1 - onecell.Column)).Value > "" Then
'if the count is blank, the first count for a patient, or this is a new patient, do nothing
GoTo NextCell
End If
If onecell.Offset(0, (1 - onecell.Column)).Value = "" Then 'if this is not a new patient line, move the values
Set MoveRange = Range(onecell.Offset(0, 1), onecell.Offset(0, HeadersLength)) 'establish the cells to move
Set Dest = MoveRange.Offset(1 - (onecell.Value), (HeadersLength * (onecell.Value - 1))) 'establish where to move them
For Each TwoCell In MoveRange 'change the formatting in the Dest cells
TwoCell.Offset(1 - (onecell.Value), (HeadersLength * (onecell.Value - 1))).NumberFormat = TwoCell.NumberFormat 'establish where to move them
Next TwoCell
Dest.Value = MoveRange.Value 'move the values
MoveRange.Clear 'clear the old values
End If
NextCell:
Next onecell
CountCells.EntireColumn.Delete
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
End With
Application.StatusBar = False
'for each cell in Headers, if the COUNTA of the column = 1 then entirecolumn.delete
DeleteCounter = 0
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
Set NewHeaders = Range(Cells(Headers.Row, Headers.Column), Cells(Headers.Row, (Headers.Column + ((HeadersLength * MaxCount) - 1))))
For C = NewHeaders.Cells.Count To 1 Step -1
CountA = Application.WorksheetFunction.CountA(Range(Cells(NewHeaders.Item(C).Row, NewHeaders.Item(C).Column), Cells((NewHeaders.Item(C).Row + 100000), NewHeaders.Item(C).Column)))
If CountA = 1 Then
NewHeaders.Item(C).EntireColumn.Delete
DeleteCounter = DeleteCounter + 1
End If
If CountA > 1 Then Exit For
Next C
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
End With
Cells(Headers.Row, (Headers.Column + ((HeadersLength * MaxCount) - DeleteCounter))).Select
ActiveWindow.ScrollColumn = (Headers.Column + ((HeadersLength * MaxCount)) - DeleteCounter) - 3
ActiveWindow.ScrollRow = 1
MoreHeaders:
If MsgBox("Is there another section of headers that you want to convert to single line format?", vbYesNo) = vbYes Then
GoTo AnotherHeaderSection
Else
GoTo DeleteBlankRows
End If
End If
If Not Uniques Is Nothing Then
Cells(Headers.Row, Headers.Column).EntireColumn.Insert
Set LastRow = Cells.Find("*", [A1], , , xlByRows, xlPrevious)
lRow = LastRow.Row
Set CountCells = Range(Cells(Headers.Row + 1, Headers.Column - 1), Cells(lRow, Headers.Column - 1))
CountCells.ClearFormats
Set CellsToMove = Range(Headers.Offset(1, 0), Cells(lRow, Headers.Offset(1, 0).Column))
x = 1
Timer = CountCells.Rows.Count
MaxVal = 1
If Timer > 100000 Then
MsgBox "This might take a while and Excel might appear to freeze. Just give it a couple of minutes before you try anything."
End If
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
For Each onecell In CountCells 'Range(Headers.Offset(1, -1), Cells(lRow, Headers.Offset(1, -1).Column))
If x Mod 250 = 0 Then
Application.StatusBar = "Counting values: " & x & " of " & Timer & ": " & Format(x / Timer, "0.0%")
End If
x = x + 1
CountA = Application.WorksheetFunction.CountA(Range(onecell.Offset(0, 1), onecell.Offset(0, HeadersLength)))
If Cells(onecell.Row, Uniques.Column) > "" Then onecell = 1
'if this is the first line for a patient, the count is 1
If Cells(onecell.Row, Uniques.Column) = "" Then
onecell = onecell.Offset(-1, 0).Value + 1
End If
If onecell.Value > MaxVal Then
MaxVal = onecell.Value
End If
BothBlank2:
Next onecell
MaxCount = MaxVal
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
End With
Application.StatusBar = False
'Using the input, repeat the header section MaxCount many times
i = 1
Label = MaxCount
x = 1
Timer = MaxCount
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
Do While i < MaxCount
Headers.EntireColumn.Offset(0, HeadersLength).Insert
Headers.EntireColumn.Offset(0, HeadersLength).ClearFormats
i = i + 1
For Each onecell In Headers.Cells
onecell.Offset(0, HeadersLength).Value = onecell.Value & " " & Label
Next onecell
Label = Label - 1
Application.StatusBar = "Making new header labels: " & x & " of " & Timer & ": " & Format(x / Timer, "0.0%")
x = x + 1
Loop
.Calculation = xlCalculationAutomatic
.ScreenUpdating = True
End With
Application.StatusBar = False
'Below is the move section
i = 1
iRow = 0
iCol = 0
FirstPtTrigger = 0
NewPtTrigger = 0
x = 1
Timer = CountCells.Rows.Count
With Application
.Calculation = xlCalculationManual
.ScreenUpdating = False
For Each onecell In CountCells
If onecell.Row Mod 250 = 0 Then