Step 4: Data cleaning (if needed)
# Use pandas's functions to clean/process data
df_data['Gender_new'] = df_data['Gender']
df_data.replace({
'Q1_SP1': {1: 5, 2: 4, 3: 3, 4: 2, 5: 1},
'Q1_SP2': {1: 5, 2: 4, 3: 3, 4: 2, 5: 1},
}, inplace=True)
df_data.loc[(df_data['Gender_new'] == 2) & (df_data['Age'] == 5), ['Gender_new']] = [np.nan]
df_info.loc[df_info['var_name'] == 'Q1_SP1', ['val_lbl']] = [{'1': 'a', '2': 'b', '3': 'c', '4': 'd', '5': 'e'}]
df_info = pd.concat([df_info, pd.DataFrame(
columns=['var_name', 'var_lbl', 'var_type', 'val_lbl'],
data=[
['Gender_new', 'Please indicate your gender', 'SA', {'1': 'aaa', '2': 'bb', '3': 'cc'}]
]
)], ignore_index=True)
Step 8: Export data tables
- init DataTableGenerator(df_data=df_data, df_info=df_info, xlsx_name=str_tbl_file_name)
- df_data: pandas.Dataframe
- df_info: pandas.Dataframe
- xlsx_name: str
- Returns: NONE
- class DataTableGenerator.run_tables_by_js_files(lst_func_to_run)
- lst_func_to_run: list
- Returns: NONE
- init TableFormatter(xlsx_name=str_tbl_file_name)
- xlsx_name: str
- Returns: NONE
- class TableFormatter.format_sig_table()
lst_side_qres = [
{"qre_name": "CC1", "sort": "des"},
{"qre_name": "$CC3", "sort": "asc"},
{"qre_name": "$CC4", "sort": "des"},
{"qre_name": "$CC6"},
{"qre_name": "$CC10"},
{"qre_name": "LSM"},
{"qre_name": "Gender"},
{"qre_name": "Age"},
{"qre_name": "City"},
{"qre_name": "HHI"},
# MA Question with net/combine (can apply to SA questions)
{"qre_name": "$Q15", "cats": {
'net_code': {
'900001|combine|Group 1 + 2': {
'1': 'Yellow/dull teeth',
'3': 'Dental plaque',
'5': 'Bad breath',
'7': 'Aphthousulcer',
'2': 'Sensitive teeth',
'4': 'Caries',
'6': 'Gingivitis (bleeding, swollen gums)',
},
'900002|net|Group 1': {
'1': 'Yellow/dull teeth',
'3': 'Dental plaque',
'5': 'Bad breath',
'7': 'Aphthousulcer',
},
'900003|net|Group 2': {
'2': 'Sensitive teeth',
'4': 'Caries',
'6': 'Gingivitis (bleeding, swollen gums)',
},
},
'8': 'Other (specify)',
'9': 'No problem',
}},
# Scale question with full properties
{
"qre_name": "Perception",
"cats": {
'1': 'Totally disagree', '2': 'Disagree', '3': 'Neutral', '4': 'Agree', '5': 'Totally agree',
'net_code': {
'900001|combine|B2B': {'1': 'Totally disagree', '2': 'Disagree'},
'900002|combine|Medium': {'3': 'Neutral'},
'900003|combine|T2B': {'4': 'Agree', '5': 'Totally agree'},
}
},
"mean": {1: 1, 2: 2, 3: 3, 4: 4, 5: 5}
},
]
lst_header_qres = [
[
{
"qre_name": "Age",
"qre_lbl": "Age",
"cats": {
'TOTAL': 'TOTAL',
'2': '18 - 24', '3': '25 - 30', '4': '31 - 39', '5': '40 - 50', '6': 'Trên 50'
}
},
{
"qre_name": "@City2",
"qre_lbl": "Location",
"cats": {
'City.isin([1, 5, 10, 11, 12])': 'All South',
'City.isin([2, 4, 16, 17, 18])': 'All North',
}
},
],
]
lst_func_to_run = [
{
'func_name': 'run_standard_table_sig',
'tables_to_run': [
'Tbl_1_Pct', # this table use df_data & df_info to run
'Tbl_1_Count', # this table use df_data & df_info to run
],
'tables_format': {
"Tbl_1_Pct": {
"tbl_name": "Table 1 - Pct",
"tbl_filter": "City > 0",
"is_count": 0,
"is_pct_sign": 1,
"is_hide_oe_zero_cats": 1,
"sig_test_info": {
"sig_type": "", # ind / rel
"sig_cols": [],
"lst_sig_lvl": []
},
"lst_side_qres": lst_side_qres,
"lst_header_qres": lst_header_qres
},
"Tbl_1_Count": {
"tbl_name": "Table 1 - Count",
"tbl_filter": "City > 0",
"is_count": 1,
"is_pct_sign": 0,
"is_hide_oe_zero_cats": 1,
"sig_test_info": {
"sig_type": "",
"sig_cols": [],
"lst_sig_lvl": []
},
"lst_side_qres": lst_side_qres,
"lst_header_qres": lst_header_qres
},
},
},
]
dtg = DataTableGenerator(df_data=df_data, df_info=df_info, xlsx_name=str_tbl_file_name)
dtg.run_tables_by_js_files(lst_func_to_run)
dtf = TableFormatter(xlsx_name=str_tbl_file_name)
dtf.format_sig_table()