[{"data":1,"prerenderedAt":611},["ShallowReactive",2],{"article-ai\u002Fai-drug-week06-tox21-multilabel":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"tags":11,"listed":15,"body":16,"_type":605,"_id":606,"_source":607,"_file":608,"_stem":609,"_extension":610},"\u002Farticles\u002Fai\u002Fai-drug-week06-tox21-multilabel","ai",false,"","第 6 周：Tox21 多标签毒性预测","记录从 ESOL 回归转向 Tox21 多标签分类时，模型输出、loss、mask 和评估指标的变化。","2026-06-03",[12,13,14],"AI制药","深度学习","人工智能",true,{"type":17,"children":18,"toc":597},"root",[19,27,33,193,198,203,208,213,218,232,237,251,256,263,268,273,278,292,297,303,324,344,349,363,368,373,378,383,414,419,425,430,435,502,507,512,517,522,527,532,537,542,581,586,591],{"type":20,"tag":21,"props":22,"children":24},"element","h1",{"id":23},"前情提要",[25],{"type":26,"value":23},"text",{"type":20,"tag":28,"props":29,"children":30},"p",{},[31],{"type":26,"value":32},"正在做一个AI制药平台项目，前端使用vue3、后端采用双后端(主后端Java、SpringBoot做业务，次后端Python、FastAPI做AI服务)",{"type":20,"tag":34,"props":35,"children":39},"pre",{"className":36,"code":37,"language":38,"meta":7,"style":7},"language-plain shiki shiki-themes github-dark","Vue3 前端\n  |\n  | REST API\n  v\nSpringBoot 主后端\n  |\n  | 任务管理 \u002F 数据管理\n  v\nPostgreSQL + Redis\n  |\n  | 调用\n  v\nPython AI Service\n  |\n  | RDKit \u002F PyTorch \u002F PyG \u002F scikit-learn\n  v\n模型推理与分子计算\n","plain",[40],{"type":20,"tag":41,"props":42,"children":43},"code",{"__ignoreMap":7},[44,55,64,73,82,91,99,108,116,125,133,142,150,159,167,176,184],{"type":20,"tag":45,"props":46,"children":49},"span",{"class":47,"line":48},"line",1,[50],{"type":20,"tag":45,"props":51,"children":52},{},[53],{"type":26,"value":54},"Vue3 前端\n",{"type":20,"tag":45,"props":56,"children":58},{"class":47,"line":57},2,[59],{"type":20,"tag":45,"props":60,"children":61},{},[62],{"type":26,"value":63},"  |\n",{"type":20,"tag":45,"props":65,"children":67},{"class":47,"line":66},3,[68],{"type":20,"tag":45,"props":69,"children":70},{},[71],{"type":26,"value":72},"  | REST API\n",{"type":20,"tag":45,"props":74,"children":76},{"class":47,"line":75},4,[77],{"type":20,"tag":45,"props":78,"children":79},{},[80],{"type":26,"value":81},"  v\n",{"type":20,"tag":45,"props":83,"children":85},{"class":47,"line":84},5,[86],{"type":20,"tag":45,"props":87,"children":88},{},[89],{"type":26,"value":90},"SpringBoot 主后端\n",{"type":20,"tag":45,"props":92,"children":94},{"class":47,"line":93},6,[95],{"type":20,"tag":45,"props":96,"children":97},{},[98],{"type":26,"value":63},{"type":20,"tag":45,"props":100,"children":102},{"class":47,"line":101},7,[103],{"type":20,"tag":45,"props":104,"children":105},{},[106],{"type":26,"value":107},"  | 任务管理 \u002F 数据管理\n",{"type":20,"tag":45,"props":109,"children":111},{"class":47,"line":110},8,[112],{"type":20,"tag":45,"props":113,"children":114},{},[115],{"type":26,"value":81},{"type":20,"tag":45,"props":117,"children":119},{"class":47,"line":118},9,[120],{"type":20,"tag":45,"props":121,"children":122},{},[123],{"type":26,"value":124},"PostgreSQL + Redis\n",{"type":20,"tag":45,"props":126,"children":128},{"class":47,"line":127},10,[129],{"type":20,"tag":45,"props":130,"children":131},{},[132],{"type":26,"value":63},{"type":20,"tag":45,"props":134,"children":136},{"class":47,"line":135},11,[137],{"type":20,"tag":45,"props":138,"children":139},{},[140],{"type":26,"value":141},"  | 调用\n",{"type":20,"tag":45,"props":143,"children":145},{"class":47,"line":144},12,[146],{"type":20,"tag":45,"props":147,"children":148},{},[149],{"type":26,"value":81},{"type":20,"tag":45,"props":151,"children":153},{"class":47,"line":152},13,[154],{"type":20,"tag":45,"props":155,"children":156},{},[157],{"type":26,"value":158},"Python AI Service\n",{"type":20,"tag":45,"props":160,"children":162},{"class":47,"line":161},14,[163],{"type":20,"tag":45,"props":164,"children":165},{},[166],{"type":26,"value":63},{"type":20,"tag":45,"props":168,"children":170},{"class":47,"line":169},15,[171],{"type":20,"tag":45,"props":172,"children":173},{},[174],{"type":26,"value":175},"  | RDKit \u002F PyTorch \u002F PyG \u002F scikit-learn\n",{"type":20,"tag":45,"props":177,"children":179},{"class":47,"line":178},16,[180],{"type":20,"tag":45,"props":181,"children":182},{},[183],{"type":26,"value":81},{"type":20,"tag":45,"props":185,"children":187},{"class":47,"line":186},17,[188],{"type":20,"tag":45,"props":189,"children":190},{},[191],{"type":26,"value":192},"模型推理与分子计算\n",{"type":20,"tag":28,"props":194,"children":195},{},[196],{"type":26,"value":197},"会把自己认为比较重要的部分，单独写成文章以便记录。写这个项目的主要目的是以赛代练，因为不想从头到尾啃生物信息学、生物化学原理这些AI制药理论。通过项目能快速理解核心概念，剩下零零散散的后续慢慢补。",{"type":20,"tag":28,"props":199,"children":200},{},[201],{"type":26,"value":202},"此篇就是第 6 周的记录。",{"type":20,"tag":21,"props":204,"children":206},{"id":205},"第-6-周tox21-多标签毒性预测",[207],{"type":26,"value":8},{"type":20,"tag":28,"props":209,"children":210},{},[211],{"type":26,"value":212},"第 6 周从 ESOL 回归切到 Tox21 毒性预测。任务性质变了，模型训练里的很多细节也跟着变了。",{"type":20,"tag":28,"props":214,"children":215},{},[216],{"type":26,"value":217},"ESOL 是一个分子对应一个连续值：",{"type":20,"tag":34,"props":219,"children":221},{"className":36,"code":220,"language":38,"meta":7,"style":7},"SMILES -> logS\n",[222],{"type":20,"tag":41,"props":223,"children":224},{"__ignoreMap":7},[225],{"type":20,"tag":45,"props":226,"children":227},{"class":47,"line":48},[228],{"type":20,"tag":45,"props":229,"children":230},{},[231],{"type":26,"value":220},{"type":20,"tag":28,"props":233,"children":234},{},[235],{"type":26,"value":236},"Tox21 更接近 ADMET 里的毒性筛查语境。一个分子可能同时对应多个毒性相关标签，每个标签都可以是阳性、阴性或缺失：",{"type":20,"tag":34,"props":238,"children":240},{"className":36,"code":239,"language":38,"meta":7,"style":7},"SMILES -> [NR-AR, NR-AhR, SR-p53, ...]\n",[241],{"type":20,"tag":41,"props":242,"children":243},{"__ignoreMap":7},[244],{"type":20,"tag":45,"props":245,"children":246},{"class":47,"line":48},[247],{"type":20,"tag":45,"props":248,"children":249},{},[250],{"type":26,"value":239},{"type":20,"tag":28,"props":252,"children":253},{},[254],{"type":26,"value":255},"所以这一周的核心不是换一个数据集而已，而是把回归任务切换成多标签分类任务。",{"type":20,"tag":257,"props":258,"children":260},"h2",{"id":259},"tox21-的任务形态",[261],{"type":26,"value":262},"Tox21 的任务形态",{"type":20,"tag":28,"props":264,"children":265},{},[266],{"type":26,"value":267},"Tox21 用小分子结构预测化合物是否会激活或抑制某些毒性相关通路。它不是多分类，而是多标签分类。",{"type":20,"tag":28,"props":269,"children":270},{},[271],{"type":26,"value":272},"多分类通常表示几个类别里只能选一个，比如 A \u002F B \u002F C。多标签分类表示多个标签可以同时成立。一个分子可以在某个毒性通路上为阳性，也可以在另一个通路上为阴性。",{"type":20,"tag":28,"props":274,"children":275},{},[276],{"type":26,"value":277},"模型输出因此不是一个类别编号，而是一组 logits：",{"type":20,"tag":34,"props":279,"children":281},{"className":36,"code":280,"language":38,"meta":7,"style":7},"fingerprint -> MLP -> 12 个 logits\n",[282],{"type":20,"tag":41,"props":283,"children":284},{"__ignoreMap":7},[285],{"type":20,"tag":45,"props":286,"children":287},{"class":47,"line":48},[288],{"type":20,"tag":45,"props":289,"children":290},{},[291],{"type":26,"value":280},{"type":20,"tag":28,"props":293,"children":294},{},[295],{"type":26,"value":296},"每个 logit 对应一个 Tox21 任务。",{"type":20,"tag":257,"props":298,"children":300},{"id":299},"loss-从-mseloss-换成-bcewithlogitsloss",[301],{"type":26,"value":302},"loss 从 MSELoss 换成 BCEWithLogitsLoss",{"type":20,"tag":28,"props":304,"children":305},{},[306,308,314,316,322],{"type":26,"value":307},"第 5 周 ESOL 回归用的是 ",{"type":20,"tag":41,"props":309,"children":311},{"className":310},[],[312],{"type":26,"value":313},"MSELoss",{"type":26,"value":315},"。Tox21 是多标签二分类，因此这一周用 ",{"type":20,"tag":41,"props":317,"children":319},{"className":318},[],[320],{"type":26,"value":321},"BCEWithLogitsLoss",{"type":26,"value":323},"。",{"type":20,"tag":28,"props":325,"children":326},{},[327,329,335,337,342],{"type":26,"value":328},"这里没有在模型最后手动加 ",{"type":20,"tag":41,"props":330,"children":332},{"className":331},[],[333],{"type":26,"value":334},"sigmoid",{"type":26,"value":336},"，因为 ",{"type":20,"tag":41,"props":338,"children":340},{"className":339},[],[341],{"type":26,"value":321},{"type":26,"value":343}," 内部已经把 sigmoid 和 binary cross entropy 合在一起，数值上更稳定。",{"type":20,"tag":28,"props":345,"children":346},{},[347],{"type":26,"value":348},"训练阶段处理的是 logits。评估阶段再对 logits 做 sigmoid，得到每个标签的概率：",{"type":20,"tag":34,"props":350,"children":352},{"className":36,"code":351,"language":38,"meta":7,"style":7},"logit -> sigmoid -> probability\n",[353],{"type":20,"tag":41,"props":354,"children":355},{"__ignoreMap":7},[356],{"type":20,"tag":45,"props":357,"children":358},{"class":47,"line":48},[359],{"type":20,"tag":45,"props":360,"children":361},{},[362],{"type":26,"value":351},{"type":20,"tag":28,"props":364,"children":365},{},[366],{"type":26,"value":367},"这个区别很小，但如果搞混，训练和评估都会变得不可靠。",{"type":20,"tag":257,"props":369,"children":371},{"id":370},"缺失标签不能当阴性",[372],{"type":26,"value":370},{"type":20,"tag":28,"props":374,"children":375},{},[376],{"type":26,"value":377},"Tox21 里有缺失标签。缺失不是阴性，而是没有实验记录。",{"type":20,"tag":28,"props":379,"children":380},{},[381],{"type":26,"value":382},"如果把缺失值直接填成 0，模型会把“未知”当成“无毒”，评估结果会被污染。这里我用 mask 只在有效标签上计算 loss：",{"type":20,"tag":34,"props":384,"children":386},{"className":36,"code":385,"language":38,"meta":7,"style":7},"labels:  0 \u002F 1 \u002F missing\nmask:    valid \u002F invalid\nloss:    only valid labels\n",[387],{"type":20,"tag":41,"props":388,"children":389},{"__ignoreMap":7},[390,398,406],{"type":20,"tag":45,"props":391,"children":392},{"class":47,"line":48},[393],{"type":20,"tag":45,"props":394,"children":395},{},[396],{"type":26,"value":397},"labels:  0 \u002F 1 \u002F missing\n",{"type":20,"tag":45,"props":399,"children":400},{"class":47,"line":57},[401],{"type":20,"tag":45,"props":402,"children":403},{},[404],{"type":26,"value":405},"mask:    valid \u002F invalid\n",{"type":20,"tag":45,"props":407,"children":408},{"class":47,"line":66},[409],{"type":20,"tag":45,"props":410,"children":411},{},[412],{"type":26,"value":413},"loss:    only valid labels\n",{"type":20,"tag":28,"props":415,"children":416},{},[417],{"type":26,"value":418},"这个处理比模型结构更重要。毒性数据本来就不完整，错误处理缺失标签会让后面所有指标都失去意义。",{"type":20,"tag":257,"props":420,"children":422},{"id":421},"accuracy-不够用",[423],{"type":26,"value":424},"Accuracy 不够用",{"type":20,"tag":28,"props":426,"children":427},{},[428],{"type":26,"value":429},"Tox21 还有类别不平衡问题。很多毒性标签里，阳性样本比例并不高。如果只看 Accuracy，一个模型即使大部分都预测阴性，也可能得到看起来不错的分数。",{"type":20,"tag":28,"props":431,"children":432},{},[433],{"type":26,"value":434},"这一周主要看：",{"type":20,"tag":436,"props":437,"children":438},"table",{},[439,458],{"type":20,"tag":440,"props":441,"children":442},"thead",{},[443],{"type":20,"tag":444,"props":445,"children":446},"tr",{},[447,453],{"type":20,"tag":448,"props":449,"children":450},"th",{},[451],{"type":26,"value":452},"指标",{"type":20,"tag":448,"props":454,"children":455},{},[456],{"type":26,"value":457},"用途",{"type":20,"tag":459,"props":460,"children":461},"tbody",{},[462,476,489],{"type":20,"tag":444,"props":463,"children":464},{},[465,471],{"type":20,"tag":466,"props":467,"children":468},"td",{},[469],{"type":26,"value":470},"ROC-AUC",{"type":20,"tag":466,"props":472,"children":473},{},[474],{"type":26,"value":475},"看模型区分阳性\u002F阴性的排序能力",{"type":20,"tag":444,"props":477,"children":478},{},[479,484],{"type":20,"tag":466,"props":480,"children":481},{},[482],{"type":26,"value":483},"PR-AUC",{"type":20,"tag":466,"props":485,"children":486},{},[487],{"type":26,"value":488},"类别不平衡时更敏感",{"type":20,"tag":444,"props":490,"children":491},{},[492,497],{"type":20,"tag":466,"props":493,"children":494},{},[495],{"type":26,"value":496},"F1",{"type":20,"tag":466,"props":498,"children":499},{},[500],{"type":26,"value":501},"平衡 precision 和 recall",{"type":20,"tag":28,"props":503,"children":504},{},[505],{"type":26,"value":506},"输出报告会按任务列出指标，而不是只给一个总分。每个毒性任务的数据分布不一样，平均值只能做粗略参考。",{"type":20,"tag":257,"props":508,"children":510},{"id":509},"小分子任务的语境更清楚了",[511],{"type":26,"value":509},{"type":20,"tag":28,"props":513,"children":514},{},[515],{"type":26,"value":516},"做完 ESOL 和 Tox21 后，当前项目的对象基本明确：前半段主要处理小分子。",{"type":20,"tag":28,"props":518,"children":519},{},[520],{"type":26,"value":521},"小分子药物通常可以用 SMILES 表示，适合用 RDKit、Morgan Fingerprint、分子图、描述符、QSAR、ADMET、分子对接这些工具和任务。ESOL 是性质预测，Tox21 是毒性预测，都属于小分子方向。",{"type":20,"tag":28,"props":523,"children":524},{},[525],{"type":26,"value":526},"大分子药物则是另一套语境，比如蛋白、抗体、肽、核酸药物。它们更多依赖氨基酸序列、3D 结构、蛋白 embedding、AlphaFold 结构、蛋白语言模型等表示方式。",{"type":20,"tag":28,"props":528,"children":529},{},[530],{"type":26,"value":531},"这个区分对工程路线很重要。当前平台先把小分子的标准化、性质预测、毒性预测和后续 DTI 中的 drug 端打稳，再逐步接触蛋白端，会比一开始同时铺开小分子和大分子更可控。",{"type":20,"tag":257,"props":533,"children":535},{"id":534},"本周产物",[536],{"type":26,"value":534},{"type":20,"tag":28,"props":538,"children":539},{},[540],{"type":26,"value":541},"第 6 周对应的核心文件是：",{"type":20,"tag":34,"props":543,"children":545},{"className":36,"code":544,"language":38,"meta":7,"style":7},"ml-experiments\u002Ftox21_mlp.py\nmodels\u002Ftox21_mlp.pt\nml-experiments\u002Freports\u002Ftox21_report.md\ndocs\u002Fweek06_tox21_multilabel.md\n",[546],{"type":20,"tag":41,"props":547,"children":548},{"__ignoreMap":7},[549,557,565,573],{"type":20,"tag":45,"props":550,"children":551},{"class":47,"line":48},[552],{"type":20,"tag":45,"props":553,"children":554},{},[555],{"type":26,"value":556},"ml-experiments\u002Ftox21_mlp.py\n",{"type":20,"tag":45,"props":558,"children":559},{"class":47,"line":57},[560],{"type":20,"tag":45,"props":561,"children":562},{},[563],{"type":26,"value":564},"models\u002Ftox21_mlp.pt\n",{"type":20,"tag":45,"props":566,"children":567},{"class":47,"line":66},[568],{"type":20,"tag":45,"props":569,"children":570},{},[571],{"type":26,"value":572},"ml-experiments\u002Freports\u002Ftox21_report.md\n",{"type":20,"tag":45,"props":574,"children":575},{"class":47,"line":75},[576],{"type":20,"tag":45,"props":577,"children":578},{},[579],{"type":26,"value":580},"docs\u002Fweek06_tox21_multilabel.md\n",{"type":20,"tag":28,"props":582,"children":583},{},[584],{"type":26,"value":585},"这周把模型任务从“预测一个连续性质”推进到“同时预测多个毒性标签”。它更接近 AI 制药里真实会遇到的数据形态：标签缺失、类别不平衡、任务之间分布不同、单一指标不够解释模型质量。",{"type":20,"tag":28,"props":587,"children":588},{},[589],{"type":26,"value":590},"第 6 周之后，再看模型评估时就不能只问“分数高不高”，还要看标签怎么来的、缺失怎么处理、阳性比例是多少、指标是否适合这个任务。",{"type":20,"tag":592,"props":593,"children":594},"style",{},[595],{"type":26,"value":596},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":7,"searchDepth":57,"depth":57,"links":598},[599,600,601,602,603,604],{"id":259,"depth":57,"text":262},{"id":299,"depth":57,"text":302},{"id":370,"depth":57,"text":370},{"id":421,"depth":57,"text":424},{"id":509,"depth":57,"text":509},{"id":534,"depth":57,"text":534},"markdown","content:articles:ai:ai-drug-week06-tox21-multilabel.md","content","articles\u002Fai\u002Fai-drug-week06-tox21-multilabel.md","articles\u002Fai\u002Fai-drug-week06-tox21-multilabel","md",1780481290973]