[{"data":1,"prerenderedAt":613},["ShallowReactive",2],{"article-\u002Fai-drug-week06-tox21-multilabel":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":5,"title":7,"description":8,"date":9,"tags":10,"listed":14,"body":15,"_type":607,"_id":608,"_source":609,"_file":610,"_stem":611,"_extension":612},"\u002Farticles\u002F\u002Fai-drug-week06-tox21-multilabel","",false,"第 6 周：Tox21 多标签毒性预测","记录从 ESOL 回归转向 Tox21 多标签分类时，模型输出、loss、mask 和评估指标的变化。","2026-06-03",[11,12,13],"AI制药","深度学习","人工智能",true,{"type":16,"children":17,"toc":596},"root",[18,26,32,192,197,202,207,212,217,231,236,250,255,262,267,272,277,291,296,302,323,343,348,362,367,372,377,382,413,418,424,429,434,501,506,511,516,521,526,531,536,541,580,585,590],{"type":19,"tag":20,"props":21,"children":23},"element","h2",{"id":22},"前情提要",[24],{"type":25,"value":22},"text",{"type":19,"tag":27,"props":28,"children":29},"p",{},[30],{"type":25,"value":31},"正在做一个AI制药平台项目，前端使用vue3、后端采用双后端(主后端Java、SpringBoot做业务，次后端Python、FastAPI做AI服务)",{"type":19,"tag":33,"props":34,"children":38},"pre",{"className":35,"code":36,"language":37,"meta":5,"style":5},"language-plain shiki shiki-themes github-dark","Vue3 前端\n  |\n  | REST API\n  v\nSpringBoot 主后端\n  |\n  | 任务管理 \u002F 数据管理\n  v\nPostgreSQL + Redis\n  |\n  | 调用\n  v\nPython AI Service\n  |\n  | RDKit \u002F PyTorch \u002F PyG \u002F scikit-learn\n  v\n模型推理与分子计算\n","plain",[39],{"type":19,"tag":40,"props":41,"children":42},"code",{"__ignoreMap":5},[43,54,63,72,81,90,98,107,115,124,132,141,149,158,166,175,183],{"type":19,"tag":44,"props":45,"children":48},"span",{"class":46,"line":47},"line",1,[49],{"type":19,"tag":44,"props":50,"children":51},{},[52],{"type":25,"value":53},"Vue3 前端\n",{"type":19,"tag":44,"props":55,"children":57},{"class":46,"line":56},2,[58],{"type":19,"tag":44,"props":59,"children":60},{},[61],{"type":25,"value":62},"  |\n",{"type":19,"tag":44,"props":64,"children":66},{"class":46,"line":65},3,[67],{"type":19,"tag":44,"props":68,"children":69},{},[70],{"type":25,"value":71},"  | REST API\n",{"type":19,"tag":44,"props":73,"children":75},{"class":46,"line":74},4,[76],{"type":19,"tag":44,"props":77,"children":78},{},[79],{"type":25,"value":80},"  v\n",{"type":19,"tag":44,"props":82,"children":84},{"class":46,"line":83},5,[85],{"type":19,"tag":44,"props":86,"children":87},{},[88],{"type":25,"value":89},"SpringBoot 主后端\n",{"type":19,"tag":44,"props":91,"children":93},{"class":46,"line":92},6,[94],{"type":19,"tag":44,"props":95,"children":96},{},[97],{"type":25,"value":62},{"type":19,"tag":44,"props":99,"children":101},{"class":46,"line":100},7,[102],{"type":19,"tag":44,"props":103,"children":104},{},[105],{"type":25,"value":106},"  | 任务管理 \u002F 数据管理\n",{"type":19,"tag":44,"props":108,"children":110},{"class":46,"line":109},8,[111],{"type":19,"tag":44,"props":112,"children":113},{},[114],{"type":25,"value":80},{"type":19,"tag":44,"props":116,"children":118},{"class":46,"line":117},9,[119],{"type":19,"tag":44,"props":120,"children":121},{},[122],{"type":25,"value":123},"PostgreSQL + Redis\n",{"type":19,"tag":44,"props":125,"children":127},{"class":46,"line":126},10,[128],{"type":19,"tag":44,"props":129,"children":130},{},[131],{"type":25,"value":62},{"type":19,"tag":44,"props":133,"children":135},{"class":46,"line":134},11,[136],{"type":19,"tag":44,"props":137,"children":138},{},[139],{"type":25,"value":140},"  | 调用\n",{"type":19,"tag":44,"props":142,"children":144},{"class":46,"line":143},12,[145],{"type":19,"tag":44,"props":146,"children":147},{},[148],{"type":25,"value":80},{"type":19,"tag":44,"props":150,"children":152},{"class":46,"line":151},13,[153],{"type":19,"tag":44,"props":154,"children":155},{},[156],{"type":25,"value":157},"Python AI Service\n",{"type":19,"tag":44,"props":159,"children":161},{"class":46,"line":160},14,[162],{"type":19,"tag":44,"props":163,"children":164},{},[165],{"type":25,"value":62},{"type":19,"tag":44,"props":167,"children":169},{"class":46,"line":168},15,[170],{"type":19,"tag":44,"props":171,"children":172},{},[173],{"type":25,"value":174},"  | RDKit \u002F PyTorch \u002F PyG \u002F scikit-learn\n",{"type":19,"tag":44,"props":176,"children":178},{"class":46,"line":177},16,[179],{"type":19,"tag":44,"props":180,"children":181},{},[182],{"type":25,"value":80},{"type":19,"tag":44,"props":184,"children":186},{"class":46,"line":185},17,[187],{"type":19,"tag":44,"props":188,"children":189},{},[190],{"type":25,"value":191},"模型推理与分子计算\n",{"type":19,"tag":27,"props":193,"children":194},{},[195],{"type":25,"value":196},"会把自己认为比较重要的部分，单独写成文章以便记录。写这个项目的主要目的是在干中学，因为不想从头到尾啃生物信息学、生物化学原理这些AI制药理论。通过项目能快速理解核心概念，剩下零零散散的后续慢慢补。",{"type":19,"tag":27,"props":198,"children":199},{},[200],{"type":25,"value":201},"此篇就是第 6 周的记录。",{"type":19,"tag":20,"props":203,"children":205},{"id":204},"第-6-周tox21-多标签毒性预测",[206],{"type":25,"value":7},{"type":19,"tag":27,"props":208,"children":209},{},[210],{"type":25,"value":211},"第 6 周从 ESOL 回归切到 Tox21 毒性预测。任务性质变了，模型训练里的很多细节也跟着变了。",{"type":19,"tag":27,"props":213,"children":214},{},[215],{"type":25,"value":216},"ESOL 是一个分子对应一个连续值：",{"type":19,"tag":33,"props":218,"children":220},{"className":35,"code":219,"language":37,"meta":5,"style":5},"SMILES -> logS\n",[221],{"type":19,"tag":40,"props":222,"children":223},{"__ignoreMap":5},[224],{"type":19,"tag":44,"props":225,"children":226},{"class":46,"line":47},[227],{"type":19,"tag":44,"props":228,"children":229},{},[230],{"type":25,"value":219},{"type":19,"tag":27,"props":232,"children":233},{},[234],{"type":25,"value":235},"Tox21 更接近 ADMET 里的毒性筛查语境。一个分子可能同时对应多个毒性相关标签，每个标签都可以是阳性、阴性或缺失：",{"type":19,"tag":33,"props":237,"children":239},{"className":35,"code":238,"language":37,"meta":5,"style":5},"SMILES -> [NR-AR, NR-AhR, SR-p53, ...]\n",[240],{"type":19,"tag":40,"props":241,"children":242},{"__ignoreMap":5},[243],{"type":19,"tag":44,"props":244,"children":245},{"class":46,"line":47},[246],{"type":19,"tag":44,"props":247,"children":248},{},[249],{"type":25,"value":238},{"type":19,"tag":27,"props":251,"children":252},{},[253],{"type":25,"value":254},"所以这一周的核心不是换一个数据集而已，而是把回归任务切换成多标签分类任务。",{"type":19,"tag":256,"props":257,"children":259},"h3",{"id":258},"tox21-的任务形态",[260],{"type":25,"value":261},"Tox21 的任务形态",{"type":19,"tag":27,"props":263,"children":264},{},[265],{"type":25,"value":266},"Tox21 用小分子结构预测化合物是否会激活或抑制某些毒性相关通路。它不是多分类，而是多标签分类。",{"type":19,"tag":27,"props":268,"children":269},{},[270],{"type":25,"value":271},"多分类通常表示几个类别里只能选一个，比如 A \u002F B \u002F C。多标签分类表示多个标签可以同时成立。一个分子可以在某个毒性通路上为阳性，也可以在另一个通路上为阴性。",{"type":19,"tag":27,"props":273,"children":274},{},[275],{"type":25,"value":276},"模型输出因此不是一个类别编号，而是一组 logits：",{"type":19,"tag":33,"props":278,"children":280},{"className":35,"code":279,"language":37,"meta":5,"style":5},"fingerprint -> MLP -> 12 个 logits\n",[281],{"type":19,"tag":40,"props":282,"children":283},{"__ignoreMap":5},[284],{"type":19,"tag":44,"props":285,"children":286},{"class":46,"line":47},[287],{"type":19,"tag":44,"props":288,"children":289},{},[290],{"type":25,"value":279},{"type":19,"tag":27,"props":292,"children":293},{},[294],{"type":25,"value":295},"每个 logit 对应一个 Tox21 任务。",{"type":19,"tag":256,"props":297,"children":299},{"id":298},"loss-从-mseloss-换成-bcewithlogitsloss",[300],{"type":25,"value":301},"loss 从 MSELoss 换成 BCEWithLogitsLoss",{"type":19,"tag":27,"props":303,"children":304},{},[305,307,313,315,321],{"type":25,"value":306},"第 5 周 ESOL 回归用的是 ",{"type":19,"tag":40,"props":308,"children":310},{"className":309},[],[311],{"type":25,"value":312},"MSELoss",{"type":25,"value":314},"。Tox21 是多标签二分类，因此这一周用 ",{"type":19,"tag":40,"props":316,"children":318},{"className":317},[],[319],{"type":25,"value":320},"BCEWithLogitsLoss",{"type":25,"value":322},"。",{"type":19,"tag":27,"props":324,"children":325},{},[326,328,334,336,341],{"type":25,"value":327},"这里没有在模型最后手动加 ",{"type":19,"tag":40,"props":329,"children":331},{"className":330},[],[332],{"type":25,"value":333},"sigmoid",{"type":25,"value":335},"，因为 ",{"type":19,"tag":40,"props":337,"children":339},{"className":338},[],[340],{"type":25,"value":320},{"type":25,"value":342}," 内部已经把 sigmoid 和 binary cross entropy 合在一起，数值上更稳定。",{"type":19,"tag":27,"props":344,"children":345},{},[346],{"type":25,"value":347},"训练阶段处理的是 logits。评估阶段再对 logits 做 sigmoid，得到每个标签的概率：",{"type":19,"tag":33,"props":349,"children":351},{"className":35,"code":350,"language":37,"meta":5,"style":5},"logit -> sigmoid -> probability\n",[352],{"type":19,"tag":40,"props":353,"children":354},{"__ignoreMap":5},[355],{"type":19,"tag":44,"props":356,"children":357},{"class":46,"line":47},[358],{"type":19,"tag":44,"props":359,"children":360},{},[361],{"type":25,"value":350},{"type":19,"tag":27,"props":363,"children":364},{},[365],{"type":25,"value":366},"这个区别很小，但如果搞混，训练和评估都会变得不可靠。",{"type":19,"tag":256,"props":368,"children":370},{"id":369},"缺失标签不能当阴性",[371],{"type":25,"value":369},{"type":19,"tag":27,"props":373,"children":374},{},[375],{"type":25,"value":376},"Tox21 里有缺失标签。缺失不是阴性，而是没有实验记录。",{"type":19,"tag":27,"props":378,"children":379},{},[380],{"type":25,"value":381},"如果把缺失值直接填成 0，模型会把“未知”当成“无毒”，评估结果会被污染。这里我用 mask 只在有效标签上计算 loss：",{"type":19,"tag":33,"props":383,"children":385},{"className":35,"code":384,"language":37,"meta":5,"style":5},"labels:  0 \u002F 1 \u002F missing\nmask:    valid \u002F invalid\nloss:    only valid labels\n",[386],{"type":19,"tag":40,"props":387,"children":388},{"__ignoreMap":5},[389,397,405],{"type":19,"tag":44,"props":390,"children":391},{"class":46,"line":47},[392],{"type":19,"tag":44,"props":393,"children":394},{},[395],{"type":25,"value":396},"labels:  0 \u002F 1 \u002F missing\n",{"type":19,"tag":44,"props":398,"children":399},{"class":46,"line":56},[400],{"type":19,"tag":44,"props":401,"children":402},{},[403],{"type":25,"value":404},"mask:    valid \u002F invalid\n",{"type":19,"tag":44,"props":406,"children":407},{"class":46,"line":65},[408],{"type":19,"tag":44,"props":409,"children":410},{},[411],{"type":25,"value":412},"loss:    only valid labels\n",{"type":19,"tag":27,"props":414,"children":415},{},[416],{"type":25,"value":417},"这个处理比模型结构更重要。毒性数据本来就不完整，错误处理缺失标签会让后面所有指标都失去意义。",{"type":19,"tag":256,"props":419,"children":421},{"id":420},"accuracy-不够用",[422],{"type":25,"value":423},"Accuracy 不够用",{"type":19,"tag":27,"props":425,"children":426},{},[427],{"type":25,"value":428},"Tox21 还有类别不平衡问题。很多毒性标签里，阳性样本比例并不高。如果只看 Accuracy，一个模型即使大部分都预测阴性，也可能得到看起来不错的分数。",{"type":19,"tag":27,"props":430,"children":431},{},[432],{"type":25,"value":433},"这一周主要看：",{"type":19,"tag":435,"props":436,"children":437},"table",{},[438,457],{"type":19,"tag":439,"props":440,"children":441},"thead",{},[442],{"type":19,"tag":443,"props":444,"children":445},"tr",{},[446,452],{"type":19,"tag":447,"props":448,"children":449},"th",{},[450],{"type":25,"value":451},"指标",{"type":19,"tag":447,"props":453,"children":454},{},[455],{"type":25,"value":456},"用途",{"type":19,"tag":458,"props":459,"children":460},"tbody",{},[461,475,488],{"type":19,"tag":443,"props":462,"children":463},{},[464,470],{"type":19,"tag":465,"props":466,"children":467},"td",{},[468],{"type":25,"value":469},"ROC-AUC",{"type":19,"tag":465,"props":471,"children":472},{},[473],{"type":25,"value":474},"看模型区分阳性\u002F阴性的排序能力",{"type":19,"tag":443,"props":476,"children":477},{},[478,483],{"type":19,"tag":465,"props":479,"children":480},{},[481],{"type":25,"value":482},"PR-AUC",{"type":19,"tag":465,"props":484,"children":485},{},[486],{"type":25,"value":487},"类别不平衡时更敏感",{"type":19,"tag":443,"props":489,"children":490},{},[491,496],{"type":19,"tag":465,"props":492,"children":493},{},[494],{"type":25,"value":495},"F1",{"type":19,"tag":465,"props":497,"children":498},{},[499],{"type":25,"value":500},"平衡 precision 和 recall",{"type":19,"tag":27,"props":502,"children":503},{},[504],{"type":25,"value":505},"输出报告会按任务列出指标，而不是只给一个总分。每个毒性任务的数据分布不一样，平均值只能做粗略参考。",{"type":19,"tag":256,"props":507,"children":509},{"id":508},"小分子任务的语境更清楚了",[510],{"type":25,"value":508},{"type":19,"tag":27,"props":512,"children":513},{},[514],{"type":25,"value":515},"做完 ESOL 和 Tox21 后，当前项目的对象基本明确：前半段主要处理小分子。",{"type":19,"tag":27,"props":517,"children":518},{},[519],{"type":25,"value":520},"小分子药物通常可以用 SMILES 表示，适合用 RDKit、Morgan Fingerprint、分子图、描述符、QSAR、ADMET、分子对接这些工具和任务。ESOL 是性质预测，Tox21 是毒性预测，都属于小分子方向。",{"type":19,"tag":27,"props":522,"children":523},{},[524],{"type":25,"value":525},"大分子药物则是另一套语境，比如蛋白、抗体、肽、核酸药物。它们更多依赖氨基酸序列、3D 结构、蛋白 embedding、AlphaFold 结构、蛋白语言模型等表示方式。",{"type":19,"tag":27,"props":527,"children":528},{},[529],{"type":25,"value":530},"这个区分对工程路线很重要。当前平台先把小分子的标准化、性质预测、毒性预测和后续 DTI 中的 drug 端打稳，再逐步接触蛋白端，会比一开始同时铺开小分子和大分子更可控。",{"type":19,"tag":256,"props":532,"children":534},{"id":533},"本周产物",[535],{"type":25,"value":533},{"type":19,"tag":27,"props":537,"children":538},{},[539],{"type":25,"value":540},"第 6 周对应的核心文件是：",{"type":19,"tag":33,"props":542,"children":544},{"className":35,"code":543,"language":37,"meta":5,"style":5},"ml-experiments\u002Ftox21_mlp.py\nmodels\u002Ftox21_mlp.pt\nml-experiments\u002Freports\u002Ftox21_report.md\ndocs\u002Fweek06_tox21_multilabel.md\n",[545],{"type":19,"tag":40,"props":546,"children":547},{"__ignoreMap":5},[548,556,564,572],{"type":19,"tag":44,"props":549,"children":550},{"class":46,"line":47},[551],{"type":19,"tag":44,"props":552,"children":553},{},[554],{"type":25,"value":555},"ml-experiments\u002Ftox21_mlp.py\n",{"type":19,"tag":44,"props":557,"children":558},{"class":46,"line":56},[559],{"type":19,"tag":44,"props":560,"children":561},{},[562],{"type":25,"value":563},"models\u002Ftox21_mlp.pt\n",{"type":19,"tag":44,"props":565,"children":566},{"class":46,"line":65},[567],{"type":19,"tag":44,"props":568,"children":569},{},[570],{"type":25,"value":571},"ml-experiments\u002Freports\u002Ftox21_report.md\n",{"type":19,"tag":44,"props":573,"children":574},{"class":46,"line":74},[575],{"type":19,"tag":44,"props":576,"children":577},{},[578],{"type":25,"value":579},"docs\u002Fweek06_tox21_multilabel.md\n",{"type":19,"tag":27,"props":581,"children":582},{},[583],{"type":25,"value":584},"这周把模型任务从“预测一个连续性质”推进到“同时预测多个毒性标签”。它更接近 AI 制药里真实会遇到的数据形态：标签缺失、类别不平衡、任务之间分布不同、单一指标不够解释模型质量。",{"type":19,"tag":27,"props":586,"children":587},{},[588],{"type":25,"value":589},"第 6 周之后，再看模型评估时就不能只问“分数高不高”，还要看标签怎么来的、缺失怎么处理、阳性比例是多少、指标是否适合这个任务。",{"type":19,"tag":591,"props":592,"children":593},"style",{},[594],{"type":25,"value":595},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":5,"searchDepth":56,"depth":56,"links":597},[598,599],{"id":22,"depth":56,"text":22},{"id":204,"depth":56,"text":7,"children":600},[601,602,603,604,605,606],{"id":258,"depth":65,"text":261},{"id":298,"depth":65,"text":301},{"id":369,"depth":65,"text":369},{"id":420,"depth":65,"text":423},{"id":508,"depth":65,"text":508},{"id":533,"depth":65,"text":533},"markdown","content:articles:生信基础:ai-drug-week06-tox21-multilabel.md","content","articles\u002F生信基础\u002Fai-drug-week06-tox21-multilabel.md","articles\u002F生信基础\u002Fai-drug-week06-tox21-multilabel","md",1780895158452]